Trouble with Refinery Optimization Python Script

60 Views Asked by At

Trying to write a Python script to determine how much heavy Canadian oil each of the refineries I have listed process per month based on a few constraints I listed regarding their capacities and outages, and the total amount that was processed between all of them. I have the dataframe and script below.

import pandas as pd
from pandas import Timestamp
from scipy.optimize import minimize

# Define the penalty function for constraints violation
def penalty(x, refinery_data):
    penalty_value = 0
    for index, row in refinery_data.iterrows():
        effective_coking_capacity = row['Coker_Capacity'] - row['Coker_Outage']
        # Penalties for not meeting the minimum run requirement (50% of effective coking capacity)
        penalty_value += max(0, 0.5 * effective_coking_capacity - x[index])
        # Penalties for exceeding 3 times the effective coking capacity
        penalty_value += max(0, x[index] - 3 * effective_coking_capacity)
        # Penalties for exceeding the reformer capacity
        penalty_value += max(0, x[index] * 0.1037 - row['Reformer_Capacity'])
        # Penalties for exceeding the FCC Hydrocracker capacity
        penalty_value += max(0, x[index] * 0.2960 - row['FCC_Hydrocracker_Capacity'])
        # Penalties for exceeding the total runs
        penalty_value += max(0, x[index] - row['Total_Runs'])
    return penalty_value

# Define the objective function to minimize the absolute difference with penalties for constraints violation
def objective_function(x, total_heavy_canadian_runs, refinery_data):
    return abs(total_heavy_canadian_runs - sum(x)) + penalty(x, refinery_data) * 1000  # Large penalty factor

# Define the constraints based on the user's requirements
def constraints(refinery_data):
    cons = []
    for index, row in refinery_data.iterrows():
        effective_coking_capacity = row['Coker_Capacity'] - row['Coker_Outage']
        # Constraint for minimum Heavy Canadian Runs (50% of effective coking capacity)
        cons.append({'type': 'ineq', 'fun': lambda x, idx=index: x[idx] - 0.5 * effective_coking_capacity})
        # Other constraints
        cons.append({'type': 'ineq', 'fun': lambda x, idx=index: 3 * effective_coking_capacity - x[idx]})
        cons.append({'type': 'ineq', 'fun': lambda x, idx=index: row['Reformer_Capacity'] - x[idx] * 0.1037})
        cons.append({'type': 'ineq', 'fun': lambda x, idx=index: row['FCC_Hydrocracker_Capacity'] - x[idx] * 0.2960})
        cons.append({'type': 'ineq', 'fun': lambda x, idx=index: row['Total_Runs'] - x[idx]})
    return cons

#january_data df description given below this codeblock
january_data = pd.DataFrame({'Date': {0: Timestamp('2021-01-01 00:00:00'), 1: Timestamp('2021-01-01 00:00:00'), 2: Timestamp('2021-01-01 00:00:00'), 3: Timestamp('2021-01-01 00:00:00'), 4: Timestamp('2021-01-01 00:00:00'), 5: Timestamp('2021-01-01 00:00:00'), 6: Timestamp('2021-01-01 00:00:00'), 7: Timestamp('2021-01-01 00:00:00'), 8: Timestamp('2021-01-01 00:00:00'), 9: Timestamp('2021-01-01 00:00:00'), 10: Timestamp('2021-01-01 00:00:00'), 11: Timestamp('2021-01-01 00:00:00'), 12: Timestamp('2021-01-01 00:00:00'), 13: Timestamp('2021-01-01 00:00:00'), 14: Timestamp('2021-01-01 00:00:00'), 15: Timestamp('2021-01-01 00:00:00'), 16: Timestamp('2021-01-01 00:00:00'), 17: Timestamp('2021-01-01 00:00:00'), 18: Timestamp('2021-01-01 00:00:00')}, 'Incoming': {0: 1106.255058329859, 1: 1106.255058329859, 2: 1106.255058329859, 3: 1106.255058329859, 4: 1106.255058329859, 5: 1106.255058329859, 6: 1106.255058329859, 7: 1106.255058329859, 8: 1106.255058329859, 9: 1106.255058329859, 10: 1106.255058329859, 11: 1106.255058329859, 12: 1106.255058329859, 13: 1106.255058329859, 14: 1106.255058329859, 15: 1106.255058329859, 16: 1106.255058329859, 17: 1106.255058329859, 18: 1106.255058329859}, 'Re-Exports': {0: 195.77522580645163, 1: 195.77522580645163, 2: 195.77522580645163, 3: 195.77522580645163, 4: 195.77522580645163, 5: 195.77522580645163, 6: 195.77522580645163, 7: 195.77522580645163, 8: 195.77522580645163, 9: 195.77522580645163, 10: 195.77522580645163, 11: 195.77522580645163, 12: 195.77522580645163, 13: 195.77522580645163, 14: 195.77522580645163, 15: 195.77522580645163, 16: 195.77522580645163, 17: 195.77522580645163, 18: 195.77522580645163}, 'Heavy_Canadian_Runs': {0: 910.4798325234073, 1: 910.4798325234073, 2: 910.4798325234073, 3: 910.4798325234073, 4: 910.4798325234073, 5: 910.4798325234073, 6: 910.4798325234073, 7: 910.4798325234073, 8: 910.4798325234073, 9: 910.4798325234073, 10: 910.4798325234073, 11: 910.4798325234073, 12: 910.4798325234073, 13: 910.4798325234073, 14: 910.4798325234073, 15: 910.4798325234073, 16: 910.4798325234073, 17: 910.4798325234073, 18: 910.4798325234073}, 'Refinery': {0: 'Deer Park (Pemex)', 1: 'Baytown (ExxonMobil)', 2: 'Houston Refining (LyondellBasell)', 3: 'Galveston Bay (Marathon)', 4: 'Sweeny (P66)', 5: 'Houston (Valero)', 6: 'Texas City (Valero)', 7: 'Beaumont (ExxonMobil)', 8: 'Port Arthur (Motiva)', 9: 'Port Arthur (Total)', 10: 'Port Arthur (Valero)', 11: 'Chalmette (PBF)', 12: 'Baton Rouge (ExxonMobil)', 13: 'Garyville (Marathon)', 14: 'St. Charles', 15: 'Meraux', 16: 'Lake Charles (Citgo)', 17: 'Lake Charles (P66)', 18: 'Tuscaloosa'}, 'Total_Runs': {0: 257.986677419355, 1: 488.248258064516, 2: 228.276, 3: 477.852161290323, 4: 147.900774193548, 5: 182.74764516129, 6: 185.506612903226, 7: 356.657096774194, 8: 567.9480645161291, 9: 178.156096774194, 10: 279.220677419355, 11: 106.22380645161292, 12: 519.2370645161291, 13: 606.0861612903226, 14: 110.51974193548386, 15: 95.6843870967742, 16: 343.4448064516129, 17: 202.95509677419355, 18: 48.21914580786877}, 'CDU_Capacity': {0: 325.0, 1: 563.03, 2: 270.0, 3: 592.98, 4: 256.03, 5: 205.0, 6: 223.99, 7: 380.0, 8: 660.02, 9: 208.0, 10: 335.01, 11: 192.98, 12: 502.51, 13: 556.0, 14: 215.0, 15: 125.0, 16: 418.0, 17: 260.0, 18: 50.0}, 'CDU_Outage': {0: 85.0, 1: 103.06, 2: 54.6, 3: 14.0, 4: 0.0, 5: 52.5, 6: 14.625, 7: 70.0, 8: 0.0, 9: 2.5806451612903225, 10: 33.75, 11: 48.0, 12: 105.41935483870967, 13: 165.61290322580646, 14: 58.75, 15: 20.25, 16: 132.0, 17: 48.4, 18: 0.0}, 'Coker_Capacity': {0: 81.0, 1: 86.5, 2: 93.52, 3: 30.69, 4: 70.7, 5: 0.0, 6: 53.0, 7: 45.46, 8: 145.0, 9: 50.0, 10: 95.0, 11: 40.01, 12: 113.49, 13: 98.8, 14: 87.0, 15: 0.0, 16: 98.0, 17: 57.8, 18: 32.0}, 'Coker_Outage': {0: 0.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0, 5: 0.0, 6: 0.0, 7: 0.0, 8: 0.0, 9: 0.0, 10: 0.0, 11: 0.0, 12: 0.0, 13: 0.0, 14: 0.0, 15: 0.0, 16: 98.0, 17: 0.0, 18: 0.0}, 'Reformer_Capacity': {0: 69.4, 1: 122.0, 2: 0.0, 3: 124.2, 4: 34.2, 5: 11.5, 6: 18.0, 7: 138.5, 8: 130.0, 9: 36.5, 10: 53.0, 11: 46.0, 12: 75.5, 13: 120.0, 14: 87.2, 15: 30.7, 16: 94.7, 17: 42.0, 18: 26.8}, 'FCC_Hydrocracker_Capacity': {0: 129.1, 1: 235.2, 2: 105.0, 3: 270.4, 4: 109.8, 5: 92.5, 6: 85.0, 7: 172.5, 8: 204.89999999999998, 9: 73.3, 10: 197.0, 11: 72.0, 12: 263.0, 13: 156.8, 14: 205.5, 15: 57.7, 16: 164.0, 17: 45.0, 18: 18.0}})

# Initial guess for the optimization (evenly distribute Heavy Canadian Runs among refineries)
initial_guess = [january_data['Heavy_Canadian_Runs'].iloc[0] / len(january_data)] * len(january_data)

# Define the constraints for January 2021
cons = constraints(january_data)

# Perform the optimization using Sequential Least Squares Programming (SLSQP)
result = minimize(objective_function, initial_guess, args=(january_data['Heavy_Canadian_Runs'].sum(), january_data), constraints=cons, method='SLSQP')

# Check if the optimization was successful
if result.success:
    # Print the optimization results with refinery names and values in standard notation
    print(f"Date: {january_data['Date'].iloc[0]}")
    for i, run in enumerate(result.x):
        refinery_name = january_data.iloc[i]['Refinery']
        print(f"{refinery_name}: {run:.2f} barrels")
else:
    print("Optimization was not successful. Please check the constraints and initial guess.")

here are the columns to my january_data dataframe. I am just testing out 01/01/2021. The Heavy_Canadian_Runs is the same value for each row (refinery), and one of the objectives is to get the sum of all individual refinery runs as close to that value as possible. My script works but it seems like the constraints have no impact and the initial guess is taking over, or each refinery is basically running the same amount, even with different chracteristics. Please let me know if I need to clarify anything. Thank you in advance for any help or tips that can be provided!

1

There are 1 best solutions below

0
Nick ODell On

The immediate problem you're having is that when defining a constraint like

cons.append({'type': 'ineq', 'fun': lambda x, idx=index: row['Reformer_Capacity'] - x[idx] * 0.1037})

The variable row binds at the time that the function is called, not the time it is defined. So, all of these constraints are referencing the last row.

However, I think there's a simpler way to solve this than constraints. These constraints can be expressed in a much simpler way. Let's take this constraint on the minimum runs, for example.

cons.append({'type': 'ineq', 'fun': lambda x, idx=index: x[idx] - 0.5 * effective_coking_capacity})

This is a constraint that x[idx] - 0.5 * effective_coking_capacity > 0. With some algebra, this turns into x[idx] > 0.5 * effective_coking_capacity, where effective_coking_capacity is a constant for each index in x. Since this constraint involves only one variable, and is being compared to a constant, this means that this can be turned from a constraint to a bound. minimize() can deal with bounds much more easily than it can deal with constraints.

You can re-write each of those constraints to find the lower and upper limits of each x value, for each refinery.

limit_df['Coke Limit Lower'] = 0.5 * (limit_df['Coker_Capacity'] - limit_df['Coker_Outage'])
limit_df['Coke Limit Upper'] = 3 * (limit_df['Coker_Capacity'] - limit_df['Coker_Outage'])
limit_df['Reformer Limit'] = limit_df['Reformer_Capacity'] / 0.1037
limit_df['Hydrocracker Limit'] = limit_df['FCC_Hydrocracker_Capacity'] / 0.2960
limit_df['Runs Limit'] = limit_df['Total_Runs']

These numbers represent the maximum that the refinery could produce, if each of those constraints were the only constraint on production.

Now, we have to combine these limits, and find the one which is the bottleneck. We have four upper limits, which we can combine by finding the minimum of all of them. We have one lower limit, which we can use as-is.

limit_df['Binding Limit Lower'] = limit_df['Coke Limit Lower']
limit_df['Binding Limit Upper'] = limit_df[['Coke Limit Upper', 'Reformer Limit', 'Hydrocracker Limit', 'Runs Limit']].min(axis=1)

Let's examine the limits we've found:

>>> print(limit_df[['Binding Limit Lower', 'Binding Limit Upper']])
    Binding Limit Lower  Binding Limit Upper
0                40.500           243.000000
1                43.250           259.500000
2                46.760             0.000000
3                15.345            92.070000
4                35.350           147.900774
5                 0.000             0.000000
6                26.500           159.000000
7                22.730           136.380000
8                72.500           435.000000
9                25.000           150.000000
10               47.500           279.220677
11               20.005           106.223806
12               56.745           340.470000
13               49.400           296.400000
14               43.500           110.519742
15                0.000             0.000000
16                0.000             0.000000
17               28.900           152.027027
18               16.000            48.219146

We have a problem with refinery 2: it has an upper limit lower than the lower limit. Let's explore why:

>>> print(january_data.loc[2])
Date                                       2021-01-01 00:00:00
Incoming                                           1106.255058
Re-Exports                                          195.775226
Heavy_Canadian_Runs                                 910.479833
Refinery                     Houston Refining (LyondellBasell)
Total_Runs                                             228.276
CDU_Capacity                                             270.0
CDU_Outage                                                54.6
Coker_Capacity                                           93.52
Coker_Outage                                               0.0
Reformer_Capacity                                          0.0
FCC_Hydrocracker_Capacity                                105.0
Name: 2, dtype: object

This refinery has a Coker Capacity of 93.52, and X must be at least half this value. So X >= 46.76. But it also has a Reformer Capacity of zero, which means that X <= 0. Therefore, some of the constraints are contradictory. This is probably why you're having so much trouble with the minimizer.

I'm going to assume this is a mistake in the original data, and refinery 2 has plenty of reformer capacity.

january_data.loc[2, 'Reformer_Capacity'] = 1000

Here's how you can give these bounds to the minimizer:

from scipy.optimize import Bounds
bounds = Bounds(lb=limit_df['Binding Limit Lower'], ub=limit_df['Binding Limit Upper'])
result = minimize(objective_function, initial_guess, args=(january_data['Heavy_Canadian_Runs'].iloc[0], january_data), bounds=bounds, method='SLSQP')

(Note: I changed january_data['Heavy_Canadian_Runs'].sum() to january_data['Heavy_Canadian_Runs'].iloc[0], because that looked like a mistake to me.)

Given these bounds, the minimizer can solve the problem. It can do it much faster, because it is much easier for the minimizer to reason about bounds than it is for it to reason about constraints. But it occurs to me, that if we know the maximum and minimum production figures for each refinery, then we don't need the minimizer at all. We can find a solution with some algebra.

Here's how: we work out how much production the refineries must make to satisfy the coking constraint. We subtract the minimum production from the maximum to find the discretionary production. This is production we can choose whether or not to use. We give each refinery an equal percentage of their discretionary production.

demand = january_data['Heavy_Canadian_Runs'].iloc[0]
discretionary_production = limit_df['Binding Limit Upper'] - limit_df['Binding Limit Lower']
min_production = limit_df['Binding Limit Lower'].sum()
max_production = limit_df['Binding Limit Upper'].sum()
percentage_of_discretionary = (demand - min_production) / (discretionary_production.sum())
# Clip percentage between 0 and 1
percentage_of_discretionary = max(min(percentage_of_discretionary, 1), 0)
production = percentage_of_discretionary * discretionary_production + limit_df['Binding Limit Lower']

Now production contains the production figures required to produce the demand, without violating your constraints.

Next, it can be printed out:

# Print summary
print(f"Demand: {demand:.2f}")
print(f"Total production: {production.sum():.2f}")
print()
for i, run in enumerate(production):
    refinery_name = january_data.iloc[i]['Refinery']
    min_ = limit_df.iloc[i]['Binding Limit Lower']
    max_ = limit_df.iloc[i]['Binding Limit Upper']
    print(f"{refinery_name}: {run:.2f} barrels, min {min_:.2f}, max {max_:.2f}")

Result:

Demand: 910.48
Total production: 910.48

Deer Park (Pemex): 65.52 barrels, min 40.50, max 243.00
Baytown (ExxonMobil): 69.97 barrels, min 43.25, max 259.50
Houston Refining (LyondellBasell): 69.18 barrels, min 46.76, max 228.28
Galveston Bay (Marathon): 24.82 barrels, min 15.35, max 92.07
Sweeny (P66): 49.25 barrels, min 35.35, max 147.90
Houston (Valero): 0.00 barrels, min 0.00, max 0.00
Texas City (Valero): 42.87 barrels, min 26.50, max 159.00
Beaumont (ExxonMobil): 36.77 barrels, min 22.73, max 136.38
Port Arthur (Motiva): 117.28 barrels, min 72.50, max 435.00
Port Arthur (Total): 40.44 barrels, min 25.00, max 150.00
Port Arthur (Valero): 76.13 barrels, min 47.50, max 279.22
Chalmette (PBF): 30.66 barrels, min 20.00, max 106.22
Baton Rouge (ExxonMobil): 91.80 barrels, min 56.74, max 340.47
Garyville (Marathon): 79.91 barrels, min 49.40, max 296.40
St. Charles: 51.78 barrels, min 43.50, max 110.52
Meraux: 0.00 barrels, min 0.00, max 0.00
Lake Charles (Citgo): 0.00 barrels, min 0.00, max 0.00
Lake Charles (P66): 44.11 barrels, min 28.90, max 152.03
Tuscaloosa: 19.98 barrels, min 16.00, max 48.22

Full code

import pandas as pd
from pandas import Timestamp


january_data = pd.DataFrame({'Date': {0: Timestamp('2021-01-01 00:00:00'), 1: Timestamp('2021-01-01 00:00:00'), 2: Timestamp('2021-01-01 00:00:00'), 3: Timestamp('2021-01-01 00:00:00'), 4: Timestamp('2021-01-01 00:00:00'), 5: Timestamp('2021-01-01 00:00:00'), 6: Timestamp('2021-01-01 00:00:00'), 7: Timestamp('2021-01-01 00:00:00'), 8: Timestamp('2021-01-01 00:00:00'), 9: Timestamp('2021-01-01 00:00:00'), 10: Timestamp('2021-01-01 00:00:00'), 11: Timestamp('2021-01-01 00:00:00'), 12: Timestamp('2021-01-01 00:00:00'), 13: Timestamp('2021-01-01 00:00:00'), 14: Timestamp('2021-01-01 00:00:00'), 15: Timestamp('2021-01-01 00:00:00'), 16: Timestamp('2021-01-01 00:00:00'), 17: Timestamp('2021-01-01 00:00:00'), 18: Timestamp('2021-01-01 00:00:00')}, 'Incoming': {0: 1106.255058329859, 1: 1106.255058329859, 2: 1106.255058329859, 3: 1106.255058329859, 4: 1106.255058329859, 5: 1106.255058329859, 6: 1106.255058329859, 7: 1106.255058329859, 8: 1106.255058329859, 9: 1106.255058329859, 10: 1106.255058329859, 11: 1106.255058329859, 12: 1106.255058329859, 13: 1106.255058329859, 14: 1106.255058329859, 15: 1106.255058329859, 16: 1106.255058329859, 17: 1106.255058329859, 18: 1106.255058329859}, 'Re-Exports': {0: 195.77522580645163, 1: 195.77522580645163, 2: 195.77522580645163, 3: 195.77522580645163, 4: 195.77522580645163, 5: 195.77522580645163, 6: 195.77522580645163, 7: 195.77522580645163, 8: 195.77522580645163, 9: 195.77522580645163, 10: 195.77522580645163, 11: 195.77522580645163, 12: 195.77522580645163, 13: 195.77522580645163, 14: 195.77522580645163, 15: 195.77522580645163, 16: 195.77522580645163, 17: 195.77522580645163, 18: 195.77522580645163}, 'Heavy_Canadian_Runs': {0: 910.4798325234073, 1: 910.4798325234073, 2: 910.4798325234073, 3: 910.4798325234073, 4: 910.4798325234073, 5: 910.4798325234073, 6: 910.4798325234073, 7: 910.4798325234073, 8: 910.4798325234073, 9: 910.4798325234073, 10: 910.4798325234073, 11: 910.4798325234073, 12: 910.4798325234073, 13: 910.4798325234073, 14: 910.4798325234073, 15: 910.4798325234073, 16: 910.4798325234073, 17: 910.4798325234073, 18: 910.4798325234073}, 'Refinery': {0: 'Deer Park (Pemex)', 1: 'Baytown (ExxonMobil)', 2: 'Houston Refining (LyondellBasell)', 3: 'Galveston Bay (Marathon)', 4: 'Sweeny (P66)', 5: 'Houston (Valero)', 6: 'Texas City (Valero)', 7: 'Beaumont (ExxonMobil)', 8: 'Port Arthur (Motiva)', 9: 'Port Arthur (Total)', 10: 'Port Arthur (Valero)', 11: 'Chalmette (PBF)', 12: 'Baton Rouge (ExxonMobil)', 13: 'Garyville (Marathon)', 14: 'St. Charles', 15: 'Meraux', 16: 'Lake Charles (Citgo)', 17: 'Lake Charles (P66)', 18: 'Tuscaloosa'}, 'Total_Runs': {0: 257.986677419355, 1: 488.248258064516, 2: 228.276, 3: 477.852161290323, 4: 147.900774193548, 5: 182.74764516129, 6: 185.506612903226, 7: 356.657096774194, 8: 567.9480645161291, 9: 178.156096774194, 10: 279.220677419355, 11: 106.22380645161292, 12: 519.2370645161291, 13: 606.0861612903226, 14: 110.51974193548386, 15: 95.6843870967742, 16: 343.4448064516129, 17: 202.95509677419355, 18: 48.21914580786877}, 'CDU_Capacity': {0: 325.0, 1: 563.03, 2: 270.0, 3: 592.98, 4: 256.03, 5: 205.0, 6: 223.99, 7: 380.0, 8: 660.02, 9: 208.0, 10: 335.01, 11: 192.98, 12: 502.51, 13: 556.0, 14: 215.0, 15: 125.0, 16: 418.0, 17: 260.0, 18: 50.0}, 'CDU_Outage': {0: 85.0, 1: 103.06, 2: 54.6, 3: 14.0, 4: 0.0, 5: 52.5, 6: 14.625, 7: 70.0, 8: 0.0, 9: 2.5806451612903225, 10: 33.75, 11: 48.0, 12: 105.41935483870967, 13: 165.61290322580646, 14: 58.75, 15: 20.25, 16: 132.0, 17: 48.4, 18: 0.0}, 'Coker_Capacity': {0: 81.0, 1: 86.5, 2: 93.52, 3: 30.69, 4: 70.7, 5: 0.0, 6: 53.0, 7: 45.46, 8: 145.0, 9: 50.0, 10: 95.0, 11: 40.01, 12: 113.49, 13: 98.8, 14: 87.0, 15: 0.0, 16: 98.0, 17: 57.8, 18: 32.0}, 'Coker_Outage': {0: 0.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0, 5: 0.0, 6: 0.0, 7: 0.0, 8: 0.0, 9: 0.0, 10: 0.0, 11: 0.0, 12: 0.0, 13: 0.0, 14: 0.0, 15: 0.0, 16: 98.0, 17: 0.0, 18: 0.0}, 'Reformer_Capacity': {0: 69.4, 1: 122.0, 2: 0.0, 3: 124.2, 4: 34.2, 5: 11.5, 6: 18.0, 7: 138.5, 8: 130.0, 9: 36.5, 10: 53.0, 11: 46.0, 12: 75.5, 13: 120.0, 14: 87.2, 15: 30.7, 16: 94.7, 17: 42.0, 18: 26.8}, 'FCC_Hydrocracker_Capacity': {0: 129.1, 1: 235.2, 2: 105.0, 3: 270.4, 4: 109.8, 5: 92.5, 6: 85.0, 7: 172.5, 8: 204.89999999999998, 9: 73.3, 10: 197.0, 11: 72.0, 12: 263.0, 13: 156.8, 14: 205.5, 15: 57.7, 16: 164.0, 17: 45.0, 18: 18.0}})

# Warning: terrible hack. This assumes that refinery #2 has lots of reformer capacity.
# You should figure out what's wrong with your data.
january_data.loc[2, 'Reformer_Capacity'] = 1000

# Find upper and lower limits of production for each refinery
limit_df = january_data.copy()
limit_df['Coke Limit Lower'] = 0.5 * (limit_df['Coker_Capacity'] - limit_df['Coker_Outage'])
limit_df['Coke Limit Upper'] = 3 * (limit_df['Coker_Capacity'] - limit_df['Coker_Outage'])
limit_df['Reformer Limit'] = limit_df['Reformer_Capacity'] / 0.1037
limit_df['Hydrocracker Limit'] = limit_df['FCC_Hydrocracker_Capacity'] / 0.2960
limit_df['Runs Limit'] = limit_df['Total_Runs']
limit_df['Binding Limit Lower'] = limit_df['Coke Limit Lower']
limit_df['Binding Limit Upper'] = limit_df[['Coke Limit Upper', 'Reformer Limit', 'Hydrocracker Limit', 'Runs Limit']].min(axis=1)


# Find production given the limits found in previous step
demand = january_data['Heavy_Canadian_Runs'].iloc[0]
discretionary_production = limit_df['Binding Limit Upper'] - limit_df['Binding Limit Lower']
min_production = limit_df['Binding Limit Lower'].sum()
percentage_of_discretionary = (demand - min_production) / (discretionary_production.sum())
# Clip percentage between 0 and 1
percentage_of_discretionary = max(min(percentage_of_discretionary, 1), 0)
production = percentage_of_discretionary * discretionary_production + limit_df['Binding Limit Lower']

# Print summary
print(f"Demand: {demand:.2f}")
print(f"Total production: {production.sum():.2f}")
print()
for i, run in enumerate(production):
    refinery_name = january_data.iloc[i]['Refinery']
    min_ = limit_df.iloc[i]['Binding Limit Lower']
    max_ = limit_df.iloc[i]['Binding Limit Upper']
    print(f"{refinery_name}: {run:.2f} barrels, min {min_:.2f}, max {max_:.2f}")