I am trying to run a Bayesian Linear Regression , but I am unable to create a posterior distribution using the sample() function from pymc. The code is as follows
import pandas as pd
from random import randint
# Generate date range
dates = pd.date_range(start="2021-01-01", end="2021-01-30")
data = {
"date": dates,
"gcm_direct_Impressions": [randint(10000, 20000) for _ in dates],
"tv_grps": [randint(30, 50) for _ in dates],
"tiktok_direct_Impressions": [randint(10000, 15000) for _ in dates],
"sell_out_quantity": [randint(150, 250) for _ in dates]
}
df = pd.DataFrame(data)
#df.to_csv("dataset.csv", index=False)
max(df['sell_out_quantity'].values)
# Assigning the 'final_data' dataset to a new variable 'data' for further analysis.
import pandas as pd
data = df
# Defining a list of variables for transformation. These include various factors that
# might impact the analysis like trends, seasons, holidays, competitor sales, and different
# marketing channels.
transform_variables = ["gcm_direct_Impressions","tv_grps","tiktok_direct_Impressions"]
# Identifying the channels that have a delay effect. This means the impact of these channels
# on the target variable (like revenue) might not be immediate but delayed.
delay_channels = ["gcm_direct_Impressions","tv_grps","tiktok_direct_Impressions"]
# Listing the media channels that are part of the analysis. These are the channels through
# which advertising or marketing is done.
media_channels = ["gcm_direct_Impressions","tv_grps","tiktok_direct_Impressions"]
# Specifying the control variables. These are the factors that need to be controlled or
# accounted for in the analysis to isolate the effects of the media channels.
#control_variables = ["trend", "season", "holiday", "competitor_sales_B", "events"]
# Defining the target variable for the analysis, which in this case is 'revenue'. This is
# likely the outcome or the dependent variable the analysis aims to predict or explain.
target = "sell_out_quantity"
#!pip install scikit-learn
from sklearn.preprocessing import MinMaxScaler
# Creating a copy of the 'data' dataframe to apply transformations. This ensures that
# the original data remains unchanged.
data_transformed = data.copy()
# Initializing a dictionary to store the MinMaxScaler instances for each feature.
# This will be useful for inverse transformations later.
numerical_encoder_dict = {}
# Looping through each feature in the list of variables to transform.
for feature in transform_variables:
# Initializing a MinMaxScaler. This scaler transforms each feature to a given range,
# usually between 0 and 1, which is helpful for normalization.
scaler = MinMaxScaler()
# Reshaping the data for the feature into a 2D array, as required by the scaler.
original = data[feature].values.reshape(-1, 1)
# Applying the scaler to the feature and transforming the data.
transformed = scaler.fit_transform(original)
# Storing the transformed data back into the 'data_transformed' DataFrame.
data_transformed[feature] = transformed
# Saving the scaler instance in the dictionary for each feature.
# This will be used for reversing the transformation if needed.
numerical_encoder_dict[feature] = scaler
# Placeholder for a potential dependent variable transformation, not utilized here.
dependent_transformation = None
# Scaling the target variable 'revenue' by dividing it by 100,000.
# This kind of scaling might be done to bring the target variable to a smaller range
# or to improve the interpretability of the model's results.
original = data[target].values
data_transformed[target] = original
import pymc as pm
import numpy as np
import pytensor.tensor as tt
# Initializing an empty list to store the mean response from different channels and control variables.
response_mean = []
# Creating a new PyMC3 model context. All the model definitions inside this block
# are part of 'model_2'.
with pm.Model() as model_2:
# Looping through each channel in the list of delay channels.
for channel_name in delay_channels:
print(f"Delay Channels: Adding {channel_name}")
# Extracting the transformed data for the current channel.
x = data_transformed[channel_name].values
# Defining Bayesian priors for the adstock, gamma, and alpha parameters for the current channel.
adstock_param = pm.Beta(f"{channel_name}_adstock", 2, 2)
saturation_gamma = pm.Beta(f"{channel_name}_gamma", 2, 2)
saturation_alpha = pm.Gamma(f"{channel_name}_alpha", 3, 1)
transformed_X1 = tt.zeros_like(x)
for i in range(1, len(x)):
transformed_X1 = tt.set_subtensor(transformed_X1[i], x[i] + adstock_param * x[i - 1])
transformed_X2 = tt.zeros_like(x)
for i in range(1,len(x)):
transformed_X2 = tt.set_subtensor(transformed_X2[i],(transformed_X1[i]**saturation_alpha)/(transformed_X1[i]**saturation_alpha+saturation_gamma**saturation_alpha))
channel_b = pm.HalfNormal(f"{channel_name}_media_coef", sigma = 250)
response_mean.append(transformed_X2 * channel_b)
intercept = pm.Normal("intercept",mu = np.mean(data_transformed[target].values), sigma = 3)
sigma = pm.HalfNormal("sigma", 4)
likelihood = pm.Normal("outcome", mu = intercept + sum(response_mean), sigma = sigma,
observed = data_transformed[target].values)
import arviz as az
# Continuing the model context defined previously as 'model_2'.
with model_2:
# Sampling from the posterior distribution of the model.
# This is the process where PyMC3 generates samples that represent the distribution
# of the parameters given the data and the priors.
# 'pm.sample()' is the main function to perform this sampling.
# The parameters of pm.sample() are set to control the sampling process:
# 1000 samples are drawn after a tuning phase of 1000 iterations.
# 'target_accept' is set to 0.95, which is the target acceptance rate of the sampler.
# A higher acceptance rate can help in achieving better convergence but might slow down the sampling.
# 'return_inferencedata=True' makes the function return an InferenceData object,
# which is useful for further analysis using ArviZ.
trace = pm.sample(1000, tune=1000, target_accept=0.95, return_inferencedata=True)
# Summarizing the trace. This generates a summary of the posterior distribution
# for each parameter in the model, providing statistics like mean, standard deviation,
# and the HPD (highest posterior density) interval.
# This summary is useful for understanding the results of the model and for diagnostics.
trace_summary = az.summary(trace)
After running the above code, I get the following error which I am unable to solve even though I removed and installed jupyter notebook and re-installed all the packages. This is on a Mac M1 machine.
Libraries and their versions
- pymc (5.6.1)
- numpy (1.23.5)
- pytensor (2.12.3)
You can find the C code in this temporary file: /var/folders/jw/qb6bs44j0vgfsf52lxw71h300000gn/T/pytensor_compilation_error_v3472mqt
---------------------------------------------------------------------------
CompileError Traceback (most recent call last)
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/vm.py in make_all(self, profiler, input_storage, output_storage, storage_map)
1242 thunks.append(
-> 1243 node.op.make_thunk(node, storage_map, compute_map, [], impl=impl)
1244 )
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/op.py in make_thunk(self, node, storage_map, compute_map, no_recycling, impl)
130 try:
--> 131 return self.make_c_thunk(node, storage_map, compute_map, no_recycling)
132 except (NotImplementedError, MethodNotDefined):
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/op.py in make_c_thunk(self, node, storage_map, compute_map, no_recycling)
95 raise NotImplementedError("float16")
---> 96 outputs = cl.make_thunk(
97 input_storage=node_input_storage, output_storage=node_output_storage
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/basic.py in make_thunk(self, input_storage, output_storage, storage_map, cache, **kwargs)
1199 init_tasks, tasks = self.get_init_tasks()
-> 1200 cthunk, module, in_storage, out_storage, error_storage = self.__compile__(
1201 input_storage, output_storage, storage_map, cache
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/basic.py in __compile__(self, input_storage, output_storage, storage_map, cache)
1119 output_storage = tuple(output_storage)
-> 1120 thunk, module = self.cthunk_factory(
1121 error_storage,
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/basic.py in cthunk_factory(self, error_storage, in_storage, out_storage, storage_map, cache)
1643 cache = get_module_cache()
-> 1644 module = cache.module_from_key(key=key, lnk=self)
1645
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/cmodule.py in module_from_key(self, key, lnk)
1239 location = dlimport_workdir(self.dirname)
-> 1240 module = lnk.compile_cmodule(location)
1241 name = module.__file__
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/basic.py in compile_cmodule(self, location)
1542 _logger.debug(f"LOCATION {location}")
-> 1543 module = c_compiler.compile_str(
1544 module_name=mod.code_hash,
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/cmodule.py in compile_str(module_name, src_code, location, include_dirs, lib_dirs, libs, preargs, py_module, hide_symbols)
2648 # compile_stderr = compile_stderr.replace("\n", ". ")
-> 2649 raise CompileError(
2650 f"Compilation failed (return status={status}):\n{' '.join(cmd)}\n{compile_stderr}"
CompileError: Compilation failed (return status=1):
/usr/bin/clang++ -dynamiclib -g -O3 -fno-math-errno -Wno-unused-label -Wno-unused-variable -Wno-write-strings -Wno-c++11-narrowing -fno-exceptions -fno-unwind-tables -fno-asynchronous-unwind-tables -DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION -fPIC -undefined dynamic_lookup -I/Users/adhokshaja/miniconda3/envs/pymc_env/lib/python3.8/site-packages/numpy/core/include -I/Users/adhokshaja/miniconda3/envs/pymc_env/include/python3.8 -I/Users/adhokshaja/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/c_code -L/Users/adhokshaja/miniconda3/envs/pymc_env/lib -fvisibility=hidden -o /Users/adhokshaja/.pytensor/compiledir_macOS-13.2-arm64-arm-64bit-arm-3.8.18-64/tmpjj80nvdi/me35a44294d03835a76b7f9ad569bbbc122b29dc588c89cb224fa59ca0e0ec6cd.so /Users/adhokshaja/.pytensor/compiledir_macOS-13.2-arm64-arm-64bit-arm-3.8.18-64/tmpjj80nvdi/mod.cpp
/Users/adhokshaja/.pytensor/compiledir_macOS-13.2-arm64-arm-64bit-arm-3.8.18-64/tmpjj80nvdi/mod.cpp:25480:32: fatal error: bracket nesting level exceeded maximum of 256
if (!PyErr_Occurred()) {
^
/Users/adhokshaja/.pytensor/compiledir_macOS-13.2-arm64-arm-64bit-arm-3.8.18-64/tmpjj80nvdi/mod.cpp:25480:32: note: use -fbracket-depth=N to increase maximum nesting level
1 error generated.
During handling of the above exception, another exception occurred:
CompileError Traceback (most recent call last)
<ipython-input-6-412f3306e835> in <cell line: 4>()
13 # 'return_inferencedata=True' makes the function return an InferenceData object,
14 # which is useful for further analysis using ArviZ.
---> 15 trace = pm.sample(1000, tune=1000, target_accept=0.95, return_inferencedata=True)
16
17 # Summarizing the trace. This generates a summary of the posterior distribution
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pymc/sampling/mcmc.py in sample(draws, tune, chains, cores, random_seed, progressbar, step, nuts_sampler, initvals, init, jitter_max_retries, n_init, trace, discard_tuned_samples, compute_convergence_checks, keep_warning_stat, return_inferencedata, idata_kwargs, nuts_sampler_kwargs, callback, mp_ctx, model, **kwargs)
651
652 initial_points = None
--> 653 step = assign_step_methods(model, step, methods=pm.STEP_METHODS, step_kwargs=kwargs)
654
655 if nuts_sampler != "pymc":
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pymc/sampling/mcmc.py in assign_step_methods(model, step, methods, step_kwargs)
231 selected_steps.setdefault(selected, []).append(var)
232
--> 233 return instantiate_steppers(model, steps, selected_steps, step_kwargs)
234
235
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pymc/sampling/mcmc.py in instantiate_steppers(model, steps, selected_steps, step_kwargs)
132 args = step_kwargs.get(name, {})
133 used_keys.add(name)
--> 134 step = step_class(vars=vars, model=model, **args)
135 steps.append(step)
136
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pymc/step_methods/hmc/nuts.py in __init__(self, vars, max_treedepth, early_max_treedepth, **kwargs)
178 `pm.sample` to the desired number of tuning steps.
179 """
--> 180 super().__init__(vars, **kwargs)
181
182 self.max_treedepth = max_treedepth
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pymc/step_methods/hmc/base_hmc.py in __init__(self, vars, scaling, step_scale, is_cov, model, blocked, potential, dtype, Emax, target_accept, gamma, k, t0, adapt_step_size, step_rand, **pytensor_kwargs)
107 else:
108 vars = get_value_vars_from_user_vars(vars, self._model)
--> 109 super().__init__(vars, blocked=blocked, model=self._model, dtype=dtype, **pytensor_kwargs)
110
111 self.adapt_step_size = adapt_step_size
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pymc/step_methods/arraystep.py in __init__(self, vars, model, blocked, dtype, logp_dlogp_func, **pytensor_kwargs)
162
163 if logp_dlogp_func is None:
--> 164 func = model.logp_dlogp_function(vars, dtype=dtype, **pytensor_kwargs)
165 else:
166 func = logp_dlogp_func
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pymc/model.py in logp_dlogp_function(self, grad_vars, tempered, **kwargs)
607 if var in input_vars and var not in grad_vars
608 }
--> 609 return ValueGradFunction(costs, grad_vars, extra_vars_and_values, **kwargs)
610
611 def compile_logp(
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pymc/model.py in __init__(self, costs, grad_vars, extra_vars_and_values, dtype, casting, compute_grads, **kwargs)
346 inputs = grad_vars
347
--> 348 self._pytensor_function = compile_pymc(inputs, outputs, givens=givens, **kwargs)
349
350 def set_weights(self, values):
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pymc/pytensorf.py in compile_pymc(inputs, outputs, random_seed, mode, **kwargs)
1194 opt_qry = mode.provided_optimizer.including("random_make_inplace", check_parameter_opt)
1195 mode = Mode(linker=mode.linker, optimizer=opt_qry)
-> 1196 pytensor_function = pytensor.function(
1197 inputs,
1198 outputs,
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/compile/function/__init__.py in function(inputs, outputs, mode, updates, givens, no_default_updates, accept_inplace, name, rebuild_strict, allow_input_downcast, profile, on_unused_input)
313 # note: pfunc will also call orig_function -- orig_function is
314 # a choke point that all compilation must pass through
--> 315 fn = pfunc(
316 params=inputs,
317 outputs=outputs,
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/compile/function/pfunc.py in pfunc(params, outputs, mode, updates, givens, no_default_updates, accept_inplace, name, rebuild_strict, allow_input_downcast, profile, on_unused_input, output_keys, fgraph)
365 )
366
--> 367 return orig_function(
368 inputs,
369 cloned_outputs,
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/compile/function/types.py in orig_function(inputs, outputs, mode, accept_inplace, name, profile, on_unused_input, output_keys, fgraph)
1754 )
1755 with config.change_flags(compute_test_value="off"):
-> 1756 fn = m.create(defaults)
1757 finally:
1758 t2 = time.perf_counter()
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/compile/function/types.py in create(self, input_storage, storage_map)
1647
1648 with config.change_flags(traceback__limit=config.traceback__compile_limit):
-> 1649 _fn, _i, _o = self.linker.make_thunk(
1650 input_storage=input_storage_lists, storage_map=storage_map
1651 )
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/basic.py in make_thunk(self, input_storage, output_storage, storage_map, **kwargs)
252 **kwargs,
253 ) -> Tuple["BasicThunkType", "InputStorageType", "OutputStorageType"]:
--> 254 return self.make_all(
255 input_storage=input_storage,
256 output_storage=output_storage,
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/vm.py in make_all(self, profiler, input_storage, output_storage, storage_map)
1250 thunks[-1].lazy = False
1251 except Exception:
-> 1252 raise_with_op(fgraph, node)
1253
1254 t1 = time.perf_counter()
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/utils.py in raise_with_op(fgraph, node, thunk, exc_info, storage_map)
533 # Some exception need extra parameter in inputs. So forget the
534 # extra long error message in that case.
--> 535 raise exc_value.with_traceback(exc_trace)
536
537
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/vm.py in make_all(self, profiler, input_storage, output_storage, storage_map)
1241 # no_recycling here.
1242 thunks.append(
-> 1243 node.op.make_thunk(node, storage_map, compute_map, [], impl=impl)
1244 )
1245 linker_make_thunk_time[node] = time.perf_counter() - thunk_start
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/op.py in make_thunk(self, node, storage_map, compute_map, no_recycling, impl)
129 )
130 try:
--> 131 return self.make_c_thunk(node, storage_map, compute_map, no_recycling)
132 except (NotImplementedError, MethodNotDefined):
133 # We requested the c code, so don't catch the error.
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/op.py in make_c_thunk(self, node, storage_map, compute_map, no_recycling)
94 print(f"Disabling C code for {self} due to unsupported float16")
95 raise NotImplementedError("float16")
---> 96 outputs = cl.make_thunk(
97 input_storage=node_input_storage, output_storage=node_output_storage
98 )
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/basic.py in make_thunk(self, input_storage, output_storage, storage_map, cache, **kwargs)
1198 """
1199 init_tasks, tasks = self.get_init_tasks()
-> 1200 cthunk, module, in_storage, out_storage, error_storage = self.__compile__(
1201 input_storage, output_storage, storage_map, cache
1202 )
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/basic.py in __compile__(self, input_storage, output_storage, storage_map, cache)
1118 input_storage = tuple(input_storage)
1119 output_storage = tuple(output_storage)
-> 1120 thunk, module = self.cthunk_factory(
1121 error_storage,
1122 input_storage,
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/basic.py in cthunk_factory(self, error_storage, in_storage, out_storage, storage_map, cache)
1642 if cache is None:
1643 cache = get_module_cache()
-> 1644 module = cache.module_from_key(key=key, lnk=self)
1645
1646 vars = self.inputs + self.outputs + self.orphans
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/cmodule.py in module_from_key(self, key, lnk)
1238 try:
1239 location = dlimport_workdir(self.dirname)
-> 1240 module = lnk.compile_cmodule(location)
1241 name = module.__file__
1242 assert name.startswith(location)
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/basic.py in compile_cmodule(self, location)
1541 try:
1542 _logger.debug(f"LOCATION {location}")
-> 1543 module = c_compiler.compile_str(
1544 module_name=mod.code_hash,
1545 src_code=src_code,
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/cmodule.py in compile_str(module_name, src_code, location, include_dirs, lib_dirs, libs, preargs, py_module, hide_symbols)
2647 # difficult to read.
2648 # compile_stderr = compile_stderr.replace("\n", ". ")
-> 2649 raise CompileError(
2650 f"Compilation failed (return status={status}):\n{' '.join(cmd)}\n{compile_stderr}"
2651 )
CompileError: Compilation failed (return status=1):