Unable to run pm.sample() function using PyMC Python library even though I re-installed the libraries

141 Views Asked by At

I am trying to run a Bayesian Linear Regression , but I am unable to create a posterior distribution using the sample() function from pymc. The code is as follows

import pandas as pd
from random import randint

# Generate date range
dates = pd.date_range(start="2021-01-01", end="2021-01-30")

data = {
    "date": dates,
    "gcm_direct_Impressions": [randint(10000, 20000) for _ in dates],
    "tv_grps": [randint(30, 50) for _ in dates],
    "tiktok_direct_Impressions": [randint(10000, 15000) for _ in dates],
    "sell_out_quantity": [randint(150, 250) for _ in dates]
}
df = pd.DataFrame(data)
#df.to_csv("dataset.csv", index=False)

max(df['sell_out_quantity'].values)

# Assigning the 'final_data' dataset to a new variable 'data' for further analysis.
import pandas as pd

data = df

# Defining a list of variables for transformation. These include various factors that 
# might impact the analysis like trends, seasons, holidays, competitor sales, and different 
# marketing channels.
transform_variables = ["gcm_direct_Impressions","tv_grps","tiktok_direct_Impressions"]

# Identifying the channels that have a delay effect. This means the impact of these channels 
# on the target variable (like revenue) might not be immediate but delayed.
delay_channels = ["gcm_direct_Impressions","tv_grps","tiktok_direct_Impressions"]

# Listing the media channels that are part of the analysis. These are the channels through 
# which advertising or marketing is done.
media_channels = ["gcm_direct_Impressions","tv_grps","tiktok_direct_Impressions"]

# Specifying the control variables. These are the factors that need to be controlled or 
# accounted for in the analysis to isolate the effects of the media channels.
#control_variables = ["trend", "season", "holiday", "competitor_sales_B", "events"]

# Defining the target variable for the analysis, which in this case is 'revenue'. This is 
# likely the outcome or the dependent variable the analysis aims to predict or explain.
target = "sell_out_quantity"

#!pip install scikit-learn


from sklearn.preprocessing import MinMaxScaler

# Creating a copy of the 'data' dataframe to apply transformations. This ensures that 
# the original data remains unchanged.
data_transformed = data.copy()

# Initializing a dictionary to store the MinMaxScaler instances for each feature. 
# This will be useful for inverse transformations later.
numerical_encoder_dict = {}

# Looping through each feature in the list of variables to transform.
for feature in transform_variables:
    # Initializing a MinMaxScaler. This scaler transforms each feature to a given range, 
    # usually between 0 and 1, which is helpful for normalization.
    scaler = MinMaxScaler()

    # Reshaping the data for the feature into a 2D array, as required by the scaler.
    original = data[feature].values.reshape(-1, 1)

    # Applying the scaler to the feature and transforming the data.
    transformed = scaler.fit_transform(original)

    # Storing the transformed data back into the 'data_transformed' DataFrame.
    data_transformed[feature] = transformed

    # Saving the scaler instance in the dictionary for each feature. 
    # This will be used for reversing the transformation if needed.
    numerical_encoder_dict[feature] = scaler

# Placeholder for a potential dependent variable transformation, not utilized here.
dependent_transformation = None

# Scaling the target variable 'revenue' by dividing it by 100,000. 
# This kind of scaling might be done to bring the target variable to a smaller range 
# or to improve the interpretability of the model's results.
original = data[target].values
data_transformed[target] = original

import pymc as pm
import numpy as np
import pytensor.tensor as tt

# Initializing an empty list to store the mean response from different channels and control variables.
response_mean = []

# Creating a new PyMC3 model context. All the model definitions inside this block 
# are part of 'model_2'.
with pm.Model() as model_2:
    # Looping through each channel in the list of delay channels.
    for channel_name in delay_channels:
        print(f"Delay Channels: Adding {channel_name}")

        # Extracting the transformed data for the current channel.
        x = data_transformed[channel_name].values
        
        # Defining Bayesian priors for the adstock, gamma, and alpha parameters for the current channel.
        adstock_param = pm.Beta(f"{channel_name}_adstock", 2, 2)
        saturation_gamma = pm.Beta(f"{channel_name}_gamma", 2, 2)
        saturation_alpha = pm.Gamma(f"{channel_name}_alpha", 3, 1)
        transformed_X1 = tt.zeros_like(x)
        for i in range(1, len(x)):
            transformed_X1 = tt.set_subtensor(transformed_X1[i], x[i] + adstock_param * x[i - 1])
        transformed_X2 = tt.zeros_like(x)
        for i in range(1,len(x)):
            transformed_X2 = tt.set_subtensor(transformed_X2[i],(transformed_X1[i]**saturation_alpha)/(transformed_X1[i]**saturation_alpha+saturation_gamma**saturation_alpha))
        
        channel_b = pm.HalfNormal(f"{channel_name}_media_coef", sigma = 250)
        response_mean.append(transformed_X2 * channel_b)
    intercept = pm.Normal("intercept",mu = np.mean(data_transformed[target].values), sigma = 3)
    sigma = pm.HalfNormal("sigma", 4)
    likelihood = pm.Normal("outcome", mu = intercept + sum(response_mean), sigma = sigma, 
                           observed = data_transformed[target].values)




import arviz as az

# Continuing the model context defined previously as 'model_2'.
with model_2:
    # Sampling from the posterior distribution of the model.
    # This is the process where PyMC3 generates samples that represent the distribution 
    # of the parameters given the data and the priors.
    # 'pm.sample()' is the main function to perform this sampling.
    # The parameters of pm.sample() are set to control the sampling process:
    # 1000 samples are drawn after a tuning phase of 1000 iterations.
    # 'target_accept' is set to 0.95, which is the target acceptance rate of the sampler.
    # A higher acceptance rate can help in achieving better convergence but might slow down the sampling.
    # 'return_inferencedata=True' makes the function return an InferenceData object, 
    # which is useful for further analysis using ArviZ.
    trace = pm.sample(1000, tune=1000, target_accept=0.95, return_inferencedata=True)
    
    # Summarizing the trace. This generates a summary of the posterior distribution 
    # for each parameter in the model, providing statistics like mean, standard deviation, 
    # and the HPD (highest posterior density) interval.
    # This summary is useful for understanding the results of the model and for diagnostics.
    trace_summary = az.summary(trace)


After running the above code, I get the following error which I am unable to solve even though I removed and installed jupyter notebook and re-installed all the packages. This is on a Mac M1 machine.

Libraries and their versions

  • pymc (5.6.1)
  • numpy (1.23.5)
  • pytensor (2.12.3)

You can find the C code in this temporary file: /var/folders/jw/qb6bs44j0vgfsf52lxw71h300000gn/T/pytensor_compilation_error_v3472mqt

---------------------------------------------------------------------------
CompileError                              Traceback (most recent call last)
~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/vm.py in make_all(self, profiler, input_storage, output_storage, storage_map)
   1242                 thunks.append(
-> 1243                     node.op.make_thunk(node, storage_map, compute_map, [], impl=impl)
   1244                 )

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/op.py in make_thunk(self, node, storage_map, compute_map, no_recycling, impl)
    130             try:
--> 131                 return self.make_c_thunk(node, storage_map, compute_map, no_recycling)
    132             except (NotImplementedError, MethodNotDefined):

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/op.py in make_c_thunk(self, node, storage_map, compute_map, no_recycling)
     95                 raise NotImplementedError("float16")
---> 96         outputs = cl.make_thunk(
     97             input_storage=node_input_storage, output_storage=node_output_storage

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/basic.py in make_thunk(self, input_storage, output_storage, storage_map, cache, **kwargs)
   1199         init_tasks, tasks = self.get_init_tasks()
-> 1200         cthunk, module, in_storage, out_storage, error_storage = self.__compile__(
   1201             input_storage, output_storage, storage_map, cache

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/basic.py in __compile__(self, input_storage, output_storage, storage_map, cache)
   1119         output_storage = tuple(output_storage)
-> 1120         thunk, module = self.cthunk_factory(
   1121             error_storage,

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/basic.py in cthunk_factory(self, error_storage, in_storage, out_storage, storage_map, cache)
   1643                 cache = get_module_cache()
-> 1644             module = cache.module_from_key(key=key, lnk=self)
   1645 

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/cmodule.py in module_from_key(self, key, lnk)
   1239                 location = dlimport_workdir(self.dirname)
-> 1240                 module = lnk.compile_cmodule(location)
   1241                 name = module.__file__

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/basic.py in compile_cmodule(self, location)
   1542                 _logger.debug(f"LOCATION {location}")
-> 1543                 module = c_compiler.compile_str(
   1544                     module_name=mod.code_hash,

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/cmodule.py in compile_str(module_name, src_code, location, include_dirs, lib_dirs, libs, preargs, py_module, hide_symbols)
   2648             # compile_stderr = compile_stderr.replace("\n", ". ")
-> 2649             raise CompileError(
   2650                 f"Compilation failed (return status={status}):\n{' '.join(cmd)}\n{compile_stderr}"

CompileError: Compilation failed (return status=1):
/usr/bin/clang++ -dynamiclib -g -O3 -fno-math-errno -Wno-unused-label -Wno-unused-variable -Wno-write-strings -Wno-c++11-narrowing -fno-exceptions -fno-unwind-tables -fno-asynchronous-unwind-tables -DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION -fPIC -undefined dynamic_lookup -I/Users/adhokshaja/miniconda3/envs/pymc_env/lib/python3.8/site-packages/numpy/core/include -I/Users/adhokshaja/miniconda3/envs/pymc_env/include/python3.8 -I/Users/adhokshaja/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/c_code -L/Users/adhokshaja/miniconda3/envs/pymc_env/lib -fvisibility=hidden -o /Users/adhokshaja/.pytensor/compiledir_macOS-13.2-arm64-arm-64bit-arm-3.8.18-64/tmpjj80nvdi/me35a44294d03835a76b7f9ad569bbbc122b29dc588c89cb224fa59ca0e0ec6cd.so /Users/adhokshaja/.pytensor/compiledir_macOS-13.2-arm64-arm-64bit-arm-3.8.18-64/tmpjj80nvdi/mod.cpp
/Users/adhokshaja/.pytensor/compiledir_macOS-13.2-arm64-arm-64bit-arm-3.8.18-64/tmpjj80nvdi/mod.cpp:25480:32: fatal error: bracket nesting level exceeded maximum of 256
        if (!PyErr_Occurred()) {
                               ^
/Users/adhokshaja/.pytensor/compiledir_macOS-13.2-arm64-arm-64bit-arm-3.8.18-64/tmpjj80nvdi/mod.cpp:25480:32: note: use -fbracket-depth=N to increase maximum nesting level
1 error generated.


During handling of the above exception, another exception occurred:

CompileError                              Traceback (most recent call last)
<ipython-input-6-412f3306e835> in <cell line: 4>()
     13     # 'return_inferencedata=True' makes the function return an InferenceData object,
     14     # which is useful for further analysis using ArviZ.
---> 15     trace = pm.sample(1000, tune=1000, target_accept=0.95, return_inferencedata=True)
     16 
     17     # Summarizing the trace. This generates a summary of the posterior distribution

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pymc/sampling/mcmc.py in sample(draws, tune, chains, cores, random_seed, progressbar, step, nuts_sampler, initvals, init, jitter_max_retries, n_init, trace, discard_tuned_samples, compute_convergence_checks, keep_warning_stat, return_inferencedata, idata_kwargs, nuts_sampler_kwargs, callback, mp_ctx, model, **kwargs)
    651 
    652     initial_points = None
--> 653     step = assign_step_methods(model, step, methods=pm.STEP_METHODS, step_kwargs=kwargs)
    654 
    655     if nuts_sampler != "pymc":

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pymc/sampling/mcmc.py in assign_step_methods(model, step, methods, step_kwargs)
    231             selected_steps.setdefault(selected, []).append(var)
    232 
--> 233     return instantiate_steppers(model, steps, selected_steps, step_kwargs)
    234 
    235 

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pymc/sampling/mcmc.py in instantiate_steppers(model, steps, selected_steps, step_kwargs)
    132             args = step_kwargs.get(name, {})
    133             used_keys.add(name)
--> 134             step = step_class(vars=vars, model=model, **args)
    135             steps.append(step)
    136 

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pymc/step_methods/hmc/nuts.py in __init__(self, vars, max_treedepth, early_max_treedepth, **kwargs)
    178         `pm.sample` to the desired number of tuning steps.
    179         """
--> 180         super().__init__(vars, **kwargs)
    181 
    182         self.max_treedepth = max_treedepth

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pymc/step_methods/hmc/base_hmc.py in __init__(self, vars, scaling, step_scale, is_cov, model, blocked, potential, dtype, Emax, target_accept, gamma, k, t0, adapt_step_size, step_rand, **pytensor_kwargs)
    107         else:
    108             vars = get_value_vars_from_user_vars(vars, self._model)
--> 109         super().__init__(vars, blocked=blocked, model=self._model, dtype=dtype, **pytensor_kwargs)
    110 
    111         self.adapt_step_size = adapt_step_size

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pymc/step_methods/arraystep.py in __init__(self, vars, model, blocked, dtype, logp_dlogp_func, **pytensor_kwargs)
    162 
    163         if logp_dlogp_func is None:
--> 164             func = model.logp_dlogp_function(vars, dtype=dtype, **pytensor_kwargs)
    165         else:
    166             func = logp_dlogp_func

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pymc/model.py in logp_dlogp_function(self, grad_vars, tempered, **kwargs)
    607             if var in input_vars and var not in grad_vars
    608         }
--> 609         return ValueGradFunction(costs, grad_vars, extra_vars_and_values, **kwargs)
    610 
    611     def compile_logp(

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pymc/model.py in __init__(self, costs, grad_vars, extra_vars_and_values, dtype, casting, compute_grads, **kwargs)
    346         inputs = grad_vars
    347 
--> 348         self._pytensor_function = compile_pymc(inputs, outputs, givens=givens, **kwargs)
    349 
    350     def set_weights(self, values):

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pymc/pytensorf.py in compile_pymc(inputs, outputs, random_seed, mode, **kwargs)
   1194     opt_qry = mode.provided_optimizer.including("random_make_inplace", check_parameter_opt)
   1195     mode = Mode(linker=mode.linker, optimizer=opt_qry)
-> 1196     pytensor_function = pytensor.function(
   1197         inputs,
   1198         outputs,

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/compile/function/__init__.py in function(inputs, outputs, mode, updates, givens, no_default_updates, accept_inplace, name, rebuild_strict, allow_input_downcast, profile, on_unused_input)
    313         # note: pfunc will also call orig_function -- orig_function is
    314         #      a choke point that all compilation must pass through
--> 315         fn = pfunc(
    316             params=inputs,
    317             outputs=outputs,

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/compile/function/pfunc.py in pfunc(params, outputs, mode, updates, givens, no_default_updates, accept_inplace, name, rebuild_strict, allow_input_downcast, profile, on_unused_input, output_keys, fgraph)
    365     )
    366 
--> 367     return orig_function(
    368         inputs,
    369         cloned_outputs,

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/compile/function/types.py in orig_function(inputs, outputs, mode, accept_inplace, name, profile, on_unused_input, output_keys, fgraph)
   1754         )
   1755         with config.change_flags(compute_test_value="off"):
-> 1756             fn = m.create(defaults)
   1757     finally:
   1758         t2 = time.perf_counter()

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/compile/function/types.py in create(self, input_storage, storage_map)
   1647 
   1648         with config.change_flags(traceback__limit=config.traceback__compile_limit):
-> 1649             _fn, _i, _o = self.linker.make_thunk(
   1650                 input_storage=input_storage_lists, storage_map=storage_map
   1651             )

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/basic.py in make_thunk(self, input_storage, output_storage, storage_map, **kwargs)
    252         **kwargs,
    253     ) -> Tuple["BasicThunkType", "InputStorageType", "OutputStorageType"]:
--> 254         return self.make_all(
    255             input_storage=input_storage,
    256             output_storage=output_storage,

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/vm.py in make_all(self, profiler, input_storage, output_storage, storage_map)
   1250                     thunks[-1].lazy = False
   1251             except Exception:
-> 1252                 raise_with_op(fgraph, node)
   1253 
   1254         t1 = time.perf_counter()

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/utils.py in raise_with_op(fgraph, node, thunk, exc_info, storage_map)
    533         # Some exception need extra parameter in inputs. So forget the
    534         # extra long error message in that case.
--> 535     raise exc_value.with_traceback(exc_trace)
    536 
    537 

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/vm.py in make_all(self, profiler, input_storage, output_storage, storage_map)
   1241                 # no_recycling here.
   1242                 thunks.append(
-> 1243                     node.op.make_thunk(node, storage_map, compute_map, [], impl=impl)
   1244                 )
   1245                 linker_make_thunk_time[node] = time.perf_counter() - thunk_start

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/op.py in make_thunk(self, node, storage_map, compute_map, no_recycling, impl)
    129             )
    130             try:
--> 131                 return self.make_c_thunk(node, storage_map, compute_map, no_recycling)
    132             except (NotImplementedError, MethodNotDefined):
    133                 # We requested the c code, so don't catch the error.

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/op.py in make_c_thunk(self, node, storage_map, compute_map, no_recycling)
     94                 print(f"Disabling C code for {self} due to unsupported float16")
     95                 raise NotImplementedError("float16")
---> 96         outputs = cl.make_thunk(
     97             input_storage=node_input_storage, output_storage=node_output_storage
     98         )

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/basic.py in make_thunk(self, input_storage, output_storage, storage_map, cache, **kwargs)
   1198         """
   1199         init_tasks, tasks = self.get_init_tasks()
-> 1200         cthunk, module, in_storage, out_storage, error_storage = self.__compile__(
   1201             input_storage, output_storage, storage_map, cache
   1202         )

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/basic.py in __compile__(self, input_storage, output_storage, storage_map, cache)
   1118         input_storage = tuple(input_storage)
   1119         output_storage = tuple(output_storage)
-> 1120         thunk, module = self.cthunk_factory(
   1121             error_storage,
   1122             input_storage,

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/basic.py in cthunk_factory(self, error_storage, in_storage, out_storage, storage_map, cache)
   1642             if cache is None:
   1643                 cache = get_module_cache()
-> 1644             module = cache.module_from_key(key=key, lnk=self)
   1645 
   1646         vars = self.inputs + self.outputs + self.orphans

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/cmodule.py in module_from_key(self, key, lnk)
   1238             try:
   1239                 location = dlimport_workdir(self.dirname)
-> 1240                 module = lnk.compile_cmodule(location)
   1241                 name = module.__file__
   1242                 assert name.startswith(location)

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/basic.py in compile_cmodule(self, location)
   1541             try:
   1542                 _logger.debug(f"LOCATION {location}")
-> 1543                 module = c_compiler.compile_str(
   1544                     module_name=mod.code_hash,
   1545                     src_code=src_code,

~/miniconda3/envs/pymc_env/lib/python3.8/site-packages/pytensor/link/c/cmodule.py in compile_str(module_name, src_code, location, include_dirs, lib_dirs, libs, preargs, py_module, hide_symbols)
   2647             # difficult to read.
   2648             # compile_stderr = compile_stderr.replace("\n", ". ")
-> 2649             raise CompileError(
   2650                 f"Compilation failed (return status={status}):\n{' '.join(cmd)}\n{compile_stderr}"
   2651             )

CompileError: Compilation failed (return status=1):


0

There are 0 best solutions below