How do I allow a pymc model to sample from a posterior predictive when a matrix of training data has shape (66289, 1024) and a matrix of test data has shape (2054937, 1024)? In other words, how do I resolve the following error?
ValueError: shape mismatch: objects cannot be broadcast to a single shape. Mismatch is between arg 0 with shape (66289,) and arg 1 with shape (2054937,).
An excerpt from my Python script Predict_Response_Values.py is below.
feature_matrix = np.loadtxt(path_to_dataset, delimiter = ',', dtype = np.float32, max_rows = 2*number_of_training_or_testing_observations)
two_dimensional_array_of_values_of_predictors_for_training = feature_matrix[0:number_of_training_or_testing_observations, 1:]
one_dimensional_array_of_response_values_for_training = feature_matrix[0:number_of_training_or_testing_observations, 0]
two_dimensional_array_of_values_of_predictors_for_testing = feature_matrix[number_of_training_or_testing_observations:2*number_of_training_or_testing_observations, 1:]
one_dimensional_array_of_response_values_for_testing = feature_matrix[number_of_training_or_testing_observations:2*number_of_training_or_testing_observations, 0]
with pymc.Model() as pymc_model:
MutableData_of_values_of_predictors = pymc.MutableData('MutableData_of_values_of_predictors', two_dimensional_array_of_values_of_predictors_for_training)
tensor_variable_representing_expected_value_mu_of_response_values = pymc_bart.BART(name = 'mu', X = MutableData_of_values_of_predictors, Y = one_dimensional_array_of_response_values_for_training, m = 50)
tensor_variable_representing_prior_probability_density_distribution_for_standard_deviation = pymc.HalfNormal('P(sigma)', sigma = 100)
tensor_variable_representing_likelihood_and_sampling_probability_density_distribution_of_response_values = pymc.Normal('P(response value | mu, sigma)', mu = tensor_variable_representing_expected_value_mu_of_response_values, sigma = tensor_variable_representing_prior_probability_density_distribution_for_standard_deviation, observed = one_dimensional_array_of_response_values_for_training)
inference_data = pymc.sample(random_seed = random_seed)
with pymc_model:
pymc.set_data({'MutableData_of_values_of_predictors': two_dimensional_array_of_values_of_predictors_for_testing})
inference_data_for_posterior_predictive_probability_density_distribution_for_testing_data = pymc.sample_posterior_predictive(trace = inference_data, random_seed = random_seed)
array_of_predicted_response_values = inference_data_for_posterior_predictive_probability_density_distribution_for_testing_data.posterior_predictive['P(response value | mu, sigma)']
one_dimensional_array_of_averages_of_predicted_response_values = array_of_predicted_response_values.mean(axis = (0, 1))