I am trying to run batch predictions for my custom-trained model in Vertex AI. The model was imported into the Model Registry from a Cloud Storage bucket; it is stored as a model.joblib file and uses the scikit-learn framework.
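For reference, this is roughly how the model was trained and registered (the training data, bucket, display name, and container tag below are placeholders, not my exact setup):

import joblib
import numpy as np
from sklearn.cluster import KMeans
from google.cloud import aiplatform

# Placeholder features with the same shape as my real data (300K users x 47 features).
X = np.random.rand(300_000, 47)
model = KMeans(n_clusters=7, random_state=0).fit(X)

# The prebuilt sklearn serving container expects the artifact to be named
# exactly "model.joblib"; the file is then copied into the artifact_uri below.
joblib.dump(model, "model.joblib")

aiplatform.init(project="abc", location="us-central1")
registered = aiplatform.Model.upload(
    display_name="audience-segmentation-kmeans",
    artifact_uri="gs://my-model-bucket/kmeans-model",
    serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest",
)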
Since I want to schedule batch predictions, I wrote a Cloud Function to do this. I followed this document: here. Additionally, I added the following:
"dedicated_resources": {
"machine_spec": {
"machine_type": "c2-standard-16"
},
"starting_replica_count":1,
"max_replica_count":3,
I want to predict for about 300K users, across 47 features, with a KMeans model of 7 clusters.
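For scale, a back-of-envelope estimate (assuming float64 features) says the input matrix alone is only ~113 MB, well within the RAM of any machine type I tried:

rows, cols = 300_000, 47
input_bytes = rows * cols * 8  # 8 bytes per float64 value
print(f"~{input_bytes / 1e6:.0f} MB")  # ~113 MB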
But I am consistently getting the following errors:
- In logging:
ERROR:root:Failed to load model: Could not load the model: /tmp/model/0001/model.joblib. std::bad_alloc. (Error code: 0)
- In Batch Prediction UI:
Error: model server never became ready. Please validate that your model file or container configuration are valid.
I have tried different machine types:
- c2-standard-16
- n1-highcpu-4
- n1-highcpu-16
But the error does not change. Is this a machine_type issue, or is it something else? How do I resolve it?
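For what it's worth, here is a minimal local sanity check I can run to rule out a corrupt artifact, assuming access to the same bucket (the bucket name and path below are placeholders):

from google.cloud import storage
import joblib

# Download the same artifact the batch job loads and try to deserialize it
# locally, ideally with the same scikit-learn version the prebuilt container uses.
client = storage.Client(project="abc")
blob = client.bucket("my-model-bucket").blob("kmeans-model/model.joblib")
blob.download_to_filename("/tmp/model.joblib")

model = joblib.load("/tmp/model.joblib")
print(type(model), getattr(model, "n_clusters", None))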
Edit: Cloud function for batch prediction
from google.cloud import aiplatform_v1beta1
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value


def hello_pubsub(event, context):
    project = "abc"
    display_name = "audience-segmentation-predictions"
    model_name = "projects/abc/locations/us-central1/models/19290"
    instances_format = "bigquery"
    bigquery_source_input_uri = "bq://abc.ml_final_g0_us.model_input_transformed_daily"
    predictions_format = "bigquery"
    bigquery_destination_output_uri = "bq://abc.ml_final_g0_us"
    location = "us-central1"
    api_endpoint = "us-central1-aiplatform.googleapis.com"

    def create_batch_prediction_job_bigquery_sample(
        project,
        display_name,
        model_name,
        instances_format,
        bigquery_source_input_uri,
        predictions_format,
        bigquery_destination_output_uri,
        location,
        api_endpoint,
    ):
        # The AI Platform services require regional API endpoints.
        client_options = {"api_endpoint": api_endpoint}
        # Initialize the client that will be used to create and send requests.
        # This client only needs to be created once and can be reused for multiple requests.
        client = aiplatform_v1beta1.JobServiceClient(client_options=client_options)
        model_parameters_dict = {}
        model_parameters = json_format.ParseDict(model_parameters_dict, Value())
        batch_prediction_job = {
            "display_name": display_name,
            # Format: 'projects/{project}/locations/{location}/models/{model_id}'
            "model": model_name,
            "model_parameters": model_parameters,
            "input_config": {
                "instances_format": instances_format,
                "bigquery_source": {"input_uri": bigquery_source_input_uri},
            },
            "output_config": {
                "predictions_format": predictions_format,
                "bigquery_destination": {"output_uri": bigquery_destination_output_uri},
            },
            # excluded_fields is a repeated field, so it takes a list of column names.
            "instance_config": {"excluded_fields": ["clientid"]},
            "dedicated_resources": {
                "machine_spec": {
                    "machine_type": "c2-standard-30"
                },
            },
        }
        parent = f"projects/{project}/locations/{location}"
        response = client.create_batch_prediction_job(
            parent=parent, batch_prediction_job=batch_prediction_job
        )
        print("response:", response)

    create_batch_prediction_job_bigquery_sample(
        project,
        display_name,
        model_name,
        instances_format,
        bigquery_source_input_uri,
        predictions_format,
        bigquery_destination_output_uri,
        location,
        api_endpoint,
    )
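For comparison, the equivalent job through the higher-level google-cloud-aiplatform SDK would look roughly like this (an untested sketch using the same values; I am not sure every SDK version exposes excluded_fields here, which is why I used the v1beta1 client above):

from google.cloud import aiplatform

aiplatform.init(project="abc", location="us-central1")
model = aiplatform.Model("projects/abc/locations/us-central1/models/19290")

# Same BigQuery input/output, machine type, and replica bounds as the
# v1beta1 request above.
job = model.batch_predict(
    job_display_name="audience-segmentation-predictions",
    instances_format="bigquery",
    predictions_format="bigquery",
    bigquery_source="bq://abc.ml_final_g0_us.model_input_transformed_daily",
    bigquery_destination_prefix="bq://abc.ml_final_g0_us",
    machine_type="c2-standard-30",
    starting_replica_count=1,
    max_replica_count=3,
)
print(job.state)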