When I upload a CSV file into a dataset via the Foundry REST API, the dataset only contains the raw file, but I would like the dataset itself to be populated with the contents of the file. I can do this on the web page using "Apply a schema", which has the desired effect. However, I'm looking for a programmatic solution, preferably in Python. The upload function that I have is as follows:
def upload_file_to_foundry(
        input_file,
        token,
        dataset_rid,
        base_url,
        file_path="myUploadFile.csv"):
    """Upload a local file into a Foundry dataset as raw bytes.

    Parameters:
        input_file  - path of the local file to upload
        token       - bearer token used for the authorization header
        dataset_rid - rid of the dataset that receives the file
        base_url    - base url of the Foundry instance (no trailing slash)
        file_path   - name/path the file gets inside the dataset
                      (default: "myUploadFile.csv")

    Returns:
        The requests.Response of the upload call. No status check is done
        here; callers can inspect it or call raise_for_status() themselves.
    """
    headers = {
        "content-type": "application/octet-stream",
        "authorization": f"Bearer {token}",
    }
    url = f"{base_url}/api/v1/datasets/{dataset_rid}/files:upload"
    # The context manager closes the file handle even if the request raises.
    with open(input_file, "rb") as payload:
        response = requests.post(
            url,
            headers=headers,
            # Passed via params so requests url-encodes the target path.
            params={"filePath": file_path},
            data=payload,
        )
    return response
This successfully loads the file into Foundry, but I need to apply a schema. Some old code that I have from a colleague uses the following:
def query_foundry(url,
                  token,
                  headers=None,
                  params=None,
                  data=None,
                  attrJson=None,
                  files=None,
                  stream=False
                  ) -> requests.Response:
    '''
    purpose: general post request used by various foundry queries
    parameters:
    url - url used for the post request
    token - bearer token from foundry (auth token)
    headers - optional headers for the post; when omitted (or empty) the
              bearer token and a json content-type are used
    params - send params as a query string, similar to a get request
    data - send payload as the body of the request
    attrJson - send payload as the body of the request, taken in form of json
    files - send payload as part of the body with a content-type of multipart/form-data
    stream - stream the request (default: False)
    returns: the requests.Response of the post (status is not checked here)
    '''
    if headers:
        # Work on a copy so the caller's dict is never modified.
        request_headers = dict(headers)
        supplied = {name.lower() for name in request_headers}
        if "authorization" not in supplied:
            request_headers["Authorization"] = f'Bearer {token}'
    else:
        request_headers = {"Authorization": f'Bearer {token}'}
        # A multipart upload needs the boundary content-type that requests
        # generates itself, so the json default is only applied when no
        # files are being sent.
        if not files:
            request_headers["Content-Type"] = 'application/json'

    # prepare and execute POST request
    return requests.post(url,
                         headers=request_headers,
                         params=params,
                         json=attrJson,
                         data=data,
                         files=files,
                         stream=stream
                         )
def csv_to_foundry_dataset(token,
                           datasetRID,
                           transactionRID,
                           fileNameUpload,
                           branch='master'
                           ):
    '''
    purpose: upload csv to foundry, commit the transaction and set
    schema based on the foundry-schema-inference api
    parameters:
    token - bearer token from foundry (auth token)
    datasetRID - the rid of the dataset to overwrite
    transactionRID - the rid of the active transaction for the dataset
    fileNameUpload - location of the csv to upload (rewritten in place
                     as utf-8 without a byte order mark)
    branch - the branch of the target dataset (default: master)
    returns: the response of the final "apply schema" request; its status
    is not checked here
    raises: requests.HTTPError (via raise_for_status) when the upload, the
    commit or the schema inference request returns an error status
    '''
    headers = {"Authorization": f'Bearer {token}'}

    # strip the byte order mark from the file and encode as utf-8. this is
    # necessary when downloading a csv from foundry as it adds a BOM to the
    # beginning of file
    with open(fileNameUpload, mode='r', encoding='utf-8-sig') as source:
        csv_text = source.read()
    with open(fileNameUpload, mode='w', encoding='utf-8') as target:
        target.write(csv_text)

    # upload the csv to foundry
    url = f"{baseUrl}/foundry-data-proxy/api/dataproxy/datasets/{datasetRID}/transactions/{transactionRID}"
    # binary mode sends the utf-8 bytes written above unchanged, instead of
    # re-decoding them with the platform default encoding
    with open(fileNameUpload, 'rb') as payload:
        files = {'upload': ('data.csv', payload, 'csv')}
        response = query_foundry(url=url, token=token,
                                 headers=headers, files=files)
    response.raise_for_status()

    # commit the transaction
    url = f"{baseUrl}/foundry-catalog/api/catalog/datasets/{datasetRID}/transactions/{transactionRID}/commit"
    response = query_foundry(url=url, token=token, data="{}")
    # fail here with the real http error rather than later on a schema
    # request against an uncommitted transaction
    response.raise_for_status()

    # get inferred schema from foundry
    url = f"{baseUrl}/foundry-schema-inference/api/datasets/{datasetRID}/branches/{branch}/schema"
    response = query_foundry(url=url, token=token, data="{}")
    # surface e.g. a 404 as an HTTPError instead of a json/KeyError below
    response.raise_for_status()
    inferred = json.loads(response.content)
    schema = inferred["data"]["foundrySchema"]

    # apply schema to foundry dataset
    url = f"{baseUrl}/foundry-metadata/api/schemas/datasets/{datasetRID}/branches/{branch}"
    return query_foundry(url=url, token=token, attrJson=schema)
But this returns a 404 response on this portion:
# get inferred schema from foundry
url = f"{baseUrl}/foundry-schema-inference/api/datasets/{datasetRID}/branches/{branch}/schema"
response = query_foundry(url=url, token=token, data="{}")
# NOTE: the body is parsed without checking the response status first, so an
# error response reaches json.loads / the key lookups below
c = json.loads(response.content)
schema = c["data"]["foundrySchema"]