I tried to create a multipart upload of my CloudWatch Logs data to store it directly in an S3 Glacier vault. The code attached below ran without throwing any errors, but when I retrieve the output file through S3 Glacier's job-retrieval mechanism, the file appears to be blank, even though it reports a size of 20 KB. I am not sure what is causing this, but I suspect it is something in my multipart upload code, so I am attaching it below. The process I am following is:
- First, creating the buffer from uploadData; uploadData is an array.
- Calculating the number of parts. I have set the partSize to 32 MB.
- Calculating the tree hash of the entire data (see the tree-hash sketch after this list).
- Initiating the upload.
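For reference, Glacier's completeMultipartUpload expects the checksum to be a SHA-256 tree hash computed over 1 MiB chunks, not a plain SHA-256 of the whole payload (the two only coincide when the payload is 1 MiB or smaller). A minimal sketch of that computation, using only Node's built-in crypto (the helper name computeTreeHash is mine):

const crypto = require("crypto");

const ONE_MB = 1024 * 1024;

// SHA-256 tree hash as documented for Glacier: hash each 1 MiB chunk,
// then combine adjacent digests pairwise until a single root digest remains.
const computeTreeHash = (buffer) => {
  let level = [];
  for (let i = 0; i < buffer.length; i += ONE_MB) {
    level.push(crypto.createHash("sha256").update(buffer.slice(i, i + ONE_MB)).digest());
  }
  if (level.length === 0) {
    // Empty payload: fall back to the hash of an empty buffer
    level.push(crypto.createHash("sha256").update(Buffer.alloc(0)).digest());
  }
  while (level.length > 1) {
    const next = [];
    for (let i = 0; i < level.length; i += 2) {
      if (i + 1 < level.length) {
        // Hash the concatenation of two adjacent digests
        next.push(crypto.createHash("sha256").update(Buffer.concat([level[i], level[i + 1]])).digest());
      } else {
        // An odd digest at the end is carried up unchanged
        next.push(level[i]);
      }
    }
    level = next;
  }
  return level[0].toString("hex");
};

The AWS SDK for JavaScript v2 also exposes glacier.computeChecksums(buffer), which returns both the linear hash and the tree hash, so this does not have to be hand-rolled.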
const multiPartUpload = async (uploadData, logGroupName) => {
let archiveID = null;
const archiveDescription = "Trying out multipart upload";
const partSize = 1024 * 1024 * 32;
const bufferUploadData = Buffer.from(uploadData);
let numParts = Math.ceil(bufferUploadData.length / partSize);
console.log("Nmber of parts ", numParts);
let startTime = new Date();
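// initiateMultipartUpload expects partSize as a string, in bytes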
let params = { vaultName: vaultname, partSize: partSize.toString() };
// hashNode(bufferUploadData).then(console.log());
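// SHA-256 of the whole buffer, used later as the completion checksum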
let tree_hash = crypto.createHash("sha256").update(bufferUploadData).digest("hex");
console.log("TreeHash is ", tree_hash);
// Initiate the multipart upload
console.log("Initiating upload to", vaultname);
// Call Glacier to initiate the upload.
glacier.initiateMultipartUpload(params, function (mpErr, multipart) {
if (mpErr) {
console.log("Error!", mpErr.stack);
return;
}
console.log("Got upload ID", multipart.uploadId);
// Grab each partSize chunk and upload it as a part
for (var i = 0; i < bufferUploadData.length; i += partSize) {
var end = Math.min(i + partSize, bufferUploadData.length),
partParams = {
vaultName: vaultname,
uploadId: multipart.uploadId,
range: "bytes " + i + "-" + (end - 1) + "/*",
body: bufferUploadData.slice(i, end),
};
// Send a single part
console.log("Uploading part", i, "=", partParams.range);
glacier.uploadMultipartPart(partParams, function (multiErr, mData) {
if (multiErr) {
console.log("Error ", multiErr);
return;
}
console.log("Completed part", mData);
// console.log("Completed part", this.request.params.range);
if (--numParts > 0) {
console.log("in num parts");
return; // complete only when all parts uploaded
}
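// All parts have reported back; assemble the completion request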
let doneParams = {
vaultName: vaultname,
uploadId: multipart.uploadId,
archiveSize: bufferUploadData.length.toString(),
checksum: tree_hash, // the computed tree hash
};
console.log("Completing upload...");
let myPromise = new Promise(function (myResolve, myReject) {
// "Producing Code" (May take some time)
glacier.completeMultipartUpload(doneParams, function (err, data) {
if (err) {
console.log("An error occurred while uploading the archive");
console.log(err);
myReject();
} else {
var delta = (new Date() - startTime) / 1000;
console.log("Completed upload in", delta, "seconds");
console.log("Archive ID:", data.archiveId);
console.log("Checksum: ", data.checksum);
archiveID = data.archiveId;
myResolve();
}
});
});
myPromise
.then(function (value) {
/* code if successful */
// console.log("Value ", value);
let arr = logGroupName.split("/");
let functionName = arr[3];
return new Promise((resolve, reject) => {
// (*)
setTimeout(() => resolve(dynamodbPutItem(archiveID, functionName)), 1000);
});
})
.then(function (value) {
console.log("Here in second promise and archiveID is ", archiveID);
})
.catch((error) => {
console.log("Error in promises during completeMultipartUpload ", error);
});
});
}
});
};
Please note that there are two implementations of Amazon Glacier:
- The original Amazon S3 Glacier service, which stores archives in vaults and is what your code is calling.
- The Glacier storage classes in Amazon S3 (Glacier Instant Retrieval, Glacier Flexible Retrieval, and Glacier Deep Archive), where objects are stored and retrieved through the normal S3 API.

I recommend that you change your program to use the Glacier storage classes in S3. It will be both cheaper and easier to use.
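For example, here is a minimal sketch of writing the same log data straight to an S3 bucket with a Glacier storage class, assuming the AWS SDK for JavaScript v2; the bucket name and object key are placeholders:

const AWS = require("aws-sdk");
const s3 = new AWS.S3();

const archiveLogsToS3 = async (uploadData, logGroupName) => {
  const params = {
    Bucket: "my-log-archive-bucket",               // placeholder bucket name
    Key: `${logGroupName}/${Date.now()}.json`,     // placeholder object key
    Body: Buffer.from(JSON.stringify(uploadData)), // serialize the log events
    StorageClass: "DEEP_ARCHIVE",                  // or "GLACIER" / "GLACIER_IR"
  };
  const result = await s3.putObject(params).promise();
  console.log("Stored log archive, ETag:", result.ETag);
  return result;
};

Retrieval then goes through the normal S3 API (with a RestoreObject request first for the Flexible Retrieval and Deep Archive classes) instead of vault jobs, and there is no tree-hash bookkeeping to do.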