Problem: In the context of a StepFunction that loop on a single Lambda, sometimes the returned Payload from the Lambda is null. If I rerun the exact same loop iteration with the same input, the Payload is not necessary null.
Using: NodeJS 20 Runtime Async/Await Pattern EsModules Style
Having this StepFunction definition:
{
"Comment": "Ingest Large Dataset",
"StartAt": "Ingest_Dataset",
"States": {
"Ingest_Dataset": {
"Type": "Task",
"Resource": "arn:aws:states:::lambda:invoke",
"Parameters": {
"FunctionName": "arn:aws:lambda:eu-west-3:AccountID:function:LambdaName:$LATEST",
"Payload.$": "$.Payload"
},
"Next": "IsMoreToProcess"
},
"IsMoreToProcess": {
"Type": "Choice",
"Choices": [
{
"Variable": "$.Payload.continueProcessing",
"BooleanEquals": true,
"Next": "Ingest_Dataset"
}
],
"Default": "Done"
},
"Done": {
"Type": "Succeed"
}
}
}
Having this Lambda definition:
import { S3Client, GetObjectCommand } from "@aws-sdk/client-s3";
import { parse } from 'csv-parse';
import { safeHandler } from '/opt/nodejs/Wrapper.mjs'
import { logger } from '/opt/nodejs/Logger.mjs'
const s3Client = new S3Client({ region: "eu-west-3" });
const __PREFIX = 'TASK Lambda / Ingest Dataset'
export const handler = async (event) => {
return await safeHandler(__PREFIX, async () => {
const db = getDb()
const body = JSON.parse(event.body || '{}')
const key = body.key
const offset = body.offset || 0;
const limit = body.limit || 1000;
const countTry = body.countTry || 0;
const maxTry = 10
let headers = body.headers || null;
let currentRow = 0;
let processedRows = 0;
try {
await logger.info(`Downloading File From S3 : ${key}`)
const { Body } = await s3Client.send(new GetObjectCommand({ Bucket: BUCKET_NAME, Key: key }));
const rows = [];
const parser = Body.pipe(parse({
columns: headers,
from_line: offset + 2,
to_line: offset + limit + 1
}));
for await (const record of parser) {
await logger.info(__PREFIX, `Processing Row ${currentRow}`)
SOME CODE PROCESSING CSV ROWS...
}
return {
continueProcessing: processedRows === limit,
body: JSON.stringify({
...body,
offset: offset + processedRows,
countTry: 0,
headers,
})
};
} catch (e) {
await logger.error(__PREFIX, e, false)
return {
continueProcessing: countTry < maxTry,
body: JSON.stringify({
...body,
countTry: countTry + 1,
headers,
})
};
}
})
};
Having the safeHandler being:
export async function safeHandler(prefix, fn) {
try {
// Trying to manually timeout before AWS cut the Lambda so we can have some better logging
const timeoutPromise = new Promise((resolve, reject) => {
const timeout = setTimeout(() => reject(new Error('Timeout')), (__TIMEOUT * 1000) - 2000)
timeout.unref()
})
const res = await Promise.race([fn(), timeoutPromise])
return res
}
catch (e) {
await logger.error(prefix, e, false)
await logger.error(prefix, e.message, !(DO_NOT_PUBLISH === 'true'))
return {
statusCode: 500,
}
}
}
And having those logs:
Mar 4, 2024, 22:01:00.539 (UTC+01:00) START RequestId: 8a91277d-00cc-49cf-aadc-040dc057e1bb Version: $LATEST
Mar 4, 2024, 22:01:00.540 (UTC+01:00) 2024-03-04T21:01:00.540Z 8a91277d-00cc-49cf-aadc-040dc057e1bb INFO Connecting To Database...
Mar 4, 2024, 22:01:00.541 (UTC+01:00) 2024-03-04T21:01:00.541Z 8a91277d-00cc-49cf-aadc-040dc057e1bb INFO [TASK Lambda / Ingest Dataset] Downloading File From S3 : key.csv
Mar 4, 2024, 22:01:00.563 (UTC+01:00) END RequestId: 8a91277d-00cc-49cf-aadc-040dc057e1bb
Mar 4, 2024, 22:01:00.563 (UTC+01:00) REPORT RequestId: 8a91277d-00cc-49cf-aadc-040dc057e1bb Duration: 24.32 ms Billed Duration: 25 ms Memory Size: 512 MB Max Memory Used: 216 MB
So we can see that the function does not timeout. I don't understand because there is no case where the return can be null. I have tries/catches and when I restart the same step it works.
The bug seems to be "unstable". When the Payload is null I have no error log.