When I crop images using the polygon vertices from the response generated by Textract's analyze_expense, some images are not cropped properly.
def vertices_fun(bucket_name, img_path):
    """Collect the value polygons of matching summary fields for an S3 image.

    Calls ``textract_client.analyze_expense`` on the S3 object identified by
    ``bucket_name`` / ``img_path`` and walks the ``SummaryFields`` of the
    first entry in ``ExpenseDocuments``.

    Args:
        bucket_name: Name of the S3 bucket, passed as ``S3Object.Bucket``.
        img_path: Object key of the document, passed as ``S3Object.Name``.

    Returns:
        A list with one ``ValueDetection.Geometry.Polygon`` value per summary
        field whose ``Type.Text`` matched; empty when nothing matched.
    """
    response = textract_client.analyze_expense(Document={'S3Object': {'Bucket': bucket_name, 'Name': img_path}})
    points = []
    # Only ExpenseDocuments[0] is inspected; any further documents in the
    # response are ignored, and a response without one raises on the lookup.
    for i in response['ExpenseDocuments'][0]['SummaryFields']:
        # NOTE(review): `<some id>` is a placeholder left in the snippet, not
        # valid Python. Replace it with the field type string to match.
        if i['Type']['Text'] == <some id>:
            # Direct indexing: a matching field without 'ValueDetection'
            # geometry raises KeyError rather than being skipped.
            points.append(i['ValueDetection']['Geometry']['Polygon'])
    return points
def vertices_to_pixels(vertices, img_width, img_height):
    """Convert normalized polygon vertices to integer pixel coordinates.

    Args:
        vertices: Iterable of mappings with numeric ``'X'`` and ``'Y'``
            entries, each expressed as a fraction of the image size.
        img_width: Image width in pixels; scales every ``'X'``.
        img_height: Image height in pixels; scales every ``'Y'``.

    Returns:
        A list of ``(x_pixel, y_pixel)`` integer tuples, one per vertex and
        in the same order as ``vertices``.
    """
    # round() instead of int(): truncation turns float products that fall a
    # hair below an integer (0.29 * 100 == 28.999999999999996) into a
    # coordinate that is one pixel short.
    pixel_vertices = [
        (round(vertex['X'] * img_width), round(vertex['Y'] * img_height))
        for vertex in vertices
    ]
    print("pixel vertices:")
    print(pixel_vertices)
    return pixel_vertices
def crop_image(bucket_name, img_path, vertices, output_folder, check):
    """Crop the polygon's bounding box out of an S3 image and upload the crop.

    The image stored at ``img_path`` in ``bucket_name`` is downloaded, cropped
    to the axis-aligned bounding box of ``vertices``, encoded as JPEG and
    written back to the same bucket under ``output_folder``.

    Args:
        bucket_name: S3 bucket holding the source image and receiving the crop.
        img_path: Object key of the source image.
        vertices: Polygon vertices with normalized ``'X'`` / ``'Y'`` values,
            in the form accepted by ``vertices_to_pixels``.
        output_folder: Key prefix under which the crop is stored.
        check: ``0`` to reuse the source file name unchanged; any other value
            is appended to the file stem as ``_<check>`` so that several
            detections from one image get distinct keys.

    Returns:
        The S3 key the cropped image was written to.

    Raises:
        ValueError: If ``vertices`` yields no points, or the bounding box
            does not overlap the image.
    """
    # Download image from S3
    img_obj = s3_client.get_object(Bucket=bucket_name, Key=img_path)
    img = Image.open(img_obj['Body'])
    # NOTE(review): if the JPEG carries an EXIF orientation tag, the pixel
    # grid loaded here may be rotated relative to the coordinates Textract
    # reports. Confirm on a failing sample; if so, apply
    # PIL.ImageOps.exif_transpose(img) before reading the size and cropping.
    img_width, img_height = img.size
    print("width: ",img_width)
    print("height: ",img_height)

    # Convert vertices from normalized coordinates to pixels
    pixel_vertices = vertices_to_pixels(vertices, img_width, img_height)
    if not pixel_vertices:
        raise ValueError("vertices must contain at least one point")

    # Bounding box of the polygon, clamped to the image so that the crop
    # never pads with pixels from outside it. The right/lower edges of a
    # crop box are exclusive, hence the +1 to keep the pixel that holds the
    # far vertex.
    x_coords, y_coords = zip(*pixel_vertices)
    left = max(0, min(x_coords))
    upper = max(0, min(y_coords))
    right = min(img_width, max(x_coords) + 1)
    lower = min(img_height, max(y_coords) + 1)
    print(left, right, upper, lower)
    if left >= right or upper >= lower:
        raise ValueError("bounding box of vertices does not overlap the image")

    # Crop image
    cropped_img = img.crop((left, upper, right, lower))
    # The JPEG encoder rejects modes such as RGBA or P (alpha / palette),
    # so normalize anything it cannot write to plain RGB first.
    if cropped_img.mode not in ("L", "RGB", "CMYK"):
        cropped_img = cropped_img.convert("RGB")

    # Encode the crop in memory
    cropped_img_bytes = io.BytesIO()
    cropped_img.save(cropped_img_bytes, format='JPEG')

    file_name = img_path.split('/')[-1]
    if check == 0:
        cropped_img_key = f"{output_folder}/{file_name}"
    else:
        # The same id was detected in multiple areas: store one version per
        # detection. Split on the LAST dot only, so names with several dots
        # (or none) still work, and format `check` so ints are accepted.
        stem, dot, extension = file_name.rpartition('.')
        if not dot:
            # No extension at all: rpartition puts the whole name last.
            stem, extension = extension, ''
        cropped_img_key = f"{output_folder}/{stem}_{check}{dot}{extension}"

    # Upload cropped image to S3 in the specified output folder
    s3_client.put_object(Bucket=bucket_name, Key=cropped_img_key, Body=cropped_img_bytes.getvalue())
    return cropped_img_key
Is there something I am missing? The failures seem to follow a pattern: they occur in JPG images, in images with a background color other than white, and in images where the text orientation differs.