I have a zipfile containing two shapefiles that is hosted on a storage. Requirement it to read the shapefiles from the zipfile without downloading locally. The code below returns an error "`/vsimem/155d82d191b646f496de7ff3ef8283e7' not recognized as a supported file format." Not sure what more should I do to get this working.
import io
import json
import geopandas as gpd
import azure.functions as func
import requests
from zipfile import ZipFile
# util functions
is_shape = lambda string: string.endswith('shp')
def main(req: func.HttpRequest) -> func.HttpResponse:
try:
# Data.zip has two shapfiles. DAMSELFISH_distributions.shp and DAMSELFISH_distributions2.shp)
zipfile_url = "https://github.com/delatitude/spatialtestdata/raw/8c4dea03f4e325aefa523854d44a7084b6316f6e/Data.zip"
# Source : https://stackoverflow.com/questions/72533355/reading-shapefiles-inside-nested-zip-archives
gdfs = []
zipfile_url_response = requests.get(zipfile_url)
with ZipFile(io.BytesIO(zipfile_url_response.content)) as main_zfile:
for file_name in main_zfile.namelist():
if is_shape(file_name):
print("*** " + file_name)
with main_zfile.open(file_name, "r") as zipped_shp:
# for gpd.read_file() file position must be changed back to 0
zipped_shp.seek(0)
gdfs.append(gpd.read_file(zipped_shp))
rows, cols = gdfs[-1].shape
print(f'GeoDataFrame: {rows} rows, {cols} columns\n')
# head of first gdf
#print(gdfs[0].head())
return func.HttpResponse(json.dumps({ "Status" : "success" }),status_code=200, mimetype="application/json")
except Exception as e:
return func.HttpResponse(f"Error: {str(e)}", status_code=500)
geopandas.read_file uses GDAL under the hood, and GDAL supports reading remote zip files natively via its virtual_file_systems system.
Is there a specific reason why you don't use that, as it seems to work on those files?