Problem accessing HLS data
Posted: Fri Jul 26, 2024 6:36 pm America/New_York
Hello,
I'm trying to get the values of vegetation indices calculated on HLS data without downloading the tiffs, as it describe in tutorial. Initially the problem was the same as in this issue viewtopic.php?t=5207 , but I followed the instructions described there and the error message has changed. Now I have these:
https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T30UWB.2019110T112119.v2.0/HLS.S30.T30UWB.2019110T112119.v2.0.B04.tif
vsi curl error: invalid path or file: 0. Retrying...
vsi curl error: invalid path or file: 1. Retrying...
vsi curl error: invalid path or file: 2. Retrying...
vsi curl error: invalid path or file: 3. Retrying...
vsi curl error: invalid path or file: 4. Retrying...
vsi curl error: invalid path or file: 5. Retrying...
vsi curl error: invalid path or file: 6. Retrying...
vsi curl error: invalid path or file: 7. Retrying...
vsi curl error: invalid path or file: 8. Retrying...
vsi curl error: invalid path or file: 9. Retrying...
Failed to process https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T30UWB.2019110T112119.v2.0/HLS.S30.T30UWB.2019110T112119.v2.0.B04.tif after 10 retries. Please check to see you're authenticated with earthaccess.
The COGs have been loaded into memory!
The code is following
auth = earthaccess.login(persist=True)
test_point=-2.381371, 51.054344
transformer = Transformer.from_crs("EPSG:4326", "EPSG:32629", always_xy=True)
trans_point=transformer.transform(-2.381371, 51.054344)
temporal = ("2019-04-01T00:00:00", "2019-04-30T23:59:59")
granules = earthaccess.DataGranules(auth).parameters(
short_name=['HLSS30'],
# bounding_box=bbox,
temporal=temporal,
point=test_point,
).cloud_cover(max_cover=30)
granule_link=granules.get()
hls_result_url=granule_link[0].data_links()
# browse_url=granule_link[0].dataviz_links()[0]
indices_bands_links = []
indices_bands = ['B8A', 'B04', 'B03'] # NIR RED green for S30
# Subset the assets in the item down to only the desired bands
for a in hls_result_url:
if any(b in a for b in indices_bands):
indices_bands_links.append(a)
indices_bands_links
gdal_cookiefile=gdal.SetConfigOption('GDAL_HTTP_COOKIEFILE','~/cookies.txt')
gdal_cookiejar=gdal.SetConfigOption('GDAL_HTTP_COOKIEJAR', '~/cookies.txt')
gdal_disable_read=gdal.SetConfigOption('GDAL_DISABLE_READDIR_ON_OPEN','EMPTY_DIR')
gdal_curl_extensions=gdal.SetConfigOption('CPL_VSIL_CURL_ALLOWED_EXTENSIONS','TIF')
gdal_unsafessl=gdal.SetConfigOption('GDAL_HTTP_UNSAFESSL', 'YES')
gdal_config = {
"GDAL_HTTP_COOKIEFILE": gdal_cookiefile,
"GDAL_HTTP_COOKIEJAR": gdal_cookiejar,
"GDAL_DISABLE_READDIR_ON_OPEN": gdal_disable_read,
"CPL_VSIL_CURL_ALLOWED_EXTENSIONS": gdal_curl_extensions,
"GDAL_HTTP_UNSAFESSL": gdal_unsafessl,
}
# Use vsicurl to load the data directly into memory (be patient, may take a few seconds)
chunk_size = dict(band=1, x=512, y=512) # Tiles have 1 band and are divided into 512x512 pixel chunks
# Sometimes a vsi curl error occurs so we need to retry if it does
max_retries = 10
for e in indices_bands_links:
print(e)
# Try Loop
for _i in range(max_retries):
try:
with rasterio.Env(**gdal_config): # Setting up GDAL environment
with rasterio.open(_i) as src:
# Open and build datasets
if e.rsplit('.', 2)[-2] == indices_bands[0]: # NIR index
nir = rxr.open_rasterio(e, chunks=chunk_size, masked=True).squeeze('band', drop=True)
# nir.attrs['scale_factor'] = 0.0001 # hard coded the scale_factor attribute
elif e.rsplit('.', 2)[-2] == indices_bands[1]: # red index
red = rxr.open_rasterio(e, chunks=chunk_size, masked=True).squeeze('band', drop=True)
red.attrs['scale_factor'] = 0.0001 # hard coded the scale_factor attribute
elif e.rsplit('.', 2)[-2] == indices_bands[2]: # blue index
green = rxr.open_rasterio(e, chunks=chunk_size, masked=True).squeeze('band', drop=True)
blue.attrs['scale_factor'] = 0.0001 # hard coded the scale_factor attribute
break # Break out of the retry loop
except Exception as ex:
print(f"vsi curl error: {ex}. Retrying...")
else:
print(f"Failed to process {e} after {max_retries} retries. Please check to see you're authenticated with earthaccess.")
print("The COGs have been loaded into memory!")
I tried to do the authentication either with .netrc or auth in notebook. It didn't solve the problem.
So I don't know what to do. Any help is much appreciated.
I'm trying to get the values of vegetation indices calculated on HLS data without downloading the tiffs, as it describe in tutorial. Initially the problem was the same as in this issue viewtopic.php?t=5207 , but I followed the instructions described there and the error message has changed. Now I have these:
https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T30UWB.2019110T112119.v2.0/HLS.S30.T30UWB.2019110T112119.v2.0.B04.tif
vsi curl error: invalid path or file: 0. Retrying...
vsi curl error: invalid path or file: 1. Retrying...
vsi curl error: invalid path or file: 2. Retrying...
vsi curl error: invalid path or file: 3. Retrying...
vsi curl error: invalid path or file: 4. Retrying...
vsi curl error: invalid path or file: 5. Retrying...
vsi curl error: invalid path or file: 6. Retrying...
vsi curl error: invalid path or file: 7. Retrying...
vsi curl error: invalid path or file: 8. Retrying...
vsi curl error: invalid path or file: 9. Retrying...
Failed to process https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T30UWB.2019110T112119.v2.0/HLS.S30.T30UWB.2019110T112119.v2.0.B04.tif after 10 retries. Please check to see you're authenticated with earthaccess.
The COGs have been loaded into memory!
The code is following
auth = earthaccess.login(persist=True)
test_point=-2.381371, 51.054344
transformer = Transformer.from_crs("EPSG:4326", "EPSG:32629", always_xy=True)
trans_point=transformer.transform(-2.381371, 51.054344)
temporal = ("2019-04-01T00:00:00", "2019-04-30T23:59:59")
granules = earthaccess.DataGranules(auth).parameters(
short_name=['HLSS30'],
# bounding_box=bbox,
temporal=temporal,
point=test_point,
).cloud_cover(max_cover=30)
granule_link=granules.get()
hls_result_url=granule_link[0].data_links()
# browse_url=granule_link[0].dataviz_links()[0]
indices_bands_links = []
indices_bands = ['B8A', 'B04', 'B03'] # NIR RED green for S30
# Subset the assets in the item down to only the desired bands
for a in hls_result_url:
if any(b in a for b in indices_bands):
indices_bands_links.append(a)
indices_bands_links
gdal_cookiefile=gdal.SetConfigOption('GDAL_HTTP_COOKIEFILE','~/cookies.txt')
gdal_cookiejar=gdal.SetConfigOption('GDAL_HTTP_COOKIEJAR', '~/cookies.txt')
gdal_disable_read=gdal.SetConfigOption('GDAL_DISABLE_READDIR_ON_OPEN','EMPTY_DIR')
gdal_curl_extensions=gdal.SetConfigOption('CPL_VSIL_CURL_ALLOWED_EXTENSIONS','TIF')
gdal_unsafessl=gdal.SetConfigOption('GDAL_HTTP_UNSAFESSL', 'YES')
gdal_config = {
"GDAL_HTTP_COOKIEFILE": gdal_cookiefile,
"GDAL_HTTP_COOKIEJAR": gdal_cookiejar,
"GDAL_DISABLE_READDIR_ON_OPEN": gdal_disable_read,
"CPL_VSIL_CURL_ALLOWED_EXTENSIONS": gdal_curl_extensions,
"GDAL_HTTP_UNSAFESSL": gdal_unsafessl,
}
# Use vsicurl to load the data directly into memory (be patient, may take a few seconds)
chunk_size = dict(band=1, x=512, y=512) # Tiles have 1 band and are divided into 512x512 pixel chunks
# Sometimes a vsi curl error occurs so we need to retry if it does
max_retries = 10
for e in indices_bands_links:
print(e)
# Try Loop
for _i in range(max_retries):
try:
with rasterio.Env(**gdal_config): # Setting up GDAL environment
with rasterio.open(_i) as src:
# Open and build datasets
if e.rsplit('.', 2)[-2] == indices_bands[0]: # NIR index
nir = rxr.open_rasterio(e, chunks=chunk_size, masked=True).squeeze('band', drop=True)
# nir.attrs['scale_factor'] = 0.0001 # hard coded the scale_factor attribute
elif e.rsplit('.', 2)[-2] == indices_bands[1]: # red index
red = rxr.open_rasterio(e, chunks=chunk_size, masked=True).squeeze('band', drop=True)
red.attrs['scale_factor'] = 0.0001 # hard coded the scale_factor attribute
elif e.rsplit('.', 2)[-2] == indices_bands[2]: # blue index
green = rxr.open_rasterio(e, chunks=chunk_size, masked=True).squeeze('band', drop=True)
blue.attrs['scale_factor'] = 0.0001 # hard coded the scale_factor attribute
break # Break out of the retry loop
except Exception as ex:
print(f"vsi curl error: {ex}. Retrying...")
else:
print(f"Failed to process {e} after {max_retries} retries. Please check to see you're authenticated with earthaccess.")
print("The COGs have been loaded into memory!")
I tried to do the authentication either with .netrc or auth in notebook. It didn't solve the problem.
So I don't know what to do. Any help is much appreciated.