Dear NASA staff,
I have just prepared a draft Python script (written with the help of AI) to download the data of interest for my research. The points.csv file
has three fields with the following headers: ID (the number of the record considered), latitude, and longitude. A couple of placeholder
points can be inserted for testing purposes; for example, with arbitrary coordinates:
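    ID,latitude,longitude
    1,45.4642,9.1900
    2,41.9028,12.4964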
Please let me know if this approach is feasible and/or how it could be improved to avoid overwhelming your servers; I have sketched one possible throttled variant after the script.
Thanks for your constant guidance.
# Python program (draft)
# requirements.txt
# requests==2.31.0
# pandas==2.0.3
# netCDF4==1.6.4
# matplotlib==3.7.1
# xarray==2023.10.0
import requests
import os
import tempfile
import pandas as pd
import netCDF4 as nc
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
# NASA Earthdata credentials
USERNAME = "your_username"
PASSWORD = "your_password"
# NASA CMR granule search endpoint (returns Atom-style JSON with a feed/entry list)
BASE_URL = "https://cmr.earthdata.nasa.gov/search/granules.json"
# Function to authenticate and obtain an Earthdata Login bearer token.
# This uses the documented "generate user token" endpoint with HTTP Basic
# authentication; a token pre-generated from the Earthdata Login profile
# page could be pasted in directly instead.
def get_access_token(username, password):
    auth_url = "https://urs.earthdata.nasa.gov/api/users/token"
    response = requests.post(auth_url, auth=(username, password))
    if response.ok:
        return response.json().get("access_token")
    else:
        print("Authentication failed:", response.text)
        return None
# Function to query the CMR for granule metadata of a collection
def query_metadata(short_name, version, token, start_date, end_date):
    headers = {"Authorization": f"Bearer {token}"}
    params = {
        "short_name": short_name,  # collection short name, e.g. OMNO2d
        "version": version,  # collection version, e.g. 003
        "page_size": 100,  # number of results per page
        "page_num": 1,  # a 2004-to-present search spans many pages, so this would need to be looped for full coverage
        "temporal": f"{start_date}T00:00:00Z,{end_date}T23:59:59Z",
    }
    response = requests.get(BASE_URL, headers=headers, params=params)
    if response.status_code == 200:
        return response.json()
    else:
        print("Search failed:", response.text)
        return None
# Function to download a granule into the temporary directory
def download_granule(url, token, temp_dir):
    headers = {"Authorization": f"Bearer {token}"}
    # Data URLs usually redirect through Earthdata Login; if the bearer header
    # is not honoured across the redirect, a ~/.netrc entry for
    # urs.earthdata.nasa.gov is the usual fallback.
    response = requests.get(url, headers=headers, stream=True)
    if response.status_code == 200:
        filename = os.path.join(temp_dir, url.split("/")[-1])
        with open(filename, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        print(f"Downloaded: {filename}")
        return filename
    else:
        print("Download failed:", response.text)
        return None
# Function to extract the NO2 value closest to a point from one granule.
# The flat variable names below (latitude, longitude, ColumnAmountNO2) are an
# assumption; OMNO2d files are HDF-EOS5, so the actual group/variable paths
# should be confirmed with ncdump -h before relying on this.
def process_granule(granule_path, lat, lon):
    with nc.Dataset(granule_path) as ncfile:
        lats = ncfile.variables['latitude'][:]
        lons = ncfile.variables['longitude'][:]
        no2_var = ncfile.variables['ColumnAmountNO2'][:]
        # Grid indices of the cell nearest to the requested point
        lat_idx = (abs(lats - lat)).argmin()
        lon_idx = (abs(lons - lon)).argmin()
        no2_value = no2_var[lat_idx, lon_idx]
        return no2_value
# Main script
if __name__ == "__main__":
    # Authenticate and get an access token
    token = get_access_token(USERNAME, PASSWORD)
    if not token:
        print("Failed to authenticate. Exiting.")
        exit()

    # Load coordinates from CSV (expects the headers ID, latitude, longitude)
    points_df = pd.read_csv("points.csv")
    latitudes = points_df['latitude'].tolist()
    longitudes = points_df['longitude'].tolist()
    point_ids = points_df['ID'].tolist()

    # Define time period
    start_date = "2004-01-01"
    end_date = datetime.now().strftime("%Y-%m-%d")

    # Query metadata for OMNO2d version 003 granules
    metadata = query_metadata("OMNO2d", "003", token, start_date, end_date)
    if metadata:
        entries = metadata.get("feed", {}).get("entry", [])
        print(f"Found {len(entries)} granules.")

        # Use a temporary directory for caching
        with tempfile.TemporaryDirectory() as temp_dir:
            # Download every granule once and reuse the local copies for all points,
            # so the servers are not contacted again for each row of points.csv.
            granule_paths = []
            with ThreadPoolExecutor(max_workers=5) as executor:
                futures = []
                for granule in entries:
                    # The first link is assumed to be the data download link;
                    # entries can also carry browse/metadata links, so this
                    # should be checked against the actual response.
                    granule_url = granule.get("links", [{}])[0].get("href")
                    if granule_url:
                        futures.append(executor.submit(download_granule, granule_url, token, temp_dir))
                for future in futures:
                    granule_path = future.result()
                    if granule_path:
                        granule_paths.append(granule_path)

            # Process each point individually against the cached granules
            for point_id, lat, lon in zip(point_ids, latitudes, longitudes):
                print(f"Processing point ID: {point_id} (Lat: {lat}, Lon: {lon})")
                no2_values = []
                for granule_path in granule_paths:
                    no2_values.append(process_granule(granule_path, lat, lon))

                # Calculate the average NO2 concentration for the current point
                if no2_values:
                    avg_no2 = sum(no2_values) / len(no2_values)
                    # Save results to a CSV file named ID_<point_id>.csv
                    output_file = f"ID_{point_id}.csv"
                    with open(output_file, "w") as f:
                        f.write("ID,Latitude,Longitude,Date,NO2_Concentration\n")
                        f.write(f"{point_id},{lat},{lon},{datetime.now().strftime('%Y-%m-%d')},{avg_no2}\n")
                    print(f"Results for point ID {point_id} saved to {output_file}")
                else:
                    print(f"No NO2 values extracted for point ID: {point_id}")