import xarray as xr
import os
import glob
from datetime import datetime
import boto3
from botocore.exceptions import NoCredentialsError, PartialCredentialsError
import s3fs
import tempfile
import numpy as np
import rasterio
from rasterio.enums import Resampling
from rio_cogeo.cogeo import cog_translate
from rio_cogeo.profiles import cog_profiles
GRA²PES Greenhouse Gas and Air Quality Species
Monthly, 0.036 degree resolution emissions of fossil fuel carbon dioxide (ffCO₂), carbon monoxide (CO), nitrogen oxides (NOₓ), sulfur dioxide (SO₂), and particulate matter (PM₂.₅) for the year 2021 over the Contiguous United States from the Greenhouse gas And Air Pollutants Emissions System (GRA²PES).
This script was used to transform the GRA2PES dataset to Cloud Optimized GeoTIFF (COG) format for display in the Greenhouse Gas (GHG) Center.
config = {
    "data_acquisition_method": "s3",
    "raw_data_bucket": "gsfc-ghg-store",
    "raw_data_prefix": "GRA2PES/monthly_subset_regrid/2021",
    "cog_data_bucket": "ghgc-data-store-develop",
    "cog_data_prefix": "transformed_cogs/GRAAPES",
    "date_fmt": "%Y%m",
    "transformation": {},
}

session = boto3.session.Session()
s3_client = session.client("s3")

raw_data_bucket = config["raw_data_bucket"]
raw_data_prefix = config["raw_data_prefix"]
cog_data_bucket = config["cog_data_bucket"]
cog_data_prefix = config["cog_data_prefix"]
date_fmt = config["date_fmt"]

fs = s3fs.S3FileSystem()
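The s3fs filesystem handle created above is not used in the rest of this script; as an aside, a minimal sketch of how it could stream one raw file straight from S3 without downloading it first (the object name "example.nc4" and the h5netcdf engine are assumptions here):

# Sketch only: open a raw NetCDF directly from S3 via s3fs.
# "example.nc4" is a hypothetical object name; h5netcdf is assumed to be installed.
with fs.open(f"{raw_data_bucket}/{raw_data_prefix}/example.nc4", "rb") as f:
    sample = xr.open_dataset(f, engine="h5netcdf")
    print(sample.data_vars)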
def get_all_s3_keys(bucket, model_name, ext):
    """Get a list of all keys in an S3 bucket."""
    keys = []
    kwargs = {"Bucket": bucket, "Prefix": f"{model_name}/"}
    while True:
        resp = s3_client.list_objects_v2(**kwargs)
        for obj in resp["Contents"]:
            if obj["Key"].endswith(ext) and "historical" not in obj["Key"]:
                keys.append(obj["Key"])
        try:
            kwargs["ContinuationToken"] = resp["NextContinuationToken"]
        except KeyError:
            break
    return keys

keys = get_all_s3_keys(raw_data_bucket, raw_data_prefix, ".nc4")
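The manual ContinuationToken loop above can also be written with boto3's built-in paginator; a minimal equivalent sketch (the function name is illustrative, not part of the original script):

def get_all_s3_keys_paginated(bucket, model_name, ext):
    """Equivalent listing via boto3's list_objects_v2 paginator (sketch)."""
    keys = []
    paginator = s3_client.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket, Prefix=f"{model_name}/"):
        for obj in page.get("Contents", []):
            if obj["Key"].endswith(ext) and "historical" not in obj["Key"]:
                keys.append(obj["Key"])
    return keys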
def download_s3_objects(bucket, keys, download_dir):
    """Download all S3 objects listed in keys to the specified local directory."""
    if not os.path.exists(download_dir):
        os.makedirs(download_dir)
    for key in keys:
        local_filename = os.path.join(download_dir, os.path.basename(key))
        try:
            s3_client.download_file(bucket, key, local_filename)
            print(f"Downloaded {key} to {local_filename}")
        except (NoCredentialsError, PartialCredentialsError) as e:
            print(f"Credentials error: {e}")
        except Exception as e:
            print(f"Failed to download {key}: {e}")

download_s3_objects(raw_data_bucket, keys, "data")
def extract_date_from_key(key):
    # Split the key to isolate the part that contains the date
    parts = key.split("_")
    for part in parts:
        # Check if the part is numeric and has a length of 6 (YYYYMM format)
        if part.isdigit() and len(part) == 6:
            return part
    return None
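For example, given a file name that embeds the month as a standalone underscore-delimited YYYYMM token (the exact name below is hypothetical):

# Hypothetical key for illustration; real GRA2PES object names may differ.
assert extract_date_from_key("GRA2PESv1.0_total_202101_regrid.nc4") == "202101"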
= {"driver": "COG", "compress": "DEFLATE"}
COG_PROFILE = 4
OVERVIEW_LEVELS = 'average'
OVERVIEW_RESAMPLING
for key in glob.glob("data/*.nc4"):
= xr.open_dataset(key)
xds= xds.assign_coords(lon=(((xds.lon + 180) % 360) - 180)).sortby("lon")
xds
for var in ["PM25-PRI","CO2","CO","NOX","SOX"]:
= extract_date_from_key(key)
yearmonth = f"output/GRA2PESv1.0_total_{("-").join(var.split('_'))}_{yearmonth}.tif"
filename = getattr(xds,var)
data "lon", "lat", inplace=True)
data.rio.set_spatial_dims("epsg:4326", inplace=True)
data.rio.write_crs(
# Create a temporary file to hold the COG
with tempfile.NamedTemporaryFile(suffix='.tif', delete=False) as temp_file:
f"temp_{yearmonth}_{var}.tif", **COG_PROFILE, nodata=-9999)
data.rio.to_raster(# Create COG with overviews and nodata value
cog_translate(f"temp_{yearmonth}_{var}.tif",
temp_file.name,"deflate"),
cog_profiles.get(=OVERVIEW_LEVELS,
overview_level=OVERVIEW_RESAMPLING,
overview_resampling=-9999
nodata
)
# Move the temporary file to the desired local path
os.rename(temp_file.name, filename)
if os.path.exists(f"temp_{yearmonth}_{var}.tif"):
f"temp_{yearmonth}_{var}.tif")
os.remove(del data
print(f"Done for: {filename}")