Air-Sea CO₂ Flux, ECCO-Darwin Model v5
Documentation of data transformation
This script was used to transform the Air-Sea CO₂ Flux, ECCO-Darwin Model v5 dataset from netCDF to Cloud Optimized GeoTIFF (COG) format for display in the Greenhouse Gas (GHG) Center.

import os
import xarray
import re
import pandas as pd
import json
import tempfile
import boto3
import rasterio
import rioxarray  # registers the .rio accessor used below
from datetime import datetime
from dateutil.relativedelta import relativedelta
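Before running the transformation, it can be useful to confirm that a raw file has the structure the script expects: grid-index dimensions named "x" and "y", a "time" coordinate, and the flux data variables. The following is a minimal sketch reusing the imports above; the file name shown is hypothetical and only for illustration.

sample_file = "ecco-darwin/CO2_flux_202201.nc"  # hypothetical raw netCDF file name
sample = xarray.open_dataset(sample_file, engine="netcdf4")
print(sample.dims)             # expected to include "x", "y" and "time"
print(list(sample.data_vars))  # variables that will be converted to COGs
print(sample.attrs)            # global attributes later exported to metadata.json
sample.close()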
session = boto3.session.Session()
s3_client = session.client("s3")
bucket_name = (
    "ghgc-data-store-dev"  # S3 bucket where the COGs are stored after transformation
)
FOLDER_NAME = "ecco-darwin"
s3_fol_name = "ecco_darwin"
files_processed = pd.DataFrame(
    columns=["file_name", "COGs_created"]
)  # A dataframe to keep track of the files that we have transformed into COGs

# Reading the raw netCDF files from local machine
for name in os.listdir(FOLDER_NAME):
    xds = xarray.open_dataset(
        f"{FOLDER_NAME}/{name}",
        engine="netcdf4",
    )
    xds = xds.rename({"y": "latitude", "x": "longitude"})
    # Convert grid indices to geographic coordinates
    # (longitude: index 0-1440 -> -180 to 180 degrees; latitude: index 0-721 -> -90 to 90 degrees)
    xds = xds.assign_coords(longitude=((xds.longitude / 1440) * 360) - 180).sortby(
        "longitude"
    )
    xds = xds.assign_coords(latitude=((xds.latitude / 721) * 180) - 90).sortby(
        "latitude"
    )

    variable = [var for var in xds.data_vars]
    for time_increment in xds.time.values:
        for var in variable[2:]:
            filename = name.split("/ ")[-1]
            filename_elements = re.split("[_ .]", filename)
            data = xds[var]

            data = data.reindex(latitude=list(reversed(data.latitude)))
            data.rio.set_spatial_dims("longitude", "latitude", inplace=True)
            data.rio.write_crs("epsg:4326", inplace=True)
            # generate COG
            COG_PROFILE = {"driver": "COG", "compress": "DEFLATE"}

            filename_elements.pop()  # drop the "nc" extension element
            filename_elements[-1] = filename_elements[-2] + filename_elements[-1]
            filename_elements.pop(-2)
            # # insert date of generated COG into filename
            cog_filename = "_".join(filename_elements)
            # # add extension
            cog_filename = f"{cog_filename}.tif"
            with tempfile.NamedTemporaryFile() as temp_file:
                data.rio.to_raster(temp_file.name, **COG_PROFILE)
                s3_client.upload_file(
                    Filename=temp_file.name,
                    Bucket=bucket_name,
                    Key=f"{s3_fol_name}/{cog_filename}",
                )
            files_processed = files_processed._append(
                {"file_name": name, "COGs_created": cog_filename},
                ignore_index=True,
            )

            del data
            print(f"Generated and saved COG: {cog_filename}")

# Generate the json file with the metadata that is present in the netCDF files.
with tempfile.NamedTemporaryFile(mode="w+") as fp:
    json.dump(xds.attrs, fp)
    json.dump({"data_dimensions": dict(xds.dims)}, fp)
    json.dump({"data_variables": list(xds.data_vars)}, fp)
    fp.flush()
    s3_client.upload_file(
        Filename=fp.name,
        Bucket=bucket_name,
        Key=f"{s3_fol_name}/metadata.json",
    )
# A csv file to store the names of all the files converted.
files_processed.to_csv(
    f"s3://{bucket_name}/{s3_fol_name}/files_converted.csv",
)  # writing directly to an s3:// path requires the s3fs package
print("Done generating COGs")
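Once the run completes, a generated COG can be spot-checked with rasterio (imported above) to confirm it reads back with the expected CRS and shape. The following is a minimal sketch reusing the session and constants defined above; the object key shown is hypothetical, substitute one of the filenames printed by the script.

check_name = "CO2_flux_202201.tif"  # hypothetical COG filename; use one printed during the run
s3_client.download_file(bucket_name, f"{s3_fol_name}/{check_name}", check_name)
with rasterio.open(check_name) as src:
    print(src.driver)             # COG output reads back through the GTiff driver
    print(src.crs)                # expected EPSG:4326
    print(src.shape, src.dtypes)  # raster dimensions and data type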