import os
import xarray
import re
import pandas as pd
import tempfile
import boto3
# ODIAC Fossil Fuel CO₂ Emissions
# Documentation of data transformation
#
# This script was used to transform the ODIAC Fossil Fuel CO₂ Emissions
# dataset from GeoTIFF to Cloud Optimized GeoTIFF (COG) format for display in
# the Greenhouse Gas (GHG) Center.
# Convert every raster found under the local "ODIAC/<folder>/" directories
# into a Cloud Optimized GeoTIFF (COG), upload each COG to S3, and finally
# write a CSV listing which input file produced which COG.

session = boto3.session.Session()
s3_client = session.client("s3")
bucket_name = "ghgc-data-store-dev"  # S3 bucket where the COGs are stored after transformation

fold_names = os.listdir("ODIAC")

# One {"file_name", "COGs_created"} record per converted file; turned into the
# `files_processed` dataframe once the loop has finished (avoids relying on
# the private `DataFrame._append` and re-copying the frame on every iteration).
processed_rows = []

# Reading the raw input files from local machine
for fol_ in fold_names:
    for name in os.listdir(f"ODIAC/{fol_}"):
        # NOTE(review): the `.rio` accessor used below is provided by
        # rioxarray, which this script never imports explicitly — presumably
        # it is registered as a side effect of opening the file; confirm.
        xds = xarray.open_dataarray(f"ODIAC/{fol_}/{name}")

        # os.listdir() yields bare entry names, so this split normally
        # returns `name` unchanged.
        filename = name.split("/ ")[-1]
        filename_elements = re.split("[_ .]", filename)
        # insert date of generated COG into filename:
        # drop the last element (the file extension when the name ends in
        # ".<ext>"), then replace the new last element with the folder name
        # followed by that element's final two characters.
        filename_elements.pop()
        filename_elements[-1] = fol_ + filename_elements[-1][-2:]

        xds.rio.set_spatial_dims("x", "y", inplace=True)
        xds.rio.write_nodata(-9999, inplace=True)
        xds.rio.write_crs("epsg:4326", inplace=True)

        cog_filename = "_".join(filename_elements)
        # add extension
        cog_filename = f"{cog_filename}.tif"

        # The COG is written to a temporary file that is removed automatically
        # when the `with` block exits, i.e. right after the upload.
        with tempfile.NamedTemporaryFile() as temp_file:
            xds.rio.to_raster(
                temp_file.name,
                driver="COG",
            )
            s3_client.upload_file(
                Filename=temp_file.name,
                Bucket=bucket_name,
                Key=f"ODIAC_geotiffs_COGs/{cog_filename}",
            )

        processed_rows.append({"file_name": name, "COGs_created": cog_filename})

        print(f"Generated and saved COG: {cog_filename}")

# A dataframe to keep track of the files that we have transformed into COGs
files_processed = pd.DataFrame(processed_rows, columns=["file_name", "COGs_created"])

# creating the csv file with the names of files transformed.
files_processed.to_csv(
    f"s3://{bucket_name}/ODIAC_COGs/files_converted.csv",
)
print("Done generating COGs")