EMIT Methane Point Source Plume Complexes
Documentation of data transformation

This script was used to read the EMIT Methane Point Source Plume Complexes dataset, provided in Cloud Optimized GeoTIFF (COG) format, and prepare it for display in the Greenhouse Gas (GHG) Center.

import re
import boto3
# Session using the "ghg_user" profile to read from the GHGC staging bucket
session_ghgc = boto3.session.Session(profile_name="ghg_user")
s3_client_ghgc = session_ghgc.client("s3")

# Session using the default credentials to write to the VEDA SMCE bucket
session_veda_smce = boto3.session.Session()
s3_client_veda_smce = session_veda_smce.client("s3")
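Before moving any data, it can be worth confirming that both sessions resolve valid credentials. A minimal sketch using the standard STS GetCallerIdentity call (this check is an addition for illustration, not part of the original transformation):

# Sanity-check that each session can authenticate before the transfer runs
for name, session in [("ghg_user", session_ghgc), ("default", session_veda_smce)]:
    identity = session.client("sts").get_caller_identity()
    print(name, identity["Account"])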
# Since the plume emission files were already COGs, we only had to rename them
# to the convention used in the STAC collection.
SOURCE_BUCKET_NAME = "ghgc-data-staging-uah"
TARGET_BUCKET_NAME = "ghgc-data-store-dev"
# Collect the keys of the Level 3 ("l3") objects in the source bucket
keys = []
resp = s3_client_ghgc.list_objects_v2(Bucket=SOURCE_BUCKET_NAME)
for obj in resp["Contents"]:
    if "l3" in obj["Key"]:
        keys.append(obj["Key"])
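Note that a single list_objects_v2 call returns at most 1,000 keys, so the listing above would silently truncate on a larger bucket. An equivalent listing using boto3's built-in paginator, as a sketch, handles that case:

# Same filtering, but paginated so buckets with >1,000 objects are fully listed
keys = []
paginator = s3_client_ghgc.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=SOURCE_BUCKET_NAME):
    for obj in page.get("Contents", []):
        if "l3" in obj["Key"]:
            keys.append(obj["Key"])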
for key in keys:
    # Stream the source object so it can be re-uploaded without a local copy
    s3_obj = s3_client_ghgc.get_object(Bucket=SOURCE_BUCKET_NAME, Key=key)["Body"]
    filename = key.split("/")[-1]
    filename_elements = re.split("[_ .]", filename)

    # Pull the acquisition date (YYYYMMDD) out of the key; it sits between
    # two "t" characters, which are stripped off below
    date = re.search(r"t\d{8}t", key).group(0)
    filename_elements.insert(-1, date[1:-1])
    # Drop the original extension element before rejoining
    filename_elements.pop()
    cog_filename = "_".join(filename_elements)
    # Add the .tif extension
    cog_filename = f"{cog_filename}.tif"

    s3_client_veda_smce.upload_fileobj(
        Fileobj=s3_obj,
        Bucket=TARGET_BUCKET_NAME,
        Key=f"plum_data/{cog_filename}",
    )
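To make the renaming concrete, here is the same logic applied in isolation to a hypothetical key (the filename below is illustrative only, not an actual object in the staging bucket):

# Hypothetical input key, made up for illustration
key = "l3/emit_ch4plm_t20230418t0930_v01.tif"
filename = key.split("/")[-1]
elements = re.split("[_ .]", filename)
date = re.search(r"t\d{8}t", key).group(0)  # matches "t20230418t"
elements.insert(-1, date[1:-1])             # insert "20230418" before the extension
elements.pop()                              # drop the old "tif" element
print("_".join(elements) + ".tif")          # emit_ch4plm_t20230418t0930_v01_20230418.tif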