# Import required libraries
# import earthaccess
import os
import warnings
import requests
import pandas as pd
'USE_PYGEOS'] = '0'
os.environ[import geopandas
import folium
import folium.plugins
import seaborn as sns
import glob
import numpy as np
import matplotlib.pyplot as plt
import branca.colormap as cm
from folium import Map, TileLayer
from branca.element import Figure
from pystac_client import Client
from pyproj import Geod
from shapely import wkt
from datetime import datetime
Interactive Session: Human Anthropogenic Emissions (EPA)
About the Data
The gridded EPA U.S. anthropogenic methane greenhouse gas inventory (gridded GHGI) includes spatially disaggregated (0.1 deg x 0.1 deg or approximately 10 x 10 km resolution) maps of annual anthropogenic methane emissions (for the contiguous United States (CONUS), consistent with national annual U.S. anthropogenic methane emissions reported in the U.S. EPA Inventory of U.S. Greenhouse Gas Emissions and Sinks (U.S. GHGI). This V2 Express Extension dataset contains methane emissions provided as fluxes, in units of molecules of methane per square cm per second, for over 25 individual emission source categories, including those from agriculture, petroleum and natural gas systems, coal mining, and waste. The data have been converted from their original NetCDF format to Cloud-Optimized GeoTIFF (COG) for use in the US GHG Center, thereby enabling user exploration of spatial anthropogenic methane emissions and their trends.
Requirements
- Set up Python Environment - See setup_instructions.md in the
/setup/
folder
Learning Objectives
- How to use
U.S. GHG Center STAC Catalog
to accessU.S. Gridded Anthropogenic Methane Emissions Inventory
data - How to visualize a dataset using
folium
and perform zonal statistics
Approach
- Identify available dates and temporal frequency of observations for the given collection using the GHGC API
/stac
endpoint. The collection processed in this notebook is the gridded methane emissions data product. - Pass the STAC item into the raster API
/stac/tilejson.json
endpoint. - Using
folium.plugins.DualMap
, we will visualize two tiles (side-by-side), allowing us to compare time points. - After the visualization, we will perform zonal statistics for a given polygon.
Installing the Required Libraries
Required libraries are
Define the Spatial Region of Interest
For this example, our spatial region of interest (ROI) will be the Permian Basin, USA. The Permian Basin is a regional depression expanding from the eastern side of New Mexico to West Texas, covering roughly 75 thousand square miles of land. This region is known to be “the largest hydrocarbon-producing basins in the United States and the world” according to the U.S. Energy Information Administration. Consequently, the Permian Basin is prone to excessive air pollution and a suitable site for monitoring anthropogenic methane emissions.
# Loading Permian Basin shape file as a ROI
# User can pass any json or shape file here
# The "geopandas" library makes working with geospatial data easier by facilitating spatial operations on geometric types
= f"{os.getenv('HOME')}/shared/data"
data_dir = geopandas.read_file(f'{data_dir}/use-case-3/Permian.zip') permian_basin
Querying the STAC API
Search for U.S. Gridded Anthropogenic Methane Emissions Inventory Data
# Provide STAC and RASTER API endpoints
= "http://ghg.center/api/stac"
STAC_API_URL = "https://ghg.center/api/raster"
RASTER_API_URL
# Please use the collection name similar to the one used in STAC collection.
# Name of the collection for gridded methane dataset.
= "epa-ch4emission-yeargrid-v2express" collection_name
# Fetching the collection from STAC collections using appropriate endpoint.
= requests.get(f"{STAC_API_URL}/collections/{collection_name}").json()
collection collection
Examining the contents of our collection
under the temporal
variable, we see that the data is available from January 2012 to December 2020. By looking at the dashboard:time density
, we observe that the periodic frequency of these observations is yearly.
# Create a function that would search for the number of items in above data collection in the STAC API
def get_item_count(collection_id):
= 0
count = f"{STAC_API_URL}/collections/{collection_id}/items"
items_url
while True:
= requests.get(items_url)
response
if not response.ok:
print("error getting items")
exit()
= response.json()
stac += int(stac["context"].get("returned", 0))
count next = [link for link in stac["links"] if link["rel"] == "next"]
if not next:
break
= next[0]["href"]
items_url
return count
# Check total number of items available
= get_item_count(collection_name)
number_of_items = requests.get(f"{STAC_API_URL}/collections/{collection_name}/items?limit={number_of_items}").json()["features"]
items print(f"Found {len(items)} items")
# Examining the first item in the collection
# Keep in mind that a list starts from 0, 1, 2,... therefore ‘[0]’ is referring to the first item in the list/collection
0] items[
Opening and Exploring Methane Emissions Using the Raster API
In this notebook, we will explore the temporal impacts of methane emissions. We will visualize the outputs on a map using folium
.
# To access the year value from each item more easily, this will let us query more explicity by year and month (e.g., 2020-02)
= {item["properties"]["datetime"][:7]: item for item in items}
items
# Set the asset values to the appropriate parameters
= "production-ngs"
asset_name_1 = 'production-ps'
asset_name_2 = 'total-methane' asset_name_3
# Below, we are entering the minimum and maximum values to provide our upper and lower bounds
= {'max': 20,'min': 0} rescale_values
# Examining the items in the collection
items
Now, we will pass the item id, collection name, and rescaling_factor
to the Raster API
endpoint. We will do this twice, once for January 2018 and again for January 2012, so that we can visualize each event independently.
# Set a colormap for the granule
# Please refer to matplotlib library if you'd prefer choosing a different color ramp (https://matplotlib.org/stable/users/explain/colors/colormaps.html)
= "rainbow"
color_map
# Petroleum-Production Methane Emissions
# Extract and display the January 2018 tile using the appropriate ID, colormap, rescale values, and datetime (items['2018-01'])
= requests.get(
january_2018_tile_ps f"{RASTER_API_URL}/stac/tilejson.json?collection={items['2018-01']['collection']}&item={items['2018-01']['id']}"
f"&assets={asset_name_1}"
f"&color_formula=gamma+r+1.05&colormap_name={color_map}"
f"&rescale={rescale_values['min']},{rescale_values['max']}",
).json()
# Print the information associated with this tile
january_2018_tile_ps
= requests.get(
january_2012_tile_ps f"{RASTER_API_URL}/stac/tilejson.json?collection={items['2012-01']['collection']}&item={items['2012-01']['id']}"
f"&assets={asset_name_1}"
f"&color_formula=gamma+r+1.05&colormap_name={color_map}"
f"&rescale={rescale_values['min']},{rescale_values['max']}",
).json() january_2012_tile_ps
# Natural Gas-Production Methane Emissions
= "rainbow" # please select the color ramp from matplotlib library.
color_map = requests.get(
january_2018_tile_ngs f"{RASTER_API_URL}/stac/tilejson.json?collection={items['2018-01']['collection']}&item={items['2018-01']['id']}"
f"&assets={asset_name_2}"
f"&color_formula=gamma+r+1.05&colormap_name={color_map}"
f"&rescale={rescale_values['min']},{rescale_values['max']}",
).json()
january_2018_tile_ngs
= requests.get(
january_2012_tile_ngs f"{RASTER_API_URL}/stac/tilejson.json?collection={items['2012-01']['collection']}&item={items['2012-01']['id']}"
f"&assets={asset_name_2}"
f"&color_formula=gamma+r+1.05&colormap_name={color_map}"
f"&rescale={rescale_values['min']},{rescale_values['max']}",
).json() january_2012_tile_ngs
# Total Methane Emissions
= "rainbow" # please select the color ramp from matplotlib library.
color_map = requests.get(
january_2018_tile_tm f"{RASTER_API_URL}/stac/tilejson.json?collection={items['2018-01']['collection']}&item={items['2018-01']['id']}"
f"&assets={asset_name_3}"
f"&color_formula=gamma+r+1.05&colormap_name={color_map}"
f"&rescale={rescale_values['min']},{rescale_values['max']}",
).json()
january_2018_tile_tm
= requests.get(
january_2012_tile_tm f"{RASTER_API_URL}/stac/tilejson.json?collection={items['2012-01']['collection']}&item={items['2012-01']['id']}"
f"&assets={asset_name_3}"
f"&color_formula=gamma+r+1.05&colormap_name={color_map}"
f"&rescale={rescale_values['min']},{rescale_values['max']}",
).json() january_2012_tile_tm
Visualizing Petroleum- Production (annual) CH₄ emissions
# Set initial zoom and center of map for CH₄ Layer
# Centre of map [latitude,longitude]
= folium.plugins.DualMap(location=(32, -102), zoom_start=6)
map_
# January 2018
= TileLayer(
map_layer_2018 =january_2018_tile_ps["tiles"][0],
tiles= 'Petroleum production for 2018',
name ="GHG",
attr=0.7,
opacity
)
map_layer_2018.add_to(map_.m1)
# January 2012
= TileLayer(
map_layer_2012 =january_2012_tile_ps["tiles"][0],
tiles= 'Petroleum production for 2012',
name ="GHG",
attr=0.7,
opacity
)
map_layer_2012.add_to(map_.m2)
# Load the GeoJSON file for the Permian Basin
= folium.GeoJson(permian_basin, name= 'Permian Shape')
map_layer_permian
map_layer_permian.add_to(map_)
# Adjust map elements
=True, position='bottomleft').add_to(map_)
folium.LayerControl(collapsed# visualising the map
map_
Visualizing Natural Gas - Production (annual) CH₄ emissions
= folium.plugins.DualMap(location=(32, -102), zoom_start=6)
map_
# January 2018
= TileLayer(
map_layer_2018 =january_2018_tile_ngs["tiles"][0],
tiles= 'Natural Gas production for 2018',
name ="GHG",
attr=0.7,
opacity
)
map_layer_2018.add_to(map_.m1)
# January 2012
= TileLayer(
map_layer_2012 =january_2012_tile_ngs["tiles"][0],
tiles= 'Natural Gas Production for 2012',
name ="GHG",
attr=0.7,
opacity
)
map_layer_2012.add_to(map_.m2)
= folium.GeoJson(permian_basin, name= 'Permian Shape')
map_layer_permian
map_layer_permian.add_to(map_)=True, position='bottomleft').add_to(map_)
folium.LayerControl(collapsed
# visualising the map
map_
Visualizing Total CH₄ emissions
# We will import folium to map and folium.plugins to allow side-by-side mapping
import folium
import folium.plugins
# Set initial zoom and center of map for CH₄ Layer
# Centre of map [latitude,longitude]
= folium.plugins.DualMap(location=(32, -102), zoom_start=6)
map_
# January 2018
= TileLayer(
map_layer_2018 =january_2018_tile_tm["tiles"][0],
tiles="GHG",
attr= 'Total methane emissions for 2018',
name =0.7,
opacity
)
map_layer_2018.add_to(map_.m1)
# January 2012
= TileLayer(
map_layer_2012 =january_2012_tile_tm["tiles"][0],
tiles= 'Total methane emissions for 2012',
name="GHG",
attr=0.7,
opacity
)
map_layer_2012.add_to(map_.m2)
= folium.GeoJson(permian_basin, name= 'Permian Shape')
map_layer_permian
map_layer_permian.add_to(map_)=True, position='bottomleft').add_to(map_)
folium.LayerControl(collapsed# visualising the map
map_
Calculating Zonal Statistics
# Check total number of items available
= requests.get(
items f"{STAC_API_URL}/collections/{collection_name}/items?limit=300"
"features"]
).json()[print(f"Found {len(items)} items")
# Explore the first item
0] items[
# The bounding box should be passed to the geojson param as a geojson Feature or FeatureCollection
# The following function generates statistics for the assigned parameter (total methane emissions)
def generate_stats(item, geojson):
= requests.post(
result f"{RASTER_API_URL}/cog/statistics",
={"url": item["assets"][asset_name_3]["href"]},
params=geojson,
json
).json()return {
**result["features"][0]["properties"],
"datetime": item["properties"]["datetime"],
}
With the function above we can generate the statistics for the AOI.
# We create a area of interest (polygon) on which will be used later
# https://geojson.io/#map=5.23/31.925/-104.738
= {
permian_basin "type": "FeatureCollection",
"features": [
{"type": "Feature",
"properties": {},
"geometry": {
"coordinates": [
[
[-104.6235111371881,
33.886802812506744
],
[-104.6235111371881,
30.500923965578963
],
[-101.35251243241865,
30.500923965578963
],
[-101.35251243241865,
33.886802812506744
],
[-104.6235111371881,
33.886802812506744
]
]
],"type": "Polygon"
}
}
] }
%%time
# Now we apply the above function to the area of interest
= [generate_stats(item, permian_basin) for item in items] stats
0] stats[
# Manipulating and cleaning the stats output from the previous cell
def clean_stats(stats_json) -> pd.DataFrame:
= pd.json_normalize(stats_json)
df = [col.replace("statistics.b1.", "") for col in df.columns]
df.columns "date"] = pd.to_datetime(df["datetime"])
df[return df
= clean_stats(stats)
df 5) df.head(
Visualizing the Data as a Time Series
We can now explore the total gridded methane emission time series (January 2000 -December 2021) available for the Permian Basin area of the U.S. We can plot the data set using the code below:
= plt.figure(figsize=(20, 10))
fig
# Set the plot elements
plt.plot("date"],
df["max"],
df[="red",
color="-",
linestyle=0.5,
linewidth="Max monthly CO₂ emissions",
label
)
# Set the labels for the X and Y axis and assign a title for the plot
plt.legend()"Years")
plt.xlabel("CH4 emissions Molecules CH₄/cm²/s")
plt.ylabel("Total CH4 gridded methane emission for Permian Basin (2012-2018)") plt.title(