Skip to content

cesl module

Utilities for working with the Coastal Ecosystem Spectral Library (CESL).

cesl_to_gdf(sample_ids=None, include_units=False, max_workers=8, timeout=30, skip_missing_coordinates=True, skip_errors=False, **search_kwargs)

Convert CESL site metadata to a GeoPandas GeoDataFrame.

Source code in hypercoast/cesl.py
def cesl_to_gdf(
    sample_ids: Optional[Iterable[int]] = None,
    include_units: bool = False,
    max_workers: int = 8,
    timeout: int = 30,
    skip_missing_coordinates: bool = True,
    skip_errors: bool = False,
    **search_kwargs: Any,
) -> gpd.GeoDataFrame:
    """Convert CESL site metadata to a GeoPandas GeoDataFrame.

    All arguments are forwarded unchanged to :func:`get_cesl_sites`; the
    returned records are turned into point geometries built from their
    ``longitude`` and ``latitude`` fields.

    Args:
        sample_ids (Iterable[int], optional): CESL sample IDs to convert.
        include_units (bool, optional): Forwarded to :func:`get_cesl_sites`.
            Defaults to False.
        max_workers (int, optional): Forwarded to :func:`get_cesl_sites`.
            Defaults to 8.
        timeout (int, optional): Forwarded to :func:`get_cesl_sites`.
            Defaults to 30.
        skip_missing_coordinates (bool, optional): Forwarded to
            :func:`get_cesl_sites`. Defaults to True.
        skip_errors (bool, optional): Forwarded to :func:`get_cesl_sites`.
            Defaults to False.
        **search_kwargs: Forwarded to :func:`get_cesl_sites`.

    Returns:
        geopandas.GeoDataFrame: One row per site record with point geometry in
        ``EPSG:4326``. An empty GeoDataFrame (geometry column only) is
        returned when no records are available.

    Raises:
        ImportError: If geopandas cannot be imported.
    """

    # geopandas is an optional dependency, so it is imported lazily and the
    # failure is re-raised with an actionable message.
    try:
        import geopandas as gpd
    except ImportError as exc:
        raise ImportError(
            "geopandas is required to convert CESL sites to a GeoDataFrame."
        ) from exc
    import pandas as pd

    sites = get_cesl_sites(
        sample_ids=sample_ids,
        include_units=include_units,
        max_workers=max_workers,
        timeout=timeout,
        skip_missing_coordinates=skip_missing_coordinates,
        skip_errors=skip_errors,
        **search_kwargs,
    )

    if sites:
        table = pd.DataFrame(sites)
        points = gpd.points_from_xy(table["longitude"], table["latitude"])
        return gpd.GeoDataFrame(table, geometry=points, crs="EPSG:4326")

    # No records: there are no coordinate columns to build points from.
    return gpd.GeoDataFrame(geometry=[], crs="EPSG:4326")

cesl_to_geojson(output=None, sample_ids=None, include_units=False, max_workers=8, timeout=30, skip_missing_coordinates=True, skip_errors=False, **search_kwargs)

Create a GeoJSON feature collection for CESL sites.

Parameters:

Name Type Description Default
output str

Output GeoJSON path. If provided, the GeoJSON is written to disk.

None
sample_ids Iterable[int]

CESL sample IDs to export.

None
include_units bool

Whether to preserve units in properties. Defaults to False.

False
max_workers int

Number of worker threads used to fetch metadata. Defaults to 8.

8
timeout int

Request timeout in seconds. Defaults to 30.

30
skip_missing_coordinates bool

Whether to skip samples without coordinates. Defaults to True.

True
skip_errors bool

Whether to skip samples that fail for any reason (e.g. non-JSON API responses). Defaults to False.

False
**search_kwargs Any

Additional arguments passed to `search_cesl` when `sample_ids` is not provided.

{}

Returns:

Type Description
dict[str, Any]

A GeoJSON FeatureCollection.

Source code in hypercoast/cesl.py
def cesl_to_geojson(
    output: Optional[str] = None,
    sample_ids: Optional[Iterable[int]] = None,
    include_units: bool = False,
    max_workers: int = 8,
    timeout: int = 30,
    skip_missing_coordinates: bool = True,
    skip_errors: bool = False,
    **search_kwargs: Any,
) -> Dict[str, Any]:
    """Build a GeoJSON FeatureCollection describing CESL sites.

    Site records are fetched through :func:`get_cesl_sites` and each record is
    converted to a feature with ``_build_feature``. The collection is always
    returned and, when ``output`` is given, also written to that path.

    Args:
        output (str, optional): Destination GeoJSON path. When provided, the
            path is made absolute, its parent directory is created if needed,
            and the collection is written as UTF-8 JSON with a 2-space indent.
        sample_ids (Iterable[int], optional): CESL sample IDs to export.
        include_units (bool, optional): Whether to preserve units in
            properties. Defaults to False.
        max_workers (int, optional): Number of worker threads used to fetch
            metadata. Defaults to 8.
        timeout (int, optional): Request timeout in seconds. Defaults to 30.
        skip_missing_coordinates (bool, optional): Whether to skip samples
            without coordinates. Defaults to True.
        skip_errors (bool, optional): Whether to skip samples that fail for any
            reason (e.g. non-JSON API responses). Defaults to False.
        **search_kwargs: Additional arguments passed to :func:`search_cesl`
            when ``sample_ids`` is not provided.

    Returns:
        dict[str, Any]: A GeoJSON FeatureCollection.
    """

    sites = get_cesl_sites(
        sample_ids=sample_ids,
        include_units=include_units,
        max_workers=max_workers,
        timeout=timeout,
        skip_missing_coordinates=skip_missing_coordinates,
        skip_errors=skip_errors,
        **search_kwargs,
    )
    features = [_build_feature(site) for site in sites]
    feature_collection = {"type": "FeatureCollection", "features": features}

    if output is None:
        return feature_collection

    # Resolve to an absolute path first so dirname() is never empty for a
    # bare file name such as "sites.geojson".
    target = os.path.abspath(output)
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with open(target, "w", encoding="utf-8") as handle:
        json.dump(feature_collection, handle, indent=2)

    return feature_collection

get_cesl_metadata(sample_id, include_units=False, timeout=30, crosswalk='speclib')

Retrieve metadata for a CESL sample.

Parameters:

Name Type Description Default
sample_id int

CESL sample ID.

required
include_units bool

Whether to preserve CESL units metadata. Defaults to False.

False
timeout int

Request timeout in seconds. Defaults to 30.

30
crosswalk str

Metadata crosswalk type. Defaults to "speclib".

'speclib'

Returns:

Type Description
dict[str, Any]

Normalized metadata for the sample.

Source code in hypercoast/cesl.py
def get_cesl_metadata(
    sample_id: int,
    include_units: bool = False,
    timeout: int = 30,
    crosswalk: str = "speclib",
) -> Dict[str, Any]:
    """Fetch and normalize the metadata record of one CESL sample.

    Args:
        sample_id (int): CESL sample ID.
        include_units (bool, optional): Whether to preserve CESL units
            metadata; forwarded to the normalization step. Defaults to False.
        timeout (int, optional): Request timeout in seconds. Defaults to 30.
        crosswalk (str, optional): Metadata crosswalk type sent as the
            ``crosswalk`` query parameter. Defaults to ``"speclib"``.

    Returns:
        dict[str, Any]: Normalized metadata for the sample.
    """

    endpoint = f"sample/{sample_id}/metadata"
    query = {"format": "json", "crosswalk": crosswalk}

    response = _request_cesl(endpoint, params=query, timeout=timeout)
    # Extract the metadata payload from the response using the module's list
    # of candidate keys before normalizing it.
    raw_metadata = _get_payload(response, _METADATA_KEY_CANDIDATES)
    return _normalize_cesl_metadata(raw_metadata, include_units=include_units)

get_cesl_sites(sample_ids=None, include_units=False, max_workers=8, timeout=30, skip_missing_coordinates=True, skip_errors=False, **search_kwargs)

Retrieve CESL site metadata for a set of sample IDs.

Parameters:

Name Type Description Default
sample_ids Iterable[int]

CESL sample IDs to retrieve. Defaults to the full CESL catalog or a filtered catalog search.

None
include_units bool

Whether to preserve units in metadata. Defaults to False.

False
max_workers int

Number of worker threads used to fetch metadata. Defaults to 8.

8
timeout int

Request timeout in seconds. Defaults to 30.

30
skip_missing_coordinates bool

Whether to skip samples without coordinates. Defaults to True.

True
skip_errors bool

Whether to skip samples that fail for any reason (e.g. non-JSON API responses). A warning is emitted for each skipped sample. Defaults to False.

False
**search_kwargs Any

Additional arguments passed to `search_cesl` when `sample_ids` is not provided.

{}

Returns:

Type Description
list[dict[str, Any]]

Normalized site records including coordinates.

Source code in hypercoast/cesl.py
def get_cesl_sites(
    sample_ids: Optional[Iterable[int]] = None,
    include_units: bool = False,
    max_workers: int = 8,
    timeout: int = 30,
    skip_missing_coordinates: bool = True,
    skip_errors: bool = False,
    **search_kwargs: Any,
) -> List[Dict[str, Any]]:
    """Retrieve CESL site metadata for a set of sample IDs.

    Metadata is fetched concurrently in batches of 50 samples. Each record is
    the sample's normalized metadata plus ``sample_id``, ``latitude`` and
    ``longitude`` keys.

    Args:
        sample_ids (Iterable[int], optional): CESL sample IDs to retrieve.
            When omitted, the IDs come from :func:`search_cesl` called with
            ``timeout`` and ``search_kwargs``.
        include_units (bool, optional): Whether to preserve units in metadata.
            Defaults to False.
        max_workers (int, optional): Number of worker threads used to fetch
            metadata. Defaults to 8.
        timeout (int, optional): Request timeout in seconds. Defaults to 30.
        skip_missing_coordinates (bool, optional): Whether to silently drop
            samples whose coordinates cannot be extracted. Defaults to True.
        skip_errors (bool, optional): Whether to skip samples that fail for any
            other reason (e.g. non-JSON API responses). A ``RuntimeWarning`` is
            emitted for each skipped sample. Defaults to False.
        **search_kwargs: Additional arguments passed to :func:`search_cesl` when
            ``sample_ids`` is not provided. Ignored when ``sample_ids`` is
            given.

    Returns:
        list[dict[str, Any]]: Normalized site records including coordinates,
        sorted by ``sample_id``.

    Raises:
        RuntimeError: If fetching a sample fails and ``skip_errors`` is False.
            The original exception is attached as the cause.
        _MissingCoordinateError: If a sample has no coordinates and
            ``skip_missing_coordinates`` is False.
    """

    if sample_ids is None:
        sample_ids = search_cesl(timeout=timeout, **search_kwargs)

    # Materialize the iterable so it can be measured and sliced into batches.
    sample_ids = list(sample_ids)

    def fetch_site(sample_id: int) -> Dict[str, Any]:
        """Fetch one sample's metadata and attach its ID and coordinates."""
        metadata = get_cesl_metadata(
            sample_id=sample_id, include_units=include_units, timeout=timeout
        )
        latitude = _extract_coordinate(metadata, _LATITUDE_KEYS, "latitude")
        longitude = _extract_coordinate(metadata, _LONGITUDE_KEYS, "longitude")

        # The explicit keys come last so they override same-named metadata.
        return {
            **metadata,
            "sample_id": sample_id,
            "latitude": latitude,
            "longitude": longitude,
        }

    _BATCH_SIZE = 50
    records: List[Dict[str, Any]] = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for batch_start in range(0, len(sample_ids), _BATCH_SIZE):
            batch = sample_ids[batch_start : batch_start + _BATCH_SIZE]
            futures = {
                executor.submit(fetch_site, sample_id): sample_id for sample_id in batch
            }
            try:
                for future in as_completed(futures):
                    sample_id = futures[future]
                    try:
                        records.append(future.result())
                    except _MissingCoordinateError:
                        if not skip_missing_coordinates:
                            raise
                    except Exception as exc:
                        if skip_errors:
                            warnings.warn(
                                f"Skipping sample {sample_id}: {exc}",
                                RuntimeWarning,
                                stacklevel=2,
                            )
                        else:
                            raise RuntimeError(
                                f"Failed to retrieve CESL metadata for sample {sample_id}."
                            ) from exc
            except BaseException:
                # Fail fast: leaving the ``with`` block waits for every
                # submitted future, so without cancellation the error would
                # only surface after all queued requests of this batch had
                # run. cancel() is a no-op for running or finished futures.
                for pending in futures:
                    pending.cancel()
                raise

    records.sort(key=lambda record: record["sample_id"])
    return records

get_cesl_spectrum(sample_id, spectrum_key=None, timeout=30)

Retrieve the wavelength and spectrum values for a CESL sample.

Parameters:

Name Type Description Default
sample_id int

CESL sample ID.

required
spectrum_key str

Name of the spectrum field to extract. Defaults to the first non-wavelength field returned by the API.

None
timeout int

Request timeout in seconds. Defaults to 30.

30

Returns:

Type Description
pandas.DataFrame

A DataFrame with wavelength and spectrum columns.

Source code in hypercoast/cesl.py
def get_cesl_spectrum(
    sample_id: int, spectrum_key: Optional[str] = None, timeout: int = 30
) -> pd.DataFrame:
    """Retrieve the wavelength and spectrum values for a CESL sample.

    Key matching is case-insensitive throughout: the wavelength field is
    found regardless of its capitalization, and ``spectrum_key`` is matched
    against the payload keys ignoring case.

    Args:
        sample_id (int): CESL sample ID.
        spectrum_key (str, optional): Name of the spectrum field to extract.
            Defaults to the first non-``wavelength`` field returned by the API.
        timeout (int, optional): Request timeout in seconds. Defaults to 30.

    Returns:
        pandas.DataFrame: A DataFrame with a ``wavelength`` column and one
        spectrum column named after the payload's own key. The sample ID and
        the resolved key are stored in ``DataFrame.attrs`` under
        ``"sample_id"`` and ``"spectrum_key"``.

    Raises:
        KeyError: If the response has no wavelength field, has no spectral
            field, or does not contain the requested ``spectrum_key``.
    """

    import pandas as pd

    data = _request_cesl(
        f"sample/{sample_id}/data", params={"format": "json"}, timeout=timeout
    )
    payload = _get_payload(data, _DATA_KEY_CANDIDATES)

    # Locate the wavelength field with the same case-insensitive rule used for
    # the spectrum keys below. The exact lowercase key wins when present.
    if "wavelength" in payload:
        wavelength_key = "wavelength"
    else:
        wavelength_key = next(
            (key for key in payload if key.lower() == "wavelength"), None
        )
    if wavelength_key is None:
        raise KeyError("CESL spectrum response does not contain 'wavelength'.")

    spectrum_keys = [key for key in payload if key.lower() != "wavelength"]

    if spectrum_key is None:
        if not spectrum_keys:
            raise KeyError("CESL spectrum response does not contain any spectral data.")
        spectrum_key = spectrum_keys[0]
    else:
        # Map lowercase names back to the payload's own spelling so the
        # returned column keeps the API's capitalization.
        matching_keys = {key.lower(): key for key in spectrum_keys}
        resolved_key = matching_keys.get(spectrum_key.lower())
        if resolved_key is None:
            raise KeyError(f"Could not find spectrum key '{spectrum_key}'.")
        spectrum_key = resolved_key

    spectrum = pd.DataFrame(
        {"wavelength": payload[wavelength_key], spectrum_key: payload[spectrum_key]}
    )
    spectrum.attrs["sample_id"] = sample_id
    spectrum.attrs["spectrum_key"] = spectrum_key
    return spectrum

plot_cesl_spectrum(sample_id, spectrum_key=None, ax=None, title=None, xlabel='Wavelength (nm)', ylabel=None, figsize=None, x_range=None, y_range=None, timeout=30, **kwargs)

Plot a CESL spectrum for a selected sample.

Parameters:

Name Type Description Default
sample_id int

CESL sample ID.

required
spectrum_key str

Name of the spectrum field to plot.

None
ax matplotlib.axes.Axes

Existing axes to plot on.

None
title str

Plot title. Defaults to CESL Sample <id>.

None
xlabel str

X-axis label. Defaults to Wavelength (nm).

'Wavelength (nm)'
ylabel str

Y-axis label. Defaults to the selected spectrum key.

None
figsize Sequence[float]

Figure size passed to matplotlib.pyplot.subplots when ax is not provided.

None
x_range Sequence[float]

Two-element x-axis range used to exclude wavelength outliers from the visible plot extent.

None
y_range Sequence[float]

Two-element y-axis range used to exclude reflectance outliers from the visible plot extent.

None
timeout int

Request timeout in seconds. Defaults to 30.

30
**kwargs Any

Additional keyword arguments passed to Axes.plot.

{}

Returns:

Type Description
matplotlib.axes.Axes

The axes containing the plot.

Source code in hypercoast/cesl.py
def plot_cesl_spectrum(
    sample_id: int,
    spectrum_key: Optional[str] = None,
    ax: Optional[plt.Axes] = None,
    title: Optional[str] = None,
    xlabel: str = "Wavelength (nm)",
    ylabel: Optional[str] = None,
    figsize: Optional[Sequence[float]] = None,
    x_range: Optional[Sequence[float]] = None,
    y_range: Optional[Sequence[float]] = None,
    timeout: int = 30,
    **kwargs: Any,
) -> plt.Axes:
    """Fetch one CESL spectrum and draw it as a line plot.

    Args:
        sample_id (int): CESL sample ID.
        spectrum_key (str, optional): Name of the spectrum field to plot;
            resolved by :func:`get_cesl_spectrum`.
        ax (matplotlib.axes.Axes, optional): Existing axes to plot on. A new
            figure and axes are created when omitted.
        title (str, optional): Plot title. Falls back to
            ``CESL Sample <id>`` when not given or empty.
        xlabel (str, optional): X-axis label. Defaults to ``Wavelength (nm)``.
        ylabel (str, optional): Y-axis label. Falls back to the resolved
            spectrum key when not given or empty.
        figsize (Sequence[float], optional): Figure size passed to
            ``matplotlib.pyplot.subplots`` when ``ax`` is not provided.
        x_range (Sequence[float], optional): Limits passed to
            ``Axes.set_xlim`` to restrict the visible wavelength extent.
        y_range (Sequence[float], optional): Limits passed to
            ``Axes.set_ylim`` to restrict the visible value extent.
        timeout (int, optional): Request timeout in seconds. Defaults to 30.
        **kwargs: Additional keyword arguments passed to ``Axes.plot``. A
            ``label`` entry overrides the default ``Sample <id>`` legend
            label; ``label=None`` suppresses the legend.

    Returns:
        matplotlib.axes.Axes: The axes containing the plot.
    """

    import matplotlib.pyplot as plt

    frame = get_cesl_spectrum(
        sample_id=sample_id, spectrum_key=spectrum_key, timeout=timeout
    )
    # Use the key as spelled in the API payload, not the caller's spelling.
    resolved_key = frame.attrs["spectrum_key"]

    if ax is None:
        ax = plt.subplots(figsize=figsize)[1]

    label = kwargs.pop("label", f"Sample {sample_id}")
    ax.plot(frame["wavelength"], frame[resolved_key], label=label, **kwargs)

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel if ylabel else resolved_key)
    ax.set_title(title if title else f"CESL Sample {sample_id}")

    if x_range is not None:
        ax.set_xlim(x_range)
    if y_range is not None:
        ax.set_ylim(y_range)

    # An explicit label=None means the caller does not want a legend.
    if label is not None:
        ax.legend()

    return ax

search_cesl(bbox=None, circle=None, publish_date_start=None, publish_date_end=None, taxonomy=None, biomass=None, coverage=None, timeout=30)

Search the CESL catalog and return matching sample IDs.

Parameters:

Name Type Description Default
bbox Sequence[float]

Bounding box formatted as (north, south, east, west).

None
circle Sequence[float]

Circular search formatted as (latitude, longitude, radius_km).

None
publish_date_start str

ISO8601 start date for published samples.

None
publish_date_end str

ISO8601 end date for published samples.

None
taxonomy str

Binomial or taxonomy path filter.

None
biomass bool

Whether biomass measurements are required.

None
coverage float

Minimum percentage coverage threshold.

None
timeout int

Request timeout in seconds. Defaults to 30.

30

Returns:

Type Description
list[int]

Matching CESL sample IDs.

Source code in hypercoast/cesl.py
def search_cesl(
    bbox: Optional[Sequence[float]] = None,
    circle: Optional[Sequence[float]] = None,
    publish_date_start: Optional[str] = None,
    publish_date_end: Optional[str] = None,
    taxonomy: Optional[str] = None,
    biomass: Optional[bool] = None,
    coverage: Optional[float] = None,
    timeout: int = 30,
) -> List[int]:
    """Query the CESL catalog and return the IDs of matching samples.

    Only filters that are not ``None`` are sent to the ``catalog`` endpoint.

    Args:
        bbox (Sequence[float], optional): Bounding box formatted as
            ``(north, south, east, west)``.
        circle (Sequence[float], optional): Circular search formatted as
            ``(latitude, longitude, radius_km)``.
        publish_date_start (str, optional): ISO8601 start date for published
            samples.
        publish_date_end (str, optional): ISO8601 end date for published
            samples.
        taxonomy (str, optional): Binomial or taxonomy path filter.
        biomass (bool, optional): Whether biomass measurements are required.
            Sent as the lowercase string ``"true"`` or ``"false"``.
        coverage (float, optional): Minimum percentage coverage threshold.
        timeout (int, optional): Request timeout in seconds. Defaults to 30.

    Returns:
        list[int]: Matching CESL sample IDs, or an empty list when the
        response payload has no ``"ids"`` entry.
    """

    # Optional filters in the order they are added to the query. bbox and
    # circle are formatted up front, bbox first.
    optional_filters: Dict[str, Any] = {
        "bbox": _format_catalog_param(bbox, 4, "bbox"),
        "circle": _format_catalog_param(circle, 3, "circle"),
        "publish_date_start": publish_date_start,
        "publish_date_end": publish_date_end,
        "taxonomy": taxonomy,
        "biomass": None if biomass is None else str(biomass).lower(),
        "coverage": coverage,
    }

    params: Dict[str, Any] = {"format": "json"}
    for name, value in optional_filters.items():
        if value is not None:
            params[name] = value

    response = _request_cesl("catalog", params=params, timeout=timeout)
    catalog = _get_payload(response, _CATALOG_KEY_CANDIDATES)
    return catalog.get("ids", [])