cesl module¶

Utilities for working with the Coastal Ecosystem Spectral Library (CESL).

`cesl_to_gdf(sample_ids=None, include_units=False, max_workers=8, timeout=30, skip_missing_coordinates=True, skip_errors=False, **search_kwargs)` ¶

Convert CESL site metadata to a GeoPandas GeoDataFrame.

Source code in hypercoast/cesl.py

def cesl_to_gdf(
    sample_ids: Optional[Iterable[int]] = None,
    include_units: bool = False,
    max_workers: int = 8,
    timeout: int = 30,
    skip_missing_coordinates: bool = True,
    skip_errors: bool = False,
    **search_kwargs: Any,
) -> gpd.GeoDataFrame:
    """Convert CESL site metadata to a GeoPandas GeoDataFrame.

    All arguments are forwarded unchanged to ``get_cesl_sites``; the
    resulting records become the rows of the returned frame.

    Args:
        sample_ids (Iterable[int], optional): CESL sample IDs to convert.
        include_units (bool, optional): Whether to preserve units in the
            metadata columns. Defaults to False.
        max_workers (int, optional): Number of worker threads used to fetch
            metadata. Defaults to 8.
        timeout (int, optional): Request timeout in seconds. Defaults to 30.
        skip_missing_coordinates (bool, optional): Whether to skip samples
            without coordinates. Defaults to True.
        skip_errors (bool, optional): Whether to skip samples that fail for
            any reason. Defaults to False.
        **search_kwargs: Additional arguments forwarded to
            ``get_cesl_sites``.

    Returns:
        geopandas.GeoDataFrame: One row per site record with point geometry
        built from the ``longitude`` and ``latitude`` columns, in
        ``EPSG:4326``. An empty GeoDataFrame is returned when no records are
        available.

    Raises:
        ImportError: If geopandas cannot be imported.
    """

    # geopandas is imported lazily so the error message can explain why it
    # is needed.
    try:
        import geopandas as gpd
    except ImportError as exc:
        raise ImportError(
            "geopandas is required to convert CESL sites to a GeoDataFrame."
        ) from exc
    import pandas as pd

    sites = get_cesl_sites(
        sample_ids=sample_ids,
        include_units=include_units,
        max_workers=max_workers,
        timeout=timeout,
        skip_missing_coordinates=skip_missing_coordinates,
        skip_errors=skip_errors,
        **search_kwargs,
    )

    if sites:
        table = pd.DataFrame(sites)
        points = gpd.points_from_xy(table["longitude"], table["latitude"])
        return gpd.GeoDataFrame(table, geometry=points, crs="EPSG:4326")

    # Nothing to convert: return an empty frame that still carries a
    # geometry column and the CRS.
    return gpd.GeoDataFrame(geometry=[], crs="EPSG:4326")

`cesl_to_geojson(output=None, sample_ids=None, include_units=False, max_workers=8, timeout=30, skip_missing_coordinates=True, skip_errors=False, **search_kwargs)` ¶

Create a GeoJSON feature collection for CESL sites.

Parameters:

Name	Type	Description	Default
`output`	`str`	Output GeoJSON path. If provided, the GeoJSON is written to disk.	`None`
`sample_ids`	`Iterable[int]`	CESL sample IDs to export.	`None`
`include_units`	`bool`	Whether to preserve units in properties. Defaults to False.	`False`
`max_workers`	`int`	Number of worker threads used to fetch metadata. Defaults to 8.	`8`
`timeout`	`int`	Request timeout in seconds. Defaults to 30.	`30`
`skip_missing_coordinates`	`bool`	Whether to skip samples without coordinates. Defaults to True.	`True`
`skip_errors`	`bool`	Whether to skip samples that fail for any reason (e.g. non-JSON API responses). Defaults to False.	`False`
`**search_kwargs`	`Any`	Additional arguments passed to `search_cesl` when `sample_ids` is not provided.	`{}`

Returns:

Type	Description
`dict[str, Any]`	A GeoJSON FeatureCollection.

Source code in hypercoast/cesl.py

def cesl_to_geojson(
    output: Optional[str] = None,
    sample_ids: Optional[Iterable[int]] = None,
    include_units: bool = False,
    max_workers: int = 8,
    timeout: int = 30,
    skip_missing_coordinates: bool = True,
    skip_errors: bool = False,
    **search_kwargs: Any,
) -> Dict[str, Any]:
    """Build a GeoJSON FeatureCollection describing CESL sites.

    Site records are fetched with ``get_cesl_sites`` and each record is
    turned into a feature by ``_build_feature``. When ``output`` is given,
    the collection is also written to that path as UTF-8 JSON.

    Args:
        output (str, optional): Output GeoJSON path. If provided, the GeoJSON
            is written to disk; missing parent directories are created.
        sample_ids (Iterable[int], optional): CESL sample IDs to export.
        include_units (bool, optional): Whether to preserve units in
            properties. Defaults to False.
        max_workers (int, optional): Number of worker threads used to fetch
            metadata. Defaults to 8.
        timeout (int, optional): Request timeout in seconds. Defaults to 30.
        skip_missing_coordinates (bool, optional): Whether to skip samples
            without coordinates. Defaults to True.
        skip_errors (bool, optional): Whether to skip samples that fail for
            any reason (e.g. non-JSON API responses). Defaults to False.
        **search_kwargs: Additional arguments forwarded to
            ``get_cesl_sites``.

    Returns:
        dict[str, Any]: A GeoJSON FeatureCollection.
    """

    site_records = get_cesl_sites(
        sample_ids=sample_ids,
        include_units=include_units,
        max_workers=max_workers,
        timeout=timeout,
        skip_missing_coordinates=skip_missing_coordinates,
        skip_errors=skip_errors,
        **search_kwargs,
    )
    features = list(map(_build_feature, site_records))
    collection = {"type": "FeatureCollection", "features": features}

    if output is None:
        return collection

    # Resolve to an absolute path first so the parent directory is never an
    # empty string when only a bare filename is given.
    destination = os.path.abspath(output)
    parent_dir = os.path.dirname(destination)
    os.makedirs(parent_dir, exist_ok=True)
    with open(destination, "w", encoding="utf-8") as handle:
        json.dump(collection, handle, indent=2)

    return collection

`get_cesl_metadata(sample_id, include_units=False, timeout=30, crosswalk='speclib')` ¶

Retrieve metadata for a CESL sample.

Parameters:

Name	Type	Description	Default
`sample_id`	`int`	CESL sample ID.	required
`include_units`	`bool`	Whether to preserve CESL units metadata. Defaults to False.	`False`
`timeout`	`int`	Request timeout in seconds. Defaults to 30.	`30`
`crosswalk`	`str`	Metadata crosswalk type. Defaults to `"speclib"`.	`'speclib'`

Returns:

Type	Description
`dict[str, Any]`	Normalized metadata for the sample.

Source code in hypercoast/cesl.py

def get_cesl_metadata(
    sample_id: int,
    include_units: bool = False,
    timeout: int = 30,
    crosswalk: str = "speclib",
) -> Dict[str, Any]:
    """Fetch and normalize the metadata record of one CESL sample.

    The ``sample/<sample_id>/metadata`` endpoint is queried in JSON format
    with the requested crosswalk. The payload is then extracted from the
    response and passed through ``_normalize_cesl_metadata``.

    Args:
        sample_id (int): CESL sample ID.
        include_units (bool, optional): Whether to preserve CESL units
            metadata. Defaults to False.
        timeout (int, optional): Request timeout in seconds. Defaults to 30.
        crosswalk (str, optional): Metadata crosswalk type. Defaults to
            ``"speclib"``.

    Returns:
        dict[str, Any]: Normalized metadata for the sample.
    """

    endpoint = f"sample/{sample_id}/metadata"
    query = {"format": "json", "crosswalk": crosswalk}

    response = _request_cesl(endpoint, params=query, timeout=timeout)
    raw_metadata = _get_payload(response, _METADATA_KEY_CANDIDATES)

    return _normalize_cesl_metadata(raw_metadata, include_units=include_units)

`get_cesl_sites(sample_ids=None, include_units=False, max_workers=8, timeout=30, skip_missing_coordinates=True, skip_errors=False, **search_kwargs)` ¶

Retrieve CESL site metadata for a set of sample IDs.

Parameters:

Name	Type	Description	Default
`sample_ids`	`Iterable[int]`	CESL sample IDs to retrieve. Defaults to the full CESL catalog or a filtered catalog search.	`None`
`include_units`	`bool`	Whether to preserve units in metadata. Defaults to False.	`False`
`max_workers`	`int`	Number of worker threads used to fetch metadata. Defaults to 8.	`8`
`timeout`	`int`	Request timeout in seconds. Defaults to 30.	`30`
`skip_missing_coordinates`	`bool`	Whether to skip samples without coordinates. Defaults to True.	`True`
`skip_errors`	`bool`	Whether to skip samples that fail for any reason (e.g. non-JSON API responses). A warning is emitted for each skipped sample. Defaults to False.	`False`
`**search_kwargs`	`Any`	Additional arguments passed to `search_cesl` when `sample_ids` is not provided.	`{}`

Returns:

Type	Description
`list[dict[str, Any]]`	Normalized site records including coordinates.

Source code in hypercoast/cesl.py

def get_cesl_sites(
    sample_ids: Optional[Iterable[int]] = None,
    include_units: bool = False,
    max_workers: int = 8,
    timeout: int = 30,
    skip_missing_coordinates: bool = True,
    skip_errors: bool = False,
    **search_kwargs: Any,
) -> List[Dict[str, Any]]:
    """Retrieve CESL site metadata for a set of sample IDs.

    Metadata is fetched concurrently on a thread pool, one request per
    sample, and each record is augmented with ``sample_id``, ``latitude``
    and ``longitude`` keys.

    Args:
        sample_ids (Iterable[int], optional): CESL sample IDs to retrieve.
            When omitted, the IDs come from ``search_cesl`` called with
            ``timeout`` and ``search_kwargs``.
        include_units (bool, optional): Whether to preserve units in metadata.
            Defaults to False.
        max_workers (int, optional): Number of worker threads used to fetch
            metadata. Defaults to 8.
        timeout (int, optional): Request timeout in seconds. Defaults to 30.
        skip_missing_coordinates (bool, optional): Whether to silently skip
            samples whose fetch raises ``_MissingCoordinateError``. Defaults
            to True.
        skip_errors (bool, optional): Whether to skip samples that fail for any
            other reason (e.g. non-JSON API responses). A ``RuntimeWarning``
            is emitted for each skipped sample. Defaults to False.
        **search_kwargs: Additional arguments passed to ``search_cesl`` when
            ``sample_ids`` is not provided.

    Returns:
        list[dict[str, Any]]: Normalized site records including coordinates,
        sorted by ``sample_id``.

    Raises:
        _MissingCoordinateError: If a sample has no coordinates and
            ``skip_missing_coordinates`` is False. This takes precedence
            over ``skip_errors``.
        RuntimeError: If fetching a sample fails for another reason and
            ``skip_errors`` is False. The original exception is chained as
            the cause.
    """

    if sample_ids is None:
        sample_ids = search_cesl(timeout=timeout, **search_kwargs)

    # Materialize once so that any iterable (including generators) can be
    # sliced into batches below.
    sample_ids = list(sample_ids)

    def fetch_site(sample_id: int) -> Dict[str, Any]:
        """Fetch one sample's metadata and attach its ID and coordinates."""
        metadata = get_cesl_metadata(
            sample_id=sample_id, include_units=include_units, timeout=timeout
        )
        latitude = _extract_coordinate(metadata, _LATITUDE_KEYS, "latitude")
        longitude = _extract_coordinate(metadata, _LONGITUDE_KEYS, "longitude")

        # Explicit keys come last so they override same-named metadata keys.
        return {
            **metadata,
            "sample_id": sample_id,
            "latitude": latitude,
            "longitude": longitude,
        }

    # Only one batch of futures is outstanding at a time; the next batch is
    # submitted after the current one has been fully drained.
    _BATCH_SIZE = 50
    records: List[Dict[str, Any]] = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for batch_start in range(0, len(sample_ids), _BATCH_SIZE):
            batch = sample_ids[batch_start : batch_start + _BATCH_SIZE]
            futures = {
                executor.submit(fetch_site, sample_id): sample_id for sample_id in batch
            }
            try:
                for future in as_completed(futures):
                    sample_id = futures[future]
                    try:
                        records.append(future.result())
                    except _MissingCoordinateError:
                        if not skip_missing_coordinates:
                            raise
                    except Exception as exc:
                        if skip_errors:
                            warnings.warn(
                                f"Skipping sample {sample_id}: {exc}",
                                RuntimeWarning,
                                stacklevel=2,
                            )
                        else:
                            raise RuntimeError(
                                f"Failed to retrieve CESL metadata for sample {sample_id}."
                            ) from exc
            finally:
                # Fail fast: when an error propagates, drop the requests that
                # have not started yet. Otherwise the executor's shutdown on
                # leaving the ``with`` block would wait for the whole batch
                # before the caller sees the exception. After a normal drain
                # every future is finished and ``cancel()`` is a no-op.
                for pending in futures:
                    pending.cancel()

    records.sort(key=lambda record: record["sample_id"])
    return records

`get_cesl_spectrum(sample_id, spectrum_key=None, timeout=30)` ¶

Retrieve the wavelength and spectrum values for a CESL sample.

Parameters:

Name	Type	Description	Default
`sample_id`	`int`	CESL sample ID.	required
`spectrum_key`	`str`	Name of the spectrum field to extract. Defaults to the first non-`wavelength` field returned by the API.	`None`
`timeout`	`int`	Request timeout in seconds. Defaults to 30.	`30`

Returns:

Type	Description
`pandas.DataFrame`	A DataFrame with `wavelength` and spectrum columns.

Source code in hypercoast/cesl.py

def get_cesl_spectrum(
    sample_id: int, spectrum_key: Optional[str] = None, timeout: int = 30
) -> pd.DataFrame:
    """Retrieve the wavelength and spectrum values for a CESL sample.

    The ``sample/<sample_id>/data`` endpoint is queried in JSON format. The
    wavelength field and the spectrum field are both matched
    case-insensitively against the payload keys.

    Args:
        sample_id (int): CESL sample ID.
        spectrum_key (str, optional): Name of the spectrum field to extract,
            matched case-insensitively. Defaults to the first
            non-``wavelength`` field returned by the API.
        timeout (int, optional): Request timeout in seconds. Defaults to 30.

    Returns:
        pandas.DataFrame: A DataFrame with a ``wavelength`` column and one
        spectrum column named after the payload key that was used. The
        sample ID and the resolved key are stored in ``DataFrame.attrs``
        under ``"sample_id"`` and ``"spectrum_key"``.

    Raises:
        KeyError: If the response has no wavelength field, has no spectral
            field at all, or does not contain the requested ``spectrum_key``.
    """

    import pandas as pd

    data = _request_cesl(
        f"sample/{sample_id}/data", params={"format": "json"}, timeout=timeout
    )
    payload = _get_payload(data, _DATA_KEY_CANDIDATES)

    # Spectrum keys are filtered and matched case-insensitively below, so the
    # wavelength field is located the same way. An exact "wavelength" key
    # still wins when present.
    if "wavelength" in payload:
        wavelength_key = "wavelength"
    else:
        wavelength_key = next(
            (key for key in payload if key.lower() == "wavelength"), None
        )
    if wavelength_key is None:
        raise KeyError("CESL spectrum response does not contain 'wavelength'.")

    if spectrum_key is None:
        # No explicit choice: take the first field that is not the wavelength.
        spectrum_keys = [key for key in payload if key.lower() != "wavelength"]
        if not spectrum_keys:
            raise KeyError("CESL spectrum response does not contain any spectral data.")
        spectrum_key = spectrum_keys[0]
    else:
        # Map lowercase names back to the payload's own spelling so the
        # returned column keeps the key as the API reported it.
        matching_keys = {
            key.lower(): key for key in payload if key.lower() != "wavelength"
        }
        resolved_key = matching_keys.get(spectrum_key.lower())
        if resolved_key is None:
            raise KeyError(f"Could not find spectrum key '{spectrum_key}'.")
        spectrum_key = resolved_key

    spectrum = pd.DataFrame(
        {"wavelength": payload[wavelength_key], spectrum_key: payload[spectrum_key]}
    )
    spectrum.attrs["sample_id"] = sample_id
    spectrum.attrs["spectrum_key"] = spectrum_key
    return spectrum

`plot_cesl_spectrum(sample_id, spectrum_key=None, ax=None, title=None, xlabel='Wavelength (nm)', ylabel=None, figsize=None, x_range=None, y_range=None, timeout=30, **kwargs)` ¶

Plot a CESL spectrum for a selected sample.

Parameters:

Name	Type	Description	Default
`sample_id`	`int`	CESL sample ID.	required
`spectrum_key`	`str`	Name of the spectrum field to plot.	`None`
`ax`	`matplotlib.axes.Axes`	Existing axes to plot on.	`None`
`title`	`str`	Plot title. Defaults to `CESL Sample <id>`.	`None`
`xlabel`	`str`	X-axis label. Defaults to `Wavelength (nm)`.	`'Wavelength (nm)'`
`ylabel`	`str`	Y-axis label. Defaults to the selected spectrum key.	`None`
`figsize`	`Sequence[float]`	Figure size passed to `matplotlib.pyplot.subplots` when `ax` is not provided.	`None`
`x_range`	`Sequence[float]`	Two-element x-axis range used to exclude wavelength outliers from the visible plot extent.	`None`
`y_range`	`Sequence[float]`	Two-element y-axis range used to exclude reflectance outliers from the visible plot extent.	`None`
`timeout`	`int`	Request timeout in seconds. Defaults to 30.	`30`
`**kwargs`	`Any`	Additional keyword arguments passed to `Axes.plot`.	`{}`

Returns:

Type	Description
`matplotlib.axes.Axes`	The axes containing the plot.

Source code in hypercoast/cesl.py

def plot_cesl_spectrum(
    sample_id: int,
    spectrum_key: Optional[str] = None,
    ax: Optional[plt.Axes] = None,
    title: Optional[str] = None,
    xlabel: str = "Wavelength (nm)",
    ylabel: Optional[str] = None,
    figsize: Optional[Sequence[float]] = None,
    x_range: Optional[Sequence[float]] = None,
    y_range: Optional[Sequence[float]] = None,
    timeout: int = 30,
    **kwargs: Any,
) -> plt.Axes:
    """Draw the spectrum of one CESL sample as a line plot.

    The spectrum is loaded with ``get_cesl_spectrum`` and plotted against
    its ``wavelength`` column.

    Args:
        sample_id (int): CESL sample ID.
        spectrum_key (str, optional): Name of the spectrum field to plot.
        ax (matplotlib.axes.Axes, optional): Existing axes to plot on. A new
            figure and axes are created when omitted.
        title (str, optional): Plot title. Defaults to ``CESL Sample <id>``.
        xlabel (str, optional): X-axis label. Defaults to ``Wavelength (nm)``.
        ylabel (str, optional): Y-axis label. Defaults to the selected spectrum
            key.
        figsize (Sequence[float], optional): Figure size passed to
            ``matplotlib.pyplot.subplots`` when ``ax`` is not provided.
        x_range (Sequence[float], optional): Two-element x-axis range used to
            exclude wavelength outliers from the visible plot extent.
        y_range (Sequence[float], optional): Two-element y-axis range used to
            exclude reflectance outliers from the visible plot extent.
        timeout (int, optional): Request timeout in seconds. Defaults to 30.
        **kwargs: Additional keyword arguments passed to ``Axes.plot``. A
            ``label`` entry overrides the default ``Sample <id>`` legend
            label; passing ``label=None`` suppresses the legend.

    Returns:
        matplotlib.axes.Axes: The axes containing the plot.
    """

    import matplotlib.pyplot as plt

    frame = get_cesl_spectrum(
        sample_id=sample_id, spectrum_key=spectrum_key, timeout=timeout
    )
    # Use the key as resolved by get_cesl_spectrum, not the caller's spelling.
    resolved_key = frame.attrs["spectrum_key"]

    if ax is None:
        ax = plt.subplots(figsize=figsize)[1]

    # "label" is removed from kwargs so it is not passed to plot() twice.
    label = kwargs.pop("label", f"Sample {sample_id}")
    ax.plot(frame["wavelength"], frame[resolved_key], label=label, **kwargs)

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel if ylabel else resolved_key)
    ax.set_title(title if title else f"CESL Sample {sample_id}")

    # Apply only the axis limits the caller actually supplied.
    for limits, apply_limits in ((x_range, ax.set_xlim), (y_range, ax.set_ylim)):
        if limits is not None:
            apply_limits(limits)

    if label is not None:
        ax.legend()

    return ax

`search_cesl(bbox=None, circle=None, publish_date_start=None, publish_date_end=None, taxonomy=None, biomass=None, coverage=None, timeout=30)` ¶

Search the CESL catalog and return matching sample IDs.

Parameters:

Name	Type	Description	Default
`bbox`	`Sequence[float]`	Bounding box formatted as `(north, south, east, west)`.	`None`
`circle`	`Sequence[float]`	Circular search formatted as `(latitude, longitude, radius_km)`.	`None`
`publish_date_start`	`str`	ISO8601 start date for published samples.	`None`
`publish_date_end`	`str`	ISO8601 end date for published samples.	`None`
`taxonomy`	`str`	Binomial or taxonomy path filter.	`None`
`biomass`	`bool`	Whether biomass measurements are required.	`None`
`coverage`	`float`	Minimum percentage coverage threshold.	`None`
`timeout`	`int`	Request timeout in seconds. Defaults to 30.	`30`

Returns:

Type	Description
`list[int]`	Matching CESL sample IDs.

Source code in hypercoast/cesl.py

def search_cesl(
    bbox: Optional[Sequence[float]] = None,
    circle: Optional[Sequence[float]] = None,
    publish_date_start: Optional[str] = None,
    publish_date_end: Optional[str] = None,
    taxonomy: Optional[str] = None,
    biomass: Optional[bool] = None,
    coverage: Optional[float] = None,
    timeout: int = 30,
) -> List[int]:
    """Query the CESL catalog and return the IDs of matching samples.

    Only filters that are not ``None`` are sent to the ``catalog`` endpoint;
    the request always asks for JSON.

    Args:
        bbox (Sequence[float], optional): Bounding box formatted as
            ``(north, south, east, west)``.
        circle (Sequence[float], optional): Circular search formatted as
            ``(latitude, longitude, radius_km)``.
        publish_date_start (str, optional): ISO8601 start date for published
            samples.
        publish_date_end (str, optional): ISO8601 end date for published
            samples.
        taxonomy (str, optional): Binomial or taxonomy path filter.
        biomass (bool, optional): Whether biomass measurements are required.
        coverage (float, optional): Minimum percentage coverage threshold.
        timeout (int, optional): Request timeout in seconds. Defaults to 30.

    Returns:
        list[int]: Matching CESL sample IDs, taken from the ``ids`` entry of
        the catalog payload; an empty list when that entry is absent.
    """

    # Filters are listed in the order they are added to the query; the
    # spatial ones are formatted (and validated) first.
    optional_filters: Dict[str, Any] = {
        "bbox": _format_catalog_param(bbox, 4, "bbox"),
        "circle": _format_catalog_param(circle, 3, "circle"),
        "publish_date_start": publish_date_start,
        "publish_date_end": publish_date_end,
        "taxonomy": taxonomy,
        # Booleans are sent as lowercase "true" / "false" strings.
        "biomass": None if biomass is None else str(biomass).lower(),
        "coverage": coverage,
    }

    params: Dict[str, Any] = {"format": "json"}
    for name, value in optional_filters.items():
        if value is not None:
            params[name] = value

    response = _request_cesl("catalog", params=params, timeout=timeout)
    catalog = _get_payload(response, _CATALOG_KEY_CANDIDATES)
    return catalog.get("ids", [])

cesl module¶

cesl_to_gdf(sample_ids=None, include_units=False, max_workers=8, timeout=30, skip_missing_coordinates=True, skip_errors=False, **search_kwargs) ¶

cesl_to_geojson(output=None, sample_ids=None, include_units=False, max_workers=8, timeout=30, skip_missing_coordinates=True, skip_errors=False, **search_kwargs) ¶

get_cesl_metadata(sample_id, include_units=False, timeout=30, crosswalk='speclib') ¶

get_cesl_sites(sample_ids=None, include_units=False, max_workers=8, timeout=30, skip_missing_coordinates=True, skip_errors=False, **search_kwargs) ¶

get_cesl_spectrum(sample_id, spectrum_key=None, timeout=30) ¶

plot_cesl_spectrum(sample_id, spectrum_key=None, ax=None, title=None, xlabel='Wavelength (nm)', ylabel=None, figsize=None, x_range=None, y_range=None, timeout=30, **kwargs) ¶

search_cesl(bbox=None, circle=None, publish_date_start=None, publish_date_end=None, taxonomy=None, biomass=None, coverage=None, timeout=30) ¶

`cesl_to_gdf(sample_ids=None, include_units=False, max_workers=8, timeout=30, skip_missing_coordinates=True, skip_errors=False, **search_kwargs)` ¶

`cesl_to_geojson(output=None, sample_ids=None, include_units=False, max_workers=8, timeout=30, skip_missing_coordinates=True, skip_errors=False, **search_kwargs)` ¶

`get_cesl_metadata(sample_id, include_units=False, timeout=30, crosswalk='speclib')` ¶

`get_cesl_sites(sample_ids=None, include_units=False, max_workers=8, timeout=30, skip_missing_coordinates=True, skip_errors=False, **search_kwargs)` ¶

`get_cesl_spectrum(sample_id, spectrum_key=None, timeout=30)` ¶

`plot_cesl_spectrum(sample_id, spectrum_key=None, ax=None, title=None, xlabel='Wavelength (nm)', ylabel=None, figsize=None, x_range=None, y_range=None, timeout=30, **kwargs)` ¶

`search_cesl(bbox=None, circle=None, publish_date_start=None, publish_date_end=None, taxonomy=None, biomass=None, coverage=None, timeout=30)` ¶