pace module

This module contains functions to read and process PACE data.

apply_kmeans(dataset, n_clusters=6, filter_condition=None, plot=True, figsize=(8, 6), colors=None, extent=None, title=None, **kwargs)

Applies K-means clustering to the dataset and optionally plots the results.

Parameters:

    dataset (xr.Dataset | str): The dataset containing the PACE data or the file path to the dataset. Required.
    n_clusters (int, optional): Number of clusters for K-means. Defaults to 6.
    filter_condition (Callable[[xr.DataArray], xr.DataArray], optional): A function to filter the data. Defaults to None.
    plot (bool, optional): Whether to plot the data. Defaults to True.
    figsize (tuple[int, int], optional): Figure size for the plot. Defaults to (8, 6).
    colors (list[str], optional): List of colors to use for the clusters. Defaults to None.
    extent (list[float] | None, optional): The extent to zoom in to the specified region. Defaults to None.
    title (str | None, optional): Title for the plot. Defaults to None.
    **kwargs: Additional keyword arguments to pass to the plt.subplots function.

Returns:

    tuple[np.ndarray, np.ndarray, np.ndarray]: The cluster labels, latitudes, and longitudes.
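
Examples:

A minimal usage sketch; the NetCDF file path is a hypothetical placeholder.

>>> from hypercoast.pace import read_pace, apply_kmeans
>>> dataset = read_pace("path/to/pace_aop_file.nc")
>>> labels, lats, lons = apply_kmeans(dataset, n_clusters=6, plot=False)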

Source code in hypercoast/pace.py
def apply_kmeans(
    dataset: Union[xr.Dataset, str],
    n_clusters: int = 6,
    filter_condition: Optional[Callable[[xr.DataArray], xr.DataArray]] = None,
    plot: bool = True,
    figsize: tuple[int, int] = (8, 6),
    colors: list[str] = None,
    extent: list[float] = None,
    title: str = None,
    **kwargs,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Applies K-means clustering to the dataset and optionally plots the results.

    Args:
        dataset (xr.Dataset | str): The dataset containing the PACE data or the file path to the dataset.
        n_clusters (int, optional): Number of clusters for K-means. Defaults to 6.
        filter_condition (Callable[[xr.DataArray], xr.DataArray], optional): A function to filter the data. Defaults to None.
        plot (bool, optional): Whether to plot the data. Defaults to True.
        figsize (tuple[int, int], optional): Figure size for the plot. Defaults to (8, 6).
        colors (list[str], optional): List of colors to use for the clusters. Defaults to None.
        extent (list[float] | None, optional): The extent to zoom in to the specified region. Defaults to None.
        title (str | None, optional): Title for the plot. Defaults to None.
        **kwargs: Additional keyword arguments to pass to the `plt.subplots` function.

    Returns:
        tuple[np.ndarray, np.ndarray, np.ndarray]: The cluster labels, latitudes, and longitudes.
    """

    import numpy as np
    from sklearn.cluster import KMeans

    import matplotlib.pyplot as plt
    import matplotlib.colors as mcolors
    import cartopy.crs as ccrs
    import cartopy.feature as cfeature

    if isinstance(dataset, str):
        dataset = read_pace(dataset)
    elif isinstance(dataset, xr.DataArray):
        dataset = dataset.to_dataset()
    elif not isinstance(dataset, xr.Dataset):
        raise ValueError("dataset must be an xarray Dataset")

    if title is None:
        title = f"K-means Clustering with {n_clusters} Clusters"

    da = dataset["Rrs"]

    reshaped_data = da.values.reshape(-1, da.shape[-1])
    reshaped_data_no_nan = reshaped_data[~np.isnan(reshaped_data).any(axis=1)]

    # Apply K-means clustering to group the pixels into n_clusters water types.
    kmeans = KMeans(n_clusters=n_clusters, random_state=0)
    kmeans.fit(reshaped_data_no_nan)

    # Initialize an array for cluster labels with NaN
    labels = np.full(reshaped_data.shape[0], np.nan)

    # Assign the computed cluster labels to the non-NaN positions
    labels[~np.isnan(reshaped_data).any(axis=1)] = kmeans.labels_

    # Reshape the labels back to the original spatial dimensions
    cluster_labels = labels.reshape(da.shape[:-1])

    if filter_condition is not None:
        cluster_labels = np.where(filter_condition, cluster_labels, np.nan)

    latitudes = da.coords["latitude"].values
    longitudes = da.coords["longitude"].values

    if plot:

        # Create a custom discrete color map for K-means clusters
        if colors is None:
            colors = ["#377eb8", "#ff7f00", "#4daf4a", "#f781bf", "#a65628", "#984ea3"]
        cmap = mcolors.ListedColormap(colors)
        bounds = np.arange(-0.5, n_clusters, 1)
        norm = mcolors.BoundaryNorm(bounds, cmap.N)

        # Create a figure and axis with the correct map projection

        if "dpi" not in kwargs:
            kwargs["dpi"] = 100

        if "subplot_kw" not in kwargs:
            kwargs["subplot_kw"] = {"projection": ccrs.PlateCarree()}

        fig, ax = plt.subplots(
            figsize=figsize,
            **kwargs,
        )

        # Plot the K-means classification results on the map
        im = ax.pcolormesh(
            longitudes,
            latitudes,
            cluster_labels,
            cmap=cmap,
            norm=norm,
            transform=ccrs.PlateCarree(),
        )

        # Add geographic features for context
        ax.add_feature(cfeature.COASTLINE)
        ax.add_feature(cfeature.BORDERS, linestyle=":")
        ax.add_feature(cfeature.STATES, linestyle="--")

        # Add gridlines
        ax.gridlines(draw_labels=True)

        # Set the extent to zoom in to the specified region
        if extent is not None:
            ax.set_extent(extent, crs=ccrs.PlateCarree())

        # Add color bar with labels
        cbar = plt.colorbar(
            im,
            ax=ax,
            orientation="vertical",
            # pad=0.02,
            fraction=0.05,
            ticks=np.arange(n_clusters),
        )
        cbar.ax.set_yticklabels([f"Class {i+1}" for i in range(n_clusters)])
        cbar.set_label("Water Types", rotation=270, labelpad=10)

        # Add title
        ax.set_title(title, fontsize=14)
        ax.set_xlabel("Longitude")
        ax.set_ylabel("Latitude")

        # Show the plot
        plt.show()

    return cluster_labels, latitudes, longitudes

apply_pca(dataset, n_components=3, plot=True, figsize=(8, 6), x_component=0, y_component=1, color='blue', title='PCA of Spectral Data', **kwargs)

Applies Principal Component Analysis (PCA) to the dataset and optionally plots the results.

Parameters:

    dataset (xr.Dataset | str): The dataset containing the PACE data or the file path to the dataset. Required.
    n_components (int, optional): Number of principal components to compute. Defaults to 3.
    plot (bool, optional): Whether to plot the data. Defaults to True.
    figsize (tuple[int, int], optional): Figure size for the plot. Defaults to (8, 6).
    x_component (int, optional): The principal component to plot on the x-axis. Defaults to 0.
    y_component (int, optional): The principal component to plot on the y-axis. Defaults to 1.
    color (str, optional): Color of the scatter plot points. Defaults to "blue".
    title (str, optional): Title for the plot. Defaults to "PCA of Spectral Data".
    **kwargs: Additional keyword arguments to pass to the plt.scatter function.

Returns:

    np.ndarray: The PCA-transformed data.
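
Examples:

A minimal usage sketch; the file path is a hypothetical placeholder.

>>> from hypercoast.pace import read_pace, apply_pca
>>> dataset = read_pace("path/to/pace_aop_file.nc")
>>> pca_data = apply_pca(dataset, n_components=3, plot=False)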

Source code in hypercoast/pace.py
def apply_pca(
    dataset: Union[xr.Dataset, str],
    n_components: int = 3,
    plot: bool = True,
    figsize: tuple[int, int] = (8, 6),
    x_component: int = 0,
    y_component: int = 1,
    color: str = "blue",
    title: str = "PCA of Spectral Data",
    **kwargs,
) -> np.ndarray:
    """
    Applies Principal Component Analysis (PCA) to the dataset and optionally plots the results.

    Args:
        dataset (xr.Dataset | str): The dataset containing the PACE data or the file path to the dataset.
        n_components (int, optional): Number of principal components to compute. Defaults to 3.
        plot (bool, optional): Whether to plot the data. Defaults to True.
        figsize (tuple[int, int], optional): Figure size for the plot. Defaults to (8, 6).
        x_component (int, optional): The principal component to plot on the x-axis. Defaults to 0.
        y_component (int, optional): The principal component to plot on the y-axis. Defaults to 1.
        color (str, optional): Color of the scatter plot points. Defaults to "blue".
        title (str, optional): Title for the plot. Defaults to "PCA of Spectral Data".
        **kwargs: Additional keyword arguments to pass to the `plt.scatter` function.

    Returns:
        np.ndarray: The PCA-transformed data.
    """
    from sklearn.decomposition import PCA

    if isinstance(dataset, str):
        dataset = read_pace(dataset)
    elif not isinstance(dataset, xr.Dataset):
        raise ValueError("dataset must be an xarray Dataset")

    da = dataset["Rrs"]

    # Reshape data to (n_pixels, n_bands)
    reshaped_data = da.values.reshape(-1, da.shape[-1])

    # Handle NaNs by removing them
    reshaped_data_no_nan = reshaped_data[~np.isnan(reshaped_data).any(axis=1)]

    # Apply PCA to reduce dimensionality
    pca = PCA(n_components=n_components)
    pca_data = pca.fit_transform(reshaped_data_no_nan)

    if plot:
        plt.figure(figsize=figsize)
        if "s" not in kwargs:
            kwargs["s"] = 1
        plt.scatter(
            pca_data[:, x_component], pca_data[:, y_component], c=color, **kwargs
        )
        plt.title(title)
        plt.xlabel(f"Principal Component {x_component + 1}")
        plt.ylabel(f"Principal Component {y_component + 1}")
        plt.show()

    return pca_data

apply_sam(dataset, n_components=3, n_clusters=6, random_state=0, spectral_library=None, filter_condition=None, plot=True, figsize=(8, 6), extent=None, colors=None, title=None, **kwargs)

Applies Spectral Angle Mapper (SAM) to the dataset and optionally plots the results.

Parameters:

    dataset (Union[xr.Dataset, str]): The dataset containing the PACE data or the file path to the dataset. Required.
    n_components (int, optional): Number of principal components to compute. Defaults to 3.
    n_clusters (int, optional): Number of clusters for K-means. Defaults to 6.
    random_state (int, optional): Random state for K-means. Defaults to 0.
    spectral_library (Union[str, list[str]], optional): Path to the spectral library or a list of paths. Defaults to None.
    filter_condition (Callable[[xr.DataArray], xr.DataArray], optional): A function to filter the data. Defaults to None.
    plot (bool, optional): Whether to plot the data. Defaults to True.
    figsize (Tuple[int, int], optional): Figure size for the plot. Defaults to (8, 6).
    extent (List[float], optional): The extent to zoom in to the specified region. Defaults to None.
    colors (List[str], optional): Colors for the clusters. Defaults to None.
    title (str, optional): Title for the plot. Defaults to None, in which case "Spectral Angle Mapper Water Type Classification" is used.
    **kwargs: Additional keyword arguments to pass to the plt.subplots function.

Returns:

    Tuple[np.ndarray, np.ndarray, np.ndarray]: The best match classification, latitudes, and longitudes.
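
Examples:

A minimal usage sketch; the file path is a hypothetical placeholder. With no spectral_library supplied, endmembers are derived via PCA and K-means, as in the source below.

>>> from hypercoast.pace import read_pace, apply_sam
>>> dataset = read_pace("path/to/pace_aop_file.nc")
>>> best_match, lats, lons = apply_sam(dataset, n_clusters=6, plot=False)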

Source code in hypercoast/pace.py
def apply_sam(
    dataset: Union[xr.Dataset, str],
    n_components: int = 3,
    n_clusters: int = 6,
    random_state: int = 0,
    spectral_library: Union[str, list[str]] = None,
    filter_condition: Optional[Callable[[xr.DataArray], xr.DataArray]] = None,
    plot: bool = True,
    figsize: tuple[int, int] = (8, 6),
    extent: list[float] = None,
    colors: list[str] = None,
    title: str = None,
    **kwargs,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Applies Spectral Angle Mapper (SAM) to the dataset and optionally plots the results.

    Args:
        dataset (Union[xr.Dataset, str]): The dataset containing the PACE data or the file path to the dataset.
        n_components (int, optional): Number of principal components to compute. Defaults to 3.
        n_clusters (int, optional): Number of clusters for K-means. Defaults to 6.
        random_state (int, optional): Random state for K-means. Defaults to 0.
        spectral_library (Union[str, list[str]], optional): Path to the spectral library or a list of paths. Defaults to None.
        filter_condition (Callable[[xr.DataArray], xr.DataArray], optional): A function to filter the data. Defaults to None.
        plot (bool, optional): Whether to plot the data. Defaults to True.
        figsize (Tuple[int, int], optional): Figure size for the plot. Defaults to (8, 6).
        extent (List[float], optional): The extent to zoom in to the specified region. Defaults to None.
        colors (List[str], optional): Colors for the clusters. Defaults to None.
        title (str, optional): Title for the plot. Defaults to "Spectral Angle Mapper".
        **kwargs: Additional keyword arguments to pass to the `plt.subplots` function.

    Returns:
        Tuple[np.ndarray, np.ndarray, np.ndarray]: The best match classification, latitudes, and longitudes.
    """
    import glob
    import pandas as pd
    from sklearn.cluster import KMeans
    import matplotlib.colors as mcolors
    import cartopy.crs as ccrs
    import cartopy.feature as cfeature
    from sklearn.decomposition import PCA
    from scipy.interpolate import interp1d

    if isinstance(dataset, str):
        dataset = read_pace(dataset)
    elif isinstance(dataset, xr.DataArray):
        dataset = dataset.to_dataset()
    elif not isinstance(dataset, xr.Dataset):
        raise ValueError("dataset must be an xarray Dataset")

    da = dataset["Rrs"]
    pace_wavelengths = da["wavelength"].values

    # Reshape data to (n_pixels, n_bands)
    reshaped_data = da.values.reshape(-1, da.shape[-1])

    # Handle NaNs by removing them
    reshaped_data_no_nan = reshaped_data[~np.isnan(reshaped_data).any(axis=1)]

    if isinstance(spectral_library, str):
        endmember_paths = sorted(glob.glob(spectral_library))
    elif isinstance(spectral_library, list):
        endmember_paths = spectral_library
    else:
        endmember_paths = None

    # Function to load and resample a single CSV spectral library file
    def load_and_resample_spectral_library(csv_path, target_wavelengths):
        df = pd.read_csv(csv_path)
        original_wavelengths = df.iloc[:, 0].values  # First column is wavelength
        spectra_values = df.iloc[:, 1].values  # Second column is spectral values

        # Interpolation function
        interp_func = interp1d(
            original_wavelengths,
            spectra_values,
            kind="linear",
            fill_value="extrapolate",
        )

        # Resample to the target (PACE) wavelengths
        resampled_spectra = interp_func(target_wavelengths)

        return resampled_spectra

    if endmember_paths is not None:
        endmembers = np.array(
            [
                load_and_resample_spectral_library(path, pace_wavelengths)
                for path in endmember_paths
            ]
        )
    else:
        endmembers = None

    if endmembers is None:
        # Apply PCA to reduce dimensionality
        pca = PCA(n_components=n_components)
        pca_data = pca.fit_transform(reshaped_data_no_nan)

        # Apply K-means to find clusters representing endmembers
        kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
        kmeans.fit(pca_data)

        # The cluster centers in the original spectral space are your endmembers
        endmembers = pca.inverse_transform(kmeans.cluster_centers_)

    def spectral_angle_mapper(pixel, reference):
        norm_pixel = np.linalg.norm(pixel)
        norm_reference = np.linalg.norm(reference)
        cos_theta = np.dot(pixel, reference) / (norm_pixel * norm_reference)
        angle = np.arccos(np.clip(cos_theta, -1, 1))
        return angle

    # Apply SAM for each pixel and each endmember
    angles = np.zeros((reshaped_data_no_nan.shape[0], endmembers.shape[0]))

    for i in range(reshaped_data_no_nan.shape[0]):
        for j in range(endmembers.shape[0]):
            angles[i, j] = spectral_angle_mapper(
                reshaped_data_no_nan[i, :], endmembers[j, :]
            )

    # Find the minimum angle (best match) for each pixel
    best_match = np.argmin(angles, axis=1)

    # Reshape best_match back to the original spatial dimensions
    original_shape = da.shape[:-1]  # Get the spatial dimensions
    best_match_full = np.full(reshaped_data.shape[0], np.nan)
    best_match_full[~np.isnan(reshaped_data).any(axis=1)] = best_match
    best_match_full = best_match_full.reshape(original_shape)

    if filter_condition is not None:
        best_match_full = np.where(filter_condition, best_match_full, np.nan)

    latitudes = da.coords["latitude"].values
    longitudes = da.coords["longitude"].values

    # Plot sample spectra from the CSV files and their resampled versions
    def plot_sample_spectra(csv_paths, pace_wavelengths):
        plt.figure(figsize=figsize)

        for i, csv_path in enumerate(csv_paths):
            df = pd.read_csv(csv_path)
            original_wavelengths = df.iloc[:, 0].values
            spectra_values = df.iloc[:, 1].values
            resampled_spectra = load_and_resample_spectral_library(
                csv_path, pace_wavelengths
            )

            plt.plot(
                original_wavelengths,
                spectra_values,
                label=f"Original Spectra {i+1}",
                linestyle="--",
            )
            plt.plot(
                pace_wavelengths, resampled_spectra, label=f"Resampled Spectra {i+1}"
            )

        plt.xlabel("Wavelength (nm)")
        plt.ylabel("Spectral Reflectance")
        plt.title("Comparison of Original and Resampled Spectra")
        plt.legend()
        plt.grid(True)
        plt.show()

    if plot:

        if endmember_paths is not None:

            plot_sample_spectra(endmember_paths, pace_wavelengths)

        if colors is None:
            colors = ["#377eb8", "#ff7f00", "#4daf4a", "#f781bf", "#a65628", "#984ea3"]

        if title is None:
            title = "Spectral Angle Mapper Water Type Classification"
        # Create a custom discrete color map
        cmap = mcolors.ListedColormap(colors)
        if spectral_library is not None:
            n_clusters = len(endmember_paths)
        bounds = np.arange(-0.5, n_clusters, 1)
        norm = mcolors.BoundaryNorm(bounds, cmap.N)

        # Create a figure and axis with the correct map projection
        fig, ax = plt.subplots(
            figsize=figsize, subplot_kw={"projection": ccrs.PlateCarree()}, **kwargs
        )

        # Plot the SAM classification results
        im = ax.pcolormesh(
            longitudes,
            latitudes,
            best_match_full,
            cmap=cmap,
            norm=norm,
            transform=ccrs.PlateCarree(),
        )

        # Add geographic features for context
        ax.add_feature(cfeature.COASTLINE)
        ax.add_feature(cfeature.BORDERS, linestyle=":")
        ax.add_feature(cfeature.STATES, linestyle="--")

        # Add gridlines
        ax.gridlines(draw_labels=True)

        # Set the extent to zoom in to the specified region
        if extent is not None:
            ax.set_extent(extent, crs=ccrs.PlateCarree())

        # Add color bar with labels
        cbar = plt.colorbar(
            im,
            ax=ax,
            orientation="vertical",
            # pad=0.02,
            fraction=0.05,
            ticks=np.arange(n_clusters),
        )
        cbar.ax.set_yticklabels([f"Class {i+1}" for i in range(n_clusters)])
        cbar.set_label("Water Types", rotation=270, labelpad=20)

        # Add title
        ax.set_title(title, fontsize=14)

        # Show the plot
        plt.show()

    return best_match_full, latitudes, longitudes

apply_sam_spectral(dataset, spectral_library=None, filter_condition=None, plot=True, figsize=(8, 6), extent=None, colors=None, title=None, **kwargs)

Applies Spectral Angle Mapper (SAM) to the dataset and optionally plots the results.

Parameters:

    dataset (Union[xr.Dataset, str]): The dataset containing the PACE data or the file path to the dataset. Required.
    spectral_library (Union[str, list[str]]): The spectral library file path or list of file paths. Defaults to None.
    filter_condition (Callable[[xr.DataArray], xr.DataArray], optional): A function to filter the data. Defaults to None.
    plot (bool, optional): Whether to plot the data. Defaults to True.
    figsize (Tuple[int, int], optional): Figure size for the plot. Defaults to (8, 6).
    extent (List[float], optional): The extent to zoom in to the specified region. Defaults to None.
    colors (List[str], optional): Colors for the clusters. Defaults to None.
    title (str, optional): Title for the plot. Defaults to None, in which case "Spectral Angle Mapper Water Type Classification" is used.
    **kwargs: Additional keyword arguments to pass to the plt.subplots function.

Returns:

    Tuple[np.ndarray, np.ndarray, np.ndarray]: The best match classification, latitudes, and longitudes.
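
Examples:

A minimal usage sketch; the file path and the CSV spectral library glob pattern are hypothetical placeholders.

>>> from hypercoast.pace import read_pace, apply_sam_spectral
>>> dataset = read_pace("path/to/pace_aop_file.nc")
>>> best_match, lats, lons = apply_sam_spectral(
...     dataset, spectral_library="path/to/spectral_library/*.csv", plot=False
... )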

Source code in hypercoast/pace.py
def apply_sam_spectral(
    dataset: Union[xr.Dataset, str],
    spectral_library: Union[str, list[str]] = None,
    filter_condition: Optional[Callable[[xr.DataArray], xr.DataArray]] = None,
    plot: bool = True,
    figsize: tuple[int, int] = (8, 6),
    extent: list[float] = None,
    colors: list[str] = None,
    title: str = None,
    **kwargs,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Applies Spectral Angle Mapper (SAM) to the dataset and optionally plots the results.

    Args:
        dataset (Union[xr.Dataset, str]): The dataset containing the PACE data or the file path to the dataset.
        spectral_library (Union[str, list[str]]): The spectral library file path or list of file paths.
        filter_condition (Callable[[xr.DataArray], xr.DataArray], optional): A function to filter the data. Defaults to None.
        plot (bool, optional): Whether to plot the data. Defaults to True.
        figsize (Tuple[int, int], optional): Figure size for the plot. Defaults to (8, 6).
        extent (List[float], optional): The extent to zoom in to the specified region. Defaults to None.
        colors (List[str], optional): Colors for the clusters. Defaults to None.
        title (str, optional): Title for the plot. Defaults to "Spectral Angle Mapper Water Type Classification".
        **kwargs: Additional keyword arguments to pass to the `plt.subplots` function.

    Returns:
        Tuple[np.ndarray, np.ndarray, np.ndarray]: The best match classification, latitudes, and longitudes.
    """
    import glob
    import pandas as pd
    import matplotlib.colors as mcolors
    import cartopy.crs as ccrs
    import cartopy.feature as cfeature
    from scipy.interpolate import interp1d

    if isinstance(dataset, str):
        dataset = read_pace(dataset)
    elif isinstance(dataset, xr.DataArray):
        dataset = dataset.to_dataset()
    elif not isinstance(dataset, xr.Dataset):
        raise ValueError("dataset must be an xarray Dataset")

    da = dataset["Rrs"]
    pace_wavelengths = da["wavelength"].values

    if isinstance(spectral_library, str):
        endmember_paths = sorted(glob.glob(spectral_library))
    elif isinstance(spectral_library, list):
        endmember_paths = spectral_library
    else:
        endmember_paths = None

    # Function to load and resample a single CSV spectral library file
    def load_and_resample_spectral_library(csv_path, target_wavelengths):
        df = pd.read_csv(csv_path)
        original_wavelengths = df.iloc[:, 0].values  # First column is wavelength
        spectra_values = df.iloc[:, 1].values  # Second column is spectral values

        # Interpolation function
        interp_func = interp1d(
            original_wavelengths,
            spectra_values,
            kind="linear",
            fill_value="extrapolate",
        )

        # Resample to the target (PACE) wavelengths
        resampled_spectra = interp_func(target_wavelengths)

        return resampled_spectra

    if endmember_paths is not None:
        endmembers = np.array(
            [
                load_and_resample_spectral_library(path, pace_wavelengths)
                for path in endmember_paths
            ]
        )
    else:
        endmembers = None

    # Function to calculate spectral angle
    def spectral_angle_mapper(pixel, reference):
        norm_pixel = np.linalg.norm(pixel)
        norm_reference = np.linalg.norm(reference)
        cos_theta = np.dot(pixel, reference) / (norm_pixel * norm_reference)
        angle = np.arccos(np.clip(cos_theta, -1, 1))
        return angle

    # Reshape data to (n_pixels, n_bands)
    reshaped_data = da.values.reshape(-1, da.shape[-1])

    # Apply SAM for each pixel and each endmember
    angles = np.zeros((reshaped_data.shape[0], endmembers.shape[0]))

    for i in range(reshaped_data.shape[0]):
        for j in range(endmembers.shape[0]):
            angles[i, j] = spectral_angle_mapper(reshaped_data[i, :], endmembers[j, :])

    # Find the minimum angle (best match) for each pixel
    best_match = np.argmin(angles, axis=1)

    # Reshape best_match back to the original spatial dimensions
    best_match = best_match.reshape(da.shape[:-1])

    if filter_condition is not None:
        best_match = np.where(filter_condition, best_match, np.nan)

    latitudes = da.coords["latitude"].values
    longitudes = da.coords["longitude"].values

    # Plot sample spectra from the CSV files and their resampled versions
    def plot_sample_spectra(csv_paths, pace_wavelengths):
        plt.figure(figsize=figsize)

        for i, csv_path in enumerate(csv_paths):
            df = pd.read_csv(csv_path)
            original_wavelengths = df.iloc[:, 0].values
            spectra_values = df.iloc[:, 1].values
            resampled_spectra = load_and_resample_spectral_library(
                csv_path, pace_wavelengths
            )

            plt.plot(
                original_wavelengths,
                spectra_values,
                label=f"Original Spectra {i+1}",
                linestyle="--",
            )
            plt.plot(
                pace_wavelengths, resampled_spectra, label=f"Resampled Spectra {i+1}"
            )

        plt.xlabel("Wavelength (nm)")
        plt.ylabel("Spectral Reflectance")
        plt.title("Comparison of Original and Resampled Spectra")
        plt.legend()
        plt.grid(True)
        plt.show()

    if plot:

        if endmember_paths is not None:

            plot_sample_spectra(endmember_paths, pace_wavelengths)

        if colors is None:
            colors = ["#377eb8", "#e41a1c", "#4daf4a", "#f781bf", "#a65628", "#984ea3"]

        if title is None:
            title = "Spectral Angle Mapper Water Type Classification"
        # Create a custom discrete color map
        cmap = mcolors.ListedColormap(colors)
        bounds = np.arange(-0.5, len(endmembers), 1)
        norm = mcolors.BoundaryNorm(bounds, cmap.N)

        # Create a figure and axis with the correct map projection
        _, ax = plt.subplots(
            figsize=figsize, subplot_kw={"projection": ccrs.PlateCarree()}, **kwargs
        )

        # Plot the SAM classification results
        im = ax.pcolormesh(
            longitudes,
            latitudes,
            best_match,
            cmap=cmap,
            norm=norm,
            transform=ccrs.PlateCarree(),
        )

        # Add geographic features for context
        ax.add_feature(cfeature.COASTLINE)
        ax.add_feature(cfeature.BORDERS, linestyle=":")
        ax.add_feature(cfeature.STATES, linestyle="--")

        # Adding axis labels
        ax.set_xlabel("Longitude")
        ax.set_ylabel("Latitude")

        # Adding a title
        ax.set_title(title, fontsize=14)

        # Adding a color bar with discrete values
        cbar = plt.colorbar(
            im,
            ax=ax,
            orientation="vertical",
            # pad=0.02,
            fraction=0.05,
            ticks=np.arange(len(endmembers)),
        )
        cbar.ax.set_yticklabels([f"Class {i+1}" for i in range(len(endmembers))])
        cbar.set_label("Water Types", rotation=270, labelpad=20)

        # Adding gridlines
        ax.gridlines(draw_labels=True, linestyle="--", linewidth=0.5)

        # Set the extent to zoom in to the specified region (adjust as needed)
        if extent is not None:
            ax.set_extent(extent, crs=ccrs.PlateCarree())

        # Show the plot
        plt.show()

    return best_match, latitudes, longitudes

cyano_band_ratios(dataset, plot=True, extent=None, figsize=(12, 6), **kwargs)

Calculates cyanobacteria band ratios from PACE data.

Parameters:

    dataset (xr.Dataset or str): The dataset containing the PACE data or the file path to the dataset. Required.
    plot (bool, optional): Whether to plot the data. Defaults to True.
    extent (list, optional): The extent of the plot. Defaults to None.
    figsize (tuple, optional): Figure size. Defaults to (12, 6).
    **kwargs: Additional keyword arguments to pass to the plt.subplots function.

Returns:

    xr.DataArray: The cyanobacteria band ratios.
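
Examples:

A minimal usage sketch; the file path is a hypothetical placeholder.

>>> from hypercoast.pace import read_pace, cyano_band_ratios
>>> dataset = read_pace("path/to/pace_aop_file.nc")
>>> ratios = cyano_band_ratios(dataset, plot=False)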

Source code in hypercoast/pace.py
def cyano_band_ratios(
    dataset: Union[xr.Dataset, str],
    plot: bool = True,
    extent: List[float] = None,
    figsize: tuple[int, int] = (12, 6),
    **kwargs,
) -> xr.DataArray:
    """
    Calculates cyanobacteria band ratios from PACE data.

    Args:
        dataset (xr.Dataset or str): The dataset containing the PACE data or the file path to the dataset.
        plot (bool, optional): Whether to plot the data. Defaults to True.
        extent (list, optional): The extent of the plot. Defaults to None.
        figsize (tuple, optional): Figure size. Defaults to (12, 6).
        **kwargs: Additional keyword arguments to pass to the `plt.subplots` function.

    Returns:
        xr.DataArray: The cyanobacteria band ratios.
    """
    import cartopy.crs as ccrs
    import cartopy.feature as cfeature

    if isinstance(dataset, str):
        dataset = read_pace(dataset)
    elif not isinstance(dataset, xr.Dataset):
        raise ValueError("dataset must be an xarray Dataset")

    da = dataset["Rrs"]
    data = (
        (da.sel(wavelength=650) > da.sel(wavelength=620))
        & (da.sel(wavelength=701) > da.sel(wavelength=681))
        & (da.sel(wavelength=701) > da.sel(wavelength=450))
    )

    if plot:
        # Create a plot
        _, ax = plt.subplots(
            figsize=figsize, subplot_kw={"projection": ccrs.PlateCarree()}, **kwargs
        )

        if extent is not None:
            ax.set_extent(extent, crs=ccrs.PlateCarree())

        # Plot the data
        data.plot(
            ax=ax,
            transform=ccrs.PlateCarree(),
            cmap="coolwarm",
            cbar_kwargs={"label": "Cyano"},
        )

        # Add coastlines
        ax.coastlines()

        # Add state boundaries
        states_provinces = cfeature.NaturalEarthFeature(
            category="cultural",
            name="admin_1_states_provinces_lines",
            scale="50m",
            facecolor="none",
        )

        ax.add_feature(states_provinces, edgecolor="gray")

        # Optionally, add gridlines, labels, etc.
        ax.gridlines(draw_labels=True)
        plt.show()

    return data

extract_pace(dataset, latitude, longitude, delta=0.01, return_plot=False, **kwargs)

Extracts data from a PACE dataset for a given latitude and longitude range and calculates the mean over these dimensions.

Parameters:

    dataset (Union[xr.Dataset, str]): The PACE dataset or path to the dataset file. Required.
    latitude (Union[float, Tuple[float, float]]): The latitude or range of latitudes to extract data for. Required.
    longitude (Union[float, Tuple[float, float]]): The longitude or range of longitudes to extract data for. Required.
    delta (float, optional): The range to add/subtract to the latitude and longitude if they are not ranges. Defaults to 0.01.
    return_plot (bool, optional): Whether to return a plot of the data. Defaults to False.
    **kwargs: Additional keyword arguments to pass to the plot function.

Returns:

    Union[xr.DataArray, plt.Figure]: The mean data over the latitude and longitude dimensions, or a plot of this data if return_plot is True.
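
Examples:

A minimal usage sketch; the file path and the point coordinates are hypothetical placeholders.

>>> from hypercoast.pace import read_pace, extract_pace
>>> dataset = read_pace("path/to/pace_aop_file.nc")
>>> spectrum = extract_pace(dataset, latitude=27.5, longitude=-88.0, delta=0.01)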

Source code in hypercoast/pace.py
def extract_pace(
    dataset: Union[xr.Dataset, str],
    latitude: Union[float, Tuple[float, float]],
    longitude: Union[float, Tuple[float, float]],
    delta: float = 0.01,
    return_plot: bool = False,
    **kwargs,
) -> Union[xr.DataArray, plt.Figure]:
    """
    Extracts data from a PACE dataset for a given latitude and longitude range
        and calculates the mean over these dimensions.

    Args:
        dataset (Union[xr.Dataset, str]): The PACE dataset or path to the dataset file.
        latitude (Union[float, Tuple[float, float]]): The latitude or range of
            latitudes to extract data for.
        longitude (Union[float, Tuple[float, float]]): The longitude or range of
            longitudes to extract data for.
        delta (float, optional): The range to add/subtract to the latitude and
            longitude if they are not ranges. Defaults to 0.01.
        return_plot (bool, optional): Whether to return a plot of the data. Defaults to False.
        **kwargs: Additional keyword arguments to pass to the plot function.

    Returns:
        Union[xr.DataArray, plt.Figure]: The mean data over the latitude
            and longitude dimensions, or a plot of this data if return_plot is True.
    """
    if isinstance(latitude, list) or isinstance(latitude, tuple):
        pass
    else:
        latitude = (latitude - delta, latitude + delta)

    if isinstance(longitude, list) or isinstance(longitude, tuple):
        pass
    else:
        longitude = (longitude - delta, longitude + delta)

    ds = filter_pace(dataset, latitude, longitude, return_plot=False)
    data = ds.mean(dim=["latitude", "longitude"])
    if return_plot:
        return data.plot.line(**kwargs)
    else:
        return data

filter_pace(dataset, latitude, longitude, drop=True, return_plot=False, **kwargs)

Filters a PACE dataset based on latitude and longitude.

Parameters:

    dataset (xr.Dataset): The PACE dataset to filter. Required.
    latitude (float or tuple): The latitude to filter by. If a tuple or list, it represents a range. Required.
    longitude (float or tuple): The longitude to filter by. If a tuple or list, it represents a range. Required.
    drop (bool, optional): Whether to drop the filtered out data. Defaults to True.
    return_plot (bool, optional): Whether to plot the filtered data instead of returning it. Defaults to False.
    **kwargs: Additional keyword arguments to pass to the where method.

Returns:

    xr.DataArray: The filtered PACE data.
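
Examples:

A minimal usage sketch; the file path and the latitude/longitude ranges are hypothetical placeholders.

>>> from hypercoast.pace import read_pace, filter_pace
>>> dataset = read_pace("path/to/pace_aop_file.nc")
>>> da = filter_pace(dataset, latitude=(27.0, 28.0), longitude=(-88.5, -87.5))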

Source code in hypercoast/pace.py
def filter_pace(dataset, latitude, longitude, drop=True, return_plot=False, **kwargs):
    """
    Filters a PACE dataset based on latitude and longitude.

    Args:
        dataset (xr.Dataset): The PACE dataset to filter.
        latitude (float or tuple): The latitude to filter by. If a tuple or list, it represents a range.
        longitude (float or tuple): The longitude to filter by. If a tuple or list, it represents a range.
        drop (bool, optional): Whether to drop the filtered out data. Defaults to True.
        return_plot (bool, optional): Whether to plot the filtered data instead of returning it. Defaults to False.
        **kwargs: Additional keyword arguments to pass to the `where` method.

    Returns:
        xr.DataArray: The filtered PACE data.
    """
    if isinstance(latitude, list) or isinstance(latitude, tuple):
        lat_con = (dataset["latitude"] > latitude[0]) & (
            dataset["latitude"] < latitude[1]
        )
    else:
        lat_con = dataset["latitude"] == latitude

    if isinstance(longitude, list) or isinstance(longitude, tuple):
        lon_con = (dataset["longitude"] > longitude[0]) & (
            dataset["longitude"] < longitude[1]
        )
    else:
        lon_con = dataset["longitude"] == longitude

    da = dataset["Rrs"].where(lat_con & lon_con, drop=drop, **kwargs)
    da_filtered = da.dropna(dim="latitude", how="all")
    da_filtered = da_filtered.dropna(dim="longitude", how="all")

    if return_plot:
        rrs_stack = da_filtered.stack(
            {"pixel": ["latitude", "longitude"]},
            create_index=False,
        )
        rrs_stack.plot.line(hue="pixel")
    else:
        return da_filtered

grid_pace(dataset, wavelengths=None, method='nearest', **kwargs)

Grids a PACE dataset based on latitude and longitude.

Parameters:

    dataset (xr.Dataset): The PACE dataset to grid. Required.
    wavelengths (float or list, optional): The wavelength(s) to select. If None, the first wavelength is used. Defaults to None.
    method (str, optional): The method to use for griddata interpolation. Defaults to "nearest".
    **kwargs: Additional keyword arguments to pass to the xr.Dataset constructor.

Returns:

    xr.Dataset: The gridded PACE data.
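
Examples:

A minimal usage sketch; the file path and wavelengths are hypothetical placeholders.

>>> from hypercoast.pace import read_pace, grid_pace
>>> dataset = read_pace("path/to/pace_aop_file.nc")
>>> gridded = grid_pace(dataset, wavelengths=[450, 550, 650], method="nearest")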

Source code in hypercoast/pace.py
def grid_pace(dataset, wavelengths=None, method="nearest", **kwargs):
    """
    Grids a PACE dataset based on latitude and longitude.

    Args:
        dataset (xr.Dataset): The PACE dataset to grid.
        wavelengths (float or list, optional): The wavelength(s) to select. If None,
            the first wavelength is used. Defaults to None.
        method (str, optional): The method to use for griddata interpolation.
            Defaults to "nearest".
        **kwargs: Additional keyword arguments to pass to the xr.Dataset constructor.

    Returns:
        xr.Dataset: The gridded PACE data.
    """
    from scipy.interpolate import griddata

    if wavelengths is None:
        wavelengths = dataset.coords["wavelength"].values[0]

    # Ensure wavelengths is a list
    if not isinstance(wavelengths, list):
        wavelengths = [wavelengths]

    lat = dataset.latitude
    lon = dataset.longitude

    grid_lat = np.linspace(lat.min().values, lat.max().values, lat.shape[0])
    grid_lon = np.linspace(lon.min().values, lon.max().values, lon.shape[1])
    grid_lon_2d, grid_lat_2d = np.meshgrid(grid_lon, grid_lat)

    gridded_data_dict = {}
    for wavelength in wavelengths:
        data = dataset.sel(wavelength=wavelength, method="nearest")["Rrs"]
        gridded_data = griddata(
            (lat.data.flatten(), lon.data.flatten()),
            data.data.flatten(),
            (grid_lat_2d, grid_lon_2d),
            method=method,
        )
        gridded_data_dict[wavelength] = gridded_data

    # Create a 3D array with dimensions latitude, longitude, and wavelength
    gridded_data_3d = np.dstack(list(gridded_data_dict.values()))

    dataset2 = xr.Dataset(
        {"Rrs": (("latitude", "longitude", "wavelength"), gridded_data_3d)},
        coords={
            "latitude": ("latitude", grid_lat),
            "longitude": ("longitude", grid_lon),
            "wavelength": ("wavelength", list(gridded_data_dict.keys())),
        },
        **kwargs,
    )

    dataset2["Rrs"].rio.write_crs("EPSG:4326", inplace=True)

    return dataset2

grid_pace_bgc(dataset, variable='chlor_a', method='nearest', **kwargs)

Grids PACE BGC data using specified interpolation method.

This function takes an xarray Dataset containing PACE BGC data, interpolates it onto a regular grid using the specified method, and returns the gridded data as an xarray DataArray with the specified variable.

Parameters:

    dataset (xr.Dataset): The input dataset containing PACE BGC data with latitude and longitude coordinates. Required.
    variable (str, optional): The variable within the dataset to grid. Can be one of chlor_a, carbon_phyto, poc, chlor_a_unc, carbon_phyto_unc, and l2_flags. Defaults to "chlor_a".
    method (str, optional): The interpolation method to use. Options include "nearest", "linear", and "cubic". Defaults to "nearest".
    **kwargs (Any): Additional keyword arguments to pass to the xr.Dataset creation.

Returns:

    xr.DataArray: The gridded data as an xarray DataArray, with the specified variable and EPSG:4326 CRS.

Examples:

>>> dataset = hypercoast.read_pace_bgc("path_to_your_dataset.nc")
>>> gridded_data = grid_pace_bgc(dataset, variable="chlor_a", method="nearest")
>>> print(gridded_data)
Source code in hypercoast/pace.py
def grid_pace_bgc(
    dataset: xr.Dataset,
    variable: str = "chlor_a",
    method: str = "nearest",
    **kwargs: Any,
) -> xr.DataArray:
    """
    Grids PACE BGC data using specified interpolation method.

    This function takes an xarray Dataset containing PACE BGC data, interpolates it onto a regular grid
    using the specified method, and returns the gridded data as an xarray DataArray with the specified
    variable.

    Args:
        dataset (xr.Dataset): The input dataset containing PACE BGC data with latitude and longitude coordinates.
        variable (str, optional): The variable within the dataset to grid. Can be
            one of chlor_a, carbon_phyto, poc, chlor_a_unc, carbon_phyto_unc, and l2_flags.
            Defaults to "chlor_a".
        method (str, optional): The interpolation method to use. Options include "nearest", "linear", and "cubic".
            Defaults to "nearest".
        **kwargs (Any): Additional keyword arguments to pass to the xr.Dataset creation.

    Returns:
        xr.DataArray: The gridded data as an xarray DataArray, with the specified variable and EPSG:4326 CRS.

    Example:
        >>> dataset = hypercoast.read_pace_bgc("path_to_your_dataset.nc")
        >>> gridded_data = grid_pace_bgc(dataset, variable="chlor_a", method="nearest")
        >>> print(gridded_data)
    """
    import rioxarray
    from scipy.interpolate import griddata

    lat = dataset.latitude
    lon = dataset.longitude

    grid_lat = np.linspace(lat.min().values, lat.max().values, lat.shape[0])
    grid_lon = np.linspace(lon.min().values, lon.max().values, lon.shape[1])
    grid_lon_2d, grid_lat_2d = np.meshgrid(grid_lon, grid_lat)

    data = dataset[variable]
    gridded_data = griddata(
        (lat.data.flatten(), lon.data.flatten()),
        data.data.flatten(),
        (grid_lat_2d, grid_lon_2d),
        method=method,
    )

    dataset2 = xr.Dataset(
        {variable: (("latitude", "longitude"), gridded_data)},
        coords={
            "latitude": ("latitude", grid_lat),
            "longitude": ("longitude", grid_lon),
        },
        **kwargs,
    )

    dataset2 = dataset2[variable].rio.write_crs("EPSG:4326")

    return dataset2

pace_chla_to_image(data, output=None, **kwargs)

Converts PACE chlorophyll-a data to an image.

Parameters:

    data (xr.DataArray or str): The chlorophyll-a data or the file path to the data. Required.
    output (str, optional): The file path where the image will be saved. If None, the image will be returned as a PIL Image object. Defaults to None.
    **kwargs: Additional keyword arguments to be passed to leafmap.array_to_image.

Returns:

    rasterio.Dataset or None: The image converted from the data. If output is provided, the image will be saved to the specified file and the function will return None.
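
Examples:

A minimal usage sketch; the input file path is a hypothetical placeholder.

>>> from hypercoast.pace import read_pace_chla, pace_chla_to_image
>>> chla = read_pace_chla("path/to/pace_chla_file.nc")
>>> image = pace_chla_to_image(chla)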

Source code in hypercoast/pace.py
def pace_chla_to_image(data, output=None, **kwargs):
    """
    Converts PACE chlorophyll-a data to an image.

    Args:
        data (xr.DataArray or str): The chlorophyll-a data or the file path to the data.
        output (str, optional): The file path where the image will be saved. If None, the image will be returned as a PIL Image object. Defaults to None.
        **kwargs: Additional keyword arguments to be passed to `leafmap.array_to_image`.

    Returns:
        rasterio.Dataset or None: The image converted from the data. If `output` is provided, the image will be saved to the specified file and the function will return None.
    """
    from leafmap import array_to_image, image_to_geotiff

    if isinstance(data, str):
        data = read_pace_chla(data)
    elif not isinstance(data, xr.DataArray):
        raise ValueError("data must be an xarray DataArray")

    image = array_to_image(data, transpose=False, output=None, **kwargs)

    if output is not None:
        image_to_geotiff(image, output, dtype="float32")

    return image

pace_to_image(dataset, wavelengths=None, method='nearest', gridded=False, output=None, **kwargs)

Converts a PACE dataset to an image.

Parameters:

    dataset (xarray.Dataset or str): The dataset containing the PACE data or the file path to the dataset. Required.
    wavelengths (array-like, optional): The specific wavelengths to select. If None, all wavelengths are selected. Defaults to None.
    method (str, optional): The method to use for data interpolation. Defaults to "nearest".
    gridded (bool, optional): Whether the dataset is already a gridded dataset. Defaults to False.
    output (str, optional): The file path where the image will be saved. If None, the image will be returned as a PIL Image object. Defaults to None.
    **kwargs: Additional keyword arguments to be passed to leafmap.array_to_image.

Returns:

    rasterio.Dataset or None: The image converted from the dataset. If output is provided, the image will be saved to the specified file and the function will return None.
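
Examples:

A minimal usage sketch; the file path and wavelengths are hypothetical placeholders.

>>> from hypercoast.pace import read_pace, pace_to_image
>>> dataset = read_pace("path/to/pace_aop_file.nc")
>>> image = pace_to_image(dataset, wavelengths=[450, 550, 650])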

Source code in hypercoast/pace.py
def pace_to_image(
    dataset, wavelengths=None, method="nearest", gridded=False, output=None, **kwargs
):
    """
    Converts a PACE dataset to an image.

    Args:
        dataset (xarray.Dataset or str): The dataset containing the PACE data or the file path to the dataset.
        wavelengths (array-like, optional): The specific wavelengths to select. If None, all wavelengths are selected. Defaults to None.
        method (str, optional): The method to use for data interpolation. Defaults to "nearest".
        gridded (bool, optional): Whether the dataset is already a gridded dataset. Defaults to False.
        output (str, optional): The file path where the image will be saved. If None, the image will be returned as a PIL Image object. Defaults to None.
        **kwargs: Additional keyword arguments to be passed to `leafmap.array_to_image`.

    Returns:
        rasterio.Dataset or None: The image converted from the dataset. If `output` is provided, the image will be saved to the specified file and the function will return None.
    """
    from leafmap import array_to_image

    if isinstance(dataset, str):
        dataset = read_pace(dataset, wavelengths=wavelengths, method="nearest")

    if wavelengths is not None:
        dataset = dataset.sel(wavelength=wavelengths, method="nearest")

    if not gridded:
        grid = grid_pace(dataset, wavelengths=wavelengths, method=method)
    else:
        grid = dataset
    data = grid["Rrs"]
    data.rio.write_crs("EPSG:4326", inplace=True)

    return array_to_image(data, transpose=False, output=output, **kwargs)

read_pace(filepath, wavelengths=None, method='nearest', engine='h5netcdf', **kwargs)

Reads PACE data from a given file and returns an xarray Dataset.

Parameters:

    filepath (str): Path to the file to read. Required.
    wavelengths (array-like, optional): Specific wavelengths to select. If None, all wavelengths are selected. Defaults to None.
    method (str, optional): Method to use for selection when wavelengths is not None. Defaults to "nearest".
    engine (str, optional): The backend engine to use for reading the file. Defaults to "h5netcdf".
    **kwargs: Additional keyword arguments to pass to the sel method when wavelengths is not None.

Returns:

    xr.Dataset: An xarray Dataset containing the PACE data.
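
Examples:

A minimal usage sketch; the file path and wavelengths are hypothetical placeholders.

>>> from hypercoast.pace import read_pace
>>> dataset = read_pace("path/to/pace_aop_file.nc", wavelengths=[450, 550, 650])
>>> print(dataset)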

Source code in hypercoast/pace.py
def read_pace(
    filepath, wavelengths=None, method="nearest", engine="h5netcdf", **kwargs
):
    """
    Reads PACE data from a given file and returns an xarray Dataset.

    Args:
        filepath (str): Path to the file to read.
        wavelengths (array-like, optional): Specific wavelengths to select. If None, all wavelengths are selected.
        method (str, optional): Method to use for selection when wavelengths is not None. Defaults to "nearest".
        engine (str, optional): The backend engine to use for reading the file. Defaults to "h5netcdf".
        **kwargs: Additional keyword arguments to pass to the `sel` method when wavelengths is not None.

    Returns:
        xr.Dataset: An xarray Dataset containing the PACE data.
    """

    rrs = xr.open_dataset(filepath, engine=engine, group="geophysical_data")["Rrs"]
    wvl = xr.open_dataset(filepath, engine=engine, group="sensor_band_parameters")
    dataset = xr.open_dataset(filepath, engine=engine, group="navigation_data")
    dataset = dataset.set_coords(("longitude", "latitude"))
    if "pixel_control_points" in dataset.dims:
        dataset = dataset.rename({"pixel_control_points": "pixels_per_line"})
    dataset = xr.merge([rrs, dataset.coords.to_dataset()])
    dataset.coords["wavelength_3d"] = wvl.coords["wavelength_3d"]
    dataset = dataset.rename(
        {
            "number_of_lines": "latitude",
            "pixels_per_line": "longitude",
            "wavelength_3d": "wavelength",
        }
    )

    if wavelengths is not None:
        dataset = dataset.sel(wavelength=wavelengths, method=method, **kwargs)

    return dataset

read_pace_aop(filepath, engine='h5netcdf', **kwargs)

Reads PACE data from a given file and returns an xarray Dataset.

Parameters:

    filepath (str): Path to the file to read. Required.
    engine (str, optional): The backend engine to use for reading the file. Defaults to "h5netcdf".
    **kwargs: Additional keyword arguments to pass to xr.open_dataset.

Returns:

    xr.Dataset: An xarray Dataset containing the PACE data.
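
Examples:

A minimal usage sketch; the file path is a hypothetical placeholder.

>>> from hypercoast.pace import read_pace_aop
>>> dataset = read_pace_aop("path/to/pace_aop_file.nc")
>>> print(dataset)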

Source code in hypercoast/pace.py
def read_pace_aop(filepath, engine="h5netcdf", **kwargs):
    """
    Reads PACE data from a given file and returns an xarray Dataset.

    Args:
        filepath (str): Path to the file to read.
        engine (str, optional): The backend engine to use for reading the file. Defaults to "h5netcdf".
        **kwargs: Additional keyword arguments to pass to `xr.open_dataset`.

    Returns:
        xr.Dataset: An xarray Dataset containing the PACE data.
    """

    rrs = xr.open_dataset(filepath, engine=engine, group="geophysical_data", **kwargs)[
        "Rrs"
    ]
    wvl = xr.open_dataset(
        filepath, engine=engine, group="sensor_band_parameters", **kwargs
    )
    dataset = xr.open_dataset(
        filepath, engine=engine, group="navigation_data", **kwargs
    )
    dataset = dataset.set_coords(("longitude", "latitude"))
    if "pixel_control_points" in dataset.dims:
        dataset = dataset.rename({"pixel_control_points": "pixels_per_line"})
    dataset = xr.merge([rrs, dataset.coords.to_dataset()])
    dataset.coords["wavelength_3d"] = wvl.coords["wavelength_3d"]

    return dataset

read_pace_bgc(filepath, variable=None, engine='h5netcdf', **kwargs)

Reads PACE BGC data from a specified file and returns an xarray Dataset.

This function opens a dataset from a file using the specified engine, optionally selects a single variable, merges geophysical and navigation data, sets appropriate coordinates, and renames dimensions for easier use.

Parameters:

    filepath (str): The path to the file containing the PACE BGC data. Required.
    variable (Optional[str], optional): The specific variable to extract from the geophysical_data group. If None, all variables are read. Defaults to None.
    engine (str, optional): The engine to use for reading the file. Defaults to "h5netcdf".
    **kwargs (Any): Additional keyword arguments to pass to xr.open_dataset.

Returns:

    xr.Dataset: An xarray Dataset containing the requested PACE BGC data, with merged geophysical and navigation data, set coordinates, and renamed dimensions.

Examples:

>>> dataset = read_pace_bgc("path/to/your/datafile.h5", variable="chlor_a")
>>> print(dataset)
Source code in hypercoast/pace.py
def read_pace_bgc(
    filepath: str,
    variable: Optional[str] = None,
    engine: str = "h5netcdf",
    **kwargs: Any,
) -> xr.Dataset:
    """
    Reads PACE BGC data from a specified file and returns an xarray Dataset.

    This function opens a dataset from a file using the specified engine,
    optionally selects a single variable, merges geophysical and navigation data,
    sets appropriate coordinates, and renames dimensions for easier use.

    Args:
        filepath (str): The path to the file containing the PACE BGC data.
        variable (Optional[str], optional): The specific variable to extract
            from the geophysical_data group. If None, all variables are read. Defaults to None.
        engine (str, optional): The engine to use for reading the file. Defaults to "h5netcdf".
        **kwargs (Any): Additional keyword arguments to pass to `xr.open_dataset`.

    Returns:
        xr.Dataset: An xarray Dataset containing the requested PACE BGC data,
        with merged geophysical and navigation data, set coordinates, and renamed dimensions.

    Example:
        >>> dataset = read_pace_bgc("path/to/your/datafile.h5", variable="chlor_a")
        >>> print(dataset)
    """

    ds = xr.open_dataset(filepath, engine=engine, group="geophysical_data", **kwargs)
    if variable is not None:
        ds = ds[variable]
    dataset = xr.open_dataset(
        filepath, engine=engine, group="navigation_data", **kwargs
    )
    dataset = dataset.set_coords(("longitude", "latitude"))
    if "pixel_control_points" in dataset.dims:
        dataset = dataset.rename({"pixel_control_points": "pixels_per_line"})
    dataset = xr.merge([ds, dataset.coords.to_dataset()])
    dataset = dataset.rename(
        {
            "number_of_lines": "latitude",
            "pixels_per_line": "longitude",
        }
    )
    attrs = xr.open_dataset(filepath, engine=engine, **kwargs).attrs
    dataset.attrs.update(attrs)

    return dataset

read_pace_chla(filepaths, engine='h5netcdf', **kwargs)

Reads chlorophyll-a data from PACE files and applies a logarithmic transformation.

This function supports reading from a single file or multiple files. For multiple files, it combines them into a single dataset. It then extracts the chlorophyll-a variable, applies a logarithmic transformation, and sets the coordinate reference system to EPSG:4326.

Parameters:

    filepaths (Union[str, List[str]]): A string or a list of strings containing the file path(s) to the PACE chlorophyll-a data files. Required.
    engine (str, optional): The backend engine to use for reading files. Defaults to "h5netcdf".
    **kwargs: Additional keyword arguments to pass to xr.open_dataset or xr.open_mfdataset.

Returns:

    xr.DataArray: An xarray DataArray containing the logarithmically transformed chlorophyll-a data with updated attributes.

Examples:

Read chlorophyll-a data from a single file:

>>> chla_data = read_pace_chla('path/to/single/file.nc')

Read and combine chlorophyll-a data from multiple files:

>>> chla_data = read_pace_chla(['path/to/file1.nc', 'path/to/file2.nc'], combine='by_coords')
Source code in hypercoast/pace.py
def read_pace_chla(
    filepaths: Union[str, List[str]], engine: str = "h5netcdf", **kwargs
) -> xr.DataArray:
    """
    Reads chlorophyll-a data from PACE files and applies a logarithmic transformation.

    This function supports reading from a single file or multiple files. For multiple files,
    it combines them into a single dataset. It then extracts the chlorophyll-a variable,
    applies a logarithmic transformation, and sets the coordinate reference system to EPSG:4326.

    Args:
        filepaths: A string or a list of strings containing the file path(s) to the PACE chlorophyll-a data files.
        engine: The backend engine to use for reading files. Defaults to "h5netcdf".
        **kwargs: Additional keyword arguments to pass to `xr.open_dataset` or `xr.open_mfdataset`.

    Returns:
        An xarray DataArray containing the logarithmically transformed chlorophyll-a data with updated attributes.

    Examples:
        Read chlorophyll-a data from a single file:
        >>> chla_data = read_pace_chla('path/to/single/file.nc')

        Read and combine chlorophyll-a data from multiple files:
        >>> chla_data = read_pace_chla(['path/to/file1.nc', 'path/to/file2.nc'], combine='by_coords')
    """

    import os
    import glob
    import rioxarray

    date = None
    if isinstance(filepaths, str) and os.path.isfile(filepaths):
        filepaths = [filepaths]
    if "combine" not in kwargs:
        kwargs["combine"] = "nested"
    if "concat_dim" not in kwargs:
        kwargs["concat_dim"] = "date"
    dataset = xr.open_mfdataset(filepaths, engine=engine, **kwargs)
    if not isinstance(filepaths, list):
        filepaths = glob.glob(filepaths)
        filepaths.sort()

    dates = [extract_date_from_filename(f) for f in filepaths]
    date = [timestamp.strftime("%Y-%m-%d") for timestamp in dates]
    dataset = dataset.assign_coords(date=("date", date))

    chla = np.log10(dataset["chlor_a"])
    chla.attrs.update(
        {
            "units": f'lg({dataset["chlor_a"].attrs["units"]})',
        }
    )

    if date is not None:
        chla.attrs["date"] = date

    chla = chla.transpose("lat", "lon", "date")

    chla.rio.write_crs("EPSG:4326", inplace=True)

    return chla

view_pace_pixel_locations(filepath, step=20, figsize=(8, 6), **kwargs)

Visualizes a subset of PACE pixel locations on a scatter plot.

This function reads PACE AOP data from a specified file, subsamples the data according to a step size, and plots the longitude and latitude of the selected pixels using a scatter plot.

Parameters:

Name Type Description Default
filepath str

The path to the file containing the PACE AOP data.

required
step int

The step size for subsampling the data. A smaller step size results in more data points being plotted. Defaults to 20.

20
figsize Tuple[float, float]

Figure size for the plot. Defaults to (8, 6).

(8, 6)
**kwargs Any

Additional keyword arguments to pass to the ax.scatter method.

{}

Returns:

Type Description
plt.Figure

A matplotlib figure object containing the scatter plot.

Examples:

>>> plot = view_pace_pixel_locations("path/to/your/datafile.h5", step=10)
>>> plt.show()
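
Keyword arguments are forwarded to ax.scatter, so the marker style can be adjusted; the values below are illustrative:

>>> plot = view_pace_pixel_locations("path/to/your/datafile.h5", step=10, s=1, alpha=0.5)
>>> plt.show()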
Source code in hypercoast/pace.py
def view_pace_pixel_locations(
    filepath: str, step: int = 20, figsize: Tuple[float, float] = (8, 6), **kwargs: Any
) -> plt.Figure:
    """
    Visualizes a subset of PACE pixel locations on a scatter plot.

    This function reads PACE AOP data from a specified file, subsamples the data according to a step size,
    and plots the longitude and latitude of the selected pixels using a scatter plot.

    Args:
        filepath (str): The path to the file containing the PACE AOP data.
        step (int, optional): The step size for subsampling the data. A smaller step size results in more
            data points being plotted. Defaults to 20.
        figsize (Tuple[float, float], optional): Figure size for the plot. Defaults to (8, 6).
        **kwargs (Any): Additional keyword arguments to pass to the `ax.scatter` method.

    Returns:
        plt.Figure: A matplotlib figure object containing the scatter plot.

    Example:
        >>> plot = view_pace_pixel_locations("path/to/your/datafile.h5", step=10)
        >>> plt.show()
    """

    # Create a new figure
    fig, ax = plt.subplots(figsize=figsize)

    # Create the plot
    dataset = read_pace_aop(filepath)
    number_of_lines = dataset.sizes["number_of_lines"]
    pixels_per_line = dataset.sizes["pixels_per_line"]

    ax.scatter(
        dataset.sel(
            {
                "number_of_lines": slice(None, None, number_of_lines // step),
                "pixels_per_line": slice(None, None, pixels_per_line // step),
            }
        ).longitude,
        dataset.sel(
            {
                "number_of_lines": slice(None, None, number_of_lines // step),
                "pixels_per_line": slice(None, None, pixels_per_line // step),
            }
        ).latitude,
        **kwargs,
    )

    # Set labels and title
    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")
    ax.set_title("PACE Pixel Locations")

    return fig

viz_pace(dataset, wavelengths=None, method='nearest', figsize=(6.4, 4.8), cmap='jet', vmin=0, vmax=0.02, ncols=1, crs=None, xlim=None, ylim=None, **kwargs)

Plots PACE data from a given xarray Dataset.

Parameters:

Name Type Description Default
dataset xr.Dataset | str

An xarray Dataset containing the PACE data, or the file path to the dataset.

required
wavelengths array-like

Specific wavelengths to select. If None, all wavelengths are selected.

None
method str

Method to use for selection when wavelengths is not None. Defaults to "nearest".

'nearest'
figsize tuple

Figure size. Defaults to (6.4, 4.8).

(6.4, 4.8)
cmap str

Colormap to use. Defaults to "jet".

'jet'
vmin float

Minimum value for the colormap. Defaults to 0.

0
vmax float

Maximum value for the colormap. Defaults to 0.02.

0.02
ncols int

Number of columns in the plot. Defaults to 1.

1
crs str or cartopy.crs.CRS

Coordinate reference system to use. If None, a simple plot without a map projection is created; pass "default" to use cartopy.crs.PlateCarree(). Defaults to None. See https://scitools.org.uk/cartopy/docs/latest/reference/projections.html

None
xlim array-like

Limits for the x-axis. Defaults to None.

None
ylim array-like

Limits for the y-axis. Defaults to None.

None
**kwargs

Additional keyword arguments to pass to the plt.subplots function.

{}
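
Examples:

Illustrative usage; the file path and wavelength values are placeholders, and the nearest available bands are selected:

>>> viz_pace('path/to/PACE_OCI_file.nc', wavelengths=[450, 550, 650], ncols=3)

Plot on a cartopy PlateCarree projection with coastlines and state boundaries:

>>> dataset = read_pace('path/to/PACE_OCI_file.nc')
>>> viz_pace(dataset, wavelengths=450, crs="default")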
Source code in hypercoast/pace.py
def viz_pace(
    dataset: Union[xr.Dataset, str],
    wavelengths: Optional[Union[List[float], float]] = None,
    method: str = "nearest",
    figsize: Tuple[float, float] = (6.4, 4.8),
    cmap: str = "jet",
    vmin: float = 0,
    vmax: float = 0.02,
    ncols: int = 1,
    crs: Optional[str] = None,
    xlim: Optional[List[float]] = None,
    ylim: Optional[List[float]] = None,
    **kwargs,
):
    """
    Plots PACE data from a given xarray Dataset.

    Args:
        dataset (xr.Dataset | str): An xarray Dataset containing the PACE data, or the file path to the dataset.
        wavelengths (array-like, optional): Specific wavelengths to select. If None, all wavelengths are selected.
        method (str, optional): Method to use for selection when wavelengths is not None. Defaults to "nearest".
        figsize (tuple, optional): Figure size. Defaults to (6.4, 4.8).
        cmap (str, optional): Colormap to use. Defaults to "jet".
        vmin (float, optional): Minimum value for the colormap. Defaults to 0.
        vmax (float, optional): Maximum value for the colormap. Defaults to 0.02.
        ncols (int, optional): Number of columns in the plot. Defaults to 1.
        crs (str or cartopy.crs.CRS, optional): Coordinate reference system to use. If None, a simple plot
            without a map projection is created; pass "default" to use `cartopy.crs.PlateCarree()`. Defaults to None.
            See https://scitools.org.uk/cartopy/docs/latest/reference/projections.html
        xlim (array-like, optional): Limits for the x-axis. Defaults to None.
        ylim (array-like, optional): Limits for the y-axis. Defaults to None.
        **kwargs: Additional keyword arguments to pass to the `plt.subplots` function.
    """

    import matplotlib.pyplot as plt
    import numpy as np
    import math

    if isinstance(dataset, str):
        dataset = read_pace(dataset, wavelengths, method)

    if wavelengths is not None:
        if not isinstance(wavelengths, list):
            wavelengths = [wavelengths]
        dataset = dataset.sel(wavelength=wavelengths, method=method)
    else:
        # Default to all available wavelengths when none are specified.
        wavelengths = dataset.coords["wavelength"].values.tolist()

    lat = dataset.coords["latitude"]
    lon = dataset.coords["longitude"]

    nrows = math.ceil(len(wavelengths) / ncols)

    if crs is None:

        fig, axes = plt.subplots(
            nrows=nrows,
            ncols=ncols,
            figsize=(figsize[0] * ncols, figsize[1] * nrows),
            **kwargs,
        )
        # Ensure axes is always an indexable array, even for a single subplot.
        axes = np.atleast_1d(axes)

        for i in range(nrows):
            for j in range(ncols):
                index = i * ncols + j
                if index < len(wavelengths):
                    wavelength = wavelengths[index]
                    data = dataset.sel(wavelength=wavelength, method=method)["Rrs"]

                    if min(nrows, ncols) == 1:
                        ax = axes[index]
                    else:
                        ax = axes[i, j]
                    im = ax.pcolormesh(
                        lon, lat, np.squeeze(data), cmap=cmap, vmin=vmin, vmax=vmax
                    )
                    ax.set_xlabel("Longitude")
                    ax.set_ylabel("Latitude")
                    ax.set_title(
                        f"wavelength = {dataset.coords['wavelength'].values[index]} [nm]"
                    )
                    fig.colorbar(im, ax=ax, label="Reflectance")

        plt.tight_layout()
        plt.show()

    else:

        import cartopy
        from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter

        if crs == "default":
            crs = cartopy.crs.PlateCarree()

        if xlim is None:
            xlim = [math.floor(lon.min()), math.ceil(lon.max())]

        if ylim is None:
            ylim = [math.floor(lat.min()), math.ceil(lat.max())]

        fig, axes = plt.subplots(
            nrows=nrows,
            ncols=ncols,
            figsize=(figsize[0] * ncols, figsize[1] * nrows),
            subplot_kw={"projection": cartopy.crs.PlateCarree()},
            **kwargs,
        )
        # Ensure axes is always an indexable array, even for a single subplot.
        axes = np.atleast_1d(axes)

        for i in range(nrows):
            for j in range(ncols):
                index = i * ncols + j
                if index < len(wavelengths):
                    wavelength = wavelengths[index]
                    data = dataset.sel(wavelength=wavelength, method=method)["Rrs"]

                    if min(nrows, ncols) == 1:
                        ax = axes[index]
                    else:
                        ax = axes[i, j]
                    im = ax.pcolormesh(
                        lon, lat, np.squeeze(data), cmap=cmap, vmin=vmin, vmax=vmax
                    )
                    ax.coastlines()
                    ax.add_feature(cartopy.feature.STATES, linewidth=0.5)
                    ax.set_xticks(np.linspace(xlim[0], xlim[1], 5), crs=crs)
                    ax.set_yticks(np.linspace(ylim[0], ylim[1], 5), crs=crs)
                    lon_formatter = LongitudeFormatter(zero_direction_label=True)
                    lat_formatter = LatitudeFormatter()
                    ax.xaxis.set_major_formatter(lon_formatter)
                    ax.yaxis.set_major_formatter(lat_formatter)
                    ax.set_xlabel("Longitude")
                    ax.set_ylabel("Latitude")
                    ax.set_title(
                        f"wavelength = {dataset.coords['wavelength'].values[index]} [nm]"
                    )
                    fig.colorbar(im, ax=ax, label="Reflectance")

        plt.tight_layout()
        plt.show()

viz_pace_chla(data, date=None, aspect=2, cmap='jet', size=6, **kwargs)

Visualizes PACE chlorophyll-a data using an xarray DataArray.

This function supports loading data from a file path (str) or directly using an xarray DataArray. It allows for selection of a specific date for visualization or averages over all dates if none is specified.

Parameters:

Name Type Description Default
data Union[str, xr.DataArray]

The chlorophyll-a data to visualize. Can be a file path or an xarray DataArray.

required
date Optional[str]

Specific date to visualize. If None, averages over all dates. Defaults to None.

None
aspect float

Aspect ratio of the plot. Defaults to 2.

2
cmap str

Colormap for the plot. Defaults to "jet".

'jet'
size int

Size of the plot. Defaults to 6.

6
**kwargs Any

Additional keyword arguments to pass to xarray.plot.

{}

Returns:

Type Description
xr.plot.facetgrid.FacetGrid

The plot generated from the chlorophyll-a data.

Exceptions:

Type Description
ValueError

If data is not a file path (str) or an xarray DataArray.
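
Examples:

Illustrative usage; the file path and date are placeholders. Passing a file path reads the data with read_pace_chla, and omitting date averages over all available dates:

>>> viz_pace_chla('path/to/chla_file.nc', date='2024-06-01', cmap='jet')
>>> viz_pace_chla('path/to/chla_file.nc')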

Source code in hypercoast/pace.py
def viz_pace_chla(
    data: Union[str, xr.DataArray],
    date: Optional[str] = None,
    aspect: float = 2,
    cmap: str = "jet",
    size: int = 6,
    **kwargs: Any,
) -> xr.plot.facetgrid.FacetGrid:
    """
    Visualizes PACE chlorophyll-a data using an xarray DataArray.

    This function supports loading data from a file path (str) or directly using an xarray DataArray.
    It allows for selection of a specific date for visualization or averages over all dates if none is specified.

    Args:
        data (Union[str, xr.DataArray]): The chlorophyll-a data to visualize. Can be a file path or an xarray DataArray.
        date (Optional[str], optional): Specific date to visualize. If None, averages over all dates. Defaults to None.
        aspect (float, optional): Aspect ratio of the plot. Defaults to 2.
        cmap (str, optional): Colormap for the plot. Defaults to "jet".
        size (int, optional): Size of the plot. Defaults to 6.
        **kwargs (Any): Additional keyword arguments to pass to `xarray.plot`.

    Returns:
        xr.plot.facetgrid.FacetGrid: The plot generated from the chlorophyll-a data.

    Raises:
        ValueError: If `data` is not a file path (str) or an xarray DataArray.
    """
    if isinstance(data, str):
        data = read_pace_chla(data)
    elif not isinstance(data, xr.DataArray):
        raise ValueError("data must be an xarray DataArray")

    if date is not None:
        data = data.sel(date=date)
    else:
        if "date" in data.coords:
            data = data.mean(dim="date")

    return data.plot(aspect=aspect, cmap=cmap, size=size, **kwargs)