Skip to content

chla module

Module for training and evaluating the VAE model for Chl-a concentration estimation.

VAE (Module)

Source code in hypercoast/chla.py
class VAE(nn.Module):
    """Variational autoencoder mapping Rrs spectra to Chl-a estimates."""

    def __init__(self, input_dim: int, output_dim: int) -> None:
        """Builds the encoder, the latent heads, and the decoder.

        Args:
            input_dim (int): Dimension of the input features.
            output_dim (int): Dimension of the output features.
        """
        super().__init__()

        # Encoder: two Linear -> BatchNorm1d -> LeakyReLU stages of width 64.
        self.encoder_layer = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(0.2),
            nn.Linear(64, 64),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(0.2),
        )

        # Latent heads: fc1 produces the mean, fc2 the log variance
        # (latent dimension 32).
        self.fc1 = nn.Linear(64, 32)
        self.fc2 = nn.Linear(64, 32)

        # Decoder mirrors the encoder; the final Softplus keeps the
        # reconstructed output strictly positive.
        self.decoder = nn.Sequential(
            nn.Linear(32, 64),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(0.2),
            nn.Linear(64, 64),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(0.2),
            nn.Linear(64, output_dim),
            nn.Softplus(),
        )

    def encode(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        """Maps an input batch to latent mean and log variance.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            tuple[torch.Tensor, torch.Tensor]: Mean and log variance tensors.
        """
        hidden = self.encoder_layer(x)
        return self.fc1(hidden), self.fc2(hidden)

    def reparameterize(self, mu: torch.Tensor, log_var: torch.Tensor) -> torch.Tensor:
        """Samples z = mu + sigma * eps with eps ~ N(0, I).

        Args:
            mu (torch.Tensor): Mean tensor.
            log_var (torch.Tensor): Log variance tensor.

        Returns:
            torch.Tensor: Sampled latent vector.
        """
        sigma = torch.exp(0.5 * log_var)
        return mu + torch.randn_like(sigma) * sigma

    def decode(self, z: torch.Tensor) -> torch.Tensor:
        """Reconstructs the output from a latent vector.

        Args:
            z (torch.Tensor): Latent vector.

        Returns:
            torch.Tensor: Reconstructed tensor.
        """
        return self.decoder(z)

    def forward(
        self, x: torch.Tensor
    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """Encodes, samples a latent vector, and decodes in one pass.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            tuple[torch.Tensor, torch.Tensor, torch.Tensor]: Reconstructed
                tensor, mean, and log variance.
        """
        mu, log_var = self.encode(x)
        return self.decode(self.reparameterize(mu, log_var)), mu, log_var

__init__(self, input_dim, output_dim) special

Initializes the VAE model with encoder and decoder layers.

Parameters:

Name Type Description Default
input_dim int

Dimension of the input features.

required
output_dim int

Dimension of the output features.

required
Source code in hypercoast/chla.py
def __init__(self, input_dim: int, output_dim: int) -> None:
    """Initializes the VAE model with encoder and decoder layers.

    The encoder is a two-layer MLP of width 64 whose output feeds two
    linear heads producing a 32-dimensional latent mean (``fc1``) and log
    variance (``fc2``). The decoder mirrors the encoder and ends in a
    Softplus, so reconstructed outputs are strictly positive.

    Args:
        input_dim (int): Dimension of the input features.
        output_dim (int): Dimension of the output features.
    """
    super().__init__()

    # encoder: two Linear -> BatchNorm1d -> LeakyReLU stages of width 64
    self.encoder_layer = nn.Sequential(
        nn.Linear(input_dim, 64),
        nn.BatchNorm1d(64),
        nn.LeakyReLU(0.2),
        nn.Linear(64, 64),
        nn.BatchNorm1d(64),
        nn.LeakyReLU(0.2),
    )

    # latent heads: fc1 -> mean, fc2 -> log variance (latent size 32)
    self.fc1 = nn.Linear(64, 32)
    self.fc2 = nn.Linear(64, 32)

    # decoder: mirrors the encoder; Softplus keeps outputs positive
    self.decoder = nn.Sequential(
        nn.Linear(32, 64),
        nn.BatchNorm1d(64),
        nn.LeakyReLU(0.2),
        nn.Linear(64, 64),
        nn.BatchNorm1d(64),
        nn.LeakyReLU(0.2),
        nn.Linear(64, output_dim),
        nn.Softplus(),
    )

decode(self, z)

Decodes the latent vector back to the original space.

Parameters:

Name Type Description Default
z torch.Tensor

Latent vector.

required

Returns:

Type Description
torch.Tensor

Reconstructed tensor.

Source code in hypercoast/chla.py
def decode(self, z: torch.Tensor) -> torch.Tensor:
    """Reconstructs the output from a latent vector.

    Args:
        z (torch.Tensor): Latent vector.

    Returns:
        torch.Tensor: Reconstructed tensor.
    """
    reconstruction = self.decoder(z)
    return reconstruction

encode(self, x)

Encodes the input tensor into mean and log variance.

Parameters:

Name Type Description Default
x torch.Tensor

Input tensor.

required

Returns:

Type Description
tuple[torch.Tensor, torch.Tensor]

Mean and log variance tensors.

Source code in hypercoast/chla.py
def encode(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
    """Maps an input batch to latent mean and log variance.

    Args:
        x (torch.Tensor): Input tensor.

    Returns:
        tuple[torch.Tensor, torch.Tensor]: Mean and log variance tensors.
    """
    hidden = self.encoder_layer(x)
    return self.fc1(hidden), self.fc2(hidden)

forward(self, x)

Performs a forward pass through the VAE model.

Parameters:

Name Type Description Default
x torch.Tensor

Input tensor.

required

Returns:

Type Description
tuple[torch.Tensor, torch.Tensor, torch.Tensor]

Reconstructed tensor, mean, and log variance.

Source code in hypercoast/chla.py
def forward(
    self, x: torch.Tensor
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Encodes, samples a latent vector, and decodes in one pass.

    Args:
        x (torch.Tensor): Input tensor.

    Returns:
        tuple[torch.Tensor, torch.Tensor, torch.Tensor]: Reconstructed
            tensor, mean, and log variance.
    """
    mu, log_var = self.encode(x)
    latent = self.reparameterize(mu, log_var)
    return self.decode(latent), mu, log_var

reparameterize(self, mu, log_var)

Applies the reparameterization trick to sample from the latent space.

Parameters:

Name Type Description Default
mu torch.Tensor

Mean tensor.

required
log_var torch.Tensor

Log variance tensor.

required

Returns:

Type Description
torch.Tensor

Sampled latent vector.

Source code in hypercoast/chla.py
def reparameterize(self, mu: torch.Tensor, log_var: torch.Tensor) -> torch.Tensor:
    """Samples z = mu + sigma * eps with eps ~ N(0, I).

    This is the standard VAE reparameterization trick: the randomness is
    isolated in ``eps`` so gradients can flow through ``mu`` and ``log_var``.

    Args:
        mu (torch.Tensor): Mean tensor.
        log_var (torch.Tensor): Log variance tensor.

    Returns:
        torch.Tensor: Sampled latent vector.
    """
    sigma = torch.exp(0.5 * log_var)
    return mu + torch.randn_like(sigma) * sigma

calculate_metrics(predictions, actuals, threshold=0.8)

Calculates epsilon, beta, and additional metrics (RMSE, RMSLE, MAPE, Bias, MAE).

Parameters:

Name Type Description Default
predictions np.ndarray

Predicted values.

required
actuals np.ndarray

Actual values.

required
threshold float

Relative error threshold. Defaults to 0.8.

0.8

Returns:

Type Description
tuple[float, float, float, float, float, float, float]

epsilon, beta, rmse, rmsle, mape, bias, mae.

Source code in hypercoast/chla.py
def calculate_metrics(
    predictions: np.ndarray, actuals: np.ndarray, threshold: float = 0.8
) -> tuple[float, float, float, float, float, float, float]:
    """Computes epsilon, beta, and additional error metrics.

    Points whose relative error meets or exceeds ``threshold`` are discarded
    first; epsilon/beta use the median log10 prediction/actual ratio, and
    RMSE, RMSLE, MAPE, bias, and MAE are computed on the same filtered
    subset.

    Args:
        predictions (np.ndarray): Predicted values.
        actuals (np.ndarray): Actual values.
        threshold (float, optional): Relative error threshold. Defaults to 0.8.

    Returns:
        tuple[float, float, float, float, float, float, float]: epsilon,
            beta, rmse, rmsle, mape, bias, mae.
    """
    # Keep only points with relative error below the threshold; the small
    # constant guards against division by zero.
    relative_error = np.abs(predictions - actuals) / np.abs(actuals + 1e-10)
    keep = relative_error < threshold
    pred = predictions[keep]
    act = actuals[keep]

    # Epsilon (from the median absolute log ratio) and beta (from the signed
    # median log ratio), both mapped back to percent space.
    log_ratios = np.log10(pred / act)
    abs_median = np.median(np.abs(log_ratios))
    signed_median = np.median(log_ratios)
    epsilon = 50 * (10**abs_median - 1)
    beta = 50 * np.sign(signed_median) * (10 ** np.abs(signed_median) - 1)

    # Point-wise error metrics on the filtered subset.
    residuals = pred - act
    rmse = np.sqrt(np.mean(residuals**2))
    rmsle = np.sqrt(np.mean((np.log10(pred + 1) - np.log10(act + 1)) ** 2))
    mape = 50 * np.median(np.abs(residuals / act))
    log_diff = np.log10(pred) - np.log10(act)
    bias = 10 ** np.mean(log_diff)
    mae = 10 ** np.mean(np.abs(log_diff))

    return epsilon, beta, rmse, rmsle, mape, bias, mae

chla_predict(pace_filepath, best_model_path, chla_data_file=None, device=None)

Predicts chlorophyll-a concentration using a pre-trained VAE model.

Parameters:

Name Type Description Default
pace_filepath str

Path to the PACE dataset file.

required
best_model_path str

Path to the pre-trained VAE model file.

required
chla_data_file str

Path to save the predicted chlorophyll-a data. Defaults to None.

None
device torch.device

Device to perform inference on. Defaults to None.

None
Source code in hypercoast/chla.py
def chla_predict(
    pace_filepath: str,
    best_model_path: str,
    chla_data_file: str = None,
    device: torch.device = None,
) -> None:
    """Predicts chlorophyll-a concentration using a pre-trained VAE model.

    Loads a PACE Rrs cube, keeps the 400-703 nm bands, normalizes each
    valid pixel spectrum to [1, 10], runs the VAE, and saves an (N, 3)
    table of (latitude, longitude, chla) as a .npy file.

    Args:
        pace_filepath (str): Path to the PACE dataset file.
        best_model_path (str): Path to the pre-trained VAE model file.
        chla_data_file (str, optional): Path to save the predicted
            chlorophyll-a data. Defaults to None, which saves next to the
            input file with a .npy extension.
        device (torch.device, optional): Device to perform inference on.
            Defaults to None (CUDA if available, else CPU).
    """

    from .pace import read_pace

    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load the PACE dataset and pull out the Rrs cube plus coordinates.
    PACE_dataset = read_pace(pace_filepath)
    da = PACE_dataset["Rrs"]
    wl = da.wavelength.values
    Rrs = da.values
    latitude = da.latitude.values
    longitude = da.longitude.values

    # Keep only wavelengths between 400 and 703 nm.
    indices = np.where((wl >= 400) & (wl <= 703))[0]
    filtered_Rrs = Rrs[:, :, indices]

    # Mask is 1 where a pixel has valid (non-NaN) values at every kept band.
    mask = np.all(~np.isnan(filtered_Rrs), axis=2).astype(int)

    # Model dimensions. The input dimension is derived from the number of
    # retained bands (148 for the standard PACE 400-703 nm range) rather than
    # hard-coded, so a band-count mismatch with the checkpoint fails clearly
    # at load_state_dict instead of at an opaque reshape.
    input_dim = filtered_Rrs.shape[-1]
    output_dim = 1

    # Select the valid pixel spectra only.
    valid = mask == 1
    N = np.sum(valid)
    valid_test_data = filtered_Rrs[valid]

    # Normalize each spectrum independently to [1, 10]. The mask guarantees
    # these rows contain no NaNs, so no per-row NaN check is needed.
    valid_test_data = np.array(
        [
            MinMaxScaler(feature_range=(1, 10))
            .fit_transform(row.reshape(-1, 1))
            .flatten()
            for row in valid_test_data
        ]
    )
    valid_test_data = valid_test_data.reshape(N, input_dim)

    # Batch the spectra for inference.
    test_tensor = TensorDataset(torch.tensor(valid_test_data).float())
    test_loader = DataLoader(test_tensor, batch_size=2048, shuffle=False)

    # Load the pre-trained VAE model.
    model = VAE(input_dim, output_dim).to(device)
    model.load_state_dict(torch.load(best_model_path, map_location=device))
    model.eval()

    # Run inference batch by batch without autograd bookkeeping.
    predictions_all = []
    with torch.no_grad():
        for batch in test_loader:
            batch = batch[0].to(device)
            predictions, _, _ = model(batch)  # VAE model inference
            predictions_all.append(predictions.cpu().numpy())

    # Concatenate all batch predictions.
    predictions_all = np.vstack(predictions_all)

    # Drop the trailing singleton channel so there is one value per pixel.
    if predictions_all.shape[-1] == 1:
        predictions_all = predictions_all.squeeze(-1)

    # Scatter predictions back into the 2-D scene; invalid pixels stay NaN.
    outputs = np.full((filtered_Rrs.shape[0], filtered_Rrs.shape[1]), np.nan)
    outputs[valid] = predictions_all

    # Combine latitude, longitude, and predictions into an (N, 3) table.
    final_output = np.column_stack(
        (latitude.flatten(), longitude.flatten(), outputs.flatten())
    )

    # Save the final output including latitude and longitude.
    if chla_data_file is None:
        chla_data_file = pace_filepath.replace(".nc", ".npy")
    np.save(chla_data_file, final_output)

chla_viz(rgb_image_tif_file, chla_data_file, output_file, title='PACE', figsize=(12, 8), cmap='jet')

Visualizes the chlorophyll-a concentration over an RGB image.

Parameters:

Name Type Description Default
rgb_image_tif_file str

Path to the RGB image file.

required
chla_data_file str

Path to the chlorophyll-a data file.

required
output_file str

Path to save the output visualization.

required
title str

Title of the plot. Defaults to "PACE".

'PACE'
figsize tuple

Figure size for the plot. Defaults to (12, 8).

(12, 8)
cmap str

Colormap for the chlorophyll-a concentration. Defaults to "jet".

'jet'
Source code in hypercoast/chla.py
def chla_viz(
    rgb_image_tif_file: str,
    chla_data_file: str,
    output_file: str,
    title: str = "PACE",
    figsize: tuple = (12, 8),
    cmap: str = "jet",
    vmin: float = 0,
    vmax: float = 35,
) -> None:
    """Visualizes the chlorophyll-a concentration over an RGB image.

    Args:
        rgb_image_tif_file (str): Path to the RGB image file.
        chla_data_file (str): Path to the chlorophyll-a data file, an .npy
            array whose columns are latitude, longitude, and chla value.
        output_file (str): Path to save the output visualization.
        title (str, optional): Title of the plot. Defaults to "PACE".
        figsize (tuple, optional): Figure size for the plot. Defaults to (12, 8).
        cmap (str, optional): Colormap for the chlorophyll-a concentration.
            Defaults to "jet".
        vmin (float, optional): Lower color-scale limit for the chlorophyll-a
            overlay. Defaults to 0.
        vmax (float, optional): Upper color-scale limit for the chlorophyll-a
            overlay. Defaults to 35.
    """

    with rasterio.open(rgb_image_tif_file) as dataset:
        # Read the R, G, B bands.
        R = dataset.read(1)
        G = dataset.read(2)
        B = dataset.read(3)

        # Geographic extent of the image (left, right, bottom, top).
        extent = [
            dataset.bounds.left,
            dataset.bounds.right,
            dataset.bounds.bottom,
            dataset.bounds.top,
        ]
        width, height = dataset.width, dataset.height

    # Combine the R, G, B bands into an (H, W, 3) array.
    rgb_image = np.stack((R, G, B), axis=-1)

    # Load the chlorophyll-a table and split its columns.
    chla_data = np.load(chla_data_file)
    latitude = chla_data[:, 0]
    longitude = chla_data[:, 1]
    chla_values = chla_data[:, 2]

    # Keep only the points that fall inside the RGB image's extent.
    mask = (
        (latitude >= extent[2])
        & (latitude <= extent[3])
        & (longitude >= extent[0])
        & (longitude <= extent[1])
    )
    latitude = latitude[mask]
    longitude = longitude[mask]
    chla_values = chla_values[mask]

    # Build a grid with the same resolution as the RGB image; latitude runs
    # top-to-bottom to match the image's row order.
    grid_lon = np.linspace(extent[0], extent[1], width)
    grid_lat = np.linspace(extent[3], extent[2], height)
    grid_lon, grid_lat = np.meshgrid(grid_lon, grid_lat)

    # Resample the scattered chlorophyll-a points onto the image grid.
    chla_resampled = griddata(
        (longitude, latitude), chla_values, (grid_lon, grid_lat), method="linear"
    )

    # Mask NaNs so uncovered regions render transparent.
    chla_resampled = np.ma.masked_invalid(chla_resampled)

    plt.figure(figsize=figsize)

    # Assumes 8-bit RGB bands; scale to [0, 1] for imshow — TODO confirm.
    plt.imshow(rgb_image / 255.0, extent=extent, origin="upper")

    im = plt.imshow(
        chla_resampled,
        extent=extent,
        cmap=cmap,
        alpha=0.6,
        origin="upper",
        vmin=vmin,
        vmax=vmax,
    )

    cbar = plt.colorbar(im, orientation="horizontal")
    cbar.set_label("Chlorophyll-a Concentration (mg/m³)")

    plt.title(title)
    plt.xlabel("Longitude")
    plt.ylabel("Latitude")

    # Save before show(): on interactive backends show() blocks and can
    # close the figure, which would leave the saved file blank.
    plt.savefig(output_file, dpi=300, bbox_inches="tight", pad_inches=0.1)
    print(f"Saved overlay image to {output_file}")

    plt.show()

evaluate(model, test_dl, device=None)

Evaluates the VAE model.

Parameters:

Name Type Description Default
model VAE

VAE model to be evaluated.

required
test_dl DataLoader

DataLoader for test data.

required
device torch.device

Device to evaluate on. Defaults to None.

None

Returns:

Type Description
tuple[np.ndarray, np.ndarray]

Predictions and actual values.

Source code in hypercoast/chla.py
def evaluate(
    model: VAE, test_dl: DataLoader, device: torch.device = None
) -> tuple[np.ndarray, np.ndarray]:
    """Runs the model over a test DataLoader and collects outputs.

    Args:
        model (VAE): VAE model to be evaluated.
        test_dl (DataLoader): DataLoader for test data.
        device (torch.device, optional): Device to evaluate on. Defaults to None
            (CUDA if available, else CPU).

    Returns:
        tuple[np.ndarray, np.ndarray]: Predictions and actual values.
    """

    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.eval()
    pred_batches = []
    true_batches = []
    # Inference only: no autograd bookkeeping needed.
    with torch.no_grad():
        for features, targets in test_dl:
            features = features.to(device)
            targets = targets.to(device)
            outputs, _, _ = model(features)
            pred_batches.append(outputs.cpu().numpy())
            true_batches.append(targets.cpu().numpy())
    return np.vstack(pred_batches), np.vstack(true_batches)

load_real_data(aphy_file_path, rrs_file_path)

Loads and preprocesses real data for training and testing.

Parameters:

Name Type Description Default
aphy_file_path str

Path to the aphy file.

required
rrs_file_path str

Path to the rrs file.

required

Returns:

Type Description
tuple[DataLoader, DataLoader, int, int]

Training DataLoader, testing DataLoader, input dimension, output dimension.

Source code in hypercoast/chla.py
def load_real_data(
    aphy_file_path: str, rrs_file_path: str
) -> tuple[DataLoader, DataLoader, int, int]:
    """Loads real aphy/Rrs data and builds train/test DataLoaders.

    Args:
        aphy_file_path (str): Path to the aphy file.
        rrs_file_path (str): Path to the rrs file.

    Returns:
        tuple[DataLoader, DataLoader, int, int]: Training DataLoader, testing
            DataLoader, input dimension, output dimension.
    """
    chl = np.loadtxt(aphy_file_path, delimiter=",", dtype=float).reshape(-1, 1)
    rrs = np.loadtxt(rrs_file_path, delimiter=",", dtype=float)

    input_dim = rrs.shape[1]
    output_dim = chl.shape[1]

    # Rescale each spectrum (row) independently to the range [1, 10].
    rrs_normalized = np.array(
        [
            MinMaxScaler(feature_range=(1, 10))
            .fit_transform(row.reshape(-1, 1))
            .flatten()
            for row in rrs
        ]
    )

    dataset = TensorDataset(
        torch.tensor(rrs_normalized, dtype=torch.float32),
        torch.tensor(chl, dtype=torch.float32),
    )

    # 70/30 random split into train and test subsets.
    n_train = int(0.7 * len(dataset))
    train_subset, test_subset = random_split(
        dataset, [n_train, len(dataset) - n_train]
    )

    train_real_dl = DataLoader(
        train_subset, batch_size=1024, shuffle=True, num_workers=12
    )
    test_real_dl = DataLoader(
        test_subset, batch_size=1024, shuffle=False, num_workers=12
    )

    return train_real_dl, test_real_dl, input_dim, output_dim

load_real_test(aphy_file_path, rrs_file_path)

Loads and preprocesses real data for testing.

Parameters:

Name Type Description Default
aphy_file_path str

Path to the aphy file.

required
rrs_file_path str

Path to the rrs file.

required

Returns:

Type Description
tuple[DataLoader, int, int]

Testing DataLoader, input dimension, output dimension.

Source code in hypercoast/chla.py
def load_real_test(
    aphy_file_path: str, rrs_file_path: str
) -> tuple[DataLoader, int, int]:
    """Loads real aphy/Rrs data and builds a single full-dataset DataLoader.

    Args:
        aphy_file_path (str): Path to the aphy file.
        rrs_file_path (str): Path to the rrs file.

    Returns:
        tuple[DataLoader, int, int]: Testing DataLoader, input dimension,
            output dimension.
    """
    chl = np.loadtxt(aphy_file_path, delimiter=",", dtype=float).reshape(-1, 1)
    rrs = np.loadtxt(rrs_file_path, delimiter=",", dtype=float)

    input_dim = rrs.shape[1]
    output_dim = chl.shape[1]

    # Rescale each spectrum (row) independently to the range [1, 10].
    rrs_normalized = np.array(
        [
            MinMaxScaler(feature_range=(1, 10))
            .fit_transform(row.reshape(-1, 1))
            .flatten()
            for row in rrs
        ]
    )

    dataset = TensorDataset(
        torch.tensor(rrs_normalized, dtype=torch.float32),
        torch.tensor(chl, dtype=torch.float32),
    )

    # One batch holding the entire dataset.
    test_real_dl = DataLoader(
        dataset, batch_size=int(len(dataset)), shuffle=False, num_workers=12
    )

    return test_real_dl, input_dim, output_dim

loss_function(recon_x, x, mu, log_var)

Computes the VAE loss function.

Parameters:

Name Type Description Default
recon_x torch.Tensor

Reconstructed tensor.

required
x torch.Tensor

Original input tensor.

required
mu torch.Tensor

Mean tensor.

required
log_var torch.Tensor

Log variance tensor.

required

Returns:

Type Description
torch.Tensor

Computed loss.

Source code in hypercoast/chla.py
def loss_function(
    recon_x: torch.Tensor, x: torch.Tensor, mu: torch.Tensor, log_var: torch.Tensor
) -> torch.Tensor:
    """Computes the training loss for the VAE.

    Despite the signature carrying the latent statistics, the returned loss
    is only the mean absolute (L1) error between the reconstruction and the
    target. The original implementation also computed an MSE term and the
    KL divergence but discarded both; those dead computations are removed
    here. ``mu`` and ``log_var`` are kept for interface compatibility and
    so the KL term can be re-enabled later.

    Args:
        recon_x (torch.Tensor): Reconstructed tensor.
        x (torch.Tensor): Original input tensor.
        mu (torch.Tensor): Mean tensor (currently unused).
        log_var (torch.Tensor): Log variance tensor (currently unused).

    Returns:
        torch.Tensor: Mean absolute (L1) reconstruction error.
    """
    # The KL divergence term would be:
    #   -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
    # It is intentionally not included, matching the original behavior.
    return F.l1_loss(recon_x, x, reduction="mean")

plot_results(predictions_rescaled, actuals_rescaled, save_dir, threshold=0.5, mode='test')

Plots the results of the predictions against the actual values.

Parameters:

Name Type Description Default
predictions_rescaled np.ndarray

Rescaled predicted values.

required
actuals_rescaled np.ndarray

Rescaled actual values.

required
save_dir str

Directory to save the plot.

required
threshold float

Relative error threshold. Defaults to 0.5.

0.5
mode str

Mode of the plot (e.g., "test"). Defaults to "test".

'test'
Source code in hypercoast/chla.py
def plot_results(
    predictions_rescaled: np.ndarray,
    actuals_rescaled: np.ndarray,
    save_dir: str,
    threshold: float = 0.5,
    mode: str = "test",
) -> None:
    """Plots predicted vs. actual Chl-a values in log10 space.

    Draws a scatter of all points, a KDE contour and regression line for
    the filtered points, annotates the legend with the error metrics from
    ``calculate_metrics``, and writes the figure to
    ``{save_dir}/{mode}_plot.pdf``.

    Args:
        predictions_rescaled (np.ndarray): Rescaled predicted values.
        actuals_rescaled (np.ndarray): Rescaled actual values.
        save_dir (str): Directory to save the plot.
        threshold (float, optional): Relative error threshold passed to
            calculate_metrics. Defaults to 0.5.
        mode (str, optional): Mode of the plot (e.g., "test"). Defaults to "test".
    """

    actuals = actuals_rescaled.flatten()
    predictions = predictions_rescaled.flatten()

    # NOTE(review): this pre-filter uses a fixed relative-error cutoff of 1
    # (not `threshold`); calculate_metrics then applies `threshold` again on
    # the already-filtered points — confirm this double filtering is intended.
    mask = np.abs(predictions - actuals) / np.abs(actuals + 1e-10) < 1
    filtered_predictions = predictions[mask]
    filtered_actuals = actuals[mask]

    # Log-transform, replacing exact zeros with a tiny value so log10 is finite.
    log_actual = np.log10(np.where(actuals == 0, 1e-10, actuals))
    log_prediction = np.log10(np.where(predictions == 0, 1e-10, predictions))

    filtered_log_actual = np.log10(
        np.where(filtered_actuals == 0, 1e-10, filtered_actuals)
    )
    filtered_log_prediction = np.log10(
        np.where(filtered_predictions == 0, 1e-10, filtered_predictions)
    )

    epsilon, beta, rmse, rmsle, mape, bias, mae = calculate_metrics(
        filtered_predictions, filtered_actuals, threshold
    )

    # Fit a regression line in log-log space over the finite points only.
    valid_mask = np.isfinite(filtered_log_actual) & np.isfinite(filtered_log_prediction)
    slope, intercept = np.polyfit(
        filtered_log_actual[valid_mask], filtered_log_prediction[valid_mask], 1
    )
    x = np.array([-2, 4])
    y = slope * x + intercept

    _ = plt.figure(figsize=(6, 6))

    # Regression line (dashed blue) and the 1:1 identity line (solid black).
    plt.plot(x, y, linestyle="--", color="blue", linewidth=0.8)
    lims = [-2, 4]
    plt.plot(lims, lims, linestyle="-", color="black", linewidth=0.8)

    sns.scatterplot(x=log_actual, y=log_prediction, alpha=0.5)

    sns.kdeplot(
        x=filtered_log_actual,
        y=filtered_log_prediction,
        levels=3,
        color="black",
        fill=False,
        linewidths=0.8,
    )

    plt.xlabel("Actual $Chla$ Values", fontsize=16)
    plt.ylabel("Predicted $Chla$ Values", fontsize=16)
    plt.xlim(-2, 4)
    plt.ylim(-2, 4)
    plt.grid(True, which="both", ls="--")

    plt.legend(
        title=(
            f"MAE = {mae:.2f}, RMSE = {rmse:.2f}, RMSLE = {rmsle:.2f} \n"
            f"Bias = {bias:.2f}, Slope = {slope:.2f} \n"
            f"MAPE = {mape:.2f}%, ε = {epsilon:.2f}%, β = {beta:.2f}%"
        ),
        fontsize=16,
        title_fontsize=12,
    )

    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)

    # Save BEFORE show(): on interactive backends show() blocks and the
    # figure may be closed by the time savefig runs, producing a blank file.
    plt.savefig(os.path.join(save_dir, f"{mode}_plot.pdf"), bbox_inches="tight")
    plt.show()

save_to_csv(data, file_path)

Saves data to a CSV file.

Parameters:

Name Type Description Default
data np.ndarray

Data to be saved.

required
file_path str

Path to the CSV file.

required
Source code in hypercoast/chla.py
def save_to_csv(data: np.ndarray, file_path: str) -> None:
    """Writes an array to disk as a CSV file (column headers, no row index).

    Args:
        data (np.ndarray): Data to be saved.
        file_path (str): Path to the CSV file.
    """
    pd.DataFrame(data).to_csv(file_path, index=False)

train(model, train_dl, epochs=200, device=None, opt=None, model_path='model/vae_model_PACE.pth', best_model_path='model/vae_trans_model_best_Chl_PACE.pth')

Trains the VAE model.

Parameters:

Name Type Description Default
model VAE

VAE model to be trained.

required
train_dl DataLoader

DataLoader for training data.

required
epochs int

Number of epochs to train. Defaults to 200.

200
device torch.device

Device to train on. Defaults to None.

None
opt torch.optim.Optimizer

Optimizer. Defaults to None.

None
model_path str

Path to save the model. Defaults to "model/vae_model_PACE.pth".

'model/vae_model_PACE.pth'
best_model_path str

Path to save the best model. Defaults to "model/vae_trans_model_best_Chl_PACE.pth".

'model/vae_trans_model_best_Chl_PACE.pth'
Source code in hypercoast/chla.py
def train(
    model: VAE,
    train_dl: DataLoader,
    epochs: int = 200,
    device: torch.device = None,
    opt: torch.optim.Optimizer = None,
    model_path: str = "model/vae_model_PACE.pth",
    best_model_path: str = "model/vae_trans_model_best_Chl_PACE.pth",
) -> None:
    """Trains the VAE model, checkpointing the best-loss epoch.

    Args:
        model (VAE): VAE model to be trained.
        train_dl (DataLoader): DataLoader for training data.
        epochs (int, optional): Number of epochs to train. Defaults to 200.
        device (torch.device, optional): Device to train on. Defaults to None
            (CUDA if available, else CPU).
        opt (torch.optim.Optimizer, optional): Optimizer. Defaults to None,
            which creates Adam with lr=0.001 and weight_decay=1e-3.
        model_path (str, optional): Path to save the final-epoch model.
            Defaults to "model/vae_model_PACE.pth".
        best_model_path (str, optional): Path to save the best model. Defaults
            to "model/vae_trans_model_best_Chl_PACE.pth"
    """

    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if opt is None:
        opt = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-3)

    model.train()

    # Lowest epoch-average loss observed so far; drives best-model saving.
    best_loss = float("inf")

    for epoch in range(epochs):
        running_loss = 0.0
        for features, targets in train_dl:
            features, targets = features.to(device), targets.to(device)
            preds, mu, log_var = model(features)
            batch_loss = loss_function(preds, targets, mu, log_var)
            opt.zero_grad()
            batch_loss.backward()
            opt.step()
            running_loss += batch_loss.item()

        mean_loss = running_loss / len(train_dl)
        print(f"epoch = {epoch + 1}, total_loss = {mean_loss:.4f}")

        # Checkpoint whenever this epoch improves on the best average loss.
        if mean_loss < best_loss:
            best_loss = mean_loss
            torch.save(model.state_dict(), best_model_path)
    # Also save the weights from the final epoch.
    torch.save(model.state_dict(), model_path)