Source code for firescipy.utils

import numpy as np
import pandas as pd

from typing import List, Dict, Union  # for type hints in functions


def series_to_numpy(data: Union[np.ndarray, pd.Series, list]) -> np.ndarray:
    """
    Helper function that converts a Pandas Series or a list to a NumPy array
    if necessary.

    Parameters
    ----------
    data : np.ndarray, pd.Series or list
        Input data to be converted. Pandas Series and lists (including
        subclasses of either) are converted. Any other object, e.g. an
        existing NumPy array, is returned unchanged.

    Returns
    -------
    np.ndarray
        The input data as a NumPy array.
    """

    # isinstance() instead of an exact type comparison, so that subclasses
    # of pd.Series and list are converted as well.
    if isinstance(data, pd.Series):
        return data.to_numpy()

    if isinstance(data, list):
        return np.array(data)

    # Everything else is passed through untouched.
    return data


def ensure_nested_dict(nested_dict, keys):
    """
    Make sure a chain of nested dictionaries exists for a sequence of keys.

    Starting at `nested_dict`, the keys are followed one by one. Whenever
    a key is not yet present at the current level, an empty dictionary is
    stored under it. Entries that already exist are left untouched.

    Parameters
    ----------
    nested_dict : dict
        The dictionary to operate on. It is modified in place so that
        it contains the full nested structure.
    keys : Sequence
        Keys describing the path of nested dictionaries, outermost first.

    Returns
    -------
    dict
        The innermost dictionary, i.e. the one at the end of the path.
        For an empty `keys` sequence this is `nested_dict` itself.
    """

    level = nested_dict
    for step in keys:
        # Create the next level only when it is missing.
        if step not in level:
            level[step] = {}
        # Descend one level along the path.
        level = level[step]

    return level


def store_in_nested_dict(nested_dict, new_data, keys):
    """
    Stores data in a nested dictionary structure, for the given keys.
    Intermediate levels will be created if they do not already exist.

    Parameters
    ----------
    nested_dict : dict
        The dictionary to operate on. It will be modified in place.
    new_data : Any
        Data to store at the specified nested location.
    keys : list or tuple
        Non-empty sequence of keys representing the nested path. The last
        key is the one under which `new_data` is stored.

    Returns
    -------
    None
        The nested dictionary is changed in place.

    Raises
    ------
    TypeError
        If `nested_dict` is not a dict.
    ValueError
        If `keys` is not a list or tuple, or if it is empty.
    """

    if not isinstance(nested_dict, dict):
        # The message refers to the actual parameter name.
        raise TypeError("Expected 'nested_dict' to be of type dict.")

    if not isinstance(keys, (list, tuple)) or not keys:
        raise ValueError("Expected 'keys' to be a non-empty list or tuple.")

    # All keys but the last describe the path to the target dictionary;
    # the last key is where the data goes.
    *path, final_key = keys
    storage_location = ensure_nested_dict(nested_dict, path)
    storage_location[final_key] = new_data


def get_nested_value(nested_dict, keys):
    """
    Retrieve a value from a nested dictionary, using a sequence of keys.

    Parameters
    ----------
    nested_dict : dict
        The nested dictionary to traverse.
    keys : Sequence
        A sequence of keys (e.g., list or tuple) specifying the path
        to the data.

    Returns
    -------
    Any
        The data at the specified location in the nested dictionary.
        If the path cannot be followed (a key is missing, or an
        intermediate value cannot be indexed with the next key), a
        message is printed and None is returned. For an empty `keys`
        sequence, `nested_dict` itself is returned.
    """

    current = nested_dict
    for key in keys:
        try:
            current = current[key]
        except (KeyError, IndexError, TypeError):
            # KeyError: key missing in a dict.
            # IndexError / TypeError: the path runs into a value that
            # cannot be indexed with this key (e.g. a scalar leaf or a
            # list that is too short). All of these mean the requested
            # path does not exist.
            print(f" * The key '{key}' does not exist.")
            # Gracefully return None if any key is missing in the nested path
            return None

    return current


def linear_model(x, m, b):
    """
    Evaluate a straight line, y = m * x + b.

    Parameters
    ----------
    x
        Value(s) at which the line is evaluated. Anything that supports
        multiplication by `m` and addition of `b` works.
    m
        Slope of the line.
    b
        Intercept of the line.

    Returns
    -------
    The result of ``m * x + b``.
    """
    # TODO: Move to numpy polyfit/polyval?

    scaled = m * x
    return scaled + b


def calculate_residuals(data_y, fit_y):
    """
    Compute the residuals between observed data and fitted values.

    Residuals are calculated as the difference between actual values (data_y)
    and predicted values (fit_y). Useful for evaluating the quality of
    regression or curve fitting results.

    Parameters
    ----------
    data_y : array-like
        The observed data values. Lists and tuples are converted to
        NumPy arrays; other inputs (e.g. NumPy arrays or Pandas Series)
        are used as they are.
    fit_y : array-like
        The predicted or fitted values. Handled like `data_y`.

    Returns
    -------
    np.ndarray
        The residuals, calculated as data_y - fit_y. If the inputs are of
        another type that defines subtraction, the result has whatever
        type that subtraction produces.
    """

    # Plain Python sequences do not support element-wise subtraction,
    # so they are turned into arrays first.
    if isinstance(data_y, (list, tuple)):
        data_y = np.asarray(data_y)
    if isinstance(fit_y, (list, tuple)):
        fit_y = np.asarray(fit_y)

    # Element-wise subtraction of predicted values from actual data
    return data_y - fit_y


def calculate_R_squared(residuals, data_y):
    """
    Calculate the coefficient of determination (R-squared) for a set of data.

    R-squared is defined as:

    .. math::

        R^2 = 1 - (SS_{res} / SS_{tot})

    where SS_res is the sum of squares of residuals (the differences between the
    observed and predicted values) and SS_tot is the total sum of squares
    (the differences between the observed values and their mean).
    This metric indicates the proportion of the variance in the dependent
    variable that is explained by the model.

    Parameters
    ----------
    residuals : array-like
        The residuals (errors) from the fitted model,
        typically computed as (observed - predicted). Lists and tuples
        are converted to NumPy arrays; other inputs are used as they are.
    data_y : array-like
        The observed data values. Handled like `residuals`.

    Returns
    -------
    float
        The R-squared value. Higher values indicate a better fit.
        Note: R² can be negative if the model performs worse
        than a constant mean.

    Notes
    -----
    If all values in `data_y` are identical, SS_tot is zero and the
    division follows NumPy semantics (the result is nan or -inf).
    """

    # Plain Python sequences support neither ** nor element-wise
    # subtraction, so they are turned into arrays first.
    if isinstance(residuals, (list, tuple)):
        residuals = np.asarray(residuals)
    if isinstance(data_y, (list, tuple)):
        data_y = np.asarray(data_y)

    # Calculate the sum of squares of residuals.
    ss_res = np.sum(residuals**2)

    # Calculate the total sum of squares relative to the mean of the observed data.
    ss_tot = np.sum((data_y - np.mean(data_y))**2)

    # Compute R-squared: 1 - (sum of squares of residuals divided by total sum of squares)
    return 1 - (ss_res / ss_tot)


def calculate_RMSE(residuals):
    """
    Compute the Root Mean Squared Error (RMSE) from residuals.

    RMSE is the square root of the average of the squared residuals.
    It provides an estimate of the standard deviation of the prediction errors
    and is commonly used to quantify the accuracy of a model.

    Parameters
    ----------
    residuals : array-like
        The residuals (differences between observed and predicted values).
        Lists and tuples are converted to NumPy arrays; other inputs are
        used as they are.

    Returns
    -------
    float
        The RMSE value, representing the standard deviation of residual errors.
    """

    # Plain Python sequences do not support **, so they are turned into
    # an array first.
    if isinstance(residuals, (list, tuple)):
        residuals = np.asarray(residuals)

    # Compute RMSE by taking the square root of the mean squared residuals
    return np.sqrt(np.mean(residuals**2))


def gaussian(x, mu, sigma, a=1.0):
    """
    Evaluate a scaled Gaussian (normal distribution) curve.

    The function computed is

    .. math::

        f(x) = \\frac{a}{\\sigma \\sqrt{2\\pi}} \\exp\\left( -\\frac{1}{2} \\left( \\frac{x - \\mu}{\\sigma} \\right)^2 \\right)

    Parameters
    ----------
    x : float or ndarray
        Point(s) at which the Gaussian is evaluated.
    mu : float
        Mean, i.e. the position of the centre of the curve.
    sigma : float
        Standard deviation, i.e. the width of the curve.
        Must be positive.
    a : float
        Scaling factor applied to the normalised curve, default: 1.0.

    Returns
    -------
    float or ndarray
        Value(s) of the Gaussian at `x`.
    """

    # Distance from the centre, measured in standard deviations.
    z = (x - mu) / sigma

    # Scaled normalisation prefactor times the exponential term.
    prefactor = a / (sigma * np.sqrt(2 * np.pi))
    return prefactor * np.exp(-0.5 * z ** 2)