# Source code for firescipy.utils
import numpy as np
import pandas as pd
from typing import List, Dict, Union # for type hints in functions
def series_to_numpy(data: Union[np.ndarray, pd.Series, list]) -> np.ndarray:
    """
    Helper function that converts a Pandas Series or a list to a NumPy array.

    Parameters
    ----------
    data : np.ndarray, pd.Series or list
        Input data to be converted. A Series (or Series subclass) is
        converted with ``Series.to_numpy()``; a list (or list subclass) is
        converted with ``np.array``. Any other object, e.g. a NumPy array,
        is returned unchanged.

    Returns
    -------
    np.ndarray
        The input data as a NumPy array. Objects that are neither a Series
        nor a list are handed back as they are.
    """
    # isinstance (rather than an exact type comparison) also covers
    # subclasses of Series and list.
    if isinstance(data, pd.Series):
        return data.to_numpy()
    if isinstance(data, list):
        return np.array(data)
    return data
def ensure_nested_dict(nested_dict, keys):
    """
    Ensures a nested dictionary structure exists for the given sequence of keys.

    Parameters
    ----------
    nested_dict : dict
        The dictionary to operate on. It is modified in place: every
        missing level along the path is created as an empty dictionary.
    keys : Sequence
        Sequence of keys describing the path of nested dictionaries.

    Returns
    -------
    dict
        The dictionary found (or created) at the end of the path. For an
        empty ``keys`` sequence this is ``nested_dict`` itself.
    """
    level = nested_dict
    for key in keys:
        if key in level:
            # Level already present: just step into it
            level = level[key]
        else:
            # Missing level: create it, attach it, then step into it
            child = dict()
            level[key] = child
            level = child
    return level
def store_in_nested_dict(nested_dict, new_data, keys):
    """
    Stores data in a nested dictionary at the location given by ``keys``.

    All keys except the last one describe the path of nested dictionaries;
    missing levels along that path are created. The last key is the entry
    under which ``new_data`` is stored.

    Parameters
    ----------
    nested_dict : dict
        The dictionary to operate on. It is modified in place.
    new_data : Any
        Data to store at the specified nested location.
    keys : list or tuple
        Non-empty sequence of keys representing the nested path.

    Returns
    -------
    None
        The nested dictionary is changed in place.

    Raises
    ------
    TypeError
        If ``nested_dict`` is not a dict.
    ValueError
        If ``keys`` is not a list or tuple, or if it is empty.
    """
    if not isinstance(nested_dict, dict):
        raise TypeError("Expected 'dictionary' to be of type dict.")

    keys_are_valid = isinstance(keys, (list, tuple)) and bool(keys)
    if not keys_are_valid:
        raise ValueError("Expected 'keys' to be a non-empty list or tuple.")

    # Split the path into the parent levels and the final entry name
    *parent_path, leaf_key = keys
    parent = ensure_nested_dict(nested_dict, parent_path)
    parent[leaf_key] = new_data
def get_nested_value(nested_dict, keys):
    """
    Retrieve a value from a nested dictionary, using a sequence of keys.

    Parameters
    ----------
    nested_dict : dict
        The nested dictionary to traverse.
    keys : Sequence
        A sequence of keys (e.g., list or tuple) specifying the path
        to the data.

    Returns
    -------
    Any
        The data at the specified location in the nested dictionary.
        If a key along the path raises ``KeyError``, a message naming that
        key is printed and ``None`` is returned. For an empty ``keys``
        sequence, ``nested_dict`` itself is returned.
    """
    node = nested_dict
    for key in keys:
        try:
            child = node[key]
        except KeyError:
            # Missing key: report it and give up on the rest of the path
            print(f" * The key '{key}' does not exist.")
            return None
        node = child
    return node
def linear_model(x, m, b):
    """
    Linear model function: y = mx + b.

    Parameters
    ----------
    x : scalar or array
        Value(s) at which the line is evaluated.
    m : scalar
        Slope, multiplied with ``x``.
    b : scalar
        Intercept, added to the product ``m * x``.

    Returns
    -------
    scalar or array
        The result of ``m * x + b``.
    """
    # TODO: Move to numpy polyfit/polyval?
    slope_term = m * x
    return slope_term + b
def calculate_residuals(data_y, fit_y):
    """
    Compute the residuals between observed data and fitted values.

    Residuals are calculated as the difference between actual values (data_y)
    and predicted values (fit_y). Useful for evaluating the quality of
    regression or curve fitting results.

    Parameters
    ----------
    data_y : array-like
        The observed data values.
    fit_y : array-like
        The predicted or fitted values.

    Returns
    -------
    np.ndarray
        The residuals, calculated as data_y - fit_y. Inputs that already
        define their own subtraction (e.g. a Pandas Series) are subtracted
        as they are, so the result type follows from that operation.
    """
    # Plain Python sequences do not support element-wise subtraction
    # (list - list raises TypeError), so convert them to arrays first.
    # Other inputs are left untouched.
    if isinstance(data_y, (list, tuple)):
        data_y = np.asarray(data_y)
    if isinstance(fit_y, (list, tuple)):
        fit_y = np.asarray(fit_y)

    # Element-wise subtraction of predicted values from actual data
    return data_y - fit_y
def calculate_R_squared(residuals, data_y):
    """
    Calculate the coefficient of determination (R-squared) for a set of data.

    R-squared is defined as:

    .. math::
        R^2 = 1 - (SS_{res} / SS_{tot})

    where SS_res is the sum of squares of residuals (the differences between the
    observed and predicted values) and SS_tot is the total sum of squares
    (the differences between the observed values and their mean).
    This metric indicates the proportion of the variance in the dependent
    variable that is explained by the model.

    Parameters
    ----------
    residuals : array-like
        The residuals (errors) from the fitted model,
        typically computed as (observed - predicted).
    data_y : array-like
        The observed data values.

    Returns
    -------
    float
        The R-squared value. Higher values indicate a better fit.
        Note: R² can be negative if the model performs worse
        than a constant mean. If all values in ``data_y`` are identical,
        SS_tot is zero and the division is not finite.
    """
    # Plain Python sequences do not support ** or element-wise subtraction,
    # so convert them to arrays first. Other inputs are left untouched.
    if isinstance(residuals, (list, tuple)):
        residuals = np.asarray(residuals)
    if isinstance(data_y, (list, tuple)):
        data_y = np.asarray(data_y)

    # Calculate the sum of squares of residuals.
    ss_res = np.sum(residuals**2)
    # Calculate the total sum of squares relative to the mean of the observed data.
    ss_tot = np.sum((data_y - np.mean(data_y))**2)
    # Compute R-squared: 1 - (sum of squares of residuals divided by total sum of squares)
    return 1 - (ss_res / ss_tot)
def calculate_RMSE(residuals):
    """
    Compute the Root Mean Squared Error (RMSE) from residuals.

    RMSE is the square root of the average of the squared residuals.
    It is commonly used to quantify the accuracy of a model.

    Parameters
    ----------
    residuals : array-like
        The residuals (differences between observed and predicted values).

    Returns
    -------
    float
        The RMSE value, i.e. ``sqrt(mean(residuals**2))``.
    """
    # Plain Python sequences do not support ** (list ** 2 raises TypeError),
    # so convert them to an array first. Other inputs are left untouched.
    if isinstance(residuals, (list, tuple)):
        residuals = np.asarray(residuals)

    # Compute RMSE by taking the square root of the mean squared residuals
    return np.sqrt(np.mean(residuals**2))
def gaussian(x, mu, sigma, a=1.0):
    r"""
    Compute the Gaussian (normal) distribution function.

    The Gaussian function is defined as shown below.

    .. math::
        f(x) = \frac{a}{\sigma \sqrt{2\pi}} \exp\left( -\frac{1}{2} \left( \frac{x - \mu}{\sigma} \right)^2 \right)

    Parameters
    ----------
    x : float or ndarray
        The input value(s) where the Gaussian function is evaluated.
    mu : float
        The mean (center) of the Gaussian distribution.
    sigma : float
        The standard deviation (spread) of the Gaussian distribution.
        Must be positive (this is not checked by the function).
    a : float
        A scaling factor of the Gaussian distribution, default: 1.0.

    Returns
    -------
    float or ndarray
        The computed value(s) of the Gaussian function at `x`.
    """
    # Distance from the center, in units of sigma
    z = (x - mu) / sigma
    # Prefactor a / (sigma * sqrt(2 * pi))
    prefactor = a / (sigma * np.sqrt(2 * np.pi))
    return prefactor * np.exp(-0.5 * z ** 2)