Source code for firescipy.pyrolysis.kinetics

import warnings

import numpy as np
import pandas as pd

from scipy.interpolate import interp1d
from scipy.optimize import curve_fit, minimize
from typing import List, Dict, Union  # for type hints in functions
from firescipy.utils import series_to_numpy, ensure_nested_dict, get_nested_value, linear_model, calculate_residuals, calculate_R_squared, calculate_RMSE
from firescipy.constants import GAS_CONSTANT


def initialize_investigation_skeleton(material, investigator=None, instrument=None,
                                      date=None, notes=None, signal=None):
    """
    Create a base dictionary representing a new investigation.

    This function returns a standardized data structure for documenting
    experimental investigations (e.g., thermal analysis). Optional metadata
    like investigator name, instrument, and signal description can be included.

    Parameters
    ----------
    material : str
        Material being investigated.
    investigator : str, optional
        Name of the investigator.
    instrument : str, optional
        Device label (e.g., "TGA" or "TGA/DSC 3+, Mettler Toledo").
    date : str, optional
        Date of the investigation.
    notes : str, optional
        Free-form notes or comments about the investigation.
    signal : dict, optional
        Dictionary describing the primary measurement signal. For example:
        {"name": "Mass", "unit": "mg"}. Common alternatives include
        {"name": "HeatFlow"} or {"name": "Enthalpy"}. This field guides
        downstream analysis and unit handling.

    Returns
    -------
    dict
        A dictionary representing the initialized investigation skeleton.
    """

    skeleton = {
        "general_info": {
            "material": material,
            "investigator": investigator,
            "instrument": instrument,
            "date": date,
            "notes": notes,
            "signal": signal
        },
        "experiments": dict(),
        # Add more experiment types as needed
    }

    return skeleton

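# Illustrative usage sketch (not part of the library API): shows how an
# investigation skeleton might be set up for a TGA study. The material,
# investigator, and instrument strings below are hypothetical placeholders.
def _example_initialize_skeleton():
    db = initialize_investigation_skeleton(
        material="Beech wood",
        investigator="J. Doe",
        instrument="TGA/DSC 3+, Mettler Toledo",
        date="2024-01-15",
        notes="Nitrogen atmosphere.",
        signal={"name": "Mass", "unit": "mg"})
    # The skeleton starts with an empty "experiments" branch; raw data is added
    # later with add_isothermal_tga() / add_constant_heating_rate_tga().
    return db
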
def add_isothermal_tga(database, condition, repetition, raw_data, data_type=None, set_value=None):
    """
    Add raw data for an isothermal TGA experiment to the database.

    This function ensures that the hierarchical structure for storing
    isothermal TGA experiment data is present in the database. If the
    structure does not exist, it will be created. Then, the provided raw data
    for a specific experimental condition and repetition is added to the
    database.

    The `database` argument is expected to be a nested dictionary following
    the structure created by `initialize_investigation_skeleton()`.

    Parameters
    ----------
    database : dict
        The main dictionary where all experimental data is stored.
    condition : str
        The experimental condition (e.g., "300_C") under which the isothermal
        TGA data was collected.
    repetition : str
        Identifier for the specific repetition of the experiment
        (e.g., "Rep_1", "Rep_2").
    raw_data : pd.DataFrame
        The raw data collected for the specific condition and repetition.
    data_type : str
        Type of data: "differential" or "integral". A ValueError is raised if
        neither is provided.
    set_value : list of [float, str], optional
        The nominal temperature program, value and unit [float, str]
        (e.g. [300, "°C"]). Defaults to None.

    Returns
    -------
    None
        Updates the database in place, adding the isothermal data under the
        specified condition and repetition.
    """

    # Ensure the path exists
    path_keys = ["experiments", "TGA", "isothermal", condition, "raw"]
    raw_dict = ensure_nested_dict(database, path_keys)

    # Get the nominal temperature program setting
    if set_value is None:
        nominal_beta = {"value": None, "unit": None}
    else:
        nominal_beta = {"value": set_value[0], "unit": set_value[1]}
    database["experiments"]["TGA"]["isothermal"][condition]["set_value"] = nominal_beta

    # Add data type
    expected_types = ["differential", "integral"]
    if data_type not in expected_types:
        raise ValueError(" * Either 'differential' or 'integral' needs to be provided for 'data_type'!")
    database["experiments"]["TGA"]["isothermal"][condition]["data_type"] = data_type

    # Add the raw data for the given repetition
    raw_dict[repetition] = raw_data

def add_constant_heating_rate_tga(database, condition, repetition, raw_data, data_type=None, set_value=None):
    """
    Add raw data for a constant heating rate TGA experiment to the database.

    This function ensures that the hierarchical structure needed to store
    constant heating rate TGA data exists in the `database`. If not, it is
    created automatically. The provided raw data for a specific experimental
    condition and repetition is then added to the database.

    The `database` argument is expected to be a nested dictionary following
    the structure created by `initialize_investigation_skeleton()`.

    Parameters
    ----------
    database : dict
        The main dictionary where all experimental data is stored.
    condition : str
        The experimental condition (e.g., "10_Kmin") under which the constant
        heating rate TGA data was collected.
    repetition : str
        Identifier for the specific repetition of the experiment
        (e.g., "Rep_1", "Rep_2").
    raw_data : pd.DataFrame
        The raw data collected for the specific condition and repetition.
    data_type : str
        Type of data: "differential" or "integral". A ValueError is raised if
        neither is provided.
    set_value : list of [float, str], optional
        The nominal temperature program, value and unit [float, str]
        (e.g. [2.5, "K/min"]). Defaults to None.

    Returns
    -------
    None
        Updates the dictionary in place, adding the constant heating rate data
        under the specified condition.
    """

    # Ensure the path exists
    path_keys = ["experiments", "TGA", "constant_heating_rate", condition, "raw"]
    raw_dict = ensure_nested_dict(database, path_keys)

    # Get the nominal temperature program setting
    if set_value is None:
        nominal_beta = {"value": None, "unit": None}
    else:
        nominal_beta = {"value": set_value[0], "unit": set_value[1]}
    database["experiments"]["TGA"]["constant_heating_rate"][condition]["set_value"] = nominal_beta

    # Add data type
    expected_types = ["differential", "integral"]
    if data_type not in expected_types:
        raise ValueError(" * Either 'differential' or 'integral' needs to be provided for 'data_type'!")
    database["experiments"]["TGA"]["constant_heating_rate"][condition]["data_type"] = data_type

    # Add the raw data for the given repetition
    raw_dict[repetition] = raw_data

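# Illustrative usage sketch (not part of the library API): registers raw TGA
# data for two repetitions of a 10 K/min run. The CSV file names are
# hypothetical placeholders.
def _example_add_constant_heating_rate(db):
    for rep in ("Rep_1", "Rep_2"):
        raw = pd.read_csv(f"tga_10Kmin_{rep}.csv")  # hypothetical file
        add_constant_heating_rate_tga(
            db,
            condition="10_Kmin",
            repetition=rep,
            raw_data=raw,
            data_type="integral",        # mass signal, i.e. integral data
            set_value=[10.0, "K/min"])   # nominal heating rate
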
def combine_repetitions(database, condition, temp_program="constant_heating_rate", column_mapping=None):
    """
    Combine raw data from multiple repetitions for a specific condition.

    This function extracts and combines raw data for a given experimental
    condition (e.g., "300_C" or "10_Kmin") under a specified temperature
    program (e.g., "isothermal" or "constant_heating_rate").

    The `column_mapping` parameter allows data files with non-standard column
    headers to be mapped into a unified format expected by downstream
    functions. For example, :func:`compute_conversion` expects columns labeled
    'Time', 'Temperature', and the signal name defined in the investigation
    skeleton.

    Parameters
    ----------
    database : dict
        The main data structure storing all experimental data. Should follow
        the format initialized by :func:`initialize_investigation_skeleton`.
    condition : str
        The condition to combine (e.g., "300_C" or "10_Kmin").
    temp_program : str
        The temperature program of the experiment (e.g. "isothermal" or
        "constant_heating_rate").
    column_mapping : dict, optional
        Mapping from custom column labels to standardised ones. Expected keys:
        'time', 'temp', and 'signal'. Here, 'signal' indicates the recorded
        quantity, like 'mass' in the TGA. Example::

            {
                'time': 'Time (s)',
                'temp': 'Temperature (deg C)',
                'signal': 'Mass (mg)'
            }

    Returns
    -------
    pd.DataFrame
        The combined data (reference time, per-repetition columns, averages
        and standard deviations). The same DataFrame is also stored in the
        database under the specified condition and temperature program as
        "combined".
    """

    # Check if proper temperature program is chosen
    assert temp_program in ("isothermal", "constant_heating_rate"), f"Unknown temp_program: '{temp_program}'"

    # Check if information about the recorded quantity exists
    if "signal" not in database.get("general_info", {}):
        warnings.warn("* Signal metadata missing in general_info. Using 'Signal' as default label.")

    # Get info on recorded quantity, with fallbacks if missing
    signal_meta = database["general_info"].get("signal", dict())
    signal_name = signal_meta.get("name", "Signal")
    signal_unit = signal_meta.get("unit", "")

    # Get the node for this condition and its raw data
    data_root = database["experiments"]["TGA"][temp_program][condition]
    raw_data = data_root["raw"]

    # Standardised column label definitions
    standard_columns = {
        'time': 'Time',
        'temp': 'Temperature',
        'signal': signal_name}  # User-defined signal (e.g., "Mass", "HeatFlow")

    # Merge user-provided column mapping with defaults,
    # allow replacement of individual key-value pairs instead of a full dict.
    # Defaults to empty dict() if `column_mapping` is None.
    column_mapping = {**standard_columns, **(column_mapping or dict())}

    # Unpack final column names to be used
    time_col_default = standard_columns["time"]
    temp_col_default = standard_columns["temp"]
    signal_col_default = standard_columns["signal"]

    # Set column mapping from user input
    time_col = column_mapping["time"]
    temp_col = column_mapping["temp"]
    signal_col = column_mapping["signal"]

    # Step 1: Find reference time from longest series
    longest_time = None
    for rep in raw_data.values():
        if longest_time is None or rep[time_col].iloc[-1] > longest_time.iloc[-1]:
            longest_time = rep[time_col]
    reference_time = longest_time.values

    # Step 2: Interpolate all repetitions to reference time
    combined_data = {time_col_default: reference_time}
    for rep_name, rep_data in raw_data.items():
        for col, col_default in [(temp_col, temp_col_default), (signal_col, signal_col_default)]:
            if col not in rep_data.columns:
                raise ValueError(f"Missing column '{col}' in repetition '{rep_name}'.")
            combined_data[f"{col_default}_{rep_name}"] = np.interp(
                reference_time, rep_data[time_col], rep_data[col])

    # Step 3: Compute mean and std dev
    for col_default in [temp_col_default, signal_col_default]:
        values = [combined_data[k] for k in combined_data if k.startswith(f"{col_default}_")]
        combined_data[f"{col_default}_Avg"] = np.mean(values, axis=0)
        combined_data[f"{col_default}_Std"] = np.std(values, axis=0)

    # Step 4: Store and return
    df_combined = pd.DataFrame(combined_data)
    data_root["combined"] = df_combined

    return df_combined

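# Illustrative usage sketch (not part of the library API): maps
# instrument-specific column headers onto the standardised labels before
# combining repetitions. The header strings are hypothetical placeholders.
def _example_combine_repetitions(db):
    mapping = {
        "time": "Time (s)",
        "temp": "Temperature (deg C)",
        "signal": "Mass (mg)"}
    combined = combine_repetitions(
        db,
        condition="10_Kmin",
        temp_program="constant_heating_rate",
        column_mapping=mapping)
    # combined now holds "Time", per-repetition columns, "*_Avg" and "*_Std".
    return combined
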
def differential_conversion(differential_data, m_0=None, m_f=None):
    """
    Calculate the conversion (alpha) from differential experimental data.

    Not yet implemented.
    """
    # TODO: add differential conversion computation
    raise NotImplementedError(" * Still under development.")

def integral_conversion(integral_data, m_0=None, m_f=None):
    """
    Calculate the conversion (alpha) from integral experimental data.

    This function computes the conversion (alpha) for a series of integral
    experimental data, such as mass or concentration, based on the formula:

    .. math::
        \\alpha = \\frac{m_0 - m_i}{m_0 - m_f}

    where: m_0 = initial mass/concentration, m_i = instantaneous
    mass/concentration, m_f = final mass/concentration.

    If `m_0` and `m_f` are not provided, they default to the first and last
    values of the `integral_data` series, respectively.

    Parameters
    ----------
    integral_data : pd.Series or np.ndarray
        Experimental data representing integral quantities (e.g., mass or
        concentration over time) to calculate the conversion.
    m_0 : float, optional
        Initial mass/concentration. Defaults to the first value of
        `integral_data`.
    m_f : float, optional
        Final mass/concentration. Defaults to the last value of
        `integral_data`.

    Returns
    -------
    np.ndarray
        Array of alpha values representing the conversion.
    """

    # Convert the input data to a numpy array for calculations
    m_i = series_to_numpy(integral_data)

    # Use the provided m_0 or default to the first value in the series
    m_0 = m_0 if m_0 is not None else m_i[0]

    # Use the provided m_f or default to the last value in the series
    m_f = m_f if m_f is not None else m_i[-1]

    # Calculate conversion (alpha) using the standard formula
    alpha = (m_0 - m_i) / (m_0 - m_f)

    return alpha

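# Illustrative sketch (not part of the library API): a tiny worked example of
# the conversion formula. For a mass series 10 mg -> 4 mg, the sample point
# m_i = 7 mg gives alpha = (10 - 7) / (10 - 4) = 0.5.
def _example_integral_conversion():
    mass = np.array([10.0, 7.0, 4.0])   # mg, integral (mass) signal
    alpha = integral_conversion(mass)   # -> array([0. , 0.5, 1. ])
    return alpha
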
def compute_conversion(database, condition="all", setup="constant_heating_rate"):
    """
    Compute conversion values from experimental or model data for one or more
    conditions.

    This function processes combined experimental data in the given `database`
    and adds conversion values (alpha) under each specified condition.
    Requires combined data to be present under each condition, typically
    created using :func:`combine_repetitions`. The combined data is expected
    to contain the columns 'Time', 'Temperature_Avg', and 'Mass_Avg'.

    Parameters
    ----------
    database : dict
        The main data structure storing all experimental data.
    condition : str or list, optional
        The specific experimental condition(s) to process. If a string is
        provided, it can be a single condition (e.g., "300_C"), or "all" to
        process all conditions. If a list is provided, it should contain
        multiple condition labels.
    setup : str, optional
        The experimental setup to process, either "isothermal" or
        "constant_heating_rate". Defaults to "constant_heating_rate".

    Returns
    -------
    None
        Updates the database in place by adding conversion data under each
        condition.
    """

    # Validate setup
    if setup not in {"isothermal", "constant_heating_rate"}:
        raise ValueError(f"Invalid setup '{setup}'. Must be 'isothermal' or 'constant_heating_rate'.")

    # Get all available conditions for the given setup
    available_conditions = database["experiments"]["TGA"].get(setup, {}).keys()

    # Determine which conditions to process
    if isinstance(condition, str):
        if condition == "all":
            conditions_to_process = available_conditions
        elif condition in available_conditions:
            conditions_to_process = [condition]
        else:
            raise KeyError(f"Condition '{condition}' not found in the '{setup}' setup.")
    elif isinstance(condition, list):
        # Ensure all conditions in the list exist in the database
        invalid_conditions = [cond for cond in condition if cond not in available_conditions]
        if invalid_conditions:
            raise KeyError(f"The following conditions were not found in the '{setup}' setup: {invalid_conditions}")
        conditions_to_process = condition
    else:
        raise TypeError("Condition must be a string ('all', specific condition) or a list of conditions.")

    # Helper function to process a single condition
    def process_condition(cond):
        # Check if combined data exists
        if "combined" not in database["experiments"]["TGA"][setup][cond]:
            raise KeyError(f"No 'combined' data found for condition '{cond}' under '{setup}' setup.")

        # Check for data type
        data_type = database["experiments"]["TGA"][setup][cond].get("data_type")
        if data_type not in {"integral", "differential"}:
            raise ValueError(f"Invalid or missing data type for condition '{cond}' under '{setup}' setup.")

        # Fetch combined data
        combined_data = database["experiments"]["TGA"][setup][cond]["combined"]
        time = combined_data["Time"]
        temp_avg = combined_data["Temperature_Avg"]
        mass_avg = combined_data["Mass_Avg"]

        # Compute conversion based on data type
        if data_type == "integral":
            alpha = integral_conversion(mass_avg)  # Optionally pass m_0 and m_f here
        elif data_type == "differential":
            alpha = differential_conversion(mass_avg)  # Placeholder for differential logic

        # Store the conversion data back in the database
        conversion_data = pd.DataFrame({
            "Time": time,
            "Temperature_Avg": temp_avg,
            "Mass_Avg": mass_avg,
            "Alpha": alpha})
        database["experiments"]["TGA"][setup][cond]["conversion"] = conversion_data

    # Process each condition
    for cond in conditions_to_process:
        process_condition(cond)

def compute_conversion_levels(database, desired_levels=None, setup="constant_heating_rate", condition="all"):
    """
    Interpolate conversion (alpha) data to desired conversion levels
    (fractions) for specified experimental conditions.

    This function interpolates the conversion values across different
    conditions to align them with a common set of points. This ensures
    consistency in follow-up steps such as isoconversional analysis.

    Parameters
    ----------
    database : dict
        The main data structure storing all experimental data. Must follow
        the format initialized by :func:`initialize_investigation_skeleton`.
    desired_levels : array-like, optional
        Desired conversion levels (fractions) for interpolation.
        Defaults to `np.linspace(0.05, 0.95, 37)`.
    setup : str, optional
        The temperature program to process, either "isothermal" or
        "constant_heating_rate". Defaults to "constant_heating_rate".
    condition : str or list, optional
        The specific experimental condition(s) to process. If a string is
        provided, it can be a single condition (e.g., "300_C"), or "all" to
        process all conditions. If a list is provided, it should contain
        multiple condition names.

    Returns
    -------
    None
        Updates the database in place by adding the interpolated conversion
        levels under each condition and setup.
    """

    # Validate setup
    if setup not in {"isothermal", "constant_heating_rate"}:
        raise ValueError(f" * Invalid setup '{setup}'. Must be 'isothermal' or 'constant_heating_rate'.")

    # Default conversion levels.
    if desired_levels is None:
        desired_levels = np.linspace(0.05, 0.95, 37)

    # Check that desired_levels is strictly increasing and within [0, 1]
    if not np.all(np.diff(desired_levels) > 0) or not (0 <= np.min(desired_levels) <= np.max(desired_levels) <= 1):
        raise ValueError(" * `desired_levels` must be a strictly increasing array of values between 0 and 1.")

    # Get all available conditions for the given setup
    available_conditions = database["experiments"]["TGA"].get(setup, {}).keys()

    # Determine which conditions to process
    if isinstance(condition, str):
        if condition == "all":
            conditions_to_process = available_conditions
        elif condition in available_conditions:
            conditions_to_process = [condition]
        else:
            raise KeyError(f"Condition '{condition}' not found in the '{setup}' setup.")
    elif isinstance(condition, list):
        # Ensure all conditions in the list exist in the database
        invalid_conditions = [cond for cond in condition if cond not in available_conditions]
        if invalid_conditions:
            raise KeyError(f" * The following conditions were not found in the '{setup}' setup: {invalid_conditions}")
        conditions_to_process = condition
    else:
        raise TypeError(" * Condition must be a string ('all', specific condition) or a list of conditions.")

    # Helper function to process a single condition
    def process_condition(cond):
        # Check if conversion data exists
        if "conversion" not in database["experiments"]["TGA"][setup][cond]:
            raise KeyError(f" * Conversion data is missing for condition '{cond}'. Run `compute_conversion` first.")

        # Fetch conversion data
        conversion_data = database["experiments"]["TGA"][setup][cond]["conversion"]
        time = conversion_data["Time"]
        temp_avg = conversion_data["Temperature_Avg"]
        alpha_avg = conversion_data["Alpha"]

        # Check if desired levels are within range of the provided conversion data
        if desired_levels[0] < np.min(alpha_avg) or desired_levels[-1] > np.max(alpha_avg):
            raise ValueError(
                f" * Desired levels {desired_levels} exceed the range of available alpha values: "
                f" [{np.min(alpha_avg):.3f}, {np.max(alpha_avg):.3f}] for condition '{cond}'.")

        # Interpolate data
        new_time = np.interp(desired_levels, alpha_avg, time)
        new_temp = np.interp(desired_levels, alpha_avg, temp_avg)

        # Store the conversion levels back in the database
        conversion_fractions = pd.DataFrame({
            "Time": new_time,
            "Temperature_Avg": new_temp,
            "Alpha": desired_levels})
        database["experiments"]["TGA"][setup][cond]["conversion_fractions"] = conversion_fractions

    # Process each condition
    for cond in conditions_to_process:
        process_condition(cond)

def KAS_Ea(temperature, heating_rate, B=1.92, C=1.0008):
    """
    Kissinger–Akahira–Sunose method (KAS), with Starink improvement by default.

    Estimates the activation energy for a given level of conversion
    :math:`E_{\\alpha}`. The estimate is obtained from a linear regression
    under the isoconversional assumption. By default, the Starink improvement
    is used, i.e.: B = 1.92, C = 1.0008
    (https://doi.org/10.1016/S0040-6031(03)00144-8). The baseline KAS
    parameters would be: B = 2.0, C = 1.0. The KAS equation is presented
    below.

    .. math::
        \\ln\\left( \\frac{\\beta_i}{T_{\\alpha,i}^{B}} \\right) =
        \\text{Const} - C \\left( \\frac{E_{\\alpha}}{R\\, T_{\\alpha,i}} \\right)

    Formula 3.10 in:
    Vyazovkin et al. (2011). ICTAC Kinetics Committee recommendations for
    performing kinetic computations on thermal analysis data.
    *Thermochimica Acta*, 520(1–2), 1–19.
    https://doi.org/10.1016/j.tca.2011.03.034

    Parameters
    ----------
    temperature : array-like
        Sample temperatures in Kelvin.
    heating_rate : array-like
        Heating rates in Kelvin per second.
    B : float
        Exponent for the temperature term. Default is 1.92 (Starink
        improvement).
    C : float
        Coefficient for activation energy expression. Default is 1.0008
        (Starink improvement).

    Returns
    -------
    tuple
        A tuple containing:
        - popt (np.ndarray): Fitted parameters (slope, intercept) of the linear fit.
        - Ea_i (float): Apparent activation energy in J/mol.
        - fit_points (tuple): Data points (x, y) used for the linear fit.
    """

    # Ensure numpy arrays
    temperature = series_to_numpy(temperature)
    heating_rate = series_to_numpy(heating_rate)

    # Input validation
    if len(temperature) != len(heating_rate):
        raise ValueError("temperature and heating_rate must have the same length.")
    if np.any(temperature <= 0) or np.any(heating_rate <= 0):
        raise ValueError("temperature and heating_rate must be positive.")
    if np.any(np.power(temperature, B) <= 0):
        raise ValueError("Temperature raised to exponent B must be positive.")

    # Prepare x and y data for the linear fit
    data_x = 1 / temperature
    data_y = np.log(heating_rate / np.power(temperature, B))
    fit_points = (data_x, data_y)

    # Perform the linear fit
    popt, pcov = curve_fit(linear_model, data_x, data_y, maxfev=10000)

    # Extract the fitted parameters
    m_fit, b_fit = popt

    # Calculate estimate of (Ea_i), in J/mol.
    Ea_i = -(m_fit * GAS_CONSTANT) / C

    # TODO: consider to use namedtuple or dataclass
    return popt, Ea_i, fit_points

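# Illustrative sketch (not part of the library API): applies KAS_Ea to the
# temperatures at which a fixed conversion level is reached under three
# heating rates. The temperature values are hypothetical placeholders; note
# that only the slope matters for Ea, so any consistent heating-rate unit
# yields the same estimate.
def _example_KAS_Ea():
    temps_at_alpha = [590.0, 605.0, 621.0]  # K, e.g. at alpha = 0.5
    heating_rates = [2.5, 5.0, 10.0]        # consistent units, e.g. K/min
    popt, Ea, fit_points = KAS_Ea(temps_at_alpha, heating_rates)
    m_fit, b_fit = popt                     # slope and intercept of the fit
    return Ea                               # apparent activation energy, J/mol
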
def compute_Ea_KAS(database, data_keys=["experiments", "TGA", "constant_heating_rate"], **kwargs):
    """
    Wrapper function to easily compute activation energies using the
    Kissinger–Akahira–Sunose (KAS) method.

    This function applies the KAS method to interpolated conversion data (as
    prepared by :func:`compute_conversion_levels`) and estimates the
    activation energy (:math:`E_a`) at each conversion level. It computes
    linear regression statistics, including RMSE and R², and stores the
    results in a DataFrame. The final DataFrame is stored in the database at
    the location defined by `data_keys`.

    This function requires that :func:`compute_conversion_levels` has been
    called beforehand to prepare the input data.

    Parameters
    ----------
    database : dict
        The main data structure storing all experimental data. Must follow
        the format initialized by :func:`initialize_investigation_skeleton`.
    data_keys : list
        Keys that define the path to the relevant dataset inside the
        database. For example: ["experiments", "TGA", "constant_heating_rate"].
    **kwargs
        Additional arguments passed to :func:`KAS_Ea`. For example, to
        override the default Starink constants (B, C).

    Returns
    -------
    None
        Adds a DataFrame named ``Ea_results_KAS`` to the corresponding
        location in the database. The DataFrame contains:

        - ``Conversion``: Conversion level α
        - ``Ea``: Activation energy in J/mol
        - ``m_fit``: Slope of the linear fit
        - ``b_fit``: Intercept of the linear fit
        - ``R_squared``: Coefficient of determination
        - ``RMSE``: Root mean square error
        - ``x1 ... xN`` / ``y1 ... yN``: Data points used in the linear fit,
          one pair per heating rate
    """

    # Safely access the dataset
    dataset = get_nested_value(database, data_keys)
    if dataset is None:
        raise ValueError(f"Dataset not found at the specified keys: {data_keys}")

    # Safely access the parent dictionary to store results
    store_Ea = get_nested_value(database, data_keys[:-1])
    if store_Ea is None:
        raise ValueError(f"Unable to store results; parent keys not found: {data_keys[:-1]}")

    # Sort the keys of the dataset based on the heating rate values in the nested dictionary
    set_value_keys = sorted(dataset.keys(), key=lambda x: dataset[x]["set_value"]["value"])

    # Sort the set values themselves
    set_values = sorted(dataset[key]["set_value"]["value"] for key in dataset.keys())

    # Get the conversion levels (shared across conditions, taken from the first one)
    conversion_levels = dataset[set_value_keys[0]]["conversion_fractions"]["Alpha"]

    # Prepare data collection.
    Ea = list()
    m = list()
    b = list()
    r_squared = list()
    rmse = list()

    # Prepare placeholders for x and y values
    xy_data = {f"x{i+1}": [] for i in range(len(set_value_keys))}
    xy_data.update({f"y{i+1}": [] for i in range(len(set_value_keys))})

    # Iterate through conversion levels and compute results
    for conv_id, conversion_level in enumerate(conversion_levels):
        conversion_temperatures = list()
        for set_value_id, set_value_key in enumerate(set_value_keys):
            conv_temp = dataset[set_value_key]["conversion_fractions"]["Temperature_Avg"].iloc[conv_id]
            conversion_temperatures.append(conv_temp)

        # Compute activation energy
        popt, Ea_i, fit_points = KAS_Ea(conversion_temperatures, set_values, **kwargs)
        Ea.append(Ea_i)

        # Extract and store the fitted parameters.
        m_fit, b_fit = popt
        m.append(m_fit)
        b.append(b_fit)

        # Generate y-values from the fitted model.
        data_x, data_y = fit_points
        y_fit = linear_model(data_x, m_fit, b_fit)

        # Calculate residuals.
        residuals_i = calculate_residuals(data_y, y_fit)

        # Calculate R-squared.
        r_squared_i = calculate_R_squared(residuals_i, data_y)
        r_squared.append(r_squared_i)

        # Calculate RMSE.
        rmse_i = calculate_RMSE(residuals_i)
        rmse.append(rmse_i)

        # Store x and y values dynamically
        for i, (x_val, y_val) in enumerate(zip(data_x, data_y)):
            xy_data[f"x{i+1}"].append(x_val)
            xy_data[f"y{i+1}"].append(y_val)

    # Combine results
    Ea_results = pd.DataFrame(
        {"Conversion": conversion_levels,
         "Ea": np.array(Ea),
         "m_fit": np.array(m),
         "b_fit": np.array(b),
         "R_squared": np.array(r_squared),
         "RMSE": np.array(rmse),
         **xy_data})

    # Collect results.
    store_Ea["Ea_results_KAS"] = Ea_results

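# Illustrative usage sketch (not part of the library API): runs the KAS wrapper
# on the constant-heating-rate branch and reads back the resulting activation
# energies together with the goodness-of-fit statistics.
def _example_compute_Ea_KAS(db):
    compute_Ea_KAS(db, data_keys=["experiments", "TGA", "constant_heating_rate"])
    # Results are stored one level above the dataset, i.e. under the "TGA" node.
    Ea_results = db["experiments"]["TGA"]["Ea_results_KAS"]
    return Ea_results[["Conversion", "Ea", "R_squared"]]
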
def exp_difference(offset, temp_x1, temp_x2, data_y1, data_y2):
    """
    Compute the RMSE between two data series after applying a shift to the
    second x-axis.

    The function shifts `temp_x2` by a given `offset`, interpolates the
    corresponding `data_y2` onto the `temp_x1` grid using linear
    interpolation, and computes the root mean square error (RMSE) between
    `data_y1` and the interpolated `data_y2`. This is useful for aligning two
    experimental or simulated curves when a shift in the x-dimension (e.g.,
    time or temperature) is expected.

    Linear interpolation is done via `scipy.interpolate.interp1d` with
    extrapolation enabled outside the original `temp_x2` range.

    Parameters
    ----------
    offset : float
        Horizontal shift applied to `temp_x2` before interpolation.
    temp_x1 : array-like
        x-values of the alignment target (evaluation grid).
    temp_x2 : array-like
        x-values of the data series to be shifted.
    data_y1 : array-like
        y-values corresponding to `temp_x1`.
    data_y2 : array-like
        y-values corresponding to `temp_x2`.

    Returns
    -------
    float
        Root mean square error (RMSE) between `data_y1` and the interpolated,
        shifted version of `data_y2`.
    """

    # Interpolate data_y2 at temp_x1, shifted by offset
    interpolation = interp1d(temp_x2 + offset, data_y2, kind='linear', fill_value="extrapolate")
    data_y2_shifted = interpolation(temp_x1)

    # Compute RMSE
    residuals = calculate_residuals(data_y1, data_y2_shifted)
    RMSE = calculate_RMSE(residuals)

    return RMSE

def compute_optimal_shift(initial_guess, temp_x1, temp_x2, data_y1, data_y2, method="Powell"):
    """
    Find the optimal horizontal shift that aligns two data series by
    minimizing RMSE.

    This function optimizes the x-axis shift applied to `temp_x2` so that the
    interpolated `data_y2` best matches `data_y1`, evaluated over `temp_x1`.
    The objective function is root mean square error (RMSE), minimized using
    `scipy.optimize.minimize`.

    Parameters
    ----------
    initial_guess : float
        Initial guess for the x-axis shift.
    temp_x1 : array-like
        x-values of the alignment target (evaluation grid).
    temp_x2 : array-like
        x-values of the data series to be shifted.
    data_y1 : array-like
        y-values corresponding to `temp_x1`.
    data_y2 : array-like
        y-values corresponding to `temp_x2`.
    method : str, optional
        Optimization method passed to `scipy.optimize.minimize`.
        Default is "Powell" for robustness against local minima.

    Returns
    -------
    float
        The optimal shift that minimizes RMSE between the two series.
    """

    # Optimize temperature offset
    result = minimize(fun=exp_difference,
                      x0=[initial_guess],
                      args=(temp_x1, temp_x2, data_y1, data_y2),
                      method=method)

    # Get optimal shift
    optimal_shift = result.x[0]

    return optimal_shift

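# Illustrative sketch (not part of the library API): aligns a synthetically
# shifted copy of a curve back onto the original; the recovered shift should
# be close to the true offset of 5.0 used to generate the data.
def _example_compute_optimal_shift():
    temp_x1 = np.linspace(300.0, 800.0, 200)            # reference grid, K
    data_y1 = np.exp(-((temp_x1 - 600.0) / 40.0) ** 2)  # reference curve
    true_offset = 5.0
    temp_x2 = temp_x1 - true_offset                      # shifted x-axis
    data_y2 = data_y1.copy()                             # same curve on shifted grid
    shift = compute_optimal_shift(0.0, temp_x1, temp_x2, data_y1, data_y2)
    return shift                                         # expected to be approx. 5.0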