Source code for optframework.kernel_opt.opt_base

# -*- coding: utf-8 -*-

import sys
import time
import numpy as np
import os
import runpy
from pathlib import Path
import warnings
import pandas as pd
import ray
# from ..pbe.dpbe_base import DPBESolver
from .opt_core import OptCore
from .opt_core_multi import OptCoreMulti
# from optframework.utils.func.func_read_exp import write_read_exp
from optframework.utils.func.bind_methods import bind_methods_from_module
from .opt_base_ray import OptBaseRay
from optframework.utils.func.print import print_highlighted
## For plots
# import matplotlib.pyplot as plt
# from ..utils.plotter import plotter as pt        

class OptBase():
    """
    A class to manage the optimization process for finding the kernel of PBE.

    This class is responsible for instantiating either the `opt_algo` or `opt_algo_multi`
    classes based on the provided configuration. It facilitates passing parameters,
    executing optimization, generating synthetic data, and visualizing results.

    Note
    ----
    This class uses the `bind_methods_from_module` function to dynamically bind methods
    from external modules. Some methods in this class are not explicitly defined here,
    but instead are imported from other files. To fully understand or modify those methods,
    please refer to the corresponding external files, such as
    `optframework.kernel_opt.opt_base_ray`, from which methods are bound to this class.

    Methods
    -------
    __init__(config_path=None, data_path=None, multi_flag=None)
        Initializes the class with configuration and data paths.
    """
    def __init__(self, config_path=None, data_path=None, multi_flag=None):
        """
        Initializes the OptBase class with configuration and data paths.

        Parameters
        ----------
        config_path : str, optional
            Path to the configuration file. If not provided, a default configuration path is used.
        data_path : str, optional
            Path to the directory where data files will be stored. If not provided, a default path is used.
        multi_flag : bool, optional
            If provided, overrides the `multi_flag` entry of the configuration file.

        Raises
        ------
        Exception
            If the requirements file for ray is not found.
        """
        self.base_ray = OptBaseRay(self)
        # Get the current script directory and the requirements file path
        # self.pth = os.path.dirname( __file__ )
        self.work_dir = Path(os.getcwd()).resolve()
        # Load the configuration file
        config = self.check_config_path(config_path)
        self.core_params = config['algo_params']
        self.pop_params = config['pop_params']
        if multi_flag is None:
            self.multi_flag = config['multi_flag']
        else:
            self.multi_flag = multi_flag
        self.single_case = config['single_case']
        print_highlighted(f'Current operating mode: single_case = {self.single_case}, multi_flag = {self.multi_flag}.',
                          title="INFO", color="cyan")
        self.opt_params_space = config['opt_params']
        self.dim = self.core_params.get('dim', None)

        # Set the data path, use default if not provided
        if data_path is None:
            print_highlighted('Data path is not found or is None, default path will be used.',
                              title="INFO", color="cyan")
            self.data_path = os.path.join(self.work_dir, "data")
        else:
            self.data_path = data_path
        os.makedirs(self.data_path, exist_ok=True)

        # Initialize the optimization core algorithm
        self.init_opt_core()
        # Initialize t_vec for file generation
        self.idt_vec = [np.where(self.core.t_all == t_time)[0][0] for t_time in self.core.t_vec]
        self.check_core_params()
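    # --- Usage sketch (illustrative, not executed) ---------------------------
    # A minimal sketch of how OptBase is typically constructed. The paths
    # "config/opt_config.py" and "data" are placeholders and must exist in the
    # working directory (or be replaced by your own locations):
    #
    #     from optframework.kernel_opt.opt_base import OptBase
    #
    #     opt = OptBase(config_path="config/opt_config.py", data_path="data")
    #     # multi_flag can also be overridden here instead of editing the config file:
    #     opt_2d = OptBase(config_path="config/opt_config.py", multi_flag=True)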
    def check_core_params(self):
        """
        Check the validity of optimization core parameters.
        """
        ### verbose, delta_flag, method, noise_type, t_vec, t_init...
        pass
    def check_config_path(self, config_path):
        """
        Checks if the configuration file exists and loads it.

        Parameters
        ----------
        config_path : str
            Path to the configuration file. If None, a default path is used.

        Returns
        -------
        config : dict
            The loaded configuration dictionary.

        Raises
        ------
        Exception
            If the configuration file is not found at the specified path.
        """
        # Check if the configuration file exists and load it
        if config_path is None:
            # Use the default configuration file path if none is provided
            # config_path = os.path.join(self.pth, "..","..","config","opt_config.py")
            # config_name = "opt_config"
            config_path = os.path.join(self.work_dir, "config", "opt_config.py")
        if not os.path.exists(config_path):
            # Raise an exception if the config file is not found
            raise Exception(f"Warning: Config file not found at: {config_path}.")
        else:
            # Load the configuration from the specified file
            conf = runpy.run_path(config_path)
            config = conf['config']
            print_highlighted(f"The Optimization and dPBE are using config file at : {config_path}",
                              title="INFO", color="cyan")
            return config
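    # --- Config file sketch (illustrative) -----------------------------------
    # check_config_path() executes the config file with runpy and reads the
    # module-level dictionary named `config`. A minimal sketch containing only
    # the keys accessed in __init__; the sub-keys shown are placeholders, the
    # full set of supported entries is defined by OptCore.init_attr/init_pbe:
    #
    #     # opt_config.py
    #     config = {
    #         'algo_params': {'dim': 2},   # optimization core settings
    #         'pop_params': {},            # PBE / population settings
    #         'opt_params': {},            # search space of the kernel parameters
    #         'multi_flag': True,          # use 1D data as auxiliary for 2D kernels
    #         'single_case': False,        # optimize one dataset vs. a list of datasets
    #     }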
    def init_opt_core(self):
        """
        Initializes the optimization core based on whether 1D data is used as auxiliary
        for 2D-PBE kernels.

        The `multi_flag` indicates whether to use 1D data as auxiliary input to help
        calculate 2D kernels. If `multi_flag` is True, the optimization process uses
        both 1D and 2D data. If False, only 2D data is used for the kernel calculation.
        """
        # Initialize the optimization core based on dimensionality and multi_flag
        if self.dim == 1 and self.multi_flag:
            # If the dimension is 1, the multi algorithm is not applicable
            print_highlighted("The multi algorithm does not support 1-D pop!",
                              title="WARNING", color="yellow")
            self.multi_flag = False
        # Initialize optimization core with or without 1D data based on multi_flag
        if not self.multi_flag:
            self.core = OptCore()
        else:
            self.core = OptCoreMulti()
        # Initialize attributes for the optimization core
        self.core.init_attr(self.core_params)
        if self.dim == 2 and not self.multi_flag and self.core.calc_init_N:
            raise ValueError("2d PSD can only use exp data to calculate initial conditions if multi_flag is enabled!")
        # Initialize PBE with population parameters and data path
        self.core.init_pbe(self.pop_params, self.data_path)
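    # Mode selection summary (derived from the logic in init_opt_core above):
    #   multi_flag == False           -> self.core = OptCore()      (no auxiliary 1D data)
    #   multi_flag == True, dim > 1   -> self.core = OptCoreMulti() (1D + 2D data)
    #   multi_flag == True, dim == 1  -> flag is reset to False and OptCore() is used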
    def find_opt_kernels(self, method='kernels', data_names=None, known_params=None):
        """
        Finds optimal kernels for the PBE model by minimizing the difference between
        simulation results and experimental data.

        This method optimizes kernel parameters for the PBE (Population Balance Equation)
        model. It supports two optimization methods: 'kernels' and 'delta'. The 'kernels'
        method optimizes the kernel for each dataset and computes an average kernel,
        while 'delta' uses averaged delta values before optimization.

        Parameters
        ----------
        method : str, optional
            The optimization method to use. Options are:
            - 'kernels': Optimizes kernel parameters for each data set and averages the results.
            - 'delta': Averages the delta values before optimization, leading to a single kernel.
        data_names : str or list of str, optional
            The name(s) of the experimental data file(s). If multiple datasets are provided,
            the optimization will be performed for each dataset.
        known_params : list, optional
            Known parameters to be used during optimization. This should match the length
            of `data_names`.

        Returns
        -------
        dict or list of dict
            A dictionary (or a list of dictionaries for multiple datasets) containing
            optimized kernels and their respective optimization results.
        """
        # Warn if the component or population parameters have not been set
        if self.core.set_comp_para_flag is False:
            warnings.warn('Component parameters have not been set')
        if self.core.set_init_pop_para_flag is False:
            warnings.warn('Initial PBE parameters have not been set')
        # Raise an error if data_names are not provided
        if data_names is None:
            raise ValueError("Please specify the name of the experiment data without labels!")
        print_highlighted(f"Now the flag of resume tuning is: {self.core.resume_unfinished}",
                          title="INFO", color="cyan")

        # Helper function to construct full file paths for the data files
        def join_paths(names):
            if isinstance(names, list):
                return [os.path.join(self.data_path, name) for name in names]
            return os.path.join(self.data_path, names)

        exp_data_paths = []
        if isinstance(known_params, dict) and not known_params:
            known_params = None
        # Handle multi-flag (whether auxiliary 1D data is used for 2D-PBE)
        if self.multi_flag:
            # We are dealing with multiple datasets
            if not self.single_case:
                # Ensure known_params is of the same length as data_names, even if empty
                if known_params is None:
                    known_params = [None] * len(data_names)
                # Generate file paths for multiple datasets
                for data_names_ex in data_names:
                    exp_data_paths.append(join_paths(data_names_ex))
            else:
                # Single dataset optimization
                exp_data_paths = join_paths(data_names)
        else:
            if not self.single_case:
                if known_params is None:
                    known_params = [None] * len(data_names)
            exp_data_paths = join_paths(data_names)

        # Initialize ray for parallel computation
        # log_to_driver = True if self.core.verbose != 0 else False
        # ray.init(log_to_driver=log_to_driver)
        # ray.init(address=os.environ["ip_head"], log_to_driver=log_to_driver)
        if method == 'kernels':
            # Currently, this method is not implemented
            print_highlighted("not coded yet", title="ERROR", color="red")
        elif method == 'delta':
            # Perform multi-job optimization if enabled
            if self.core.multi_jobs:
                result_dict = self.base_ray.multi_optimierer_ray(self.opt_params_space,
                                                                 exp_data_paths=exp_data_paths,
                                                                 known_params=known_params)
            else:
                # Perform sequential optimization for multiple datasets
                result_dict = []
                if not self.single_case and not self.core.exp_data:
                    for exp_data_paths_tem, known_params_tem in zip(exp_data_paths, known_params):
                        result_dict_tem = self.base_ray.optimierer_ray(self.opt_params_space,
                                                                       exp_data_paths=exp_data_paths_tem,
                                                                       known_params=known_params_tem)
                        result_dict.append(result_dict_tem)
                else:
                    # Perform optimization for a single dataset
                    result_dict = self.base_ray.optimierer_ray(self.opt_params_space,
                                                               exp_data_paths=exp_data_paths,
                                                               known_params=known_params)
        # Print the current actors (for debugging purposes) and shut down ray
        # self.print_current_actors()
        # ray.shutdown()
        return result_dict
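    # --- Usage sketch (illustrative, not executed) ---------------------------
    # Assuming `opt` is an initialized OptBase instance and the listed file names
    # exist in opt.data_path (all names are placeholders). With multi_flag enabled
    # and single_case disabled, each entry of data_names groups the 1D and 2D
    # files that belong to one experiment:
    #
    #     result = opt.find_opt_kernels(
    #         method='delta',
    #         data_names=[["exp_1d_comp1.ext", "exp_1d_comp2.ext", "exp_2d.ext"]],
    #         known_params=[None],
    #     )
    #     # `result` is a dict for a single dataset or a list of dicts for several.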
    def calc_PSD_delta(self, params, exp_data_path):
        """
        Directly calculates the delta value using the input parameters.

        The delta is obtained by comparing the PSD computed from the input parameters
        with the experimental data. This method can also be used to compute the
        theoretical minimum delta, which makes it useful for validating the accuracy
        of the calculated value for synthetically generated data.

        Parameters
        ----------
        params : dict
            The parameters used to calculate the particle size distribution (PSD) and delta value.
        exp_data_path : str or list of str
            The path(s) to the experimental or synthetic PSD data file(s). For
            multi-dimensional data, this is a list containing paths for both 1D and 2D data.

        Returns
        -------
        tuple
            A tuple containing:
            - delta : float
                The calculated difference between the input parameters and the experimental PSD.
            - exp_data_path_ori : str
                The original experimental data path.
        """
        # Initialize particle distribution if necessary
        if self.core.calc_init_N:
            self.core.calc_init_from_data(exp_data_path, init_flag='mean')
        if isinstance(exp_data_path, list):
            # Handle multi-dimensional data (1D + 2D)
            exp_data_path_ori = exp_data_path[0]
            x_uni_exp = []
            data_exp = []
            for exp_data_path_tem in exp_data_path:
                x_uni_exp_tem, data_exp_tem = self.core.p.get_all_data(exp_data_path_tem)
                x_uni_exp.append(x_uni_exp_tem)
                data_exp.append(data_exp_tem)
        else:
            exp_data_path_ori = exp_data_path
            x_uni_exp, data_exp = self.core.p.get_all_data(exp_data_path)
        # Calculate the delta value based on the difference between simulated and experimental data
        delta = self.core.calc_delta(params, x_uni_exp, data_exp)
        return delta, exp_data_path_ori
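    # --- Usage sketch (illustrative, not executed) ---------------------------
    # Computing the theoretical minimum delta for synthetic data by feeding the
    # known (ground-truth) kernel parameters back in; `true_params` and the file
    # name are placeholders:
    #
    #     true_params = {...}  # kernel parameters used to generate the synthetic data
    #     delta, path_used = opt.calc_PSD_delta(
    #         true_params,
    #         os.path.join(opt.data_path, "synthetic_psd_data.ext"),
    #     )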
# Bind methods from another module into this class
bind_methods_from_module(OptBase, 'optframework.kernel_opt.opt_base_ray')