ml_pid_cbm.tools.json_tools
Module for operating the json config file.
"""
Module for operating the json config file.

Every loader opens the configuration file explicitly as UTF-8 (the encoding
JSON documents are exchanged in), so the result does not depend on the
locale of the machine running the code.
"""

import json
from typing import Dict, List, Tuple


def create_cut_string(lower: float, upper: float, cut_name: str) -> str:
    """
    Creates a cut string for hipe4ml loader in the format "lower_value <= cut_name < upper_value".

    Parameters
    ----------
    lower : float
        Value of the lower (inclusive) cut, rounded to 1 decimal place.
    upper : float
        Value of the upper (exclusive) cut, rounded to 1 decimal place.
    cut_name : str
        Name of the cut variable.

    Returns
    -------
    str
        Formatted string in the format "lower_value <= cut_name < upper_value".
    """
    return f"{lower:.1f} <= {cut_name} < {upper:.1f}"


def load_quality_cuts(json_file_name: str) -> List[str]:
    """
    Loads quality cuts defined in a JSON file into a list of strings.

    Parameters
    ----------
    json_file_name : str
        Name of the JSON file containing defined cuts.

    Returns
    -------
    List[str]
        List of strings containing cuts definitions, one per entry of the
        "cuts" object, in the order in which they appear in the file.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    KeyError
        If "cuts" is missing, or a cut has no "lower" or "upper" entry.
    """
    with open(json_file_name, "r", encoding="utf-8") as json_file:
        cuts: Dict[str, Dict[str, float]] = json.load(json_file)["cuts"]

    return [
        create_cut_string(cut_data["lower"], cut_data["upper"], cut_name)
        for cut_name, cut_data in cuts.items()
    ]


def load_var_name(json_file_name: str, var: str) -> str:
    """
    Loads the physical variable name used in the tree from a JSON file.

    Parameters
    ----------
    json_file_name : str
        Name of the JSON file with var_names.
    var : str
        Physical variable we look for.

    Returns
    -------
    str
        Name of the physical variable in our tree structure loaded from the JSON file.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    KeyError
        If "var_names" or the requested variable is missing.
    """
    with open(json_file_name, "r", encoding="utf-8") as json_file:
        var_names: Dict[str, str] = json.load(json_file)["var_names"]
    return var_names[var]


def load_file_name(json_file_name: str, training_or_test: str) -> str:
    """
    Loads the file name of the requested dataset.

    Parameters
    ----------
    json_file_name : str
        JSON file containing filenames.
    training_or_test : str
        Name of the dataset (e.g., "test", "training") as defined in the JSON file to load the dataset filename.

    Returns
    -------
    str
        Filename of the specified dataset.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    KeyError
        If "file_names" or the requested dataset name is missing.
    """
    with open(json_file_name, "r", encoding="utf-8") as json_file:
        file_names: Dict[str, str] = json.load(json_file)["file_names"]
    return file_names[training_or_test]


def load_features_for_train(json_file_name: str) -> List[str]:
    """
    Load names of variables for training from a JSON file.

    Parameters
    ----------
    json_file_name : str
        Name of the JSON file.

    Returns
    -------
    List[str]
        List of variables for training.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    KeyError
        If "features_for_train" is missing.
    """
    with open(json_file_name, "r", encoding="utf-8") as json_file:
        features_for_train = json.load(json_file)["features_for_train"]
    return features_for_train


def load_vars_to_draw(json_file_name: str) -> List[str]:
    """
    Load names of variables to draw from a JSON file.

    Parameters
    ----------
    json_file_name : str
        Name of the JSON file.

    Returns
    -------
    List[str]
        List of variables to draw.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    KeyError
        If "vars_to_draw" is missing.
    """
    with open(json_file_name, "r", encoding="utf-8") as json_file:
        vars_to_draw = json.load(json_file)["vars_to_draw"]
    return vars_to_draw


def load_hyper_params_vals(json_file_name: str) -> Tuple[str, str, str]:
    """
    Loads XGBoost hyperparameters values from a JSON file to skip optimization.

    The values are returned exactly as stored under
    ``hyper_params -> values``; no type conversion is performed here.
    NOTE(review): the return annotation declares strings, but JSON numbers
    would come back as int/float - confirm against the config files in use.

    Parameters
    ----------
    json_file_name : str
        Name of the JSON file.

    Returns
    -------
    Tuple[str, str, str]
        Tuple containing n_estimators, max_depth, and learning_rate.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    KeyError
        If "hyper_params", "values" or one of the three hyperparameters is missing.
    """
    with open(json_file_name, "r", encoding="utf-8") as json_file:
        hyper_params_vals = json.load(json_file)["hyper_params"]["values"]
    n_estimators = hyper_params_vals["n_estimators"]
    max_depth = hyper_params_vals["max_depth"]
    learning_rate = hyper_params_vals["learning_rate"]
    return n_estimators, max_depth, learning_rate
def create_cut_string(lower: float, upper: float, cut_name: str) -> str:
    """
    Build a selection string of the form "lower_value <= cut_name < upper_value".

    The lower bound is inclusive and the upper bound is exclusive; both
    numbers are written with exactly one digit after the decimal point.

    Parameters
    ----------
    lower : float
        Lower edge of the cut.
    upper : float
        Upper edge of the cut.
    cut_name : str
        Name of the variable the cut is applied to.

    Returns
    -------
    str
        The cut expression, e.g. "0.0 <= x < 3.1".
    """
    lower_text = f"{lower:.1f}"
    upper_text = f"{upper:.1f}"
    return f"{lower_text} <= {cut_name} < {upper_text}"
Creates a cut string for hipe4ml loader in the format "lower_value <= cut_name < upper_value".
Parameters
lower : float Value of the lower cut, rounded to 1 decimal place. upper : float Value of the upper cut, rounded to 1 decimal place. cut_name : str Name of the cut variable.
Returns
str Formatted string in the format "lower_value <= cut_name < upper_value".
def load_quality_cuts(json_file_name: str) -> List[str]:
    """
    Loads quality cuts defined in a JSON file into a list of strings.

    The file is read as UTF-8 so that the result does not depend on the
    locale of the machine running the code.

    Parameters
    ----------
    json_file_name : str
        Name of the JSON file containing defined cuts.

    Returns
    -------
    List[str]
        List of strings containing cuts definitions, one per entry of the
        "cuts" object, in the order in which they appear in the file.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    KeyError
        If "cuts" is missing, or a cut has no "lower" or "upper" entry.
    """
    with open(json_file_name, "r", encoding="utf-8") as json_file:
        cuts: Dict[str, Dict[str, float]] = json.load(json_file)["cuts"]

    return [
        create_cut_string(cut_data["lower"], cut_data["upper"], cut_name)
        for cut_name, cut_data in cuts.items()
    ]
Loads quality cuts defined in a JSON file into an array of strings.
Parameters
json_file_name : str Name of the JSON file containing defined cuts.
Returns
List[str] List of strings containing cuts definitions.
def load_var_name(json_file_name: str, var: str) -> str:
    """
    Loads the physical variable name used in the tree from a JSON file.

    The file is read as UTF-8 so that the result does not depend on the
    locale of the machine running the code.

    Parameters
    ----------
    json_file_name : str
        Name of the JSON file with var_names.
    var : str
        Physical variable we look for.

    Returns
    -------
    str
        Name of the physical variable in our tree structure loaded from the JSON file.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    KeyError
        If "var_names" or the requested variable is missing.
    """
    with open(json_file_name, "r", encoding="utf-8") as json_file:
        var_names: Dict[str, str] = json.load(json_file)["var_names"]
    return var_names[var]
Loads the physical variable name used in the tree from a JSON file.
Parameters
json_file_name : str Name of the JSON file with var_names. var : str Physical variable we look for.
Returns
str Name of the physical variable in our tree structure loaded from the JSON file.
def load_file_name(json_file_name: str, training_or_test: str) -> str:
    """
    Loads the file name of the requested dataset.

    The file is read as UTF-8 so that the result does not depend on the
    locale of the machine running the code.

    Parameters
    ----------
    json_file_name : str
        JSON file containing filenames.
    training_or_test : str
        Name of the dataset (e.g., "test", "training") as defined in the JSON file to load the dataset filename.

    Returns
    -------
    str
        Filename of the specified dataset.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    KeyError
        If "file_names" or the requested dataset name is missing.
    """
    with open(json_file_name, "r", encoding="utf-8") as json_file:
        file_names: Dict[str, str] = json.load(json_file)["file_names"]
    return file_names[training_or_test]
Loads the file name of the requested dataset (e.g. the training or the test dataset).
Parameters
json_file_name : str JSON file containing filenames. training_or_test : str Name of the dataset (e.g., "test", "training") as defined in the JSON file to load the dataset filename.
Returns
str Filename of the specified dataset.
def load_features_for_train(json_file_name: str) -> List[str]:
    """
    Load names of variables for training from a JSON file.

    The file is read as UTF-8 so that the result does not depend on the
    locale of the machine running the code.

    Parameters
    ----------
    json_file_name : str
        Name of the JSON file.

    Returns
    -------
    List[str]
        List of variables for training.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    KeyError
        If "features_for_train" is missing.
    """
    with open(json_file_name, "r", encoding="utf-8") as json_file:
        features_for_train = json.load(json_file)["features_for_train"]
    return features_for_train
Load names of variables for training from a JSON file.
Parameters
json_file_name : str Name of the JSON file.
Returns
List[str] List of variables for training.
def load_vars_to_draw(json_file_name: str) -> List[str]:
    """
    Load names of variables to draw from a JSON file.

    The file is read as UTF-8 so that the result does not depend on the
    locale of the machine running the code.

    Parameters
    ----------
    json_file_name : str
        Name of the JSON file.

    Returns
    -------
    List[str]
        List of variables to draw.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    KeyError
        If "vars_to_draw" is missing.
    """
    with open(json_file_name, "r", encoding="utf-8") as json_file:
        vars_to_draw = json.load(json_file)["vars_to_draw"]
    return vars_to_draw
Load names of variables to draw from a JSON file.
Parameters
json_file_name : str Name of the JSON file.
Returns
List[str] List of variables to draw.
def load_hyper_params_vals(json_file_name: str) -> Tuple[str, str, str]:
    """
    Loads XGBoost hyperparameters values from a JSON file to skip optimization.

    The file is read as UTF-8 so that the result does not depend on the
    locale of the machine running the code. The values are returned exactly
    as stored under ``hyper_params -> values``; no type conversion is
    performed here.
    NOTE(review): the return annotation declares strings, but JSON numbers
    would come back as int/float - confirm against the config files in use.

    Parameters
    ----------
    json_file_name : str
        Name of the JSON file.

    Returns
    -------
    Tuple[str, str, str]
        Tuple containing n_estimators, max_depth, and learning_rate.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    KeyError
        If "hyper_params", "values" or one of the three hyperparameters is missing.
    """
    with open(json_file_name, "r", encoding="utf-8") as json_file:
        hyper_params_vals = json.load(json_file)["hyper_params"]["values"]
    n_estimators = hyper_params_vals["n_estimators"]
    max_depth = hyper_params_vals["max_depth"]
    learning_rate = hyper_params_vals["learning_rate"]
    return n_estimators, max_depth, learning_rate
Loads XGBoost hyperparameters values from a JSON file to skip optimization.
Parameters
json_file_name : str Name of the JSON file.
Returns
Tuple[str, str, str] Tuple containing n_estimators, max_depth, and learning_rate.