ml_pid_cbm.tools.load_data
Module for loading data saved in .tree format into hipe4ml.TreeHandler, data cleaning and preparing training and test dataset.
"""
Module for loading data saved in .tree format into hipe4ml.TreeHandler,
data cleaning and preparing training and test dataset.
"""

from typing import Tuple

from hipe4ml.model_handler import ModelHandler
from hipe4ml.tree_handler import TreeHandler

from . import json_tools
from .particles_id import ParticlesId as Pid


class LoadData:
    """
    Class for loading data stored in .tree format into hipe4ml.TreeHandler,
    data cleaning and preparing dataset for training and testing of the ML model
    """

    def __init__(
        self,
        data_file_name: str,
        json_file_name: str,
        lower_p_cut: float,
        upper_p_cut: float,
        anti_particles: bool,
    ):
        """
        Initializes the LoadData object.

        Only stores the configuration; no file is read here.

        Parameters
        ----------
        data_file_name : str
            Name of the data file in .tree format.

        json_file_name : str
            Name of the JSON file containing variable names and cuts definitions.

        lower_p_cut : float
            Value of the lower momentum cut.

        upper_p_cut : float
            Value of the upper momentum cut.

        anti_particles : bool
            True selects negatively charged tracks (antiparticles),
            False selects positively charged tracks.
        """
        self.data_file_name = data_file_name
        self.lower_p_cut = lower_p_cut
        self.upper_p_cut = upper_p_cut
        self.anti_particles = anti_particles
        self.json_file_name = json_file_name

    def get_protons_kaons_pions(
        self,
        tree_handler: TreeHandler,
        nsigma: float = 5,
        anti_particles: bool = None,
        nsigma_proton: float = None,
        nsigma_kaon: float = None,
        nsigma_pion: float = None,
        json_file_name: str = None,
    ) -> Tuple[TreeHandler, TreeHandler, TreeHandler]:
        """
        Gets protons, kaons, and pions from a TreeHandler in the nsigma region.

        Pions, muons, and electrons are selected together as one "pion" sample.

        Parameters
        ----------
        tree_handler : TreeHandler
            TreeHandler containing the data.

        nsigma : float, optional
            Number of sigma for data cleaning, by default 5.

        anti_particles : bool, optional
            True selects antiparticles, False selects particles.
            None (default) falls back to the value stored on the instance.

        nsigma_proton : float, optional
            Number of sigma for protons, if not specified uses nsigma.

        nsigma_kaon : float, optional
            Number of sigma for kaons, if not specified uses nsigma.

        nsigma_pion : float, optional
            Number of sigma for pions, if not specified uses nsigma.

        json_file_name : str, optional
            Name of the JSON file containing variable names.
            None (default) falls back to the value stored on the instance.

        Returns
        -------
        Tuple[TreeHandler, TreeHandler, TreeHandler]
            Tuple containing TreeHandlers for protons, kaons, and pions.

        Raises
        ------
        ValueError
            If the resolved ``anti_particles`` flag is not a bool.
        """
        # None means "not given". An explicit False must not be replaced by
        # the instance default, so test for None instead of truthiness.
        if anti_particles is None:
            anti_particles = self.anti_particles
        if not isinstance(anti_particles, bool):
            raise ValueError(
                f"anti_particles must be True or False, got {anti_particles!r}"
            )
        nsigma_proton = nsigma if nsigma_proton is None else nsigma_proton
        nsigma_kaon = nsigma if nsigma_kaon is None else nsigma_kaon
        nsigma_pion = nsigma if nsigma_pion is None else nsigma_pion
        json_file_name = json_file_name or self.json_file_name

        if anti_particles:
            proton_pid = Pid.ANTI_PROTON.value
            kaon_pid = Pid.NEG_KAON.value
            pion_like_pids = [
                Pid.NEG_PION.value,
                Pid.NEG_MUON.value,
                Pid.ELECTRON.value,
            ]
        else:
            proton_pid = Pid.PROTON.value
            kaon_pid = Pid.POS_KAON.value
            # pions, muons and electrons impossible to distinguish in this model
            pion_like_pids = [
                Pid.POS_PION.value,
                Pid.POS_MUON.value,
                Pid.POSITRON.value,
            ]

        # Every species gets the same JSON file, kaons included.
        protons = self.get_particles_type(
            tree_handler, proton_pid, nsigma_proton, json_file_name
        )
        kaons = self.get_particles_type(
            tree_handler, kaon_pid, nsigma_kaon, json_file_name
        )
        pions = self.get_particles_type(
            tree_handler, pion_like_pids, nsigma_pion, json_file_name
        )
        print(
            f"\nNumber of protons: {len(protons)}\nNumber of kaons: {len(kaons)}\nNumber of pions: {len(pions)}"
        )
        return (protons, kaons, pions)

    def get_particles_type(
        self,
        tree_handler: TreeHandler,
        pid: float,
        nsigma: float = 0.0,
        json_file_name: str = None,
    ) -> TreeHandler:
        """
        Gets particles of a given pid in the selected sigma region of mass2.

        Parameters
        ----------
        tree_handler : TreeHandler
            TreeHandler with the data.

        pid : float
            Pid of the given particle type. It is interpolated into the
            selection ``"<pid column> == <pid>"``.
            NOTE(review): get_protons_kaons_pions also passes a list of pids;
            that presumably relies on the query backend accepting
            ``column == [..]`` - confirm.

        nsigma : float, optional
            Number of sigma to select the sigma region of mass2.
            Values not greater than 0 (default 0.0) disable the mass2 cut.

        json_file_name : str, optional
            Name of the JSON file containing variable names.
            None (default) falls back to the value stored on the instance.

        Returns
        -------
        TreeHandler
            TreeHandler with the particles of the given type in the specified
            sigma region.
        """
        json_file_name = json_file_name or self.json_file_name
        pid_var_name = json_tools.load_var_name(json_file_name, "pid")
        mass2_var_name = json_tools.load_var_name(json_file_name, "mass2")
        particles = tree_handler.get_subset(f"{pid_var_name} == {pid}")
        # getting selected nsigma region in the mass2
        if nsigma > 0:
            print(f"Getting particles pid={pid} in {nsigma}-sigma region")
            mass2_column = particles.get_data_frame()[mass2_var_name]
            mean = mass2_column.mean()
            std = mass2_column.std()
            # A NaN or zero spread gives no usable window, so the cut is skipped.
            if std > 0:
                mass2_cut = json_tools.create_cut_string(
                    mean - nsigma * std, mean + nsigma * std, mass2_var_name
                )
                particles = particles.get_subset(mass2_cut)

        return particles

    def load_tree(
        self,
        data_file_name: str = None,
        tree_type: str = "plain_tree",
        max_workers: int = 1,
        model_handler: ModelHandler = None,
    ) -> TreeHandler:
        """
        Loads tree from given file into hipe4ml TreeHandler.

        The preselection built by clean_tree() is applied while loading.

        Parameters
        ----------
        data_file_name : str, optional
            Name of the file with the tree.
            None (default) falls back to the value stored on the instance.

        tree_type : str, optional
            Type of the tree structure to be loaded. Defaults to "plain_tree".

        max_workers : int, optional
            Number of max_workers forwarded to the loader. Defaults to 1.

        model_handler : ModelHandler, optional
            ModelHandler forwarded to the loader. Defaults to None.

        Returns
        -------
        TreeHandler
            hipe4ml structure containing the loaded tree.
        """
        data_file_name = data_file_name or self.data_file_name
        preselection = self.clean_tree()
        # Announce the load before it starts, since reading may take a while.
        print(f"\nLoading tree from {data_file_name}...")
        tree_handler = TreeHandler()
        tree_handler.get_handler_from_large_file(
            data_file_name,
            tree_type,
            preselection=preselection,
            max_workers=max_workers,
            model_handler=model_handler,
            output_margin=False,
        )
        return tree_handler

    def clean_tree(self, json_file_name: str = None) -> str:
        """
        Creates a string with preselections (quality cuts, momentum range, and sign of charge).

        Every condition is wrapped in parentheses and the conditions are
        joined with " and ".

        Parameters
        ----------
        json_file_name : str, optional
            Name of the JSON file containing quality cuts definition
            (if different than in the class). Defaults to None.

        Returns
        -------
        str
            Preselection string for the TreeHandler object. If the instance's
            ``anti_particles`` is neither True nor False, no charge condition
            is added.
        """
        json_file_name = json_file_name or self.json_file_name
        quality_cuts = json_tools.load_quality_cuts(json_file_name)
        momentum_variable_name = json_tools.load_var_name(json_file_name, "momentum")
        charge_variable_name = json_tools.load_var_name(json_file_name, "charge")

        conditions = [f"({cut})" for cut in quality_cuts]
        # include specific momentum cut
        p_cut = json_tools.create_cut_string(
            self.lower_p_cut, self.upper_p_cut, momentum_variable_name
        )
        conditions.append(f"({p_cut})")
        # include sign of charge
        if self.anti_particles is False:
            conditions.append(f"({charge_variable_name} > 0)")
        elif self.anti_particles is True:
            conditions.append(f"({charge_variable_name} < 0)")

        # Joining avoids a dangling " and " when no charge condition is added.
        return " and ".join(conditions)
class LoadData:
    """
    Class for loading data stored in .tree format into hipe4ml.TreeHandler,
    data cleaning and preparing dataset for training and testing of the ML model
    """

    def __init__(
        self,
        data_file_name: str,
        json_file_name: str,
        lower_p_cut: float,
        upper_p_cut: float,
        anti_particles: bool,
    ):
        """
        Initializes the LoadData object.

        Only stores the configuration; no file is read here.

        Parameters
        ----------
        data_file_name : str
            Name of the data file in .tree format.

        json_file_name : str
            Name of the JSON file containing variable names and cuts definitions.

        lower_p_cut : float
            Value of the lower momentum cut.

        upper_p_cut : float
            Value of the upper momentum cut.

        anti_particles : bool
            True selects negatively charged tracks (antiparticles),
            False selects positively charged tracks.
        """
        self.data_file_name = data_file_name
        self.lower_p_cut = lower_p_cut
        self.upper_p_cut = upper_p_cut
        self.anti_particles = anti_particles
        self.json_file_name = json_file_name

    def get_protons_kaons_pions(
        self,
        tree_handler: TreeHandler,
        nsigma: float = 5,
        anti_particles: bool = None,
        nsigma_proton: float = None,
        nsigma_kaon: float = None,
        nsigma_pion: float = None,
        json_file_name: str = None,
    ) -> Tuple[TreeHandler, TreeHandler, TreeHandler]:
        """
        Gets protons, kaons, and pions from a TreeHandler in the nsigma region.

        Pions, muons, and electrons are selected together as one "pion" sample.

        Parameters
        ----------
        tree_handler : TreeHandler
            TreeHandler containing the data.

        nsigma : float, optional
            Number of sigma for data cleaning, by default 5.

        anti_particles : bool, optional
            True selects antiparticles, False selects particles.
            None (default) falls back to the value stored on the instance.

        nsigma_proton : float, optional
            Number of sigma for protons, if not specified uses nsigma.

        nsigma_kaon : float, optional
            Number of sigma for kaons, if not specified uses nsigma.

        nsigma_pion : float, optional
            Number of sigma for pions, if not specified uses nsigma.

        json_file_name : str, optional
            Name of the JSON file containing variable names.
            None (default) falls back to the value stored on the instance.

        Returns
        -------
        Tuple[TreeHandler, TreeHandler, TreeHandler]
            Tuple containing TreeHandlers for protons, kaons, and pions.

        Raises
        ------
        ValueError
            If the resolved ``anti_particles`` flag is not a bool.
        """
        # None means "not given". An explicit False must not be replaced by
        # the instance default, so test for None instead of truthiness.
        if anti_particles is None:
            anti_particles = self.anti_particles
        if not isinstance(anti_particles, bool):
            raise ValueError(
                f"anti_particles must be True or False, got {anti_particles!r}"
            )
        nsigma_proton = nsigma if nsigma_proton is None else nsigma_proton
        nsigma_kaon = nsigma if nsigma_kaon is None else nsigma_kaon
        nsigma_pion = nsigma if nsigma_pion is None else nsigma_pion
        json_file_name = json_file_name or self.json_file_name

        if anti_particles:
            proton_pid = Pid.ANTI_PROTON.value
            kaon_pid = Pid.NEG_KAON.value
            pion_like_pids = [
                Pid.NEG_PION.value,
                Pid.NEG_MUON.value,
                Pid.ELECTRON.value,
            ]
        else:
            proton_pid = Pid.PROTON.value
            kaon_pid = Pid.POS_KAON.value
            # pions, muons and electrons impossible to distinguish in this model
            pion_like_pids = [
                Pid.POS_PION.value,
                Pid.POS_MUON.value,
                Pid.POSITRON.value,
            ]

        # Every species gets the same JSON file, kaons included.
        protons = self.get_particles_type(
            tree_handler, proton_pid, nsigma_proton, json_file_name
        )
        kaons = self.get_particles_type(
            tree_handler, kaon_pid, nsigma_kaon, json_file_name
        )
        pions = self.get_particles_type(
            tree_handler, pion_like_pids, nsigma_pion, json_file_name
        )
        print(
            f"\nNumber of protons: {len(protons)}\nNumber of kaons: {len(kaons)}\nNumber of pions: {len(pions)}"
        )
        return (protons, kaons, pions)

    def get_particles_type(
        self,
        tree_handler: TreeHandler,
        pid: float,
        nsigma: float = 0.0,
        json_file_name: str = None,
    ) -> TreeHandler:
        """
        Gets particles of a given pid in the selected sigma region of mass2.

        Parameters
        ----------
        tree_handler : TreeHandler
            TreeHandler with the data.

        pid : float
            Pid of the given particle type. It is interpolated into the
            selection ``"<pid column> == <pid>"``.
            NOTE(review): get_protons_kaons_pions also passes a list of pids;
            that presumably relies on the query backend accepting
            ``column == [..]`` - confirm.

        nsigma : float, optional
            Number of sigma to select the sigma region of mass2.
            Values not greater than 0 (default 0.0) disable the mass2 cut.

        json_file_name : str, optional
            Name of the JSON file containing variable names.
            None (default) falls back to the value stored on the instance.

        Returns
        -------
        TreeHandler
            TreeHandler with the particles of the given type in the specified
            sigma region.
        """
        json_file_name = json_file_name or self.json_file_name
        pid_var_name = json_tools.load_var_name(json_file_name, "pid")
        mass2_var_name = json_tools.load_var_name(json_file_name, "mass2")
        particles = tree_handler.get_subset(f"{pid_var_name} == {pid}")
        # getting selected nsigma region in the mass2
        if nsigma > 0:
            print(f"Getting particles pid={pid} in {nsigma}-sigma region")
            mass2_column = particles.get_data_frame()[mass2_var_name]
            mean = mass2_column.mean()
            std = mass2_column.std()
            # A NaN or zero spread gives no usable window, so the cut is skipped.
            if std > 0:
                mass2_cut = json_tools.create_cut_string(
                    mean - nsigma * std, mean + nsigma * std, mass2_var_name
                )
                particles = particles.get_subset(mass2_cut)

        return particles

    def load_tree(
        self,
        data_file_name: str = None,
        tree_type: str = "plain_tree",
        max_workers: int = 1,
        model_handler: ModelHandler = None,
    ) -> TreeHandler:
        """
        Loads tree from given file into hipe4ml TreeHandler.

        The preselection built by clean_tree() is applied while loading.

        Parameters
        ----------
        data_file_name : str, optional
            Name of the file with the tree.
            None (default) falls back to the value stored on the instance.

        tree_type : str, optional
            Type of the tree structure to be loaded. Defaults to "plain_tree".

        max_workers : int, optional
            Number of max_workers forwarded to the loader. Defaults to 1.

        model_handler : ModelHandler, optional
            ModelHandler forwarded to the loader. Defaults to None.

        Returns
        -------
        TreeHandler
            hipe4ml structure containing the loaded tree.
        """
        data_file_name = data_file_name or self.data_file_name
        preselection = self.clean_tree()
        # Announce the load before it starts, since reading may take a while.
        print(f"\nLoading tree from {data_file_name}...")
        tree_handler = TreeHandler()
        tree_handler.get_handler_from_large_file(
            data_file_name,
            tree_type,
            preselection=preselection,
            max_workers=max_workers,
            model_handler=model_handler,
            output_margin=False,
        )
        return tree_handler

    def clean_tree(self, json_file_name: str = None) -> str:
        """
        Creates a string with preselections (quality cuts, momentum range, and sign of charge).

        Every condition is wrapped in parentheses and the conditions are
        joined with " and ".

        Parameters
        ----------
        json_file_name : str, optional
            Name of the JSON file containing quality cuts definition
            (if different than in the class). Defaults to None.

        Returns
        -------
        str
            Preselection string for the TreeHandler object. If the instance's
            ``anti_particles`` is neither True nor False, no charge condition
            is added.
        """
        json_file_name = json_file_name or self.json_file_name
        quality_cuts = json_tools.load_quality_cuts(json_file_name)
        momentum_variable_name = json_tools.load_var_name(json_file_name, "momentum")
        charge_variable_name = json_tools.load_var_name(json_file_name, "charge")

        conditions = [f"({cut})" for cut in quality_cuts]
        # include specific momentum cut
        p_cut = json_tools.create_cut_string(
            self.lower_p_cut, self.upper_p_cut, momentum_variable_name
        )
        conditions.append(f"({p_cut})")
        # include sign of charge
        if self.anti_particles is False:
            conditions.append(f"({charge_variable_name} > 0)")
        elif self.anti_particles is True:
            conditions.append(f"({charge_variable_name} < 0)")

        # Joining avoids a dangling " and " when no charge condition is added.
        return " and ".join(conditions)
Class for loading data stored in .tree format into hipe4ml.TreeHandler, data cleaning and preparing dataset for training and testing of the ML model
22 def __init__( 23 self, 24 data_file_name: str, 25 json_file_name: str, 26 lower_p_cut: float, 27 upper_p_cut: float, 28 anti_particles: bool, 29 ): 30 """ 31 Initializes the LoadDataObject 32 33 Parameters 34 ---------- 35 data_file_name : str 36 Name of the data file in .tree format. 37 38 json_file_name : str 39 Name of the JSON file containing variable names and cuts definitions. 40 41 lower_p_cut : float 42 Value of the lower momentum cut. 43 44 upper_p_cut : float 45 Value of the upper momentum cut. 46 47 anti_particles : bool 48 Specifies whether to load only antiparticles (True) or positive particles (False). 49 """ 50 self.data_file_name = data_file_name 51 self.lower_p_cut = lower_p_cut 52 self.upper_p_cut = upper_p_cut 53 self.anti_particles = anti_particles 54 self.json_file_name = json_file_name
Initializes the LoadData object.
Parameters
data_file_name : str Name of the data file in .tree format.
json_file_name : str Name of the JSON file containing variable names and cuts definitions.
lower_p_cut : float Value of the lower momentum cut.
upper_p_cut : float Value of the upper momentum cut.
anti_particles : bool Specifies whether to load only antiparticles (True) or positive particles (False).
56 def get_protons_kaons_pions( 57 self, 58 tree_handler: TreeHandler, 59 nsigma: float = 5, 60 anti_particles: bool = None, 61 nsigma_proton: float = None, 62 nsigma_kaon: float = None, 63 nsigma_pion: float = None, 64 json_file_name: str = None, 65 ) -> Tuple[TreeHandler, TreeHandler, TreeHandler]: 66 """ 67 Gets protons, kaons, and pions from a TreeHandler in the nsigma region. 68 69 In this tof model, pions, muons, and electrons are treated the same. 70 71 Parameters 72 ---------- 73 tree_handler : TreeHandler 74 TreeHandler containing the data. 75 76 nsigma : float, optional 77 Number of sigma for data cleaning, by default 5. 78 79 anti_particles : bool, optional 80 Loads only antiparticles if set to True, positive particles if set to False. 81 Defaults to None. 82 83 nsigma_proton : float, optional 84 Number of sigma for protons, if not specified uses nsigma. 85 Defaults to None. 86 87 nsigma_kaon : float, optional 88 Number of sigma for kaons, if not specified uses nsigma. 89 Defaults to None. 90 91 nsigma_pion : float, optional 92 Number of sigma for pions, if not specified uses nsigma. 93 Defaults to None. 94 95 json_file_name : str, optional 96 Name of the JSON file containing variable names, by default None. 97 98 Returns 99 ------- 100 Tuple[TreeHandler, TreeHandler, TreeHandler] 101 Tuple containing TreeHandlers for protons, kaons, and pions. 
102 """ 103 anti_particles = anti_particles or self.anti_particles 104 nsigma_proton = nsigma_proton if nsigma_proton is not None else nsigma 105 nsigma_kaon = nsigma_kaon if nsigma_kaon is not None else nsigma 106 nsigma_pion = nsigma_pion if nsigma_pion is not None else nsigma 107 json_file_name = json_file_name or self.json_file_name 108 109 if anti_particles is False: 110 protons = self.get_particles_type( 111 tree_handler, Pid.PROTON.value, nsigma_proton, json_file_name 112 ) 113 kaons = self.get_particles_type( 114 tree_handler, Pid.POS_KAON.value, nsigma_kaon, json_file_name 115 ) 116 pions = self.get_particles_type( 117 tree_handler, 118 # pions, muons and electrons impossible to ditinguish in this model 119 [Pid.POS_PION.value, Pid.POS_MUON.value, Pid.POSITRON.value], 120 nsigma_pion, 121 json_file_name, 122 ) 123 elif anti_particles is True: 124 protons = self.get_particles_type( 125 tree_handler, Pid.ANTI_PROTON.value, nsigma_proton, json_file_name 126 ) 127 kaons = self.get_particles_type( 128 tree_handler, Pid.NEG_KAON.value, nsigma_kaon 129 ) 130 pions = self.get_particles_type( 131 tree_handler, 132 [Pid.NEG_PION.value, Pid.NEG_MUON.value, Pid.ELECTRON.value], 133 nsigma_pion, 134 json_file_name, 135 ) 136 print( 137 f"\nNumber of protons: {len(protons)}\nNumber of kaons: {len(kaons)}\nNumber of pions: {len(pions)}" 138 ) 139 return (protons, kaons, pions)
Gets protons, kaons, and pions from a TreeHandler in the nsigma region.
In this TOF model, pions, muons, and electrons are treated as a single class.
Parameters
tree_handler : TreeHandler TreeHandler containing the data.
nsigma : float, optional Number of sigma for data cleaning, by default 5.
anti_particles : bool, optional Loads only antiparticles if set to True, positive particles if set to False. Defaults to None.
nsigma_proton : float, optional Number of sigma for protons, if not specified uses nsigma. Defaults to None.
nsigma_kaon : float, optional Number of sigma for kaons, if not specified uses nsigma. Defaults to None.
nsigma_pion : float, optional Number of sigma for pions, if not specified uses nsigma. Defaults to None.
json_file_name : str, optional Name of the JSON file containing variable names, by default None.
Returns
Tuple[TreeHandler, TreeHandler, TreeHandler] Tuple containing TreeHandlers for protons, kaons, and pions.
141 def get_particles_type( 142 self, 143 tree_handler: TreeHandler, 144 pid: float, 145 nsigma: float = 0.0, 146 json_file_name: str = None, 147 ) -> TreeHandler: 148 """ 149 Gets particles of a given pid in the selected sigma region of mass2. 150 151 Parameters: 152 tree_handler (TreeHandler): TreeHandler with the data. 153 154 pid (float): Pid of the given particle type. 155 156 nsigma (float, optional): Number of sigma to select the sigma region of mass2. Defaults to 0. 157 158 json_file_name (str, optional): Name of the JSON file containing variable names. Defaults to None. 159 160 Returns: 161 TreeHandler: TreeHandler with the particles of the given type in the specified sigma region. 162 """ 163 json_file_name = json_file_name or self.json_file_name 164 pid_var_name = json_tools.load_var_name(json_file_name, "pid") 165 mass2_var_name = json_tools.load_var_name(json_file_name, "mass2") 166 particles = tree_handler.get_subset(f"{pid_var_name} == {pid}") 167 # getting selected nsigma region in the mass2 168 if nsigma > 0: 169 print(f"Getting particles pid={pid} in {nsigma}-sigma region") 170 mass2_column = particles.get_data_frame()[mass2_var_name] 171 mean = mass2_column.mean() 172 std = mass2_column.std() 173 if std > 0: 174 mass2_cut = json_tools.create_cut_string( 175 mean - nsigma * std, mean + nsigma * std, mass2_var_name 176 ) 177 particles = particles.get_subset(mass2_cut) 178 179 return particles
Gets particles of a given pid in the selected sigma region of mass2.
Parameters: tree_handler (TreeHandler): TreeHandler with the data.
pid (float): Pid of the given particle type.
nsigma (float, optional): Number of sigma to select the sigma region of mass2. Defaults to 0.
json_file_name (str, optional): Name of the JSON file containing variable names. Defaults to None.
Returns: TreeHandler: TreeHandler with the particles of the given type in the specified sigma region.
181 def load_tree( 182 self, 183 data_file_name: str = None, 184 tree_type: str = "plain_tree", 185 max_workers: int = 1, 186 model_handler: ModelHandler = None, 187 ) -> TreeHandler: 188 """ 189 Loads tree from given file into hipe4ml TreeHandler. 190 191 Parameters: 192 data_file_name (str, optional): Name of the file with the tree. Defaults to None. 193 194 tree_type (str, optional): Type of the tree structure to be loaded. Defaults to "plain_tree". 195 196 max_workers (int, optional): Number of max_workers for ThreadPoolExecutor used to load data with multithreading. 197 Defaults to 1. 198 199 model_handler (ModelHandler, optional): ModelHandler to apply if the dataset is validation one. Defaults to None. 200 201 Returns: 202 TreeHandler: hipe4ml structure containing the tree to train and test the model on. 203 """ 204 data_file_name = data_file_name or self.data_file_name 205 tree_handler = TreeHandler() 206 preselection = self.clean_tree() 207 tree_handler.get_handler_from_large_file( 208 data_file_name, 209 tree_type, 210 preselection=preselection, 211 max_workers=max_workers, 212 model_handler=model_handler, 213 output_margin=False, 214 ) 215 print(f"\nLoading tree from {data_file_name}...") 216 return tree_handler
Loads tree from given file into hipe4ml TreeHandler.
Parameters: data_file_name (str, optional): Name of the file with the tree. Defaults to None.
tree_type (str, optional): Type of the tree structure to be loaded. Defaults to "plain_tree".
max_workers (int, optional): Number of max_workers for ThreadPoolExecutor used to load data with multithreading.
Defaults to 1.
model_handler (ModelHandler, optional): ModelHandler to apply if the dataset is validation one. Defaults to None.
Returns: TreeHandler: hipe4ml structure containing the tree to train and test the model on.
218 def clean_tree(self, json_file_name: str = None) -> str: 219 """ 220 Creates a string with preselections (quality cuts, momentum range, and sign of charge). 221 222 Parameters: 223 json_file_name (str, optional): Name of the JSON file containing quality cuts definition 224 (if different than in the class). Defaults to None. 225 226 Returns: 227 str: Preselection string for the the TreeHandler object. 228 """ 229 preselection = "" 230 json_file_name = json_file_name or self.json_file_name 231 quality_cuts = json_tools.load_quality_cuts(json_file_name) 232 momemntum_variable_name = json_tools.load_var_name(json_file_name, "momentum") 233 charge_variable_name = json_tools.load_var_name(json_file_name, "charge") 234 235 for cut in quality_cuts: 236 preselection += f"({cut}) and " 237 # include specific momentum cut 238 p_cut = json_tools.create_cut_string( 239 self.lower_p_cut, self.upper_p_cut, momemntum_variable_name 240 ) 241 preselection += f"({p_cut}) and " 242 # include sign of charge 243 if self.anti_particles is False: 244 preselection += f"({charge_variable_name} > 0)" 245 elif self.anti_particles is True: 246 preselection += f"({charge_variable_name} < 0)" 247 248 return preselection
Creates a string with preselections (quality cuts, momentum range, and sign of charge).
Parameters: json_file_name (str, optional): Name of the JSON file containing quality cuts definition (if different than in the class). Defaults to None.
Returns: str: Preselection string for the TreeHandler object.