ml_pid_cbm.tools.load_data

Module for loading data saved in .tree format into hipe4ml.TreeHandler, data cleaning and preparing training and test dataset.

  1"""
  2Module for loading data saved in .tree format into hipe4ml.TreeHandler,
  3data cleaning and preparing training and test dataset.
  4"""
  5
  6from typing import Tuple
  7
  8from hipe4ml.model_handler import ModelHandler
  9from hipe4ml.tree_handler import TreeHandler
 10
 11from . import json_tools
 12from .particles_id import ParticlesId as Pid
 13
 14
 15class LoadData:
 16    """
 17    Class for loading data stored in .tree format into hipe4ml.TreeHandler,
 18    data cleaning and preparing dataset for training and testing of the ML model
 19    """
 20
 21    def __init__(
 22        self,
 23        data_file_name: str,
 24        json_file_name: str,
 25        lower_p_cut: float,
 26        upper_p_cut: float,
 27        anti_particles: bool,
 28    ):
 29        """
 30        Initializes the LoadDataObject
 31
 32        Parameters
 33        ----------
 34        data_file_name : str
 35             Name of the data file in .tree format.
 36
 37        json_file_name : str
 38             Name of the JSON file containing variable names and cuts definitions.
 39
 40        lower_p_cut : float
 41            Value of the lower momentum cut.
 42
 43        upper_p_cut : float
 44            Value of the upper momentum cut.
 45
 46        anti_particles : bool
 47            Specifies whether to load only antiparticles (True) or positive particles (False).
 48        """
 49        self.data_file_name = data_file_name
 50        self.lower_p_cut = lower_p_cut
 51        self.upper_p_cut = upper_p_cut
 52        self.anti_particles = anti_particles
 53        self.json_file_name = json_file_name
 54
 55    def get_protons_kaons_pions(
 56        self,
 57        tree_handler: TreeHandler,
 58        nsigma: float = 5,
 59        anti_particles: bool = None,
 60        nsigma_proton: float = None,
 61        nsigma_kaon: float = None,
 62        nsigma_pion: float = None,
 63        json_file_name: str = None,
 64    ) -> Tuple[TreeHandler, TreeHandler, TreeHandler]:
 65        """
 66        Gets protons, kaons, and pions from a TreeHandler in the nsigma region.
 67
 68        In this tof model, pions, muons, and electrons are treated the same.
 69
 70        Parameters
 71        ----------
 72        tree_handler : TreeHandler
 73            TreeHandler containing the data.
 74
 75        nsigma : float, optional
 76            Number of sigma for data cleaning, by default 5.
 77
 78        anti_particles : bool, optional
 79            Loads only antiparticles if set to True, positive particles if set to False.
 80            Defaults to None.
 81
 82        nsigma_proton : float, optional
 83            Number of sigma for protons, if not specified uses nsigma.
 84            Defaults to None.
 85
 86        nsigma_kaon : float, optional
 87            Number of sigma for kaons, if not specified uses nsigma.
 88            Defaults to None.
 89
 90        nsigma_pion : float, optional
 91            Number of sigma for pions, if not specified uses nsigma.
 92            Defaults to None.
 93
 94        json_file_name : str, optional
 95            Name of the JSON file containing variable names, by default None.
 96
 97        Returns
 98        -------
 99        Tuple[TreeHandler, TreeHandler, TreeHandler]
100            Tuple containing TreeHandlers for protons, kaons, and pions.
101        """
102        anti_particles = anti_particles or self.anti_particles
103        nsigma_proton = nsigma_proton if nsigma_proton is not None else nsigma
104        nsigma_kaon = nsigma_kaon if nsigma_kaon is not None else nsigma
105        nsigma_pion = nsigma_pion if nsigma_pion is not None else nsigma
106        json_file_name = json_file_name or self.json_file_name
107
108        if anti_particles is False:
109            protons = self.get_particles_type(
110                tree_handler, Pid.PROTON.value, nsigma_proton, json_file_name
111            )
112            kaons = self.get_particles_type(
113                tree_handler, Pid.POS_KAON.value, nsigma_kaon, json_file_name
114            )
115            pions = self.get_particles_type(
116                tree_handler,
117                # pions, muons and electrons impossible to ditinguish in this model
118                [Pid.POS_PION.value, Pid.POS_MUON.value, Pid.POSITRON.value],
119                nsigma_pion,
120                json_file_name,
121            )
122        elif anti_particles is True:
123            protons = self.get_particles_type(
124                tree_handler, Pid.ANTI_PROTON.value, nsigma_proton, json_file_name
125            )
126            kaons = self.get_particles_type(
127                tree_handler, Pid.NEG_KAON.value, nsigma_kaon
128            )
129            pions = self.get_particles_type(
130                tree_handler,
131                [Pid.NEG_PION.value, Pid.NEG_MUON.value, Pid.ELECTRON.value],
132                nsigma_pion,
133                json_file_name,
134            )
135        print(
136            f"\nNumber of protons: {len(protons)}\nNumber of kaons: {len(kaons)}\nNumber of pions: {len(pions)}"
137        )
138        return (protons, kaons, pions)
139
140    def get_particles_type(
141        self,
142        tree_handler: TreeHandler,
143        pid: float,
144        nsigma: float = 0.0,
145        json_file_name: str = None,
146    ) -> TreeHandler:
147        """
148        Gets particles of a given pid in the selected sigma region of mass2.
149
150        Parameters:
151            tree_handler (TreeHandler): TreeHandler with the data.
152
153            pid (float): Pid of the given particle type.
154
155            nsigma (float, optional): Number of sigma to select the sigma region of mass2. Defaults to 0.
156
157            json_file_name (str, optional): Name of the JSON file containing variable names. Defaults to None.
158
159        Returns:
160            TreeHandler: TreeHandler with the particles of the given type in the specified sigma region.
161        """
162        json_file_name = json_file_name or self.json_file_name
163        pid_var_name = json_tools.load_var_name(json_file_name, "pid")
164        mass2_var_name = json_tools.load_var_name(json_file_name, "mass2")
165        particles = tree_handler.get_subset(f"{pid_var_name} == {pid}")
166        # getting selected nsigma region in the mass2
167        if nsigma > 0:
168            print(f"Getting particles pid={pid} in {nsigma}-sigma region")
169            mass2_column = particles.get_data_frame()[mass2_var_name]
170            mean = mass2_column.mean()
171            std = mass2_column.std()
172            if std > 0:
173                mass2_cut = json_tools.create_cut_string(
174                    mean - nsigma * std, mean + nsigma * std, mass2_var_name
175                )
176                particles = particles.get_subset(mass2_cut)
177
178        return particles
179
180    def load_tree(
181        self,
182        data_file_name: str = None,
183        tree_type: str = "plain_tree",
184        max_workers: int = 1,
185        model_handler: ModelHandler = None,
186    ) -> TreeHandler:
187        """
188        Loads tree from given file into hipe4ml TreeHandler.
189
190        Parameters:
191            data_file_name (str, optional): Name of the file with the tree. Defaults to None.
192
193            tree_type (str, optional): Type of the tree structure to be loaded. Defaults to "plain_tree".
194
195            max_workers (int, optional): Number of max_workers for ThreadPoolExecutor used to load data with multithreading.
196                Defaults to 1.
197                
198            model_handler (ModelHandler, optional): ModelHandler to apply if the dataset is validation one. Defaults to None.
199
200        Returns:
201            TreeHandler: hipe4ml structure containing the tree to train and test the model on.
202        """
203        data_file_name = data_file_name or self.data_file_name
204        tree_handler = TreeHandler()
205        preselection = self.clean_tree()
206        tree_handler.get_handler_from_large_file(
207            data_file_name,
208            tree_type,
209            preselection=preselection,
210            max_workers=max_workers,
211            model_handler=model_handler,
212            output_margin=False,
213        )
214        print(f"\nLoading tree from {data_file_name}...")
215        return tree_handler
216
217    def clean_tree(self, json_file_name: str = None) -> str:
218        """
219        Creates a string with preselections (quality cuts, momentum range, and sign of charge).
220
221        Parameters:
222            json_file_name (str, optional): Name of the JSON file containing quality cuts definition
223                (if different than in the class). Defaults to None.
224
225        Returns:
226            str: Preselection string for the the TreeHandler object.
227        """
228        preselection = ""
229        json_file_name = json_file_name or self.json_file_name
230        quality_cuts = json_tools.load_quality_cuts(json_file_name)
231        momemntum_variable_name = json_tools.load_var_name(json_file_name, "momentum")
232        charge_variable_name = json_tools.load_var_name(json_file_name, "charge")
233
234        for cut in quality_cuts:
235            preselection += f"({cut}) and "
236        # include specific momentum cut
237        p_cut = json_tools.create_cut_string(
238            self.lower_p_cut, self.upper_p_cut, momemntum_variable_name
239        )
240        preselection += f"({p_cut}) and "
241        # include sign of charge
242        if self.anti_particles is False:
243            preselection += f"({charge_variable_name} > 0)"
244        elif self.anti_particles is True:
245            preselection += f"({charge_variable_name} < 0)"
246
247        return preselection
class LoadData:
 16class LoadData:
 17    """
 18    Class for loading data stored in .tree format into hipe4ml.TreeHandler,
 19    data cleaning and preparing dataset for training and testing of the ML model
 20    """
 21
 22    def __init__(
 23        self,
 24        data_file_name: str,
 25        json_file_name: str,
 26        lower_p_cut: float,
 27        upper_p_cut: float,
 28        anti_particles: bool,
 29    ):
 30        """
 31        Initializes the LoadDataObject
 32
 33        Parameters
 34        ----------
 35        data_file_name : str
 36             Name of the data file in .tree format.
 37
 38        json_file_name : str
 39             Name of the JSON file containing variable names and cuts definitions.
 40
 41        lower_p_cut : float
 42            Value of the lower momentum cut.
 43
 44        upper_p_cut : float
 45            Value of the upper momentum cut.
 46
 47        anti_particles : bool
 48            Specifies whether to load only antiparticles (True) or positive particles (False).
 49        """
 50        self.data_file_name = data_file_name
 51        self.lower_p_cut = lower_p_cut
 52        self.upper_p_cut = upper_p_cut
 53        self.anti_particles = anti_particles
 54        self.json_file_name = json_file_name
 55
 56    def get_protons_kaons_pions(
 57        self,
 58        tree_handler: TreeHandler,
 59        nsigma: float = 5,
 60        anti_particles: bool = None,
 61        nsigma_proton: float = None,
 62        nsigma_kaon: float = None,
 63        nsigma_pion: float = None,
 64        json_file_name: str = None,
 65    ) -> Tuple[TreeHandler, TreeHandler, TreeHandler]:
 66        """
 67        Gets protons, kaons, and pions from a TreeHandler in the nsigma region.
 68
 69        In this tof model, pions, muons, and electrons are treated the same.
 70
 71        Parameters
 72        ----------
 73        tree_handler : TreeHandler
 74            TreeHandler containing the data.
 75
 76        nsigma : float, optional
 77            Number of sigma for data cleaning, by default 5.
 78
 79        anti_particles : bool, optional
 80            Loads only antiparticles if set to True, positive particles if set to False.
 81            Defaults to None.
 82
 83        nsigma_proton : float, optional
 84            Number of sigma for protons, if not specified uses nsigma.
 85            Defaults to None.
 86
 87        nsigma_kaon : float, optional
 88            Number of sigma for kaons, if not specified uses nsigma.
 89            Defaults to None.
 90
 91        nsigma_pion : float, optional
 92            Number of sigma for pions, if not specified uses nsigma.
 93            Defaults to None.
 94
 95        json_file_name : str, optional
 96            Name of the JSON file containing variable names, by default None.
 97
 98        Returns
 99        -------
100        Tuple[TreeHandler, TreeHandler, TreeHandler]
101            Tuple containing TreeHandlers for protons, kaons, and pions.
102        """
103        anti_particles = anti_particles or self.anti_particles
104        nsigma_proton = nsigma_proton if nsigma_proton is not None else nsigma
105        nsigma_kaon = nsigma_kaon if nsigma_kaon is not None else nsigma
106        nsigma_pion = nsigma_pion if nsigma_pion is not None else nsigma
107        json_file_name = json_file_name or self.json_file_name
108
109        if anti_particles is False:
110            protons = self.get_particles_type(
111                tree_handler, Pid.PROTON.value, nsigma_proton, json_file_name
112            )
113            kaons = self.get_particles_type(
114                tree_handler, Pid.POS_KAON.value, nsigma_kaon, json_file_name
115            )
116            pions = self.get_particles_type(
117                tree_handler,
118                # pions, muons and electrons impossible to ditinguish in this model
119                [Pid.POS_PION.value, Pid.POS_MUON.value, Pid.POSITRON.value],
120                nsigma_pion,
121                json_file_name,
122            )
123        elif anti_particles is True:
124            protons = self.get_particles_type(
125                tree_handler, Pid.ANTI_PROTON.value, nsigma_proton, json_file_name
126            )
127            kaons = self.get_particles_type(
128                tree_handler, Pid.NEG_KAON.value, nsigma_kaon
129            )
130            pions = self.get_particles_type(
131                tree_handler,
132                [Pid.NEG_PION.value, Pid.NEG_MUON.value, Pid.ELECTRON.value],
133                nsigma_pion,
134                json_file_name,
135            )
136        print(
137            f"\nNumber of protons: {len(protons)}\nNumber of kaons: {len(kaons)}\nNumber of pions: {len(pions)}"
138        )
139        return (protons, kaons, pions)
140
141    def get_particles_type(
142        self,
143        tree_handler: TreeHandler,
144        pid: float,
145        nsigma: float = 0.0,
146        json_file_name: str = None,
147    ) -> TreeHandler:
148        """
149        Gets particles of a given pid in the selected sigma region of mass2.
150
151        Parameters:
152            tree_handler (TreeHandler): TreeHandler with the data.
153
154            pid (float): Pid of the given particle type.
155
156            nsigma (float, optional): Number of sigma to select the sigma region of mass2. Defaults to 0.
157
158            json_file_name (str, optional): Name of the JSON file containing variable names. Defaults to None.
159
160        Returns:
161            TreeHandler: TreeHandler with the particles of the given type in the specified sigma region.
162        """
163        json_file_name = json_file_name or self.json_file_name
164        pid_var_name = json_tools.load_var_name(json_file_name, "pid")
165        mass2_var_name = json_tools.load_var_name(json_file_name, "mass2")
166        particles = tree_handler.get_subset(f"{pid_var_name} == {pid}")
167        # getting selected nsigma region in the mass2
168        if nsigma > 0:
169            print(f"Getting particles pid={pid} in {nsigma}-sigma region")
170            mass2_column = particles.get_data_frame()[mass2_var_name]
171            mean = mass2_column.mean()
172            std = mass2_column.std()
173            if std > 0:
174                mass2_cut = json_tools.create_cut_string(
175                    mean - nsigma * std, mean + nsigma * std, mass2_var_name
176                )
177                particles = particles.get_subset(mass2_cut)
178
179        return particles
180
181    def load_tree(
182        self,
183        data_file_name: str = None,
184        tree_type: str = "plain_tree",
185        max_workers: int = 1,
186        model_handler: ModelHandler = None,
187    ) -> TreeHandler:
188        """
189        Loads tree from given file into hipe4ml TreeHandler.
190
191        Parameters:
192            data_file_name (str, optional): Name of the file with the tree. Defaults to None.
193
194            tree_type (str, optional): Type of the tree structure to be loaded. Defaults to "plain_tree".
195
196            max_workers (int, optional): Number of max_workers for ThreadPoolExecutor used to load data with multithreading.
197                Defaults to 1.
198                
199            model_handler (ModelHandler, optional): ModelHandler to apply if the dataset is validation one. Defaults to None.
200
201        Returns:
202            TreeHandler: hipe4ml structure containing the tree to train and test the model on.
203        """
204        data_file_name = data_file_name or self.data_file_name
205        tree_handler = TreeHandler()
206        preselection = self.clean_tree()
207        tree_handler.get_handler_from_large_file(
208            data_file_name,
209            tree_type,
210            preselection=preselection,
211            max_workers=max_workers,
212            model_handler=model_handler,
213            output_margin=False,
214        )
215        print(f"\nLoading tree from {data_file_name}...")
216        return tree_handler
217
218    def clean_tree(self, json_file_name: str = None) -> str:
219        """
220        Creates a string with preselections (quality cuts, momentum range, and sign of charge).
221
222        Parameters:
223            json_file_name (str, optional): Name of the JSON file containing quality cuts definition
224                (if different than in the class). Defaults to None.
225
226        Returns:
227            str: Preselection string for the the TreeHandler object.
228        """
229        preselection = ""
230        json_file_name = json_file_name or self.json_file_name
231        quality_cuts = json_tools.load_quality_cuts(json_file_name)
232        momemntum_variable_name = json_tools.load_var_name(json_file_name, "momentum")
233        charge_variable_name = json_tools.load_var_name(json_file_name, "charge")
234
235        for cut in quality_cuts:
236            preselection += f"({cut}) and "
237        # include specific momentum cut
238        p_cut = json_tools.create_cut_string(
239            self.lower_p_cut, self.upper_p_cut, momemntum_variable_name
240        )
241        preselection += f"({p_cut}) and "
242        # include sign of charge
243        if self.anti_particles is False:
244            preselection += f"({charge_variable_name} > 0)"
245        elif self.anti_particles is True:
246            preselection += f"({charge_variable_name} < 0)"
247
248        return preselection

Class for loading data stored in .tree format into hipe4ml.TreeHandler, data cleaning and preparing dataset for training and testing of the ML model

LoadData( data_file_name: str, json_file_name: str, lower_p_cut: float, upper_p_cut: float, anti_particles: bool)
22    def __init__(
23        self,
24        data_file_name: str,
25        json_file_name: str,
26        lower_p_cut: float,
27        upper_p_cut: float,
28        anti_particles: bool,
29    ):
30        """
31        Initializes the LoadDataObject
32
33        Parameters
34        ----------
35        data_file_name : str
36             Name of the data file in .tree format.
37
38        json_file_name : str
39             Name of the JSON file containing variable names and cuts definitions.
40
41        lower_p_cut : float
42            Value of the lower momentum cut.
43
44        upper_p_cut : float
45            Value of the upper momentum cut.
46
47        anti_particles : bool
48            Specifies whether to load only antiparticles (True) or positive particles (False).
49        """
50        self.data_file_name = data_file_name
51        self.lower_p_cut = lower_p_cut
52        self.upper_p_cut = upper_p_cut
53        self.anti_particles = anti_particles
54        self.json_file_name = json_file_name

Initializes the LoadData object.

Parameters

data_file_name : str Name of the data file in .tree format.

json_file_name : str Name of the JSON file containing variable names and cuts definitions.

lower_p_cut : float Value of the lower momentum cut.

upper_p_cut : float Value of the upper momentum cut.

anti_particles : bool Specifies whether to load only antiparticles (True) or positive particles (False).

def get_protons_kaons_pions( self, tree_handler: hipe4ml.tree_handler.TreeHandler, nsigma: float = 5, anti_particles: bool = None, nsigma_proton: float = None, nsigma_kaon: float = None, nsigma_pion: float = None, json_file_name: str = None) -> Tuple[hipe4ml.tree_handler.TreeHandler, hipe4ml.tree_handler.TreeHandler, hipe4ml.tree_handler.TreeHandler]:
 56    def get_protons_kaons_pions(
 57        self,
 58        tree_handler: TreeHandler,
 59        nsigma: float = 5,
 60        anti_particles: bool = None,
 61        nsigma_proton: float = None,
 62        nsigma_kaon: float = None,
 63        nsigma_pion: float = None,
 64        json_file_name: str = None,
 65    ) -> Tuple[TreeHandler, TreeHandler, TreeHandler]:
 66        """
 67        Gets protons, kaons, and pions from a TreeHandler in the nsigma region.
 68
 69        In this tof model, pions, muons, and electrons are treated the same.
 70
 71        Parameters
 72        ----------
 73        tree_handler : TreeHandler
 74            TreeHandler containing the data.
 75
 76        nsigma : float, optional
 77            Number of sigma for data cleaning, by default 5.
 78
 79        anti_particles : bool, optional
 80            Loads only antiparticles if set to True, positive particles if set to False.
 81            Defaults to None.
 82
 83        nsigma_proton : float, optional
 84            Number of sigma for protons, if not specified uses nsigma.
 85            Defaults to None.
 86
 87        nsigma_kaon : float, optional
 88            Number of sigma for kaons, if not specified uses nsigma.
 89            Defaults to None.
 90
 91        nsigma_pion : float, optional
 92            Number of sigma for pions, if not specified uses nsigma.
 93            Defaults to None.
 94
 95        json_file_name : str, optional
 96            Name of the JSON file containing variable names, by default None.
 97
 98        Returns
 99        -------
100        Tuple[TreeHandler, TreeHandler, TreeHandler]
101            Tuple containing TreeHandlers for protons, kaons, and pions.
102        """
103        anti_particles = anti_particles or self.anti_particles
104        nsigma_proton = nsigma_proton if nsigma_proton is not None else nsigma
105        nsigma_kaon = nsigma_kaon if nsigma_kaon is not None else nsigma
106        nsigma_pion = nsigma_pion if nsigma_pion is not None else nsigma
107        json_file_name = json_file_name or self.json_file_name
108
109        if anti_particles is False:
110            protons = self.get_particles_type(
111                tree_handler, Pid.PROTON.value, nsigma_proton, json_file_name
112            )
113            kaons = self.get_particles_type(
114                tree_handler, Pid.POS_KAON.value, nsigma_kaon, json_file_name
115            )
116            pions = self.get_particles_type(
117                tree_handler,
118                # pions, muons and electrons impossible to ditinguish in this model
119                [Pid.POS_PION.value, Pid.POS_MUON.value, Pid.POSITRON.value],
120                nsigma_pion,
121                json_file_name,
122            )
123        elif anti_particles is True:
124            protons = self.get_particles_type(
125                tree_handler, Pid.ANTI_PROTON.value, nsigma_proton, json_file_name
126            )
127            kaons = self.get_particles_type(
128                tree_handler, Pid.NEG_KAON.value, nsigma_kaon
129            )
130            pions = self.get_particles_type(
131                tree_handler,
132                [Pid.NEG_PION.value, Pid.NEG_MUON.value, Pid.ELECTRON.value],
133                nsigma_pion,
134                json_file_name,
135            )
136        print(
137            f"\nNumber of protons: {len(protons)}\nNumber of kaons: {len(kaons)}\nNumber of pions: {len(pions)}"
138        )
139        return (protons, kaons, pions)

Gets protons, kaons, and pions from a TreeHandler in the nsigma region.

In this tof model, pions, muons, and electrons are treated the same.

Parameters

tree_handler : TreeHandler TreeHandler containing the data.

nsigma : float, optional Number of sigma for data cleaning, by default 5.

anti_particles : bool, optional Loads only antiparticles if set to True, positive particles if set to False. Defaults to None.

nsigma_proton : float, optional Number of sigma for protons, if not specified uses nsigma. Defaults to None.

nsigma_kaon : float, optional Number of sigma for kaons, if not specified uses nsigma. Defaults to None.

nsigma_pion : float, optional Number of sigma for pions, if not specified uses nsigma. Defaults to None.

json_file_name : str, optional Name of the JSON file containing variable names, by default None.

Returns

Tuple[TreeHandler, TreeHandler, TreeHandler] Tuple containing TreeHandlers for protons, kaons, and pions.

def get_particles_type( self, tree_handler: hipe4ml.tree_handler.TreeHandler, pid: float, nsigma: float = 0.0, json_file_name: str = None) -> hipe4ml.tree_handler.TreeHandler:
141    def get_particles_type(
142        self,
143        tree_handler: TreeHandler,
144        pid: float,
145        nsigma: float = 0.0,
146        json_file_name: str = None,
147    ) -> TreeHandler:
148        """
149        Gets particles of a given pid in the selected sigma region of mass2.
150
151        Parameters:
152            tree_handler (TreeHandler): TreeHandler with the data.
153
154            pid (float): Pid of the given particle type.
155
156            nsigma (float, optional): Number of sigma to select the sigma region of mass2. Defaults to 0.
157
158            json_file_name (str, optional): Name of the JSON file containing variable names. Defaults to None.
159
160        Returns:
161            TreeHandler: TreeHandler with the particles of the given type in the specified sigma region.
162        """
163        json_file_name = json_file_name or self.json_file_name
164        pid_var_name = json_tools.load_var_name(json_file_name, "pid")
165        mass2_var_name = json_tools.load_var_name(json_file_name, "mass2")
166        particles = tree_handler.get_subset(f"{pid_var_name} == {pid}")
167        # getting selected nsigma region in the mass2
168        if nsigma > 0:
169            print(f"Getting particles pid={pid} in {nsigma}-sigma region")
170            mass2_column = particles.get_data_frame()[mass2_var_name]
171            mean = mass2_column.mean()
172            std = mass2_column.std()
173            if std > 0:
174                mass2_cut = json_tools.create_cut_string(
175                    mean - nsigma * std, mean + nsigma * std, mass2_var_name
176                )
177                particles = particles.get_subset(mass2_cut)
178
179        return particles

Gets particles of a given pid in the selected sigma region of mass2.

Parameters: tree_handler (TreeHandler): TreeHandler with the data.

pid (float): Pid of the given particle type.

nsigma (float, optional): Number of sigma to select the sigma region of mass2. Defaults to 0.

json_file_name (str, optional): Name of the JSON file containing variable names. Defaults to None.

Returns: TreeHandler: TreeHandler with the particles of the given type in the specified sigma region.

def load_tree( self, data_file_name: str = None, tree_type: str = 'plain_tree', max_workers: int = 1, model_handler: hipe4ml.model_handler.ModelHandler = None) -> hipe4ml.tree_handler.TreeHandler:
181    def load_tree(
182        self,
183        data_file_name: str = None,
184        tree_type: str = "plain_tree",
185        max_workers: int = 1,
186        model_handler: ModelHandler = None,
187    ) -> TreeHandler:
188        """
189        Loads tree from given file into hipe4ml TreeHandler.
190
191        Parameters:
192            data_file_name (str, optional): Name of the file with the tree. Defaults to None.
193
194            tree_type (str, optional): Type of the tree structure to be loaded. Defaults to "plain_tree".
195
196            max_workers (int, optional): Number of max_workers for ThreadPoolExecutor used to load data with multithreading.
197                Defaults to 1.
198                
199            model_handler (ModelHandler, optional): ModelHandler to apply if the dataset is validation one. Defaults to None.
200
201        Returns:
202            TreeHandler: hipe4ml structure containing the tree to train and test the model on.
203        """
204        data_file_name = data_file_name or self.data_file_name
205        tree_handler = TreeHandler()
206        preselection = self.clean_tree()
207        tree_handler.get_handler_from_large_file(
208            data_file_name,
209            tree_type,
210            preselection=preselection,
211            max_workers=max_workers,
212            model_handler=model_handler,
213            output_margin=False,
214        )
215        print(f"\nLoading tree from {data_file_name}...")
216        return tree_handler

Loads tree from given file into hipe4ml TreeHandler.

Parameters: data_file_name (str, optional): Name of the file with the tree. Defaults to None.

tree_type (str, optional): Type of the tree structure to be loaded. Defaults to "plain_tree".

max_workers (int, optional): Number of max_workers for ThreadPoolExecutor used to load data with multithreading.
    Defaults to 1.

model_handler (ModelHandler, optional): ModelHandler to apply if the dataset is validation one. Defaults to None.

Returns: TreeHandler: hipe4ml structure containing the tree to train and test the model on.

def clean_tree(self, json_file_name: str = None) -> str:
218    def clean_tree(self, json_file_name: str = None) -> str:
219        """
220        Creates a string with preselections (quality cuts, momentum range, and sign of charge).
221
222        Parameters:
223            json_file_name (str, optional): Name of the JSON file containing quality cuts definition
224                (if different than in the class). Defaults to None.
225
226        Returns:
227            str: Preselection string for the the TreeHandler object.
228        """
229        preselection = ""
230        json_file_name = json_file_name or self.json_file_name
231        quality_cuts = json_tools.load_quality_cuts(json_file_name)
232        momemntum_variable_name = json_tools.load_var_name(json_file_name, "momentum")
233        charge_variable_name = json_tools.load_var_name(json_file_name, "charge")
234
235        for cut in quality_cuts:
236            preselection += f"({cut}) and "
237        # include specific momentum cut
238        p_cut = json_tools.create_cut_string(
239            self.lower_p_cut, self.upper_p_cut, momemntum_variable_name
240        )
241        preselection += f"({p_cut}) and "
242        # include sign of charge
243        if self.anti_particles is False:
244            preselection += f"({charge_variable_name} > 0)"
245        elif self.anti_particles is True:
246            preselection += f"({charge_variable_name} < 0)"
247
248        return preselection

Creates a string with preselections (quality cuts, momentum range, and sign of charge).

Parameters: json_file_name (str, optional): Name of the JSON file containing quality cuts definition (if different than in the class). Defaults to None.

Returns: str: Preselection string for the TreeHandler object.