From 8c4a2f222d7d2d2c35d5fd4feab86b526f9c8454 Mon Sep 17 00:00:00 2001 From: Jacan Chaplais Date: Thu, 21 Apr 2022 15:54:31 +0100 Subject: [PATCH] MaskGroup classmethod from numpy + enable numerical subscript + docstrings --- graphicle/data.py | 185 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 180 insertions(+), 5 deletions(-) diff --git a/graphicle/data.py b/graphicle/data.py index a961d4a..d10ef9b 100644 --- a/graphicle/data.py +++ b/graphicle/data.py @@ -138,13 +138,25 @@ class MaskGroup(MaskBase): repr=False, factory=dict, converter=_mask_dict_convert ) + @classmethod + def from_numpy_structured(cls, arr: np.ndarray): + return cls(dict(map(lambda name: (name, arr[name]), arr.dtype.names))) + def __repr__(self): keys = ", ".join(self.names) return f"MaskGroup(mask_arrays=[{keys}])" def __getitem__(self, key): if not isinstance(key, str): - raise KeyError("Key must be string.") + return self.__class__( + dict( + map( + lambda name_arr: (name_arr[0], name_arr[1][key]), + self._mask_arrays.items(), + ) + ) + ) + return self._mask_arrays[key] def __setitem__(self, key, mask): @@ -229,10 +241,11 @@ class PdgArray(ArrayBase): data: np.ndarray = array_field("int") __lookup_table: __PdgRecords = field(init=False, repr=False) - __mega_to_giga: float = 1.0e-3 + __mega_to_giga: float = field(init=False, repr=False) def __attrs_post_init__(self): self.__lookup_table = self.__PdgRecords() + self.__mega_to_giga: float = 1.0e-3 def __len__(self): return len(self.data) @@ -457,9 +470,19 @@ def __array__(self): #################### @define class HelicityArray(ArrayBase): + """Data structure containing helicity / polarisation values for + particle set. + + Attributes + ---------- + data : ndarray + Helicity values. + """ + data: np.ndarray = array_field("helicity") def copy(self): + """Returns a new StatusArray instance with same data.""" return deepcopy(self) def __getitem__(self, key): @@ -479,9 +502,23 @@ def __array__(self): #################################### @define class StatusArray(ArrayBase): + """Data structure containing status values for particle set. + + Attributes + ---------- + data : ndarray + Status codes. + + Notes + ----- + These codes are specific to the Monte-Carlo event generators which + produced the data. + """ + data: np.ndarray = array_field("h_int") def copy(self): + """Returns a new StatusArray instance with same data.""" return deepcopy(self) def __getitem__(self, key): @@ -545,12 +582,22 @@ def hard_mask(self) -> MaskGroup: ######################################### @define class ParticleSet(ParticleBase): - """Combines rich particle description. + """Composite of data structures containing particle set description. Attributes ---------- - data : ndarray - Structured array containing color / anti-color pairs. + pdg : PdgArray + PDG codes. + pmu : MomentumArray + Four momenta. + color : ColorArray + Color / anti-color pairs. + helicity : HelicityArray + Helicity values. + status : StatusArray + Status codes from Monte-Carlo event generator. + final : MaskArray + Boolean array indicating final state in particle set. """ pdg: PdgArray = PdgArray() @@ -597,6 +644,32 @@ def from_numpy( status: Optional[np.ndarray] = None, final: Optional[np.ndarray] = None, ): + """Creates a ParticleSet instance directly from numpy arrays. + + Parameters + ---------- + pdg : ndarray, optional + PDG codes. + pmu : ndarray, optional + Four momenta, formatted in columns of (x, y, z, e), or as + a structured array with those fields. + color : ndarray, optional + Color / anti-color pairs, formatted in columns of + (col, acol), or as a structured array with those fields. + helicity : ndarray, optional + Helicity values. + status : ndarray, optional + Status codes from Monte-Carlo event generator. + final : ndarray, optional + Boolean array indicating which particles are final state. + + Returns + ------- + particle_set : ParticleSet + A composite object, wrapping the data provided in Graphicle + objects, and providing a unified interface to them. + """ + def optional(data_class, data: Optional[np.ndarray]): return data_class(data) if data is not None else data_class() @@ -622,6 +695,21 @@ class _AdjDict(TypedDict): @define class AdjacencyList(AdjacencyBase): + """Describes relations between particles in particle set using a + COO edge list, and provides methods to convert representation. + + Attributes + ---------- + edges : ndarray + COO edge list. + nodes : ndarray + Vertex ids of each particle with at least one edge. + weights : ndarray + Scalar value embedded on each edge. + matrix : ndarray + Adjacency matrix representation. + """ + _data: np.ndarray = array_field("edge") weights: np.ndarray = array_field("double") @@ -636,6 +724,28 @@ def __getitem__(self, key): key = key.data return self.__class__(np.array(self._data[key])) + def __add__(self, other_array: "AdjacencyList") -> "AdjacencyList": + """Combines two AdjacencyList objects by extending edge and + weight lists of both arrays. + If the same edge occurs in both AdjacencyLists, this will lead + to multigraph connectivity. + """ + if not isinstance(other_array, self.__class__): + raise ValueError("Can only add AdjacencyList.") + this_has_weights = len(self.weights) != 0 + other_has_weights = len(other_array.weights) != 0 + both_weighted = this_has_weights and other_has_weights + both_unweighted = (not this_has_weights) and (not other_has_weights) + if not (both_weighted or both_unweighted): + raise ValueError( + "Mismatch between weights: both adjacency lists " + + "must either be weighted, or unweighted." + ) + return self.__class__( + data=np.concatenate([self._data, other_array._data]), + weights=np.concatenate([self.weights, other_array.weights]), + ) + def copy(self): return deepcopy(self) @@ -694,6 +804,9 @@ def to_dicts( edge_data: Optional[Dict[str, ArrayBase]] = None, node_data: Optional[Dict[str, ArrayBase]] = None, ) -> _AdjDict: + """Returns data in dictionary format, which is more easily + parsed by external libraries, such as NetworkX. + """ if edge_data is None: edge_data = dict() if node_data is None: @@ -726,6 +839,39 @@ def make_data_dicts(orig: Tuple[Any, ...], data: Dict[str, ArrayBase]): ##################################################### @define class Graphicle: + """Composite object, combining particle set data with relational + information between particles. + + Attributes + ---------- + particles : ParticleSet + Data describing the particles in the set. + adj : AdjacencyList + Connectivity between the particles, to form a graph. + pdg : PdgArray + PDG codes. + pmu : MomentumArray + Four momenta. + color : ColorArray + Color / anti-color pairs. + helicity : HelicityArray + Helicity values. + status : StatusArray + Status codes from Monte-Carlo event generator. + final : MaskArray + Boolean array indicating final state in particle set. + edges : ndarray + COO edge list. + nodes : ndarray + Vertex ids of each particle with at least one edge. + hard_mask : MaskGroup + Identifies which particles participate in the hard process. + For Pythia, this is split into four categories: incoming, + intermediate, outgoing, outgoing_nonperturbative_diffraction. + hard_vertex : int + Vertex at which the hard process is initiated. + """ + particles: ParticleSet = ParticleSet() adj: AdjacencyList = AdjacencyList() @@ -755,6 +901,35 @@ def from_numpy( edges: Optional[np.ndarray] = None, weights: Optional[np.ndarray] = None, ): + """Instantiates a Graphicle object from an optional collection + of numpy arrays. + + Parameters + ---------- + pdg : ndarray, optional + PDG codes. + pmu : ndarray, optional + Four momenta, formatted in columns of (x, y, z, e), or as + a structured array with those fields. + color : ndarray, optional + Color / anti-color pairs, formatted in columns of + (col, acol), or as a structured array with those fields. + helicity : ndarray, optional + Helicity values. + status : ndarray, optional + Status codes from Monte-Carlo event generator. + final : ndarray, optional + Boolean array indicating which particles are final state. + edges : ndarray, optional + COO formatted pairs of vertex ids, of shape (N, 2), where + N is the number of particles in the graph. + Alternatively, supply a structured array with fields + (in, out). + weights : ndarray, optional + Weights to be associated with each edge in the COO edge + list, provided in the same order. + """ + particles = ParticleSet.from_numpy( pdg=pdg, pmu=pmu,