How to use the coffea.hist.Cat class in coffea

To help you get started, we’ve selected a few coffea examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github CoffeaTeam / coffea / tests / test_hist_plot.py View on Github external
arrays.pop('E'),
                                                           )
    electrons = awkward.JaggedArray.zip(p4=p4, **arrays)

    arrays = {k.replace('Muon_', ''): v for k, v in tree.arrays("Muon_*", namedecode='ascii').items()}
    p4 = uproot_methods.TLorentzVectorArray.from_cartesian(
        arrays.pop('Px'),
        arrays.pop('Py'),
        arrays.pop('Pz'),
        arrays.pop('E'),
    )
    muons = awkward.JaggedArray.zip(p4=p4, **arrays)

    # Two types of axes exist presently: bins and categories
    lepton_kinematics = hist.Hist("Events",
                                  hist.Cat("flavor", "Lepton flavor"),
                                  hist.Bin("pt", "$p_{T}$", 19, 10, 100),
                                  hist.Bin("eta", r"$\eta$", [-2.5, -1.4, 0, 1.4, 2.5]),
                                  )

    # Pass keyword arguments to fill; all arrays must be flat numpy arrays.
    # The user is responsible for ensuring all arrays have the same jagged structure!
    lepton_kinematics.fill(flavor="electron", pt=electrons['p4'].pt.flatten(), eta=electrons['p4'].eta.flatten())
    lepton_kinematics.fill(flavor="muon", pt=muons['p4'].pt.flatten(), eta=muons['p4'].eta.flatten())

    return lepton_kinematics
github CoffeaTeam / coffea / tests / test_hist_tools.py View on Github external
assert h_regular_bins.sum("x", "y", overflow='all').values(sumw2=True)[()] == (nentries, nentries)
    # bin x=2, y=10 (when overflow removed)
    count_some_bin = np.sum((test_pt>=20.)&(test_pt<30.)&(test_eta>=0.)&(test_eta<0.3))
    assert h_regular_bins.integrate("x", slice(20, 30)).values()[()][10] == count_some_bin
    assert h_regular_bins.integrate("y", slice(0, 0.3)).values()[()][2] == count_some_bin

    h_reduced = h_regular_bins[10:,-.6:]
    # bin x=1, y=2
    assert h_reduced.integrate("x", slice(20, 30)).values()[()][2] == count_some_bin
    assert h_reduced.integrate("y", slice(0, 0.3)).values()[()][1] == count_some_bin
    h_reduced.fill(x=23, y=0.1)
    assert h_reduced.integrate("x", slice(20, 30)).values()[()][2] == count_some_bin + 1
    assert h_reduced.integrate("y", slice(0, 0.3)).values()[()][1] == count_some_bin + 1

    animal = hist.Cat("animal", "type of animal")
    vocalization = hist.Cat("vocalization", "onomatopoiea is that how you spell it?")
    h_cat_bins = hist.Hist("I like cats", animal, vocalization)
    h_cat_bins.fill(animal="cat", vocalization="meow", weight=2.)
    h_cat_bins.fill(animal="dog", vocalization="meow", weight=np.array([-1., -1., -5.]))
    h_cat_bins.fill(animal="dog", vocalization="woof", weight=100.)
    h_cat_bins.fill(animal="dog", vocalization="ruff")
    assert h_cat_bins.values()[("cat", "meow")] == 2.
    assert h_cat_bins.values(sumw2=True)[("dog", "meow")] == (-7., 27.)
    assert h_cat_bins.integrate("vocalization", ["woof", "ruff"]).values(sumw2=True)[("dog",)] == (101., 10001.)

    height = hist.Bin("height", "height [m]", 10, 0, 5)
    h_mascots_1 = hist.Hist("fermi mascot showdown",
                          animal,
                          vocalization,
                          height,
                          # weight is a reserved keyword
                          hist.Bin("mass", "weight (g=9.81m/s**2) [kg]", np.power(10., np.arange(5)-1)),
github CoffeaTeam / coffea / tests / test_hist_tools.py View on Github external
nentries = np.sum(counts)
    assert h_regular_bins.sum("x", "y", overflow='all').values(sumw2=True)[()] == (nentries, nentries)
    # bin x=2, y=10 (when overflow removed)
    count_some_bin = np.sum((test_pt>=20.)&(test_pt<30.)&(test_eta>=0.)&(test_eta<0.3))
    assert h_regular_bins.integrate("x", slice(20, 30)).values()[()][10] == count_some_bin
    assert h_regular_bins.integrate("y", slice(0, 0.3)).values()[()][2] == count_some_bin

    h_reduced = h_regular_bins[10:,-.6:]
    # bin x=1, y=2
    assert h_reduced.integrate("x", slice(20, 30)).values()[()][2] == count_some_bin
    assert h_reduced.integrate("y", slice(0, 0.3)).values()[()][1] == count_some_bin
    h_reduced.fill(x=23, y=0.1)
    assert h_reduced.integrate("x", slice(20, 30)).values()[()][2] == count_some_bin + 1
    assert h_reduced.integrate("y", slice(0, 0.3)).values()[()][1] == count_some_bin + 1

    animal = hist.Cat("animal", "type of animal")
    vocalization = hist.Cat("vocalization", "onomatopoiea is that how you spell it?")
    h_cat_bins = hist.Hist("I like cats", animal, vocalization)
    h_cat_bins.fill(animal="cat", vocalization="meow", weight=2.)
    h_cat_bins.fill(animal="dog", vocalization="meow", weight=np.array([-1., -1., -5.]))
    h_cat_bins.fill(animal="dog", vocalization="woof", weight=100.)
    h_cat_bins.fill(animal="dog", vocalization="ruff")
    assert h_cat_bins.values()[("cat", "meow")] == 2.
    assert h_cat_bins.values(sumw2=True)[("dog", "meow")] == (-7., 27.)
    assert h_cat_bins.integrate("vocalization", ["woof", "ruff"]).values(sumw2=True)[("dog",)] == (101., 10001.)

    height = hist.Bin("height", "height [m]", 10, 0, 5)
    h_mascots_1 = hist.Hist("fermi mascot showdown",
                          animal,
                          vocalization,
                          height,
                          # weight is a reserved keyword
github CoffeaTeam / coffea / tests / test_hist_tools.py View on Github external
assert h_species.integrate("vocalization").values()[('birds',)][1,2] == nbirds_bin
    tally = h_species.sum("mass", "height", "vocalization").values()
    assert tally[('birds',)] == 1004.
    assert tally[('mammals',)] == 91.

    h_species.scale({"honk": 0.1, "huff": 0.9}, axis="vocalization")
    h_species.scale(5.)
    tally = h_species.sum("mass", height, vocalization).values(sumw2=True)
    assert tally[('birds',)] == (520., 350.)
    assert tally[('mammals',)] == (435., 25*(40*(0.9**2)+20*(2.5**2)+1))

    assert h_species.axis("vocalization") is vocalization
    assert h_species.axis("height") is height
    assert h_species.integrate("vocalization", "h*").axis("height") is height

    tall_class = hist.Cat("tall_class", "species class (species above 1m)")
    mapping = {
        'birds': (['goose', 'crane'], slice(1., None)),
        'mammals': (['bison', 'fox'], slice(1., None)),
    }
    h_tall = h_mascots.group((animal, height), tall_class, mapping)
    tall_bird_count = np.sum(goose_h>=1.) + np.sum(crane_h>=1)
    assert h_tall.sum("mass", "vocalization").values()[('birds',)] == tall_bird_count
    tall_mammal_count = np.sum(adult_bison_h>=1.) + np.sum(baby_bison_h>=1) + 1
    assert h_tall.sum("mass", "vocalization").values()[('mammals',)] == tall_mammal_count

    h_less = h_mascots.remove(["fox", "bison"], axis="animal")
    assert h_less.sum("vocalization", "height", "mass", "animal").values()[()] == 1004.
github CoffeaTeam / coffea / coffea / processor / test_items / NanoTestProcessor.py View on Github external
def __init__(self, columns=None):
        """Store the column list and build the accumulator of outputs.

        Parameters
        ----------
        columns : list, optional
            Stored unchanged on ``self._columns``. When omitted (or ``None``),
            the instance gets its own new empty list.
        """
        # A literal ``[]`` default is evaluated once at definition time, so
        # every instance relying on it would share (and could mutate) the same
        # list object. Use a None sentinel and create a fresh list per instance.
        self._columns = [] if columns is None else columns

        # One categorical axis (dataset name) plus two identically-binned
        # numeric axes: 30000 bins spanning 0.25 to 300.
        dataset_axis = hist.Cat("dataset", "Primary dataset")
        mass_axis = hist.Bin("mass", r"$m_{\mu\mu}$ [GeV]", 30000, 0.25, 300)
        pt_axis = hist.Bin("pt", r"$p_{T}$ [GeV]", 30000, 0.25, 300)

        # The accumulator holds one histogram per quantity, each split by
        # dataset, and an integer counter dict for the cutflow.
        self._accumulator = processor.dict_accumulator({
            'mass': hist.Hist("Counts", dataset_axis, mass_axis),
            'pt': hist.Hist("Counts", dataset_axis, pt_axis),
            'cutflow': processor.defaultdict_accumulator(int),
        })
github CoffeaTeam / coffea / coffea / processor / test_items / NanoTestProcessor.py View on Github external
def __init__(self, columns=None):
        """Store the column list and build the accumulator of outputs.

        Parameters
        ----------
        columns : list, optional
            Stored unchanged on ``self._columns``. When omitted (or ``None``),
            the instance gets its own new empty list.
        """
        # A literal ``[]`` default is evaluated once at definition time, so
        # every instance relying on it would share (and could mutate) the same
        # list object. Use a None sentinel and create a fresh list per instance.
        self._columns = [] if columns is None else columns

        # One categorical axis (dataset name) plus two identically-binned
        # numeric axes: 30000 bins spanning 0.25 to 300.
        dataset_axis = hist.Cat("dataset", "Primary dataset")
        mass_axis = hist.Bin("mass", r"$m_{\mu\mu}$ [GeV]", 30000, 0.25, 300)
        pt_axis = hist.Bin("pt", r"$p_{T}$ [GeV]", 30000, 0.25, 300)

        # The accumulator holds one histogram per quantity, each split by
        # dataset, and an integer counter dict for the cutflow.
        self._accumulator = processor.dict_accumulator({
            'mass': hist.Hist("Counts", dataset_axis, mass_axis),
            'pt': hist.Hist("Counts", dataset_axis, pt_axis),
            'cutflow': processor.defaultdict_accumulator(int),
        })