Commit 88320e73 authored by Niclas Steve Eich's avatar Niclas Steve Eich

updated to new aci

parent db1f054a
@@ -7,8 +7,6 @@ Definition of an analysis example
import scinum as sn
import utils.aci as aci
import six
import copy
#
# analysis and config
@@ -24,42 +22,39 @@ def rgb(r, g, b):
return (r, g, b)
# create the analysis
config_2017 = cfg = analysis.copy(name="example_analysis")
analysis = analysis.copy(name="vh")
get = analysis.processes.get
# analysis = od.Analysis("example_analysis", 1)
# setup the config for the 2017 campaign
# we pass no name, so the config will have the same name as the campaign
# config_2017 = cfg = run_2017.default_config(analysis=analysis)
# corrections which are only used for distinct analyses
analysis.aux["non_common_corrections"] = []
analysis.aux["signal_process"] = "dy"
analysis.aux["stat_model"] = "example_model"
cfg.aux["stat_model"] = "example_model"
# add analysis specific processes
specific_processes = []
#
# process groups
#
cfg.aux["process_groups"] = {
analysis.aux["process_groups"] = {
"default": [
# "data_dl",
"dy",
"tt",
"ttV",
"ttVH",
"ttVV", # one file missing?
"ttVV",
],
"plotting": [
# "data_dl",
"tt",
"dy",
"tt",
"ttV",
"ttVH",
"ttVV",
# "plot_other",
],
}
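For orientation, a minimal sketch (not part of the commit) of how such a group could be resolved into process objects; `resolve_group` is hypothetical and only relies on the `analysis.aux["process_groups"]` dict and the `analysis.processes.get` accessor already used above:

```python
# Hypothetical helper: resolve a named process group into process objects.
def resolve_group(analysis, group_name):
    names = analysis.aux["process_groups"][group_name]
    return [analysis.processes.get(name) for name in names]

# e.g. resolve_group(analysis, "plotting")
```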
cfg.aux["signal_prcess"] = "dy"
cfg.aux["btag_sf_shifts"] = [
analysis.aux["btag_sf_shifts"] = [
"lf",
"lfstats1",
"lfstats2",
@@ -70,50 +65,8 @@ cfg.aux["btag_sf_shifts"] = [
"cferr2",
]
processes = [
{
"name": "plot_other",
"idx": 999992,
"color": rgb(0, 183, 153),
"label": r"Other",
"processes": [
get("ttV"),
get("ttVV"),
get("ttVH"),
],
},
# {
# "name": "ttX",
# "idx": 999995,
# "label": r"ttX",
# "processes": [get("ttV"), get("ttVV"), get("ttVH")],
# },
]
specific_processes = []
for process in processes:
specific_processes.append(
aci.Process(
name=process["name"],
id=process["idx"],
color=process.get("color", rgb(55, 55, 55)),
label=process["label"],
processes=process["processes"],
)
)
# add all processes now
# config_2017.processes.extend(specific_processes)
# channels to use
channels = ["mumu", "ee"]
# remove all channels from cfg which are not in `channels`
# from IPython import embed;embed()
PrepareConfig(
config_2017,
# channels=channels,
# defines which datasets we will use
analysis,
processes=[
"dy_lep_10To50",
"dy_lep_nj",
@@ -121,17 +74,15 @@ PrepareConfig(
"ttV",
"ttVH",
"ttVV",
"data_ee",
"data_mumu",
],
ignore_datasets=[], # "TTWW", "data_B_ee"
ignore_datasets=[],
allowed_exclusive_processes=[],
)
from example_analysis.config.categories import setup_categories
setup_categories(config_2017)
setup_categories(analysis)
from example_analysis.config.variables import setup_variables
setup_variables(config_2017)
setup_variables(analysis)
@@ -5,234 +5,24 @@ import numpy as np
import hist
import example_analysis.config.analysis as example_config
from utils.datacard import Datacard, RDict
from utils.util import reluncs, wquant
from enum import IntEnum, unique
from rich.console import Console
import rich.table
# @unique
# class Processes(IntEnum):
# # Signal Processes
# # They have to be <= 0
# WH = -2
# ZH_bbll = -1
# ggZH_bbll = 0
# # Background Processes
# # They have to be > 0
Processes = {}
all_processes = example_config.cfg.aux["process_groups"]["default"].copy()
signal_process = example_config.cfg.aux["signal_process"]
Processes[signal_process] = -1
# enumerate only the backgrounds; otherwise the signal id set above
# would be overwritten with a positive index
for index, process in enumerate((p for p in all_processes if p != signal_process), start=1):
    Processes[process] = index
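Given the `default` group and `dy` as the signal process from the config above, the mapping evaluates to the layout the commented-out enum sketch requires (signal <= 0, backgrounds > 0):

```python
# Expected result, derived from the config shown above:
assert Processes == {"dy": -1, "tt": 1, "ttV": 2, "ttVH": 3, "ttVV": 4}
```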
class DataCardLogger(Datacard):
    def __init__(self, analysis, category, variable, hists, config_inst, year):
        super().__init__(analysis, category, variable, hists, config_inst, year)
        # per-instance store; a class-level dict would be shared across instances
        self.uncertainty_dict = {}
    def add_systematics(self, names, type, processes, **kwargs):
        if isinstance(names, dict):
            for name, values in names.items():
                self.uncertainty_dict[name] = {
                    "value": values,
                    "type": type,
                    "processes": processes,
                }
        elif isinstance(names, list):
            for name in names:
                self.uncertainty_dict[name] = {
                    "value": "",
                    "type": type,
                    "processes": processes,
                }
        elif isinstance(names, str):
            self.uncertainty_dict[names] = {
                "value": "",
                "type": type,
                "processes": processes,
            }
        super().add_systematics(names, type, processes, **kwargs)
    def dump_uncertainty_table(self, target=None, console_output=True):
        table = rich.table.Table(title="Uncertainties")
        table.add_column("Name")
        table.add_column("type")
        table.add_column("value")
        table.add_column("processes")
        for name, val in sorted(self.uncertainty_dict.items()):
            if isinstance(val["value"], (list, set)):
                value = ", ".join(map(str, list(val["value"])))
            else:
                value = str(val["value"])
            # row order must match the columns: Name, type, value, processes
            table.add_row(name, val["type"], value, " ".join(list(val["processes"])))
        if target is not None:
            with open(target, "wt") as report_file:
                console = Console(file=report_file)
                console.print(table)
        if console_output:
            console = Console()
            console.print(table)
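As a self-contained sketch of the `rich` table pattern used by `dump_uncertainty_table` (the row values here are made up):

```python
from rich.console import Console
import rich.table

table = rich.table.Table(title="Uncertainties")
for column in ("Name", "type", "value", "processes"):
    table.add_column(column)
# one illustrative row; real rows come from uncertainty_dict
table.add_row("CMS_lumi_13TeV_2017", "lnN", "1.02", "dy tt ttV ttVH ttVV")
Console().print(table)
```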
class StatModel(DataCardLogger):
_processes = Processes
class StatModel(Datacard):
@property
def custom_lines(self):
return [
f"{self.bin} autoMCStats 10",
]
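With a hypothetical bin name, `custom_lines` renders to a single directive that enables combine's per-bin MC statistical uncertainties (autoMCStats):

```python
# Sketch with an assumed bin name; self.bin is set by the Datacard base class.
bin_name = "mumu_2017"  # hypothetical
custom_lines = [f"{bin_name} autoMCStats 10"]
assert custom_lines == ["mumu_2017 autoMCStats 10"]
```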
# hook for renamings
# rename processes
# @property
# def rprocesses(self):
# return RDict(os.path.abspath("bbww_dl/models/processes.txt"))
#
# # rename nuisances
# @property
# def rnuisances(self):
# return RDict(os.path.abspath(f"bbww_dl/models/systematics_{self.year}.txt"))
@classmethod
def requires(cls, task):
from tasks.group import Rebin
return Rebin.req(task, process_group=list(Processes.keys())) # + ["data"]
def build_systematics(self):
# lumi https://gitlab.cern.ch/hh/naming-conventions#luminosity
lumi = {
2017: {
"CMS_lumi_13TeV_2017": 1.020,
},
}
self.add_systematics(
names=lumi[int(self.year)],
type="lnN",
processes=self.processes,
)
# process xsec uncertainties
# https://gitlab.cern.ch/hh/naming-conventions#theory-uncertainties
self.add_systematics(
names={
"m_top_unc_ggHH": 1.026,
"alpha_s_ggHH": 1.021,
"pdf_ggHH": 1.021,
# scale uncertainty will be added by PhysicsModel
"QCDscale_ggHH": (0.950, 1.022),
},
type="lnN",
processes=[p for p in self.processes if p.startswith("ggHH_")],
)
self.add_systematics(
names={
"pdf_qqHH": 1.021,
# scale uncertainty will be added by PhysicsModel
"QCDscale_qqHH": (0.9996, 1.0003),
},
type="lnN",
processes=[p for p in self.processes if p.startswith("qqHH_")],
)
# tt
self.add_systematics(
names={
"m_top_unc_tt": reluncs("tt", uncs="mtop"),
"pdf_tt": reluncs("tt", uncs="pdf"),
"QCDscale_tt": reluncs("tt", uncs="scale"),
"CMS_bbWW_TT_norm": 1.3,
},
type="lnN",
processes=["tt"],
)
# dy
self.add_systematics(
names={
"integration_dy": reluncs("dy_lep_50ToInf", uncs="integration"),
"pdf_dy": reluncs("dy_lep_50ToInf", uncs="pdf"),
"QCDscale_dy": reluncs("dy_lep_50ToInf", uncs="scale"),
},
type="lnN",
processes=["dy"],
)
self.add_systematics(
names={
"pdf_st": reluncs("st", uncs="pdf"),
"QCDscale_st": reluncs("st", uncs="scale"),
},
type="lnN",
processes=["st"],
)
# add shape uncs
# misc
self.add_systematics(
names="top_pT_reweighting",
type="shape",
strength=1.0,
processes=["tt"],
)
common_shape_systs = [
"pileup",
]
if self.year in ("2016", "2017"):
common_shape_systs += ["l1_ecal_prefiring"]
self.add_systematics(
names=common_shape_systs,
type="shape",
strength=1.0,
processes=self.processes,
)
btagshifts = [
"btagWeight_hf",
"btagWeight_hfstats1",
"btagWeight_hfstats2",
"btagWeight_lf",
"btagWeight_lfstats1",
"btagWeight_lfstats2",
"btagWeight_subjet",
]
self.add_systematics(
names=self.config_inst.aux["jes_sources"] + ["jer", "UnclustEn"],
type="shape",
strength=1.0,
processes=self.processes,
suffix=("_up", "_down"),
)
# lepton efficiencies
self.add_systematics(
names="electron*",
type="shape",
strength=1.0,
processes=self.processes,
)
self.add_systematics(
names="muon*",
type="shape",
strength=1.0,
processes=self.processes,
)
# trigger
self.add_systematics(
names="trigger*",
type="shape",
strength=1.0,
processes=self.processes,
)
return Rebin.req(task, process_group=("default",))  # alternatives: "plotting", list(cls.processes), + ["data"]
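Several `lnN` values above are passed as `(down, up)` tuples, e.g. `QCDscale_ggHH`; a minimal sketch of how such an asymmetric entry is conventionally written in a datacard column (`fmt_lnN` is hypothetical, not a method of `Datacard`):

```python
# Hypothetical formatter, assuming the (down, up) tuple convention used above.
def fmt_lnN(value):
    if isinstance(value, tuple):
        down, up = value
        return f"{down:g}/{up:g}"  # asymmetric lnN, e.g. "0.95/1.022"
    return f"{value:g}"            # symmetric lnN, e.g. "1.021"

assert fmt_lnN((0.950, 1.022)) == "0.95/1.022"
```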
@@ -122,17 +122,21 @@ class BaseSelection:
vals = [preproc(getattr(X[id], var)) for var in vars]
out[name] = np.stack(vals, axis=-1)
# set all nans or infs to 0
out[name] = np.nan_to_num(
out[name], nan=0.0, posinf=0.0, neginf=-0.0
).astype(typ)
out[name] = np.nan_to_num(out[name], nan=0.0, posinf=0.0, neginf=-0.0).astype(typ)
return out
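A standalone sketch of the stack-and-sanitize step above (`pt`/`eta` are made-up inputs):

```python
import numpy as np

pt = np.array([25.0, np.nan, 40.0])
eta = np.array([0.5, 1.2, np.inf])
out = np.stack([pt, eta], axis=-1)  # shape (3, 2), one row per event
out = np.nan_to_num(out, nan=0.0, posinf=0.0, neginf=-0.0).astype(np.float32)
```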
dtype = np.float32
debug_dataset = "data_F_mumu" # "ggZH_HToBB_ZToLL_M125" # "data_F_mumu" # "TTTo2L2Nu" # "WWW_4F"
debug_dataset = (
"data_F_mumu" # "ggZH_HToBB_ZToLL_M125" # "data_F_mumu" # "TTTo2L2Nu" # "WWW_4F"
)
# debug_uuids = {"5CB647A3-93F6-0B40-BD9F-3DB44E8D60F7"}
def select(self, events):
def const_arr(value):
arr = np.broadcast_to(value, shape=(n_events,))
arr.flags.writeable = False
return arr
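`const_arr` relies on `np.broadcast_to` returning a zero-copy, read-only view, so a per-event constant costs no extra memory; a quick standalone check:

```python
import numpy as np

arr = np.broadcast_to(7, shape=(5,))
arr.flags.writeable = False  # already read-only; kept for explicitness
assert arr.shape == (5,) and arr[0] == 7
# arr[0] = 8 would raise: assignment destination is read-only
```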
dataset_key = tuple(events.metadata["dataset"])
dataset_inst = self.get_dataset(events)
@@ -352,9 +356,7 @@ class BaseSelection:
POGMuonSF(self.corrections)(good_muons, weights, self.year)
# trigger sf
lep_pt = ak.fill_none(
ak.pad_none(good_leptons.pt, 2, clip=True), np.nan
)
lep_pt = ak.fill_none(ak.pad_none(good_leptons.pt, 2, clip=True), np.nan)
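A standalone sketch of the pad/clip/fill pattern used for `lep_pt`, guaranteeing exactly two entries per event with `nan` padding:

```python
import awkward as ak
import numpy as np

pts = ak.Array([[30.0, 20.0, 10.0], [50.0], []])
lep_pt = ak.fill_none(ak.pad_none(pts, 2, clip=True), np.nan)
# -> [[30.0, 20.0], [50.0, nan], [nan, nan]]
```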
if "trigger" in self.corrections:
Ctrig = self.corrections["trigger"]["hist"]
for name, ch, trigger_hist in [
@@ -370,12 +372,10 @@
),
]:
args = lep_pt[:, 0], lep_pt[:, 1]
trigger_sf = Ctrig[f"ttHbb_dilepton_trigger_{trigger_hist}"](
trigger_sf = Ctrig[f"ttHbb_dilepton_trigger_{trigger_hist}"](*args)
trigger_sf_error = Ctrig[f"ttHbb_dilepton_trigger_{trigger_hist}_error"](
*args
)
trigger_sf_error = Ctrig[
f"ttHbb_dilepton_trigger_{trigger_hist}_error"
](*args)
weights.add(
f"trigger_{name}_sf",
np.where(ch, trigger_sf, 1),
@@ -455,11 +455,12 @@ class BaseSelection:
if dataset_inst.is_mc:
(process,) = dataset_inst.processes.values
procid = np.full(n_events, process.id)
procid = const_arr(process.id)
else:
raise NotImplementedError(
"There was an old Pget import, which is no longer available. Please check the comment below this error!"
)
procid = const_arr(self.analysis_inst.processes.get("data").id)
# raise NotImplementedError(
#     "There was an old Pget import, which is no longer available. Please check the comment below this error!"
# )
# procid = np.full(n_events, Pget("data").id)
yield locals()
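`yield locals()` hands all of the selection's intermediate variables to the caller as a single dict; a minimal standalone sketch of the pattern:

```python
def produce():
    x = 1
    y = x + 1
    yield locals()  # {"x": 1, "y": 2}

(result,) = produce()
assert result["y"] == 2
```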