Commit 2fe13b78 authored by Peter Fackeldey's avatar Peter Fackeldey
Browse files

solve merge issues and update notebook

parent a8f4fb51
......@@ -2,50 +2,9 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"| | Dataset | # files | Size in GB |\n",
"|---:|:---------------------|----------:|-------------:|\n",
"| 0 | diHiggs_bbWW_dl | 1 | 0.840575 |\n",
"| 1 | tt_sl | 46 | 89.7791 |\n",
"| 2 | tt_dl | 66 | 138.113 |\n",
"| 3 | dy_lep_0j | 47 | 90.5825 |\n",
"| 4 | dy_lep_1j | 63 | 121.776 |\n",
"| 5 | dy_lep_2j | 44 | 88.8813 |\n",
"| 6 | ww | 5 | 9.07005 |\n",
"| 7 | zz | 2 | 2.29989 |\n",
"| 8 | wz | 3 | 4.64214 |\n",
"| 9 | st_t_channel_top | 5 | 9.7472 |\n",
"| 10 | st_t_channel_antitop | 3 | 6.00462 |\n",
"| 11 | st_s_channel | 8 | 15.61 |\n",
"| 12 | st_tW_top | 8 | 14.662 |\n",
"| 13 | st_tW_antitop | 8 | 14.3483 |\n",
"| 14 | wjets | 16 | 29.7 |\n",
"| 15 | wjets_ext1 | 21 | 40.1326 |\n",
"| 16 | ttWjets_2q | 1 | 2.09891 |\n",
"| 17 | ttWjets_1l_1nu | 7 | 12.5142 |\n",
"| 18 | ttZ_2q | 1 | 1.98814 |\n",
"| 19 | ttZ_2l_2nu | 10 | 19.7742 |"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"execution_count": null,
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total size: 712.57 GB\n"
]
}
],
"outputs": [],
"source": [
"import json\n",
"import os\n",
......@@ -72,24 +31,9 @@
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d1fc629161e9496884bb657f77f724b6",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …"
]
},
"execution_count": null,
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"%matplotlib widget\n",
"\n",
......@@ -123,7 +67,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
......@@ -133,8 +77,9 @@
"from utils.dask import HTCondorCluster, dask_executor\n",
"\n",
"cluster = HTCondorCluster(\n",
" cores=4,\n",
" memory=\"2000MiB\",\n",
" cores=12,\n",
" processes=3,\n",
" memory=\"4000MiB\",\n",
" disk=\"2GB\",\n",
" job_extra=dict(Request_CPUs=0, Request_GPUs=0, Request_GpuMemory=1),\n",
" security=Security(),\n",
......@@ -142,92 +87,16 @@
" dashboard_address=\":8787\",\n",
")\n",
"\n",
"cluster.scale_machines({\"*\": 5, \"*gpu*1*\": 0, \"*gpu*4*\": 0, \"*gpu*3*\": 0, \"*portal*\": 0}, timeout=10)\n",
"cluster.scale_machines({\"*\": 1, \"*worker*\": 3, \"*gpu1*\": 0, \"*gpu04*\": 0, \"*portal*\": 0}, timeout=30)\n",
"\n",
"client = Client(cluster, security=Security())"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f1dd3cc051e94d64a68edda76e5e8c5a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, description='Preprocessing', max=365.0, style=ProgressStyle(descriptio…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "addf29204fc04146a4178c8a85e60f64",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, max=492734841.0), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"({'sum_gen_weights': defaultdict_accumulator(float,\n",
" {'ttZ_2l_2nu': 1928311.853515625,\n",
" 'ttWjets_1l_1nu': 1690099.44140625,\n",
" 'ttZ_2q': 383055.28125,\n",
" 'wjets_ext1': 44587448.0,\n",
" 'ttWjets_2q': 560304.7265625,\n",
" 'wjets': 33043732.0,\n",
" 'st_tW_antitop': 270757537.75,\n",
" 'st_tW_top': 277236667.25,\n",
" 'st_t_channel_antitop': 3675910.0,\n",
" 'st_s_channel': 36965496.78125,\n",
" 'st_t_channel_top': 5982064.0,\n",
" 'wz': 3928630.0,\n",
" 'ww': 7791560.9453125,\n",
" 'zz': 1949768.0,\n",
" 'dy_lep_2j': 142391068736.0,\n",
" 'dy_lep_1j': 445773520736.0,\n",
" 'dy_lep_0j': 534588452544.0,\n",
" 'tt_dl': 4984952938.5,\n",
" 'tt_sl': 13154468478.0,\n",
" 'diHiggs_bbWW_dl': 388947.0})},\n",
" {'columns': {'Generator_weight'},\n",
" 'entries': value_accumulator(int, 492734841),\n",
" 'processtime': value_accumulator(float, 1462.6902267932892),\n",
" 'chunks': value_accumulator(int, 4928)})"
]
},
"execution_count": null,
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"import coffea.processor as processor\n",
"\n",
......@@ -252,22 +121,23 @@
" def postprocess(self, accumulator):\n",
" return accumulator\n",
"\n",
" \n",
"\n",
"sumw = processor.run_uproot_job(\n",
" fileset,\n",
" treename=\"Events\",\n",
" processor_instance=SumwProcessor(),\n",
" pre_executor=processor.futures_executor,\n",
" pre_args=dict(workers=32),\n",
" executor=dask_executor,\n",
" executor_args=dict(\n",
" client=client,\n",
" compression=1,\n",
" worker_affinity=1,\n",
" savemetrics=1,\n",
" worker_affinity=1,\n",
" affinity_factor={\"*worker*\": 20},\n",
" nano=True,\n",
" live_callback=lambda x: x,\n",
" ),\n",
" pre_executor=processor.futures_executor,\n",
" pre_args=dict(workers=None),\n",
" chunksize=100000,\n",
")\n",
"\n",
......@@ -276,52 +146,11 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2cd7d3e268234a1780c52ce5ca278ae1",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Output()"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3124f0f17e2e4923978404cc373e36c9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, max=492734841.0), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"Total time: 790.59s\n",
"Total events: 4.927348e+08\n",
"Events / s: 623253\n",
"Events / s / thread: 7459\n"
]
}
],
"outputs": [],
"source": [
"%matplotlib inline\n",
"%config InlineBackend.figure_format = \"svg\"\n",
......@@ -356,17 +185,18 @@
" fileset,\n",
" treename=\"Events\",\n",
" processor_instance=Processor(config=config_inst, corrections=corrections),\n",
" pre_executor=processor.futures_executor,\n",
" pre_args=dict(workers=32),\n",
" executor=dask_executor,\n",
" executor_args=dict(\n",
" client=client,\n",
" compression=1,\n",
" savemetrics=1,\n",
" worker_affinity=1,\n",
" affinity_factor={\"*worker*\": 20},\n",
" nano=True,\n",
" live_callback=live_plot\n",
" live_callback=live_plot,\n",
" ),\n",
" pre_executor=processor.futures_executor,\n",
" pre_args=dict(workers=None),\n",
" chunksize=100000,\n",
")\n",
"toc = time.time()\n",
......@@ -382,43 +212,14 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e9a6461e8a0d406199ab676b6cece4ed",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"VBox(children=(Dropdown(description='Variable:', options=('MET', 'm_ee', 'm_mumu', 'mjj', 'jet1_pt', 'jet1_eta…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f66f9ae9a2ee4048a12c3c37b8873500",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Output()"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"%matplotlib widget\n",
"%config InlineBackend.figure_format = \"pdf\"\n",
"%matplotlib inline\n",
"%config InlineBackend.figure_format = \"svg\"\n",
"\n",
"from demo_utils import plot\n",
"from functools import partial\n",
......@@ -426,6 +227,8 @@
"\n",
"hists = regroup(output[0], config_inst, sumw[0])\n",
"\n",
"print(output[0][\"MET\"].values())\n",
"\n",
"variable = widgets.Dropdown(\n",
" options=config_inst.variables.names(),\n",
" description=\"Variable:\",\n",
......
......@@ -196,6 +196,7 @@ def dask_executor(
return accumulator
reducer = _reduce()
items_orig = items
if worker_affinity:
item2hash = {
item: sha512(
......@@ -254,7 +255,7 @@ def dask_executor(
]
)
worker_names = np.array(worker_names)
dist_factor **= 1. / worker_hashes.shape[-1]
dist_factor **= 1.0 / worker_hashes.shape[-1]
def hash2worker(hash_bytes):
item_hash = np.frombuffer(hash_bytes, dtype=np.int8)[None, :]
......
Subproject commit 3371463e57e825d941360f5947cf6dbb7fdfe4b4
Subproject commit f8e419bdae17a2e5cd9418716e8b4c27a554f1d5
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment