Commit 0e7b91c4 authored by Dennis Noll's avatar Dennis Noll
Browse files

[utils] hist: init histutils

parent 3c012dbc
......@@ -18,6 +18,7 @@ from tasks.base import BaseTask, ConfigTask
from tasks.mixins import PGroupMixin, RecipeMixin
from tasks.coffea import CoffeaProcessor
from tasks.plotting import PlotHistsBase
import utils.hist as histutils
class SyncSelectionUpload(RecipeMixin, ConfigTask):
......@@ -59,52 +60,6 @@ class SyncFile(ExternalTask):
return law.LocalFileTarget(self.filepath)
def reduce_along_axis(func, axis, h):
axes = np.array(h.axes, dtype=object)
axes = np.delete(axes, axis)
hnew = hist.Hist(*axes)
hnew.view()[:] = np.apply_along_axis(func, axis, h.view())
return hnew
def _kstest(arr):
with np.errstate(invalid="ignore"):
return scipy.stats.kstest(arr[0, :], arr[1, :]).statistic
return scipy.stats.chisquare(arr[0, :], arr[1, :]).statistic
def kstest(h1, h2, compare_axis, feature_axis):
assert feature_axis == -1, "Other axes not implemented yet"
axes1 = np.array(h1.axes, dtype=object)
axes2 = np.array(h2.axes, dtype=object)
# currently only works for StrCategory
compared_axis = [
f"{a}_{b}" for (a, b) in itertools.product(axes1[compare_axis], axes2[compare_axis])
]
new_axes = np.copy(axes1)
new_axes[compare_axis] = type(axes1[compare_axis])(compared_axis)
new_axes = np.delete(new_axes, feature_axis)
hnew = hist.Hist(*new_axes)
a1, a2 = h1.view(), h2.view()
c, d = np.broadcast_arrays(
np.expand_dims(a1, compare_axis), np.expand_dims(a2, compare_axis + 1)
)
s = np.stack([c, d], axis=-2)
# reshape
o = np.array(s.shape)
o[compare_axis] = o[compare_axis] * o[compare_axis + 1]
o = np.delete(o, compare_axis + 1)
s = np.reshape(s, o)
# perform test
kstest = np.vectorize(_kstest, signature="(2,n)->()")
hnew.view()[:] = kstest(s["value"])
return hnew
class SyncSelection(PlotHistsBase, ConfigTask):
files = law.CSVParameter(default=[])
own = luigi.BoolParameter()
......@@ -203,7 +158,7 @@ class SyncSelection(PlotHistsBase, ConfigTask):
ratio="lines",
)
# statistical tests
hnew = kstest(compare[:1, :], compare[1:, :], 0, -1)
hnew = histutils.kstest(compare[:1, :], compare[1:, :], 0, -1)
statistical_tests[f"{mask_key}_{feature_key}"] = hnew.view()[0]
self.output()["statistical_tests"].dump(statistical_tests)
......
# coding: utf-8
import itertools
import numpy as np
import scipy
import hist
def reduce_along_axis(func, axis, h):
axes = np.array(h.axes, dtype=object)
axes = np.delete(axes, axis)
hnew = hist.Hist(*axes)
hnew.view()[:] = np.apply_along_axis(func, axis, h.view())
return hnew
def _kstest(arr):
with np.errstate(invalid="ignore"):
return scipy.stats.kstest(arr[0, :], arr[1, :]).statistic
return scipy.stats.chisquare(arr[0, :], arr[1, :]).statistic
def kstest(h1, h2, compare_axis, feature_axis):
assert feature_axis == -1, "Other axes not implemented yet"
axes1 = np.array(h1.axes, dtype=object)
axes2 = np.array(h2.axes, dtype=object)
# currently only works for StrCategory
compared_axis = [
f"{a}_{b}" for (a, b) in itertools.product(axes1[compare_axis], axes2[compare_axis])
]
new_axes = np.copy(axes1)
new_axes[compare_axis] = type(axes1[compare_axis])(compared_axis)
new_axes = np.delete(new_axes, feature_axis)
hnew = hist.Hist(*new_axes)
a1, a2 = h1.view(), h2.view()
c, d = np.broadcast_arrays(
np.expand_dims(a1, compare_axis), np.expand_dims(a2, compare_axis + 1)
)
s = np.stack([c, d], axis=-2)
# reshape
o = np.array(s.shape)
o[compare_axis] = o[compare_axis] * o[compare_axis + 1]
o = np.delete(o, compare_axis + 1)
s = np.reshape(s, o)
# perform test
kstest = np.vectorize(_kstest, signature="(2,n)->()")
hnew.view()[:] = kstest(s["value"])
return hnew
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment