PK AHK% temci/__init__.pyimport temci.scriptsPK #}6H9o temci/tester/testers.py""" Contains the tester base class and several simple implementations. """ import temci.utils.util as util import temci.utils.util as util if util.can_import("scipy"): import scipy as np import scipy.stats as st import scipy.optimize as opti from temci.utils.typecheck import * from temci.utils.registry import AbstractRegistry, register import logging, warnings class TesterRegistry(AbstractRegistry): settings_key_path = "stats" use_key = "tester" use_list = False default = "t" registry = {} class Tester(object, metaclass=util.Singleton): """ A tester tests the probability of the nullhypothesis of two same length list of observations. This is a base class that shouldn't be instantiated. """ scipy_stat_method = "" name = "" def __init__(self, misc_settings: dict, uncertainty_range: tuple): """ :param data1: first list of of data points :param data2: second list of data points :param uncertainty_range: (start, end) probability tuple that gives range in which the tester doesn't give a definitive result on the nullhypothesis check """ self.uncertainty_range = uncertainty_range assert isinstance(uncertainty_range, Tuple(Float(), Float())) self.misc_settings = misc_settings def test(self, data1: list, data2: list) -> float: """ Calculates the probability of the null hypotheses. """ res = 0 min_len = min(len(data1), len(data2)) with warnings.catch_warnings(record=True) as w: res = self._test_impl(data1[0:min_len], data2[0: min_len]) return res def _test_impl(self, data1: list, data2: list) -> float: return getattr(st, self.scipy_stat_method)(data1, data2)[-1] def is_uncertain(self, data1: list, data2: list) -> bool: return min(len(data1), len(data2)) == 0 or \ self.uncertainty_range[0] <= self.test(data1, data2) <= self.uncertainty_range[1] def is_equal(self, data1: list, data2: list): return self.test(data1, data2) > max(*self.uncertainty_range) def is_unequal(self, data1: list, data2: list): return self.test(data1, data2) < min(*self.uncertainty_range) def estimate_needed_runs(self, data1: list, data2: list, run_bin_size: int, min_runs: int, max_runs: int) -> int: """ Calculate a approximation of the needed length of both observations that is needed for the p value to lie outside the uncertainty range. It uses the simple observation that the graph of the p value plotted against the size of the sets has a exponential, logarithmic or root shape. :param data1: list of observations :param data2: list of observations :param run_bin_size: granularity of the observation (> 0) :param min_runs: minimum number of allowed runs :param max_runs: maximum number of allowed runs :return: approximation of needed runs or float("inf") """ #print("###", max_runs) if data1 == data2: #logging.error("equal") return min_runs min_len = min(len(data1), len(data2)) #print("##", max_runs) if min_len <= 5: return max_runs x_space = np.linspace(0, min_len - 2, min_len - 2) yn = [self.test(data1[0:i], data2[0:i]) for i in range(2, min_len)] def interpolate(func, name: str): try: popt, pcov = opti.curve_fit(func, x_space, yn, maxfev=10000) for i in range(min_len, max_runs + 1, run_bin_size): ith = func(i, *popt) if ith > max(self.uncertainty_range) or ith < min(self.uncertainty_range): #print("i = ", i) return i return max_runs except (TypeError, RuntimeWarning, RuntimeError) as err: logging.info("Interpolating {} with {} data points gave " "following error: {}".format(name, min_len, str(err))) return float("inf") funcs = [ (lambda x, a, b, c: a * np.exp(-b * x) + c, "exponential function") ] res = 0 with warnings.catch_warnings(record=True) as w: res = min(interpolate(*f) for f in funcs) return res def __eq__(self, other): return isinstance(other, type(self)) @register(TesterRegistry, name="t", misc_type=Dict()) class TTester(Tester): """ Implementation of the Tester base class for the student's t test. """ scipy_stat_method = "ttest_ind" name = "t" @register(TesterRegistry, name="ks", misc_type=Dict()) class KSTester(Tester): """ Uses the Kolmogorov-Smirnov statistic on 2 samples. """ scipy_stat_method = "ks_2samp" name = "kolmogorov smirnov" @register(TesterRegistry, name="anderson", misc_type=Dict()) class AndersonTester(Tester): """ Uses the Anderson statistic on 2 samples. """ scipy_stat_method = "anderson_ksamp" def _test_impl(self, data1: list, data2: list) -> float: return max(st.anderson_ksamp([data1, data2])[-1], 1) name = "anderson"PK BYxHKul ul temci/tester/report.pyimport logging import math import re import shutil from collections import namedtuple import multiprocessing import time import sys import itertools from temci.report.stats import TestedPairsAndSingles, BaseStatObject, TestedPair, TestedPairProperty, StatMessage, \ StatMessageType, Single, SingleProperty, SinglesProperty from temci.report.testers import TesterRegistry, Tester from temci.report.rundata import RunDataStatsHelper, RunData from temci.utils.typecheck import * from temci.utils.registry import AbstractRegistry, register import temci.utils.util as util import click, yaml, os if util.can_import("numpy"): import numpy as np import pandas as pd from temci.utils.settings import Settings from multiprocessing import Pool from temci.utils.util import join_strs import typing as t class ReporterRegistry(AbstractRegistry): settings_key_path = "report" use_key = "reporter" use_list = False default = "html2" registry = {} class AbstractReporter: def __init__(self, misc_settings: dict = None, stats_helper: RunDataStatsHelper = None): self.misc = misc_settings if stats_helper is None: runs = [] typecheck(Settings()["report/in"], ValidYamlFileName()) with open(Settings()["report/in"], "r") as f: runs = yaml.load(f) self.stats_helper = RunDataStatsHelper.init_from_dicts(runs) else: self.stats_helper = stats_helper self.stats = TestedPairsAndSingles(self.stats_helper.valid_runs(), distinct_descriptions=True) def report(self): raise NotImplementedError() @register(ReporterRegistry, "console", Dict({ "out": FileNameOrStdOut() // Default("-") // Description("Output file name or stdard out (-)") })) class ConsoleReporter(AbstractReporter): """ Simple reporter that outputs just text. """ def report(self, with_tester_results: bool = True, to_string: bool = False) -> t.Optional[str]: output = [""] def string_printer(line: str, **args): output[0] += str(line) + "\n" print_func = string_printer if to_string else print with click.open_file(self.misc["out"], mode='w') as f: for block in self.stats_helper.valid_runs(): assert isinstance(block, RunData) print_func("{descr:<20} ({num:>5} single benchmarks)" .format(descr=block.description(), num=len(block.data[block.properties[0]])), file=f) for prop in sorted(block.properties): mean = np.mean(block[prop]) stdev = np.std(block[prop]) print_func("\t {prop:<18} mean = {mean:>15.5f}, " "deviation = {dev_perc:>10.5%} ({dev:>15.5f})".format( prop=prop, mean=mean, dev=stdev, dev_perc=stdev/mean ), file=f) if with_tester_results: self._report_list("Equal program blocks", self.stats_helper.get_evaluation(with_equal=True, with_uncertain=False, with_unequal=False), f, print_func) self._report_list("Unequal program blocks", self.stats_helper.get_evaluation(with_equal=False, with_uncertain=False, with_unequal=True), f, print_func) self._report_list("Uncertain program blocks", self.stats_helper.get_evaluation(with_equal=True, with_uncertain=True, with_unequal=True), f, print_func) if to_string: return output[0] def _report_list(self, title: str, list, file, print_func: t.Callable[[str, Any], None]): if len(list) != 0: print_func(title, file=file) print_func("####################", file=file) for item in list: print_func("\t {} ⟷ {}".format(item["data"][0].description(), item["data"][1].description()), file=file) for prop in sorted(item["properties"]): prop_data = item["properties"][prop] perc = prop_data["p_val"] if prop_data["unequal"]: perc = 1 - perc print_func("\t\t {descr:<18} probability = {perc:>10.5%}, speed up = {speed_up:>10.5%}" .format(descr=prop_data["description"], perc=perc, speed_up=prop_data["speed_up"]), file=file) @register(ReporterRegistry, "html", Dict({ "out": Str() // Default("report") // Description("Output directory"), "html_filename": Str() // Default("report.html") // Description("Name of the HTML file"), "pair_kind": ExactEither("scatter", "reg", "resid", "kde", "hex") // Default("kde") // Description("Kind of plot to draw for pair plots (see searborn.joinplot)"), "plot_size": PositiveInt() // Default(8) // Description("Width of the plots in centimeters"), "compared_props": (ListOrTuple(Str())) // Default(["all"]) // Description("Properties to include in comparison table"), "compare_against": NaturalNumber() // Default(0) // Description("Run to to use as base run for relative values in comparison table") })) class HTMLReporter(AbstractReporter): """ Reporter that produces a HTML bsaed report with lot's of graphics. """ counter = 0 """ Just a counter to allow collision free figure saving. """ PlotTuple = namedtuple("PlotTuple", ["func", "args", "kwargs", "filename"]) def report(self): typecheck(self.misc["out"], DirName(), value_name="reporter option out") if os.path.exists(self.misc["out"]): shutil.rmtree(self.misc["out"]) resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "report_resources")) shutil.copytree(resources_path, self.misc["out"]) runs = self.stats_helper.valid_runs() html = """
A benchmarking report comparing {comparing_str}