# temci/__init__.py
import temci.scripts


# temci/tester/testers.py
""" Contains the tester base class and several simple implementations. """

import temci.utils.util as util
if util.can_import("scipy"):
    import numpy as np
    import scipy.stats as st
    import scipy.optimize as opti
from temci.utils.typecheck import *
from temci.utils.registry import AbstractRegistry, register
import logging, warnings


class TesterRegistry(AbstractRegistry):

    settings_key_path = "stats"
    use_key = "tester"
    use_list = False
    default = "t"
    registry = {}


class Tester(object, metaclass=util.Singleton):
    """
    A tester estimates the probability of the null hypothesis for two lists of observations
    of the same length. This is a base class that shouldn't be instantiated.
    """

    scipy_stat_method = ""
    name = ""

    def __init__(self, misc_settings: dict, uncertainty_range: tuple):
        """
        :param misc_settings: tester-specific settings
        :param uncertainty_range: (start, end) probability tuple that gives the range in which
            the tester doesn't give a definitive result on the null hypothesis check
        """
        self.uncertainty_range = uncertainty_range
        assert isinstance(uncertainty_range, Tuple(Float(), Float()))
        self.misc_settings = misc_settings

    def test(self, data1: list, data2: list) -> float:
        """ Calculates the probability of the null hypothesis. """
        res = 0
        min_len = min(len(data1), len(data2))
        with warnings.catch_warnings(record=True) as w:
            res = self._test_impl(data1[0:min_len], data2[0:min_len])
        return res

    def _test_impl(self, data1: list, data2: list) -> float:
        return getattr(st, self.scipy_stat_method)(data1, data2)[-1]

    def is_uncertain(self, data1: list, data2: list) -> bool:
        return min(len(data1), len(data2)) == 0 or \
               self.uncertainty_range[0] <= self.test(data1, data2) <= self.uncertainty_range[1]

    def is_equal(self, data1: list, data2: list):
        return self.test(data1, data2) > max(*self.uncertainty_range)

    def is_unequal(self, data1: list, data2: list):
        return self.test(data1, data2) < min(*self.uncertainty_range)

    def estimate_needed_runs(self, data1: list, data2: list,
                             run_bin_size: int, min_runs: int, max_runs: int) -> int:
        """
        Calculate an approximation of the number of observations per list that is needed
        for the p value to lie outside the uncertainty range.
        It uses the simple observation that the p value plotted against the size of the
        observation sets has an exponential, logarithmic or root like shape.

        :param data1: list of observations
        :param data2: list of observations
        :param run_bin_size: granularity of the observations (> 0)
        :param min_runs: minimum number of allowed runs
        :param max_runs: maximum number of allowed runs
        :return: approximation of the needed runs or float("inf")
        """
        if data1 == data2:
            return min_runs
        min_len = min(len(data1), len(data2))
        if min_len <= 5:
            return max_runs
        x_space = np.linspace(0, min_len - 2, min_len - 2)
        yn = [self.test(data1[0:i], data2[0:i]) for i in range(2, min_len)]

        def interpolate(func, name: str):
            try:
                popt, pcov = opti.curve_fit(func, x_space, yn, maxfev=10000)
                for i in range(min_len, max_runs + 1, run_bin_size):
                    ith = func(i, *popt)
                    if ith > max(self.uncertainty_range) or ith < min(self.uncertainty_range):
                        return i
                return max_runs
            except (TypeError, RuntimeWarning, RuntimeError) as err:
                logging.info("Interpolating {} with {} data points gave "
                             "the following error: {}".format(name, min_len, str(err)))
                return float("inf")

        funcs = [
            (lambda x, a, b, c: a * np.exp(-b * x) + c, "exponential function")
        ]
        res = 0
        with warnings.catch_warnings(record=True) as w:
            res = min(interpolate(*f) for f in funcs)
        return res

    def __eq__(self, other):
        return isinstance(other, type(self))
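# --- Added illustration (not part of temci): a minimal, self-contained sketch of the
# --- extrapolation idea behind estimate_needed_runs above. It fits the same decaying
# --- exponential model to a synthetic p value curve and reads off the first sample size
# --- at which the fitted curve leaves an assumed uncertainty range of (0.05, 0.15).
# --- All data and numbers below are made up for illustration only.
def _demo_estimate_needed_runs_idea():
    import numpy as np
    import scipy.optimize as opti

    uncertainty_range = (0.05, 0.15)
    # synthetic p values for sample sizes 2..30 that decay towards 0.02
    sizes = np.arange(2, 31)
    p_values = 0.5 * np.exp(-0.05 * sizes) + 0.02

    def model(x, a, b, c):
        # same model family as used in estimate_needed_runs
        return a * np.exp(-b * x) + c

    # a rough initial guess keeps the fit stable on this toy data
    popt, _ = opti.curve_fit(model, sizes, p_values, p0=(1.0, 0.1, 0.0), maxfev=10000)
    for n in range(31, 500):
        if not (uncertainty_range[0] <= model(n, *popt) <= uncertainty_range[1]):
            return n  # first sample size with a (predicted) decisive p value, here around 57
    return float("inf")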
@register(TesterRegistry, name="t", misc_type=Dict())
class TTester(Tester):
    """ Implementation of the Tester base class for Student's t test. """

    scipy_stat_method = "ttest_ind"
    name = "t"


@register(TesterRegistry, name="ks", misc_type=Dict())
class KSTester(Tester):
    """ Uses the Kolmogorov-Smirnov statistic on 2 samples. """

    scipy_stat_method = "ks_2samp"
    name = "kolmogorov smirnov"


@register(TesterRegistry, name="anderson", misc_type=Dict())
class AndersonTester(Tester):
    """ Uses the Anderson statistic on 2 samples. """

    scipy_stat_method = "anderson_ksamp"
    name = "anderson"

    def _test_impl(self, data1: list, data2: list) -> float:
        # cap the approximate p value at 1 (anderson_ksamp's extrapolated
        # significance level can exceed it)
        return min(st.anderson_ksamp([data1, data2])[-1], 1)
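# --- Added illustration (not part of temci): a hedged usage sketch of the testers above.
# --- It assumes that an empty misc_settings dict is acceptable and that (0.05, 0.15) is a
# --- plausible stand-in for the "stats/uncertainty_range" setting; the run times are made up.
def _demo_tester_usage():
    old_runtimes = [20.1, 20.3, 19.9, 20.2, 20.0, 20.4]
    new_runtimes = [19.0, 19.2, 18.9, 19.1, 19.3, 18.8]
    tester = TTester(misc_settings={}, uncertainty_range=(0.05, 0.15))
    p_value = tester.test(old_runtimes, new_runtimes)  # probability of the null hypothesis
    if tester.is_uncertain(old_runtimes, new_runtimes):
        needed = tester.estimate_needed_runs(old_runtimes, new_runtimes,
                                             run_bin_size=1, min_runs=6, max_runs=100)
        print("p = {:5.3f}: undecided, roughly {} runs per program needed".format(p_value, needed))
    elif tester.is_unequal(old_runtimes, new_runtimes):
        print("p = {:5.3f}: the two programs probably differ".format(p_value))
    else:
        print("p = {:5.3f}: no significant difference detected".format(p_value))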
""" scipy_stat_method = "anderson_ksamp" def _test_impl(self, data1: list, data2: list) -> float: return max(st.anderson_ksamp([data1, data2])[-1], 1) name = "anderson"PKIHHuy"ulultemci/tester/report.pyimport logging import math import re import shutil from collections import namedtuple import multiprocessing import time import sys import itertools from temci.tester.stats import TestedPairsAndSingles, BaseStatObject, TestedPair, TestedPairProperty, StatMessage, \ StatMessageType, Single, SingleProperty, SinglesProperty from temci.tester.testers import TesterRegistry, Tester from temci.tester.rundata import RunDataStatsHelper, RunData from temci.utils.typecheck import * from temci.utils.registry import AbstractRegistry, register import temci.utils.util as util import click, yaml, os if util.can_import("numpy"): import numpy as np import pandas as pd from temci.utils.settings import Settings from multiprocessing import Pool from temci.utils.util import join_strs import typing as t class ReporterRegistry(AbstractRegistry): settings_key_path = "report" use_key = "reporter" use_list = False default = "html2" registry = {} class AbstractReporter: def __init__(self, misc_settings: dict = None, stats_helper: RunDataStatsHelper = None): self.misc = misc_settings if stats_helper is None: runs = [] typecheck(Settings()["report/in"], ValidYamlFileName()) with open(Settings()["report/in"], "r") as f: runs = yaml.load(f) self.stats_helper = RunDataStatsHelper.init_from_dicts(runs) else: self.stats_helper = stats_helper self.stats = TestedPairsAndSingles(self.stats_helper.valid_runs(), distinct_descriptions=True) def report(self): raise NotImplementedError() @register(ReporterRegistry, "console", Dict({ "out": FileNameOrStdOut() // Default("-") // Description("Output file name or stdard out (-)") })) class ConsoleReporter(AbstractReporter): """ Simple reporter that outputs just text. 
""" def report(self, with_tester_results: bool = True, to_string: bool = False) -> t.Optional[str]: output = [""] def string_printer(line: str, **args): output[0] += str(line) + "\n" print_func = string_printer if to_string else print with click.open_file(self.misc["out"], mode='w') as f: for block in self.stats_helper.valid_runs(): assert isinstance(block, RunData) print_func("{descr:<20} ({num:>5} single benchmarks)" .format(descr=block.description(), num=len(block.data[block.properties[0]])), file=f) for prop in sorted(block.properties): mean = np.mean(block[prop]) stdev = np.std(block[prop]) print_func("\t {prop:<18} mean = {mean:>15.5f}, " "deviation = {dev_perc:>10.5%} ({dev:>15.5f})".format( prop=prop, mean=mean, dev=stdev, dev_perc=stdev/mean ), file=f) if with_tester_results: self._report_list("Equal program blocks", self.stats_helper.get_evaluation(with_equal=True, with_uncertain=False, with_unequal=False), f, print_func) self._report_list("Unequal program blocks", self.stats_helper.get_evaluation(with_equal=False, with_uncertain=False, with_unequal=True), f, print_func) self._report_list("Uncertain program blocks", self.stats_helper.get_evaluation(with_equal=True, with_uncertain=True, with_unequal=True), f, print_func) if to_string: return output[0] def _report_list(self, title: str, list, file, print_func: t.Callable[[str, Any], None]): if len(list) != 0: print_func(title, file=file) print_func("####################", file=file) for item in list: print_func("\t {} ⟷ {}".format(item["data"][0].description(), item["data"][1].description()), file=file) for prop in sorted(item["properties"]): prop_data = item["properties"][prop] perc = prop_data["p_val"] if prop_data["unequal"]: perc = 1 - perc print_func("\t\t {descr:<18} probability = {perc:>10.5%}, speed up = {speed_up:>10.5%}" .format(descr=prop_data["description"], perc=perc, speed_up=prop_data["speed_up"]), file=file) @register(ReporterRegistry, "html", Dict({ "out": Str() // Default("report") // Description("Output directory"), "html_filename": Str() // Default("report.html") // Description("Name of the HTML file"), "pair_kind": ExactEither("scatter", "reg", "resid", "kde", "hex") // Default("kde") // Description("Kind of plot to draw for pair plots (see searborn.joinplot)"), "plot_size": PositiveInt() // Default(8) // Description("Width of the plots in centimeters"), "compared_props": (ListOrTuple(Str())) // Default(["all"]) // Description("Properties to include in comparison table"), "compare_against": NaturalNumber() // Default(0) // Description("Run to to use as base run for relative values in comparison table") })) class HTMLReporter(AbstractReporter): """ Reporter that produces a HTML bsaed report with lot's of graphics. """ counter = 0 """ Just a counter to allow collision free figure saving. """ PlotTuple = namedtuple("PlotTuple", ["func", "args", "kwargs", "filename"]) def report(self): typecheck(self.misc["out"], DirName(), value_name="reporter option out") if os.path.exists(self.misc["out"]): shutil.rmtree(self.misc["out"]) resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "report_resources")) shutil.copytree(resources_path, self.misc["out"]) runs = self.stats_helper.valid_runs() html = """ Benchmarking report
{inner_html}
""" descriptions = [run.description() for run in self.stats_helper.valid_runs()] comparing_str = "" if len(descriptions) == 1: comparing_str = descriptions[0] elif len(descriptions) > 1: comparing_str = " and ".join([", ".join(descriptions[0:-1]), descriptions[-1]]) inner_html = "" self.big_size = self.misc["plot_size"] self.small_size = max(2, math.floor(self.big_size * 2 / len(runs[0].properties))) if len(self.stats_helper.valid_runs()) > 1: logging.info("Generate comparison tables") inner_html += "

Comparison tables

" + self._comparison_tables() self._write(html.format(**locals())) for i in range(0, len(runs)): for j in range(0, i): logging.info("Plot pair summary ({}, {})".format(i, j)) inner_html += self._pair_summary(runs[i], runs[j], heading_no=2) self._write(html.format(**locals())) for i in range(0, len(runs)): logging.info("Plot program block {}".format(i)) inner_html += self._report_single(runs[i]) self._write(html.format(**locals())) if len(self.stats_helper.valid_runs()) > 1: for i in range(0, len(runs)): for j in range(0, i): logging.info("Plot pair ({}, {})".format(i, j)) inner_html += self._report_pair(runs[i], runs[j]) self._write(html.format(**locals())) def _write(self, html_string: str): """ Store the html string in the appropriate file and append "" """ with open(os.path.join(self.misc["out"], self.misc["html_filename"]), "w") as f: f.write(html_string) def _set_fig_size(self, size: int): import matplotlib.pyplot as plt plt.rcParams['figure.figsize'] = (size, size) self.current_size = size def _report_single(self, data: RunData): import matplotlib.pyplot as plt import seaborn as sns ret_str = """

{}

{} benchmarks
""".format(data.description(), len(data[data.properties[0]])) ret_str += """ """ for prop in sorted(self.stats_helper.properties()): x = pd.Series(data[prop], name=prop) self._set_fig_size(self.small_size) ax = sns.distplot(x) if self.small_size == self.current_size: plt.xticks([]) plt.yticks([]) filename = self._get_new_figure_filename() plt.xlim(0, max(data[prop])) plt.xlabel(prop) plt.savefig(filename) plt.title(prop) plt.close() ret_str += """ """.format(filename=filename, sm=self.small_size) ret_str += """
""" for prop in sorted(self.stats_helper.properties()): ret_str += """

{prop}

{benchs} benchmarks
""".format(prop=prop, benchs=len(data[prop])) x = pd.Series(data[prop], name=prop) self._set_fig_size(self.big_size) ax = sns.distplot(x, kde=False) filename = self._get_new_figure_filename() plt.xlim(min(data[prop]), max(data[prop])) plt.savefig(filename) plt.close() ret_str += """ """.format(filename=filename) prop_data = data[prop] vals = { "mean": np.mean(prop_data), "median": np.median(prop_data), "min": np.min(prop_data), "max": np.max(prop_data), "standard deviation": np.std(prop_data) } ret_str += """ """ for name in sorted(vals.keys()): ret_str += """ """.format(name=name, absolute=vals[name], rel_mean=vals[name] / vals["mean"], rel_median=vals[name] / vals["median"]) ret_str += """
statistical property | absolute value | relative to mean | relative to median
{name} {absolute} {rel_mean:15.5%} {rel_median:15.5%}
""" return ret_str def _report_pair(self, first: RunData, second: RunData): ret_str = """

{descr1} <=> {descr2}

""".format(descr1=first.description(), descr2=second.description()) ret_str += self._pair_summary(first, second, heading_no=3) for prop in sorted(self.stats_helper.properties): length = min(len(first[prop]), len(second[prop])) first_prop = first[prop][0:length] second_prop = second[prop][0:length] ret_str += """

{prop}

{benchs} benchmarks

Probability of the null hypothesis

I.e. the probability that the data sets of both program block of the property {prop} come from the same population. """.format(filename=self._jointplot(first, second, prop, size=self.big_size), prop=prop, filename2=self._barplot(first, second, prop, size=self.big_size), benchs=length) for tester_name in sorted(TesterRegistry.registry.keys()): tester = TesterRegistry.get_for_name(tester_name, Settings()["stats/uncertainty_range"]) p_val = tester.test(first[prop], second[prop]) row_class = self._p_val_to_row_class(p_val) tester_descr = tester.__description__ ret_str += """ """.format(**locals()) ret_str += """
Tester | probability | Tester description
{tester_name}{p_val:5.5%} {tester_descr}
""" vals = { "mean": (np.mean(first_prop), np.mean(second_prop)), "median": (np.median(first_prop), np.median(second_prop)), } ret_str += """ """ for descr in sorted(vals.keys()): first_val, second_val = vals[descr] ret_str += """ """.format(descr=descr, diff=first_val - second_val, rel_diff=(first_val - second_val) / first_val) ret_str += """
Difference in property | absolute difference | difference rel. to first
{descr}{diff:15.5}{rel_diff:3.5%}
""" return ret_str def _pair_summary(self, first: RunData, second: RunData, heading_no: int): html = """ Summary of {descr} <=> {descr2} {{inner_html}} """.format(descr=first.description(), descr2=second.description(), no=heading_no) inner_html = """ """ for prop in sorted(self.stats_helper.properties()): inner_html += """ """.format(filename=self._jointplot(first, second, prop, size=self.small_size, show_ticks=False)) inner_html += "" for prop in sorted(self.stats_helper.properties()): inner_html += """ """.format(filename=self._barplot(first, second, prop, size=self.small_size, show_ticks=False)) inner_html += "" for prop in sorted(self.stats_helper.properties()): length = min(len(first[prop]), len(second[prop])) first_prop = first[prop][0:length] second_prop = second[prop][0:length] inner_html += """ " inner_html += """
""" for tester_name in sorted(TesterRegistry.registry.keys()): tester = TesterRegistry.get_for_name(tester_name, Settings()["stats/uncertainty_range"]) p_val = tester.test(first_prop, second_prop) row_class = self._p_val_to_row_class(p_val) inner_html += """ """.format(**locals()) inner_html += "
tester | p val
{tester_name}{p_val:3.5%}
""" return html.format(**locals()) def _p_val_to_row_class(self, p_val: float) -> str: row_class = "" if self.stats_helper.is_equal(p_val): row_class = "danger" elif self.stats_helper.is_unequal(p_val): row_class = "success" return row_class def _comparison_tables(self, runs: list = None, properties: list = None, compare_against: int = None, heading_no: int = 3) -> str: runs = runs or self.stats_helper.valid_runs() p = properties or self.misc["compared_props"] properties = list(p) compare_against = compare_against or self.misc["compare_against"] typecheck(properties, List(Str())) typecheck(runs, List(T(RunData)) // (lambda l: len(l) > 0)) typecheck(compare_against, Int(range=range(len(runs)))) if "all" in properties: properties = self.stats_helper.properties() stat_funcs = { "mean": np.mean, "median": np.median, "min": np.min, "max": np.max, "standard deviation / mean": lambda l: np.std(l) / np.mean(l), "standard deviation / median": lambda l: np.std(l) / np.median(l) } ret_str = "" for stat_prop in sorted(stat_funcs.keys()): stat_func = stat_funcs[stat_prop] ret_str += """ {prop} """.format(n=heading_no, prop=stat_prop) ret_str += self._comparison_table(stat_func, runs, properties, compare_against) return ret_str def _comparison_table(self, stat_func, runs: list, properties: list, compare_against: int) -> str: """ :param stat_func: function that gets a data series (list) and returns a scalar (e.g. mean or median) :param runs: RunData objects to compare :param properties: used properties :param compare_against: use this run as the base run (for relative values) :return: html string """ values = [] for run in runs: values_for_run = {} for property in sorted(properties): values_for_run[property] = stat_func(run[property]) values.append(values_for_run) ret_str = """ {} """.format("".join("".format(run.description(), compare_against) for run in runs)) for property in sorted(properties): ret_str += """ """.format(property) for i, run in enumerate(runs): ret_str += """ """.format( abs=values[i][property], rel=values[i][property] / values[compare_against][property] ) ret_str += """ """ ret_str += """ """ # why? see https://dl.acm.org/citation.cfm?id=5673 mult_compare_against = np.prod(list(values[compare_against].values())) for (i, run) in enumerate(runs): mult = np.prod(list(values[i].values())) ret_str += """ """.format( abs=np.power(mult, 1 / len(values[i])), rel=np.power(mult / mult_compare_against, 1 / len(values[i])) ) ret_str += """
{}
{}{abs:15.5}{rel:3.3}
geometric mean{rel:3.3}
""" return ret_str def _jointplot(self, first: RunData, second: RunData, property: str, size: int, filename: str = None, show_ticks: bool = True): import matplotlib.pyplot as plt import seaborn as sns import numpy filename = filename or self._get_new_figure_filename() length = min(len(first[property]), len(second[property])) first_prop = first[property][0:length] second_prop = second[property][0:length] lim = (0, max(max(first_prop), max(second_prop))) self._set_fig_size(size) x1 = pd.Series(first_prop, name="{descr}: {prop}".format(descr=first.description(), prop=property)) x2 = pd.Series(second_prop, name="{descr}: {prop}".format(descr=second.description(), prop=property)) plt.xlim(lim) g = None try: g = sns.jointplot(x1, x2, kind=self.misc["pair_kind"], size=size, space=0, stat_func=self.stats_helper.tester.test, xlim=lim, ylim=lim) if not show_ticks: g.ax_joint.set_xticklabels([]) g.ax_joint.set_yticklabels([]) g.savefig(filename) plt.close() except BaseException as ex: logging.warning(ex) return filename def _barplot(self, first: RunData, second: RunData, property: str, size: int, filename: str = None, show_ticks: bool = True) -> str: import matplotlib.pyplot as plt import seaborn as sns filename = filename or self._get_new_figure_filename() self._set_fig_size(size) length = min(len(first[property]), len(second[property])) first_prop = first[property][0:length] second_prop = second[property][0:length] min_xval = min(first_prop + second_prop) max_xval = max(first_prop + second_prop) bins = np.linspace(min_xval, max_xval, math.floor(math.sqrt(length) * size)) sns.distplot(first_prop, bins=bins,label=first.description(), kde=False) sns.distplot(second_prop, bins=bins,label=second.description(), kde=False) if not show_ticks: plt.xticks([]) plt.yticks([]) plt.xlim(min_xval, max_xval) plt.legend() plt.savefig(filename) plt.close() return filename def _save_figure(self, figure) -> str: filename = self._get_new_figure_filename() figure.savefig(filename) return filename def _get_new_figure_filename(self) -> str: self.counter += 1 return os.path.join(os.path.abspath(self.misc["out"]), "figure.{}{}" .format(self.counter, BaseStatObject.img_filename_ending)) @register(ReporterRegistry, "html2", Dict({ "out": Str() // Default("report") // Description("Output directory"), "html_filename": Str() // Default("report.html") // Description("Name of the HTML file"), "fig_width_small": Float() // Default(15.0) // Description("Width of all small plotted figures"), "fig_width_big": Float() // Default(25.0) // Description("Width of all big plotted figures"), "boxplot_height": Float() // Default(2.0) // Description("Height per run block for the big comparison box plots"), "alpha": Float() // Default(0.05) // Description("Alpha value for confidence intervals"), "gen_tex": Bool() // Default(True) // Description("Generate simple latex versions of the plotted figures?"), "gen_pdf": Bool() // Default(False) // Description("Generate pdf versions of the plotted figures?"), "gen_xls": Bool() // Default(False) // Description("Generate excel files for all tables"), "show_zoomed_out": Bool() // Default(False) // Description("Show zoomed out (x min = 0) figures in the extended summaries?") })) class HTMLReporter2(AbstractReporter): """ Reporter that produces a HTML bsaed report with lot's of graphics. A rewite of the original HTMLReporter """ counter = 0 """ Just a counter to allow collision free figure saving. 
""" def report(self): import humanfriendly as hf typecheck(self.misc["out"], DirName(), value_name="reporter option out") start_time = time.time() if os.path.exists(self.misc["out"]): shutil.rmtree(self.misc["out"]) resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "report_resources")) shutil.copytree(resources_path, self.misc["out"]) runs = self.stats_helper.valid_runs() self.app_html = "" html = """ Benchmarking report
{inner_html}
{self.app_html} """ if self.misc["gen_pdf"] and not util.has_pdflatex(): util.warn_for_pdflatex_non_existence_once() self.misc["gen_pdf"] = False comparing_str = join_strs([single.description() for single in self.stats.singles]) inner_html = """

Summary

""" inner_html += self._format_errors_and_warnings(self.stats) inner_html += """

Overall summary

""" inner_html += self._full_single_property_comp_table().html() for prop in self.stats.properties(): inner_html += """

Summary regarding {prop}

""".format(**locals()) inner_html += self._full_single_property_comp_table(prop).html() inner_html += """

""" inner_html += self._comparison_for_prop(prop) for single in self.stats.singles: inner_html += """

""" inner_html += self._extended_summary(single, with_title=True, title_level=2, title_class="page-header") + """
""" for pair in self.stats.pairs: inner_html += """
""" inner_html += self._extended_summary(pair, with_title=True, title_level=2, title_class="page-header") + """
""" self._write(html.format(timespan=hf.format_timespan(time.time() - start_time), **locals())) logging.info("Finished generating html") logging.info("Generate images...") self._process_hist_cache(self._hist_async_img_cache.values(), "Generate images") self._process_boxplot_cache(self._boxplot_async_cache.values(), "Generate box plots") self._write(html.format(timespan=hf.format_timespan(time.time() - start_time), **locals())) if self.misc["gen_pdf"] or self.misc["gen_tex"]: strs = (["tex"] if self.misc["gen_tex"] else []) + (["pdf"] if self.misc["gen_pdf"] else []) self._process_hist_cache(self._hist_async_misc_cache.values(), "Generate {}".format(join_strs(strs))) def _process_hist_cache(self, cache: t.Iterable[dict], title: str): pool = multiprocessing.Pool(4) pool_res = [pool.apply_async(self._process_hist_cache_entry, args=(entry,)) for entry in cache] if Settings().has_log_level("info"): with click.progressbar(pool_res, label=title) as pool_res: for res in pool_res: res.get() else: for res in pool_res: res.get() def _process_boxplot_cache(self, cache: t.Iterable[dict], title: str): pool = multiprocessing.Pool(4) pool_res = [pool.apply_async(self._process_boxplot_cache_entry, args=(entry,)) for entry in cache] if Settings().has_log_level("info"): with click.progressbar(pool_res, label=title) as pool_res: for res in pool_res: res.get() else: for res in pool_res: res.get() def _write(self, html_string: str): """ Store the html string in the appropriate file and append "" """ report_filename = os.path.join(self.misc["out"], self.misc["html_filename"]) with open(report_filename, "w") as f: f.write(html_string) logging.info("Wrote report into " + report_filename) def _full_single_property_comp_table(self, property: str = None) -> 'Table': header_cells = [] for single in self.stats.singles: _single = SingleProperty(single, single.rundata, property) if property is not None else single modal_id = self._short_summary_modal(_single) header_cells.append(Cell(self, content=self._obj_description(single), color_class_obj=single, modal_id=modal_id)) table = Table(self, header_cells, header_cells, Cell(self, "vs.")) for i in range(self.stats.number_of_singles()): for j in range(self.stats.number_of_singles()): popover = Popover(self, "Explanation", content="") cell = None pair = self.stats.get_pair(i, j) rel_diff = None if property is None: popover.content = """ Geometric mean of the mean differences relative to the means of the left: \\[\\sqrt[\|properties\|]{ \\prod_{p \in \\text{properties}} \\frac{\\overline{\\text{left[p]}} - \\overline{\\text{right[p]}}}{ \\overline{\\text{left[p]}}}}\]

Using the more widely known arithmetic mean would be like lying

. """ rel_diff = pair.rel_difference() popover.trigger = "hover click" else: pair = pair[property] popover.content="""Difference relative to the mean of the left: \\begin{align} & \\frac{\\overline{\\text{left[%s]}} - \\overline{\\text{right[%s]}}}{\\overline{\\text{left[%s]}}} \\\\ &= \\frac{%5.4f - %5.4f}{%5.4f} \\end{align} """ % (property, property, property, pair.first.mean(), pair.second.mean(), pair.first.mean()) rel_diff = pair.mean_diff_per_mean() cell = Cell(self, content=str(rel_diff), popover=popover, color_class_obj=pair, show_click_on_info=True) cell.modal_id = self._short_summary_modal(pair) table[i, j] = cell return table def _extended_summary(self, obj: BaseStatObject, with_title: bool = True, title_level: int = 3, title_class: str = "") -> str: html = "" other_id_obj = None # type: BaseStatObject if isinstance(obj, Single): html += self._extended_summary_of_single(obj, title_level) if isinstance(obj, SingleProperty): html += self._extended_summary_of_single_property(obj, title_level) if isinstance(obj, TestedPair): html += self._extended_summary_of_tested_pair(obj, title_level) other_id_obj = obj.swap() if isinstance(obj, TestedPairProperty): html += self._extended_summary_of_tested_pair_property(obj, title_level) if with_title: other_id_app = "" if other_id_obj is None else """
"""\ .format(self._html_id_for_object("misc", other_id_obj)) html = """ {title}""".format(level=title_level, tc=title_class, title=self._obj_description(obj), id=self._html_id_for_object("misc", obj)) + other_id_app + html return html def _extended_summary_of_single(self, obj: Single, title_level: int) -> str: html = self._short_summary(obj, use_modals=True, extended=False, title_level=title_level + 1) for prop in sorted(obj.properties.keys()): html += """
{prop}""".format( level=title_level + 1, prop=prop, id=self._html_id_for_object("misc", obj.properties[prop]) ) html += self._extended_summary(obj.properties[prop], with_title=False, title_level=title_level + 1, title_class="page-header") html += """
""" return html def _extended_summary_of_single_property(self, obj: SingleProperty, title_level: int) -> str: html = self._short_summary(obj, use_modals=True, extended=True, title_level=title_level + 1) return html def _extended_summary_of_tested_pair(self, obj: TestedPair, title_level: int) -> str: html = self._short_summary(obj, use_modals=True, extended=True, title_level=title_level + 1) swapped = obj.swap() for prop in sorted(obj.properties.keys()): html += """
{prop}
""".format( level=title_level + 1, prop=prop, id=self._html_id_for_object("misc", obj.properties[prop]), id2=self._html_id_for_object("misc", swapped.properties[prop]) ) html += self._extended_summary(obj.properties[prop], with_title=False, title_level=title_level + 1, title_class="page-header") html += """
""" return html def _extended_summary_of_tested_pair_property(self, obj: TestedPairProperty, title_level: int) -> str: html = self._short_summary(obj, use_modals=True, extended=True, title_level=title_level + 1) return html def _short_summary(self, obj: BaseStatObject, with_title: bool = False, title_level: int = 4, use_modals: bool = False, extended: bool = False) -> str: html = "" if with_title: html += "{title}".format(level=title_level, title=self._obj_description(obj)) html += self._format_errors_and_warnings(obj) if isinstance(obj, SingleProperty): html += self._short_summary_of_single_property(obj, use_modals, extended) if isinstance(obj, TestedPairProperty): html += self._short_summary_of_tested_pair_property(obj, use_modals, extended) if isinstance(obj, TestedPair): html += self._short_summary_of_tested_pair(obj, use_modals, extended) if isinstance(obj, Single): html += self._short_summary_of_single(obj, use_modals, extended) return html def _short_summary_of_single(self, obj: Single, use_modal: bool = False, extended: bool = False): obj_descrs = sorted(obj.properties.keys()) objs = [obj.properties[val] for val in obj_descrs] return self._short_summary_table_for_single_property(objs=objs, objs_in_cols=False, obj_descrs=obj_descrs, use_modal=use_modal, extended=extended) def _short_summary_of_single_property(self, obj: SingleProperty, use_modals: bool = False, extended: bool = False): filenames = self._histogram(obj, big=extended, zoom_in=True) html = self._filenames_to_img_html(filenames) if extended and self.misc["show_zoomed_out"]: html += self._filenames_to_img_html(self._histogram(obj, big=extended, zoom_in=False)) html += self._short_summary_table_for_single_property([obj], objs_in_cols=True, use_modal=use_modals, extended=extended) return html def _short_summary_of_tested_pair_property(self, obj: TestedPairProperty, extended: bool = False, use_modals: bool = False): filenames = self._histogram(obj, big=extended, zoom_in=True) html = self._filenames_to_img_html(filenames) if extended and self.misc["show_zoomed_out"]: filenames = self._histogram(obj, big=extended, zoom_in=False) html += self._filenames_to_img_html(filenames) ci_popover = Popover(self, "Confidence interval", """ The chance is \\[ 1 - \\alpha = {p} \\] that the mean difference \\begin{{align}} &\\text{{{first}}} - \\text{{{second}}} \\\\ =& {diff} \\end{{align}} lies in the interval $$({ci[0]:5.5f}, {ci[1]:5.5f})$$ (assuming the data is normal distributed to a certain degree). """.format(p=1-self.misc["alpha"], first=str(obj.first.parent), second=str(obj.second.parent), prop=obj.property, diff=obj.mean_diff(), ci=obj.mean_diff_ci(self.misc["alpha"]))) tested_per_prop = [ { "title": "Mean difference", "popover": Popover(self, "Explanation", """ Difference between the mean of the first and the mean of the second. It's the absolute difference and is often less important that the relative differences. """), "func": lambda x: x.mean_diff(), "format": "{:5.5f}" }, { "title": "... per mean", "func": lambda x: x.mean_diff_per_mean(), "format": "{:5.5%}", "popover": Popover(self, "Explanation", """The mean difference relative to the first mean \\begin{align} & \\frac{ \\overline{\\text{%s}} - \\overline{\\text{%s}}}{ \\overline{\\text{%s}} } \\\\ &= \\frac{ %f }{ %f} \\end{align} gives a number that helps to talk about the practical significance of the mean difference. A tiny difference might be cool, but irrelevant (as caching effects are probably higher, use the
temci build
if you're curious about this). """ % (obj.first.parent.description(), obj.second.parent.description(), str(obj.first.parent), float(obj.mean_diff()), float(obj.first.mean()))) }, { "title": "... per std dev", "func": lambda x: x.mean_diff_per_dev(), "format": "{:5.5%}", "popover": Popover(self, "Explanation", """ The mean difference relative to the maximum standard deviation: \\begin{{align}} &\\frac{{ \\overline{{ \\text{{{first}}} }} - \\overline{{\\text{{{second}}}}}}}{{ \\text{{max}}(\\sigma_\\text{{{first}}}, \\sigma_\\text{{{second}}}) }} \\\\ = & \\frac{{{md}}}{{{std}}} \\end{{align}} It's important, because as Gernot Heiser points out: """.format(first=obj.first.parent.description(), second=obj.second.parent.description(), md=obj.mean_diff(), std=obj.max_std_dev()), trigger="hover click") }, { "title": "... ci (lower bound)", "func": lambda x: x.mean_diff_ci(self.misc["alpha"])[0], "format": "{:5.5f}", "extended": True, "popover": ci_popover } ,{ "title": "... ci (upper bound)", "func": lambda x: x.mean_diff_ci(self.misc["alpha"])[1], "format": "{:5.5f}", "extended": True, "popover": ci_popover }, { "title": obj.tester.name, "func": lambda x: x.equal_prob(), "format": "{:5.5%}", "popover": self._popover_for_tester(obj.tester) }, { "title": "min n", "func": lambda x: x.min_observations(), "format": "{}", "popover": Popover(self, "Explanation", """ The minimum of the number of valid runs of both. or statistically spoken: the minimum sample size.""") } ] if not extended: l = [] for elem in tested_per_prop: if not ("extended" in elem and elem["extended"]): l.append(elem) tested_per_prop = l def content_func(row_header: str, col_header: str, row: int, col: int): return tested_per_prop[row]["format"].format(tested_per_prop[row]["func"](obj)) def header_popover_func(elem, index: int, is_header_row: bool): if not is_header_row and "popover" in tested_per_prop[index]: return tested_per_prop[index]["popover"] table = Table.from_content_func(self, cols=[obj], rows=list(map(lambda d: d["title"], tested_per_prop)), content_func=content_func, anchor_cell=Cell(self), header_popover_func=header_popover_func) html += str(table) html += self._short_summary_table_for_single_property(objs=[obj.first, obj.second], obj_descrs=[obj.first.description(), obj.second.description()], objs_in_cols=False, use_modal=use_modals) return html def _short_summary_of_tested_pair(self, obj: TestedPair, extended: bool = False, use_modals: bool = False) -> str: tested_per_prop = [ { "title": "Mean difference", "popover": Popover(self, "Explanation", """ Difference between the mean of the first and the mean of the second. It's the absolute difference and is often less important that the relative differences. """), "func": lambda x: x.mean_diff(), "format": "{:5.5f}" }, { "title": "... per mean", "func": lambda x: x.mean_diff_per_mean(), "format": "{:5.5%}", "popover": Popover(self, "Explanation", """The mean difference relative to the first mean gives a number that helps to talk about the practical significance of the mean difference. A tiny difference might be cool, but irrelevant (as caching effects are probably higher, use the \\verb|temci build| if you're curious about this). """) }, { "title": "... per std dev", "func": lambda x: x.mean_diff_per_dev(), "format": "{:5.5%}", "popover": Popover(self, "Explanation", """ The mean difference relative to the maximum standard deviation is important, because as Gernot Heiser points out: """, trigger="hover click") }, { "title": "... 
ci", "func": lambda x: x.mean_diff_ci(self.misc["alpha"])[0], "format": "{:5.5f}", "extended": True, "popover": Popover(self, "Confidence interval", """ The chance is \\[ 1 - \\alpha = {p} \\] that the mean difference lies in the interval of which the lower and the upper bound are given (assuming the data is normal distributed to a certain degree). """.format(p=1-self.misc["alpha"])) } ,{ "title": "", "func": lambda x: x.mean_diff_ci(self.misc["alpha"])[1], "format": "{:5.5f}", "extended": True, "popover": Popover(self, "Confidence interval", """ The chance is \\[ 1 - \\alpha = {p} \\] that the mean difference lies in the interval of which the lower and the upper bound are given. """.format(p=1-self.misc["alpha"])) }, { "title": obj.tester.name, "func": lambda x: x.equal_prob(), "format": "{:5.5%}", "popover": self._popover_for_tester(obj.tester) } ] if not extended: l = [] for elem in tested_per_prop: if not ("extended" in elem and elem["extended"]): l.append(elem) tested_per_prop = l def header_link_func(elem: str, index: int, is_header_row: bool): if not is_header_row and not use_modals: return "#" + self._html_id_for_object("misc", obj.properties[elem]) def header_modal_func(elem: str, index: int, is_header_row: bool): if not is_header_row and use_modals: return self._short_summary_modal(obj.properties[elem]) def content_func(row_header: str, col_header: str, row: int, col: int): d = tested_per_prop[col] res = d["func"](obj.properties[row_header]) return d["format"].format(res) def header_color_obj(elem, index: int, is_header_row: bool): if not is_header_row: return obj[elem] def header_popover_func(elem, index: int, is_header_row: bool): if is_header_row and "popover" in tested_per_prop[index]: return tested_per_prop[index]["popover"] table = Table.from_content_func(self, rows=sorted(list(obj.properties.keys())), cols=list(map(lambda d: d["title"], tested_per_prop)), header_link_func=header_link_func, content_func=content_func, anchor_cell=Cell(self), header_color_obj_func=header_color_obj, header_modal_func=header_modal_func, header_popover_func=header_popover_func) html = str(table) html += """

The relative difference between {first} and {second} is {rel_diff} """.format(po=Popover(self, "Explanation", """ Geometric mean of the mean differences relative to the means of the first: \\[\\sqrt[\|properties\|]{ \\prod_{p \in \\text{properties}} \\frac{\\overline{\\text{first[p]}} - \\overline{\\text{second[p]}}}{ \\overline{\\text{first[p]}}}}\] Using the more widely known would be like lying. """, trigger="hover click"), first=obj.first, second=obj.second, rel_diff=obj.rel_difference()) return html def _short_summary_table_for_single_property(self, objs: t.List[SingleProperty], use_modal: bool, objs_in_cols: bool, obj_descrs: t.List[str] = None, extended: bool = False) -> str: """ :param objs: objects to look on :param use_modal: use modals for meta information, not simple links? :param objs_in_cols: show the different objects in own columns, not rows :param extended: more infos :return: """ obj_descrs = obj_descrs or [self._obj_description(obj) for obj in objs] #objs[0]..std_dev_per_mean() mean_ci_popover = Popover(self, "Mean confidence interval", """ The chance is \\[ 1 - \\alpha = {p} \\] that the mean lies in the given interval (assuming the data is normal distributed to a certain degree). """.format(p=1-self.misc["alpha"])) std_dev_ci_popover = Popover(self, "Standard deviation confidence interval", """ The chance is \\[ 1 - \\alpha = {p} \\] that the standard deviation lies in the given interval (assuming the data is normal distributed to a certain degree). """.format(p=1-self.misc["alpha"])) tested_per_prop = [ { "title": "mean", "func": lambda x: x.mean(), "format": "{:5.5f}", "popover": Popover(self, "Explanation", """The simple arithmetical mean \\[ \\frac{1}{n}\\sum_{i=1}^{n} a_i. \\] """) }, { "title": "std dev", "popover": Popover(self, "Explanation", """ The sample standard deviation \\[ \\sigma_N = \\sqrt{\\frac{1}{N} \\sum_{i=1}^N (x_i - \\overline{x})^2} \\] In statistics, the standard deviation is a measure that is used to quantify the amount of variation or dispersion of a set of data values. A standard deviation close to 0 indicates that the data points tend to be very close to the mean (also called the expected value) of the set, while a high standard deviation indicates that the data points are spread out over a wider range of values. (wikipedia) """, trigger="hover click"), "func": lambda x: x.std_dev(), "format": "{:5.5f}", "extended": True }, { "title": "$$\sigma$$ per mean", "func": lambda x: x.std_dev_per_mean(), "format": "{:5.0%}", "popover": Popover(self, "Explanation", """ The standard deviation relative to the mean is a measure of how big the relative variation of data is. A small value is considered neccessary for a benchmark to be useful. Or to quote Gernot Heiser:

Always do several runs, and check the standard deviation. Watch out for abnormal variance. In the sort of measurements we do, standard deviations are normally expected to be less than 0.1%. If you see >1% this should ring alarm bells.

""", trigger="hover click") }, { "title": "sem", "popover": Popover(self, "Explanation", """Standard error mean: \\[ \\sigma(\\overline{X}) = \\frac{\\sigma}{\\sqrt{n}} \\]

Put simply, the standard error of the sample is an estimate of how far the sample mean is likely to be from the population mean, whereas the standard deviation of the sample is the degree to which individuals within the sample differ from the sample mean. (wikipedia)

""", trigger="hover focus"), "func": lambda x: x.sem(), "format": "{:5.5f}", "extended": False }, { "title": "median", "func": lambda x: x.median(), "format": "{:5.5f}", "popover": Popover(self, "Explanation", """ The median is the value that seperates that data into two equal sizes subsets (with the < and the > relation respectively). As the mean and the standard deviation are already given here, the median isn't important. """), "extended": True }, { "title": "min", "func": lambda x: x.min(), "format": "{:5.5f}", "popover": Popover(self, "Explanation", """The minimum value. It's a bad sign if the maximum is far lower than the mean and you can't explain it. """), "extended": True }, { "title": "max", "func": lambda x: x.min(), "format": "{:5.5f}", "popover": Popover(self, "Explanation", """The maximum value. It's a bad sign if the maximum is far higher than the mean and you can't explain it. """), "extended": True }, { "title": "n", "func": lambda x: x.observations(), "format": "{}", "popover": Popover(self, "Explanation", """The number of valid runs or statistically spoken: the sample size."""), "extended": False }, { "title": "mean ci (lower bound)", "func": lambda x: x.mean_ci(self.misc["alpha"])[0], "format": "{:5.5f}", "extended": True, "popover": mean_ci_popover } ,{ "title": "mean ci (upper bound)", "func": lambda x: x.mean_ci(self.misc["alpha"])[1], "format": "{:5.5f}", "extended": True, "popover": mean_ci_popover }, { "title": "std dev ci (lower bound)", "func": lambda x: x.std_dev_ci(self.misc["alpha"])[0], "format": "{:5.5f}", "extended": True, "popover": mean_ci_popover } ,{ "title": "std dev ci (upper bound)", "func": lambda x: x.std_dev_ci(self.misc["alpha"])[1], "format": "{:5.5f}", "extended": True, "popover": mean_ci_popover }, { "title": "normality probability", "func": lambda x: x.normality(), "format": "{:5.5%}", "popover": Popover(self, "Explanation", """ Quoting the minitab blog:

If process knowledge tells you that your data should follow a normal distribution, then run a normality test to be sure. If your Anderson-Darling Normality Test p-value is larger than, say, an alpha level of 0.05 (here {alpha}), then you can conclude that your data follow a normal distribution and, therefore, the mean is an adequate measure of central tendency.

The T test is robust against non normality, but that's not the case fpr statistical properties like the given confidence intervals. """.format(alpha=self.misc["alpha"])), "extended": True } ] if not extended: l = [] for elem in tested_per_prop: if not ("extended" in elem and elem["extended"]): l.append(elem) tested_per_prop = l def header_link_func(elem: SingleProperty, index: int, is_header_row: bool): if objs_in_cols == is_header_row and not use_modal: return "#" + self._html_id_for_object("misc", elem) def header_modal_func(elem: SingleProperty, index: int, is_header_row: bool): if objs_in_cols == is_header_row and use_modal: return self._short_summary_modal(elem) def header_popover_func(elem, index: int, is_header_row: bool): if objs_in_cols != is_header_row and "popover" in tested_per_prop[index]: return tested_per_prop[index]["popover"] def content_func(row_header: t.Union[SingleProperty, str], col_header: t.Union[SingleProperty, str], row: int, col: int): d = {} obj = None # type: SingleProperty if objs_in_cols: d = tested_per_prop[row] obj = col_header else: d = tested_per_prop[col] obj = row_header return d["format"].format(d["func"](obj)) def header_color_obj(elem, index: int, is_header_row: bool): if objs_in_cols == is_header_row: return elem def header_content_func(elem, index: int, is_header_row: bool) -> str: if objs_in_cols == is_header_row: return obj_descrs[index] return tested_per_prop[index]["title"] func_titles = list(map(lambda d: d["title"], tested_per_prop)) rows = [] cols = [] if objs_in_cols: cols = objs rows = func_titles else: cols = func_titles rows = objs table = Table.from_content_func(self, rows=rows, cols=cols, header_link_func=header_link_func, content_func=content_func, anchor_cell=Cell(self), header_color_obj_func=header_color_obj, header_content_func=header_content_func, header_modal_func=header_modal_func, header_popover_func=header_popover_func) return str(table) def _comparison_for_prop(self, property) -> str: html = self._filenames_to_img_html( self._singles_property_boxplot(self.stats.singles_properties[property], big=True), kind="boxplot" ) html += "

" html += self._tabular_comparison_for_prop(property) return html def _tabular_comparison_for_prop(self, property: str) -> str: return self._short_summary_table_for_single_property(self.stats.singles_properties[property].singles, use_modal=True, objs_in_cols=False) def _filenames_to_img_html(self, filenames: t.Dict[str, str], kind: str = "hist"): return """

""".format(popover=self._img_filenames_popover(filenames, kind), img=self._filename_relative_to_out_dir(filenames["img"])) def _img_filenames_popover(self, filenames: t.Dict[str, str], kind: str = "hist") -> 'Popover': _filenames = {} for key in filenames: _filenames[key] = self._filename_relative_to_out_dir(filenames[key]) filenames = _filenames html = """
""" if "img" in filenames: html += """ The current image """.format(**filenames) if "pdf" in filenames: html += """ PDF (generated by matplotlib) """.format(**filenames) if "tex" in filenames: if kind == "hist": html += """ TeX (requiring the package pgfplots) """.format(**filenames) elif kind == "boxplot": html += """ TeX (requiring the package pgfplots and \\usepgfplotslibrary{{statistics}}) """.format(**filenames) html +=""" Standalone TeX """.format(**filenames) html += """
""".format(**filenames) return Popover(self, "Get this image in your favorite format", content=html, trigger="hover click") def _filename_relative_to_out_dir(self, abs_filename: str) -> str: ret = os.path.realpath(abs_filename)[len(os.path.realpath(self.misc["out"])) + 1: ] if ret == "": return "." return ret _boxplot_cache = {} _boxplot_async_cache = {} def _singles_property_boxplot(self, obj: SinglesProperty, fig_width: int = None, big: bool = False): if fig_width is None: fig_width = self.misc["fig_width_big"] if big else self.misc["fig_width_small"] filename = self._get_fig_filename(obj) + "___{}".format(fig_width) if filename not in self._boxplot_async_cache: d = { "img": filename + BaseStatObject.img_filename_ending } if self.misc["gen_tex"]: d["tex"] = filename + ".tex" d["tex_standalone"] = filename + "____standalone.tex" if self.misc["gen_pdf"]: d["pdf"] = filename + ".pdf" self._boxplot_cache[filename] = d self._boxplot_async_cache[filename] = { "filename": filename, "obj": obj, "fig_width": fig_width, "img": True, "tex": self.misc["gen_tex"], "pdf": self.misc["gen_pdf"], "tex_sa": self.misc["gen_tex"] } return self._boxplot_cache[filename] def _process_boxplot_cache_entry(self, entry: t.Dict[str, str]): height = self.misc["boxplot_height"] * len(entry["obj"].singles) + 2 entry["obj"].boxplot(fig_width=entry["fig_width"], fig_height=height) entry["obj"].store_figure(entry["filename"], fig_width=entry["fig_width"], img=entry["img"], tex=entry["tex"], pdf=entry["pdf"], tex_standalone=entry["tex_sa"], fig_height=height) logging.debug("Plotted {}, fig_width={}cm, img={}, tex={}, pdf={}" .format(entry["obj"], entry["fig_width"], entry["img"], entry["tex"], entry["pdf"])) _hist_cache = {} # type: t.Dict[str, t.Dict[str, str]] _hist_async_img_cache = {} _hist_async_misc_cache = {} def _histogram(self, obj: BaseStatObject, fig_width: int = None, zoom_in: bool = True, big: bool = False) -> t.Dict[str, str]: if fig_width is None: fig_width = self.misc["fig_width_big"] if big else self.misc["fig_width_small"] filename = self._get_fig_filename(obj) + "___{}___{}".format(fig_width, zoom_in) if filename not in self._hist_cache: d = { "img": filename + BaseStatObject.img_filename_ending } if self.misc["gen_tex"]: d["tex"] = filename + ".tex" d["tex_standalone"] = filename + "____standalone.tex" if self.misc["gen_pdf"]: d["pdf"] = filename + ".pdf" self._hist_cache[filename] = d self._hist_async_img_cache[filename] = { "filename": filename, "obj": obj, "fig_width": fig_width, "zoom_in": zoom_in, "img": True, "tex": False, "pdf": False, "tex_sa": False } if self.misc["gen_pdf"] or self.misc["gen_tex"]: self._hist_async_misc_cache[filename] = { "filename": filename, "obj": obj, "fig_width": fig_width, "zoom_in": zoom_in, "img": False, "tex": self.misc["gen_tex"], "pdf": self.misc["gen_pdf"], "tex_sa": self.misc["gen_tex"] } return self._hist_cache[filename] def _process_hist_cache_entry(self, entry: t.Dict[str, str]): entry["obj"].histogram(zoom_in=entry["zoom_in"], fig_width=entry["fig_width"]) entry["obj"].store_figure(entry["filename"], fig_width=entry["fig_width"], img=entry["img"], tex=entry["tex"], pdf=entry["pdf"], tex_standalone=entry["tex_sa"]) logging.debug("Plotted {}, zoom_in={}, fig_width={}cm, img={}, tex={}, pdf={}" .format(entry["obj"], entry["zoom_in"], entry["fig_width"], entry["img"], entry["tex"], entry["pdf"])) def _popover_for_tester(self, tester: Tester): return Popover(self, tester.name.capitalize(), """ Probability that the null hypothesis is not incorrect. 
It's the probability that the measured values (for a given property) come from the same population for both benchmarked programs. In short: that the programs have the same characteristics for a given property.
Important note: Statistical tests can only give a probability of the null hypothesis being incorrect. But this is okay if your aim is to see whether a specific program is better (or different) than another program in some respect.
""") def _short_summary_modal(self, obj: BaseStatObject) -> str: """ :param obj: :return: id """ if not hasattr(self, "_modal_cache"): self._modal_cache = [] # type: t.List[str] modal_id = self._html_id_for_object("short_summary_modal", obj) if modal_id in self._modal_cache: return modal_id modal_title = self._obj_description(obj) modal_body = self._short_summary(obj, with_title=False) html_id = self._html_id_for_object("misc", obj) html = """ """.format(**locals()) self.app_html += html return modal_id def _obj_description(self, obj: BaseStatObject) -> str: if isinstance(obj, Single): return obj.description() if isinstance(obj, TestedPair): return "{} vs. {}".format(self._obj_description(obj.first), self._obj_description(obj.second)) if isinstance(obj, SingleProperty) or isinstance(obj, TestedPairProperty): obj_base = "" if isinstance(obj, SingleProperty): obj_base = obj.rundata.description() else: obj_base = self._obj_description(obj.parent) return obj_base + " (regarding {})".format(obj.property) def _html_id_for_object(self, scope: str, obj: BaseStatObject) -> str: return "{}___{}".format(scope, self._get_obj_id(obj)) def _get_obj_id(self, obj: BaseStatObject) -> str: if isinstance(obj, Single): return str(self.stats.singles.index(obj)) if isinstance(obj, TestedPair): return self._get_obj_id(obj.first) + "_" + self._get_obj_id(obj.second) if isinstance(obj, SingleProperty) or isinstance(obj, TestedPairProperty): return self._get_obj_id(obj.parent) + "__" + self.html_escape_property(obj.property) if isinstance(obj, SinglesProperty): return "SinglesProperty______" + self.html_escape_property(obj.property) assert False # you shouldn't reach this point @classmethod def html_escape_property(cls, property: str) -> str: return re.sub(r"([^a-zA-Z0-9]+)", "000000", property) def _format_errors_and_warnings(self, obj: BaseStatObject, show_parent: bool = True) -> str: def format_msg(msg: StatMessage): message = msg.generate_msg_text(show_parent) msg_class = "div_danger" if msg.type == StatMessageType.ERROR else "div_warning" html = """
{message}
""".format(**locals()) if msg.hint != "" and msg.hint is not None: html = """
{message}
""".format(**locals()) return html def collapsible(title: str, msgs: t.List[StatMessage]): collapse_id = self._random_html_id() heading_id = self._random_html_id() inner = "\n".join(map(format_msg, msgs)) return """
{inner}
""".format(**locals()) html = "" if obj.has_errors(): html += collapsible('Errors {}'.format(len(obj.errors())), obj.errors()) if obj.has_warnings(): html += collapsible('Warnings {}'.format(len(obj.warnings())), obj.warnings()) return html _time = time.time() def _get_fig_filename(self, obj: BaseStatObject) -> str: """ Without any extension. """ return os.path.realpath(os.path.join(os.path.abspath(self.misc["out"]), self._html_id_for_object("fig", obj))) _id_counter = 1000 def _random_html_id(self) -> str: self._id_counter += 1 return "id" + str(self._id_counter) def get_random_filename(self) -> str: return os.path.realpath(os.path.join(os.path.abspath(self.misc["out"]), self._random_html_id())) class Popover: divs = {} # t.Dict[str, str] """ Maps the contents of the created divs to their ids """ def __init__(self, parent: HTMLReporter2, title: str, content: str, trigger: str = "hover"): self.parent = parent self.title = title self.content = content or "" self.trigger = trigger def __str__(self) -> str: content = """
""" + self.content + """
""" if content not in self.divs: id = self.parent._random_html_id() self.parent.app_html += """ """.format(id=id, content=content) self.divs[content] = id id = self.divs[content] focus = 'tabindex="0" role="button"' if "focus" in self.trigger or "click" in self.trigger else "" return '{focus} data-trigger="{trigger}" data-toggle="popover" data-html="true"' \ 'data-placement="auto" data-title="{title}" data-container="body" ' \ 'data-content-id="{id}"'\ .format(content=content, trigger=self.trigger, title=self.title, focus=focus, id=id) def color_class(obj: BaseStatObject) -> str: if obj.has_errors(): return "danger" if obj.has_warnings(): return "warning" if isinstance(obj, TestedPairProperty): if obj.is_equal() is not None: return "sucess" if obj.is_equal() == False and obj.mean_diff_per_mean() < 1 else "active" return "" def color_explanation(obj: BaseStatObject) -> str: _color_class = "div_" + color_class(obj) msg = "" if obj.has_errors(): msg = "This color means that the corresponding data set is erroneous " \ "(with {} errors and {} warnings).".format(len(obj.errors()), len(obj.warnings())) elif obj.has_warnings(): msg = "This color means that the corresponding data set could be erroneous " \ "(with {} warnings).".format(len(obj.warnings())) elif isinstance(obj, TestedPairProperty) and obj.is_equal() is not None: msg = "This color means that everything is probably okay with the corresponding data" \ " and that the tester could make a decision." else: msg = "Everything seems to be okay." if msg != "": return """

{msg}

""".format(**locals()) class Cell: """ Cell of a html table """ def __init__(self, parent: HTMLReporter2, content: str = "", cell_class: str = "", popover: Popover = None, modal_id: str = None, color_class_obj: BaseStatObject = None, is_header_cell: bool = False, cell_scope: str = None, show_click_on_info: bool = None, link: str = None): """ :param content: displayed text of the cell :param cell_class: CSS class of the table cellr :param modal_id: id of the modal linked to this cell :param color_class_obj: object used to get the color class. Adds also an explanation to the popover :param is_header_cell: is the cell a header cell? """ self.content = content self.cell_class = cell_class self.popover = popover self.modal_id = modal_id self.link = link self.parent = parent assert link is None or modal_id is None if color_class_obj is not None: if self.popover is None: self.popover = Popover(parent, "Explanation", color_explanation(color_class_obj)) else: self.popover.content += color_explanation(color_class_obj) self.cell_class += " " + color_class(color_class_obj) if (modal_id is not None and show_click_on_info != False) or (show_click_on_info is True and not link): msg = "

Click on the cell to get more information.

" if self.popover is None: self.popover = Popover(parent, "Explanation", msg) else: self.popover.content += msg self.is_header_cell = is_header_cell self.cell_scope = cell_scope def __str__(self): cell_tag = "th" if self.is_header_cell else "td" scope = 'scope="{}"'.format(self.cell_scope) if self.cell_scope else "" html = """<{} class="{}" {}>""".format(cell_tag, self.cell_class, scope) html_end = "".format(cell_tag) if self.popover: html += """
""".format(self.popover) html_end = "
" + html_end if self.modal_id: html += """""".format(id=self.modal_id) html_end = "" + html_end if self.link: html += """ """.format(link=self.link, elem_id=self.parent._random_html_id()) html_end = "" + html_end return html + self.content + html_end T1 = t.TypeVar('T1', BaseStatObject, str, int, float, bool) T2 = t.TypeVar('T2', BaseStatObject, str, int, float, bool) class Table: """ A html table consisting of Cell objects. Idea: Abstract the creation of html tables to a degree that allows automatic generation of latex and csv. """ def __init__(self, parent: HTMLReporter2, header_row: t.List['Cell'], header_col: t.List['Cell'], anchor_cell: 'Cell' = None, content_cells: t.List[t.List['Cell']] = None): """ The resulting table has len(header_row) + rows and len(header_col) + 1 columns. :param header_row: list of cells of the bold top header row :param header_col: list of cells of the bold left header collumn :param anchor_cell: the cell in the top left corner of the table :param content_cells: a list of content rows :return resulting html """ self.parent = parent self.header_row = header_row self.header_col = header_col for cell in itertools.chain(self.header_row, self.header_col): cell.is_header_cell = True for cell in self.header_col: cell.cell_scope = "row" assert len(header_row) > 0 self.orig_anchor_cell = Cell(self.parent, "") if anchor_cell is None else Cell(self.parent, anchor_cell.content) self.anchor_cell = anchor_cell or Cell(self.parent, "⍗ ") self.anchor_cell.content += " ⍗" self.anchor_cell.cell_class += " anchor_cell " self.height = len(header_col) """ Number of content (non header) rows """ self.width = len(header_row) """ Number of content (non header) columns """ if content_cells: assert len(content_cells) == self.height and len(content_cells[0]) == self.width \ and all(len(content_cells[0]) == len(row) for row in content_cells) self.content_cells = content_cells else: self.content_cells = [[Cell(self.parent) for i in range(self.width)] for j in range(self.height)] def __str__(self) -> str: html = """ """ html += " ".join(str(cell) for cell in [self.format_anchor_cell()] + self.header_row) html += """ """ for (hcell, row) in zip(self.header_col, self.content_cells): html += "\t\t\t{}\n".format(" ".join(str(cell) for cell in [hcell] + row)) html += """
""" return html def html(self): return str(self) def format_anchor_cell(self) -> 'Cell': formats = [{ "ending": ".tex", "mime": "application/x-latex", "descr": "Latex table", "code": self.latex() }, { "ending": ".tex", "mime": "application/x-latex", "descr": "Latex table with surrounding article environment", "code": self.latex(True) }, { "ending": ".csv", "mime": "text/csv", "descr": "CSV table", "code": self.csv() }] html = """
""" for d in formats: id = self.parent._random_html_id() self.parent.app_html += """ """.format(id, d["code"]) html += """
{descr}
""".format(descr=d["descr"], id=id, filename="table" + d["ending"], mime=d["mime"]) if self.parent.misc["gen_xls"]: html += """ Excel (.xls) file """.format(filename=self.xls()) html += """
""" self.anchor_cell.popover = Popover(self.parent, "Get this table in your favorite format", content=html, trigger="hover click") return self.anchor_cell def latex(self, with_env: bool = False) -> str: tex = "" tex_end = "" if with_env: tex = """ \\documentclass[10pt,a4paper]{article} \\begin{document} """ tex_end = """ \\end{document} """ tex += """ \\begin{{tabular}}{{l|{cs}}} """.format(cs="".join("r" * self.width)) tex_end = """ \\end{tabular} """ + tex_end tex += " & ".join(cell.content for cell in [self.orig_anchor_cell] + self.header_row) + "\\\\ \n \\hline " for (hcell, row) in zip(self.header_col, self.content_cells): tex += " & ".join(cell.content.replace("%", "\\%") for cell in [hcell] + row) + "\\\\ \n" return tex + tex_end def csv(self) -> str: rows = [] rows.append(",".join(repr(cell.content) for cell in [self.orig_anchor_cell] + self.header_row)) def convert_content(text: str) -> str: if text.endswith("%"): return str(float(text[:-1]) / 100) try: float(text) return text except: return repr(text) for (hcell, row) in zip(self.header_col, self.content_cells): rows.append(",".join(convert_content(cell.content) for cell in [hcell] + row)) return "\n".join(rows) def xls(self) -> str: import tablib data = tablib.Dataset() data.headers = [cell.content for cell in [self.orig_anchor_cell] + self.header_row] for (hcell, row) in zip(self.header_col, self.content_cells): data.append([cell.content for cell in [hcell] + row]) filename = self.parent.get_random_filename() + ".xls" with open(filename, "wb") as f: f.write(data.xls) return filename def __getitem__(self, cell_pos: t.Tuple[int, int]) -> 'Cell': return self.content_cells[cell_pos[0]][cell_pos[1]] def __setitem__(self, cell_pos: t.Tuple[int, int], new_val: 'Cell'): self.content_cells[cell_pos[0]][cell_pos[1]] = new_val def append(self, header: 'Cell', content_row: t.List['Cell']): assert len(content_row) == self.width self.content_cells.append(content_row) self.header_col.append(header) @classmethod def from_content_func(cls, parent: HTMLReporter2, rows: t.List[T1], cols: t.List[T2], anchor_cell: 'Cell', content_func: t.Callable[[T1, T2], Any], content_modal_func: t.Callable[[T1, T2, int, int], str] = None, header_modal_func: t.Callable[[t.Union[T1, T2], int, bool], str] = None, content_popover_func: t.Callable[[T1, T2, int, int], t.Optional[Popover]] = None, header_popover_func: t.Callable[[t.Union[T1, T2], int, bool], t.Optional[Popover]] = None, content_link_func: t.Callable[[T1, T2, int, int], t.Optional[str]] = None, header_link_func: t.Callable[[t.Union[T1, T2], int, bool], t.Optional[str]] = None, content_color_obj_func: t.Callable[[T1, T2, int, int], t.Optional[BaseStatObject]] = None, header_color_obj_func: t.Callable[[t.Union[T1, T2], int, bool], t.Optional[BaseStatObject]] = None, header_content_func: t.Callable[[t.Union[T1, T2], int, bool], str] = None): """ Idea: Table that populates itself with a passed content function. 
""" def convert_hc(elem: t.Union[T1, T2], index: int, header_row: bool) -> Cell: def call(func: t.Optional[t.Callable[[t.Union[T1, T2], int, bool], t.T]]) -> t.T: if func: return func(elem, index, header_row) return None content = "" color_obj = None if header_content_func: content = str(header_content_func(elem, index, header_row)) elif isinstance(elem, str) or isinstance(elem, int) or isinstance(elem, float): content = str(elem) elif isinstance(elem, BaseStatObject): content = parent._obj_description(elem) else: assert False if isinstance(elem, BaseStatObject): color_obj = elem if header_color_obj_func: color_obj = header_color_obj_func(elem, index, header_row) modal_id = call(header_modal_func) popover = call(header_popover_func) link = None if header_link_func and header_link_func(elem, index, header_row): assert not modal_id # modal and link can't be used together in the same cell link = header_link_func(elem, index, header_row) return Cell(parent, content, popover=popover, modal_id=modal_id, color_class_obj=color_obj, is_header_cell=True, cell_scope="row" if header_row else None, link=link) header_row = [] for (i, elem) in enumerate(cols): header_row.append(convert_hc(elem, i, header_row=True)) header_col = [] for (i, elem) in enumerate(rows): header_col.append(convert_hc(elem, i, header_row=False)) def convert_cc(row_header: T1, col_header: T2, row: int, col: int) -> Cell: def call(func: t.Optional[t.Callable[[T1, T2, int, int], t.T]]) -> t.T: if func: return func(row_header, col_header, row, col) return None content = str(content_func(row_header, col_header, row, col)) color_obj = call(content_color_obj_func) modal_id = call(content_modal_func) popover = call(content_popover_func) link = call(content_link_func) assert None in [link, modal_id] return Cell(parent, content, popover=popover, modal_id=modal_id, color_class_obj=color_obj, link=link) content_cells = [] for (row, row_header) in enumerate(rows): a = [] for (col, col_header) in enumerate(cols): a.append(convert_cc(row_header, col_header, row, col)) content_cells.append(a) return Table(parent, header_row, header_col, anchor_cell, content_cells) def html_escape_property(property: str) -> str: return re.sub(r"([^a-zA-Z0-9]+)", "000000", property)PKǀDHȴU^^temci/tester/stats.py""" Statistical helper classes for tested pairs and single blocks. 
""" import logging import os from collections import defaultdict from enum import Enum import itertools import math from temci.tester.rundata import RunData from temci.tester.testers import Tester, TesterRegistry from temci.utils.settings import Settings import typing as t import temci.utils.util as util if util.can_import("scipy"): import numpy as np import scipy as sp import scipy.stats as st import pandas as pd from temci.utils.typecheck import * from temci.utils.util import join_strs class StatMessageType(Enum): ERROR = 10 WARNING = 5 class StatMessageValueFormat(Enum): INT = "{}" FLOAT = "{:5.5f}" PERCENT = "{:5.3%}" class StatMessage: """ A statistical message that gives a hint to """ message = "{props}: {b_val}" hint = "" type = None # type: StatMessageType border_value = 0 value_format = StatMessageValueFormat.FLOAT # type: t.Union[StatMessageValueFormat, str] def __init__(self, parent: 'BaseStatObject', properties: t.Union[t.List[str], str], values): self.parent = parent if not isinstance(properties, list): properties = [properties] if not isinstance(values, list): values = [values] typecheck(properties, List() // (lambda x: len(x) > 0)) typecheck(values, List() // (lambda x: len(x) == len(properties))) self.properties = sorted(properties) self.values = values def __add__(self, other: 'StatMessage') -> 'StatMessage': typecheck(other, T(type(self))) assert self.parent.eq_except_property(other.parent) return type(self)(self.parent, self.properties + other.properties, self.values + other.values) @staticmethod def combine(*messages: t.List[t.Optional['StatMessage']]) -> t.List['StatMessage']: """ Combines all message of the same type and with the same parent in the passed list. Ignores None entries. :param messages: passed list of messages :return: new reduced list """ msgs = set([msg for msg in messages if msg is not None]) # t.Set['StatMessage'] something_changed = True while something_changed: something_changed = False merged_pair = None # type: t.Tuple['StatMessage', 'StatMessage'] for (msg, msg2) in itertools.product(msgs, msgs): if msg is not msg2: if msg.parent.eq_except_property(msg2.parent) and type(msg) == type(msg2): merged_pair = (msg, msg2) something_changed = True break if something_changed: msg, msg2 = merged_pair msgs.remove(msg) msgs.remove(msg2) msgs.add(msg + msg2) return list(msgs) @classmethod def _val_to_str(cls, value) -> str: format = cls.value_format if isinstance(cls.value_format, str) else cls.value_format.value return format.format(value) @classmethod def check_value(cls, value) -> bool: """ If this fails with the passed value, than the warning is appropriate. """ pass @classmethod def create_if_valid(cls, parent, value, properties = None, **kwargs) -> t.Union['StatMessage', None]: assert isinstance(value, Int()|Float()) if cls.check_value(value): return None ret = None if properties is not None: ret = cls(parent, properties, value, **kwargs) else: ret = cls(parent, properties, value, **kwargs) return ret def generate_msg_text(self, show_parent: bool) -> str: """ Generates the text of this message object. :param show_parent: Is the parent shown in after the properties? E.g. 
"blub of bla parent: …" :return: message text """ val_strs = list(map(self._val_to_str, self.values)) prop_strs = ["{} ({})".format(prop, val) for (prop, val) in zip(self.properties, val_strs)] props = join_strs(prop_strs) if show_parent: props += " of {}".format(self.parent.description()) return self.message.format(b_val=self._val_to_str(self.border_value), props=props) class StatWarning(StatMessage): type = StatMessageType.WARNING class StatError(StatWarning, StatMessage): type = StatMessageType.ERROR class StdDeviationToHighWarning(StatWarning): message = "The standard deviation per mean of {props} is to high it should be <= {b_val}." hint = "With the exec run driver you can probably use the stop_start plugin, preheat and sleep plugins. " \ "Also consider to increase the number of measured runs." border_value = 0.01 value_format = StatMessageValueFormat.PERCENT @classmethod def check_value(cls, value) -> bool: return value <= cls.border_value class StdDeviationToHighError(StdDeviationToHighWarning): type = StatMessageType.ERROR border_value = 0.05 class NotEnoughObservationsWarning(StatWarning): message = "The number of observations of {props} is less than {b_val}." hint = "Increase the number of measured runs." border_value = 30 value_format = StatMessageValueFormat.INT @classmethod def check_value(cls, value) -> bool: return value >= cls.border_value class NotEnoughObservationsError(NotEnoughObservationsWarning): type = StatMessageType.ERROR border_value = 15 class BaseStatObject: """ Class that gives helper methods for the extending stat object classes. """ _filename_counter = 0 img_filename_ending = ".svg" def __init__(self): self._stat_messages = [] self.fig = None self._hist_data = {} def get_stat_messages(self) -> t.List[StatMessage]: if not self._stat_messages: self._stat_messages = StatMessage.combine(*self._get_stat_messages()) return self._stat_messages def _get_stat_messages(self) -> t.List[StatMessage]: raise NotImplementedError() def warnings(self) -> t.List[StatMessage]: return [x for x in self.get_stat_messages() if x.type is StatMessageType.WARNING] def errors(self) -> t.List[StatMessage]: return [x for x in self.get_stat_messages() if x.type is StatMessageType.ERROR] def has_errors(self) -> bool: return any([x.type == StatMessageType.ERROR for x in self.get_stat_messages()]) def has_warnings(self) -> bool: return any([x.type == StatMessageType.WARNING for x in self.get_stat_messages()]) def get_data_frame(self, **kwargs) -> 'pd.DataFrame': """ Get the data frame that is associated with this stat object. """ raise NotImplementedError() def eq_except_property(self, other) -> bool: raise NotImplementedError() def _height_for_width(self, width: float) -> float: golden_mean = (np.sqrt(5) - 1.0) / 2.0 # Aesthetic ratio return width * golden_mean def _latexify(self, fig_width: float, fig_height: float = None): """Set up matplotlib's RC params for LaTeX plotting. Call this before plotting a figure. 
Adapted from http://nipunbatra.github.io/2014/08/latexify/ Parameters ---------- fig_width : float, optional, inches fig_height : float, optional, inches """ # code adapted from http://www.scipy.org/Cookbook/Matplotlib/LaTeX_Examples #MAX_HEIGHT_INCHES = 8.0 #if fig_height > MAX_HEIGHT_INCHES: # print("WARNING: fig_height too large:" + fig_height + # "so will reduce to" + MAX_HEIGHT_INCHES + "inches.") # fig_height = MAX_HEIGHT_INCHES params = {'backend': 'ps', 'text.latex.preamble': ['\\usepackage{gensymb}'], 'axes.labelsize': 8, # fontsize for x and y labels (was 10) 'axes.titlesize': 8, 'font.size': 8, # was 10 'legend.fontsize': 8, # was 10 'xtick.labelsize': 8, 'ytick.labelsize': 8, 'text.usetex': True, 'figure.figsize': self._fig_size_cm_to_inch(fig_width,fig_height), 'font.family': 'serif' } import matplotlib matplotlib.rcParams.update(params) def _format_axes(self, ax): """ Adapted from http://nipunbatra.github.io/2014/08/latexify/ """ SPINE_COLOR = 'gray' for spine in ['top', 'right']: ax.spines[spine].set_visible(False) for spine in ['left', 'bottom']: ax.spines[spine].set_color(SPINE_COLOR) ax.spines[spine].set_linewidth(0.5) ax.xaxis.set_ticks_position('bottom') ax.yaxis.set_ticks_position('left') for axis in [ax.xaxis, ax.yaxis]: axis.set_tick_params(direction='out', color=SPINE_COLOR) return ax def _get_new_file_name(self, dir: str) -> str: self._filename_counter += 1 return os.path.join(os.path.abspath(dir), str(self._filename_counter)) def _fig_size_cm_to_inch(self, fig_width: float, fig_height: float) -> t.Tuple[float, float]: return fig_width * 0.39370079, fig_height * 0.39370079 def store_figure(self, filename: str, fig_width: float, fig_height: float = None, pdf: bool = True, tex: bool = True, tex_standalone: bool = True, img: bool = True) -> t.Dict[str, str]: import matplotlib.pyplot as plt """ Stores the current figure in different formats and returns a dict, that maps each used format (pdf, tex or img) to the resulting files name. :param filename: base filename that is prepended with the appropriate extensions :param fig_width: width of the resulting figure (in cm) :param fig_height: height of the resulting figure (in cm) or calculated via the golden ratio from fig_width :param pdf: store as pdf optimized for publishing :param tex: store as tex with pgfplots :param img: store as png image :return: dictionary mapping each used format to the resulting files name """ if fig_height is None: fig_height = self._height_for_width(fig_width) #filename = # self._get_new_file_name(dir) ret_dict = {} if img: ret_dict["img"] = self._store_as_image(filename + self.img_filename_ending, fig_width, fig_height) if tex: ret_dict["tex"] = self._store_as_tex(filename + ".tex", fig_width, fig_height, standalone=False) if pdf: if util.has_pdflatex(): ret_dict["pdf"] = self._store_as_pdf(filename + ".pdf", fig_width, fig_height) else: util.warn_for_pdflatex_non_existence_once() if tex_standalone: ret_dict["tex_standalone"] = self._store_as_tex(filename + "____standalone.tex", fig_width, fig_height, standalone=True) if self.fig is not None: plt.close('all') return ret_dict def _store_as_pdf(self, filename: str, fig_width: float, fig_height: float) -> str: """ Stores the current figure in a pdf file. 
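For orientation, this helper is normally driven by the public store_figure method above, roughly like this (sketch only; the path, the size and the single_prop object are made up):

    single_prop.histogram(fig_width=15)                         # draw the current figure
    files = single_prop.store_figure("report/time_hist", fig_width=15)
    # files maps the produced formats to file names,
    # e.g. {"img": ".../time_hist.svg", "tex": ..., "pdf": ..., "tex_standalone": ...}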
:warning modifies the current figure """ import matplotlib.pyplot as plt if not filename.endswith(".pdf"): filename += ".pdf" self.reset_plt() self._latexify(fig_width, fig_height) try: plt.tight_layout() except ValueError: pass self._format_axes(plt.gca()) plt.savefig(filename) self.reset_plt() return os.path.realpath(filename) def _store_as_tex(self, filename: str, fig_width: float, fig_height: float, standalone: bool) -> str: """ Stores the current figure as latex in a tex file. Needs pgfplots in latex. Works independently of matplotlib. """ if not filename.endswith(".tex"): filename += ".tex" if "min_xval" not in self._hist_data: return x_range = (self._hist_data["min_xval"], self._hist_data["max_xval"]) x_bin_width = (self._hist_data["min_xval"] - self._hist_data["max_xval"]) / self._hist_data["bin_count"] plot_tex = "" ymax = 0 for value in self._hist_data["values"]: hist, bin_edges = np.histogram(value, bins=self._hist_data["bin_count"], range=x_range) #bin_edges = map(_ + (x_bin_width / 2), bin_edges) plot_tex += """ \\addplot coordinates {{ {} ({}, 0) }}; """.format(" ".join(map(lambda d: "({}, {})".format(*d), zip(bin_edges, hist))), bin_edges[-1]) ymax = max(ymax, max(hist)) tex = """ \\pgfplotsset{{width={width}cm, height={height}cm, compat=1.10}} \\begin{{tikzpicture}} \\begin{{axis}}[ ymin=0, ymax={ymax}, bar shift=0pt, enlarge x limits=0.10, cycle list name=auto, every axis plot/.append style={{ybar interval, opacity={opacity},fill,draw=none,no markers}}, ylabel= , xlabel={xlabel}""".format(width=fig_width, height=fig_height, xlabel=self._hist_data["xlabel"], ymax=ymax * 1.2, opacity= 1 if len(self._hist_data["values"]) == 1 else 0.75) if self._hist_data["legend"]: legend = "\\\\".join(self._hist_data["legend"]) + "\\\\" tex += """, legend entries={{{}}}""".format(legend) tex += """ ] """ tex += plot_tex tex += """ \end{axis} \end{tikzpicture} """ if standalone: tex = """ \\documentclass[margin=10pt]{standalone} \\usepackage{pgfplots} \\begin{document} """ + tex + """ \\end{document} """ with open(filename, "w") as f: f.write(tex) return os.path.realpath(filename) def _store_as_image(self, filename: str, fig_width: float, fig_height: float) -> str: """ Stores the current figure as an $img_filename_ending image. """ import matplotlib.pyplot as plt if not filename.endswith(self.img_filename_ending): filename += self.img_filename_ending self.reset_plt() plt.savefig(filename) self.reset_plt() return os.path.realpath(filename) def _freedman_diaconis_bins(self, *arrays: t.List) -> int: """ Calculate number of hist bins using Freedman-Diaconis rule. If more than one array is passed, the maximum number of bins calculated for each array is used. Adapted from seaborns source code. """ # From http://stats.stackexchange.com/questions/798/ import seaborn as sns def freedman_diaconis(array: np.array): array = [a for a in array if not math.isnan(a)] h = 2 * sns.utils.iqr(array) / (len(array) ** (1 / 3)) # fall back to sqrt(a) bins if iqr is 0 if h == 0: return int(np.sqrt(len(array))) else: return int(np.ceil((max(array) - min(array)) / h)) return max(map(freedman_diaconis, arrays)) def is_single_valued(self) -> bool: """ Does the data consist only of one unique value? 
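(Side note on the _freedman_diaconis_bins helper above: the rule it applies can be written as a small standalone function; this sketch merely restates the formula and is not used anywhere.)

    import numpy as np

    def fd_bin_count(xs):
        xs = np.asarray(xs, dtype=float)
        q75, q25 = np.percentile(xs, [75, 25])
        h = 2 * (q75 - q25) / len(xs) ** (1 / 3)   # Freedman-Diaconis bin width
        if h == 0:                                 # degenerate IQR: fall back to the sqrt rule
            return int(np.sqrt(len(xs)))
        return int(np.ceil((xs.max() - xs.min()) / h))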
""" return False def histogram(self, fig_width: int, fig_height: float = None, x_ticks: list = None, y_ticks: list = None, show_legend: bool = None, type: str = None, align: str = 'mid', x_label: str = None, y_label: str = None, zoom_in: bool = True, other_objs: t.List['BaseStatObject'] = None, other_obj_names: t.List[str] = None, own_name: str = None, **kwargs): """ Plots a histogram as the current figure. Don't forget to close it via fig.close() :param x_ticks: None: use default ticks, list: use the given ticks :param y_ticks: None: use default ticks, list: use the given ticks :param show_legend: show a legend in the plot? If None only show one if there are more than one sub histograms :param type: histogram type (either 'bar', 'barstacked', 'step', 'stepfilled' or None for auto) :param align: controls where each bar centered ('left', 'mid' or 'right') :param x_label: if not None, shows the given x label :param y_lable: if not None: shows the given y label :param zoom_in: does the x axis start at the minimum x value? :param kwargs: optional arguments passed to the get_data_frame method :param other_objs: addional objects to plot on the same histogram (only SingleProperty objects allowed) :param other_obj_names: names of the additional objects :param own_name: used with other_objs option """ self._hist_data = {} import matplotlib.pyplot as plt import seaborn as sns if fig_height is None: fig_height = self._height_for_width(fig_width) if self.is_single_valued(): descr = self.description() if isinstance(self, SingleProperty): descr += " [" + self.property + "]" logging.error("Can't plot histogram for {} as it's only single valued.".format(self)) return df = self.get_data_frame(**kwargs) if other_objs: typecheck(self, SingleProperty) for obj in other_objs: if obj.is_single_valued() or not isinstance(obj, SingleProperty): logging.error("Can't additionally plot histogram for {} as it's only single valued.".format(self)) return series_dict = {} for (i, name) in enumerate(other_obj_names): series_dict[name] = pd.Series(other_objs[i].data, name=name) series_dict[own_name] = self.data df = pd.DataFrame(series_dict, columns=sorted(list(series_dict.keys()))) df_t = df.T show_legend = show_legend or (show_legend is None and len(df_t) > 1) min_xval = min(map(min, df_t.values)) if zoom_in else 0 max_xval = max(map(max, df_t.values)) if type is None: type = 'bar' if len(df_t) == 1 else 'stepfilled' bin_count = self._freedman_diaconis_bins(*df_t.values) bins = np.linspace(min_xval, max_xval, bin_count) self.reset_plt() ymax = 0 for value in df_t.values: hist, bin_edges = np.histogram(value, bins=bin_count, range=(min_xval, max_xval)) ymax = max(ymax, max(hist)) self.fig = plt.figure(figsize=self._fig_size_cm_to_inch(fig_width, fig_height)) plt.xlim(min_xval, max_xval) plt.ylim(0, ymax * (1.2 if show_legend else 1.05)) plt.hist(df.values, bins=bin_count, range=(min_xval, max_xval), histtype=type, align=align, label=list(reversed(df.keys())), alpha= 0.75 if len(df_t) > 1 else 1) #sns.distplot(df, bins=bin_count, color=["red", "blue", "yellow"][0:len(df_t)]) if x_ticks is not None: plt.xticks(x_ticks) if y_ticks is not None: plt.yticks(y_ticks) legend = None if show_legend: legend = list(df.keys()) plt.legend(labels=list(reversed(legend))) if len(df_t) == 1: plt.xlabel(df.keys()[0]) if x_label is not None: plt.xlabel(x_label) if y_label is not None: plt.xlabel(y_label) self._hist_data = { "xlabel": x_label or ("" if len(df_t) > 1 else df.keys()[0]), "legend": None if legend is None else 
list(reversed(legend)), "min_xval": min_xval, "max_xval": max_xval, "values": list(reversed(df_t.values)), "bin_count": bin_count } def description(self) -> str: return str(self) def __str__(self) -> str: return self.description() def reset_plt(self): import seaborn as sns sns.reset_defaults() sns.set_style("darkgrid") sns.set_palette(sns.color_palette("muted")) class Single(BaseStatObject): """ A statistical wrapper around a single run data object. """ def __init__(self, data: t.Union[RunData, 'Single']): super().__init__() if isinstance(data, RunData): self.rundata = data else: self.rundata = data.rundata self.attributes = self.rundata.attributes self.properties = {} # type: t.Dict[str, SingleProperty] """ SingleProperty objects for each property """ for prop in data.properties: self.properties[prop] = SingleProperty(self, self.rundata, prop) def _get_stat_messages(self) -> t.List[StatMessage]: """ Combines the messages for all inherited SingleProperty objects (for each property), :return: list of all messages """ msgs = [x for prop in self.properties for x in self.properties[prop].get_stat_messages()] return msgs def get_data_frame(self) -> 'pd.DataFrame': series_dict = {} for prop in self.properties: series_dict[prop] = pd.Series(self.properties[prop].data, name=prop) frame = pd.DataFrame(series_dict, columns=sorted(self.properties.keys())) return frame def description(self) -> str: return self.rundata.description() def eq_except_property(self, other) -> bool: return isinstance(other, type(self)) and self.rundata == other.rundata def __eq__(self, other) -> bool: return self.eq_except_property(other) class SingleProperty(BaseStatObject): """ A statistical wrapper around a single run data block for a specific measured property. """ def __init__(self, parent: Single, data: t.Union[RunData, 'SingleProperty'], property: str): super().__init__() self.parent = parent if isinstance(data, RunData): self.rundata = data # type: RunData self.data = data[property] # type: t.List[t.Union[int, float]] else: self.rundata = data.rundata self.data = data.data self.array = np.array(self.data) self.property = property def _get_stat_messages(self) -> t.List[StatMessage]: msgs = [ StdDeviationToHighWarning.create_if_valid(self, self.std_dev_per_mean(), self.property), StdDeviationToHighError.create_if_valid(self, self.std_dev_per_mean(), self.property), NotEnoughObservationsWarning.create_if_valid(self, self.observations(), self.property), NotEnoughObservationsError.create_if_valid(self, self.observations(), self.property) ] return msgs def mean(self) -> float: return np.mean(self.array) def median(self) -> float: return np.median(self.array) def min(self) -> float: return np.min(self.array) def max(self) -> float: return np.max(self.array) def std_dev(self) -> float: """ Returns the standard deviation. """ return np.std(self.array) def std_devs(self) -> t.Tuple[float, float]: """ Calculates the standard deviation of elements <= mean and of the elements > mean. 
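For example, for the data [1, 2, 3, 9, 10, 11] (mean 6) the lower deviation is computed from [1, 2, 3] and the upper one from [9, 10, 11], both relative to the overall mean, giving (5.0, 5.0).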
:return: (lower, upper) """ mean = self.mean() def std_dev(elements: list) -> float: return np.sqrt(sum(np.power(x - mean, 2) for x in elements) / (len(elements) - 1)) lower = [x for x in self.array if x <= mean] upper = [x for x in self.array if x > mean] return std_dev(lower), std_dev(upper) def std_dev_per_mean(self) -> float: return self.std_dev() / self.mean() def variance(self) -> float: return np.var(self.array) def observations(self) -> int: return len(self.data) def __len__(self) -> int: return len(self.data) def eq_except_property(self, other) -> bool: return isinstance(other, SingleProperty) and self.rundata == other.rundata def __eq__(self, other): return self.eq_except_property(other) and self.property == other.property def sem(self) -> float: """ Returns the standard error of the mean (standard deviation / sqrt(observations)). """ return st.sem(self.array) def std_error_mean(self) -> float: return st.sem(self.array) def mean_ci(self, alpha: float) -> t.Tuple[float, float]: """ Calculates the confidence interval in which the population mean lies with the given probability. Assumes normal distribution. :param alpha: given probability :return: lower, upper bound :see http://stackoverflow.com/a/15034143 """ h = self.std_error_mean() * st.t._ppf((1+alpha)/2.0, self.observations() - 1) return self.mean() - h, self.mean() + h def std_dev_ci(self, alpha: float) -> t.Tuple[float, float]: """ Calculates the confidence interval in which the standard deviation lies with the given probability. Assumes normal distribution. :param alpha: given probability :return: lower, upper bound :see http://www.stat.purdue.edu/~tlzhang/stat511/chapter7_4.pdf """ var = self.variance() * (self.observations() - 1) upper = np.sqrt(var / st.t._ppf(alpha/2.0, self.observations() - 1)) lower = np.sqrt(var / st.t._ppf(1-alpha/2.0, self.observations() - 1)) return lower, upper def is_single_valued(self) -> bool: """ Does the data consist only of one unique value? """ return len(set(self.data)) == 1 def description(self) -> str: return self.rundata.description() def get_data_frame(self) -> 'pd.DataFrame': series_dict = {self.property: pd.Series(self.data, name=self.property)} frame = pd.DataFrame(series_dict, columns=[self.property]) return frame def skewedness(self) -> float: """ Calculates the skewedness of the data. """ return sp.stats.skew(self.data, axis=0, bias=True) if len(self.data) >= 8 else float("nan") def normality(self) -> float: """ Calculates the probability of the data being normal distributed. """ return sp.stats.normaltest(self.data)[1] if len(self.data) >= 8 else float("nan") def percentile(self, q: int) -> float: """ Calculates the q th percentile. q must be between 0 and 100 inclusive. """ return np.percentile(self.data, q) def quartiles(self) -> t.Tuple[float, float, float]: """ Calculates the 3 quartiles (1, 2 and 3) """ return self.percentile(25), self.percentile(50), self.percentile(75) def iqr(self) -> float: """ Calculates the interquartile range. """ return np.subtract(*np.percentile(self.data, [75, 25])) def whiskers(self, whis: float = 1.5) -> t.Tuple[float, float]: """ Calculates the upper and the lower whisker for a boxplot. I.e. the minimum and the maximum value of the data set the lie in the range (Q1 - whis * IQR, Q3 + whis * IQR). IQR being the interquartil distance, Q1 the lower and Q2 the upper quartile. 
Adapted from http://stackoverflow.com/a/20096945 """ q1, q2, q3 = self.quartiles() iqr = self.iqr() hi_val = q1 + whis * self.iqr() whisk_hi = np.compress(self.array <= hi_val, self.array) if len(whisk_hi) == 0 or np.max(whisk_hi) < q3: whisk_hi = q3 else: whisk_hi = max(whisk_hi) # get low extreme lo_val = q1 - whis * iqr whisk_lo = np.compress(self.array >= lo_val, self.array) if len(whisk_lo) == 0 or np.min(whisk_lo) > q1: whisk_lo = q1 else: whisk_lo = min(whisk_lo) return whisk_lo, whisk_hi class TestedPair(BaseStatObject): """ A statistical wrapper around two run data objects that are compared via a tester. """ def __init__(self, first: t.Union[RunData, Single], second: t.Union[RunData, Single], tester: Tester = None): super().__init__() self.first = Single(first) self.second = Single(second) self.tester = tester or TesterRegistry.get_for_name(TesterRegistry.get_used(), Settings()["stats/tester"], Settings()["stats/uncertainty_range"]) self.properties = {} # type: t.Dict[str, TestedPairProperty] """ TestedPairProperty objects for each shared property of the inherited Single objects """ for prop in set(self.first.properties.keys()).intersection(self.second.properties.keys()): self.properties[prop] = TestedPairProperty(self, self.first, self.second, prop, tester) def _get_stat_messages(self) -> t.List[StatMessage]: """ Combines the messages for all inherited TestedPairProperty objects (for each property), :return: simplified list of all messages """ msgs = [x for prop in self.properties for x in self.properties[prop].get_stat_messages()] return msgs def rel_difference(self) -> float: """ Calculates the geometric mean of the relative mean differences (first - second) / first. :see http://www.cse.unsw.edu.au/~cs9242/15/papers/Fleming_Wallace_86.pdf """ # todo: add method (and report.py support) to give a score (based on first mean / second mean) mean = 1 for x in self.properties.values(): mean *= x.mean_diff_per_mean() if mean == 0: return 1 sig = np.sign(mean) return sig * math.pow(abs(mean), 1 / len(self.properties)) def swap(self) -> 'TestedPair': """ Creates a new pair with the elements swapped. :return: new pair object """ return TestedPair(self.second, self.first, self.tester) def __getitem__(self, property: str) -> 'TestedPairProperty': return self.properties[property] def eq_except_property(self, other) -> bool: return isinstance(other, type(self)) and self.first == other.first and self.second == other.second \ and self.tester == other.tester def __eq__(self, other) -> bool: return self.eq_except_property(other) def description(self) -> str: return "{} vs. {}".format(self.first, self.second) class TestedPairsAndSingles(BaseStatObject): """ A wrapper around a list of tested pairs and singles. 
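A short usage sketch (run_a and run_b stand for RunData objects of a benchmarking run and the global Settings are assumed to be initialised):

    stats = TestedPairsAndSingles([run_a, run_b])   # builds one TestedPair per combination
    stats.properties()                              # properties shared by all singles
    for pair in stats.pairs:
        print(pair.description(), pair.rel_difference())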
""" def __init__(self, singles: t.List[t.Union[RunData, Single]], pairs: t.List[TestedPair] = None, distinct_descriptions: bool = False): super().__init__() self.singles = list(map(Single, singles)) # type: t.List[Single] self.pairs = pairs or [] # type: t.List[TestedPair] if distinct_descriptions: descr_attrs = defaultdict(lambda: 0) # type: t.Dict[str, int] descr_nr_zero = {} # type: t.Dict[str, Single] for single in self.singles: if "description" in single.attributes: descr = single.attributes["description"] num = descr_attrs[descr] descr_attrs[descr] += 1 if num != 0: single.attributes["description"] += " [{}]".format(num) if num == 1: descr_nr_zero[descr].attributes["description"] += " [0]" else: descr_nr_zero[descr] = single if pairs is None and len(self.singles) > 1: for i in range(0, len(self.singles) - 1): for j in range(i + 1, len(self.singles)): self.pairs.append(self.get_pair(i, j)) self.singles_properties = {} # type: t.Dict[str, SinglesProperty] for prop in self.properties(): self.singles_properties[prop] = SinglesProperty(self.singles, prop) def number_of_singles(self) -> int: return len(self.singles) def get_pair(self, first_id: int, second_id: int) -> TestedPair: l = self.number_of_singles() assert 0 <= first_id < l and 0 <= second_id < l return TestedPair(self.singles[first_id], self.singles[second_id]) def properties(self) -> t.List[str]: """ Returns the properties that are shared among all single run data objects. """ if not self.singles: return props = set(self.singles[0].properties.keys()) for single in self.singles[1:]: props.intersection_update(single.properties.keys()) return sorted(props) def get_stat_messages(self) -> t.List[StatMessage]: """ Combines the messages for all inherited TestedPair and Single objects, :return: simplified list of all messages """ msgs = [] for pair in self.pairs: msgs.extend(pair.get_stat_messages()) return msgs def __getitem__(self, id: int) -> Single: assert 0 <= id < self.number_of_singles() return self.singles[id] class EffectToSmallWarning(StatWarning): message = "The mean difference per standard deviation of {props} is less than {b_val}." hint = "Try to reduce the standard deviation if you think that the measured difference is significant: " \ "With the exec run driver you can probably use the stop_start plugin, preheat and sleep plugins. " \ "Also consider increasing the number of measured runs." border_value = 2 value_format = StatMessageValueFormat.FLOAT @classmethod def check_value(cls, value) -> bool: return value >= cls.border_value class EffectToSmallError(EffectToSmallWarning): type = StatMessageType.ERROR border_value = 1 class TestedPairProperty(BaseStatObject): """ Statistic helper for a compared pair of run data blocks for a specific measured property. 
""" def __init__(self, parent: TestedPair, first: Single, second: Single, property: str, tester: Tester = None): super().__init__() self.parent = parent self.first = SingleProperty(first, first.rundata, property) self.second = SingleProperty(second, second.rundata, property) self.tester = tester or TesterRegistry.get_for_name(TesterRegistry.get_used(), Settings()["stats/tester"], Settings()["stats/uncertainty_range"]) self.property = property def _get_stat_messages(self) -> t.List[StatMessage]: """ Combines the messages for all inherited TestedPairProperty objects (for each property), :return: simplified list of all messages """ msgs = self.first.get_stat_messages() + self.second.get_stat_messages() if self.is_equal() == False: msgs += [ EffectToSmallWarning.create_if_valid(self, self.mean_diff_per_dev(), self.property), EffectToSmallError.create_if_valid(self, self.mean_diff_per_dev(), self.property) ] return msgs def mean_diff(self) -> float: return self.first.mean() - self.second.mean() def mean_diff_ci(self, alpha: float) -> t.Tuple[float, float]: """ Calculates the confidence interval in which the mean difference lies with the given probability. Assumes normal distribution. :param alpha: given probability :return: lower, upper bound :see http://www.kean.edu/~fosborne/bstat/06b2means.html """ d = self.mean_diff() t = sp.stats.norm.sf(1-alpha/2.0) * np.sqrt(self.first.variance() / self.first.observations() - self.second.variance() / self.second.observations()) return d - t, d + t def mean_diff_per_mean(self) -> float: """ :return: (mean(A) - mean(B)) / mean(A) """ return self.mean_diff() / self.first.mean() def mean_diff_per_dev(self) -> float: """ Calculates the mean difference per standard deviation (maximum of first and second). """ return self.mean_diff() / self.max_std_dev() def equal_prob(self) -> float: """ Probability of the nullhypothesis being not not correct (three way logic!!!). :return: p value between 0 and 1 """ return self.tester.test(self.first.data, self.second.data) def is_equal(self) -> t.Union[None, bool]: """ Checks the nullhypthosesis. :return: True or False if the p val isn't in the uncertainty range of the tester, None else """ if self.tester.is_uncertain(self.first.data, self.second.data): return None return self.tester.is_equal(self.first.data, self.second.data) def mean_std_dev(self) -> float: return (self.first.mean() + self.second.mean()) / 2 def max_std_dev(self) -> float: return max(self.first.std_dev(), self.second.std_dev()) def get_data_frame(self, show_property = True) -> 'pd.DataFrame': columns = [] if show_property: columns = ["{}: {}".format(self.first, self.property), "{}: {}".format(self.second, self.property)] else: columns = [str(self.first), str(self.second)] series_dict = { columns[0]: pd.Series(self.first.data, name=columns[0]), columns[1]: pd.Series(self.second.data, name=columns[1]) } frame = pd.DataFrame(series_dict, columns=list(reversed(columns))) return frame def is_single_valued(self) -> bool: return self.first.is_single_valued() and self.second.is_single_valued() def eq_except_property(self, other) -> bool: return isinstance(other, type(self)) and self.first.eq_except_property(self.second) \ and self.tester == other.tester def __eq__(self, other) -> bool: return self.eq_except_property(other) and self.property == other.property def min_observations(self) -> int: return min(self.first.observations(), self.second.observations()) def description(self) -> str: return "{} vs. 
{}".format(self.first, self.second) def swap(self) -> 'TestedPairProperty': return TestedPairProperty(self.parent, self.parent.first, self.parent.second, self.property, self.tester) class SinglesProperty(BaseStatObject): def __init__(self, singles: t.List[t.Union[Single, SingleProperty]], property: str): super().__init__() self.singles = singles # type: t.List[SingleProperty] if isinstance(singles, List(T(Single))): self.singles = [single.properties[property] for single in singles] self.property = property def __str__(self) -> str: return "SinglesProperty(property={prop})".format(prop=self.property) def get_data_frame(self, **kwargs) -> 'pd.DataFrame': columns = [] data = {} min_len = min(len(single.data) for single in self.singles) for single in self.singles: name = str(single.parent) columns.append(name) data[name] = single.data[0:min_len] return pd.DataFrame(data, columns=columns) def boxplot(self, fig_width: int, fig_height: float = None): """ Creates a (horizontal) box plot comparing all single object for a given property. """ import seaborn as sns import matplotlib.pyplot as plt if fig_height is None: fig_height = self._height_for_width(fig_width) self.fig = plt.figure(figsize=self._fig_size_cm_to_inch(fig_width, fig_height)) df = self.get_data_frame() sns.boxplot(data=df, orient="h") def _store_as_tex(self, filename: str, fig_width: float, fig_height: float, standalone: bool): """ Stores the current figure as latex in a tex file. Works independently of matplotlib. Needs following code in the document preamble: \\usepackage{pgfplots} \\usepgfplotslibrary{statistics} Useful demo at http://tex.stackexchange.com/questions/115210/boxplot-in-latex """ if not filename.endswith(".tex"): filename += ".tex" descrs = [str(single.parent) for single in self.singles] tex = """ \\pgfplotsset{{width={width}cm, height={height}cm, compat=1.8}} \\begin{{tikzpicture}} \\begin{{axis}}[ cycle list name=auto, xlabel={xlabel}, ytick={{{yticks}}}, yticklabels={{{yticklabels}}}, max space between ticks=50pt ]""".format( width=fig_width, height=fig_height, xlabel=self.property, yticklabels="\\\\".join(descrs) + "\\\\", yticks=",".join(map(str, range(1, len(descrs) + 1))) ) for single in self.singles: q1, q2, q3 = single.quartiles() wh_lower, wh_upper = single.whiskers() tex += """ \\addplot+[ boxplot prepared={{ median={median}, upper quartile={q3}, lower quartile={q1}, upper whisker={wh_upper}, lower whisker={wh_lower} }}, ] coordinates {{}}; """.format(median=single.median(), **locals()) tex += """ \end{axis} \end{tikzpicture} """ if standalone: tex = """ \\documentclass[margin=10pt]{standalone} \\usepackage{pgfplots} \\usepgfplotslibrary{statistics} \\begin{document} """ + tex + """ \\end{document} """ with open(filename, "w") as f: f.write(tex) return os.path.realpath(filename) def max(self) -> float: return max(single.max() for single in self.singles)PKDHE΀99temci/tester/rundata.py""" Contains the RunData object for benchmarking data of specific program block and the RunDataStatsHelper that provides helper methods for working with these objects. """ from temci.tester.testers import Tester, TesterRegistry from temci.utils.typecheck import * from temci.utils.settings import Settings import temci.utils.util as util if util.can_import("scipy"): import scipy import typing as t class RunData(object): """ A set of benchmarking data for a specific program block. 
""" def __init__(self, data: t.Dict[str, t.List[t.Union[int, float]]] = None, attributes: t.Dict[str, str] = None, external: bool = False): """ Initializes a new run data object with a list of measured properties, an optional dictionary mapping each property to a list of actual values and a dictionary of optional attributes that describe its program block. """ typecheck(data, E(None) | Dict(all_keys=False)) typecheck(attributes, Exact(None) | Dict(key_type=Str(), all_keys=False)) self.external = external self.properties = [] # type: t.List[str] """ List of measured properties. They might not all be measured the same number of times. """ self.data = {} # type: t.Dict[str, t.List[t.Union[int, float]]] """ Raw benchmarking data, mapping properties to their corresponding values """ if data is not None and len(data) > 0: self.add_data_block(data) self.attributes = attributes or {} # type: t.Dict[str, str] def add_data_block(self, data_block: t.Dict[str, t.List[t.Union[int, float]]]): """ Adds a block of data. The passed dictionary maps each of the run datas properties to list of actual values (from each benchmarking run). """ typecheck(data_block, Dict(key_type=Str(), value_type= List(Int() | Float()), all_keys=False)) self.properties = set(self.properties).union(set(data_block.keys())) for prop in data_block: if prop not in self.data: self.data[prop] = [] self.properties.add(prop) self.data[prop].extend(data_block[prop]) self.properties = sorted(list(self.properties)) def __len__(self) -> int: """ Returns the number of measured properties. """ return len(self.data) def min_values(self) -> int: """ Returns the minimum number of measured values for the associated program block over all properties. """ return min(map(len, self.data.values())) if len(self) > 0 else 0 def benchmarks(self) -> int: """ Returns the maximum number of measured values for the associated program block over all properties. This number should be equivalent to the number of measured benchmarking runs. """ return max(map(len, self.data.values())) if len(self) > 0 else 0 def __getitem__(self, property: str): """ Returns the benchmarking values associated with the passed property. """ return self.data[property] def to_dict(self) -> dict: """ Returns a dictionary that represents this run data object. """ return { "attributes": self.attributes, "data": self.data } def __str__(self): return repr(self.attributes) def description(self): if "description" in self.attributes: return self.attributes["description"] return ", ".join("{}={}".format(key, self.attributes[key]) for key in self.attributes) class RunDataStatsHelper(object): """ This class helps to simplify the work with a set of run data observations. """ def __init__(self, runs: t.List[RunData], tester: Tester = None, external_count: int = 0): """ Don't use the constructor use init_from_dicts if possible. :param runs: list of run data objects :param tester: used tester or tester that is set in the settings """ self.tester = tester or TesterRegistry.get_for_name(TesterRegistry.get_used(), Settings()["stats/uncertainty_range"]) typecheck(runs, List(T(RunData))) self.runs = runs # type: t.List[RunData] self.external_count = external_count def properties(self) -> t.List[str]: """ Returns a sorted list of all properties that exist in all (!) run data blocks. 
""" if not self.runs: return [] props = set(self.runs[0].properties) for rd in self.runs[1:]: if rd: props = props.intersection(rd.properties) return list(sorted(props)) @classmethod def init_from_dicts(cls, runs: t.List[Dict] = None, external: bool = False) -> 'RunDataStatsHelper': """ Expected structure of the stats settings and the runs parameter:: "stats": { "tester": ..., "properties": ["prop1", ...], # or "properties": [("prop1", "description of prop1"), ...], "uncertainty_range": (0.1, 0.3) } "runs": [ {"attributes": {"attr1": ..., ...}, "data": {"__ov-time": [...], ...}}, ... ] :param runs: list of dictionaries representing the benchmarking runs for each program block :param external: are the passed runs not from this benchmarking run but from another? :rtype RunDataStatsHelper :raises ValueError if the stats of the runs parameter have not the correct structure """ typecheck(runs, List(Dict({ "data": Dict(key_type=Str(), value_type=List(Int()|Float()), all_keys=False) | NonExistent(), "attributes": Dict(key_type=Str(), all_keys=False) }, all_keys=False)), value_name="runs parameter") run_datas = [] runs = runs or [] # type: t.List[dict] for run in runs: if "data" not in run: run["data"] = {} run_datas.append(RunData(run["data"], run["attributes"], external=external)) return RunDataStatsHelper(run_datas, external_count=len(runs) if external else 0) def _is_uncertain(self, property: str, data1: RunData, data2: RunData) -> bool: return self.tester.is_uncertain(data1[property], data2[property]) def _is_equal(self, property: str, data1: RunData, data2: RunData) -> bool: return self.tester.is_equal(data1[property], data2[property]) def _is_unequal(self, property: str, data1: RunData, data2: RunData) -> bool: return self.tester.is_unequal(data1[property], data2[property]) def is_uncertain(self, p_val: float) -> bool: return min(*Settings()["stats/uncertainty_range"]) <= p_val <= max(*Settings()["stats/uncertainty_range"]) def is_equal(self, p_val: float) -> bool: return p_val > max(*Settings()["stats/uncertainty_range"]) def is_unequal(self, p_val: float) -> bool: return p_val < min(*Settings()["stats/uncertainty_range"]) def _speed_up(self, property: str, data1: RunData, data2: RunData): """ Calculates the speed up from the second to the first (e.g. the first is RESULT * 100 % faster than the second). """ return (scipy.mean(data2[property]) - scipy.mean(data1[property])) \ / scipy.mean(data1[property]) def _estimate_time_for_run_datas(self, run_bin_size: int, data1: RunData, data2: RunData, min_runs: int, max_runs: int) -> float: if min(len(data1), len(data2)) == 0 or "__ov-time" not in data1.properties or "__ov-time" not in data2.properties: return max_runs needed_runs = [] for prop in set(data1.properties).intersection(data2.properties): estimate = self.tester.estimate_needed_runs(data1[prop], data2[prop], run_bin_size, min_runs, max_runs) needed_runs.append(estimate) avg_time = max(scipy.mean(data1["__ov-time"]), scipy.mean(data2["__ov-time"])) return max(needed_runs) * avg_time def get_program_ids_to_bench(self) -> t.List[int]: """ Returns the ids (the first gets id 0, …) of the program block / run data object that should be benchmarked again. 
""" to_bench = set() for (i, run) in enumerate(self.runs): if i in to_bench or not run: continue for j in range(i): if j in to_bench or not self.runs[j]: continue run2 = self.runs[j] if any(self._is_uncertain(prop, run, run2) for prop in set(run.properties) .intersection(run2.properties)): to_bench.add(i) to_bench.add(j) return [i - self.external_count for i in to_bench if i >= self.external_count] def estimate_time(self, run_bin_size: int, min_runs: int, max_runs: int) -> float: """ Roughly erstimates the time needed to finish benchmarking all program blocks. It doesn't take any parallelism into account. Therefore divide the number by the used parallel processes. :param run_bin_size: times a program block is benchmarked in a single block of time :param min_runs: minimum number of allowed runs :param max_runs: maximum number of allowed runs :return estimated time in seconds or float("inf") if no proper estimation could be made """ to_bench = self.get_program_ids_to_bench() max_times = [0 for i in self.runs] for i in to_bench: run = self.runs[i] for j in to_bench: max_time = self._estimate_time_for_run_datas(run_bin_size, run, self.runs[j], min_runs, max_runs) max_times[i] = max(max_times[i], max_time) max_times[j] = max(max_times[j], max_time) if max_time == float("inf"): return float("inf") return sum(max_times) def estimate_time_for_next_round(self, run_bin_size: int, all: bool) -> float: """ Roughly estimates the time needed for the next benchmarking round. :param run_bin_size: times a program block is benchmarked in a single block of time and the size of a round :param all: expect all program block to be benchmarked :return estimated time in seconds """ if "__ov-time" not in self.properties(): return -1 summed = 0 to_bench = range(0, len(self.runs)) if all else self.get_program_ids_to_bench() for i in to_bench: summed += scipy.mean(self.runs[i]["__ov-time"]) * run_bin_size return summed def add_run_data(self, data: list = None, attributes: dict = None) -> int: """ Adds a new run data (corresponding to a program block) and returns its id. :param data: benchmarking data of the new run data object :param attributes: attributes of the new run data object :return: id of the run data object (and its corresponding program block) """ self.runs.append(RunData(self.properties, data, attributes)) return len(self.runs) - 1 def disable_run_data(self, id: int): """ Disable that run data object with the given id. """ self.runs[id] = None def add_data_block(self, program_id: int, data_block: t.Dict[str, t.List[t.Union[int, float]]]): """ Add block of data for the program block with the given id. :param program_id: id of the program. :param data_block: list of data from several benchmarking runs of the program block :raises ValueError if the program block with the given id doesn't exist """ program_id += self.external_count assert program_id >= self.external_count if program_id >= len(self.runs): raise ValueError("Program block with id {} doesn't exist".format(program_id - self.external_count)) self.runs[program_id].add_data_block(data_block) def get_evaluation(self, with_equal: bool, with_unequal: bool, with_uncertain: bool) -> dict: """ Structure of the returned list items:: - data: # set of two run data objects properties: # information for each property that is equal, ... 
-prop: - equal: True/False uncertain: True/False p_val: probability of the null hypothesis speed_up: speed up from the first to the second description: description of the property :param with_equal: with tuple with at least one "equal" property :param with_unequal: ... unequal property :param with_uncertain: include also uncertain properties :return: list of tuples for which at least one property matches the criteria """ arr = [] for i in range(0, len(self.runs) - 1): for j in range(i + 1, len(self.runs)): if not self.runs[i] or not self.runs[j]: continue data = (self.runs[i], self.runs[j]) props = {} for prop in self.properties(): map = {"p_val": self.tester.test(data[0][prop], data[1][prop]), "speed_up": self._speed_up(prop, *data), "description": prop, "equal": self._is_equal(prop, *data), "unequal": self._is_unequal(prop, *data), "uncertain": self._is_uncertain(prop, *data)} if map["unequal"] == with_unequal and map["equal"] == with_equal \ and map["uncertain"] == with_uncertain: props[prop] = map if len(props) > 0: arr.append({ "data": data, "properties": props }) return arr def serialize(self) -> t.List: return list(x.to_dict() for x in self.runs if x) def valid_runs(self) -> t.List[RunData]: res = [x for x in self.runs if x is not None] #print(res) return resPK#}6H{6UU temci/tester/report_processor.pyfrom temci.tester.report import ReporterRegistry from temci.tester.rundata import RunDataStatsHelper class ReportProcessor: def __init__(self, stats_helper: RunDataStatsHelper = None): self.reporter = ReporterRegistry.get_for_name(ReporterRegistry.get_used(), stats_helper) def report(self): self.reporter.report()PK#}6Htemci/tester/__init__.pyPK#}6H-9'temci/tester/report_resources/style.css.footer { padding: 20px; width: 100%; color: gray; text-align: center; } #toc { z-index: 1; background: white; opacity: 0.9; margin-top: 30px; } .table a { display: block; text-decoration: none; } .div_info { background-color: #D9EDF7; } .div_warning { background-color: #FCF8E3; } .div_danger { background-color: #F2DEDE; } .anchor_cell { text-align: center; } .hyphenate { /*text-align: justify;*/ hyphens: auto; -webkit-hyphens: auto; -ms-hyphens: auto; -moz-hyphens: auto; } .link { color: #0000EE; cursor: pointer; } /* * jquery.tocify.css 1.9.0 * Author: @gregfranko */ /* The Table of Contents container element */ .tocify { width: 20%; max-height: 90%; overflow: auto; margin-left: 2%; position: fixed; border: 1px solid #ccc; webkit-border-radius: 6px; moz-border-radius: 6px; border-radius: 6px; } /* The Table of Contents is composed of multiple nested unordered lists. These styles remove the default styling of an unordered list because it is ugly. */ .tocify ul, .tocify li { list-style: none; margin: 0; padding: 0; border: none; line-height: 30px; } /* Top level header elements */ .tocify-header { text-indent: 10px; } /* Top level subheader elements. These are the first nested items underneath a header element. */ .tocify-subheader { text-indent: 20px; display: none; } /* Makes the font smaller for all subheader elements. */ .tocify-subheader li { font-size: 12px; } /* Further indents second level subheader elements. */ .tocify-subheader .tocify-subheader { text-indent: 30px; } /* Further indents third level subheader elements. You can continue this pattern if you have more nested elements. 
*/ .tocify-subheader .tocify-subheader .tocify-subheader { text-indent: 40px; } /* Twitter Bootstrap Override Style */ .nav-list > li > a, .nav-list .nav-header { margin: 0px; } /* Twitter Bootstrap Override Style */ .nav-list > li > a { padding: 5px; } .tocify-extend-page { height: 0px !important; } .nav-list a:focus { color: #FFF; text-shadow: 0px -1px 0px rgba(0, 0, 0, 0.2); background-color: red; }PKlIHf+g2P P 'temci/tester/report_resources/script.jsvar toc; $(function() { /*! jquery.tocify - v1.9.0 - 2013-10-01 * http://gregfranko.com/jquery.tocify.js/ * Copyright (c) 2013 Greg Franko; Licensed MIT*/ !function(e){"use strict";e(window.jQuery,window,document)}(function(e,t,s,i){"use strict";var o="tocify",n="tocify-focus",a="tocify-hover",l="tocify-hide",h="tocify-header",r="."+h,d="tocify-subheader",c="."+d,f="tocify-item",u="."+f,p="tocify-extend-page",g="."+p;e.widget("toc.tocify",{version:"1.9.0",options:{context:"body",ignoreSelector:null,selectors:"h1, h2, h3",showAndHide:!0,showEffect:"slideDown",showEffectSpeed:"medium",hideEffect:"slideUp",hideEffectSpeed:"medium",smoothScroll:!0,smoothScrollSpeed:"medium",scrollTo:0,showAndHideOnScroll:!0,highlightOnScroll:!0,highlightOffset:40,theme:"bootstrap",extendPage:!0,extendPageOffset:100,history:!0,scrollHistory:!1,hashGenerator:"compact",highlightDefault:!0},_create:function(){var s=this;s.extendPageScroll=!0,s.items=[],s._generateToc(),s._addCSSClasses(),s.webkit=function(){for(var e in t)if(e&&-1!==e.toLowerCase().indexOf("webkit"))return!0;return!1}(),s._setEventHandlers(),e(t).load(function(){s._setActiveElement(!0),e("html, body").promise().done(function(){setTimeout(function(){s.extendPageScroll=!1},0)})})},_generateToc:function(){var t,s,i=this,n=i.options.ignoreSelector;return t=-1!==this.options.selectors.indexOf(",")?e(this.options.context).find(this.options.selectors.replace(/ /g,"").substr(0,this.options.selectors.indexOf(","))):e(this.options.context).find(this.options.selectors.replace(/ /g,"")),t.length?(i.element.addClass(o),void t.each(function(t){e(this).is(n)||(s=e("