# file: temci/__init__.py

# file: temci/tester/testers.py

""" Contains the tester base class and several simple implementations. """

import temci.utils.util as util
if util.can_import("scipy"):
    import numpy as np
    import scipy.stats as st
    import scipy.optimize as opti
from temci.utils.typecheck import *
from temci.utils.registry import AbstractRegistry, register
import logging, warnings


class TesterRegistry(AbstractRegistry):

    settings_key_path = "stats"
    use_key = "tester"
    use_list = False
    default = "t"
    registry = {}


class Tester(object, metaclass=util.Singleton):
    """
    A tester estimates the probability of the null hypothesis for two equally long lists of observations.
    This is a base class that shouldn't be instantiated.
    """

    scipy_stat_method = ""
    name = ""

    def __init__(self, misc_settings: dict, uncertainty_range: tuple):
        """
        :param misc_settings: tester specific settings
        :param uncertainty_range: (start, end) probability tuple that gives the range in which the tester
                                  doesn't give a definitive result on the null hypothesis check
        """
        self.uncertainty_range = uncertainty_range
        assert isinstance(uncertainty_range, Tuple(Float(), Float()))
        self.misc_settings = misc_settings

    def test(self, data1: list, data2: list) -> float:
        """ Calculates the probability of the null hypothesis. """
        min_len = min(len(data1), len(data2))
        with warnings.catch_warnings(record=True):
            res = self._test_impl(data1[0:min_len], data2[0:min_len])
        return res

    def _test_impl(self, data1: list, data2: list) -> float:
        return getattr(st, self.scipy_stat_method)(data1, data2)[-1]

    def is_uncertain(self, data1: list, data2: list) -> bool:
        return min(len(data1), len(data2)) == 0 or \
            self.uncertainty_range[0] <= self.test(data1, data2) <= self.uncertainty_range[1]

    def is_equal(self, data1: list, data2: list):
        return self.test(data1, data2) > max(*self.uncertainty_range)

    def is_unequal(self, data1: list, data2: list):
        return self.test(data1, data2) < min(*self.uncertainty_range)

    def estimate_needed_runs(self, data1: list, data2: list,
                             run_bin_size: int, min_runs: int, max_runs: int) -> int:
        """
        Calculate an approximation of the number of observations needed for the p value
        to lie outside the uncertainty range.
        It uses the simple observation that the graph of the p value plotted against the size
        of the observation sets has an exponential, logarithmic or root shape.

        :param data1: list of observations
        :param data2: list of observations
        :param run_bin_size: granularity of the observation (> 0)
        :param min_runs: minimum number of allowed runs
        :param max_runs: maximum number of allowed runs
        :return: approximation of needed runs or float("inf")
        """
        if data1 == data2:
            return min_runs
        min_len = min(len(data1), len(data2))
        if min_len <= 5:
            return max_runs
        x_space = np.linspace(0, min_len - 2, min_len - 2)
        yn = [self.test(data1[0:i], data2[0:i]) for i in range(2, min_len)]

        def interpolate(func, name: str):
            try:
                popt, pcov = opti.curve_fit(func, x_space, yn, maxfev=10000)
                for i in range(min_len, max_runs + 1, run_bin_size):
                    ith = func(i, *popt)
                    if ith > max(self.uncertainty_range) or ith < min(self.uncertainty_range):
                        return i
                return max_runs
            except (TypeError, RuntimeWarning, RuntimeError) as err:
                logging.info("Interpolating {} with {} data points gave "
                             "following error: {}".format(name, min_len, str(err)))
                return float("inf")

        funcs = [
            (lambda x, a, b, c: a * np.exp(-b * x) + c, "exponential function")
        ]
        with warnings.catch_warnings(record=True):
            res = min(interpolate(*f) for f in funcs)
        return res

    def __eq__(self, other):
        return isinstance(other, type(self))


@register(TesterRegistry, name="t", misc_type=Dict())
class TTester(Tester):
    """ Implementation of the Tester base class for Student's t test. """

    scipy_stat_method = "ttest_ind"
    name = "t"


@register(TesterRegistry, name="ks", misc_type=Dict())
class KSTester(Tester):
    """ Uses the Kolmogorov-Smirnov statistic on 2 samples. """

    scipy_stat_method = "ks_2samp"
    name = "kolmogorov smirnov"


@register(TesterRegistry, name="anderson", misc_type=Dict())
class AndersonTester(Tester):
    """ Uses the Anderson statistic on 2 samples. """

    scipy_stat_method = "anderson_ksamp"
    name = "anderson"

    def _test_impl(self, data1: list, data2: list) -> float:
        # cap the significance level returned by scipy at 1
        return min(st.anderson_ksamp([data1, data2])[-1], 1)
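
# Illustrative usage sketch (not part of the original module): shows how a tester can be
# used directly. The uncertainty range (0.05, 0.15) and the measurement values below are
# invented example values, not project defaults.
if __name__ == "__main__":
    a = [20.1, 20.3, 19.9, 20.2, 20.0, 20.4, 19.8, 20.1]
    b = [22.0, 21.8, 22.1, 21.9, 22.2, 21.7, 22.0, 21.9]
    tester = TTester(misc_settings={}, uncertainty_range=(0.05, 0.15))
    print("p value          :", tester.test(a, b))
    print("probably equal   :", tester.is_equal(a, b))
    print("probably unequal :", tester.is_unequal(a, b))
    if tester.is_uncertain(a, b):
        # extrapolate how many runs would probably be needed for a definitive answer
        print("estimated runs needed:",
              tester.estimate_needed_runs(a, b, run_bin_size=5, min_runs=10, max_runs=100))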
""" scipy_stat_method = "anderson_ksamp" def _test_impl(self, data1: list, data2: list) -> float: return max(st.anderson_ksamp([data1, data2])[-1], 1) name = "anderson"PK8HK݉ggtemci/tester/report.pyimport logging import math import re import shutil from collections import namedtuple import multiprocessing import time import sys import itertools from temci.tester.stats import TestedPairsAndSingles, BaseStatObject, TestedPair, TestedPairProperty, StatMessage, \ StatMessageType, Single, SingleProperty, SinglesProperty from temci.tester.testers import TesterRegistry, Tester from temci.tester.rundata import RunDataStatsHelper, RunData from temci.utils.typecheck import * from temci.utils.registry import AbstractRegistry, register import temci.utils.util as util import click, yaml, os if util.can_import("numpy"): import numpy as np import pandas as pd from temci.utils.settings import Settings from multiprocessing import Pool from temci.utils.util import join_strs import typing as t class ReporterRegistry(AbstractRegistry): settings_key_path = "report" use_key = "reporter" use_list = False default = "html2" registry = {} class AbstractReporter: def __init__(self, misc_settings: dict = None, stats_helper: RunDataStatsHelper = None): self.misc = misc_settings if stats_helper is None: runs = [] typecheck(Settings()["report/in"], ValidYamlFileName()) with open(Settings()["report/in"], "r") as f: runs = yaml.load(f) self.stats_helper = RunDataStatsHelper.init_from_dicts(runs) else: self.stats_helper = stats_helper self.stats = TestedPairsAndSingles(self.stats_helper.runs, distinct_descriptions=True) def report(self): raise NotImplementedError() @register(ReporterRegistry, "console", Dict({ "out": FileNameOrStdOut() // Default("-") // Description("Output file name or stdard out (-)") })) class ConsoleReporter(AbstractReporter): """ Simple reporter that outputs just text. 
""" def report(self, with_tester_results: bool = True, to_string: bool = False) -> t.Optional[str]: output = [""] def string_printer(line: str, **args): output[0] += str(line) + "\n" print_func = string_printer if to_string else print with click.open_file(self.misc["out"], mode='w') as f: for block in self.stats_helper.runs: assert isinstance(block, RunData) print_func("{descr:<20} ({num:>5} single benchmarkings)" .format(descr=block.description(), num=len(block.data[block.properties[0]])), file=f) for prop in sorted(block.properties): mean = np.mean(block[prop]) stdev = np.std(block[prop]) print_func("\t {prop:<18} mean = {mean:>15.5f}, " "deviation = {dev_perc:>10.5%} ({dev:>15.5f})".format( prop=prop, mean=mean, dev=stdev, dev_perc=stdev/mean ), file=f) if with_tester_results: self._report_list("Equal program blocks", self.stats_helper.get_evaluation(with_equal=True, with_uncertain=False, with_unequal=False), f, print_func) self._report_list("Unequal program blocks", self.stats_helper.get_evaluation(with_equal=False, with_uncertain=False, with_unequal=True), f, print_func) self._report_list("Uncertain program blocks", self.stats_helper.get_evaluation(with_equal=True, with_uncertain=True, with_unequal=True), f, print_func) if to_string: return output[0] def _report_list(self, title: str, list, file, print_func: t.Callable[[str, Any], None]): if len(list) != 0: print_func(title, file=file) print_func("####################", file=file) for item in list: print_func("\t {} ⟷ {}".format(item["data"][0].description(), item["data"][1].description()), file=file) for prop in sorted(item["properties"]): prop_data = item["properties"][prop] perc = prop_data["p_val"] if prop_data["unequal"]: perc = 1 - perc print_func("\t\t {descr:<18} probability = {perc:>10.5%}, speed up = {speed_up:>10.5%}" .format(descr=prop_data["description"], perc=perc, speed_up=prop_data["speed_up"]), file=file) @register(ReporterRegistry, "html", Dict({ "out": Str() // Default("report") // Description("Output directory"), "html_filename": Str() // Default("report.html") // Description("Name of the HTML file"), "pair_kind": ExactEither("scatter", "reg", "resid", "kde", "hex") // Default("kde") // Description("Kind of plot to draw for pair plots (see searborn.joinplot)"), "plot_size": PositiveInt() // Default(8) // Description("Width of the plots in centimeters"), "compared_props": (ListOrTuple(Str())) // Default(["all"]) // Description("Properties to include in comparison table"), "compare_against": NaturalNumber() // Default(0) // Description("Run to to use as base run for relative values in comparison table") })) class HTMLReporter(AbstractReporter): """ Reporter that produces a HTML bsaed report with lot's of graphics. """ counter = 0 """ Just a counter to allow collision free figure saving. """ PlotTuple = namedtuple("PlotTuple", ["func", "args", "kwargs", "filename"]) def report(self): typecheck(self.misc["out"], DirName(), value_name="reporter option out") if os.path.exists(self.misc["out"]): shutil.rmtree(self.misc["out"]) resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "report_resources")) shutil.copytree(resources_path, self.misc["out"]) runs = self.stats_helper.runs html = """ Benchmarking report
{inner_html}
""" descriptions = [run.description() for run in self.stats_helper.runs] comparing_str = "" if len(descriptions) == 1: comparing_str = descriptions[0] elif len(descriptions) > 1: comparing_str = " and ".join([", ".join(descriptions[0:-1]), descriptions[-1]]) inner_html = "" self.big_size = self.misc["plot_size"] self.small_size = max(2, math.floor(self.big_size * 2 / len(runs[0].properties))) if len(self.stats_helper.runs) > 1: logging.info("Generate comparison tables") inner_html += "

Comparison tables

" + self._comparison_tables() self._write(html.format(**locals())) for i in range(0, len(runs)): for j in range(0, i): logging.info("Plot pair summary ({}, {})".format(i, j)) inner_html += self._pair_summary(runs[i], runs[j], heading_no=2) self._write(html.format(**locals())) for i in range(0, len(runs)): logging.info("Plot program block {}".format(i)) inner_html += self._report_single(runs[i]) self._write(html.format(**locals())) if len(self.stats_helper.runs) > 1: for i in range(0, len(runs)): for j in range(0, i): logging.info("Plot pair ({}, {})".format(i, j)) inner_html += self._report_pair(runs[i], runs[j]) self._write(html.format(**locals())) def _write(self, html_string: str): """ Store the html string in the appropriate file and append "" """ with open(os.path.join(self.misc["out"], self.misc["html_filename"]), "w") as f: f.write(html_string) def _set_fig_size(self, size: int): import matplotlib.pyplot as plt plt.rcParams['figure.figsize'] = (size, size) self.current_size = size def _report_single(self, data: RunData): import matplotlib.pyplot as plt import seaborn as sns ret_str = """

{}

{} benchmarkings
""".format(data.description(), len(data[data.properties[0]])) ret_str += """ """ for prop in sorted(self.stats_helper.properties()): x = pd.Series(data[prop], name=prop) self._set_fig_size(self.small_size) ax = sns.distplot(x) if self.small_size == self.current_size: plt.xticks([]) plt.yticks([]) filename = self._get_new_figure_filename() plt.xlim(0, max(data[prop])) plt.xlabel(prop) plt.savefig(filename) plt.title(prop) plt.close() ret_str += """ """.format(filename=filename, sm=self.small_size) ret_str += """
""" for prop in sorted(self.stats_helper.properties()): ret_str += """

{prop}

{benchs} benchmarkings
""".format(prop=prop, benchs=len(data[prop])) x = pd.Series(data[prop], name=prop) self._set_fig_size(self.big_size) ax = sns.distplot(x, kde=False) filename = self._get_new_figure_filename() plt.xlim(min(data[prop]), max(data[prop])) plt.savefig(filename) plt.close() ret_str += """ """.format(filename=filename) prop_data = data[prop] vals = { "mean": np.mean(prop_data), "median": np.median(prop_data), "min": np.min(prop_data), "max": np.max(prop_data), "standard deviation": np.std(prop_data) } ret_str += """ """ for name in sorted(vals.keys()): ret_str += """ """.format(name=name, absolute=vals[name], rel_mean=vals[name] / vals["mean"], rel_median=vals[name] / vals["median"]) ret_str += """
statistical propertyabsolute value relative to meanrelative to median
{name} {absolute} {rel_mean:15.5%} {rel_median:15.5%}
""" return ret_str def _report_pair(self, first: RunData, second: RunData): ret_str = """

{descr1} <=> {descr2}

""".format(descr1=first.description(), descr2=second.description()) ret_str += self._pair_summary(first, second, heading_no=3) for prop in sorted(self.stats_helper.properties): length = min(len(first[prop]), len(second[prop])) first_prop = first[prop][0:length] second_prop = second[prop][0:length] ret_str += """

{prop}

{benchs} benchmarkings

Probability of the null hypothesis

I.e. the probability that the data sets of both program block of the property {prop} come from the same population. """.format(filename=self._jointplot(first, second, prop, size=self.big_size), prop=prop, filename2=self._barplot(first, second, prop, size=self.big_size), benchs=length) for tester_name in sorted(TesterRegistry.registry.keys()): tester = TesterRegistry.get_for_name(tester_name, Settings()["stats/uncertainty_range"]) p_val = tester.test(first[prop], second[prop]) row_class = self._p_val_to_row_class(p_val) tester_descr = tester.__description__ ret_str += """ """.format(**locals()) ret_str += """
TesterprobabilityTester description
{tester_name}{p_val:5.5%} {tester_descr}
""" vals = { "mean": (np.mean(first_prop), np.mean(second_prop)), "median": (np.median(first_prop), np.median(second_prop)), } ret_str += """ """ for descr in sorted(vals.keys()): first_val, second_val = vals[descr] ret_str += """ """.format(descr=descr, diff=first_val - second_val, rel_diff=(first_val - second_val) / first_val) ret_str += """
Difference in property absolute difference difference rel. to first
{descr}{diff:15.5}{rel_diff:3.5%}
""" return ret_str def _pair_summary(self, first: RunData, second: RunData, heading_no: int): html = """ Summary of {descr} <=> {descr2} {{inner_html}} """.format(descr=first.description(), descr2=second.description(), no=heading_no) inner_html = """ """ for prop in sorted(self.stats_helper.properties()): inner_html += """ """.format(filename=self._jointplot(first, second, prop, size=self.small_size, show_ticks=False)) inner_html += "" for prop in sorted(self.stats_helper.properties()): inner_html += """ """.format(filename=self._barplot(first, second, prop, size=self.small_size, show_ticks=False)) inner_html += "" for prop in sorted(self.stats_helper.properties()): length = min(len(first[prop]), len(second[prop])) first_prop = first[prop][0:length] second_prop = second[prop][0:length] inner_html += """ " inner_html += """
""" for tester_name in sorted(TesterRegistry.registry.keys()): tester = TesterRegistry.get_for_name(tester_name, Settings()["stats/uncertainty_range"]) p_val = tester.test(first_prop, second_prop) row_class = self._p_val_to_row_class(p_val) inner_html += """ """.format(**locals()) inner_html += "
testerp val
{tester_name}{p_val:3.5%}
""" return html.format(**locals()) def _p_val_to_row_class(self, p_val: float) -> str: row_class = "" if self.stats_helper.is_equal(p_val): row_class = "danger" elif self.stats_helper.is_unequal(p_val): row_class = "success" return row_class def _comparison_tables(self, runs: list = None, properties: list = None, compare_against: int = None, heading_no: int = 3) -> str: runs = runs or self.stats_helper.runs p = properties or self.misc["compared_props"] properties = list(p) compare_against = compare_against or self.misc["compare_against"] typecheck(properties, List(Str())) typecheck(runs, List(T(RunData)) // (lambda l: len(l) > 0)) typecheck(compare_against, Int(range=range(len(runs)))) if "all" in properties: properties = self.stats_helper.properties() stat_funcs = { "mean": np.mean, "median": np.median, "min": np.min, "max": np.max, "standard deviation / mean": lambda l: np.std(l) / np.mean(l), "standard deviation / median": lambda l: np.std(l) / np.median(l) } ret_str = "" for stat_prop in sorted(stat_funcs.keys()): stat_func = stat_funcs[stat_prop] ret_str += """ {prop} """.format(n=heading_no, prop=stat_prop) ret_str += self._comparison_table(stat_func, runs, properties, compare_against) return ret_str def _comparison_table(self, stat_func, runs: list, properties: list, compare_against: int) -> str: """ :param stat_func: function that gets a data series (list) and returns a scalar (e.g. mean or median) :param runs: RunData objects to compare :param properties: used properties :param compare_against: use this run as the base run (for relative values) :return: html string """ values = [] for run in runs: values_for_run = {} for property in sorted(properties): values_for_run[property] = stat_func(run[property]) values.append(values_for_run) ret_str = """ {} """.format("".join("".format(run.description(), compare_against) for run in runs)) for property in sorted(properties): ret_str += """ """.format(property) for i, run in enumerate(runs): ret_str += """ """.format( abs=values[i][property], rel=values[i][property] / values[compare_against][property] ) ret_str += """ """ ret_str += """ """ # why? see https://dl.acm.org/citation.cfm?id=5673 mult_compare_against = np.prod(list(values[compare_against].values())) for (i, run) in enumerate(runs): mult = np.prod(list(values[i].values())) ret_str += """ """.format( abs=np.power(mult, 1 / len(values[i])), rel=np.power(mult / mult_compare_against, 1 / len(values[i])) ) ret_str += """
{}
{}{abs:15.5}{rel:3.3}
geometric mean{rel:3.3}
""" return ret_str def _jointplot(self, first: RunData, second: RunData, property: str, size: int, filename: str = None, show_ticks: bool = True): import matplotlib.pyplot as plt import seaborn as sns import numpy filename = filename or self._get_new_figure_filename() length = min(len(first[property]), len(second[property])) first_prop = first[property][0:length] second_prop = second[property][0:length] lim = (0, max(max(first_prop), max(second_prop))) self._set_fig_size(size) x1 = pd.Series(first_prop, name="{descr}: {prop}".format(descr=first.description(), prop=property)) x2 = pd.Series(second_prop, name="{descr}: {prop}".format(descr=second.description(), prop=property)) plt.xlim(lim) g = None try: g = sns.jointplot(x1, x2, kind=self.misc["pair_kind"], size=size, space=0, stat_func=self.stats_helper.tester.test, xlim=lim, ylim=lim) if not show_ticks: g.ax_joint.set_xticklabels([]) g.ax_joint.set_yticklabels([]) g.savefig(filename) plt.close() except BaseException as ex: logging.warning(ex) return filename def _barplot(self, first: RunData, second: RunData, property: str, size: int, filename: str = None, show_ticks: bool = True) -> str: import matplotlib.pyplot as plt import seaborn as sns filename = filename or self._get_new_figure_filename() self._set_fig_size(size) length = min(len(first[property]), len(second[property])) first_prop = first[property][0:length] second_prop = second[property][0:length] min_xval = min(first_prop + second_prop) max_xval = max(first_prop + second_prop) bins = np.linspace(min_xval, max_xval, math.floor(math.sqrt(length) * size)) sns.distplot(first_prop, bins=bins,label=first.description(), kde=False) sns.distplot(second_prop, bins=bins,label=second.description(), kde=False) if not show_ticks: plt.xticks([]) plt.yticks([]) plt.xlim(min_xval, max_xval) plt.legend() plt.savefig(filename) plt.close() return filename def _save_figure(self, figure) -> str: filename = self._get_new_figure_filename() figure.savefig(filename) return filename def _get_new_figure_filename(self) -> str: self.counter += 1 return os.path.join(os.path.abspath(self.misc["out"]), "figure.{}{}" .format(self.counter, BaseStatObject.img_filename_ending)) @register(ReporterRegistry, "html2", Dict({ "out": Str() // Default("report") // Description("Output directory"), "html_filename": Str() // Default("report.html") // Description("Name of the HTML file"), "fig_width_small": Float() // Default(15.0) // Description("Width of all small plotted figures"), "fig_width_big": Float() // Default(25.0) // Description("Width of all big plotted figures"), "boxplot_height": Float() // Default(2.0) // Description("Height per run block for the big comparison box plots"), "alpha": Float() // Default(0.05) // Description("Alpha value for confidence intervals"), "gen_tex": Bool() // Default(True) // Description("Generate simple latex versions of the plotted figures?"), "gen_pdf": Bool() // Default(False) // Description("Generate pdf versions of the plotted figures?"), "show_zoomed_out": Bool() // Default(False) // Description("Show zoomed out (x min = 0) figures in the extended summaries?") })) class HTMLReporter2(AbstractReporter): """ Reporter that produces a HTML bsaed report with lot's of graphics. A rewite of the original HTMLReporter """ counter = 0 """ Just a counter to allow collision free figure saving. 
""" def report(self): import humanfriendly as hf typecheck(self.misc["out"], DirName(), value_name="reporter option out") start_time = time.time() if os.path.exists(self.misc["out"]): shutil.rmtree(self.misc["out"]) resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "report_resources")) shutil.copytree(resources_path, self.misc["out"]) runs = self.stats_helper.runs self.app_html = "" html = """ Benchmarking report
{inner_html}
{self.app_html} """ comparing_str = join_strs([single.description() for single in self.stats.singles]) inner_html = """

Summary

""" inner_html += self._format_errors_and_warnings(self.stats) inner_html += """

Overall summary

""" inner_html += self._full_single_property_comp_table().html() for prop in self.stats.properties(): inner_html += """

Summary regarding {prop}

""".format(**locals()) inner_html += self._full_single_property_comp_table(prop).html() inner_html += """

""" inner_html += self._comparison_for_prop(prop) for single in self.stats.singles: inner_html += """

""" inner_html += self._extended_summary(single, with_title=True, title_level=2, title_class="page-header") + """
""" for pair in self.stats.pairs: inner_html += """
""" inner_html += self._extended_summary(pair, with_title=True, title_level=2, title_class="page-header") + """
""" self._write(html.format(timespan=hf.format_timespan(time.time() - start_time), **locals())) logging.info("Finished generating html") logging.info("Generate images...") self._process_hist_cache(self._hist_async_img_cache.values(), "Generate images") self._process_boxplot_cache(self._boxplot_async_cache.values(), "Generate box plots") self._write(html.format(timespan=hf.format_timespan(time.time() - start_time), **locals())) if self.misc["gen_pdf"] or self.misc["gen_tex"]: strs = (["tex"] if self.misc["gen_tex"] else []) + (["pdf"] if self.misc["gen_pdf"] else []) self._process_hist_cache(self._hist_async_misc_cache.values(), "Generate {}".format(join_strs(strs))) def _process_hist_cache(self, cache: t.Iterable[dict], title: str): pool = multiprocessing.Pool(4) pool_res = [pool.apply_async(self._process_hist_cache_entry, args=(entry,)) for entry in cache] if Settings().has_log_level("info"): with click.progressbar(pool_res, label=title) as pool_res: for res in pool_res: res.get() else: for res in pool_res: res.get() def _process_boxplot_cache(self, cache: t.Iterable[dict], title: str): pool = multiprocessing.Pool(4) pool_res = [pool.apply_async(self._process_boxplot_cache_entry, args=(entry,)) for entry in cache] if Settings().has_log_level("info"): with click.progressbar(pool_res, label=title) as pool_res: for res in pool_res: res.get() else: for res in pool_res: res.get() def _write(self, html_string: str): """ Store the html string in the appropriate file and append "" """ report_filename = os.path.join(self.misc["out"], self.misc["html_filename"]) with open(report_filename, "w") as f: f.write(html_string) logging.info("Wrote report into " + report_filename) def _full_single_property_comp_table(self, property: str = None) -> 'Table': header_cells = [] for single in self.stats.singles: _single = SingleProperty(single, single.rundata, property) if property is not None else single modal_id = self._short_summary_modal(_single) header_cells.append(Cell(self, content=self._obj_description(single), color_class_obj=single, modal_id=modal_id)) table = Table(self, header_cells, header_cells, Cell(self, "vs.")) for i in range(self.stats.number_of_singles()): for j in range(self.stats.number_of_singles()): popover = Popover(self, "Explanation", content="") cell = None pair = self.stats.get_pair(i, j) rel_diff = None if property is None: popover.content = """ Geometric mean of the mean differences relative to the means of the left: \\[\\sqrt[\|properties\|]{ \\prod_{p \in \\text{properties}} \\frac{\\overline{\\text{left[p]}} - \\overline{\\text{right[p]}}}{ \\overline{\\text{left[p]}}}}\]

Using the more widely known arithmetic mean would be like lying

. """ rel_diff = pair.rel_difference() popover.trigger = "hover click" else: pair = pair[property] popover.content="""Difference relative to the mean of the left: \\begin{align} & \\frac{\\overline{\\text{left[%s]}} - \\overline{\\text{right[%s]}}}{\\overline{\\text{left[%s]}}} \\\\ &= \\frac{%5.4f - %5.4f}{%5.4f} \\end{align} """ % (property, property, property, pair.first.mean(), pair.second.mean(), pair.first.mean()) rel_diff = pair.mean_diff_per_mean() cell = Cell(self, content=str(rel_diff), popover=popover, color_class_obj=pair, show_click_on_info=True) cell.modal_id = self._short_summary_modal(pair) table[i, j] = cell return table def _extended_summary(self, obj: BaseStatObject, with_title: bool = True, title_level: int = 3, title_class: str = "") -> str: html = "" other_id_obj = None # type: BaseStatObject if isinstance(obj, Single): html += self._extended_summary_of_single(obj, title_level) if isinstance(obj, SingleProperty): html += self._extended_summary_of_single_property(obj, title_level) if isinstance(obj, TestedPair): html += self._extended_summary_of_tested_pair(obj, title_level) other_id_obj = obj.swap() if isinstance(obj, TestedPairProperty): html += self._extended_summary_of_tested_pair_property(obj, title_level) if with_title: other_id_app = "" if other_id_obj is None else """
"""\ .format(self._html_id_for_object("misc", other_id_obj)) html = """ {title}""".format(level=title_level, tc=title_class, title=self._obj_description(obj), id=self._html_id_for_object("misc", obj)) + other_id_app + html return html def _extended_summary_of_single(self, obj: Single, title_level: int) -> str: html = self._short_summary(obj, use_modals=True, extended=False, title_level=title_level + 1) for prop in sorted(obj.properties.keys()): html += """
{prop}""".format( level=title_level + 1, prop=prop, id=self._html_id_for_object("misc", obj.properties[prop]) ) html += self._extended_summary(obj.properties[prop], with_title=False, title_level=title_level + 1, title_class="page-header") html += """
""" return html def _extended_summary_of_single_property(self, obj: SingleProperty, title_level: int) -> str: html = self._short_summary(obj, use_modals=True, extended=True, title_level=title_level + 1) return html def _extended_summary_of_tested_pair(self, obj: TestedPair, title_level: int) -> str: html = self._short_summary(obj, use_modals=True, extended=True, title_level=title_level + 1) swapped = obj.swap() for prop in sorted(obj.properties.keys()): html += """
{prop}
""".format( level=title_level + 1, prop=prop, id=self._html_id_for_object("misc", obj.properties[prop]), id2=self._html_id_for_object("misc", swapped.properties[prop]) ) html += self._extended_summary(obj.properties[prop], with_title=False, title_level=title_level + 1, title_class="page-header") html += """
""" return html def _extended_summary_of_tested_pair_property(self, obj: TestedPairProperty, title_level: int) -> str: html = self._short_summary(obj, use_modals=True, extended=True, title_level=title_level + 1) return html def _short_summary(self, obj: BaseStatObject, with_title: bool = False, title_level: int = 4, use_modals: bool = False, extended: bool = False) -> str: html = "" if with_title: html += "{title}".format(level=title_level, title=self._obj_description(obj)) html += self._format_errors_and_warnings(obj) if isinstance(obj, SingleProperty): html += self._short_summary_of_single_property(obj, use_modals, extended) if isinstance(obj, TestedPairProperty): html += self._short_summary_of_tested_pair_property(obj, use_modals, extended) if isinstance(obj, TestedPair): html += self._short_summary_of_tested_pair(obj, use_modals, extended) if isinstance(obj, Single): html += self._short_summary_of_single(obj, use_modals, extended) return html def _short_summary_of_single(self, obj: Single, use_modal: bool = False, extended: bool = False): obj_descrs = sorted(obj.properties.keys()) objs = [obj.properties[val] for val in obj_descrs] return self._short_summary_table_for_single_property(objs=objs, objs_in_cols=False, obj_descrs=obj_descrs, use_modal=use_modal, extended=extended) def _short_summary_of_single_property(self, obj: SingleProperty, use_modals: bool = False, extended: bool = False): filenames = self._histogram(obj, big=extended, zoom_in=True) html = self._filenames_to_img_html(filenames) if extended and self.misc["show_zoomed_out"]: html += self._filenames_to_img_html(self._histogram(obj, big=extended, zoom_in=False)) html += self._short_summary_table_for_single_property([obj], objs_in_cols=True, use_modal=use_modals, extended=extended) return html def _short_summary_of_tested_pair_property(self, obj: TestedPairProperty, extended: bool = False, use_modals: bool = False): filenames = self._histogram(obj, big=extended, zoom_in=True) html = self._filenames_to_img_html(filenames) if extended and self.misc["show_zoomed_out"]: filenames = self._histogram(obj, big=extended, zoom_in=False) html += self._filenames_to_img_html(filenames) ci_popover = Popover(self, "Confidence interval", """ The chance is \\[ 1 - \\alpha = {p} \\] that the mean difference \\begin{{align}} &\\text{{{first}}} - \\text{{{second}}} \\\\ =& {diff} \\end{{align}} lies in the interval $$({ci[0]:5.5f}, {ci[1]:5.5f})$$ (assuming the data is normal distributed to a certain degree). """.format(p=1-self.misc["alpha"], first=str(obj.first.parent), second=str(obj.second.parent), prop=obj.property, diff=obj.mean_diff(), ci=obj.mean_diff_ci(self.misc["alpha"]))) tested_per_prop = [ { "title": "Mean difference", "popover": Popover(self, "Explanation", """ Difference between the mean of the first and the mean of the second. It's the absolute difference and is often less important that the relative differences. """), "func": lambda x: x.mean_diff(), "format": "{:5.5f}" }, { "title": "... per mean", "func": lambda x: x.mean_diff_per_mean(), "format": "{:5.5%}", "popover": Popover(self, "Explanation", """The mean difference relative to the first mean \\begin{align} & \\frac{ \\overline{\\text{%s}} - \\overline{\\text{%s}}}{ \\overline{\\text{%s}} } \\\\ &= \\frac{ %f }{ %f} \\end{align} gives a number that helps to talk about the practical significance of the mean difference. A tiny difference might be cool, but irrelevant (as caching effects are probably higher, use the
temci build
if you're curious about this). """ % (obj.first.parent.description(), obj.second.parent.description(), str(obj.first.parent), float(obj.mean_diff()), float(obj.first.mean()))) }, { "title": "... per std dev", "func": lambda x: x.mean_diff_per_dev(), "format": "{:5.5%}", "popover": Popover(self, "Explanation", """ The mean difference relative to the maximum standard deviation: \\begin{{align}} &\\frac{{ \\overline{{ \\text{{{first}}} }} - \\overline{{\\text{{{second}}}}}}}{{ \\text{{max}}(\\sigma_\\text{{{first}}}, \\sigma_\\text{{{second}}}) }} \\\\ = & \\frac{{{md}}}{{{std}}} \\end{{align}} It's important, because as Gernot Heiser points out: """.format(first=obj.first.parent.description(), second=obj.second.parent.description(), md=obj.mean_diff(), std=obj.max_std_dev()), trigger="hover click") }, { "title": "... ci (lower bound)", "func": lambda x: x.mean_diff_ci(self.misc["alpha"])[0], "format": "{:5.5f}", "extended": True, "popover": ci_popover } ,{ "title": "... ci (upper bound)", "func": lambda x: x.mean_diff_ci(self.misc["alpha"])[1], "format": "{:5.5f}", "extended": True, "popover": ci_popover }, { "title": obj.tester.name, "func": lambda x: x.equal_prob(), "format": "{:5.5%}", "popover": self._popover_for_tester(obj.tester) }, { "title": "min n", "func": lambda x: x.min_observations(), "format": "{}", "popover": Popover(self, "Explanation", """ The minimum of the number of valid runs of both. or statistically spoken: the minimum sample size.""") } ] if not extended: l = [] for elem in tested_per_prop: if not ("extended" in elem and elem["extended"]): l.append(elem) tested_per_prop = l def content_func(row_header: str, col_header: str, row: int, col: int): return tested_per_prop[row]["format"].format(tested_per_prop[row]["func"](obj)) def header_popover_func(elem, index: int, is_header_row: bool): if not is_header_row and "popover" in tested_per_prop[index]: return tested_per_prop[index]["popover"] table = Table.from_content_func(self, cols=[obj], rows=list(map(lambda d: d["title"], tested_per_prop)), content_func=content_func, anchor_cell=Cell(self), header_popover_func=header_popover_func) html += str(table) html += self._short_summary_table_for_single_property(objs=[obj.first, obj.second], obj_descrs=[obj.first.description(), obj.second.description()], objs_in_cols=False, use_modal=use_modals) return html def _short_summary_of_tested_pair(self, obj: TestedPair, extended: bool = False, use_modals: bool = False) -> str: tested_per_prop = [ { "title": "Mean difference", "popover": Popover(self, "Explanation", """ Difference between the mean of the first and the mean of the second. It's the absolute difference and is often less important that the relative differences. """), "func": lambda x: x.mean_diff(), "format": "{:5.5f}" }, { "title": "... per mean", "func": lambda x: x.mean_diff_per_mean(), "format": "{:5.5%}", "popover": Popover(self, "Explanation", """The mean difference relative to the first mean gives a number that helps to talk about the practical significance of the mean difference. A tiny difference might be cool, but irrelevant (as caching effects are probably higher, use the \\verb|temci build| if you're curious about this). """) }, { "title": "... per std dev", "func": lambda x: x.mean_diff_per_dev(), "format": "{:5.5%}", "popover": Popover(self, "Explanation", """ The mean difference relative to the maximum standard deviation is important, because as Gernot Heiser points out: """, trigger="hover click") }, { "title": "... 
ci", "func": lambda x: x.mean_diff_ci(self.misc["alpha"])[0], "format": "{:5.5f}", "extended": True, "popover": Popover(self, "Confidence interval", """ The chance is \\[ 1 - \\alpha = {p} \\] that the mean difference lies in the interval of which the lower and the upper bound are given (assuming the data is normal distributed to a certain degree). """.format(p=1-self.misc["alpha"])) } ,{ "title": "", "func": lambda x: x.mean_diff_ci(self.misc["alpha"])[1], "format": "{:5.5f}", "extended": True, "popover": Popover(self, "Confidence interval", """ The chance is \\[ 1 - \\alpha = {p} \\] that the mean difference lies in the interval of which the lower and the upper bound are given. """.format(p=1-self.misc["alpha"])) }, { "title": obj.tester.name, "func": lambda x: x.equal_prob(), "format": "{:5.5%}", "popover": self._popover_for_tester(obj.tester) } ] if not extended: l = [] for elem in tested_per_prop: if not ("extended" in elem and elem["extended"]): l.append(elem) tested_per_prop = l def header_link_func(elem: str, index: int, is_header_row: bool): if not is_header_row and not use_modals: return "#" + self._html_id_for_object("misc", obj.properties[elem]) def header_modal_func(elem: str, index: int, is_header_row: bool): if not is_header_row and use_modals: return self._short_summary_modal(obj.properties[elem]) def content_func(row_header: str, col_header: str, row: int, col: int): d = tested_per_prop[col] res = d["func"](obj.properties[row_header]) return d["format"].format(res) def header_color_obj(elem, index: int, is_header_row: bool): if not is_header_row: return obj[elem] def header_popover_func(elem, index: int, is_header_row: bool): if is_header_row and "popover" in tested_per_prop[index]: return tested_per_prop[index]["popover"] table = Table.from_content_func(self, rows=sorted(list(obj.properties.keys())), cols=list(map(lambda d: d["title"], tested_per_prop)), header_link_func=header_link_func, content_func=content_func, anchor_cell=Cell(self), header_color_obj_func=header_color_obj, header_modal_func=header_modal_func, header_popover_func=header_popover_func) html = str(table) html += """

The relative difference between {first} and {second} is {rel_diff} """.format(po=Popover(self, "Explanation", """ Geometric mean of the mean differences relative to the means of the first: \\[\\sqrt[\|properties\|]{ \\prod_{p \in \\text{properties}} \\frac{\\overline{\\text{first[p]}} - \\overline{\\text{second[p]}}}{ \\overline{\\text{first[p]}}}}\] Using the more widely known would be like lying. """, trigger="hover click"), first=obj.first, second=obj.second, rel_diff=obj.rel_difference()) return html def _short_summary_table_for_single_property(self, objs: t.List[SingleProperty], use_modal: bool, objs_in_cols: bool, obj_descrs: t.List[str] = None, extended: bool = False) -> str: """ :param objs: objects to look on :param use_modal: use modals for meta information, not simple links? :param objs_in_cols: show the different objects in own columns, not rows :param extended: more infos :return: """ obj_descrs = obj_descrs or [self._obj_description(obj) for obj in objs] #objs[0]..std_dev_per_mean() mean_ci_popover = Popover(self, "Mean confidence interval", """ The chance is \\[ 1 - \\alpha = {p} \\] that the mean lies in the given interval (assuming the data is normal distributed to a certain degree). """.format(p=1-self.misc["alpha"])) std_dev_ci_popover = Popover(self, "Standard deviation confidence interval", """ The chance is \\[ 1 - \\alpha = {p} \\] that the standard deviation lies in the given interval (assuming the data is normal distributed to a certain degree). """.format(p=1-self.misc["alpha"])) tested_per_prop = [ { "title": "mean", "func": lambda x: x.mean(), "format": "{:5.5f}", "popover": Popover(self, "Explanation", """The simple arithmetical mean \\[ \\frac{1}{n}\\sum_{i=1}^{n} a_i. \\] """) }, { "title": "std dev", "popover": Popover(self, "Explanation", """ The sample standard deviation \\[ \\sigma_N = \\sqrt{\\frac{1}{N} \\sum_{i=1}^N (x_i - \\overline{x})^2} \\] In statistics, the standard deviation is a measure that is used to quantify the amount of variation or dispersion of a set of data values. A standard deviation close to 0 indicates that the data points tend to be very close to the mean (also called the expected value) of the set, while a high standard deviation indicates that the data points are spread out over a wider range of values. (wikipedia) """, trigger="hover click"), "func": lambda x: x.std_dev(), "format": "{:5.5f}", "extended": True }, { "title": "$$\sigma$$ per mean", "func": lambda x: x.std_dev_per_mean(), "format": "{:5.0%}", "popover": Popover(self, "Explanation", """ The standard deviation relative to the mean is a measure of how big the relative variation of data is. A small value is considered neccessary for a benchmark to be useful. Or to quote Gernot Heiser:

Always do several runs, and check the standard deviation. Watch out for abnormal variance. In the sort of measurements we do, standard deviations are normally expected to be less than 0.1%. If you see >1% this should ring alarm bells.

""", trigger="hover click") }, { "title": "sem", "popover": Popover(self, "Explanation", """Standard error mean: \\[ \\sigma(\\overline{X}) = \\frac{\\sigma}{\\sqrt{n}} \\]

Put simply, the standard error of the sample is an estimate of how far the sample mean is likely to be from the population mean, whereas the standard deviation of the sample is the degree to which individuals within the sample differ from the sample mean. (wikipedia)

""", trigger="hover focus"), "func": lambda x: x.sem(), "format": "{:5.5f}", "extended": False }, { "title": "median", "func": lambda x: x.median(), "format": "{:5.5f}", "popover": Popover(self, "Explanation", """ The median is the value that seperates that data into two equal sizes subsets (with the < and the > relation respectively). As the mean and the standard deviation are already given here, the median isn't important. """), "extended": True }, { "title": "min", "func": lambda x: x.min(), "format": "{:5.5f}", "popover": Popover(self, "Explanation", """The minimum value. It's a bad sign if the maximum is far lower than the mean and you can't explain it. """), "extended": True }, { "title": "max", "func": lambda x: x.min(), "format": "{:5.5f}", "popover": Popover(self, "Explanation", """The maximum value. It's a bad sign if the maximum is far higher than the mean and you can't explain it. """), "extended": True }, { "title": "n", "func": lambda x: x.observations(), "format": "{}", "popover": Popover(self, "Explanation", """The number of valid runs or statistically spoken: the sample size."""), "extended": False }, { "title": "mean ci (lower bound)", "func": lambda x: x.mean_ci(self.misc["alpha"])[0], "format": "{:5.5f}", "extended": True, "popover": mean_ci_popover } ,{ "title": "mean ci (upper bound)", "func": lambda x: x.mean_ci(self.misc["alpha"])[1], "format": "{:5.5f}", "extended": True, "popover": mean_ci_popover }, { "title": "std dev ci (lower bound)", "func": lambda x: x.std_dev_ci(self.misc["alpha"])[0], "format": "{:5.5f}", "extended": True, "popover": mean_ci_popover } ,{ "title": "std dev ci (upper bound)", "func": lambda x: x.std_dev_ci(self.misc["alpha"])[1], "format": "{:5.5f}", "extended": True, "popover": mean_ci_popover }, { "title": "normality probability", "func": lambda x: x.normality(), "format": "{:5.5%}", "popover": Popover(self, "Explanation", """ Quoting the minitab blog:

If process knowledge tells you that your data should follow a normal distribution, then run a normality test to be sure. If your Anderson-Darling Normality Test p-value is larger than, say, an alpha level of 0.05 (here {alpha}), then you can conclude that your data follow a normal distribution and, therefore, the mean is an adequate measure of central tendency.

The T test is robust against non normality, but that's not the case fpr statistical properties like the given confidence intervals. """.format(alpha=self.misc["alpha"])), "extended": True } ] if not extended: l = [] for elem in tested_per_prop: if not ("extended" in elem and elem["extended"]): l.append(elem) tested_per_prop = l def header_link_func(elem: SingleProperty, index: int, is_header_row: bool): if objs_in_cols == is_header_row and not use_modal: return "#" + self._html_id_for_object("misc", elem) def header_modal_func(elem: SingleProperty, index: int, is_header_row: bool): if objs_in_cols == is_header_row and use_modal: return self._short_summary_modal(elem) def header_popover_func(elem, index: int, is_header_row: bool): if objs_in_cols != is_header_row and "popover" in tested_per_prop[index]: return tested_per_prop[index]["popover"] def content_func(row_header: t.Union[SingleProperty, str], col_header: t.Union[SingleProperty, str], row: int, col: int): d = {} obj = None # type: SingleProperty if objs_in_cols: d = tested_per_prop[row] obj = col_header else: d = tested_per_prop[col] obj = row_header return d["format"].format(d["func"](obj)) def header_color_obj(elem, index: int, is_header_row: bool): if objs_in_cols == is_header_row: return elem def header_content_func(elem, index: int, is_header_row: bool) -> str: if objs_in_cols == is_header_row: return obj_descrs[index] return tested_per_prop[index]["title"] func_titles = list(map(lambda d: d["title"], tested_per_prop)) rows = [] cols = [] if objs_in_cols: cols = objs rows = func_titles else: cols = func_titles rows = objs table = Table.from_content_func(self, rows=rows, cols=cols, header_link_func=header_link_func, content_func=content_func, anchor_cell=Cell(self), header_color_obj_func=header_color_obj, header_content_func=header_content_func, header_modal_func=header_modal_func, header_popover_func=header_popover_func) return str(table) def _comparison_for_prop(self, property) -> str: html = self._filenames_to_img_html( self._singles_property_boxplot(self.stats.singles_properties[property], big=True), kind="boxplot" ) html += "

" html += self._tabular_comparison_for_prop(property) return html def _tabular_comparison_for_prop(self, property: str) -> str: return self._short_summary_table_for_single_property(self.stats.singles_properties[property].singles, use_modal=True, objs_in_cols=False) def _filenames_to_img_html(self, filenames: t.Dict[str, str], kind: str = "hist"): return """

""".format(popover=self._img_filenames_popover(filenames, kind), img=self._filename_relative_to_out_dir(filenames["img"])) def _img_filenames_popover(self, filenames: t.Dict[str, str], kind: str = "hist") -> 'Popover': _filenames = {} for key in filenames: _filenames[key] = self._filename_relative_to_out_dir(filenames[key]) filenames = _filenames html = """
""" if "img" in filenames: html += """ The current image """.format(**filenames) if "pdf" in filenames: html += """ PDF (generated by matplotlib) """.format(**filenames) if "tex" in filenames: if kind == "hist": html += """ TeX (requiring the package pgfplots) """.format(**filenames) elif kind == "boxplot": html += """ TeX (requiring the package pgfplots and \\usepgfplotslibrary{{statistics}}) """.format(**filenames) html +=""" Standalone TeX """.format(**filenames) html += """
""".format(**filenames) return Popover(self, "Get this image in your favorite format", content=html, trigger="hover click") def _filename_relative_to_out_dir(self, abs_filename: str) -> str: ret = os.path.realpath(abs_filename)[len(os.path.realpath(self.misc["out"])) + 1: ] if ret == "": return "." return ret _boxplot_cache = {} _boxplot_async_cache = {} def _singles_property_boxplot(self, obj: SinglesProperty, fig_width: int = None, big: bool = False): if fig_width is None: fig_width = self.misc["fig_width_big"] if big else self.misc["fig_width_small"] filename = self._get_fig_filename(obj) + "___{}".format(fig_width) if filename not in self._boxplot_async_cache: d = { "img": filename + BaseStatObject.img_filename_ending } if self.misc["gen_tex"]: d["tex"] = filename + ".tex" d["tex_standalone"] = filename + "____standalone.tex" if self.misc["gen_pdf"]: d["pdf"] = filename + ".pdf" self._boxplot_cache[filename] = d self._boxplot_async_cache[filename] = { "filename": filename, "obj": obj, "fig_width": fig_width, "img": True, "tex": self.misc["gen_tex"], "pdf": self.misc["gen_pdf"], "tex_sa": self.misc["gen_tex"] } return self._boxplot_cache[filename] def _process_boxplot_cache_entry(self, entry: t.Dict[str, str]): height = self.misc["boxplot_height"] * len(entry["obj"].singles) + 2 entry["obj"].boxplot(fig_width=entry["fig_width"], fig_height=height) entry["obj"].store_figure(entry["filename"], fig_width=entry["fig_width"], img=entry["img"], tex=entry["tex"], pdf=entry["pdf"], tex_standalone=entry["tex_sa"], fig_height=height) logging.debug("Plotted {}, fig_width={}cm, img={}, tex={}, pdf={}" .format(entry["obj"], entry["fig_width"], entry["img"], entry["tex"], entry["pdf"])) _hist_cache = {} # type: t.Dict[str, t.Dict[str, str]] _hist_async_img_cache = {} _hist_async_misc_cache = {} def _histogram(self, obj: BaseStatObject, fig_width: int = None, zoom_in: bool = True, big: bool = False) -> t.Dict[str, str]: if fig_width is None: fig_width = self.misc["fig_width_big"] if big else self.misc["fig_width_small"] filename = self._get_fig_filename(obj) + "___{}___{}".format(fig_width, zoom_in) if filename not in self._hist_cache: d = { "img": filename + BaseStatObject.img_filename_ending } if self.misc["gen_tex"]: d["tex"] = filename + ".tex" d["tex_standalone"] = filename + "____standalone.tex" if self.misc["gen_pdf"]: d["pdf"] = filename + ".pdf" self._hist_cache[filename] = d self._hist_async_img_cache[filename] = { "filename": filename, "obj": obj, "fig_width": fig_width, "zoom_in": zoom_in, "img": True, "tex": False, "pdf": False, "tex_sa": False } if self.misc["gen_pdf"] or self.misc["gen_tex"]: self._hist_async_misc_cache[filename] = { "filename": filename, "obj": obj, "fig_width": fig_width, "zoom_in": zoom_in, "img": False, "tex": self.misc["gen_tex"], "pdf": self.misc["gen_pdf"], "tex_sa": self.misc["gen_tex"] } return self._hist_cache[filename] def _process_hist_cache_entry(self, entry: t.Dict[str, str]): entry["obj"].histogram(zoom_in=entry["zoom_in"], fig_width=entry["fig_width"]) entry["obj"].store_figure(entry["filename"], fig_width=entry["fig_width"], img=entry["img"], tex=entry["tex"], pdf=entry["pdf"], tex_standalone=entry["tex_sa"]) logging.debug("Plotted {}, zoom_in={}, fig_width={}cm, img={}, tex={}, pdf={}" .format(entry["obj"], entry["zoom_in"], entry["fig_width"], entry["img"], entry["tex"], entry["pdf"])) def _popover_for_tester(self, tester: Tester): return Popover(self, tester.name.capitalize(), """ Probability that the null hypothesis is not incorrect. 
It's the probability that the measured values (for a given property) come from the same population for both benchmarked programs. Or, in short: that the programs have the same characteristics for a given property.
Important note: Statistical tests can only give a probability of the null hypothesis being incorrect. But this is okay if your aim is to see whether a specific program is better (or different) than another program in some respect.
""") def _short_summary_modal(self, obj: BaseStatObject) -> str: """ :param obj: :return: id """ if not hasattr(self, "_modal_cache"): self._modal_cache = [] # type: t.List[str] modal_id = self._html_id_for_object("short_summary_modal", obj) if modal_id in self._modal_cache: return modal_id modal_title = self._obj_description(obj) modal_body = self._short_summary(obj, with_title=False) html_id = self._html_id_for_object("misc", obj) html = """ """.format(**locals()) self.app_html += html return modal_id def _obj_description(self, obj: BaseStatObject) -> str: if isinstance(obj, Single): return obj.description() if isinstance(obj, TestedPair): return "{} vs. {}".format(self._obj_description(obj.first), self._obj_description(obj.second)) if isinstance(obj, SingleProperty) or isinstance(obj, TestedPairProperty): obj_base = "" if isinstance(obj, SingleProperty): obj_base = obj.rundata.description() else: obj_base = self._obj_description(obj.parent) return obj_base + " (regarding {})".format(obj.property) def _html_id_for_object(self, scope: str, obj: BaseStatObject) -> str: return "{}___{}".format(scope, self._get_obj_id(obj)) def _get_obj_id(self, obj: BaseStatObject) -> str: if isinstance(obj, Single): return str(self.stats.singles.index(obj)) if isinstance(obj, TestedPair): return self._get_obj_id(obj.first) + "_" + self._get_obj_id(obj.second) if isinstance(obj, SingleProperty) or isinstance(obj, TestedPairProperty): return self._get_obj_id(obj.parent) + "__" + self.html_escape_property(obj.property) if isinstance(obj, SinglesProperty): return "SinglesProperty______" + self.html_escape_property(obj.property) assert False # you shouldn't reach this point @classmethod def html_escape_property(cls, property: str) -> str: return re.sub(r"([^a-zA-Z0-9]+)", "000000", property) def _format_errors_and_warnings(self, obj: BaseStatObject, show_parent: bool = True) -> str: def format_msg(msg: StatMessage): message = msg.generate_msg_text(show_parent) msg_class = "div_danger" if msg.type == StatMessageType.ERROR else "div_warning" html = """
{message}
""".format(**locals()) if msg.hint != "" and msg.hint is not None: html = """
{message}
""".format(**locals()) return html def collapsible(title: str, msgs: t.List[StatMessage]): collapse_id = self._random_html_id() heading_id = self._random_html_id() inner = "\n".join(map(format_msg, msgs)) return """
{inner}
""".format(**locals()) html = "" if obj.has_errors(): html += collapsible('Errors {}'.format(len(obj.errors())), obj.errors()) if obj.has_warnings(): html += collapsible('Warnings {}'.format(len(obj.warnings())), obj.warnings()) return html _time = time.time() def _get_fig_filename(self, obj: BaseStatObject) -> str: """ Without any extension. """ return os.path.realpath(os.path.join(os.path.abspath(self.misc["out"]), self._html_id_for_object("fig", obj))) _id_counter = 1000 def _random_html_id(self) -> str: self._id_counter += 1 return "id" + str(self._id_counter) class Popover: divs = {} # t.Dict[str, str] """ Maps the contents of the created divs to their ids """ def __init__(self, parent: HTMLReporter2, title: str, content: str, trigger: str = "hover"): self.parent = parent self.title = title self.content = content or "" self.trigger = trigger def __str__(self) -> str: content = """
""" + self.content + """
""" if content not in self.divs: id = self.parent._random_html_id() self.parent.app_html += """ """.format(id=id, content=content) self.divs[content] = id id = self.divs[content] focus = 'tabindex="0" role="button"' if "focus" in self.trigger or "click" in self.trigger else "" return '{focus} data-trigger="{trigger}" data-toggle="popover" data-html="true"' \ 'data-placement="auto" data-title="{title}" data-container="body" ' \ 'data-content-id="{id}"'\ .format(content=content, trigger=self.trigger, title=self.title, focus=focus, id=id) def color_class(obj: BaseStatObject) -> str: if obj.has_errors(): return "danger" if obj.has_warnings(): return "warning" if isinstance(obj, TestedPairProperty): if obj.is_equal() is not None: return "sucess" if obj.is_equal() == False and obj.mean_diff_per_mean() < 1 else "active" return "" def color_explanation(obj: BaseStatObject) -> str: _color_class = "div_" + color_class(obj) msg = "" if obj.has_errors(): msg = "This color means that the corresponding data set is erroneous " \ "(with {} errors and {} warnings).".format(len(obj.errors()), len(obj.warnings())) elif obj.has_warnings(): msg = "This color means that the corresponding data set could be erroneous " \ "(with {} warnings).".format(len(obj.warnings())) elif isinstance(obj, TestedPairProperty) and obj.is_equal() is not None: msg = "This color means that everything is probably okay with the corresponding data" \ " and that the tester could make a decision." else: msg = "Everything seems to be okay." if msg != "": return """

{msg}

""".format(**locals()) class Cell: """ Cell of a html table """ def __init__(self, parent: HTMLReporter2, content: str = "", cell_class: str = "", popover: Popover = None, modal_id: str = None, color_class_obj: BaseStatObject = None, is_header_cell: bool = False, cell_scope: str = None, show_click_on_info: bool = None, link: str = None): """ :param content: displayed text of the cell :param cell_class: CSS class of the table cellr :param modal_id: id of the modal linked to this cell :param color_class_obj: object used to get the color class. Adds also an explanation to the popover :param is_header_cell: is the cell a header cell? """ self.content = content self.cell_class = cell_class self.popover = popover self.modal_id = modal_id self.link = link self.parent = parent assert link is None or modal_id is None if color_class_obj is not None: if self.popover is None: self.popover = Popover(parent, "Explanation", color_explanation(color_class_obj)) else: self.popover.content += color_explanation(color_class_obj) self.cell_class += " " + color_class(color_class_obj) if (modal_id is not None and show_click_on_info != False) or (show_click_on_info is True and not link): msg = "

Click on the cell to get more information.

" if self.popover is None: self.popover = Popover(parent, "Explanation", msg) else: self.popover.content += msg self.is_header_cell = is_header_cell self.cell_scope = cell_scope def __str__(self): cell_tag = "th" if self.is_header_cell else "td" scope = 'scope="{}"'.format(self.cell_scope) if self.cell_scope else "" html = """<{} class="{}" {}>""".format(cell_tag, self.cell_class, scope) html_end = "".format(cell_tag) if self.popover: html += """
""".format(self.popover) html_end = "
" + html_end if self.modal_id: html += """""".format(id=self.modal_id) html_end = "" + html_end if self.link: html += """ """.format(link=self.link, elem_id=self.parent._random_html_id()) html_end = "" + html_end return html + self.content + html_end T1 = t.TypeVar('T1', BaseStatObject, str, int, float, bool) T2 = t.TypeVar('T2', BaseStatObject, str, int, float, bool) class Table: """ A html table consisting of Cell objects. Idea: Abstract the creation of html tables to a degree that allows automatic generation of latex and csv. """ def __init__(self, parent: HTMLReporter2, header_row: t.List['Cell'], header_col: t.List['Cell'], anchor_cell: 'Cell' = None, content_cells: t.List[t.List['Cell']] = None): """ The resulting table has len(header_row) + rows and len(header_col) + 1 columns. :param header_row: list of cells of the bold top header row :param header_col: list of cells of the bold left header collumn :param anchor_cell: the cell in the top left corner of the table :param content_cells: a list of content rows :return resulting html """ self.parent = parent self.header_row = header_row self.header_col = header_col for cell in itertools.chain(self.header_row, self.header_col): cell.is_header_cell = True for cell in self.header_col: cell.cell_scope = "row" assert len(header_row) > 0 self.orig_anchor_cell = Cell(self.parent, "") if anchor_cell is None else Cell(self.parent, anchor_cell.content) self.anchor_cell = anchor_cell or Cell(self.parent, "⍗ ") self.anchor_cell.content += " ⍗" self.anchor_cell.cell_class += " anchor_cell " self.height = len(header_col) """ Number of content (non header) rows """ self.width = len(header_row) """ Number of content (non header) columns """ if content_cells: assert len(content_cells) == self.height and len(content_cells[0]) == self.width \ and all(len(content_cells[0]) == len(row) for row in content_cells) self.content_cells = content_cells else: self.content_cells = [[Cell(self.parent) for i in range(self.width)] for j in range(self.height)] def __str__(self) -> str: html = """ """ html += " ".join(str(cell) for cell in [self.format_anchor_cell()] + self.header_row) html += """ """ for (hcell, row) in zip(self.header_col, self.content_cells): html += "\t\t\t{}\n".format(" ".join(str(cell) for cell in [hcell] + row)) html += """
""" return html def html(self): return str(self) def format_anchor_cell(self) -> 'Cell': formats = [{ "ending": ".tex", "mime": "application/x-latex", "descr": "Latex table", "code": self.latex() }, { "ending": ".tex", "mime": "application/x-latex", "descr": "Latex table with surrounding article environment", "code": self.latex(True) }, { "ending": ".csv", "mime": "text/csv", "descr": "CSV table", "code": self.csv() }] html = """
""" for d in formats: id = self.parent._random_html_id() self.parent.app_html += """ """.format(id, d["code"]) html += """
{descr}
""".format(descr=d["descr"], id=id, filename="table" + d["ending"], mime=d["mime"]) html += """
""" self.anchor_cell.popover = Popover(self.parent, "Get this table in your favorite format", content=html, trigger="hover click") return self.anchor_cell def latex(self, with_env: bool = False) -> str: tex = "" tex_end = "" if with_env: tex = """ \\documentclass[10pt,a4paper]{article} \\begin{document} """ tex_end = """ \\end{document} """ tex += """ \\begin{{tabular}}{{l|{cs}}} """.format(cs="".join("r" * self.width)) tex_end = """ \\end{tabular} """ + tex_end tex += " & ".join(cell.content for cell in [self.orig_anchor_cell] + self.header_row) + "\\\\ \n \\hline " for (hcell, row) in zip(self.header_col, self.content_cells): tex += " & ".join(cell.content.replace("%", "\\%") for cell in [hcell] + row) + "\\\\ \n" return tex + tex_end def csv(self) -> str: rows = [] rows.append(",".join(repr(cell.content) for cell in [self.orig_anchor_cell] + self.header_row)) def convert_content(text: str) -> str: if text.endswith("%"): return str(float(text[:-1]) / 100) try: float(text) return text except: return repr(text) for (hcell, row) in zip(self.header_col, self.content_cells): rows.append(",".join(convert_content(cell.content) for cell in [hcell] + row)) return "\n".join(rows) def __getitem__(self, cell_pos: t.Tuple[int, int]) -> 'Cell': return self.content_cells[cell_pos[0]][cell_pos[1]] def __setitem__(self, cell_pos: t.Tuple[int, int], new_val: 'Cell'): self.content_cells[cell_pos[0]][cell_pos[1]] = new_val def append(self, header: 'Cell', content_row: t.List['Cell']): assert len(content_row) == self.width self.content_cells.append(content_row) self.header_col.append(header) @classmethod def from_content_func(cls, parent: HTMLReporter2, rows: t.List[T1], cols: t.List[T2], anchor_cell: 'Cell', content_func: t.Callable[[T1, T2], Any], content_modal_func: t.Callable[[T1, T2, int, int], str] = None, header_modal_func: t.Callable[[t.Union[T1, T2], int, bool], str] = None, content_popover_func: t.Callable[[T1, T2, int, int], t.Optional[Popover]] = None, header_popover_func: t.Callable[[t.Union[T1, T2], int, bool], t.Optional[Popover]] = None, content_link_func: t.Callable[[T1, T2, int, int], t.Optional[str]] = None, header_link_func: t.Callable[[t.Union[T1, T2], int, bool], t.Optional[str]] = None, content_color_obj_func: t.Callable[[T1, T2, int, int], t.Optional[BaseStatObject]] = None, header_color_obj_func: t.Callable[[t.Union[T1, T2], int, bool], t.Optional[BaseStatObject]] = None, header_content_func: t.Callable[[t.Union[T1, T2], int, bool], str] = None): """ Idea: Table that populates itself with a passed content function. 
""" def convert_hc(elem: t.Union[T1, T2], index: int, header_row: bool) -> Cell: def call(func: t.Optional[t.Callable[[t.Union[T1, T2], int, bool], t.T]]) -> t.T: if func: return func(elem, index, header_row) return None content = "" color_obj = None if header_content_func: content = str(header_content_func(elem, index, header_row)) elif isinstance(elem, str) or isinstance(elem, int) or isinstance(elem, float): content = str(elem) elif isinstance(elem, BaseStatObject): content = parent._obj_description(elem) else: assert False if isinstance(elem, BaseStatObject): color_obj = elem if header_color_obj_func: color_obj = header_color_obj_func(elem, index, header_row) modal_id = call(header_modal_func) popover = call(header_popover_func) link = None if header_link_func and header_link_func(elem, index, header_row): assert not modal_id # modal and link can't be used together in the same cell link = header_link_func(elem, index, header_row) return Cell(parent, content, popover=popover, modal_id=modal_id, color_class_obj=color_obj, is_header_cell=True, cell_scope="row" if header_row else None, link=link) header_row = [] for (i, elem) in enumerate(cols): header_row.append(convert_hc(elem, i, header_row=True)) header_col = [] for (i, elem) in enumerate(rows): header_col.append(convert_hc(elem, i, header_row=False)) def convert_cc(row_header: T1, col_header: T2, row: int, col: int) -> Cell: def call(func: t.Optional[t.Callable[[T1, T2, int, int], t.T]]) -> t.T: if func: return func(row_header, col_header, row, col) return None content = str(content_func(row_header, col_header, row, col)) color_obj = call(content_color_obj_func) modal_id = call(content_modal_func) popover = call(content_popover_func) link = call(content_link_func) assert None in [link, modal_id] return Cell(parent, content, popover=popover, modal_id=modal_id, color_class_obj=color_obj, link=link) content_cells = [] for (row, row_header) in enumerate(rows): a = [] for (col, col_header) in enumerate(cols): a.append(convert_cc(row_header, col_header, row, col)) content_cells.append(a) return Table(parent, header_row, header_col, anchor_cell, content_cells) def html_escape_property(property: str) -> str: return re.sub(r"([^a-zA-Z0-9]+)", "000000", property)PK8H7_temci/tester/stats.py""" Statistical helper classes for tested pairs and single blocks. 
""" import logging import os from collections import defaultdict from enum import Enum import itertools import math from temci.tester.rundata import RunData from temci.tester.testers import Tester, TesterRegistry from temci.utils.settings import Settings import typing as t import temci.utils.util as util if util.can_import("scipy"): import numpy as np import scipy as sp import scipy.stats as st import pandas as pd from temci.utils.typecheck import * from temci.utils.util import join_strs class StatMessageType(Enum): ERROR = 10 WARNING = 5 class StatMessageValueFormat(Enum): INT = "{}" FLOAT = "{:5.5f}" PERCENT = "{:5.3%}" class StatMessage: """ A statistical message that gives a hint to """ message = "{props}: {b_val}" hint = "" type = None # type: StatMessageType border_value = 0 value_format = StatMessageValueFormat.FLOAT # type: t.Union[StatMessageValueFormat, str] def __init__(self, parent: 'BaseStatObject', properties: t.Union[t.List[str], str], values): self.parent = parent if not isinstance(properties, list): properties = [properties] if not isinstance(values, list): values = [values] typecheck(properties, List() // (lambda x: len(x) > 0)) typecheck(values, List() // (lambda x: len(x) == len(properties))) self.properties = sorted(properties) self.values = values def __add__(self, other: 'StatMessage') -> 'StatMessage': typecheck(other, T(type(self))) assert self.parent.eq_except_property(other.parent) return type(self)(self.parent, self.properties + other.properties, self.values + other.values) @staticmethod def combine(*messages: t.List[t.Optional['StatMessage']]) -> t.List['StatMessage']: """ Combines all message of the same type and with the same parent in the passed list. Ignores None entries. :param messages: passed list of messages :return: new reduced list """ msgs = set([msg for msg in messages if msg is not None]) # t.Set['StatMessage'] something_changed = True while something_changed: something_changed = False merged_pair = None # type: t.Tuple['StatMessage', 'StatMessage'] for (msg, msg2) in itertools.product(msgs, msgs): if msg is not msg2: if msg.parent.eq_except_property(msg2.parent) and type(msg) == type(msg2): merged_pair = (msg, msg2) something_changed = True break if something_changed: msg, msg2 = merged_pair msgs.remove(msg) msgs.remove(msg2) msgs.add(msg + msg2) return list(msgs) @classmethod def _val_to_str(cls, value) -> str: format = cls.value_format if isinstance(cls.value_format, str) else cls.value_format.value return format.format(value) @classmethod def check_value(cls, value) -> bool: """ If this fails with the passed value, than the warning is appropriate. """ pass @classmethod def create_if_valid(cls, parent, value, properties = None, **kwargs) -> t.Union['StatMessage', None]: assert isinstance(value, Int()|Float()) if cls.check_value(value): return None ret = None if properties is not None: ret = cls(parent, properties, value, **kwargs) else: ret = cls(parent, properties, value, **kwargs) return ret def generate_msg_text(self, show_parent: bool) -> str: """ Generates the text of this message object. :param show_parent: Is the parent shown in after the properties? E.g. 
"blub of bla parent: …" :return: message text """ val_strs = list(map(self._val_to_str, self.values)) prop_strs = ["{} ({})".format(prop, val) for (prop, val) in zip(self.properties, val_strs)] props = join_strs(prop_strs) if show_parent: props += " of {}".format(self.parent.description()) return self.message.format(b_val=self._val_to_str(self.border_value), props=props) class StatWarning(StatMessage): type = StatMessageType.WARNING class StatError(StatWarning, StatMessage): type = StatMessageType.ERROR class StdDeviationToHighWarning(StatWarning): message = "The standard deviation per mean of {props} is to high it should be <= {b_val}." hint = "With the exec run driver you can probably use the stop_start plugin, preheat and sleep plugins. " \ "Also consider to increase the number of measured runs." border_value = 0.01 value_format = StatMessageValueFormat.PERCENT @classmethod def check_value(cls, value) -> bool: return value <= cls.border_value class StdDeviationToHighError(StdDeviationToHighWarning): type = StatMessageType.ERROR border_value = 0.05 class NotEnoughObservationsWarning(StatWarning): message = "The number of observations of {props} is less than {b_val}." hint = "Increase the number of measured runs." border_value = 30 value_format = StatMessageValueFormat.INT @classmethod def check_value(cls, value) -> bool: return value >= cls.border_value class NotEnoughObservationsError(NotEnoughObservationsWarning): type = StatMessageType.ERROR border_value = 15 class BaseStatObject: """ Class that gives helper methods for the extending stat object classes. """ _filename_counter = 0 img_filename_ending = ".svg" def __init__(self): self._stat_messages = [] self.fig = None self._hist_data = {} def get_stat_messages(self) -> t.List[StatMessage]: if not self._stat_messages: self._stat_messages = StatMessage.combine(*self._get_stat_messages()) return self._stat_messages def _get_stat_messages(self) -> t.List[StatMessage]: raise NotImplementedError() def warnings(self) -> t.List[StatMessage]: return [x for x in self.get_stat_messages() if x.type is StatMessageType.WARNING] def errors(self) -> t.List[StatMessage]: return [x for x in self.get_stat_messages() if x.type is StatMessageType.ERROR] def has_errors(self) -> bool: return any([x.type == StatMessageType.ERROR for x in self.get_stat_messages()]) def has_warnings(self) -> bool: return any([x.type == StatMessageType.WARNING for x in self.get_stat_messages()]) def get_data_frame(self, **kwargs) -> 'pd.DataFrame': """ Get the data frame that is associated with this stat object. """ raise NotImplementedError() def eq_except_property(self, other) -> bool: raise NotImplementedError() def _height_for_width(self, width: float) -> float: golden_mean = (np.sqrt(5) - 1.0) / 2.0 # Aesthetic ratio return width * golden_mean def _latexify(self, fig_width: float, fig_height: float = None): """Set up matplotlib's RC params for LaTeX plotting. Call this before plotting a figure. 
Adapted from http://nipunbatra.github.io/2014/08/latexify/ Parameters ---------- fig_width : float, optional, inches fig_height : float, optional, inches """ # code adapted from http://www.scipy.org/Cookbook/Matplotlib/LaTeX_Examples #MAX_HEIGHT_INCHES = 8.0 #if fig_height > MAX_HEIGHT_INCHES: # print("WARNING: fig_height too large:" + fig_height + # "so will reduce to" + MAX_HEIGHT_INCHES + "inches.") # fig_height = MAX_HEIGHT_INCHES params = {'backend': 'ps', 'text.latex.preamble': ['\\usepackage{gensymb}'], 'axes.labelsize': 8, # fontsize for x and y labels (was 10) 'axes.titlesize': 8, 'font.size': 8, # was 10 'legend.fontsize': 8, # was 10 'xtick.labelsize': 8, 'ytick.labelsize': 8, 'text.usetex': True, 'figure.figsize': self._fig_size_cm_to_inch(fig_width,fig_height), 'font.family': 'serif' } import matplotlib matplotlib.rcParams.update(params) def _format_axes(self, ax): """ Adapted from http://nipunbatra.github.io/2014/08/latexify/ """ SPINE_COLOR = 'gray' for spine in ['top', 'right']: ax.spines[spine].set_visible(False) for spine in ['left', 'bottom']: ax.spines[spine].set_color(SPINE_COLOR) ax.spines[spine].set_linewidth(0.5) ax.xaxis.set_ticks_position('bottom') ax.yaxis.set_ticks_position('left') for axis in [ax.xaxis, ax.yaxis]: axis.set_tick_params(direction='out', color=SPINE_COLOR) return ax def _get_new_file_name(self, dir: str) -> str: self._filename_counter += 1 return os.path.join(os.path.abspath(dir), str(self._filename_counter)) def _fig_size_cm_to_inch(self, fig_width: float, fig_height: float) -> t.Tuple[float, float]: return fig_width * 0.39370079, fig_height * 0.39370079 def store_figure(self, filename: str, fig_width: float, fig_height: float = None, pdf: bool = True, tex: bool = True, tex_standalone: bool = True, img: bool = True) -> t.Dict[str, str]: import matplotlib.pyplot as plt """ Stores the current figure in different formats and returns a dict, that maps each used format (pdf, tex or img) to the resulting files name. :param filename: base filename that is prepended with the appropriate extensions :param fig_width: width of the resulting figure (in cm) :param fig_height: height of the resulting figure (in cm) or calculated via the golden ratio from fig_width :param pdf: store as pdf optimized for publishing :param tex: store as tex with pgfplots :param img: store as png image :return: dictionary mapping each used format to the resulting files name """ if fig_height is None: fig_height = self._height_for_width(fig_width) #filename = # self._get_new_file_name(dir) ret_dict = {} if img: ret_dict["img"] = self._store_as_image(filename + self.img_filename_ending, fig_width, fig_height) if tex: ret_dict["tex"] = self._store_as_tex(filename + ".tex", fig_width, fig_height, standalone=False) if pdf: ret_dict["pdf"] = self._store_as_pdf(filename + ".pdf", fig_width, fig_height) if tex_standalone: ret_dict["tex_standalone"] = self._store_as_tex(filename + "____standalone.tex", fig_width, fig_height, standalone=True) if self.fig is not None: plt.close('all') return ret_dict def _store_as_pdf(self, filename: str, fig_width: float, fig_height: float) -> str: """ Stores the current figure in a pdf file. 
:warning modifies the current figure """ import matplotlib.pyplot as plt if not filename.endswith(".pdf"): filename += ".pdf" self.reset_plt() self._latexify(fig_width, fig_height) try: plt.tight_layout() except ValueError: pass self._format_axes(plt.gca()) plt.savefig(filename) self.reset_plt() return os.path.realpath(filename) def _store_as_tex(self, filename: str, fig_width: float, fig_height: float, standalone: bool) -> str: """ Stores the current figure as latex in a tex file. Needs pgfplots in latex. Works independently of matplotlib. """ if not filename.endswith(".tex"): filename += ".tex" if "min_xval" not in self._hist_data: return x_range = (self._hist_data["min_xval"], self._hist_data["max_xval"]) x_bin_width = (self._hist_data["min_xval"] - self._hist_data["max_xval"]) / self._hist_data["bin_count"] plot_tex = "" ymax = 0 for value in self._hist_data["values"]: hist, bin_edges = np.histogram(value, bins=self._hist_data["bin_count"], range=x_range) #bin_edges = map(_ + (x_bin_width / 2), bin_edges) plot_tex += """ \\addplot coordinates {{ {} ({}, 0) }}; """.format(" ".join(map(lambda d: "({}, {})".format(*d), zip(bin_edges, hist))), bin_edges[-1]) ymax = max(ymax, max(hist)) tex = """ \\pgfplotsset{{width={width}cm, height={height}cm, compat=1.10}} \\begin{{tikzpicture}} \\begin{{axis}}[ ymin=0, ymax={ymax}, bar shift=0pt, enlarge x limits=0.10, cycle list name=auto, every axis plot/.append style={{ybar interval, opacity={opacity},fill,draw=none,no markers}}, ylabel= , xlabel={xlabel}""".format(width=fig_width, height=fig_height, xlabel=self._hist_data["xlabel"], ymax=ymax * 1.2, opacity= 1 if len(self._hist_data["values"]) == 1 else 0.75) if self._hist_data["legend"]: legend = "\\\\".join(self._hist_data["legend"]) + "\\\\" tex += """, legend entries={{{}}}""".format(legend) tex += """ ] """ tex += plot_tex tex += """ \end{axis} \end{tikzpicture} """ if standalone: tex = """ \\documentclass[margin=10pt]{standalone} \\usepackage{pgfplots} \\begin{document} """ + tex + """ \\end{document} """ with open(filename, "w") as f: f.write(tex) return os.path.realpath(filename) def _store_as_image(self, filename: str, fig_width: float, fig_height: float) -> str: """ Stores the current figure as an $img_filename_ending image. """ import matplotlib.pyplot as plt if not filename.endswith(self.img_filename_ending): filename += self.img_filename_ending self.reset_plt() plt.savefig(filename) self.reset_plt() return os.path.realpath(filename) def _freedman_diaconis_bins(self, *arrays: t.List) -> int: """ Calculate number of hist bins using Freedman-Diaconis rule. If more than one array is passed, the maximum number of bins calculated for each array is used. Adapted from seaborns source code. """ # From http://stats.stackexchange.com/questions/798/ import seaborn as sns def freedman_diaconis(array: np.array): array = [a for a in array if not math.isnan(a)] h = 2 * sns.utils.iqr(array) / (len(array) ** (1 / 3)) # fall back to sqrt(a) bins if iqr is 0 if h == 0: return int(np.sqrt(len(array))) else: return int(np.ceil((max(array) - min(array)) / h)) return max(map(freedman_diaconis, arrays)) def is_single_valued(self) -> bool: """ Does the data consist only of one unique value? 
""" return False def histogram(self, fig_width: int, fig_height: float = None, x_ticks: list = None, y_ticks: list = None, show_legend: bool = None, type: str = None, align: str = 'mid', x_label: str = None, y_label: str = None, zoom_in: bool = True, other_objs: t.List['BaseStatObject'] = None, other_obj_names: t.List[str] = None, own_name: str = None, **kwargs): """ Plots a histogram as the current figure. Don't forget to close it via fig.close() :param x_ticks: None: use default ticks, list: use the given ticks :param y_ticks: None: use default ticks, list: use the given ticks :param show_legend: show a legend in the plot? If None only show one if there are more than one sub histograms :param type: histogram type (either 'bar', 'barstacked', 'step', 'stepfilled' or None for auto) :param align: controls where each bar centered ('left', 'mid' or 'right') :param x_label: if not None, shows the given x label :param y_lable: if not None: shows the given y label :param zoom_in: does the x axis start at the minimum x value? :param kwargs: optional arguments passed to the get_data_frame method :param other_objs: addional objects to plot on the same histogram (only SingleProperty objects allowed) :param other_obj_names: names of the additional objects :param own_name: used with other_objs option """ self._hist_data = {} import matplotlib.pyplot as plt import seaborn as sns if fig_height is None: fig_height = self._height_for_width(fig_width) if self.is_single_valued(): descr = self.description() if isinstance(self, SingleProperty): descr += " [" + self.property + "]" logging.error("Can't plot histogram for {} as it's only single valued.".format(self)) return df = self.get_data_frame(**kwargs) if other_objs: typecheck(self, SingleProperty) for obj in other_objs: if obj.is_single_valued() or not isinstance(obj, SingleProperty): logging.error("Can't additionally plot histogram for {} as it's only single valued.".format(self)) return series_dict = {} for (i, name) in enumerate(other_obj_names): series_dict[name] = pd.Series(other_objs[i].data, name=name) series_dict[own_name] = self.data df = pd.DataFrame(series_dict, columns=sorted(list(series_dict.keys()))) df_t = df.T show_legend = show_legend or (show_legend is None and len(df_t) > 1) min_xval = min(map(min, df_t.values)) if zoom_in else 0 max_xval = max(map(max, df_t.values)) if type is None: type = 'bar' if len(df_t) == 1 else 'stepfilled' bin_count = self._freedman_diaconis_bins(*df_t.values) bins = np.linspace(min_xval, max_xval, bin_count) self.reset_plt() ymax = 0 for value in df_t.values: hist, bin_edges = np.histogram(value, bins=bin_count, range=(min_xval, max_xval)) ymax = max(ymax, max(hist)) self.fig = plt.figure(figsize=self._fig_size_cm_to_inch(fig_width, fig_height)) plt.xlim(min_xval, max_xval) plt.ylim(0, ymax * (1.2 if show_legend else 1.05)) plt.hist(df.values, bins=bin_count, range=(min_xval, max_xval), histtype=type, align=align, label=list(reversed(df.keys())), alpha= 0.75 if len(df_t) > 1 else 1) #sns.distplot(df, bins=bin_count, color=["red", "blue", "yellow"][0:len(df_t)]) if x_ticks is not None: plt.xticks(x_ticks) if y_ticks is not None: plt.yticks(y_ticks) legend = None if show_legend: legend = list(df.keys()) plt.legend(labels=list(reversed(legend))) if len(df_t) == 1: plt.xlabel(df.keys()[0]) if x_label is not None: plt.xlabel(x_label) if y_label is not None: plt.xlabel(y_label) self._hist_data = { "xlabel": x_label or ("" if len(df_t) > 1 else df.keys()[0]), "legend": None if legend is None else 
list(reversed(legend)), "min_xval": min_xval, "max_xval": max_xval, "values": list(reversed(df_t.values)), "bin_count": bin_count } def description(self) -> str: return str(self) def __str__(self) -> str: return self.description() def reset_plt(self): import seaborn as sns sns.reset_defaults() sns.set_style("darkgrid") sns.set_palette(sns.color_palette("muted")) class Single(BaseStatObject): """ A statistical wrapper around a single run data object. """ def __init__(self, data: t.Union[RunData, 'Single']): super().__init__() if isinstance(data, RunData): self.rundata = data else: self.rundata = data.rundata self.attributes = self.rundata.attributes self.properties = {} # type: t.Dict[str, SingleProperty] """ SingleProperty objects for each property """ for prop in data.properties: self.properties[prop] = SingleProperty(self, self.rundata, prop) def _get_stat_messages(self) -> t.List[StatMessage]: """ Combines the messages for all inherited SingleProperty objects (for each property), :return: list of all messages """ msgs = [x for prop in self.properties for x in self.properties[prop].get_stat_messages()] return msgs def get_data_frame(self) -> 'pd.DataFrame': series_dict = {} for prop in self.properties: series_dict[prop] = pd.Series(self.properties[prop].data, name=prop) frame = pd.DataFrame(series_dict, columns=sorted(self.properties.keys())) return frame def description(self) -> str: return self.rundata.description() def eq_except_property(self, other) -> bool: return isinstance(other, type(self)) and self.rundata == other.rundata def __eq__(self, other) -> bool: return self.eq_except_property(other) class SingleProperty(BaseStatObject): """ A statistical wrapper around a single run data block for a specific measured property. """ def __init__(self, parent: Single, data: t.Union[RunData, 'SingleProperty'], property: str): super().__init__() self.parent = parent if isinstance(data, RunData): self.rundata = data # type: RunData self.data = data[property] # type: t.List[t.Union[int, float]] else: self.rundata = data.rundata self.data = data.data self.array = np.array(self.data) self.property = property def _get_stat_messages(self) -> t.List[StatMessage]: msgs = [ StdDeviationToHighWarning.create_if_valid(self, self.std_dev_per_mean(), self.property), StdDeviationToHighError.create_if_valid(self, self.std_dev_per_mean(), self.property), NotEnoughObservationsWarning.create_if_valid(self, self.observations(), self.property), NotEnoughObservationsError.create_if_valid(self, self.observations(), self.property) ] return msgs def mean(self) -> float: return np.mean(self.array) def median(self) -> float: return np.median(self.array) def min(self) -> float: return np.min(self.array) def max(self) -> float: return np.max(self.array) def std_dev(self) -> float: """ Returns the standard deviation. """ return np.std(self.array) def std_devs(self) -> t.Tuple[float, float]: """ Calculates the standard deviation of elements <= mean and of the elements > mean. 
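Both sides use the overall mean, i.e. sqrt(sum((x - mean)^2) / (n_side - 1)) over the
respective subset: for the values [2, 4, 6, 8] with mean 5 the lower side is [2, 4] and the
upper side [6, 8], giving roughly (3.16, 3.16).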
:return: (lower, upper) """ mean = self.mean() def std_dev(elements: list) -> float: return np.sqrt(sum(np.power(x - mean, 2) for x in elements) / (len(elements) - 1)) lower = [x for x in self.array if x <= mean] upper = [x for x in self.array if x > mean] return std_dev(lower), std_dev(upper) def std_dev_per_mean(self) -> float: return self.std_dev() / self.mean() def variance(self) -> float: return np.var(self.array) def observations(self) -> int: return len(self.data) def __len__(self) -> int: return len(self.data) def eq_except_property(self, other) -> bool: return isinstance(other, SingleProperty) and self.rundata == other.rundata def __eq__(self, other): return self.eq_except_property(other) and self.property == other.property def sem(self) -> float: """ Returns the standard error of the mean (standard deviation / sqrt(observations)). """ return st.sem(self.array) def std_error_mean(self) -> float: return st.sem(self.array) def mean_ci(self, alpha: float) -> t.Tuple[float, float]: """ Calculates the confidence interval in which the population mean lies with the given probability. Assumes normal distribution. :param alpha: given probability :return: lower, upper bound :see http://stackoverflow.com/a/15034143 """ h = self.std_error_mean() * st.t._ppf((1+alpha)/2.0, self.observations() - 1) return self.mean() - h, self.mean() + h def std_dev_ci(self, alpha: float) -> t.Tuple[float, float]: """ Calculates the confidence interval in which the standard deviation lies with the given probability. Assumes normal distribution. :param alpha: given probability :return: lower, upper bound :see http://www.stat.purdue.edu/~tlzhang/stat511/chapter7_4.pdf """ var = self.variance() * (self.observations() - 1) upper = np.sqrt(var / st.t._ppf(alpha/2.0, self.observations() - 1)) lower = np.sqrt(var / st.t._ppf(1-alpha/2.0, self.observations() - 1)) return lower, upper def is_single_valued(self) -> bool: """ Does the data consist only of one unique value? """ return len(set(self.data)) == 1 def description(self) -> str: return self.rundata.description() def get_data_frame(self) -> 'pd.DataFrame': series_dict = {self.property: pd.Series(self.data, name=self.property)} frame = pd.DataFrame(series_dict, columns=[self.property]) return frame def skewedness(self) -> float: """ Calculates the skewedness of the data. """ return sp.stats.skew(self.data, axis=0, bias=True) if len(self.data) >= 8 else float("nan") def normality(self) -> float: """ Calculates the probability of the data being normal distributed. """ return sp.stats.normaltest(self.data)[1] if len(self.data) >= 8 else float("nan") def percentile(self, q: int) -> float: """ Calculates the q th percentile. q must be between 0 and 100 inclusive. """ return np.percentile(self.data, q) def quartiles(self) -> t.Tuple[float, float, float]: """ Calculates the 3 quartiles (1, 2 and 3) """ return self.percentile(25), self.percentile(50), self.percentile(75) def iqr(self) -> float: """ Calculates the interquartile range. """ return np.subtract(*np.percentile(self.data, [75, 25])) def whiskers(self, whis: float = 1.5) -> t.Tuple[float, float]: """ Calculates the upper and the lower whisker for a boxplot. I.e. the minimum and the maximum value of the data set the lie in the range (Q1 - whis * IQR, Q3 + whis * IQR). IQR being the interquartil distance, Q1 the lower and Q2 the upper quartile. 
Adapted from http://stackoverflow.com/a/20096945 """ q1, q2, q3 = self.quartiles() iqr = self.iqr() hi_val = q1 + whis * self.iqr() whisk_hi = np.compress(self.array <= hi_val, self.array) if len(whisk_hi) == 0 or np.max(whisk_hi) < q3: whisk_hi = q3 else: whisk_hi = max(whisk_hi) # get low extreme lo_val = q1 - whis * iqr whisk_lo = np.compress(self.array >= lo_val, self.array) if len(whisk_lo) == 0 or np.min(whisk_lo) > q1: whisk_lo = q1 else: whisk_lo = min(whisk_lo) return whisk_lo, whisk_hi class TestedPair(BaseStatObject): """ A statistical wrapper around two run data objects that are compared via a tester. """ def __init__(self, first: t.Union[RunData, Single], second: t.Union[RunData, Single], tester: Tester = None): super().__init__() self.first = Single(first) self.second = Single(second) self.tester = tester or TesterRegistry.get_for_name(TesterRegistry.get_used(), Settings()["stats/tester"], Settings()["stats/uncertainty_range"]) self.properties = {} # type: t.Dict[str, TestedPairProperty] """ TestedPairProperty objects for each shared property of the inherited Single objects """ for prop in set(self.first.properties.keys()).intersection(self.second.properties.keys()): self.properties[prop] = TestedPairProperty(self, self.first, self.second, prop, tester) def _get_stat_messages(self) -> t.List[StatMessage]: """ Combines the messages for all inherited TestedPairProperty objects (for each property), :return: simplified list of all messages """ msgs = [x for prop in self.properties for x in self.properties[prop].get_stat_messages()] return msgs def rel_difference(self) -> float: """ Calculates the geometric mean of the relative mean differences (first - second) / first. :see http://www.cse.unsw.edu.au/~cs9242/15/papers/Fleming_Wallace_86.pdf """ # todo: add method (and report.py support) to give a score (based on first mean / second mean) mean = 1 for x in self.properties.values(): mean *= x.mean_diff_per_mean() if mean == 0: return 1 sig = np.sign(mean) return sig * math.pow(abs(mean), 1 / len(self.properties)) def swap(self) -> 'TestedPair': """ Creates a new pair with the elements swapped. :return: new pair object """ return TestedPair(self.second, self.first, self.tester) def __getitem__(self, property: str) -> 'TestedPairProperty': return self.properties[property] def eq_except_property(self, other) -> bool: return isinstance(other, type(self)) and self.first == other.first and self.second == other.second \ and self.tester == other.tester def __eq__(self, other) -> bool: return self.eq_except_property(other) def description(self) -> str: return "{} vs. {}".format(self.first, self.second) class TestedPairsAndSingles(BaseStatObject): """ A wrapper around a list of tested pairs and singles. 
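A minimal usage sketch (illustrative; ``run_datas`` is assumed to be a list of RunData objects)::

    stats = TestedPairsAndSingles(run_datas, distinct_descriptions=True)
    for pair in stats.pairs:                  # all pairwise comparisons
        print(pair.description(), pair.rel_difference())
    for msg in stats.get_stat_messages():     # combined warnings and errors
        print(msg.generate_msg_text(show_parent=True))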
""" def __init__(self, singles: t.List[t.Union[RunData, Single]], pairs: t.List[TestedPair] = None, distinct_descriptions: bool = False): super().__init__() self.singles = list(map(Single, singles)) # type: t.List[Single] self.pairs = pairs or [] # type: t.List[TestedPair] if distinct_descriptions: descr_attrs = defaultdict(lambda: 0) # type: t.Dict[str, int] descr_nr_zero = {} # type: t.Dict[str, Single] for single in self.singles: if "description" in single.attributes: descr = single.attributes["description"] num = descr_attrs[descr] descr_attrs[descr] += 1 if num != 0: single.attributes["description"] += " [{}]".format(num) if num == 1: descr_nr_zero[descr].attributes["description"] += " [0]" else: descr_nr_zero[descr] = single if pairs is None and len(self.singles) > 1: for i in range(0, len(self.singles) - 1): for j in range(i + 1, len(self.singles)): self.pairs.append(self.get_pair(i, j)) self.singles_properties = {} # type: t.Dict[str, SinglesProperty] for prop in self.properties(): self.singles_properties[prop] = SinglesProperty(self.singles, prop) def number_of_singles(self) -> int: return len(self.singles) def get_pair(self, first_id: int, second_id: int) -> TestedPair: l = self.number_of_singles() assert 0 <= first_id < l and 0 <= second_id < l return TestedPair(self.singles[first_id], self.singles[second_id]) def properties(self) -> t.List[str]: """ Returns the properties that are shared among all single run data objects. """ if not self.singles: return props = set(self.singles[0].properties.keys()) for single in self.singles[1:]: props.intersection_update(single.properties.keys()) return sorted(props) def get_stat_messages(self) -> t.List[StatMessage]: """ Combines the messages for all inherited TestedPair and Single objects, :return: simplified list of all messages """ msgs = [] for pair in self.pairs: msgs.extend(pair.get_stat_messages()) return msgs def __getitem__(self, id: int) -> Single: assert 0 <= id < self.number_of_singles() return self.singles[id] class EffectToSmallWarning(StatWarning): message = "The mean difference per standard deviation of {props} is less than {b_val}." hint = "Try to reduce the standard deviation if you think that the measured difference is significant: " \ "With the exec run driver you can probably use the stop_start plugin, preheat and sleep plugins. " \ "Also consider increasing the number of measured runs." border_value = 2 value_format = StatMessageValueFormat.FLOAT @classmethod def check_value(cls, value) -> bool: return value >= cls.border_value class EffectToSmallError(EffectToSmallWarning): type = StatMessageType.ERROR border_value = 1 class TestedPairProperty(BaseStatObject): """ Statistic helper for a compared pair of run data blocks for a specific measured property. 
""" def __init__(self, parent: TestedPair, first: Single, second: Single, property: str, tester: Tester = None): super().__init__() self.parent = parent self.first = SingleProperty(first, first.rundata, property) self.second = SingleProperty(second, second.rundata, property) self.tester = tester or TesterRegistry.get_for_name(TesterRegistry.get_used(), Settings()["stats/tester"], Settings()["stats/uncertainty_range"]) self.property = property def _get_stat_messages(self) -> t.List[StatMessage]: """ Combines the messages for all inherited TestedPairProperty objects (for each property), :return: simplified list of all messages """ msgs = self.first.get_stat_messages() + self.second.get_stat_messages() if self.is_equal() == False: msgs += [ EffectToSmallWarning.create_if_valid(self, self.mean_diff_per_dev(), self.property), EffectToSmallError.create_if_valid(self, self.mean_diff_per_dev(), self.property) ] return msgs def mean_diff(self) -> float: return self.first.mean() - self.second.mean() def mean_diff_ci(self, alpha: float) -> t.Tuple[float, float]: """ Calculates the confidence interval in which the mean difference lies with the given probability. Assumes normal distribution. :param alpha: given probability :return: lower, upper bound :see http://www.kean.edu/~fosborne/bstat/06b2means.html """ d = self.mean_diff() t = sp.stats.norm.sf(1-alpha/2.0) * np.sqrt(self.first.variance() / self.first.observations() - self.second.variance() / self.second.observations()) return d - t, d + t def mean_diff_per_mean(self) -> float: """ :return: (mean(A) - mean(B)) / mean(A) """ return self.mean_diff() / self.first.mean() def mean_diff_per_dev(self) -> float: """ Calculates the mean difference per standard deviation (maximum of first and second). """ return self.mean_diff() / self.max_std_dev() def equal_prob(self) -> float: """ Probability of the nullhypothesis being not not correct (three way logic!!!). :return: p value between 0 and 1 """ return self.tester.test(self.first.data, self.second.data) def is_equal(self) -> t.Union[None, bool]: """ Checks the nullhypthosesis. :return: True or False if the p val isn't in the uncertainty range of the tester, None else """ if self.tester.is_uncertain(self.first.data, self.second.data): return None return self.tester.is_equal(self.first.data, self.second.data) def mean_std_dev(self) -> float: return (self.first.mean() + self.second.mean()) / 2 def max_std_dev(self) -> float: return max(self.first.std_dev(), self.second.std_dev()) def get_data_frame(self, show_property = True) -> 'pd.DataFrame': columns = [] if show_property: columns = ["{}: {}".format(self.first, self.property), "{}: {}".format(self.second, self.property)] else: columns = [str(self.first), str(self.second)] series_dict = { columns[0]: pd.Series(self.first.data, name=columns[0]), columns[1]: pd.Series(self.second.data, name=columns[1]) } frame = pd.DataFrame(series_dict, columns=list(reversed(columns))) return frame def is_single_valued(self) -> bool: return self.first.is_single_valued() and self.second.is_single_valued() def eq_except_property(self, other) -> bool: return isinstance(other, type(self)) and self.first.eq_except_property(self.second) \ and self.tester == other.tester def __eq__(self, other) -> bool: return self.eq_except_property(other) and self.property == other.property def min_observations(self) -> int: return min(self.first.observations(), self.second.observations()) def description(self) -> str: return "{} vs. 
{}".format(self.first, self.second) def swap(self) -> 'TestedPairProperty': return TestedPairProperty(self.parent, self.parent.first, self.parent.second, self.property, self.tester) class SinglesProperty(BaseStatObject): def __init__(self, singles: t.List[t.Union[Single, SingleProperty]], property: str): super().__init__() self.singles = singles # type: t.List[SingleProperty] if isinstance(singles, List(T(Single))): self.singles = [single.properties[property] for single in singles] self.property = property def __str__(self) -> str: return "SinglesProperty(property={prop})".format(prop=self.property) def get_data_frame(self, **kwargs) -> 'pd.DataFrame': columns = [] data = {} min_len = min(len(single.data) for single in self.singles) for single in self.singles: name = str(single.parent) columns.append(name) data[name] = single.data[0:min_len] return pd.DataFrame(data, columns=columns) def boxplot(self, fig_width: int, fig_height: float = None): """ Creates a (horizontal) box plot comparing all single object for a given property. """ import seaborn as sns import matplotlib.pyplot as plt if fig_height is None: fig_height = self._height_for_width(fig_width) self.fig = plt.figure(figsize=self._fig_size_cm_to_inch(fig_width, fig_height)) df = self.get_data_frame() sns.boxplot(data=df, orient="h") def _store_as_tex(self, filename: str, fig_width: float, fig_height: float, standalone: bool): """ Stores the current figure as latex in a tex file. Works independently of matplotlib. Needs following code in the document preamble: \\usepackage{pgfplots} \\usepgfplotslibrary{statistics} Useful demo at http://tex.stackexchange.com/questions/115210/boxplot-in-latex """ if not filename.endswith(".tex"): filename += ".tex" descrs = [str(single.parent) for single in self.singles] tex = """ \\pgfplotsset{{width={width}cm, height={height}cm, compat=1.8}} \\begin{{tikzpicture}} \\begin{{axis}}[ cycle list name=auto, xlabel={xlabel}, ytick={{{yticks}}}, yticklabels={{{yticklabels}}}, max space between ticks=50pt ]""".format( width=fig_width, height=fig_height, xlabel=self.property, yticklabels="\\\\".join(descrs) + "\\\\", yticks=",".join(map(str, range(1, len(descrs) + 1))) ) for single in self.singles: q1, q2, q3 = single.quartiles() wh_lower, wh_upper = single.whiskers() tex += """ \\addplot+[ boxplot prepared={{ median={median}, upper quartile={q3}, lower quartile={q1}, upper whisker={wh_upper}, lower whisker={wh_lower} }}, ] coordinates {{}}; """.format(median=single.median(), **locals()) tex += """ \end{axis} \end{tikzpicture} """ if standalone: tex = """ \\documentclass[margin=10pt]{standalone} \\usepackage{pgfplots} \\usepgfplotslibrary{statistics} \\begin{document} """ + tex + """ \\end{document} """ with open(filename, "w") as f: f.write(tex) return os.path.realpath(filename) def max(self) -> float: return max(single.max() for single in self.singles)PK#}6H9d77temci/tester/rundata.py""" Contains the RunData object for benchmarking data of specific program block and the RunDataStatsHelper that provides helper methods for working with these objects. """ from temci.tester.testers import Tester, TesterRegistry from temci.utils.typecheck import * from temci.utils.settings import Settings import temci.utils.util as util if util.can_import("scipy"): import scipy import typing as t class RunData(object): """ A set of benchmarking data for a specific program block. 
""" def __init__(self, data: t.Dict[str, t.List[t.Union[int, float]]] = None, attributes: t.Dict[str, str] = None, external: bool = False): """ Initializes a new run data object with a list of measured properties, an optional dictionary mapping each property to a list of actual values and a dictionary of optional attributes that describe its program block. """ typecheck(data, E(None) | Dict(all_keys=False)) typecheck(attributes, Exact(None) | Dict(key_type=Str(), all_keys=False)) self.external = external self.properties = [] # type: t.List[str] """ List of measured properties. They might not all be measured the same number of times. """ self.data = {} # type: t.Dict[str, t.List[t.Union[int, float]]] """ Raw benchmarking data, mapping properties to their corresponding values """ if data is not None and len(data) > 0: self.add_data_block(data) self.attributes = attributes or {} # type: t.Dict[str, str] def add_data_block(self, data_block: t.Dict[str, t.List[t.Union[int, float]]]): """ Adds a block of data. The passed dictionary maps each of the run datas properties to list of actual values (from each benchmarking run). """ typecheck(data_block, Dict(key_type=Str(), value_type= List(Int() | Float()), all_keys=False)) self.properties = set(self.properties).union(set(data_block.keys())) for prop in data_block: if prop not in self.data: self.data[prop] = [] self.properties.add(prop) self.data[prop].extend(data_block[prop]) self.properties = sorted(list(self.properties)) def __len__(self) -> int: """ Returns the number of measured properties. """ return len(self.data) def min_values(self) -> int: """ Returns the minimum number of measured values for the associated program block over all properties. """ return min(map(len, self.data.values())) if len(self) > 0 else 0 def benchmarkings(self) -> int: """ Returns the maximum number of measured values for the associated program block over all properties. This number should be equivalent to the number of measured benchmarking runs. """ return max(map(len, self.data.values())) if len(self) > 0 else 0 def __getitem__(self, property: str): """ Returns the benchmarking values associated with the passed property. """ return self.data[property] def to_dict(self) -> dict: """ Returns a dictionary that represents this run data object. """ return { "attributes": self.attributes, "data": self.data } def __str__(self): return repr(self.attributes) def description(self): if "description" in self.attributes: return self.attributes["description"] return ", ".join("{}={}".format(key, self.attributes[key]) for key in self.attributes) class RunDataStatsHelper(object): """ This class helps to simplify the work with a set of run data observations. """ def __init__(self, runs: t.List[RunData], tester: Tester = None, external_count: int = 0): """ Don't use the constructor use init_from_dicts if possible. :param runs: list of run data objects :param tester: used tester or tester that is set in the settings """ self.tester = tester or TesterRegistry.get_for_name(TesterRegistry.get_used(), Settings()["stats/uncertainty_range"]) typecheck(runs, List(T(RunData))) self.runs = runs # type: t.List[RunData] self.external_count = external_count def properties(self) -> t.List[str]: """ Returns a sorted list of all properties that exist in all (!) run data blocks. 
""" if not self.runs: return [] props = set(self.runs[0].properties) for rd in self.runs[1:]: props = props.intersection(rd.properties) return list(sorted(props)) @classmethod def init_from_dicts(cls, runs: t.List[Dict] = None, external: bool = False) -> 'RunDataStatsHelper': """ Expected structure of the stats settings and the runs parameter:: "stats": { "tester": ..., "properties": ["prop1", ...], # or "properties": [("prop1", "description of prop1"), ...], "uncertainty_range": (0.1, 0.3) } "runs": [ {"attributes": {"attr1": ..., ...}, "data": {"ov-time": [...], ...}}, ... ] :param runs: list of dictionaries representing the benchmarking runs for each program block :param external: are the passed runs not from this benchmarking run but from another? :rtype RunDataStatsHelper :raises ValueError if the stats of the runs parameter have not the correct structure """ typecheck(runs, List(Dict({ "data": Dict(key_type=Str(), value_type=List(Int()|Float()), all_keys=False) | NonExistent(), "attributes": Dict(key_type=Str(), all_keys=False) }, all_keys=False)), value_name="runs parameter") run_datas = [] runs = runs or [] # type: t.List[dict] for run in runs: if "data" not in run: run["data"] = {} run_datas.append(RunData(run["data"], run["attributes"], external=external)) return RunDataStatsHelper(run_datas, external_count=len(runs) if external else 0) def _is_uncertain(self, property: str, data1: RunData, data2: RunData) -> bool: return self.tester.is_uncertain(data1[property], data2[property]) def _is_equal(self, property: str, data1: RunData, data2: RunData) -> bool: return self.tester.is_equal(data1[property], data2[property]) def _is_unequal(self, property: str, data1: RunData, data2: RunData) -> bool: return self.tester.is_unequal(data1[property], data2[property]) def is_uncertain(self, p_val: float) -> bool: return min(*Settings()["stats/uncertainty_range"]) <= p_val <= max(*Settings()["stats/uncertainty_range"]) def is_equal(self, p_val: float) -> bool: return p_val > max(*Settings()["stats/uncertainty_range"]) def is_unequal(self, p_val: float) -> bool: return p_val < min(*Settings()["stats/uncertainty_range"]) def _speed_up(self, property: str, data1: RunData, data2: RunData): """ Calculates the speed up from the second to the first (e.g. the first is RESULT * 100 % faster than the second). """ return (scipy.mean(data2[property]) - scipy.mean(data1[property])) \ / scipy.mean(data1[property]) def _estimate_time_for_run_datas(self, run_bin_size: int, data1: RunData, data2: RunData, min_runs: int, max_runs: int) -> float: if min(len(data1), len(data2)) == 0 or "ov-time" not in data1.properties or "ov-time" not in data2.properties: return max_runs needed_runs = [] for prop in set(data1.properties).intersection(data2.properties): estimate = self.tester.estimate_needed_runs(data1[prop], data2[prop], run_bin_size, min_runs, max_runs) needed_runs.append(estimate) avg_time = max(scipy.mean(data1["ov-time"]), scipy.mean(data2["ov-time"])) return max(needed_runs) * avg_time def get_program_ids_to_bench(self) -> t.List[int]: """ Returns the ids (the first gets id 0, …) of the program block / run data object that should be benchmarked again. 
""" to_bench = set() for (i, run) in enumerate(self.runs): if i in to_bench: continue for j in range(i): if j in to_bench: continue run2 = self.runs[j] if any(self._is_uncertain(prop, run, run2) for prop in set(run.properties) .intersection(run2.properties)): to_bench.add(i) to_bench.add(j) return [i - self.external_count for i in to_bench if i >= self.external_count] def estimate_time(self, run_bin_size: int, min_runs: int, max_runs: int) -> float: """ Roughly erstimates the time needed to finish benchmarking all program blocks. It doesn't take any parallelism into account. Therefore divide the number by the used parallel processes. :param run_bin_size: times a program block is benchmarked in a single block of time :param min_runs: minimum number of allowed runs :param max_runs: maximum number of allowed runs :return estimated time in seconds or float("inf") if no proper estimation could be made """ to_bench = self.get_program_ids_to_bench() max_times = [0 for i in self.runs] for i in to_bench: run = self.runs[i] for j in to_bench: max_time = self._estimate_time_for_run_datas(run_bin_size, run, self.runs[j], min_runs, max_runs) max_times[i] = max(max_times[i], max_time) max_times[j] = max(max_times[j], max_time) if max_time == float("inf"): return float("inf") return sum(max_times) def estimate_time_for_next_round(self, run_bin_size: int, all: bool) -> float: """ Roughly estimates the time needed for the next benchmarking round. :param run_bin_size: times a program block is benchmarked in a single block of time and the size of a round :param all: expect all program block to be benchmarked :return estimated time in seconds """ if "ov-time" not in self.properties(): return -1 summed = 0 to_bench = range(0, len(self.runs)) if all else self.get_program_ids_to_bench() for i in to_bench: summed += scipy.mean(self.runs[i]["ov-time"]) * run_bin_size return summed def add_run_data(self, data: list = None, attributes: dict = None) -> int: """ Adds a new run data (corresponding to a program block) and returns its id. :param data: benchmarking data of the new run data object :param attributes: attributes of the new run data object :return: id of the run data object (and its corresponding program block) """ self.runs.append(RunData(self.properties, data, attributes)) return len(self.runs) - 1 def add_data_block(self, program_id: int, data_block: t.Dict[str, t.List[t.Union[int, float]]]): """ Add block of data for the program block with the given id. :param program_id: id of the program. :param data_block: list of data from several benchmarking runs of the program block :raises ValueError if the program block with the given id doesn't exist """ program_id += self.external_count assert program_id >= self.external_count if program_id >= len(self.runs): raise ValueError("Program block with id {} doesn't exist".format(program_id - self.external_count)) self.runs[program_id].add_data_block(data_block) def get_evaluation(self, with_equal: bool, with_unequal: bool, with_uncertain: bool) -> dict: """ Structure of the returned list items:: - data: # set of two run data objects properties: # information for each property that is equal, ... -prop: - equal: True/False uncertain: True/False p_val: probability of the null hypothesis speed_up: speed up from the first to the second description: description of the property :param with_equal: with tuple with at least one "equal" property :param with_unequal: ... 
unequal property :param with_uncertain: include also uncertain properties :return: list of tuples for which at least one property matches the criteria """ arr = [] for i in range(0, len(self.runs) - 1): for j in range(i + 1, len(self.runs)): data = (self.runs[i], self.runs[j]) props = {} for prop in self.properties(): map = {"p_val": self.tester.test(data[0][prop], data[1][prop]), "speed_up": self._speed_up(prop, *data), "description": prop, "equal": self._is_equal(prop, *data), "unequal": self._is_unequal(prop, *data), "uncertain": self._is_uncertain(prop, *data)} if map["unequal"] == with_unequal and map["equal"] == with_equal \ and map["uncertain"] == with_uncertain: props[prop] = map if len(props) > 0: arr.append({ "data": data, "properties": props }) return arr def serialize(self) -> t.List: return list(x.to_dict() for x in self.runs)PK#}6H{6UU temci/tester/report_processor.pyfrom temci.tester.report import ReporterRegistry from temci.tester.rundata import RunDataStatsHelper class ReportProcessor: def __init__(self, stats_helper: RunDataStatsHelper = None): self.reporter = ReporterRegistry.get_for_name(ReporterRegistry.get_used(), stats_helper) def report(self): self.reporter.report()PK#}6Htemci/tester/__init__.pyPK6Htemci/misc/__init__.pyPKN;H^ltemci/misc/game.py""" Benchmarks game inspired comparison of different implementations for a given language. It doesn't really belong directly to the temci tool, but uses big parts of it. It's currently in a pre alpha state as it's a part of the evaluation for my bachelor thesis that I'm currently doing, """ import logging, time import typing as t import multiprocessing START_TIME = time.time() import subprocess import itertools import temci.utils.util as util if __name__ == "__main__": util.allow_all_imports = True from temci.tester.rundata import RunData from temci.tester.stats import SingleProperty, Single, SinglesProperty from temci.utils.typecheck import * import os, shutil, copy from pprint import pprint from temci.tester import report from temci.utils.util import InsertionTimeOrderedDict itod_from_list = InsertionTimeOrderedDict.from_list if util.can_import("scipy"): import scipy.stats as stats import ruamel.yaml as yaml from temci.tester.report import HTMLReporter2, html_escape_property FIG_WIDTH = 15 FIG_HEIGHT_PER_ELEMENT = 1.5 class BaseObject: def __init__(self, name: str): self.name = name def _create_dir(self, dir: str): """ ... and delete all contents if the directory all ready exists. 
""" if os.path.exists(dir): shutil.rmtree(dir) os.mkdir(dir) def _create_own_dir(self, base_dir: str) -> str: dir = os.path.realpath(os.path.join(base_dir, self.name)) self._create_dir(dir) return dir def _process_build_obj(self, arg: t.Tuple[str, 'BaseObject']): path, obj = arg tmp = obj.build(path) if isinstance(tmp, list): return tmp else: return [tmp] def _buildup_dict(self, path: str, base_objs: t.Dict[str, 'BaseObject'], multiprocess: bool = False) -> t.List[dict]: objs = [] for key in base_objs: objs.append((path, base_objs[key])) map_func = map if multiprocess: pool = multiprocessing.Pool() map_func = pool.map ret_fts = map_func(self._process_build_obj, objs) ret = [] for elem in ret_fts: ret.extend(elem) return ret def build(self, base_dir: str) -> t.List[dict]: pass @classmethod def from_config_dict(cls, *args) -> 'BaseObject': pass def boxplot_html(self, base_file_name: str, singles: t.List[SingleProperty]) -> str: sp = SinglesProperty(singles, self.name) sp.boxplot(FIG_WIDTH, max(len(singles) * FIG_HEIGHT_PER_ELEMENT, 6)) d = sp.store_figure(base_file_name, fig_width=FIG_WIDTH, fig_height=max(len(singles) * FIG_HEIGHT_PER_ELEMENT, 4), pdf=False) html = """

""".format(d["img"]) for format in sorted(d): html += """ {} """.format(d[format], format) return html + "

" def boxplot_html_for_data(self, name: str, base_file_name: str, data: t.Dict[str, t.List[float]]): singles = [] for var in data: run_data = RunData({name: data[var]}, {"description": str(var)}) singles.append(SingleProperty(Single(run_data), run_data, name)) return self.boxplot_html(base_file_name, singles) class Implementation(BaseObject): def __init__(self, parent: 'ProgramWithInput', name: str, run_cmd: str, build_cmd: str = None, run_data: t.List[t.Union[int, float]] = None): super().__init__(name) typecheck_locals(parent=T(ProgramWithInput)) self.parent = parent self.run_cmd = run_cmd self.build_cmd = build_cmd self.run_data = run_data def get_single_property(self) -> SingleProperty: assert self.run_data is not None data = RunData({self.name: self.run_data}) return SingleProperty(Single(RunData({self.name: self.run_data})), data, self.name) @classmethod def from_config_dict(cls, parent: 'ProgramWithInput', config: dict) -> 'Implementation': typecheck(config, Dict({ "name": Str(), "run_cmd": Str(), "build_cmd": Str() | NonExistent() })) return cls(parent, **config) def build(self, base_dir: str) -> t.List[dict]: path = self._create_own_dir(base_dir) d = { "input": self.parent.input, "file": self.parent.parent.file, "bfile": os.path.basename(self.parent.parent.file), "program": self.parent.parent.name, "impl": self.name, "impl_escaped": html_escape_property(self.name) } run_cmd = self.run_cmd.format(**d) if self.parent.parent.file is not None: shutil.copy(self.parent.parent.file, os.path.join(path, os.path.basename(self.parent.parent.file))) for copied_file in self.parent.parent.copied_files: p = os.path.join(path, copied_file) if os.path.isdir(copied_file): shutil.copytree(copied_file, p) else: shutil.copy(copied_file, p) if self.build_cmd: build_cmd = self.build_cmd.format(**d) #pprint(build_cmd) proc = subprocess.Popen(["/bin/sh", "-c", build_cmd], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, cwd=path) out, err = proc.communicate() logging.info(out) if proc.poll() > 0: logging.error("Error while executing {}: {}".format(build_cmd, err)) exit(1) prog_in = self.parent prog = prog_in.parent category = prog.parent lang = category.parent logging.info(path) return { "attributes": { "language": lang.name, "category": category.name, "program": prog.name, "impl": self.name, "input": str(prog_in.input) }, "run_config": { "run_cmd": run_cmd, "cwd": path } } class Input: """ Input with a variable numeric part. """ def __init__(self, prefix: str = None, number: t.Union[int, float] = None, appendix: str = None): self.prefix = prefix or "" self.number = number self.appendix = appendix or "" def __mul__(self, other: t.Union[int, float]) -> 'Input': typecheck_locals(other=Int() | Float()) return Input(self.prefix, None if self.number is None else self.number * other, self.appendix) def __floordiv__(self, other: t.Union[int, float]) -> 'Input': typecheck_locals(other=Int() | Float()) return Input(self.prefix, None if self.number is None else self.number * other, self.appendix) def __str__(self): return self.prefix + str(self.number or "") + self.appendix def __repr__(self): return repr(str(self)) def replace(self, search: str, replacement: str) -> 'Input': """ Returns an input object in which the search string is replaced in the prefix and the appendix. 
""" return Input(self.prefix.replace(search, replacement), self.number, self.appendix.replace(search, replacement)) @classmethod def from_config_dict(cls, config: dict) -> 'Input': typecheck_locals(config=Dict({ "prefix": Str() | NonExistent(), "number": Int() | Float() | NonExistent(), "appendix": Str() | NonExistent() })) return Input(**config) @classmethod def list_from_numbers(cls, *numbers: t.List[t.Union[int, float]]) -> t.List['Input']: return [Input(number=number) for number in numbers] def to_dict(self) -> dict: ret = {} if self.prefix != "": ret["prefix"] = self.prefix if self.number is not None: ret["number"] = self.number if self.appendix != "": ret["appendix"] = self.appendix return ret def __hash__(self, *args, **kwargs): return str(self).__hash__(*args, **kwargs) StatisticalPropertyFunc = t.Callable[[SingleProperty], float] """ Get's passed the SingleProperty object to process and min mean """ rel_mean_func = lambda x, min: x.mean() / min def rel_std_dev_func(x: SingleProperty, min: float) -> float: return x.std_dev() / min class ProgramWithInput(BaseObject): def __init__(self, parent: 'Program', input: Input, impls: t.List[Implementation], id: int): super().__init__(str(id)) self.parent = parent self.input = input self.impls = itod_from_list(impls, lambda x: x.name) # type: t.Dict[str, Implementation] def build(self, base_dir: str) -> t.List[dict]: path = self._create_own_dir(base_dir) return self._buildup_dict(path, self.impls) def __getitem__(self, name: str) -> Implementation: return self.impls[name] def get_single(self): data = InsertionTimeOrderedDict() for impl in self.impls: data[impl] = self.impls[impl] return Single(RunData(data)) def get_single_properties(self) -> t.List[t.Tuple[str, SingleProperty]]: return [(impl, self.impls[impl].get_single_property()) for impl in self.impls] def get_means_rel_to_best(self) -> t.Dict[str, float]: return self.get_statistical_properties_for_each(rel_mean_func) def get_statistical_properties_for_each(self, func: StatisticalPropertyFunc) -> t.Dict[str, float]: sps = self.get_single_properties() best_mean = min(sp.mean() for (impl, sp) in sps) d = InsertionTimeOrderedDict() for (impl, sp) in sps: d[impl] = func(sp, best_mean) return d def get_box_plot_html(self, base_file_name: str) -> str: singles = [] for impl in self.impls: impl_val = self.impls[impl] data = RunData({self.name: impl_val.run_data}, {"description": "{!r}|{}".format(self.input, impl)}) singles.append(SingleProperty(Single(data), data, self.name)) return self.boxplot_html(base_file_name, singles) def get_html(self, base_file_name: str, h_level: int) -> str: sp = None # type: SingleProperty scores = self.get_means_rel_to_best() columns = [ { "name": "implementation", "func": lambda x, sp: x.name, "format": "{}" }, { "name": "n", "func": lambda x, sp: sp.observations(), "format": "{:5d}" }, { "name": "mean", "func": lambda x, sp: sp.mean(), "format": "{:5.5f}" }, { "name": "mean / best mean", "func": lambda x, sp: scores[x.name], "format": "{:5.5f}" }, { "name": "std / mean", "func": lambda x, sp: sp.std_dev_per_mean(), "format": "{:5.2%}" }, { "name": "median", "func": lambda x, sp: sp.median(), "format": "{:5.5f}" } ] html = """ Input: {input} {box_plot} {header} """.format(h=h_level, input=repr(self.input), box_plot=self.get_box_plot_html(base_file_name), header="".join("".format(elem["name"]) for elem in columns)) for impl in self.impls: impl_val = self.impls[impl] sp = impl_val.get_single_property() col_vals = [] for elem in columns: 
col_vals.append(elem["format"].format(elem["func"](impl_val, sp))) html += """ {} """.format("".join("".format(col_val) for col_val in col_vals)) return html + "
{}
{}
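# Illustrative sketch (not part of the template above): how the `columns`
# specification (name / func / format) is turned into one table row. The helper
# name and the HTML tags are assumptions for illustration only.
def render_row(impl_val, sp, columns) -> str:
    cells = [col["format"].format(col["func"](impl_val, sp)) for col in columns]
    return "<tr>" + "".join("<td>{}</td>".format(cell) for cell in cells) + "</tr>"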
" class Program(BaseObject): def __init__(self, parent: 'ProgramCategory', name: str, file: str, prog_inputs: t.List[ProgramWithInput] = None, copied_files: t.List[str] = None): super().__init__(name) self.parent = parent self.file = file self.prog_inputs = itod_from_list(prog_inputs, lambda x: x.name) # type: t.Dict[str, ProgramWithInput] self.copied_files = copied_files or [] # type: t.List[str] @classmethod def from_config_dict(cls, parent: 'ProgramCategory', config: dict) -> 'Implementation': typecheck(config, Dict({ "program": Str(), "file": FileName(allow_non_existent=False), "inputs": List(Dict({ "prefix": Str() | NonExistent(), "number": Int() | Float() | NonExistent(), "appendix": Str() | NonExistent() })) | NonExistent(), "copied_files": List(Str()) | NonExistent(), "impls": List(Dict(all_keys=False)) | NonExistent() })) program = cls(parent, name=config["program"], file=config["file"], copied_files=config["copied_files"] if "copied_files" in config else []) inputs = config["inputs"] if "inputs" in config else [""] for (i, input) in enumerate(inputs): input = Input.from_config_dict(input) prog_input = ProgramWithInput(program, input, [], i) program.prog_inputs[str(input)] = prog_input impls = config["impls"] if "impls" in config else [] prog_input.impls = InsertionTimeOrderedDict() for impl_conf in impls: impl = Implementation.from_config_dict(prog_input, impl_conf) prog_input.impls[impl.name] = impl return program def build(self, base_dir: str) -> t.List[dict]: path = self._create_own_dir(base_dir) return self._buildup_dict(path, self.prog_inputs) def __getitem__(self, input: str) -> ProgramWithInput: return self.prog_inputs[input] def get_box_plot_html(self, base_file_name: str) -> str: singles = [] for input in self.prog_inputs: prog_in = self.prog_inputs[input] for impl in prog_in.impls: impl_val = prog_in.impls[impl] data = RunData({self.name: impl_val.run_data}, {"description": "{!r}|{}".format(input, impl)}) singles.append(SingleProperty(Single(data), data, self.name)) return self.boxplot_html(base_file_name, singles) def get_box_plot_per_input_per_impl_html(self, base_file_name: str, input: str) -> str: """ A box plot for each input that shows the execution times for each implementation. """ return self.prog_inputs[input].get_box_plot_html(base_file_name + "__input_" + str(list(self.prog_inputs.keys()).index(input))) def get_statistical_property_scores_per_input_per_impl(self, func: StatisticalPropertyFunc, input: str) -> t.Dict[str, float]: return self.prog_inputs[input].get_statistical_properties_for_each(func) def get_html(self, base_file_name: str, h_level: int) -> str: html = """ Program: {!r} """.format(h_level, self.name, h_level) html += """ Measured values per implementation and input

""" html += self.get_box_plot_html(base_file_name) scores = self.get_impl_mean_scores() std_devs = self.get_statistical_property_scores(rel_std_dev_func) html += """

""" for impl in scores.keys(): html += """ """.format(impl, scores[impl], std_devs[impl]) html += "
implementation geom mean over means relative to best (per input) aka mean score ... std dev rel. to the best mean
{}{:5.5f}{:5.2%}
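# Worked sketch of the "mean score" reported above (assumed semantics, matching
# get_statistical_property_scores): per input, each implementation's mean is
# divided by the best (smallest) mean for that input, and the per-input ratios
# are then combined with a geometric mean. The numbers are made up.
import scipy.stats as stats

measured_means = {
    "input A": {"gcc": 2.0, "clang": 2.4},
    "input B": {"gcc": 3.0, "clang": 2.7},
}
ratios = {"gcc": [], "clang": []}
for per_impl in measured_means.values():
    best = min(per_impl.values())
    for impl, mean in per_impl.items():
        ratios[impl].append(mean / best)        # what rel_mean_func computes
scores = {impl: stats.gmean(vals) for impl, vals in ratios.items()}
# scores ≈ {'gcc': 1.054, 'clang': 1.095}: gcc is the better implementation here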
" impl_names = list(scores.keys()) for (i, input) in enumerate(self.prog_inputs.keys()): app = html_escape_property(input) if len(app) > 20: app = str(i) html += self.prog_inputs[input].get_html(base_file_name + "_" + app, h_level + 1) return html def get_impl_mean_scores(self) -> t.Dict[str, float]: """ Geometric mean over the means relative to best per implementation (per input). """ return self.get_statistical_property_scores(rel_mean_func) def get_statistical_property_scores(self, func: StatisticalPropertyFunc) -> t.Dict[str, float]: d = InsertionTimeOrderedDict() # type: t.Dict[str, t.List[float]] for input in self.prog_inputs: rel_vals = self.prog_inputs[input].get_statistical_properties_for_each(func) for impl in rel_vals: if impl not in d: d[impl] = [] d[impl].append(rel_vals[impl]) scores = InsertionTimeOrderedDict() for impl in d: scores[impl] = stats.gmean(d[impl]) return scores def _get_inputs_that_contain_impl(self, impl: str) -> t.List[ProgramWithInput]: return list(filter(lambda x: impl in x.impls, self.prog_inputs.values())) class ProgramCategory(BaseObject): """ Represents a specific abstract program that gives the specification for several implementations (aka "program"s). """ def __init__(self, parent: 'Language', name: str, programs: t.List[Program]): super().__init__(name) self.parent = parent self.programs = itod_from_list(programs, lambda x: x.name) # type: t.Dict[str, Program] @classmethod def from_config_dict(cls, parent: 'Language', config: dict) -> 'ProgramCategory': typecheck(config, Dict({ "category": Str(), "programs": List(Dict(all_keys=False)) })) cat = cls(parent, config["category"], []) cat.programs = InsertionTimeOrderedDict() for prog_conf in config["programs"]: prog = Program.from_config_dict(cat, prog_conf) cat.programs[prog.name] = prog return cat def build(self, base_dir: str) -> t.List[dict]: path = self._create_own_dir(base_dir) return self._buildup_dict(path, self.programs) def __getitem__(self, name: str) -> Program: return self.programs[name] def get_box_plot_html(self, base_file_name: str) -> str: # a box plot over the mean scores per sub program scores_per_impl = self.get_scores_per_impl() singles = [] for impl in scores_per_impl: scores = scores_per_impl[impl] name = "mean score" data = RunData({name: scores}, {"description": impl}) singles.append(SingleProperty(Single(data), data, name)) return self.boxplot_html(base_file_name, singles) def get_html(self, base_file_name: str, h_level: int) -> str: html = """ {} """.format(h_level, self.name, h_level) scores = self.get_impl_mean_scores() std_devs = self.get_statistical_property_scores(rel_std_dev_func) if len(self.programs) > 1: html += """ Mean scores per implementation for this program category

""" html += self.get_box_plot_html(base_file_name) html += """

""" for impl in scores.keys(): html += """ """.format(impl, scores[impl], std_devs[impl]) html += "
implementation geom mean over means relative to best (per input and program) aka mean score ... std devs relative to the best means
{}{:5.5f}{:5.2%}
" if len(self.get_input_strs()) > 1: html += """ Mean scores per input """.format(h=h_level + 1) for input in self.get_input_strs(): mean_scores = self.get_statistical_property_scores_per_input_per_impl(rel_mean_func, input) std_scores = self.get_statistical_property_scores_per_input_per_impl(rel_std_dev_func, input) html += """

Mean scores for input {!r}

""".format(input) html += self.get_box_plot_per_input_per_impl_html(base_file_name, input) html += """

""" for impl in mean_scores.keys(): html += """ """.format(impl, stats.gmean(mean_scores[impl]), stats.gmean(std_scores[impl])) html += "
impl geom mean over means relative to best (per input and program) aka mean score ... std devs relative to the best means
{}{:5.5f}{:5.2%}
" impl_names = list(scores.keys()) for (i, prog) in enumerate(self.programs): html += self.programs[prog].get_html(base_file_name + "_" + html_escape_property(prog), h_level + 1) return html def get_scores_per_impl(self) -> t.Dict[str, t.List[float]]: return self.get_statistical_property_scores_per_impl(rel_mean_func) def get_statistical_property_scores_per_impl(self, func: StatisticalPropertyFunc) -> t.Dict[str, float]: impl_scores = InsertionTimeOrderedDict() for prog in self.programs: scores = self.programs[prog].get_statistical_property_scores(func) for impl in scores: if impl not in impl_scores: impl_scores[impl] = [] impl_scores[impl].append(scores[impl]) return impl_scores def get_impl_mean_scores(self) -> t.Dict[str, float]: return self.get_statistical_property_scores(rel_mean_func) def get_statistical_property_scores(self, func: StatisticalPropertyFunc) -> t.Dict[str, float]: ret = InsertionTimeOrderedDict() scores_per_impl = self.get_statistical_property_scores_per_impl(func) for impl in scores_per_impl: ret[impl] = stats.gmean(scores_per_impl[impl]) return ret def get_box_plot_per_input_per_impl_html(self, base_file_name: str, input: str) -> str: """ A box plot for each input that shows the mean scores (over all programs) for each implementation. """ return self.boxplot_html_for_data("mean score", base_file_name + "__input_" + html_escape_property(input), self.get_statistical_property_scores_per_input_per_impl(rel_mean_func, input)) def get_statistical_property_scores_per_input_per_impl(self, func: StatisticalPropertyFunc, input: str)\ -> t.Dict[str, t.List[float]]: scores_per_impl = InsertionTimeOrderedDict() for prog in self.programs: prog_val = self.programs[prog] scores = prog_val.get_statistical_property_scores_per_input_per_impl(func, input) for impl in scores: if impl not in scores_per_impl: scores_per_impl[impl] = [] scores_per_impl[impl].append(scores[impl]) return scores_per_impl def get_input_strs(self) -> t.List[str]: return list(self.programs.values())[0].prog_inputs.keys() class Language(BaseObject): def __init__(self, name: str, categories: t.List[ProgramCategory]): super().__init__(name) self.categories = itod_from_list(categories, lambda x: x.name) # type: t.Dict[str, ProgramCategory] @classmethod def from_config_dict(cls, config: dict) -> 'Language': typecheck(config, Dict({ "language": Str(), "categories": List(Dict(all_keys=False)), "impls": List(Dict({"name": Str()}, all_keys=False)) | NonExistent() })) lang = cls(config["language"], []) lang.categories = InsertionTimeOrderedDict() for cat_conf in config["categories"]: cat = ProgramCategory.from_config_dict(lang, cat_conf) lang.categories[cat.name] = cat if "impls" in config: for cat in lang.categories: cat_val = lang.categories[cat] for prog in cat_val.programs: prog_val = cat_val.programs[prog] for p_in in prog_val.prog_inputs: p_in_val = prog_val.prog_inputs[p_in] for impl_conf in config["impls"]: name = impl_conf["name"] if name not in p_in_val.impls: p_in_val.impls[name] = Implementation.from_config_dict(p_in_val, impl_conf) return lang def __getitem__(self, name: str) -> ProgramCategory: return self.categories[name] def set_run_data_from_result_dict(self, run_datas: t.List[t.Dict[str, t.Any]], property: str = "task-clock"): for run_data in run_datas: attrs = run_data["attributes"] typecheck(attrs, Dict({ "language": E(self.name), "category": Str(), "program": Str(), "impl": Str(), "input": Str() })) try: self[attrs["category"]][attrs["program"]][attrs["input"]][attrs["impl"]].run_data = 
run_data["data"][property] except KeyError as err: #logging.warning(err) pass def process_result_file(self, file: str, property: str = "task-clock"): with open(file, "r") as f: self.set_run_data_from_result_dict(yaml.load(f), property) def build(self, base_dir: str, multiprocess: bool = True) -> t.List[dict]: #path = self._create_own_dir(base_dir) return self._buildup_dict(base_dir, self.categories, multiprocess=True) def create_temci_run_file(self, base_build_dir: str, file: str): run_config = self.build(base_build_dir) with open(file, "w") as f: print(yaml.dump(run_config, Dumper=yaml.RoundTripDumper), file=f) def get_box_plot_html(self, base_file_name: str) -> str: # a box plot over the mean scores per category scores_per_impl = self.get_scores_per_impl() singles = [] for impl in scores_per_impl: scores = scores_per_impl[impl] name = "mean score" data = RunData({name: scores}, {"description": impl}) singles.append(SingleProperty(Single(data), data, name)) return self.boxplot_html(base_file_name, singles) def get_html(self, base_file_name: str, h_level: int, with_header: bool = True, multiprocess: bool = False) -> str: html = "" if with_header: html += """ Language: {} """.format(h_level, self.name, h_level) else: h_level -= 1 html += """ Summary Mean score per implementation

""".format(h=h_level + 1) html += self.get_box_plot_html(base_file_name) scores = self.get_impl_mean_scores() std_devs = self.get_statistical_property_scores(rel_std_dev_func) html += """

""" for impl in scores: html += """ """.format(impl, scores[impl], std_devs[impl]) html += "
implementation geom mean over means relative to best (per input, program and category) aka mean score ... std devs per best means
{}{:5.5f}{:5.2%}
" if self.get_max_input_num() > 1: for n in range(0, self.get_max_input_num()): mean_scores = self.get_statistical_property_scores_per_input_per_impl(rel_mean_func, n) std_scores = self.get_statistical_property_scores_per_input_per_impl(rel_std_dev_func, n) html += """ Summary for input no. {n} Mean score per implementation. Excludes all categories with less than {m} inputs.

""".format(h=h_level + 1, n=n, m=self.get_max_input_num()) html += self.get_box_plot_per_input_per_impl_html(base_file_name, n) html += """

""" for impl in mean_scores.keys(): html += """ """.format(impl, stats.gmean(mean_scores[impl]), stats.gmean(std_scores[impl])) html += "
impl geom mean over means relative to best (per input and program) aka mean score ... std devs relative to the best means
{}{:5.5f}{:5.2%}
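# Sketch of the fan-out pattern used below for the per-category HTML (and in
# _buildup_dict): the argument tuples are collected first so that the built-in
# `map` and `multiprocessing.Pool.map` are interchangeable. Names are illustrative.
import multiprocessing

def _work(arg):
    index, payload = arg
    return "{}: {}".format(index, payload)

def fan_out(payloads, multiprocess=False):
    args = list(enumerate(payloads))
    map_func = map
    if multiprocess:                 # same switch as in the surrounding code
        pool = multiprocessing.Pool()
        map_func = pool.map
    return list(map_func(_work, args))

# fan_out(["a", "b"]) == ['0: a', '1: b']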
" objs = [] for (i, cat) in enumerate(self.categories): objs.append((i, cat, base_file_name + "_" + html_escape_property(cat), h_level + 1)) map_func = map if multiprocess: # doesn't work (fix warning issue of seaborn) pool = multiprocessing.Pool(2) map_func = pool.map html += "\n".join(map_func(self._get_html_for_category, objs)) return html def _get_html_for_category(self, arg: t.Tuple[int, str, str, int]) -> str: i, cat, base_name, h_level = arg return self.categories[cat].get_html(base_name, h_level) def get_full_html(self, base_dir: str) -> str: resources_path = os.path.abspath(os.path.join(os.path.dirname(report.__file__), "report_resources")) shutil.copytree(resources_path, os.path.join(base_dir, "resources")) html = """ Implementation comparison for {lang}
{inner_html}
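# End-to-end sketch of how this report template gets filled (file names are
# illustrative): build a temci run file from a game config, benchmark it with
# `temci exec`, then feed the resulting YAML back in. Each result entry is
# expected to carry the attributes written by Implementation.build plus a
# "data" mapping that contains the measured property (e.g. "task-clock").
lang = Language.from_config_dict(config)            # e.g. config = c_config(INPUTS_PER_CATEGORY)
lang.create_temci_run_file("/tmp/c", "c.exec.yaml")
# shell: temci exec c.exec.yaml --runs 15 --out c.yaml
lang.process_result_file("c.yaml", property="task-clock")
lang.store_html("c_report", clear_dir=True)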
""" lang = self.name comparing_str = util.join_strs(self.get_scores_per_impl().keys()) inner_html = self.get_html(base_dir + "/fig", 2, with_header=False) import humanfriendly timespan = humanfriendly.format_timespan(time.time() - START_TIME) return html.format(**locals()) def store_html(self, base_dir: str, clear_dir: bool = True): typecheck_locals(base_dir=DirName()) if not os.path.exists(base_dir): os.mkdir(base_dir) elif clear_dir: shutil.rmtree(base_dir) self.store_html(base_dir, clear_dir=False) return with open(os.path.join(base_dir, "report.html"), "w") as f: f.write(self.get_full_html(os.path.join(base_dir))) def get_scores_per_impl(self) -> t.Dict[str, t.List[float]]: return self.get_statistical_property_scores_per_impl(rel_mean_func) def get_statistical_property_scores_per_impl(self, func: StatisticalPropertyFunc) -> t.Dict[str, t.List[float]]: impl_scores = InsertionTimeOrderedDict() for cat in self.categories: scores = self.categories[cat].get_statistical_property_scores(func) for impl in scores: if impl not in impl_scores: impl_scores[impl] = [] impl_scores[impl].append(scores[impl]) return impl_scores def get_impl_mean_scores(self) -> t.Dict[str, float]: return self.get_statistical_property_scores(rel_mean_func) def get_statistical_property_scores(self, func: StatisticalPropertyFunc) -> t.Dict[str, float]: ret = InsertionTimeOrderedDict() scores_per_impl = self.get_statistical_property_scores_per_impl(func) for impl in scores_per_impl: ret[impl] = stats.gmean(scores_per_impl[impl]) return ret def get_max_input_num(self) -> int: return max(len(cat.get_input_strs()) for cat in self.categories.values()) def _get_categories_for_number_of_inputs(self, number_of_inputs: int) -> t.List[ProgramCategory]: return [cat for cat in self.categories.values() if len(cat.get_input_strs()) == number_of_inputs] def get_statistical_property_scores_per_input_per_impl(self, func: StatisticalPropertyFunc, input_num: int) -> t.Dict[str, t.List[float]]: """ Assumptions: - Most programs have the same number of input (known as max input number) - The input number n takes roughly the same amount of time for every program category """ cats = self._get_categories_for_number_of_inputs(self.get_max_input_num()) scores_per_impl = InsertionTimeOrderedDict() for cat in cats: scores = cat.get_statistical_property_scores_per_input_per_impl(func, cat.get_input_strs()[input_num]) for impl in scores: if impl not in scores_per_impl: scores_per_impl[impl] = [] scores_per_impl[impl].append(stats.gmean(scores[impl])) return scores_per_impl def get_box_plot_per_input_per_impl_html(self, base_file_name: str, input_num: int) -> str: """ A box plot for each input that shows the mean scores (over all programs) for each implementation. """ return self.boxplot_html_for_data("mean score", base_file_name + "__input_" + str(input_num), self.get_statistical_property_scores_per_input_per_impl(rel_mean_func, input_num)) def ref(name: str, value = None, _store={}): """ A simple YAML like reference utility. It to easily store a value under a given key and return it. :param name: name of the reference :param value: new value of the reference (if value isn't None) :param _store: dict to store everything in :return: the value of the reference """ if value is not None: _store[name] = value return _store[name] def bench_file(category: str, ending: str, number: int = 1) -> str: base = BENCH_PATH + "/{c}/{c}".format(c=category) if number == 1: return base + "." 
+ ending return base + ".{ending}-{number}.{ending}".format(**locals()) def bench_program(category: str, ending: str, inputs: t.List[Input], number: int = 1) -> dict: return { "program": str(number), "file": bench_file(category, ending, number), "inputs": [input.replace("$INPUT", BENCH_PATH + "/../bencher/input").to_dict() for input in inputs] } def bench_category(category: str, ending: str, inputs: t.List[Input], numbers: t.List[int] = None) -> dict: if numbers is None: numbers = [] for i in range(1, 10): if os.path.exists(bench_file(category, ending, i)): numbers.append(i) #numbers = [numbers[0]] programs = [bench_program(category, ending, inputs, number) for number in numbers] return { "category": category, "programs": programs } InputsPerCategory = t.Dict[str, t.List[Input]] def bench_categories(ending: str, inputs: InputsPerCategory) -> t.List[dict]: categories = [] for cat in inputs: if os.path.exists(bench_file(cat, ending)): categories.append(bench_category(cat, ending, inputs[cat])) return categories def first_inputs(inputs_per_category: InputsPerCategory) -> InputsPerCategory: ret = InsertionTimeOrderedDict() for key in inputs_per_category: if len(inputs_per_category[key]) > 0: ret[key] = [inputs_per_category[key][0]] return ret def empty_inputs(inputs_per_category: InputsPerCategory) -> InputsPerCategory: ret = InsertionTimeOrderedDict() for key in inputs_per_category: if len(inputs_per_category[key]) > 0: ret[key] = [Input()] return ret def last_inputs(inputs_per_category: InputsPerCategory) -> t.Dict[str, t.List[Input]]: ret = InsertionTimeOrderedDict() for key in inputs_per_category: if len(inputs_per_category[key]) > 0: ret[key] = [inputs_per_category[key][-1]] return ret def divide_inputs(inputs_per_category: InputsPerCategory, divisor: t.Union[int, float]) \ -> t.Dict[str, t.List[Input]]: ret = InsertionTimeOrderedDict() for key in inputs_per_category: ret[key] = [input // divisor for input in inputs_per_category[key]] return ret def prefix_inputs(prefix: str, inputs: t.List[Input]) -> t.List[Input]: return [Input(prefix + input.prefix, input.number, input.appendix) for input in inputs] ConfigDict = t.Dict[str, t.Union[str, dict]] def replace_run_with_build_cmd(config_dict: ConfigDict) -> ConfigDict: config_dict = copy.deepcopy(config_dict) for impl_dict in config_dict["impls"]: impl_dict["run_cmd"] = impl_dict["build_cmd"] + " &> /dev/null" del(impl_dict["build_cmd"]) return config_dict # download the benchmarksgame source code from https://alioth.debian.org/snapshots.php?group_id=100815 BENCH_PATH = "/home/parttimenerd/benchmarksgame/bench" # Inputs based on the ones used in the benchmarksgame INPUTS_PER_CATEGORY = { # type: InputsPerCategory "binarytrees": Input.list_from_numbers(12, 16, 20), "binarytreesredux": Input.list_from_numbers(12, 16, 20), "chameneosredux": Input.list_from_numbers(60000, 600000, 6000000), "fannkuchredux": Input.list_from_numbers(10, 11, 12), "fasta": ref("fasta", Input.list_from_numbers(250000, 2500000, 25000000)), "fastaredux": ref("fasta"), "knucleotide": prefix_inputs("$INPUT/knucleotide-input.txt ", ref("fasta")), "mandelbrot": Input.list_from_numbers(1000, 4000, 16000), "meteor": Input.list_from_numbers(2098), "nbody": Input.list_from_numbers(500000, 5000000, 50000000), "pidigits": Input.list_from_numbers(2000, 6000, 10000), "regexdna": prefix_inputs("$INPUT/regexdna-input.txt ", Input.list_from_numbers(50000, 500000, 5000000)), "revcomp": prefix_inputs("$INPUT/revcomp-input.txt ", Input.list_from_numbers(250000, 2500000, 25000000)), 
"spectralnorm": Input.list_from_numbers(500, 3000, 5500), "threadring": Input.list_from_numbers(500000, 5000000, 50000000) } def c_config(inputs_per_category: InputsPerCategory, optimisation: str = "-O2", clang_version = "3.7") -> ConfigDict: """ Generates a game config that compares gcc and clang. """ def cat(category: str, numbers: t.List[int] = None): return bench_category(category, "gcc", inputs_per_category[category], numbers) config = { "language": "c", "categories": [ cat("binarytrees"), cat("chameneosredux", [2]), cat("fannkuchredux", [1, 5]), cat("fasta", [1, 4, 5]), cat("fastaredux"), #cat("knucleotide", "gcc", [9]) # doesn't compile cat("mandelbrot", [1, 2, 3, 4, 6, 9]), cat("meteor"), cat("nbody"), cat("pidigits"), #cat("regexdna", "gcc", [1, 2]), # runs almost infinitely cat("revcomp", [1]), cat("spectralnorm", [1]), cat("threadring") ], "impls": [ { "name": "gcc", # todo: tcl8.6 vs 8.4??? "build_cmd": "cp {file} {bfile}.c; gcc {bfile}.c $O -I/usr/include/tcl8.6 -ltcl8.4 -lglib-2.0 -lgmp " "-D_GNU_SOURCE -Doff_t=__off64_t -fopenmp -D_FILE_OFFSET_BITS=64 -I/usr/include/apr-1.0 " "-lapr-1 -lgomp -lm -std=c99 -mfpmath=sse -msse3 -I/usr/include/glib-2.0 " "-I/usr/lib/x86_64-linux-gnu/glib-2.0/include -lglib-2.0 -lpcre -o {bfile}" .replace("$O", optimisation), "run_cmd": "./{bfile} {input} > /dev/null" }, { "name": "clang", "build_cmd": "cp {file} {bfile}.c; clang-$CV {bfile}.c $O -I/usr/include/tcl8.6 -ltcl8.4 -fopenmp=libgomp " "-lglib-2.0 -lgmp -D_GNU_SOURCE -Doff_t=__off64_t -D_FILE_OFFSET_BITS=64 " "-I/usr/include/apr-1.0 -lapr-1 -lm -std=c99 -mfpmath=sse -msse3 -I/usr/include/glib-2.0 " "-I/usr/lib/x86_64-linux-gnu/glib-2.0/include -lglib-2.0 -lpcre -o {bfile}" .replace("$CV", clang_version).replace("$O", optimisation), "run_cmd": "./{bfile} {input} > /dev/null" } ] } return config def cparser_config(inputs_per_category: InputsPerCategory, optimisation: str = "-O2", clang_version = "3.7") -> ConfigDict: """ Generates a game config that compares gcc, clang and cparser. 
""" def cat(category: str, numbers: t.List[int] = None): return bench_category(category, "gcc", inputs_per_category[category], numbers) config = { "language": "c", "categories": [ cat("binarytrees", [1, 3, 5]), cat("chameneosredux", [2]), cat("fannkuchredux", [1, 5]), cat("fasta", [1, 4, 5]), cat("fastaredux"), #cat("knucleotide", "gcc", [9]) # doesn't compile cat("mandelbrot", [2, 9]), cat("meteor"), cat("nbody", [1, 2, 3, 6]), cat("pidigits"), #cat("regexdna", "gcc", [1, 2]), # runs almost infinitely cat("revcomp", [1]), cat("spectralnorm", [1]), cat("threadring", [1, 2, 3]) ], "impls": [ { "name": "gcc", "build_cmd": "cp {file} {bfile}.c; gcc {bfile}.c -w $O -I/usr/include/tcl8.6 -ltcl8.4 -lglib-2.0 -lgmp -D_GNU_SOURCE " "-Doff_t=__off64_t -D_FILE_OFFSET_BITS=64 -I/usr/include/apr-1.0 -lapr-1 -lgomp -lm -std=c99 " " -I/usr/include/glib-2.0 -I/usr/lib/x86_64-linux-gnu/glib-2.0/include -lglib-2.0 -lpcre " " -lpthread -o {bfile}.{impl_escaped}".replace("$O", optimisation), "run_cmd": "./{bfile} {input} > /dev/null" }, { "name": "clang", "build_cmd": "cp {file} {bfile}.c; clang-$CV {bfile}.c -w $O -I/usr/include/tcl8.6 -ltcl8.4 " "-fopenmp=libgomp -lglib-2.0 -lgmp -D_GNU_SOURCE " "-Doff_t=__off64_t -D_FILE_OFFSET_BITS=64 -I/usr/include/apr-1.0 -lapr-1 -lm -std=c99 " "-I/usr/include/glib-2.0 -I/usr/lib/x86_64-linux-gnu/glib-2.0/include -lglib-2.0 -lpcre " "-lpthread -o {bfile}.{impl_escaped}".replace("$CV", clang_version).replace("$O", optimisation), "run_cmd": "./{bfile}.{impl_escaped} {input} > /dev/null" }, { "name": "cparser", "build_cmd": "cp {file} {bfile}.c; cparser {bfile}.c -w $O -I/usr/include/tcl8.6 -ltcl8.4 -lglib-2.0 -lgmp -D_GNU_SOURCE " "-Doff_t=__off64_t -D_FILE_OFFSET_BITS=64 -I/usr/include/apr-1.0 -lapr-1 -lgomp -lm -std=c99 " " -I/usr/include/glib-2.0 -I/usr/lib/x86_64-linux-gnu/glib-2.0/include -lglib-2.0 -lpcre " " -lpthread -o {bfile}.{impl_escaped}".replace("$O", optimisation), "run_cmd": "./{bfile}.{impl_escaped} {input} > /dev/null" } ] } return config AV_GHC_VERSIONS = ["7.0.1", "7.2.1", "7.4.1", "7.6.1", "7.8.1", "7.10.1", "8.0.1"] """ These are (currently) the versions installable via the ppa on https://launchpad.net/~hvr/+archive/ubuntu/ghc Older versions can't be installed due to version conflicts and missing libraries """ def haskel_config(inputs_per_category: InputsPerCategory, optimisation: str, ghc_versions: t.List[str] = None) \ -> ConfigDict: """ Generate a game config comparing all available ghc versions :param inputs_per_category: :param optimisation: optimisation flags, e.g. 
'-Odph' or '-O' :param ghc_versions: compared ghc versions, if None, AV_GHC_VERSIONS is used """ ghc_versions = ghc_versions or AV_GHC_VERSIONS def cat(category: str, numbers: t.List[int] = None): return bench_category(category, "ghc", inputs_per_category[category], numbers) def ghc_impl_dir(version) -> str: typecheck_locals(version=ExactEither(*AV_GHC_VERSIONS)) dir = "/opt/ghc/{version}/bin/".format(**locals()) typecheck_locals(dir=DirName()) return dir def ghc_impl(version: str) -> t.Dict[str, str]: return { "name": "ghc-" + version, "build_cmd": "cp {{file}} {{bfile}}.{{impl}}.hs; PATH={impl_dir}:$PATH ghc {O} -XBangPatterns " "{{bfile}}.{{impl}}.hs -XCPP -XGeneralizedNewtypeDeriving -XTypeSynonymInstances " "-XFlexibleContexts -XUnboxedTuples -funbox-strict-fields -XScopedTypeVariables " "-XFlexibleInstances -funfolding-use-threshold=32 -XMagicHash -threaded" .format(O=optimisation, impl_dir=ghc_impl_dir(version)), "run_cmd": "./{{bfile}}.{{impl}} {{input}} > /dev/null".format(ghc_impl_dir(version)) } # Note to the removed programs: # These either don't compile with all ghc versions properly or use additional hackage packages # The latter is bad because installing the package for all ghc's isn't to costly config = { "language": "haskell", "categories": [ cat("binarytrees", [1]), ###cat("chameneosredux", [4]), cat("fannkuchredux", [1, 3]), cat("fasta", [1]), ###cat("knucleotide"), # seems to run forever cat("mandelbrot"), cat("meteor"), cat("nbody", [2]), cat("pidigits"), ###cat("regexdna"), # uses package PCRE ###cat("revcomp", [2]), # seems to runs forever cat("spectralnorm", [2]), ###cat("threadring") # doesn't compile properly ], "impls": [ ghc_impl(version) for version in AV_GHC_VERSIONS ] } return config def process(config: ConfigDict, name: str = None, build_dir: str = None, build: bool = True, benchmark: bool = True, report: bool = True, temci_runs: int = 15, temci_options: str = "--discarded_blocks 1", temci_stop_start: bool = True, report_dir: str = None, property: str = None): """ Process a config dict. Simplifies the build, benchmarking and report generating. :param config: processed config dict :param name: the name of the whole configuration (used to generate the file names), default "{config['language]}" :param build_dir: build dir that is used to build the programs, default is "/tmp/{name}" :param build: make a new build of all programs? (results in a "{name}.exec.yaml" file for temci) :param benchmark: benchmark the "{name}.exec.yaml" file (from a built)? (results in a "{name}.yaml" result file) :param report: generate a game report? (results in a report placed into the report_dir) :param temci_runs: number of benchmarking runs (if benchmark=True) :param temci_options: used options for temci :param temci_stop_start: does temci use the StopStart plugin for decreasing the variance while benchmarking? 
:param report_dir: the directory to place the report in, default is "{name}_report" :param property: measured property for which the report is generated, default is "task-clock" """ global START_TIME START_TIME = time.time() lang = Language.from_config_dict(config) name = name or config["language"] temci_run_file = name + ".exec.yaml" temci_result_file = name + ".yaml" build_dir = build_dir or "/tmp/" + name report_dir = report_dir or name + "_report" os.system("mkdir -p {} {}".format(build_dir, report_dir)) if build: lang.create_temci_run_file(build_dir, temci_run_file) if benchmark: print("Hi") stop_start_str = "--stop_start" if temci_stop_start else "" cmd = "temci exec {temci_run_file} --runs {temci_runs} {temci_options} {stop_start_str} --out {temci_result_file}"\ .format(**locals()) print(cmd) os.system(cmd) if report: lang.process_result_file(temci_result_file, property) lang.store_html(report_dir, clear_dir=True) MODE = "haskell_full" if MODE == "haskell_full": for opti in ["-O2", "-Odph"]:#["", "-O", "-O2", "-Odph"]: try: config = replace_run_with_build_cmd(haskel_config(empty_inputs(INPUTS_PER_CATEGORY), opti)) process(config, "compile_time_haskell_" + opti, temci_runs=30) shutil.rmtree("/tmp/compile_time_haskell_" + opti) except BaseException as ex: logging.error(ex) pass os.sync() time.sleep(60) for opti in reversed(["", "-O", "-O2", "-Odph"]): try: config = haskel_config(INPUTS_PER_CATEGORY, opti) process(config, "haskell" + opti, temci_options=" --discarded_blocks 1 --send_mail me@mostlynerdless.de --nice --other_nice --log_level=error") shutil.rmtree("/tmp/haskell" + opti) except BaseException as ex: logging.error(ex) pass os.sync() time.sleep(60)PK8H-22temci/build/builder.pyimport concurrent import datetime import logging import os, sys, yaml, json, subprocess import queue import random import shutil import threading from collections import namedtuple import multiprocessing from macpath import dirname from time import sleep import temci.setup.setup as setup from ..utils.typecheck import * from ..utils.vcs import VCSDriver from ..utils.settings import Settings class Builder: rand_conf_type = Dict({ "heap": (NaturalNumber() | NonExistent()) // Description("0: don't randomize, > 0 randomize with paddings in range(0, x)"), "stack": (NaturalNumber() | NonExistent()) // Description("0: don't randomize, > 0 randomize with paddings in range(0, x)"), "bss": (Bool() | NonExistent()) // Description("Randomize the bss sub segments?"), "data": (Bool() | NonExistent()) // Description("Randomize the data sub segments?"), "rodata": (Bool() | NonExistent()) // Description("Randomize the rodata sub segments?"), "file_structure": (Bool() | NonExistent()) // Description("Randomize the file structure.") }, all_keys=False) def __init__(self, build_dir: str, build_cmd: str, revision, number: int, rand_conf: dict, base_dir: str, branch: str): typecheck(build_dir, DirName()) typecheck(build_cmd, str) typecheck(revision, Int() | Str()) typecheck(number, PositiveInt()) typecheck(base_dir, DirName()) _rand_conf = rand_conf rand_conf = Settings()["build/rand"] rand_conf.update(rand_conf) typecheck(rand_conf, self.rand_conf_type) self.build_dir = os.path.join(base_dir, build_dir) self.build_cmd = build_cmd self.revision = revision self.number = number self.rand_conf = rand_conf self.vcs_driver = VCSDriver.get_suited_vcs(dir=self.build_dir, branch=None if branch is "" else branch) def build(self, thread_count: int = None) -> list: """ Build the program blocks. 
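        A usage sketch (values are illustrative):

            builder = Builder(build_dir=".", build_cmd="make", revision=-1, number=4,
                              rand_conf={"heap": 4096, "file_structure": True},
                              base_dir=".", branch="")
            working_dirs = builder.build(thread_count=2)   # one randomized copy per entry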
""" thread_count = thread_count or multiprocessing.cpu_count() logging.info("Create base temporary directory and copy build directory") time_tag = datetime.datetime.now().strftime("%s%f") def tmp_dirname(i: int = "base"): tmp_dir = os.path.join(Settings()["tmp_dir"], "build", time_tag, str(i)) return tmp_dir tmp_dir = tmp_dirname() os.makedirs(tmp_dir) self.vcs_driver.copy_revision(self.revision, self.build_dir, tmp_dir) ret_list = [] submit_queue = queue.Queue() threads = [] for i in range(0, self.number): tmp_build_dir = tmp_dirname(i) submit_queue.put(BuilderQueueItem(i, tmp_build_dir, tmp_dir, self.rand_conf, self.build_cmd)) ret_list.append(tmp_build_dir) try: for i in range(min(thread_count, self.number)): thread = BuilderThread(i, submit_queue) threads.append(thread) thread.start() for thread in threads: thread.join() except BaseException as err: for thread in threads: thread.stop = True shutil.rmtree(tmp_dir) logging.info("Error while building") raise BuilderKeyboardInterrupt(err, ret_list) logging.info("Finished building") shutil.rmtree(tmp_dir) return ret_list class BuilderKeyboardInterrupt(KeyboardInterrupt): def __init__(self, error, result): self.error = error self.result = result BuilderQueueItem = namedtuple("BuilderQueueItem", ["id", "tmp_build_dir", "tmp_dir", "rand_conf", "build_cmd"]) class BuilderThread(threading.Thread): def __init__(self, id: int, submit_queue: queue.Queue): threading.Thread.__init__(self) self.stop = False self.id = id self.submit_queue = submit_queue def run(self): while not self.stop: item = None try: item = self.submit_queue.get(timeout=1) except queue.Empty: return tmp_build_dir = item.tmp_build_dir if os.path.exists(tmp_build_dir): shutil.rmtree(tmp_build_dir) shutil.copytree(item.tmp_dir, tmp_build_dir) as_path = os.path.realpath(dirname(dirname(os.path.abspath(__file__)))) + "/scripts" env = { "RANDOMIZATION": json.dumps(item.rand_conf), "PATH": as_path + "/:" + os.environ["PATH"], "LANG": "en_US.UTF-8", "LANGUAGE": "en_US", "TMP_DIR": Settings()["tmp_dir"] } logging.info("Thread {}: Start building number {}".format(self.id, item.id)) proc = subprocess.Popen(["/bin/sh", "-c", "export PATH={}/:$PATH; sync;".format(as_path) + item.build_cmd], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, cwd=tmp_build_dir, env=env) out, err = proc.communicate() if proc.poll() > 0: proc = subprocess.Popen(["/bin/sh", "-c", item.build_cmd], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, cwd=tmp_build_dir, env=env) out, err = proc.communicate() if proc.poll() > 0: shutil.rmtree(tmp_build_dir) #self.submit_queue.put(item) raise EnvironmentError("Thread {}: Build error: {}".format(self.id, str(err))) logging.info("Thread {}: {}".format(self.id, str(out))) setup.exec("hadori", "./hadori {} {}".format(item.tmp_dir, tmp_build_dir)) PK#}6H⠁l;l;temci/build/assembly.py""" Enables the randomization of assembler files and can be used as a wrapper for as. Currently only tested on 64bit system. 
""" import logging import random import re import sys, os, subprocess, shlex import tempfile import time from temci.utils.typecheck import * class Line: """ A line of assembly """ def __init__(self, content: str, number: int): """ :param content: content of the line (without line separator) :param number: line number (starting at 0) """ typecheck(content, Str()) typecheck(number, Int()) self.content = content self.number = number def __str__(self) -> bool: return self.content def is_label(self) -> bool: return ":" in self.content and ":" in self.content.strip().split(" ")[0] def is_function_label(self) -> bool: return self.is_label() and not self.get_label().startswith(".") def get_label(self) -> str: """ Returns the label if the line consists of label, None otherwise. """ return self.content.split(":")[0] if self.is_label() else None def is_statement(self) -> bool: #print(self.content, not self.is_label(), self.startswith("\t"), not self.startswith("/")) return not self.is_label() and not self.startswith("/") and self.content.strip() != "" def to_statement_line(self) -> 'StatementLine': return StatementLine(self.content, self.number) def is_segment_statement(self, segment_names: list = None) -> bool: segment_names = segment_names or ["bss", "data", "rodata", "text"] checked_starts = ["." + x for x in segment_names] + [".section ." + x for x in segment_names] return self.is_statement() and any(self.startswith(x) for x in checked_starts) def split_section_before(self) -> bool: """ Does this statement split the current set of lines into to sections? """ if not self.is_statement(): return False return len(self.content.strip()) == 0 or \ self.is_segment_statement() or \ self.number == 1 def startswith(self, other_str: str) -> bool: return re.sub(r"\s+", " ", self.content.strip()).startswith(other_str) class StatementLine(Line): """ An assembly statement. """ def __init__(self, content: str, number: int): super().__init__(content, number) if not self.is_statement(): raise ValueError(content + "isn't a valid statement line") arr = re.split(r"\s+", self.content.strip(), maxsplit=1) self.statement = arr[0] self.rest = arr[1] if len(arr) == 2 else "" class Section: """ A set of assembly lines. """ def __init__(self, lines: list = None): self.lines = lines or [] def append(self, line: Line): typecheck(line, Line) self.lines.append(line) def extend(self, lines: list): typecheck(lines, List(Line)) self.lines.extend(lines) def __str__(self) -> str: return "\n".join(str(x) for x in self.lines if not x.startswith(".loc ")) def __len__(self) -> int: return len(self.lines) @classmethod def from_lines(cls, lines: list) -> 'Section': typecheck(lines, List(T(Line))) if any(line.is_function_label() for line in lines): return FunctionSection(lines) section = Section(lines) return section def starts_with_segement_statement(self) -> bool: """ Does the first (non empty) line of this section starts a new segment? """ for line in self.lines: if line.is_segment_statement(): return True if not line.is_empty(): return False return False def randomize_segment(self, segment_name: str): """ Randomizes the segment part in the current section by splitting it into label induced subsections and shuffling them. :param segment_name: bss, data or rodata (text doesn't make any sense) """ typecheck(segment_name, ExactEither("bss", "data", "rodata")) i = 0 while i < len(self.lines): possible_starts = ["." 
+ segment_name, ".section " + segment_name] while i < len(self.lines) and \ not any(self.lines[i].startswith(x) for x in possible_starts): i += 1 if i == len(self.lines): return j = i + 1 while j < len(self.lines) and not self.lines[i].split_section_before(): j += 1 if j == len(self.lines): return parts_to_shuffle = self.lines[i + 1:j] # split the lines at the labels and shuffle these subsections subsections = [[]] for line in parts_to_shuffle: if line.is_label() and len(subsections[-1]) > 0: subsections.append([]) subsections[-1].append(line) random.shuffle(subsections) parts_to_shuffle = [x for sublist in subsections for x in sublist] self.lines[i + 1:j] = parts_to_shuffle i = j def randomize_malloc_calls(self, padding: range): """ Randomizes the malloc calls (and thereby the heap) by adding the given padding to each malloc call. :param padding: given padding """ def rand() -> int: return random.randrange(padding.start, padding.stop, padding.step) subq_statement_format = "\taddq ${}, %rdi" if sys.maxsize > 2**32 else "\tadd ${}, %edi" i = 0 while i < len(self.lines): line = self.lines[i] if line.is_statement() and line.to_statement_line().statement == "call": arr = re.split(r"\s+", line.to_statement_line().rest.strip()) if len(arr) == 0 or arr[0] != "malloc": i += 1 continue self.lines.insert(i, Line(subq_statement_format.format(rand()), i)) i += 1 i += 1 class FunctionSection(Section): """ A set of lines for a specific function. Assumptions: - a function uses "pushq %rbp" as its first real instruction - a function uses [real instruction] \n "ret" to return from it """ def pad_stack(self, amount: int): self._replace_leave() """ Pads the stack at the beginning of each function call by the given amount. :param amount: amount to pad the stack """ # search for function label i = 0 while i < len(self.lines) and not self.lines[i].is_function_label(): i += 1 if i == len(self.lines): logging.warning("Didn't pad function.") return # search for the first "pushq %rbp" instruction def is_push_instr(): line = self.lines[i] if not line.is_statement(): return False splitted = re.split(r"\s+", line.content.strip(), maxsplit=1) if len(splitted) != 2: return False return splitted[0].strip() == "pushq" and splitted[1].strip().startswith("%rbp") while i < len(self.lines) and not is_push_instr(): i += 1 if i == len(self.lines): logging.warning("Didn't pad function.") return False # insert a subq $xxx, %rsp instruction, that shouldn't have any bad side effect self.lines.insert(i + 1, Line("\tsubq ${}, %rsp\n".format(amount), i)) i += 1 # search for all ret instructions and place a "subq $-xxx, %rbp" like statement # right before the (real) instruction before the ret instruction def is_real_instruction(line: Line): return line.is_statement() and line.to_statement_line().statement == "popq" def is_ret_instruction(line: Line): return line.is_statement() and line.to_statement_line().statement == "ret" while i < len(self.lines): j = i # search for ret instruction while j < len(self.lines) and not is_ret_instruction(self.lines[j]): j += 1 if j == len(self.lines): return # no self.lines[j] =~ "ret" and search for real instruction directly before k = j while k > i and not is_real_instruction(self.lines[k]): k -= 1 if k == i: #print("error", self.lines[k]) logging.warning("Didn't pad function properly") return self.lines.insert(k, Line("\taddq ${}, %rsp\n".format(amount), k)) i = k + 2 def _replace_leave(self): i = 0 while i < len(self.lines): j = i while j < len(self.lines) and not (self.lines[j].is_statement() and 
self.lines[j].to_statement_line().statement == "leave"): j += 1 if j == len(self.lines): return self.lines[j] = Line("mov %rbp, %rsp", j) self.lines.insert(j + 1, Line("popq %rbp", j)) j += 2 i = j class AssemblyFile: """ A class that simplifies dealing with the lines of an assembly file. It allows the simple randomization of the assembly file. Attention: Most methods change the AssemblyFile directly, """ def __init__(self, lines: list): self._lines = [] self.sections = [] self.add_lines(lines) def _init_sections(self): self.sections = [] if any(line.startswith("# Begin") for line in self._lines): # libfirm mode cur = Section() for i, line in enumerate(self._lines): if line.content.strip() == "": self.sections.append(cur) cur = Section() cur.append(line) self.sections.append(cur) elif any(line.startswith(".cfi") for line in self._lines): # gcc mode cur = Section() for line in self._lines: if line.content.strip() == ".text": self.sections.append(cur) cur = Section() cur.append(line) self.sections.append(cur) else: raise ValueError("Unknown assembler") def add_lines(self, lines: list): """ Add the passed lines. :param lines: either list of Lines or strings representing Lines """ typecheck(lines, List(T(Line)|Str())) start_num = len(self._lines) for (i, line) in enumerate(lines): if isinstance(line, T(Line)): line.number = i + start_num self._lines.append(line) else: self._lines.append(Line(line, i + start_num)) self._init_sections() def randomize_file_structure(self, small_changes = True): """ Randomizes the sections relative positions but doesn't change the first section. """ _sections = self.sections[1:-1] if small_changes: i = 0 while i < len(_sections) - 1: if random.randrange(0, 2) == 0: tmp = _sections[i] _sections[i] = _sections[i + 1] _sections[i + 1] = tmp i += 2 else: random.shuffle(_sections) self.sections[1:-1] = _sections def randomize_stack(self, padding: range): for section in self.sections: if isinstance(section, FunctionSection): section.pad_stack(random.randrange(padding.start, padding.stop, padding.step)) def randomize_sub_segments(self, segment_name: str): """ Randomize the segments of the given name. :param segment_name: segment name, e.g. 
"bss", "data" or "rodata" """ for section in self.sections: section.randomize_segment(segment_name) def randomize_malloc_calls(self, padding: range): for section in self.sections: section.randomize_malloc_calls(padding) def __str__(self): return "\n/****/\n".join(map(str, self.sections)) + "\n" @classmethod def from_file(cls, file: str): with open(file, "r") as f: return AssemblyFile([line.rstrip() for line in f.readlines()]) def to_file(self, file: str): with open(file, "w") as f: f.write(str(self)) class AssemblyProcessor: config_scheme = Dict({ "heap": NaturalNumber() // Default(0) // Description("0: don't randomize, > 0 randomize with paddings in range(0, x)"), "stack": NaturalNumber() // Default(0) // Description("0: don't randomize, > 0 randomize with paddings in range(0, x)"), "bss": Bool() // Default(False) // Description("Randomize the bss sub segments?"), "data": Bool() // Default(False) // Description("Randomize the data sub segments?"), "rodata": Bool() // Default(False) // Description("Randomize the rodata sub segments?"), "file_structure": Bool() // Default(False) // Description("Randomize the file structure.") }) def __init__(self, config: dict): self.config = self.config_scheme.get_default() self.config.update(config) typecheck(self.config, self.config_scheme) def process(self, file: str, small_changes = False): assm = AssemblyFile.from_file(file) assm.to_file("/tmp/abc.s") if self.config["file_structure"]: assm.randomize_file_structure(small_changes) if self.config["heap"] > 0: assm.randomize_malloc_calls(padding=range(0, self.config["heap"])) if self.config["stack"] > 0: assm.randomize_stack(padding=range(0, self.config["stack"])) if self.config["bss"]: assm.randomize_sub_segments("bss") if self.config["data"]: assm.randomize_sub_segments("data") if self.config["rodata"]: assm.randomize_sub_segments("rodata") assm.to_file(file) assm.to_file("/tmp/abcd.s") if __name__ == "__main__": def test(assm: AssemblyFile): tmp_file = "/tmp/test.s" assm.to_file(tmp_file) os.system("gcc {} -o /tmp/test && /tmp/test".format(tmp_file)) print(Line(" .section .text.unlikely\n", 1).is_segment_statement()) #exit(0) #assm = AssemblyFile.from_file("/home/parttimenerd/Documents/Studium/Bachelorarbeit/test/hello2/hello.s") assm = AssemblyFile.from_file("/tmp/abc.s") #test(assm) #assm.randomize_malloc_calls(padding=range(1, 1000)) #test(assm) assm.randomize_file_structure() test(assm) #print("till randomize") #assm.randomize_stack(padding=range(1, 100)) #test(assm) #for x in ["bss", "data", "rodata"]: # assm.randomize_sub_segments(x) # test(assm) PK#}6H+= temci/build/build_processor.pyimport os, sys, yaml, json, subprocess from ..utils.typecheck import * from ..utils.vcs import VCSDriver from ..utils.settings import Settings from .builder import Builder, BuilderKeyboardInterrupt class BuildProcessor: block_scheme = Dict({ "attributes": Dict(all_keys=False, key_type=Str()), "run_config": Dict(all_keys=False), "build_config": Dict({ "build_cmd": Str() // Default(""), "number": (PositiveInt() | NonExistent()) // Default(1), "randomization": (Dict(all_keys=False) | NonExistent()) // Default({}), "working_dir": (DirName() | NonExistent()) // Default("."), "revision": (Str() | Int() | NonExistent()) // Default(-1), "branch": (Str() | NonExistent()) // Default(""), "base_dir": (DirName() | NonExistent()) // Default(".") }) }) def __init__(self, build_blocks: list = None): if build_blocks is None: typecheck(Settings()["build/in"], ValidYamlFileName()) with open(Settings()["build/in"], "r") as f: 
build_blocks = yaml.load(f) typecheck(build_blocks, List(self.block_scheme)) self.build_blocks = [self.block_scheme.get_default() for i in range(len(build_blocks))] #print(json.dumps(self.build_blocks)) for i, block in enumerate(build_blocks): for key in block.keys(): self.build_blocks[i][key].update(block[key]) typecheck(self.build_blocks[i], self.block_scheme, "build block {}".format(i)) #print(json.dumps(self.build_blocks)) typecheck(Settings()["build/out"], FileName()) typecheck_locals(build_blocks=List()) self.out = Settings()["build/out"] def build(self): run_blocks = [] try: for block in self.build_blocks: working_dirs = [] error = None try: block_builder = Builder(block["build_config"]["working_dir"], block["build_config"]["build_cmd"], block["build_config"]["revision"], block["build_config"]["number"], block["build_config"]["randomization"], block["build_config"]["base_dir"], block["build_config"]["branch"]) working_dirs = block_builder.build() except BuilderKeyboardInterrupt as err: working_dirs = err.result error = err.error block["run_config"]["cwd"] = working_dirs run_blocks.append({ "attributes": block["attributes"], "run_config": block["run_config"] }) if error: raise error except KeyboardInterrupt as err: with open(self.out, "w") as f: yaml.dump(run_blocks, f) raise err with open(self.out, "w") as f: yaml.dump(run_blocks, f) PK#}6Htemci/build/__init__.pyPK6HL..temci/run/cpuset.pyimport logging import re import shutil import subprocess, os, time from temci.utils.settings import Settings, SettingsError from temci.utils.util import ensure_root from temci.utils.typecheck import * import cgroupspy CPUSET_DIR = '/cpuset' NEW_ROOT_SET = 'bench.root' BENCH_SET = 'temci.set' CONTROLLER_SUB_BENCH_SET = 'temci.set.controller' SUB_BENCH_SET = 'temci.set.{}' class CPUSet: """ This class allows the usage of cpusets (see `man cpuset`) and therefore requires root privileges. It uses the program cset to modify the cpusets. """ def __init__(self, base_core_number: int = None, parallel: int = None, sub_core_number: int = None): """ Initializes the cpu sets an determines the number of parallel programs (parallel_number variable). :param base_core_number: :param parallel: :param sub_core_number: :raises ValueError if the passed parameters don't work together on the current platform :raises EnvironmentError if the environment can't be setup properly (e.g. no root privileges) """ #self.bench_set = "bench.set" logging.info("Initialize CPUSet") ensure_root("CPU sets can't be created and managed without root privileges") self.own_set = '' self.base_core_number = Settings().default(base_core_number, "run/cpuset/base_core_number") self.parallel = Settings().default(parallel, "run/cpuset/parallel") self.sub_core_number = Settings().default(sub_core_number, "run/cpuset/sub_core_number") self.av_cores = self._cpu_range_size("") if self.parallel == 0: self.parallel_number = 0 else: if self.parallel == -1: self.parallel_number = self._number_of_parallel_sets(self.base_core_number, True, self.sub_core_number) else: self.parallel_number = self.parallel if self.parallel > self._number_of_parallel_sets(self.base_core_number, True, self.sub_core_number): raise ValueError("Invalid values for base_core_number and sub_core_number " "on system with just {} cores. 
Note: The benchmark controller" "needs a cpuset too.".format(self.av_cores)) self.base_core_number = self.av_cores - self.sub_core_number * self.parallel_number - 1 av_cores = self._cpu_range_size("") typecheck(self.base_core_number, PositiveInt()) typecheck(self.parallel_number, NaturalNumber()) self.own_sets = [SUB_BENCH_SET.format(i) for i in range(0, self.parallel_number)] \ + [CONTROLLER_SUB_BENCH_SET, NEW_ROOT_SET, BENCH_SET] try: self._init_cpuset() except BaseException: logging.error("Forced teardown of CPUSet") self.teardown() raise logging.info("Finished initializing CPUSet") def move_process_to_set(self, pid: int, set_id: int): """ Moves the process with the passed id to the parallel sub cpuset with the passed id. :param pid: passed process id :param set_id: passed parallel sub cpuset id """ try: typecheck(pid, Int()) typecheck(set_id, Int(range=range(0, self.parallel_number))) self._move_process_to_set(SUB_BENCH_SET.format(set_id), pid) except BaseException: logging.error("Forced teardown of CPUSet") self.teardown() raise def get_sub_set(self, set_id: int) -> str: typecheck(set_id, Int(range=range(0, self.parallel_number))) return SUB_BENCH_SET.format(set_id) def teardown(self): """ Tears the created cpusets down and makes the system usable again. """ for set in self.own_sets: try: self._delete_set(set) except EnvironmentError as ex: pass #logging.error(str(ex)) except BaseException: raise def _number_of_parallel_sets(self, base_core_number: int, parallel: bool, sub_core_number: int) -> int: typecheck([base_core_number, parallel, sub_core_number], List(Int())) if base_core_number + 1 + sub_core_number > self.av_cores: raise ValueError("Invalid values for base_core_number and sub_core_number " "on system with just {} cores. Note: The benchmark controller" "needs a cpuset too.".format(self.av_cores)) av_cores_for_par = self.av_cores - base_core_number - 1 if parallel: return av_cores_for_par // sub_core_number return 1 def _init_cpuset(self): """ Mounts the cpuset pseudo filesystem at "/cpuset" and creates the necessary cpusets. :return: """ if not os.path.exists(CPUSET_DIR + "/cgroup.procs"): if not os.path.exists(CPUSET_DIR): os.mkdir(CPUSET_DIR) proc = subprocess.Popen(["bash", "-c", "mount -t cpuset none /cpuset/"], stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, universal_newlines=True) out, err = proc.communicate() if proc.poll() > 0: raise EnvironmentError( "Cannot mount /cpuset. " + "Probably you're you're not in root mode or you've already mounted cpuset elsewhere.", str(err)) self._create_cpuset(NEW_ROOT_SET, (0, self.base_core_number - 1)) logging.info("Move all processes to new root cpuset") self._move_all_to_new_root() if self.parallel == 0: # just use all available cores, as the benchmarked program also runs in it self._create_cpuset(CONTROLLER_SUB_BENCH_SET, (self.base_core_number, self.av_cores - 1)) else: self._create_cpuset(CONTROLLER_SUB_BENCH_SET, (self.base_core_number, self.base_core_number)) self._move_process_to_set(CONTROLLER_SUB_BENCH_SET) for i in range(0, self.parallel_number): start = self.base_core_number + 1 + (i * self.sub_core_number) self._create_cpuset(SUB_BENCH_SET.format(i), (start, start + self.sub_core_number - 1)) def _cpu_range_of_set(self, name: str) -> str: """ Returns the range of cpu nodes the set with the passed name has. 
:param name: cpuset name :return: either "-" or None if the cpuset doesn't exist """ name = self._relname(name) if self._has_set(name): res = self._cset("set {}".format(name)) arr = res.split("\n")[3].strip().split(" ") arr = [x for x in arr if x != ""] return arr[1] if "-" in arr[1] else "{core}-{core}".format(core=arr[1]) return None def _cpu_range_tuple_of_set(self, name: str) -> tuple: """ Returns the range of cpu nodes the cpuset with passed name has as a tuple (first node, last node). :param name: cpuset name :return: tuple or None if the cpuset doesn't exist """ if self._has_set(name): arr = self._cpu_range_of_set(name).split("-") return int(arr[0]), int(arr[0 if len(arr) == 1 else 1]) return None def _cpu_range_size(self, name: str) -> int: if self._has_set(name): f, s = self._cpu_range_tuple_of_set(name) return s - f + 1 return 0 def _has_set(self, name): name = self._relname(name) return name + " " in self._cset("set -rl") def _delete_set(self, name: str): self._cset("set -r --force -d %s" % NEW_ROOT_SET) def _move_all_to_new_root(self, name = 'root', _count: int = 100): cpus = "{}-{}".format(0, self.base_core_number - 1) if self.base_core_number > 1 else 0 self._set_cpu_affinity_of_set(name, cpus) if _count > 0: for child in self._child_sets(name): if len(child) > 1: #print("moved from {child} to {root}".format(child=child, root=NEW_ROOT_SET)) try: self._move_all_to_new_root(child, _count - 1) except EnvironmentError as err: pass #logging.warning(str(err)) self._move_processes(name, NEW_ROOT_SET) #if _count == 100: # self._cset("proc --move -k --force --threads --pid=0-100000 --toset={}".format(NEW_ROOT_SET)) def _move_processes(self, from_set: str, to_set: str): """ Move all processes from the first to the second cpuset. Only some kernel threads are left behind. 
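        Sketch of the typical call, as issued by _move_all_to_new_root above
        (requires root privileges and the cset tool):

            self._move_processes("root", NEW_ROOT_SET)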
:param from_set: name of the first cpuset :param to_set: name of the second cpuset """ from_set, to_set = (self._relname(from_set), self._relname(to_set)) self._cset("proc --move --kthread --force --threads --fromset %s --toset %s" % (from_set, to_set)) def _move_process_to_set(self, cpuset: str, pid: int = os.getpid()): self._cset("proc --move --force --pid %d --threads %s" % (pid, cpuset)) def _absname(self, relname: str): if "/" in relname: return relname res = self._cset("set %s" % relname) arr = res.split("\n")[-1].strip().split(" ") arr = [x for x in arr if x != ""] return arr[7] def _relname(self, absname: str): if not "/" in absname: return absname return absname.split("/")[-1] def _child_sets(self, name: str): name = self._relname(name) res = self._cset("set %s" % name) arr = [] for line in res.split("\n")[4:]: line = line.strip() arr.append(line.split(" ")[0]) return arr def _create_cpuset(self, name: str, cpus: tuple): typecheck(cpus, Tuple(Int(), Int())) cpu_range = "{}-{}".format(*cpus) path = [] for part in name.split("/"): path.append(part) self._cset("set --cpu {} {} ".format(cpu_range, "/".join(path))) def _set_cpu_affinity_of_set(self, set: str, cpus): if set == "root": set = "" app = "cgroup.procs" if set == "" else set + "/cgroup.procs" with open(os.path.join(CPUSET_DIR + "/" + app), "r") as f: for line in f.readlines(): try: self._set_cpu_affinity(int(line.strip()), cpus) #logging.info("success {}".format(line)) except EnvironmentError as err: pass #logging.error(str(err)) def _set_cpu_affinity(self, pid: int, cpus): cmd = "sudo taskset --all-tasks --cpu-list -p {} {}; sudo nice".format(cpus, pid) proc = subprocess.Popen(["/bin/sh", "-c", cmd], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) out, err = proc.communicate() if proc.poll() > 0: raise EnvironmentError ( "taskset error (cmd = '{}'): ".format(cmd) + str(err) + str(out) ) return str(out) def _cset(self, argument: str): proc = subprocess.Popen(["/bin/sh", "-c", "sudo cset {}".format(argument)], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) out, err = proc.communicate() if proc.poll() > 0: raise EnvironmentError ( "Error with cset tool. " " More specific error (cmd = 'sudo cset {}'): ".format(argument) + str(err) + str(out) ) return str(out)PK0`:H.%&%&temci/run/run_processor.pyimport copy import random from temci.utils.util import join_strs from temci.utils.mail import send_mail from temci.utils.typecheck import * from temci.run.run_worker_pool import RunWorkerPool, ParallelRunWorkerPool from temci.run.run_driver import RunProgramBlock, BenchmarkingResultBlock, RunDriverRegistry, ExecRunDriver import temci.run.run_driver_plugin from temci.tester.rundata import RunDataStatsHelper, RunData from temci.utils.settings import Settings from temci.tester.report_processor import ReporterRegistry import time, logging, humanfriendly, yaml, sys, math, pytimeparse, os class RunProcessor: """ This class handles the coordination of the whole benchmarking process. It is configured by setting the settings of the stats and run domain. """ def __init__(self, runs: list = None, append: bool = None, show_report: bool = None): """ Important note: this constructor also setups the cpusets and plugins that can alter the system, e.g. confine most processes on only one core. Be sure to call the teardown() or the benchmark() method to make the system usable again. 
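        Minimal usage sketch (the block contents are made up; the structure is the one
        checked by the typecheck in the constructor body):

            runs = [{
                "attributes": {"description": "sleep benchmark"},
                "run_config": {"run_cmd": ["sleep 0.1"]}
            }]
            RunProcessor(runs).benchmark()   # benchmarks, stores the results into
                                             # Settings()["run/out"] and tears down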
:param runs: list of dictionaries that represent run program blocks if None Settings()["run/in"] is used """ if runs is None: typecheck(Settings()["run/in"], ValidYamlFileName()) with open(Settings()["run/in"], "r") as f: runs = yaml.load(f) typecheck(runs, List(Dict({ "attributes": Dict(all_keys=False, key_type=Str()), "run_config": Dict(all_keys=False) }))) self.run_blocks = [] for (id, run) in enumerate(runs): self.run_blocks.append(RunProgramBlock.from_dict(id, copy.deepcopy(run))) self.append = Settings().default(append, "run/append") self.show_report = Settings().default(show_report, "run/show_report") self.stats_helper = None # type: RunDataStatsHelper typecheck(Settings()["run/out"], FileName()) if self.append: run_data = [] try: if os.path.exists(Settings()["run/out"]): with open(Settings()["run/out"], "r") as f: run_data = yaml.load(f) self.stats_helper = RunDataStatsHelper.init_from_dicts(run_data, external=True) for run in runs: self.stats_helper.runs.append(RunData(attributes=run["attributes"])) except: self.teardown() raise else: self.stats_helper = RunDataStatsHelper.init_from_dicts(runs) if Settings()["run/cpuset/parallel"] == 0: self.pool = RunWorkerPool() else: self.pool = ParallelRunWorkerPool() self.run_block_size = Settings()["run/run_block_size"] self.discarded_blocks = Settings()["run/discarded_blocks"] self.pre_runs = self.discarded_blocks * self.run_block_size self.max_runs = max(Settings()["run/max_runs"], Settings()["run/min_runs"]) + self.pre_runs self.min_runs = Settings()["run/min_runs"] + self.pre_runs self.shuffle = Settings()["run/shuffle"] if Settings()["run/runs"] != -1: self.min_runs = self.max_runs = self.min_runs = Settings()["run/runs"] + self.pre_runs self.start_time = round(time.time()) try: self.end_time = self.start_time + pytimeparse.parse(Settings()["run/max_time"], Settings()["run/discarded_blocks"]) except: self.teardown() raise self.block_run_count = 0 def _finished(self): return (len(self.stats_helper.get_program_ids_to_bench()) == 0 \ or not self._can_run_next_block()) and self.min_runs < self.block_run_count def _can_run_next_block(self): estimated_time = self.stats_helper.estimate_time_for_next_round(self.run_block_size, all=self.block_run_count < self.min_runs) to_bench_count = len(self.stats_helper.get_program_ids_to_bench()) if round(time.time() + estimated_time) > self.end_time: logging.warning("Ran to long ({}) and is therefore now aborted. " "{} program blocks should've been benchmarked again." 
.format(humanfriendly.format_timespan(time.time() + estimated_time - self.start_time), to_bench_count)) return False if self.block_run_count >= self.max_runs and self.block_run_count > self.min_runs: #print("benchmarked too often, block run count ", self.block_run_count, self.block_run_count + self.run_block_size > self.min_runs) logging.warning("Benchmarked program blocks to often and aborted therefore now.") return False return True def benchmark(self): try: last_round_time = time.time() while self.block_run_count <= self.pre_runs or not self._finished(): last_round_span = time.time() - last_round_time last_round_time = time.time() try: if Settings().has_log_level("info") and self.block_run_count > self.pre_runs and \ ("exec" != RunDriverRegistry.get_used() or "start_stop" not in ExecRunDriver.get_used()): # last_round_actual_estimate = \ # self.stats_helper.estimate_time_for_next_round(self.run_block_size, # all=self.block_run_count < self.min_runs) # estimate = self.stats_helper.estimate_time(self.run_block_size, self.min_runs, self.max_runs) # if last_round_actual_estimate != 0: # estimate *= last_round_span / last_round_actual_estimate # estimate = (estimate / self.pool.parallel_number) - (time.time() - self.start_time) # else: # estimate = 0 nr = self.block_run_count - self.pre_runs estimate, title = "", "" if nr <= self.min_runs: estimate = last_round_span * (self.min_runs - self.block_run_count) title = "Estimated time till minimum runs completed" else: estimate = last_round_span * (self.max_runs - self.block_run_count) title = "Estimated time till maximum runs completed" estimate = min(estimate, self.end_time - time.time()) estimate_str = humanfriendly.format_timespan(math.floor(estimate)) logging.info("[Finished {nr:>3}] {title}: {time:>20}" .format(nr=nr, title=title, time=estimate_str)) except: logging.warning("Error in estimating and printing the needed time.") self._benchmarking_block_run() #print(not self._finished(), len(self.stats_helper.get_program_ids_to_bench()), self._can_run_next_block()) print() except BaseException as ex: logging.error("Forced teardown of RunProcessor") self.store_and_teardown() if isinstance(ex, KeyboardInterrupt) and Settings()["log_level"] == "info" and self.block_run_count > 0\ and self.show_report: self.print_report() raise self.store_and_teardown() if Settings().has_log_level("info") and self.show_report: self.print_report() def _benchmarking_block_run(self): try: self.block_run_count += self.run_block_size to_bench = [] if self.block_run_count <= self.min_runs: to_bench = list(enumerate(self.run_blocks)) else: to_bench = [(i, self.run_blocks[i]) for i in self.stats_helper.get_program_ids_to_bench()] if self.shuffle: random.shuffle(to_bench) for (id, run_block) in to_bench: self.pool.submit(run_block, id, self.run_block_size) for (block, result, id) in self.pool.results(): if self.block_run_count > self.pre_runs: self.stats_helper.add_data_block(id, result.data) except BaseException: self.store_and_teardown() logging.error("Forced teardown of RunProcessor") raise self.store() def teardown(self): self.pool.teardown() def store_and_teardown(self): self.teardown() self.store() if len(self.stats_helper.runs) > 0 and all(x.benchmarkings() > 0 for x in self.stats_helper.runs): report = ReporterRegistry.get_for_name("console", self.stats_helper)\ .report(with_tester_results=False, to_string = True) self.stats_helper.runs[0].description() subject = "Finished " + join_strs([repr(run.description()) for run in self.stats_helper.runs]) 
send_mail(Settings()["run/send_mail"], subject, report, [Settings()["run/out"]]) def store(self): with open(Settings()["run/out"], "w") as f: f.write(yaml.dump(self.stats_helper.serialize())) def print_report(self) -> str: if len(self.stats_helper.runs) > 0 and all(x.benchmarkings() > 0 for x in self.stats_helper.runs): ReporterRegistry.get_for_name("console", self.stats_helper).report(with_tester_results=False) PK#}6HU&U&temci/run/run_worker_pool.py""" This module consists of the abstract run worker pool class and several implementations. """ import re from ..utils.typecheck import * from ..utils.settings import Settings from .run_driver import RunProgramBlock, BenchmarkingResultBlock, AbstractRunDriver, RunDriverRegistry from queue import Queue, Empty from .cpuset import CPUSet import logging, threading, subprocess, shlex, os, tempfile, yaml class AbstractRunWorkerPool: """ An abstract run worker pool that just deals with the hyper threading setting. """ def __init__(self, run_driver_name: str = None): if Settings()["run/disable_hyper_threading"]: self._disable_hyper_threading() def submit(self, block: RunProgramBlock, id: int, runs: int): pass def results(self): pass def teardown(self): if Settings()["run/disable_hyper_threading"]: self._enable_hyper_threading() def _disable_hyper_threading(self): """ Adapted from http://unix.stackexchange.com/a/223322 """ total_logical_cpus = 0 total_physical_cpus = 0 total_cores = 0 cpu = None logical_cpus = {} physical_cpus = {} cores = {} hyperthreading = False for line in open('/proc/cpuinfo').readlines(): if re.match('processor', line): cpu = int(line.split()[2]) if cpu not in logical_cpus: logical_cpus[cpu] = [] total_logical_cpus += 1 if re.match('physical id', line): phys_id = int(line.split()[3]) if phys_id not in physical_cpus: physical_cpus[phys_id] = [] total_physical_cpus += 1 if re.match('core id', line): core = int(line.split()[3]) if core not in cores: cores[core] = [] total_cores += 1 cores[core].append(cpu) if (total_cores * total_physical_cpus) * 2 == total_logical_cpus: hyperthreading = True self.ht_cores = [] if hyperthreading: for c in cores: for p, val in enumerate(cores[c]): if p > 0: self.ht_cores.append(val) self._set_status_of_ht_cores(self.ht_cores, 0) def _enable_hyper_threading(self): self._set_status_of_ht_cores(self.ht_cores, 1) def _set_status_of_ht_cores(self, ht_cores: list, online_status: int): if len(ht_cores) == 0: return arg = "\n".join("echo {} > /sys/devices/system/cpu/cpu{}/online" .format(online_status, core_id) for core_id in ht_cores) proc = subprocess.Popen(["/bin/sh", "-c", "sudo bash -c '{}'".format(arg)], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) out, err = proc.communicate() if proc.poll() > 0: raise EnvironmentError("Error while disabling the hyper threaded cores: " + str(err)) class RunWorkerPool(AbstractRunWorkerPool): """ This run worker pool implements the sequential benchmarking of program blocks. """ def __init__(self, run_driver_name: str = None): """ Initializes a worker pool. 
:param run_driver_name: name of the used run driver or None if the one set in the Settings should be used """ super().__init__(run_driver_name) self.queue = Queue() self.result_queue = Queue() if run_driver_name is None: run_driver_name = RunDriverRegistry().get_used() self.cpuset = CPUSet(parallel=0) if Settings()["run/cpuset/active"] else None self.run_driver = RunDriverRegistry().get_for_name(run_driver_name) self.parallel_number = 1 def submit(self, block: RunProgramBlock, id: int, runs: int): """ Submits the passed block for "runs" times benchmarking. It also sets the blocks is_enqueued property to True. :param block: passed run program block :param id: id of the passed block :param runs: number of individual benchmarking runs """ typecheck(block, RunProgramBlock) typecheck(runs, NaturalNumber()) typecheck(id, NaturalNumber()) block.is_enqueued = True self.result_queue.put((block, self.run_driver.benchmark(block, runs), id)) block.is_enqueued = False def results(self): """ An iterator over all available benchmarking results. The items of this iterator are tuples consisting of the benchmarked block, the benchmarking result and the blocks id. The benchmarking results are simple ..run_driver.BenchmarkingResultBlock objects. """ while not self.result_queue.empty(): yield self.result_queue.get() def teardown(self): """ Tears down the inherited run driver. This should be called if all benchmarking with this pool is finished. """ super().teardown() self.run_driver.teardown() if self.cpuset is not None: self.cpuset.teardown() class ParallelRunWorkerPool(AbstractRunWorkerPool): """ This run worker pool implements the parallel benchmarking of program blocks. It uses a server-client-model to benchmark on different cpu cores. """ def __init__(self, run_driver_name: str = None): """ Initializes a worker pool. :param run_driver_name: name of the used run driver or None if the one set in the Settings should be used """ super().__init__(run_driver_name) self.submit_queue = Queue() self.intermediate_queue = Queue() self.result_queue = Queue() if run_driver_name is None: run_driver_name = RunDriverRegistry().get_used() if Settings()["run/cpuset/active"]: self.cpuset = CPUSet() else: raise ValueError("Only works with run/cpuset/active=True") self.parallel_number = self.cpuset.parallel_number logging.info("Using {} parallel processes to benchmark.".format(self.parallel_number)) self.threads = [] self.run_driver = RunDriverRegistry.get_for_name(run_driver_name) try: for i in range(0, self.parallel_number): thread = BenchmarkingThread(i, self, self.run_driver, self.cpuset) self.threads.append(thread) thread.start() except BaseException: logging.error("Forced teardown of ParallelRunWorkerPool") self.teardown() raise def submit(self, block: RunProgramBlock, id: int, runs: int): """ Submits the passed block for "runs" times benchmarking. It also sets the blocks is_enqueued property to True. :param block: passed run program block :param id: id of the passed block :param runs: number of individual benchmarking runs """ typecheck(block, RunProgramBlock) typecheck(runs, NaturalNumber()) typecheck(id, NaturalNumber()) block.is_enqueued = True self.submit_queue.put((block, id, runs)) def results(self): """ An iterator over all available benchmarking results. The items of this iterator are tuples consisting of the benchmarked block, the benchmarking result and the blocks id. The benchmarking results are simple ..run_driver.BenchmarkingResultBlock objects. 
""" while not self.intermediate_queue.empty() or not self.submit_queue.empty() or not self.result_queue.empty(): yield self.result_queue.get() #print("++intermediate size", self.intermediate_queue.qsize()) #rint("++submit queue size", self.submit_queue.qsize()) def teardown(self): """ Tears down the inherited run driver. This should be called if all benchmarking with this pool is finished. """ super().teardown() self.cpuset.teardown() self.run_driver.teardown() try: for thread in self.threads: thread.stop = True thread.teardown() except BaseException as err: pass class BenchmarkingThread(threading.Thread): def __init__(self, id: int, pool: ParallelRunWorkerPool, driver: AbstractRunDriver, cpuset: CPUSet): threading.Thread.__init__(self) self.stop = False self.id = id self.pool = pool self.driver = driver self.cpuset = cpuset def run(self): while True: try: (block, block_id, runs) = self.pool.submit_queue.get(timeout=1) except Empty: if self.stop: return else: continue self.pool.intermediate_queue.put(block_id) try: self.pool.result_queue.put((block, self._process_block(block, runs), block_id)) logging.info("Thread {set_id}: Benchmarked block {id}".format(set_id=self.id, id=block_id)) block.is_enqueued = False self.pool.intermediate_queue.get() except BaseException: logging.error("Forced teardown of BenchmarkingThread") self.teardown() raise def _process_block(self, block: RunProgramBlock, runs: int) -> BenchmarkingResultBlock: return self.driver.benchmark(block, runs, self.cpuset, self.id) def teardown(self): passPK#}6Htemci/run/__init__.pyPK9:H\@@temci/run/run_driver_plugin.py""" This module consists of run driver plugin implementations. """ from .run_driver import RunProgramBlock from ..utils.util import ensure_root from .run_driver import ExecRunDriver from ..utils.registry import register from ..utils.typecheck import * import temci.setup.setup as setup import subprocess, logging, os, signal, random, multiprocessing, time import typing as t class AbstractRunDriverPlugin: """ A plugin for a run driver. It allows additional modifications. The object is instantiated before the benchmarking starts and used for the whole benchmarking runs. """ def __init__(self, misc_settings): self.misc_settings = misc_settings def setup(self): """ Called before the whole benchmarking starts (e.g. to set the "nice" value of the benchmarking process). """ pass def setup_block(self, block: RunProgramBlock, runs: int = 1): """ Called before each run program block is run "runs" time. :param block: run program block to modify :param runs: number of times the program block is run at once. """ pass def setup_block_run(self, block: RunProgramBlock): """ Called before each run program block is run. :param block: run program block to modify """ pass def teardown_block(self, block: RunProgramBlock): """ Called after each run program block is run. :param block: run program block """ pass def teardown(self): """ Called after the whole benchmarking is finished. 
:return: """ pass def _exec_command(self, cmd: str) -> str: proc = subprocess.Popen(["/bin/sh", "-c", cmd], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) out, err = proc.communicate() if proc.poll() > 0: msg = "Error executing '" + cmd + "' in {}: ".format(type(self)) + str(err) + " " + str(out) #logging.error(msg) raise EnvironmentError(msg) return str(out) @register(ExecRunDriver, "nice", Dict({ "nice": Int(range=range(-20, 20)) // Description("Niceness values range from -20 (most favorable " "to the process) to 19 (least favorable to the process).") // Default(-15), "io_nice": Int(range=range(0, 4)) // Description("Specify the name or number of the scheduling class to use;" "0 for none, 1 for realtime, 2 for best-effort, 3 for idle.") // Default(1) })) class NicePlugin(AbstractRunDriverPlugin): """ Allows the setting of the nice and ionice values of the benchmarking process. """ def __init__(self, misc_settings): super().__init__(misc_settings) self.old_nice = int(self._exec_command("nice")) self.old_io_nice = int(self._exec_command("ionice").split(" prio ")[1]) def setup(self): ensure_root("The own nice value can't be lowered without") self._set_nice(self.misc_settings["nice"]) self._set_io_nice(self.misc_settings["io_nice"]) def _set_nice(self, nice: int): self._exec_command("sudo renice -n {} -p {}".format(nice, os.getpid())) def _set_io_nice(self, nice: int): self._exec_command("sudo ionice -n {} -p {}".format(nice, os.getpid())) def teardown(self): self._set_nice(self.old_nice) self._set_io_nice(self.old_io_nice) @register(ExecRunDriver, "env_randomize", Dict({ "min": NaturalNumber() // Default(0) // Description("Minimum number of added random environment variables"), "max": PositiveInt() // Default(100) // Description("Maximum number of added random environment variables"), "var_max": PositiveInt() // Default(1000) // Description("Maximum length of each random value"), "key_max": PositiveInt() // Default(100) // Description("Maximum length of each random key") })) class EnvRandomizePlugin(AbstractRunDriverPlugin): """ Adds random environment variables. """ def setup_block(self, block: RunProgramBlock, runs: int = 1): env = {} for i in range(random.randint(self.misc_settings["min"], self.misc_settings["max"])): env["a" * random.randint(0, self.misc_settings["key_max"])] \ = "a" * random.randint(0, self.misc_settings["var_max"]) block["env"] = env @register(ExecRunDriver, "preheat", Dict({ "time": NaturalNumber() // Default(10) // Description("Number of seconds to preheat the system with an cpu bound task") })) class PreheatPlugin(AbstractRunDriverPlugin): """ Preheats the system with a cpu bound task (calculating the inverse of a big random matrice with numpy). 
""" def setup(self): heat_time = self.misc_settings["time"] logging.info("Preheat the system for {} seconds with a cpu bound task" .format(heat_time)) cmd = "timeout {} python3 -c 'import numpy as np; " \ "m = np.random.randint(0, 100, (500, 500)); " \ "print(list(map(lambda x: len(np.linalg.eig(m)), range(10000))))' > /dev/null".format(heat_time) procs = [] for i in range(0, multiprocessing.cpu_count()): proc = subprocess.Popen(["bash", "-c", cmd], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) procs.append(proc) time.sleep(heat_time) for proc in procs: try: proc.poll() except BaseException as ex: logging.error(ex) @register(ExecRunDriver, "other_nice", Dict({ "nice": Int(range=range(-20, 20)) // Description("Niceness values for other processes.") // Default(18), "min_nice": Int(range=range(-15, 20)) // Default(-10) // Description("Processes with lower nice values are ignored.") })) class OtherNicePlugin(AbstractRunDriverPlugin): """ Allows the setting of the nice value of all other processes (tha have nice > -10). """ def __init__(self, misc_settings): super().__init__(misc_settings) self.old_nices = {} ensure_root("The nice values of other processes can't be disabled without") def setup(self): for line in self._exec_command("sudo /bin/ps --noheaders -e -o pid,nice").split("\n"): line = line.strip() arr = list(filter(lambda x: len(x) > 0, line.split(" "))) if len(arr) == 0: continue pid = int(arr[0].strip()) nice = arr[1].strip() if nice != "-" and int(nice) > self.misc_settings["min_nice"] and pid != os.getpid(): self.old_nices[pid] = int(nice) try: self._set_nice(pid, self.misc_settings["nice"]) except EnvironmentError as err: #logging.info(err) pass def _set_nice(self, pid: int, nice: int): self._exec_command("sudo renice -n {} -p {}".format(nice, pid)) def teardown(self): for pid in self.old_nices: try: self._set_nice(pid, self.old_nices[pid]) except EnvironmentError as err: #logging.info(err) pass @register(ExecRunDriver, "stop_start", Dict({ "min_nice": Int(range=range(-15, 20)) // Default(-10) // Description("Processes with lower nice values are ignored."), "min_id": PositiveInt() // Default(1500) // Description("Processes with lower id are ignored."), "comm_prefixes": ListOrTuple(Str()) // Default(["ssh", "xorg", "bluetoothd"]) // Description("Each process which name (lower cased) starts with one of the prefixes is not ignored. " "Overrides the decision based on the min_id."), "comm_prefixes_ignored": ListOrTuple(Str()) // Default(["dbus", "kworker"]) // Description("Each process which name (lower cased) starts with one of the prefixes is ignored. " "It overrides the decisions based on comm_prefixes and min_id."), "subtree_suffixes": ListOrTuple(Str()) // Default(["dm", "apache"]) // Description("Suffixes of processes names which are stopped."), "dry_run": Bool() // Default(False) // Description("Just output the to be stopped processes but don't actually stop them?") })) class StopStartPlugin(AbstractRunDriverPlugin): """ Stop almost all other processes. 
""" def __init__(self, misc_settings): ensure_root("other processes can't be stopped without") super().__init__(misc_settings) self.processes = {} self.pids = [] def parse_processes(self): self.processes = {} for line in self._exec_command("/bin/ps --noheaders -e -o pid,nice,comm,cmd,ppid").split("\n"): line = line.strip() arr = list(map(lambda x: x.strip(), filter(lambda x: len(x) > 0, line.split(" ")))) if len(arr) == 0: continue self.processes[int(arr[0])] = { "pid": int(arr[0]) if arr[0] != "-" else -1, "nice": int(arr[1]) if arr[1] != "-" else -20, "comm": arr[2], "cmd": arr[3], "ppid": int(arr[4]) if len(arr) == 5 else 0 } def _get_ppids(self, pid: int) -> t.List[int]: ppids = [] cur_pid = pid while cur_pid >= 1: cur_pid = self.processes[cur_pid]["ppid"] if cur_pid != 0: ppids.append(cur_pid) return ppids def _get_pcomms(self, pid: int) -> t.List[str]: return [self.processes[id]["comm"] for id in self._get_ppids(pid)] def _get_child_pids(self, pid: int) -> t.List[int]: ids = [] for proc in self.processes: if proc["ppid"] == pid: ids.append(proc["ppid"]) return ids def _get_child_comms(self, pid: int) -> t.List[str]: return [self.processes[id] for id in self._get_child_pids(pid)] def _proc_dict_to_str(self, proc_dict: t.Dict) -> str: return "Process(id={pid:5d}, parent={ppid:5d}, nice={nice:2d}, name={comm})".format(**proc_dict) def setup(self): self.parse_processes() for proc in self.processes.values(): if proc["pid"] == os.getpid(): continue if any(proc["comm"].startswith(pref) for pref in self.misc_settings["comm_prefixes_ignored"]): continue if proc["nice"] == "-" or int(proc["nice"]) < self.misc_settings["min_nice"]: continue suffixes = self.misc_settings["subtree_suffixes"] if any(proc["comm"].startswith(pref) for pref in self.misc_settings["comm_prefixes"]) or \ proc["pid"] >= self.misc_settings["min_id"] or \ any(any(pcomm.endswith(suff) for suff in suffixes) for pcomm in self._get_pcomms(proc["pid"])): if self.misc_settings["dry_run"]: logging.info(self._proc_dict_to_str(proc)) else: self.pids.append(proc["pid"]) if self.misc_settings["dry_run"]: raise KeyboardInterrupt() self._send_signal(signal.SIGSTOP) def _send_signal(self, signal: int): for pid in self.pids: try: os.kill(pid, signal) except BaseException as ex: logging.info(ex) pass def teardown(self): self._send_signal(signal.SIGCONT) @register(ExecRunDriver, "sync", Dict({})) class SyncPlugin(AbstractRunDriverPlugin): """ Call sync before each program execution. """ def setup_block_run(self, block: RunProgramBlock, runs: int = 1): os.sync() @register(ExecRunDriver, "sleep", Dict({ "seconds": PositiveInt() // Default(10) // Description("Seconds to sleep") })) class SleepPlugin(AbstractRunDriverPlugin): """ Sleep a given amount of time before the benchmarking begins. See Gernot Heisers Systems Benchmarking Crimes: Make sure that the system is really quiescent when starting an experiment, leave enough time to ensure all previous data is flushed out. """ def setup_block(self, block: RunProgramBlock, runs: int = 1): block["cmd_prefix"].append("sleep {}".format(self.misc_settings["seconds"])) @register(ExecRunDriver, "drop_fs_caches", Dict({ "free_pagecache": Bool() // Default(True) // Description("Free the page cache"), "free_dentries_inodes": Bool() // Default(True) // Description("Free dentries and inodes") })) class DropFSCaches(AbstractRunDriverPlugin): """ Drop page cache, directoy entries and inodes before every benchmarking run. 
""" def setup(self): ensure_root("The page cache, directoy entries and inodes can't be dropped without") def setup_block_run(self, block: RunProgramBlock): num = self.misc_settings["free_pagecache"] + 2 * self.misc_settings["free_dentries_inodes"] self._exec_command("sudo sync; sudo sh -c 'echo {} > /proc/sys/vm/drop_caches'".format(num)) @register(ExecRunDriver, "disable_swap", Dict({})) class DisableSwap(AbstractRunDriverPlugin): """ Disables swapping on the system before the benchmarking and enables it after. """ def setup(self): ensure_root("Swapping can't be disabled without") self._exec_command("sudo swapoff -a") def teardown(self): self._exec_command("sudo swapon -a") @register(ExecRunDriver, "disable_cpu_caches", Dict({})) class DisableCPUCaches(AbstractRunDriverPlugin): """ Disable the L1 and L2 caches on x86 and x86-64 architectures. Uses a small custom kernel module (be sure to compile it via 'temci setup'). :warning slows program down significantly and has probably other weird consequences :warning this is untested :warning a linux-forum user declared: Disabling cpu caches gives you a pentium I like processor!!! """ def setup(self): ensure_root("Loading kernel module to disable the cpu caches can't be done without") setup.exec("cpu_cache", "sudo insmod disable_cache.ko") def teardown(self): setup.exec("cpu_cache", "sudo rmmod disable_cache.ko") @register(ExecRunDriver, "cpu_governor", Dict({ "governor": Str() // Default("performance") // Description("New scaling governor for all cpus") })) class CPUGovernor(AbstractRunDriverPlugin): """ Allows the setting of the scaling governor of all cpu cores, to ensure that all use the same. """ def setup(self): cpu_dir_temp = "/sys/devices/system/cpu/cpu{}/cpufreq/" self.cpu_paths = [] self.old_governors = [] self.av_governors = [] num = 0 while os.path.exists(cpu_dir_temp.format(num)) and os.path.isdir(cpu_dir_temp.format(num)): cpu_path = cpu_dir_temp.format(num) self.cpu_paths.append(cpu_path) with open(cpu_path + "scaling_governor", "r") as f: self.old_governors.append(f.readline().strip()) with open(cpu_path + "scaling_available_governors") as f: self.av_governors.append(f.readline().strip().split(" ")) num += 1 for cpu in range(len(self.cpu_paths)): self._set_scaling_governor(cpu, self.misc_settings["governor"]) def teardown(self): for cpu in range(len(self.cpu_paths)): self._set_scaling_governor(cpu, self.old_governors[cpu]) def _set_scaling_governor(self, cpu: int, governor: str): assert cpu <= len(self.cpu_paths) if governor not in self.av_governors: raise ValueError("No such governor {} for cpu {}, expected one of these: ". format(cpu, governor, ", ".join(self.av_governors))) with open(self.cpu_paths[cpu] + "scaling_governor", "w") as f: self._exec_command("sudo bash -c 'echo {gov} > {p}scaling_governor'" .format(gov=governor, p=self.cpu_paths[cpu])) PK#}6H5bbtemci/run/run_driver.py""" This modules contains the base run driver, needed helper classes and registries. """ import os import datetime import re import shutil import collections from temci.setup import setup from temci.utils.settings import Settings from temci.utils.typecheck import NoInfo from temci.utils.vcs import VCSDriver from ..utils.typecheck import * from ..utils.registry import AbstractRegistry, register from .cpuset import CPUSet from copy import deepcopy, copy import logging, time, random, subprocess from collections import namedtuple import gc import typing as t class RunDriverRegistry(AbstractRegistry): """ The registry for run drivers. 
""" settings_key_path = "run" use_key = "driver" use_list = False default = "exec" registry = {} class RunProgramBlock: """ An object that contains every needed information of a program block. """ def __init__(self, id: int, data, attributes: dict, run_driver: type = None): """ :param data: :param attributes: :param type_scheme: :return: """ if run_driver is not None: self.run_driver_class = run_driver else: self.run_driver_class = RunDriverRegistry.get_class(RunDriverRegistry.get_used()) self.type_scheme = self.run_driver_class.block_type_scheme self.data = deepcopy(self.run_driver_class.block_default) self.data.update(data) self.attributes = attributes self.is_enqueued = False self.id = id """Is this program block enqueued in a run worker pool queue?""" def __getitem__(self, key: str): """ Returns the value associated with the given key. """ return self.data[key] def __setitem__(self, key: str, value): """ Sets the value associated with the passed key to the new value. :param key: passed key :param value: new value :raises TypeError if the value hasn't the expected type """ value_name = "run programm block[{}]".format(key) typecheck(self.type_scheme, Dict) typecheck(value, self.type_scheme[key], value_name=value_name) self.data[key] = value def __contains__(self, item) -> bool: return item in self.data def __repr__(self): return "RunDataBlock({}, {})".format(self.data, self.attributes) def copy(self): """ Copy this run program block. Deep copies the data and uses the same type scheme and attributes. :return: """ return RunProgramBlock(self.id, deepcopy(self.data), self.attributes, self.run_driver_class) def __len__(self): return min(map(len, self.data.values())) if len(self.data) > 0 else 0 @classmethod def from_dict(cls, id: int, data, run_driver: type = None): """ Structure of data:: { "attributes": {"attr1": ..., ...}, "run_config": {"prop1": ..., ...} } :param id: id of the block (only used to track them later) :param data: used data :param run_driver: used RunDriver subclass :return: new RunProgramBlock """ typecheck(data, Dict({ "attributes": Dict(all_keys=False, key_type=Str()), "run_config": Dict(all_keys=False) })) block = RunProgramBlock(id, data["run_config"], data["attributes"], run_driver) return block def to_dict(self): return { "attributes": self.attributes, "run_config": self.data } class BenchmarkingResultBlock: def __init__(self, data: dict = None): self.data = collections.defaultdict(lambda: []) if data: self.add_run_data(data) def properties(self) -> t.List[str]: return list(self.data.keys()) def add_run_data(self, data: dict): typecheck(data, Dict(all_keys=False, key_type=Str(), value_type=Int()|Float())) for prop in data: self.data[prop].append(data[prop]) def to_dict(self): return { "properties": self.properties(), "data": self.data } @classmethod def from_dict(cls, source: dict): typecheck(source, Dict({ "data": Dict(all_keys=False, key_type=Str(), value_type=Int()|Float()) }, all_keys=False)) return BenchmarkingResultBlock(source["data"]) class AbstractRunDriver(AbstractRegistry): """ A run driver """ settings_key_path = "run/plugins" use_key = "active" use_list = True default = [] block_type_scheme = Dict() block_default = {} registry = {} def __init__(self, misc_settings: dict = None): """ Also calls the setup methods on all registered plugins. It calls the setup() method. 
:param misc_settings: further settings :return: """ self.misc_settings = misc_settings self.used_plugins = [self.get_for_name(name) for name in self.get_used()] self.setup() def setup(self): """ Call the setup() method on all used plugins for this driver. """ for plugin in self.used_plugins: plugin.setup() def teardown(self): """ Call the teardown() method on all used plugins for this driver. """ for plugin in self.used_plugins: plugin.teardown() def _setup_block(self, block: RunProgramBlock): """ Call the setup_block() method on all used plugins for this driver. """ for plugin in self.used_plugins: plugin.setup_block(block) def _setup_block_run(self, block: RunProgramBlock): """ Call the setup_block_run() method on all used plugins for this driver. """ for plugin in self.used_plugins: plugin.setup_block_run(block) def _teardown_block(self, block: RunProgramBlock): """ Call the teardown_block() method on all used plugins for this driver. """ for plugin in self.used_plugins: plugin.teardown_block(block) def benchmark(self, block: RunProgramBlock, runs: int, cpuset: CPUSet = None, set_id: int = 0) -> BenchmarkingResultBlock: """ Benchmark the passed program block "runs" times and return the benchmarking results. :param block: run program block to benchmark :param runs: number of benchmarking runs :return: object that contains a dictionary of properties with associated raw run data :raises BenchmarkingError if the benchmarking of the passed block fails """ raise NotImplementedError() @register(RunDriverRegistry, "exec", Dict({ "perf_stat_props": ListOrTuple(Str()) // Description("Measured properties") // Default(["task-clock", "branch-misses", "cache-references", "cache-misses", "cycles", "instructions"]), "perf_stat_repeat": PositiveInt() // Description("If runner=perf_stat make measurements of the program" "repeated n times. Therefore scale the number of times a program." "is benchmarked.") // Default(1), "runner": ExactEither("") // Description("If not '' overrides the runner setting for each program block") // Default(""), "random_cmd": Bool() // Default(True) // Description("Pick a random command if more than one run command is passed.") }, all_keys=False)) class ExecRunDriver(AbstractRunDriver): """ Implements a simple run driver that just executes one of the passed run_cmds in each benchmarking run. It meausures the time using the perf stat tool (runner=perf_stat). """ settings_key_path = "run/exec_plugins" use_key = "exec_active" use_list = True default = ["nice"] block_type_scheme = Dict({ "run_cmd": (List(Str()) | Str()) // Description("Commands to benchmark"), "env": Dict(all_keys=False, key_type=Str()) // Description("Environment variables"), "cmd_prefix": List(Str()) // Description("Command to append before the commands to benchmark"), "revision": (Int(lambda x: x >= -1) | Str()) // Description("Used revision (or revision number)." 
"-1 is the current revision."), "cwd": (List(Str())|Str()) // Description("Execution directories for each command"), "runner": ExactEither() // Description("Used runner") }, all_keys=False) block_default = { "run_cmd": "", "env": {}, "cmd_prefix": [], "revision": -1, "cwd": ".", "base_dir": ".", "runner": "perf_stat" } registry = {} def __init__(self, misc_settings: dict = None): super().__init__(misc_settings) self.dirs = {} def _setup_block(self, block: RunProgramBlock): if isinstance(block["run_cmd"], List(Str())): block["run_cmds"] = block["run_cmd"] else: block["run_cmds"] = [block["run_cmd"]] if isinstance(block["cwd"], List(Str())): if len(block["cwd"]) != len(block["run_cmd"]) and not isinstance(block["run_cmd"], str): raise ValueError("Number of passed working directories {} " "is unequal with number of passed run commands {}" .format(len(block["cwd"]), len(block["run_cmd"]))) block["cwds"] = block["cwd"] else: block["cwds"] = [block["cwd"]] * len(block["run_cmds"]) self.uses_vcs = block["revision"] != -1 self.vcs_driver = None self.tmp_dir = "" if self.uses_vcs and block.id not in self.dirs: self.vcs_driver = VCSDriver.get_suited_vcs(".") self.tmp_dir = os.path.join(Settings()["tmp_dir"], datetime.datetime.now().strftime("%s%f")) os.mkdir(self.tmp_dir) self.dirs[block.id] = os.path.join(self.tmp_dir, str(block.id)) os.mkdir(self.dirs[block.id]) self.vcs_driver.copy_revision(block["revision"], ".", self.dirs[block.id]) block["working_dir"] = self.dirs[block.id] if self.misc_settings["runner"] != "": block["runner"] = self.misc_settings["runner"] super()._setup_block(block) def benchmark(self, block: RunProgramBlock, runs: int, cpuset: CPUSet = None, set_id: int = 0) -> BenchmarkingResultBlock: t = time.time() block = block.copy() self._setup_block(block) gc.collect() gc.disable() try: res = self._benchmark(block, runs, cpuset, set_id) except BaseException: self.teardown() logging.error("Forced teardown of RunProcessor") raise finally: gc.enable() self._teardown_block(block) t = time.time() - t assert isinstance(res, BenchmarkingResultBlock) res.data["ov-time"] = [t / runs] * runs #print(res.data) return res ExecResult = namedtuple("ExecResult", ['time', 'stderr', 'stdout']) """ A simple named tuple named ExecResult with to properties: time, stderr and stdout """ def _benchmark(self, block: RunProgramBlock, runs: int, cpuset: CPUSet = None, set_id: int = 0): block = block.copy() runner = self.get_runner(block) runner.setup_block(block, runs, cpuset, set_id) results = [] for i in range(runs): self._setup_block_run(block) results.append(self._exec_command(block["run_cmds"], block, cpuset, set_id)) res = None # type: BenchmarkingResultBlock for exec_res in results: res = runner.parse_result(exec_res, res) return res def _exec_command(self, cmds: list, block: RunProgramBlock, cpuset: CPUSet = None, set_id: int = 0) -> ExecResult: """ Executes one randomly chosen command of the passed ones. And takes additional settings in the passed run program block into account. 
:param cmds: list of commands :param block: passed run program block :return: time in seconds the execution needed to finish """ typecheck(cmds, List(Str())) rand_index = random.randrange(0, len(cmds)) if self.misc_settings["random_cmd"] else 0 cmd = cmds[rand_index] cwd = block["cwds"][rand_index] executed_cmd = block["cmd_prefix"] + [cmd] if cpuset is not None: executed_cmd.insert(0, "sudo cset proc --move --force --pid $$ {} > /dev/null"\ .format(cpuset.get_sub_set(set_id))) env = os.environ.copy() env.update(block["env"]) env.update({'LC_NUMERIC': 'en_US.ASCII'}) t = time.time() executed_cmd = "; ".join(executed_cmd) proc = subprocess.Popen(["/bin/sh", "-c", executed_cmd], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, cwd=cwd, env=env) out, err = proc.communicate() t = time.time() - t if proc.poll() > 0: msg = "Error executing " + cmd + ": "+ str(err) + " " + str(out) logging.error(msg) raise BenchmarkingError(msg) return self.ExecResult(time=t, stderr=str(err), stdout=str(out)) def teardown(self): super().teardown() if hasattr(self, "tmp_dir") and os.path.exists(self.tmp_dir): shutil.rmtree(self.tmp_dir) runners = {} @classmethod def register_runner(cls): def dec(klass): assert issubclass(klass, ExecRunner) cls.runners[klass.name] = klass cls.block_type_scheme["runner"] |= E(klass.name) Settings().modify_type_scheme("run/exec_misc/runner", lambda x: x | E(klass.name)) cls.block_type_scheme[klass.name] = klass.misc_options cls.block_default[klass.name] = klass.misc_options.get_default() if klass.__doc__ is not None: header = ""# "Description of {} (class {}):\n".format(name, klass.__qualname__) lines = str(klass.__doc__.strip()).split("\n") lines = map(lambda x: " " + x.strip(), lines) description = Description(header + "\n".join(lines)) klass.__description__ = description.description else: klass.__description__ = "" return klass return dec @classmethod def get_runner(cls, block: RunProgramBlock) -> 'ExecRunner': return cls.runners[block["runner"]](block) class ExecRunner: """ Base class for runners for the ExecRunDriver. """ name = None # type: str misc_options = Dict({}) def __init__(self, block: RunProgramBlock): self.misc = self.misc_options.get_default() if self.name in block: self.misc.update(block[self.name]) typecheck(self.misc, self.misc_options) def setup_block(self, block: RunProgramBlock, runs: int, cpuset: CPUSet = None, set_id: int = 0): pass def parse_result(self, exec_res: ExecRunDriver.ExecResult, res: BenchmarkingResultBlock = None) -> dict: raise NotImplementedError() def get_av_perf_stat_properties() -> t.List[str]: proc = subprocess.Popen(["/bin/sh", "-c", "perf list"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) out, err = proc.communicate() if proc.poll() > 0: raise EnvironmentError("Error calling 'perf list': {}".format(err)) lines = out.split("\n")[3:] props = [] for line in lines: line = line.strip() if line == "" or "=" in line or "<" in line or "NNN" in line: continue prop = line.split(" ", 1)[0].strip() if prop != "": props.append(prop) return props class ValidPerfStatPropertyList(Type): """ Checks for the value to be a valid perf stat measurement property list. 
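    A value that passes this check is the default property list of the perf_stat runner
    below:

        ["cache-misses", "cycles", "task-clock", "instructions",
         "branch-misses", "cache-references"]

    Validation works by actually running perf stat -x ';' -e <properties> -- /bin/echo
    and checking its exit status.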
""" def __init__(self): super().__init__() av = get_av_perf_stat_properties() self.completion_hints = { "zsh": "({})".format(" ".join(av)), "fish": { "hint": list(av) } } def _instancecheck_impl(self, value, info: Info = NoInfo()): if not isinstance(value, List(Str())): return info.errormsg(self) cmd = "perf stat -x ';' -e {props} -- /bin/echo".format(props=",".join(value)) proc = subprocess.Popen(["/bin/sh", "-c", cmd], stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, universal_newlines=True) out, err = proc.communicate() if proc.poll() > 0: return info.errormsg("Not a valid properties list: " + str(err).split("\n")[0].strip()) return info.wrap(True) def __str__(self) -> str: return "ValidPerfStatPropertyList()" def _eq_impl(self, other): return True @ExecRunDriver.register_runner() class PerfStatExecRunner(ExecRunner): """ Runner that uses perf stat for measurements. """ name = "perf_stat" misc_options = Dict({ "repeat": NaturalNumber() // Default(1), "properties": ValidPerfStatPropertyList() // Default(["cache-misses", "cycles", "task-clock", "instructions", "branch-misses", "cache-references"]) }) def setup_block(self, block: RunProgramBlock, runs: int, cpuset: CPUSet = None, set_id: int = 0): do_repeat = self.misc["repeat"] > 1 def modify_cmd(cmd): return "perf stat {repeat} -x ';' -e {props} -- {cmd}".format( props=",".join(self.misc["properties"]), cmd=cmd, repeat="--repeat {}".format(self.misc["properties"]) if do_repeat else "" ) block["run_cmds"] = [modify_cmd(cmd) for cmd in block["run_cmds"]] def parse_result(self, exec_res: ExecRunDriver.ExecResult, res: BenchmarkingResultBlock = None) -> BenchmarkingResultBlock: res = res or BenchmarkingResultBlock() m = {"ov-time": exec_res.time} for line in exec_res.stderr.strip().split("\n"): if ';' in line: var, empty, descr = line.split(";")[0:3] try: m[descr] = float(var) except: pass res.add_run_data(m) return res def get_av_rusage_properties() -> t.Dict[str, str]: """ Returns the available properties for the RusageExecRunner mapped to their descriptions. """ return { "utime": "user CPU time used", "stime": "system CPU time used", "maxrss": "maximum resident set size", "ixrss": "integral shared memory size", "idrss": "integral unshared data size", "isrss": "integral unshared stack size", "nswap": "swaps", "minflt": "page reclaims (soft page faults)", "majflt": "page faults (hard page faults)", "inblock": "block input operations", "oublock": "block output operations", "msgsnd": "IPC messages sent", "msgrcv": "IPC messages received", "nsignals": "signals received", "nvcsw": "voluntary context switches", "nivcsw": "involuntary context switches" } class ValidRusagePropertyList(Type): """ Checks for the value to be a valid rusage runner measurement property list. """ def __init__(self): super().__init__() self.av = list(get_av_rusage_properties().keys()) self.completion_hints = { "zsh": "({})".format(" ".join(self.av)), "fish": { "hint": list(self.av) } } def _instancecheck_impl(self, value, info: Info = NoInfo()): if not isinstance(value, List(Str())): return info.errormsg(self) for elem in value: if elem not in self.av: return info.errormsg(self, "No such rusage property " + repr(elem)) return info.wrap(True) def __str__(self) -> str: return "ValidRusagePropertyList()" def _eq_impl(self, other): return True @ExecRunDriver.register_runner() class RusageExecRunner(ExecRunner): """ Runner that uses the getrusage(2) function to obtain resource measurements. 
""" name = "rusage" misc_options = Dict({ "properties": ValidRusagePropertyList() // Default(sorted(list(get_av_rusage_properties().keys()))) }) def __init__(self, block: RunProgramBlock): super().__init__(block) def setup_block(self, block: RunProgramBlock, runs: int, cpuset: CPUSet = None, set_id: int = 0): def modify_cmd(cmd): return "{} {!r}".format( setup.script_relative("rusage/rusage"), cmd ) block["run_cmds"] = [modify_cmd(cmd) for cmd in block["run_cmds"]] def parse_result(self, exec_res: ExecRunDriver.ExecResult, res: BenchmarkingResultBlock = None) -> BenchmarkingResultBlock: res = res or BenchmarkingResultBlock() m = {"ov-time": exec_res.time} for line in reversed(exec_res.stdout.strip().split("\n")): if '#' in line: break if ' ' in line: var, val = line.strip().split(" ") if var in self.misc["properties"]: try: m[var] = float(val) except: pass res.add_run_data(m) return res @ExecRunDriver.register_runner() class SpecExecRunner(ExecRunner): """ Runner for SPEC like single benchmarking suites. It works with resulting property files, in which the properties are collon separated from their values. """ name = "spec" misc_options = Dict({ "file": Str() // Default("") // Description("SPEC result file"), "base_path": Str() // Default(""), "path_regexp": Str() // Default(".*") // Description("Regexp matching the base property path for each measured property"), "code": Str() // Default("get()") // Description("Code that is executed for each matched path. " "The code should evaluate to the actual measured value for the path." "it can use the function get(sub_path: str = '') and the modules " "pytimeparse, numpy, math, random, datetime and time.") }) def __init__(self, block: RunProgramBlock): super().__init__(block) if not self.misc["base_path"].endswith(".") and len(self.misc["base_path"]) > 0: self.misc["base_path"] += "." if not self.misc["path_regexp"].startswith("^"): self.misc["path_regexp"] = "^" + self.misc["path_regexp"] self.path_regexp = re.compile(self.misc["path_regexp"]) def setup_block(self, block: RunProgramBlock, runs: int, cpuset: CPUSet = None, set_id: int = 0): block["run_cmds"] = ["{}; cat {}".format(cmd, self.misc["file"]) for cmd in block["run_cmds"]] def parse_result(self, exec_res: ExecRunDriver.ExecResult, res: BenchmarkingResultBlock = None) -> BenchmarkingResultBlock: props = {} for line in exec_res.stdout.split("\n"): if ":" not in line: continue arr = line.split(":") if len(arr) != 2 or not arr[0].strip().startswith(self.misc["base_path"]): continue val = 0 try: val = float(arr[1].strip()) except ValueError: continue whole_path = arr[0].strip()[len(self.misc["base_path"]):] matches = self.path_regexp.match(whole_path) if matches: path = matches.group(0) if path not in props: props[path] = {} sub_path = whole_path[len(path):] props[path][sub_path] = val data = {} for prop in props: def get(sub_path: str = ""): return props[prop][sub_path] data[prop] = eval(self.misc["code"]) if len(data) == 0: logging.error("No properties in the result file matched begin with {!r} " "and match the passed regular expression {!r}" .format(self.misc["base_path"], self.path_regexp)) res = res or BenchmarkingResultBlock() res.add_run_data(data) return res class BenchmarkingError(RuntimeError): """ Thrown when the benchmarking of a program block fails. 
"""PKco:H[lltemci/scripts/cli.pyimport warnings from temci.scripts.temci_completion import completion_file_name from temci.utils import util if __name__ == "__main__": util.allow_all_imports = True warnings.simplefilter("ignore") import shutil import subprocess import time import humanfriendly from temci.scripts.init import prompt_run_config, prompt_build_config from temci.utils.typecheck import * from temci.run.run_processor import RunProcessor from temci.build.assembly import AssemblyProcessor from temci.build.build_processor import BuildProcessor import temci.run.run_driver as run_driver import temci.run.run_driver_plugin from temci.tester.report import ReporterRegistry from temci.utils.settings import Settings from temci.tester.report_processor import ReportProcessor import click, sys, yaml, logging, json, os from temci.utils.click_helper import type_scheme_option, cmd_option, CmdOption, CmdOptionList import temci.scripts.version @click.group(epilog=""" temci (version {}) Copyright (C) 2016 Johannes Bechberger This program comes with ABSOLUTELY NO WARRANTY. This is free software, and you are welcome to redistribute it under certain conditions. For details, see the LICENSE file in the source folder of temci. This program is still in an alpha stage. It may happen that you're system needs to be rebooted to be usable again. The main workflow is to write config files and use them with the program. Although command line options are supported, config files are way easier to use. """.format(temci.scripts.version.version)) def cli(): pass command_docs = { "assembler": "Wrapper around the gnu assembler to allow assembler randomization", "build": "Build program blocks", "report": "Generate a report from benchmarking result", "init": "Helper commands to initialize files (like settings)", "completion": "Creates completion files for several shells.", "short": "Utility commands to ease working directly on the command line", "clean": "Clean up the temporary files", "setup": "Compile all needed binaries in the temci scripts folder", "version": "Print the current version ({})".format(temci.scripts.version.version) } for driver in run_driver.RunDriverRegistry.registry: command_docs[driver] = run_driver.RunDriverRegistry.registry[driver].__description__.strip().split("\n")[0] common_options = CmdOptionList( CmdOption.from_non_plugin_settings("") ) run_options = { "common": CmdOptionList( CmdOption.from_non_plugin_settings("run"), CmdOption.from_non_plugin_settings("stats", name_prefix="stats_") ), "run_driver_specific": { # besides DRIVER_misc and plugins "exec": CmdOptionList() } } # init the run_options dictionary for driver in run_driver.RunDriverRegistry.registry: options = CmdOptionList( CmdOption.from_registry(run_driver.RunDriverRegistry.registry[driver]), CmdOption.from_non_plugin_settings("run/{}_misc".format(driver)), CmdOption.from_non_plugin_settings("run/cpuset", name_prefix="cpuset_"), run_options["common"] ) if driver not in run_options["run_driver_specific"]: run_options["run_driver_specific"][driver] = options else: run_options["run_driver_specific"][driver].append(options) report_options = CmdOptionList( CmdOption.from_non_plugin_settings("report"), CmdOption.from_non_plugin_settings("stats") ) # init the report_options dictionary for reporter in ReporterRegistry.registry: options = CmdOption.from_non_plugin_settings("report/{}_misc".format(reporter), name_prefix=reporter + "_") report_options.append(options) build_options = CmdOptionList( CmdOption.from_non_plugin_settings("build") ) 
misc_commands = { "init": { "common": CmdOptionList(), "sub_commands": { "settings": CmdOptionList(), "build_config": CmdOptionList(), "run_config": CmdOptionList() } }, "completion": { "common": CmdOptionList(), "sub_commands": { "bash": CmdOptionList(), #"fish": CmdOptionList(), "zsh": CmdOptionList() } }, "short": { "common": CmdOptionList(), "sub_commands": { "exec": CmdOptionList(CmdOption("with_description", type_scheme=ListOrTuple(Tuple(Str(), Str())) // Description("DESCRIPTION COMMAND: Benchmark the command and set its" " description attribute."), short="d", completion_hints={"zsh": "_command"}), CmdOption("without_description", short="wd", type_scheme=ListOrTuple(Str()) // Description("COMMAND: Benchmark the command and use " "itself as its description."), completion_hints={"zsh": "_command"}), run_options["run_driver_specific"]["exec"], run_options["common"] ) } }, "clean": CmdOptionList() } misc_commands_description = { "completion": { "zsh": "Creates a new tab completion file for zsh and returns it's file name", #"fish": "Creates a file /tmp/temci_fish_completion for fish completion support.", "bash": "Creates a new tab completion file for zsh and returns it's file name", }, "init": { "settings": "Create a new settings file temci.yaml in the current directory", "build_config": "Interactive cli to create (or append to) a build config file", "run_config": "Interactive cli to create (or append to) a run config file" }, "short": { "exec": "Exec code snippets directly with the exec run driver" } } # Register a command for each run driver for driver in run_driver.RunDriverRegistry.registry: @cli.command(name=driver, short_help=command_docs[driver]) @click.argument("run_file") @cmd_option(common_options) @cmd_option(run_options["common"]) @cmd_option(run_options["run_driver_specific"][driver]) def func(run_file, **kwargs): Settings()["run/driver"] = driver Settings()["run/in"] = run_file try: RunProcessor().benchmark() except KeyboardInterrupt: logging.error("KeyboardInterrupt. Cleaned up everything.") @cli.group(short_help=command_docs["short"]) @cmd_option(common_options) @cmd_option(misc_commands["short"]["common"]) def short(**kwargs): pass @short.command(short_help=misc_commands_description["short"]["exec"]) @cmd_option(common_options) @cmd_option(misc_commands["short"]["sub_commands"]["exec"]) @cmd_option(run_options["run_driver_specific"]["exec"]) def exec(with_description: list = None, without_description: list = None, **kwargs): runs = [] if with_description is not None: for (descr, cmd) in with_description: runs.append({ "run_config": { "run_cmd": [cmd] }, "attributes": { "description": descr } }) if without_description is not None: for cmd in without_description: runs.append({"run_config": { "run_cmd": [cmd] }, "attributes": { "description": cmd } }) Settings()["run/driver"] = "exec" try: RunProcessor(runs).benchmark() except KeyboardInterrupt: logging.error("KeyboardInterrupt. 
Cleaned up everything.") @cli.command(short_help="Generate a report from benchmarking result") @click.argument('report_file', type=click.Path(exists=True)) @cmd_option(common_options) @cmd_option(report_options) def report(report_file: str, **kwargs): Settings()["report/in"] = report_file ReportProcessor().report() @cli.group(short_help=command_docs["init"]) @cmd_option(misc_commands["init"]["common"]) @cmd_option(common_options) def init(**kwargs): pass @init.command(short_help=misc_commands_description["init"]["settings"]) @cmd_option(misc_commands["init"]["sub_commands"]["settings"]) @cmd_option(common_options) def settings(**kwargs): Settings().store_into_file("temci.yaml") @init.command(short_help=misc_commands_description["init"]["build_config"]) @cmd_option(misc_commands["init"]["sub_commands"]["build_config"]) @cmd_option(common_options) def build_config(**kwargs): prompt_build_config() @init.command(short_help=misc_commands_description["init"]["run_config"]) @cmd_option(misc_commands["init"]["sub_commands"]["run_config"]) @cmd_option(common_options) def run_config(**kwargs): prompt_run_config() @cli.command(short_help=command_docs["build"]) @click.argument('build_file', type=click.Path(exists=True)) @cmd_option(common_options) def build(build_file: str, **kwargs): try: Settings()["build/in"] = build_file BuildProcessor().build() except KeyboardInterrupt: logging.error("Aborted") except BaseException as err: print(err) logging.error(str(err)) @cli.command(short_help=command_docs["clean"]) @cmd_option(common_options) def clean(**kwargs): shutil.rmtree(Settings()["tmp_dir"]) @cli.command(short_help=command_docs["version"]) @cmd_option(common_options) def version(**kwargs): print(temci.scripts.version.version) @cli.group(short_help=command_docs["completion"]) @cmd_option(common_options) def completion(**kwargs): pass @completion.command(short_help=misc_commands_description["completion"]["zsh"]) @cmd_option(common_options) def zsh(**kwargs): subcommands = "\n\t".join(['"{}:{}"'.format(cmd, command_docs[cmd]) for cmd in sorted(command_docs.keys())]) def process_options(options: CmdOptionList, one_line=False): typecheck(options, CmdOptionList) strs = [] for option in sorted(options): multiple = isinstance(option.type_scheme, List) or isinstance(option.type_scheme, ListOrTuple) rounds = 10 if multiple else 1 # hack to allow multiple applications of an option assert isinstance(option, CmdOption) descr = "{}".format(option.description) if option.description is not None else "Undoc" option_str = "--{}".format(option.option_name) if option.has_short: option_str = "{{-{},--{}}}".format(option.short, option.option_name) if option.is_flag: option_str = "{{--{o},--no-{o}}}".format(o=option.option_name) new_completion = "" if option.has_completion_hints and "zsh" in option.completion_hints: new_completion = '{option_str}\"[{descr}]: :{hint}"'.format( option_str=option_str, descr=descr, hint=option.completion_hints["zsh"] ) else: format_str = '{option_str}\"[{descr}]"' if option.is_flag else '{option_str}\"[{descr}]: :()"' new_completion = format_str.format( option_str=option_str, descr=descr ) for i in range(rounds): strs.append(new_completion) if one_line: return " ".join(strs) return "\n\t".join(strs) misc_cmds_wo_subcmds = list(filter(lambda x: isinstance(misc_commands[x], CmdOptionList), misc_commands.keys())) misc_cmds_w_subcmds = list(filter(lambda x: isinstance(misc_commands[x], dict), misc_commands.keys())) ret_str = """ # Auto generated tab completion for the temci ({version}) 
benchmarking tool. #compdef temci _temci(){{ # printf '%s ' "${{words[@]}}" > /tmp/out local ret=11 state local -a common_opts common_opts=( {common_opts} ) typeset -A opt_args _arguments -C ':subcommand:->subcommand' '2: :->second_level' '*::options:->options' && ret=0 #echo $state > tmp_file local sub_cmd="" case $words[1] in temci) sub_cmd=$words[2] ;; *) sub_cmd=$words[1] esac #echo $words[@] >> tmp_file case $words[2] in ({misc_cmds_wo_subs}) state="options" ;; esac case $state in subcommand) local -a subcommands subcommands=( {subcommands} ) _describe -t subcommands 'temci subcommand' subcommands && ret=0 ;; """.format(common_opts=process_options(common_options), subcommands=" ".join("\"{}:{}\"".format(cmd, command_docs[cmd]) for cmd in command_docs), misc_cmds_wo_subs="|".join(misc_cmds_wo_subcmds), version=temci.scripts.version.version) ret_str += """ second_level) #echo $words[@] > tmp_file case $words[2] in """ for misc_cmd in misc_cmds_w_subcmds: ret_str += """ ({misc_cmd}) #echo "here" > tmp_file local -a subcommands subcommands=( {sub_cmds} ) _describe -t subcommands 'temci subcommand' subcommands && ret=0 && return 0 ;; """.format(misc_cmd=misc_cmd, sub_cmds="\n\t".join("\"{}:{}\"".format(x, misc_commands_description[misc_cmd][x]) for x in misc_commands_description[misc_cmd])) ret_str += """ (build|report|{drivers}) _arguments "2: :_files -g '*\.yaml' "\ ;; esac ;; """.format(drivers="|".join(sorted(run_driver.RunDriverRegistry.registry.keys()))) ret_str +=""" (options) local -a args args=( $common_opts ) #echo "options" $words[@] > tmp_file case $words[1] in """ for driver in run_driver.RunDriverRegistry.registry.keys(): ret_str += """ {driver}) case $words[2] in *.yaml) args=( $common_opts {opts} ) _arguments "1:: :echo 3" $args && ret=0 ;; *) _arguments "1:: :echo 3" && ret=0 esac ;; """.format(driver=driver, opts=process_options(run_options["run_driver_specific"][driver])) ret_str += """ (report) #echo "(report)" $words[2] case $words[2] in *.yaml) args=( $common_opts {report_opts} ) _arguments "1:: :echo 3" $args && ret=0 ;; *) _arguments "1:: :echo 3" && ret=0 esac ;; (build) case $words[2] in *.yaml) args=( $common_opts {build_opts} ) _arguments "1:: :echo 3" $args && ret=0 ;; *) _arguments "1:: :echo 3" && ret=0 esac ;; """.format(report_opts=process_options(report_options), build_opts=process_options(build_options)) for misc_cmd in misc_cmds_w_subcmds: ret_str += """ ({misc_cmd}) case $words[2] in """.format(misc_cmd=misc_cmd) for sub_cmd in misc_commands[misc_cmd]["sub_commands"]: ret_str +=""" {sub_cmd}) #echo "{sub_cmd}" $words[@] > tmp_file args+=( {common_opts} {opts} ) #echo "sdf" $args[@] > tmp_file _arguments "1:: :echo 3" $args && ret=0 ;; """.format(sub_cmd=sub_cmd, opts=process_options(misc_commands[misc_cmd]["sub_commands"][sub_cmd]), common_opts=process_options(misc_commands[misc_cmd]["common"])) ret_str += """ esac ;; """ ret_str += """ esac case $sub_cmd in """ for misc_cmd in misc_cmds_wo_subcmds: ret_str += """ {misc_cmd}) # echo "{misc_cmd}" $words[@] >> tmp_file args+=( {opts} ) case $words[2] in $sub_cmd) _arguments "1:: :echo 3" $args && ret=0 ;; *) # echo "Hi" >> tmp_file _arguments $args && ret=0 ;; esac ;; """.format(misc_cmd=misc_cmd, opts=process_options(misc_commands[misc_cmd])) ret_str += """ esac #_arguments $common_opts && ret=0 && return 0 ;; esac } compdef _temci temci=temci """ file_name = completion_file_name("zsh") if not os.path.exists(os.path.dirname(file_name)): os.mkdir(os.path.dirname(file_name)) with open(file_name, "w") as f: 
f.write(ret_str) logging.debug("\n".join("{:>3}: {}".format(i, s) for (i, s) in enumerate(ret_str.split("\n")))) f.flush() os.chmod(file_name, 0o777) print(file_name) @completion.command(short_help=misc_commands_description["completion"]["bash"]) @cmd_option(common_options) def bash(**kwargs): subcommands = "\n\t".join(sorted(command_docs.keys())) def process_options(options: CmdOptionList) -> str: typecheck(options, CmdOptionList) strs = [] for option in sorted(options.options): strs.append("--" + option.option_name) if option.short is not None: strs.append("-" + option.short) if option.is_flag: strs.append("--no-" + option.option_name) return "\n\t".join(strs) def process_misc_commands(): ret_str = "" for misc_cmd in misc_commands: if "sub_commands" not in misc_commands[misc_cmd]: continue ret_str += """ case ${{COMP_WORDS[1]}} in {misc_cmd}) case ${{COMP_WORDS[2]}} in """.format(misc_cmd=misc_cmd) for sub_cmd in misc_commands[misc_cmd]["sub_commands"].keys(): ret_str += """ {sub_cmd}) args=( ${{common_opts[@]}} {common_opts} {cmd_ops} ) # printf ' _%s ' "${{args[@]}}" >> /tmp/out # printf ' __%s ' "${{args[*]}}" >> /tmp/out COMPREPLY=( $(compgen -W "${{args[*]}}" -- $cur) ) && return 0 ;; """.format(sub_cmd=sub_cmd, cmd_ops=process_options(misc_commands[misc_cmd]["sub_commands"][sub_cmd]), common_opts=process_options(misc_commands[misc_cmd]["common"])) ret_str += """ *) local args=( ) COMPREPLY=( $(compgen -W "" -- $cur) ) && return 0 esac ;; *) ;; esac """ return ret_str def process_misc_commands_case(): ret_str = "" for misc_cmd in misc_commands: args = [] if "sub_commands" in misc_commands[misc_cmd]: args = " ".join(sorted(misc_commands[misc_cmd]["sub_commands"].keys())) else: typecheck(misc_commands[misc_cmd], CmdOptionList) args = process_options(misc_commands[misc_cmd].append(common_options)) ret_str += """ {misc_cmd}) args=({sub_cmds}) ;; """.format(misc_cmd=misc_cmd, sub_cmds=args) return ret_str run_cmd_file_code = "" for driver in run_driver.RunDriverRegistry.registry: run_cmd_file_code += """ {driver}) case ${{COMP_WORDS[2]}} in *.yaml) args=( $common_opts $run_common_opts {driver_opts} ) COMPREPLY=( $(compgen -W "${{args[*]}}" -- $cur) ) && return 0 ;; esac ;; """.format(driver=driver, driver_opts=process_options(run_options["run_driver_specific"][driver])) file_structure = """ # Auto generated tab completion for the temci ({version}) benchmarking tool. 
_temci(){{ local cur=${{COMP_WORDS[COMP_CWORD]}} local prev=${{COMP_WORDS[COMP_CWORD-1]}} local common_opts=( {common_opts} ) local args=( {common_opts} ) local run_common_opts=( {run_common_opts} ) local report_common_opts=( {report_common_opts} ) local build_common_opts=( {build_common_opts} ) {misc_commands_code} case ${{COMP_WORDS[1]}} in report) case ${{COMP_WORDS[2]}} in *.yaml) args=( $common_opts $report_common_opts ) COMPREPLY=( $(compgen -W "${{args[*]}}" -- $cur) ) && return 0 ;; esac ;; build) case ${{COMP_WORDS[2]}} in *.yaml) args=( $common_opts $build_common_opts ) COMPREPLY=( $(compgen -W "${{args[*]}}" -- $cur) ) && return 0 ;; esac ;; {run_cmd_file_code} *) ;; esac case ${{COMP_WORDS[1]}} in (report|build|{run_drivers}) local IFS=$'\n' local LASTCHAR=' ' COMPREPLY=($(compgen -o plusdirs -o nospace -f -X '!*.yaml' -- "${{COMP_WORDS[COMP_CWORD]}}")) if [ ${{#COMPREPLY[@]}} = 1 ]; then [ -d "$COMPREPLY" ] && LASTCHAR=/ COMPREPLY=$(printf %q%s "$COMPREPLY" "$LASTCHAR") else for ((i=0; i < ${{#COMPREPLY[@]}}; i++)); do [ -d "${{COMPREPLY[$i]}}" ] && COMPREPLY[$i]=${{COMPREPLY[$i]}}/ done fi return 0 ;; {misc_commands_case_code} *) args=({commands}) esac COMPREPLY=( $(compgen -W "${{args[*]}}" -- $cur) ) }} shopt -s extglob complete -F _temci temci """.format(common_opts=process_options(common_options), run_common_opts=process_options(run_options["common"]), report_common_opts=process_options(report_options), commands=" ".join(sorted(command_docs.keys())), run_drivers="|".join(run_options["run_driver_specific"].keys()), misc_commands_case_code=process_misc_commands_case(), misc_commands_code=process_misc_commands(), build_common_opts=process_options(build_options), run_cmd_file_code=run_cmd_file_code, version=temci.scripts.version.version ) file_name = completion_file_name("bash") with open(file_name, "w") as f: f.write(file_structure) logging.debug("\n".join("{:>3}: {}".format(i, s) for (i, s) in enumerate(file_structure.split("\n")))) f.flush() os.chmod(file_name, 0o777) print(file_name) @cli.command(short_help=command_docs["assembler"]) @click.argument("call", type=str) def assembler(call: str): call = call.split(" ") input_file = os.path.abspath(call[-1]) config = json.loads(os.environ["RANDOMIZATION"]) if "RANDOMIZATION" in os.environ else {} as_tool = os.environ["USED_AS"] if "USED_AS" in os.environ else "/usr/bin/as" tmp_assm_file = os.path.join(os.environ["TMP_DIR"] if "TMP_DIR" in os.environ else "/tmp", "temci_assembler.s") def exec(cmd): proc = subprocess.Popen(["/bin/sh", "-c", cmd], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) out, err = proc.communicate() if proc.poll() > 0: return str(err) return None processor = AssemblyProcessor(config) shutil.copy(input_file, tmp_assm_file) call[0] = as_tool shutil.copy(tmp_assm_file, input_file) processor.process(input_file) ret = exec(" ".join(call)) if ret is None: return for i in range(0, 6): shutil.copy(tmp_assm_file, input_file) processor.process(input_file, small_changes=True) ret = exec(" ".join(call)) if ret is None: return #else: # logging.debug("Another try") if processor.config["file_structure"]: logging.warning("Disabled file structure randomization") config["file_structure"] = False for i in range(0, 6): processor = AssemblyProcessor(config) shutil.copy(tmp_assm_file, input_file) processor.process(input_file) ret = exec(" ".join(call)) if ret is None: return logging.info("Another try") logging.error(ret) shutil.copy(tmp_assm_file, input_file) ret = exec(" ".join(call)) if ret is not 
None:
        logging.error(ret)
        exit(1)


def cli_with_error_catching():
    try:
        cli()
    except EnvironmentError as err:
        logging.error(err)
        exit(1)


@cli.command(short_help=command_docs["setup"])
def setup():
    from temci.setup.setup import make_scripts
    make_scripts()


if __name__ == "__main__":
    # for testing purposes only
    sys.argv[1:] = ["short", "exec", "-wd", "ls", "--send_mail", "me@mostlynerdless.de"]
    #sys.argv[1:] = ["exec", "-wd", "ls", "-wd", "ls ..", "-wd", "ls /tmp", "--min_runs", "5", "--max_runs", "5",
    #                "--out", "ls_100.yaml", "--stop_start"]
    #sys.argv[1:] = ["report", "../misc/cparser_c_time.yaml", "--reporter", "html"]
    #sys.argv[1:] = ["init", "settings"]
    #sys.argv[1:] = ["completion", "zsh"]
    #sys.argv[1:] = ["assembler", "'dsafasdf sdaf'"]
    # default = Settings().type_scheme.get_default_yaml()
    # print(str(default))
    # print(yaml.load(default) == Settings().type_scheme.get_default())
    #sys.argv[1:] = ["run", "spec_like.exec.yaml", "--min_runs", "20", "--max_runs", "20"]
    #sys.argv[1:] = ["completion", "bash"]
    #if len(sys.argv) == 1:
    #    sys.argv[1:] = ['build', os.path.join(os.path.abspath("."), 'build.yaml')]
    #    os.chdir(os.path.abspath("../../../test/hadori"))
    #print(repr(sys.argv))
    #run_driver.ExecRunDriver.get_for_name("stop_start").setup()
    #import cProfile
    t = time.time()
    #cProfile.runctx("cli()", globals(), locals(), filename="cli.profile")
    print("Execution took ", humanfriendly.format_timespan(time.time() - t))
    #ctr.create_snapshot()
    # create kcachegrind valid file via "python3 -m pyprof2calltree -i cli.profile"
    #ctr.stats.print_summary()
    #tr.print_diff()
    cli()

temci/scripts/temci_completion.py

"""
Just a more performant version of `temci completion` that rebuilds the completion files
only if the temci version changed. The advantage over using `temci completion` directly
is that it is normally significantly faster.

Usage:

```
temci_completion [zsh|bash]
```

This returns the location of the completion file.
"""

from temci.scripts.version import version
from sys import argv
from os.path import realpath, dirname, exists, join

SUPPORTED_SHELLS = ["zsh", "bash"]

COMPLETION_FILES_DIR = realpath(dirname(__file__) + "/completion_files")


def print_help():
    print("""
temci (version {})  Copyright (C) 2016 Johannes Bechberger
This program comes with ABSOLUTELY NO WARRANTY.
This is free software, and you are welcome to redistribute it
under certain conditions.
For details, see the LICENSE file in the source folder of temci.

Usage of temci_completion:

    temci_completion [{}]

This will return the completion file name.
""".format(version, "|".join(SUPPORTED_SHELLS)))


def completion_file_name(shell: str) -> str:
    assert shell in SUPPORTED_SHELLS
    file = "{shell}.{version}.sh".format(shell=shell, version=version)
    return realpath(join(COMPLETION_FILES_DIR, file))


def cli():
    if len(argv) != 2 or argv[1] not in SUPPORTED_SHELLS:
        print_help()
        exit(len(argv) != 1)
    shell = argv[1]
    file_name = completion_file_name(shell)
    if exists(file_name):
        print(file_name)
    else:
        import subprocess
        try:
            # note the trailing space: without it the shell name is glued to the sub command
            subprocess.check_output(["/bin/sh", "-c", "temci completion " + shell])
        except subprocess.CalledProcessError as ex:
            print("While executing {!r} an error occurred: {}".format(ex.cmd, ex.output))
            exit(1)
        print(file_name)


if __name__ == "__main__":
    cli()

temci/scripts/init.py

def is_builtin_type(type, val) -> bool:
    """ Checks whether the passed value is convertible into the given builtin type.
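
    Example (illustrative, using only the function defined here)::

        is_builtin_type(int, "4")     # -> True,  int("4") succeeds
        is_builtin_type(int, "four")  # -> False, int("four") raises a ValueError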
:param type: builtin type (like int) :param val: tested value """ try: type(val) return True except ValueError: return False class TypeValidator(Validator): """ A validator that validates against type schemes. """ def __init__(self, type_scheme: Type, allow_empty: bool = False): self.type_scheme = type_scheme self.allow_empty = allow_empty def _int_like(self) -> bool: return isinstance(self.type_scheme, Int) or self.type_scheme == int def _bool_like(self) -> bool: return isinstance(self.type_scheme, T(Bool) | T(BoolOrNone)) or self.type_scheme == bool def validate(self, document: Document): val = document.text def raise_error(msg: str = None): msg = msg or str(verbose_isinstance(val, self.type_scheme)) raise ValidationError(message=msg, cursor_position=len(document.text)) if val == "" and self.allow_empty: return if self._int_like(): if not is_builtin_type(int, val): raise_error("Isn't a valid integer.") else: val = int(val) elif self._bool_like(): if not is_builtin_type(bool, val): raise_error("Isn't a valid boolean") else: val = bool(val) if not isinstance(val, self.type_scheme): raise_error() class WordValidator(Validator): """ Like the SentenceValidator but accepts only one word. """ def __init__(self, valid_words: list, ignore_case: bool = True, move_cursor_to_end: bool = False, error_msg: str = None, allow_empty: bool = False): self.valid_words = valid_words self.ignore_case = ignore_case if ignore_case: self.valid_words = [word.lower() for word in self.valid_words] self.move_cursor_to_end = move_cursor_to_end self.allow_empty = allow_empty self.error_msg = error_msg def validate(self, document: Document): text = document.text.lower() if self.ignore_case else document.text if text == "" and self.allow_empty: return if text not in self.valid_words: msg = "Invalid word, expected one of these" if self.ignore_case: msg += " (case is ignored)" reprs = list(map(repr, self.valid_words)) if len(reprs) > 1: msg += ":" + " or ".join([", ".join(reprs[:-1]), reprs[-1]]) else: msg = "Invalid word, expected " + reprs[0] index = len(text) if self.move_cursor_to_end else 0 if self.error_msg is not None: msg = self.error_msg raise ValidationError(message=msg, cursor_position=index) class NonEmptyValidator(Validator): """ Matches all non empty strings. """ def validate(self, document: Document): if document.text == "": raise ValidationError(message="The input mustn't be empty") class RevisionValidator(Validator): """ Matches all valid revision ids. """ def __init__(self, vcs: VCSDriver, allow_empty_string: bool = True): self.vcs = vcs self.allow_empty_string = allow_empty_string def validate(self, document: Document): val = document.text if val == "" and self.allow_empty_string: return if is_builtin_type(int, val): val = int(val) if val == "" or not self.vcs.validate_revision(val): raise ValidationError(message="Invalid revision id") def create_revision_completer(vcs: VCSDriver) -> WordCompleter: """ Creates a WordCompleter for revision ids. :param vcs: used vcs driver :return: WordCompleter """ valid = [] meta_dict = {} if vcs.has_uncommitted(): valid.append("HEAD") meta_dict["HEAD"] = "Uncommitted changes" for info_dict in vcs.get_info_for_all_revisions(max=50): commit_number = str(info_dict["commit_number"]) if not info_dict["is_uncommitted"]: valid.append(str(info_dict["commit_id"])) msg = info_dict["commit_message"] other_branch_str = " from branch " + info_dict["branch"] + "" if info_dict["is_from_other_branch"] else "" msg = "Commit no. 
{commit_number}{other_branch_str}: {msg}".format(**locals()) meta_dict[info_dict["commit_id"]] = msg return WordCompleter(valid, ignore_case=True, meta_dict=meta_dict) class StrListValidator(Validator): """ Matches comma separated lists of strings that are acceptable python identifiers. """ def __init__(self, allow_empty: bool = False): self.allow_empty = False def validate(self, document: Document): if document.text == "": if self.allow_empty: return else: raise ValidationError("Empty list isn't allowed") for elem in document.text.split(","): elem = elem.strip() if not elem.isidentifier(): raise ValidationError("{!r} is not a valid entry of this comma separated list") def prompt_bash(msg: str, allow_empty: bool) -> str: """ Prompts for bash shell code. :param msg: shown message :return: user input """ from pygments.lexers.shell import BashLexer validator = None if allow_empty else NonEmptyValidator() return prompt(msg, lexer=PygmentsLexer(BashLexer), completer=SystemCompleter()) def prompt_python(msg: str, get_globals: t.Callable, get_locals: t.Callable) -> str: """ Prompt for python code. :param get_globals: function that returns the global variables :param get_locals: function that returns the local variables :return: user input """ from ptpython.completer import PythonCompleter from pygments.lexers.python import Python3Lexer python_completer = PythonCompleter(get_globals, get_locals) return prompt(msg, multiline=True, mouse_support=True, lexer=PygmentsLexer(Python3Lexer), completer=python_completer) def prompt_yesno(msg: str, default: bool = None, meta_dict=None) -> bool: """ Prompt for simple yes or no decision. :param msg: asked question :param default: default value :param meta_dict: mapping 'yes' or 'no' to further explanations :return: user input converted to bool """ valid_words = ["yes", "no", "y", "n"] if default is not None: msg += "[" + ("y" if default else "n") + "] " valid_words.append("") completer = WordCompleter(["yes", "no"], ignore_case=True, meta_dict=meta_dict) text = prompt(msg, completer=completer, display_completions_in_columns=True, validator=WordValidator(valid_words, ignore_case=True)) if text == "": return default return text.lower().startswith("y") def message(msg: str, default = None) -> str: """ A utility function to a valid message string with an optional default value. :param msg: original message :param default: optional default value :return: modified message """ if not msg.endswith(" "): msg += " " if default is not None: return msg + "[" + str(default) + "] " return msg def default_prompt(msg: str, default = None, **kwargs): """ Wrapper around prompt that shows a nicer prompt with a default value that isn't editable. Interpretes the empty string as "use default value" :param msg: message :param default: default value :param kwargs: arguments passed directly to the prompt function :return: user input """ msg = message(msg, default) if default is not None and "validator" in kwargs: vali = kwargs["validator"] if isinstance(vali, TypeValidator): vali.allow_empty = True if isinstance(vali, WordValidator): vali.allow_empty = True res = prompt(msg, **kwargs) if res == "" and default is not None: return default return res def prompt_dir(msg: str) -> str: """ Prompt a directory path. Default is ".". 
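
    Example (illustrative)::

        prompt_dir("Base directory: ")   # just pressing enter returns the default "."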
:param msg: shown message :return: user input """ return default_prompt(msg, default=".", validator=TypeValidator(DirName()), completer=PathCompleter(only_directories=True)) def prompt_attributes_dict(default_description: str = None) -> t.Dict[str, str]: """ Prompts for the contents of the attributes dict. :param default_description: default value for the description attribute :return: attributes dict """ attributes = {} descr_msg = "Give a description for the current block: " if default_description is not None: attributes["description"] = default_prompt(descr_msg, default_description, completer=WordCompleter([default_description])) else: attributes["description"] = prompt(descr_msg, validator=NonEmptyValidator()) try: while prompt_yesno("Do you want to set or add another attribute? ", default=False): name = prompt("Attribute name: ", validator=NonEmptyValidator(), completer=WordCompleter(sorted(list(attributes.keys())), meta_dict=attributes)) default = attributes[name] if name in attributes else "" attributes[name] = prompt("Attribute value: ", default=default, validator=NonEmptyValidator()) except KeyboardInterrupt: pass return attributes def prompt_build_dict(with_header: bool = True, whole_config: bool = True) -> dict: """ Prompts for the contents of the build config dictionary. :param with_header: print "Create the …" header? :param whole_config: prompt for the whole build config (with attributes and run config) :return: build config dictionary """ if with_header: print("Create the build configuration for the program block") old_cwd = os.path.realpath(".") build_dict = {} build_dict["base_dir"] = prompt_dir("Base directory: ") os.chdir(build_dict["base_dir"]) build_dict["working_dir"] = prompt_dir("Working directory (relative to the base dir): ") os.chdir(build_dict["working_dir"]) working_dir_abs = os.path.realpath(".") build_dict["build_cmd"] = prompt_bash("Command to build the program: ", allow_empty=True) vcs = VCSDriver.get_suited_vcs() cur_branch = vcs.get_branch() default_description = None if cur_branch is not None: # version control system is used build_dict["branch"] = default_prompt("Used branch? ", default=cur_branch, completer=WordCompleter(vcs.get_valid_branches(), meta_dict={ cur_branch: "Current branch" }), validator=WordValidator(vcs.get_valid_branches(), ignore_case=False, error_msg="Invalid branch name"), display_completions_in_columns=True) vcs.set_branch(build_dict["branch"]) build_dict["revision"] = default_prompt("Revision in this branch: ", default="HEAD", completer=create_revision_completer(vcs), validator=RevisionValidator(vcs), display_completions_in_columns=True) if is_builtin_type(int, build_dict["revision"]): build_dict["revision"] = int(build_dict["revision"]) default_description = vcs.get_info_for_revision(build_dict["revision"])["commit_message"] if prompt_yesno("Randomize program binaries (works with gcc built programs)? ", default=True): rand_dict = dict() meta_dict = {str(get_cache_line_size()): "Current cache line size", "0": "No padding"} size_completer = WordCompleter(sorted(list(meta_dict.keys())), meta_dict=meta_dict) rand_dict["heap"] = int(default_prompt("Maximum size of the random padding of each heap allocation? ", default=get_cache_line_size(), completer=size_completer, validator=TypeValidator(NaturalNumber()))) rand_dict["stack"] = int(default_prompt("Maximum size of the random padding of each stack frame? 
", default=get_cache_line_size(), completer=size_completer, validator=TypeValidator(NaturalNumber()))) rand_dict["bss"] = prompt_yesno("Randomize bss segment? ", default=True) rand_dict["data"] = prompt_yesno("Randomize data segment? ", default=True) rand_dict["rodata"] = prompt_yesno("Randomize rodata segment? ", default=True) rand_dict["file_structure"] = prompt_yesno("Randomize the file structure (location of functions)? ", default=True) build_dict["randomization"] = rand_dict build_dict["number"] = int(prompt("How many times should the program be built? ", validator=TypeValidator(Int()))) os.chdir(old_cwd) if whole_config: attributes_dict = prompt_attributes_dict(default_description) run_config = prompt_run_dict(working_dir=build_dict["working_dir"], binary_number=build_dict["number"], whole_config=False, driver="exec") return { "attributes": attributes_dict, "build_config": build_dict, "run_config": run_config } return build_dict def prompt_run_dict(with_header: bool = True, working_dir: str = None, binary_number: int = None, whole_config: bool = True, driver: str = None) -> dict: """ Prompt the contents of the run config dictionary. :param with_header: print the explanation header :param working_dir: current working dir preset :param binary_number: number of available binaries :param whole_config: return the whole run config (with attributes part)? :param driver: used run driver :return: run config dict """ if with_header: print("Create the run configuration for the program block") run_drivers = { "exec": { "func": prompt_exec_driver_dict, "description": ExecRunDriver.__description__ } } assert driver in run_drivers or driver is None if driver is None: valid = sorted(list(run_drivers.keys())) meta_dict = {} for driver in run_drivers: meta_dict[driver] = run_drivers[driver]["description"] driver = prompt("Used run driver: ", completer=WordCompleter(words=valid, ignore_case=True, meta_dict=meta_dict), validator=WordValidator(ignore_case=False, valid_words=valid, error_msg="Invalid run driver name")) run_dict = run_drivers[driver]["func"](choose_revision=whole_config, working_dir=working_dir, binary_number=binary_number) if whole_config: attributes_dict = prompt_attributes_dict() return { "attributes": attributes_dict, "run_config": run_dict } return run_dict def prompt_exec_driver_dict(choose_revision: bool, working_dir: str = None, binary_number: int = None) -> dict: """ Prompt for the contents of run config dict for suitable for the exec run driver. """ from pygments.lexers.shell import BashLexer old_cwd = os.path.realpath(".") working_dir = working_dir or prompt_dir("Working directory: ") run_dict = {} run_dict["cwd"] = working_dir os.chdir(working_dir) run_dict["run_cmd"] = prompt_bash("Command to execute the program: ", allow_empty=False) if prompt_yesno("Set some environment variables? ", default=False): env_dict = {} def set_env_var(): name = prompt("Environment variable name: ", validator=NonEmptyValidator(), completer=WordCompleter(sorted(list(env_dict.keys())), meta_dict=env_dict)) default = env_dict[name] if name in env_dict else "" env_dict[name] = prompt("New value: ", default=default) try: set_env_var() while prompt_yesno("Set another environment variable? 
"): set_env_var() except KeyboardInterrupt: pass run_dict["env"] = env_dict if choose_revision: vcs = VCSDriver.get_suited_vcs() if vcs.number_of_revisions() + int(vcs.has_uncommitted()) > 1: run_dict["revision"] = default_prompt("Choose a revision in the current repository: ", default="HEAD", completer=create_revision_completer(vcs), validator=RevisionValidator(vcs), display_completions_in_columns=True) if is_builtin_type(int, run_dict["revision"]): run_dict["revision"] = int(run_dict["revision"]) if prompt_yesno("Run some commands before that actually benchmarked command? ", default=False): print("The commands are entered via a multiline input. ") print("Press [Meta+Enter] or [Esc] followed by [Enter] to accept input.") print("You can click with the mouse in order to select text.") run_dict["cmd_prefix"] = prompt('', multiline=True, mouse_support=True, lexer=PygmentsLexer(BashLexer), completer=SystemCompleter()) runners = { "perf_stat": { "func": prompt_perf_stat_exec_dict, "description": PerfStatExecRunner.__description__, }, "rusage": { "func": prompt_rusage_exec_dict, "description": RusageExecRunner.__description__, }, "spec": { "func": prompt_spec_exec_dict, "description": SpecExecRunner.__description__ } } valid = sorted(list(runners.keys())) meta_dict = {} for driver in runners: meta_dict[driver] = runners[driver]["description"] driver = prompt("Used runner: ", completer=WordCompleter(words=valid, ignore_case=True, meta_dict=meta_dict), validator=WordValidator(ignore_case=False, valid_words=valid, error_msg="Invalid runner"), display_completions_in_columns=True) run_dict["runner"] = driver run_dict[driver] = runners[driver]["func"](run_dict) os.chdir(old_cwd) return run_dict def prompt_perf_stat_exec_dict(run_dict: dict) -> dict: """ Prompt for the config of the perf stat exec runner. :param run_dict: run config dict (without the runner part) :return: runner config """ runner_dict = {} default_repeat = PerfStatExecRunner.misc_options["repeat"].get_default() runner_dict["repeat"] = int(default_prompt("How many times should perf stat itself repeat the measurement? ", default=default_repeat, validator=TypeValidator(PositiveInt()))) default_props = ", ".join(PerfStatExecRunner.misc_options["properties"].get_default()) class PerfStatPropertiesValidator(Validator): def validate(self, document: Document): vals = [elem.strip() for elem in document.text.split(",")] cmd = "perf stat -x ';' -e {props} -- /bin/echo".format(props=",".join(vals)) proc = subprocess.Popen(["/bin/sh", "-c", cmd], stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, universal_newlines=True) out, err = proc.communicate() if proc.poll() > 0: msg = str(err).split("\n")[0].strip() raise ValidationError(message=msg, cursor_position=len(document.text)) props = prompt("Which properties should perf stat measure? ", validator=PerfStatPropertiesValidator(), default=default_props, completer=WordCompleter(sorted(list(set(get_av_perf_stat_properties()))), ignore_case=False, WORD=True)) runner_dict["properties"] = [prop.strip() for prop in props.split(",")] return runner_dict def prompt_rusage_exec_dict(run_dict: dict) -> dict: """ Prompt for the config of the rusage exec runner. 
:param run_dict: run config dict (without the runner part) :return: runner config """ runner_dict = {} default_props = ", ".join(RusageExecRunner.misc_options["properties"].get_default()) class RusagePropertiesValidator(Validator): def validate(self, document: Document): vals = [elem.strip() for elem in document.text.split(",")] ret = verbose_isinstance(vals, ValidRusagePropertyList()) if not ret: raise ValidationError(message=str(ret), cursor_position=len(document.text)) props = prompt("Which properties should be obtained from getrusage(1)? ", validator=RusagePropertiesValidator(), default=default_props, completer=WordCompleter(sorted(list(set(get_av_rusage_properties().keys()))), meta_dict=get_av_rusage_properties(), ignore_case=False, WORD=True)) runner_dict["properties"] = [prop.strip() for prop in props.split(",")] return runner_dict def prompt_spec_exec_dict(run_dict: dict) -> dict: """ Prompt for the config of the spec exec runner. :param run_dict: run config dict (without the runner part) :return: runner config """ runner_dict = {} runner_dict["file"] = default_prompt("SPEC like result file to use: ", validator=TypeValidator(FileName()), completer=PathCompleter()) runner_dict["base_path"] = prompt("Property base path: ") runner_dict["path_regexp"] = prompt("Regexp matching the property path for each measured property: ", validator=NonEmptyValidator()) def get(sub_path: str = ""): # just a mock """ Get the value of the property with the given path. :param sub_path: given path relative to the base path :return: value of the property """ print("The python code is entered via a multiline input. ") print("Press [Meta+Enter] or [Esc] followed by [Enter] to accept input.") print("You can click with the mouse in order to select text.") print("Use the get(sub_path: str) -> str function to obtain a properties value.") locs = locals() runner_dict["code"] = prompt_python("The python is executed for each measured property: \n", lambda: {}, lambda: {"get": locs["get"]}) return runner_dict def prompt_config(name: str, prompt_dict_func: t.Callable[[], dict]): """ Prompt for the whole config file. :param name: description of the config (i.e. "run config") :param prompt_dict_func: function to get a single config dict """ blocks = [] file = prompt("YAML file to store the {name} in: ".format(name=name), validator=TypeValidator(ValidYamlFileName(allow_non_existent=True)), completer=PathCompleter()) fd = None # type: io.IOBase if os.path.exists(file): actions = { "append": "Append to the file", "overwrite": "Overwrite the file" } res = prompt("The file already exists. What should be done? ", completer=WordCompleter(sorted(list(actions.keys())), meta_dict=actions, ignore_case=True), validator=WordValidator(list(actions.keys()) + ["a", "o"], error_msg="Not a valid action")) if res.startswith("a"): fd = open(file, "a+") elif res.startswith("o"): fd = open(file, "w+") else: fd = open(file, "w+") blocks.append(prompt_dict_func()) def store_in_file(): #print(blocks) yaml.dump(blocks, fd) fd.flush() fd.close() while prompt_yesno("Add another {name}? 
".format(name=name)): try: blocks.append(prompt_dict_func()) except KeyboardInterrupt: store_in_file() return except BaseException as ex: store_in_file() raise ex store_in_file() def prompt_build_config(): prompt_config("build config", prompt_build_dict) def prompt_run_config(): prompt_config("run config", prompt_run_dict) def main(): #print(repr(prompt_attributes_dict("dsfsdf"))) #print(repr(prompt_build_dict())) #print(repr(prompt_python(globals, locals))) #print(repr(prompt_spec_exec_dict({}))) #vcs = VCSDriver.get_suited_vcs() #print(repr(vcs.validate_revision(13))) prompt_run_config() #print(isinstance("a", ValidYamlFileName(allow_non_existent=True))) if __name__ == '__main__': main()PK#}6Htemci/scripts/__init__.pyPK#}6H)[temci/scripts/version.py""" Contains the current version of temci. The first number gives the major version and the second the minor version. Versions with uneven minor version number are considered beta. """ version = "0.5" """ The current version of temci """PKH8HL3]]temci/setup/setup.pyimport os, subprocess, logging def script_relative(file: str): return os.path.join(os.path.realpath(os.path.dirname(__file__)), "../scripts", file) class ExecError(BaseException): def __init__(self, cmd: str, out: str, err: str): super().__init__() self.cmd = cmd self.out = out self.err = err def __str__(self): return "Running {!r} failed: out={!r}, err={!r}".format(self.cmd, self.out, self.err) def exec(dir: str, cmd: str): """ Run the passed command in the passed directory :param dir: passed directory :param cmd: passed command :raises ExecError if the executed program has a > 0 error code """ proc = subprocess.Popen(["/bin/sh", "-c", cmd], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, cwd=script_relative(dir)) out, err = proc.communicate() if proc.poll() > 0: raise ExecError(cmd, str(out), str(err)) def make_scripts(): try: exec("hadori", "make") exec("rusage", "make") except ExecError as err: logging.error(err) exit(1) try: exec("cpu_cache", "make") except ExecError as err: logging.error(err) logging.error("You probably haven't installed the proper packages for kernel module development " "(kernel-devel on fedora or linux-headers-generic on ubuntu).") logging.error("Not compiling the kernel module results in the malfunctioning of the DisableCaches " "exec run driver plugin.") PK#}6Htemci/setup/__init__.pyPKm:H椦temci/utils/typecheck.py""" Implements basic type checking for complex types. Why? Because it's nice to be able to type check complex structures that come directly from the user (e.g. from YAML config files). The Type instance are usable with the standard isinstance function:: isinstance(4, Either(Float(), Int())) Type instances also support the "&" (producres All(one, two)) and "|" (produces Either(one, two)) operators. The above sample code can therefore be written as:: isinstance(4, Float() | Int()) The native type wrappers also support custom constraints. With help of the fn module one can write:: t = Float(_ > 0) | Int(_ > 10) isinstance(var, t) "t" is a Type that matches only floats greater than 0 and ints greater than 10. For more examples look into the test_typecheck.py file. 
""" __all__ = [ "Type", "Exact", "ExactEither", "T", "E", "Any", "Int", "Float", "NonExistent", "Bool", "BoolOrNone", "Str", "NaturalNumber", "FileName", "FileNameOrStdOut", "ValidYamlFileName", "PositiveInt", "DirName", "ValidTimeSpan", "Info", "Description", "Default", "CompletionHint", "YAML_FILE_COMPLETION_HINT", "All", "Either", "Optional", "Constraint", "NonErrorConstraint", "List", "StrList", "ListOrTuple", "Tuple", "Dict", "verbose_isinstance", "typecheck", "typecheck_locals" ] import pytimeparse import itertools, os, yaml, click, inspect class ConstraintError(ValueError): pass class Info(object): def __init__(self, value_name: str = None, _app_str: str = None, value = None): self.value_name = value_name self._app_str = _app_str if _app_str is not None else "" if value_name is None: self._value_name = "value {{!r}}{}".format(self._app_str) else: self._value_name = "{}{} of value {{!r}}".format(self.value_name, self._app_str) if value is None: self.value = None self.has_value = False else: self.value = value self.has_value = True def set_value(self, value): self.value = value self.has_value = True def get_value(self): if not self.has_value: raise ValueError("value is not defined") return self.value def add_to_name(self, app_str: str): """ Creates a new info object based on this one. :param app_str: app string appended to the own app string to create the app string for the new info object :return: new info object :rtype Info """ return Info(self.value_name, self._app_str + app_str, self.value) def _str(self): return self._value_name.format(self.get_value()) def errormsg(self, constraint, msg: str = None): app = ": " + msg if msg is not None else "" return InfoMsg("{} hasn't the expected type {}{}".format(self._str(), constraint, app)) def errormsg_cond(self, cond, constraint, value): if cond: return InfoMsg(True) else: return InfoMsg(self.errormsg(constraint)) def errormsg_non_existent(self, constraint): return InfoMsg("{} is non existent, expected value of type {}".format(self._str(), constraint)) def errormsg_too_many(self, constraint, value_len, constraint_len): return InfoMsg("{} has to many elements ({}), " \ "expected value of type {} with {} elements".format(self._str(), value_len, constraint, constraint_len)) def wrap(self, result: bool): return InfoMsg(result) def __getitem__(self, item): raise NotImplementedError() def __setitem__(self, key, value): raise NotImplementedError() class NoInfo(Info): def __init__(self, value_name: str = None, _app_str: str = None, value=None): if False: super().__init__(value_name, _app_str, value) self.has_value = True def get_value(self): return None def set_value(self, value): pass def add_to_name(self, app_str): return self def errormsg(self, constraint, msg: str = None): return False def errormsg_cond(self, cond, constraint, value): return cond def errormsg_non_existent(self, constraint): return False def errormsg_too_many(self, constraint, value_len, constraint_len): return False def wrap(self, result: bool): return result class InfoMsg(object): def __init__(self, msg_or_bool): self.success = msg_or_bool is True self.msg = msg_or_bool if isinstance(msg_or_bool, str) else str(self.success) def __str__(self): return self.msg def __bool__(self): return self.success class Description(object): """ A description of a Type, that annotates it. 
Usage example:: Int() // Description("Description of Int()") """ def __init__(self, description: str): typecheck(description, str) self.description = description def __str__(self): return self.description class Default(object): """ A default value annotation for a Type. Usage example:: Int() // Default(3) Especially useful to declare the default value for a key of an dictionary. Allows to use Dict(...).get_default() -> dict. """ def __init__(self, default): self.default = default YAML_FILE_COMPLETION_HINT = "_files -g '*\.yaml'" class CompletionHint(object): """ A completion hint annotation for a type. Usage example:: Int() // Completion(zsh="_files") """ def __init__(self, **hints): self.hints = hints class Type(object): """ A simple type checker type class. """ def __init__(self): self.description = None self.default = None self.typecheck_default = True self.completion_hints = {} def __instancecheck__(self, value, info: Info = NoInfo()): """ Checks whether or not the passed value has the type specified by this instance. :param value: passed value """ if not info.has_value: info.set_value(value) return self._instancecheck_impl(value, info) def _instancecheck_impl(self, value, info: Info): return False def __str__(self): return "Type[]" def _validate_types(self, *types): for t in types: if not isinstance(t, Type): raise ConstraintError("{} is not an instance of a Type subclass".format(t)) def __and__(self, other): """ Alias for All(self, other) """ return All(self, other) def __or__(self, other): """ Alias for Either(self, other). The only difference is that it flattens trees of Either instances """ if isinstance(other, Either): other.types.index(other, 0) return other return Either(self, other) def __floordiv__(self, other): """ Alias for Constraint(other, self). Self mustn't be a Type. If other is a string the description property of this Type object is set. It also can annotate the object with Description, Default or CompletionHint objects. """ if isinstance(other, str) or isinstance(other, Description): self.description = str(other) return self if isinstance(other, Default): self.default = other if self.typecheck_default: typecheck(self.default.default, self) return self if isinstance(other, CompletionHint): for shell in other.hints: self.completion_hints[shell] = other.hints[shell] return self if isinstance(other, Type): raise ConstraintError("{} mustn't be an instance of a Type subclass".format(other)) return Constraint(other, self) def __eq__(self, other): if type(other) == type(self): return self._eq_impl(other) return False def _eq_impl(self, other): return False def get_default(self): if self.default is None: raise ValueError("{} has no default value.".format(self)) return self.default.default def get_default_yaml(self, indents: int = 0, indentation: int = 4, str_list: bool = False, defaults = None) -> str: if defaults is None: defaults = self.get_default() else: typecheck(defaults, self) i_str = " " * indents * indentation y_str = yaml.dump(defaults).strip() if y_str.endswith("\n..."): y_str = y_str[0:-4] strs = list(map(lambda x: i_str + x, y_str.split("\n"))) return strs if str_list else "\n".join(strs) class Exact(Type): """ Checks for value equivalence. """ def __init__(self, exp_value): """ :param exp_value: value to check for """ super().__init__() self.exp_value = exp_value def _instancecheck_impl(self, value, info: Info = NoInfo()): """ Is the value the same as the expected one? 
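
        For instance (illustrative)::

            isinstance(4, Exact(4))    # True
            isinstance(4.0, Exact(4))  # False, the type differs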
""" cond = isinstance(value, type(self.exp_value)) and value == self.exp_value return info.errormsg_cond(cond, self, value) def __str__(self): return "Exact({!r})".format(self.exp_value) def _eq_impl(self, other): return other.exp_value == self.exp_value def __or__(self, other): if isinstance(other, ExactEither): other.exp_values.insert(0, self.exp_value) return other if isinstance(other, Exact): return ExactEither(self.exp_value, other.exp_value) return Either(self, other) def E(exp_value): """ Alias for Exact. """ return Exact(exp_value) class Either(Type): """ Checks for the value to be of one of several types. """ def __init__(self, *types: list): """ :param types: list of types (or SpecialType subclasses) :raises ConstraintError if some of the contraints aren't (typechecker) Types """ super().__init__() self._validate_types(*types) self.types = list(types) def _instancecheck_impl(self, value, info: Info = NoInfo()): """ Does the type of the value match one of the expected types? """ for type in self.types: res = type.__instancecheck__(value, info) if res: return info.wrap(True) return info.errormsg(self) def __str__(self): return "Either({})".format("|".join(str(type) for type in self.types)) def _eq_impl(self, other): return len(other.types) == len(self.types) \ and all(other.types[i] == self.types[i] for i in range(len(self.types))) def __or__(self, other): if isinstance(other, Either): self.types += other.types return self return Either(self, other) class ExactEither(Type): """ Checks for the value to be of one of several exact values. """ def __init__(self, *exp_values: list): """ :param exp_values: list of types (or SpecialType subclasses) :raises ConstraintError if some of the contraints aren't (typechecker) Types """ super().__init__() self.exp_values = list(exp_values) self._update_completion_hints() def _instancecheck_impl(self, value, info: Info = NoInfo()): """ Does the type of the value match one of the expected types? """ if value in self.exp_values: return info.wrap(True) return info.errormsg(self) def __str__(self): return "ExactEither({})".format("|".join(repr(val) for val in self.exp_values)) def _eq_impl(self, other): return len(other.exp_values) == len(self.exp_values) \ and all(other.exp_values[i] == self.exp_values[i] for i in range(len(self.exp_values))) def __or__(self, other): if isinstance(other, ExactEither): self.exp_values += other.exp_values self._update_completion_hints() return self if isinstance(other, Exact): self.exp_values.append(other.exp_value) self._update_completion_hints() return self return Either(self, other) def _update_completion_hints(self): self.completion_hints = { "zsh": "({})".format(" ".join(repr(val) for val in self.exp_values)), "fish": { "hint": self.exp_values } } class Union(Either): """ Alias for Either. Checks for the value to be of one of several types. """ class All(Type): """ Checks for the value to be of all of several types. """ def __init__(self, *types): """ :param types: list of types (or SpecialType subclasses) :raises ConstraintError if some of the contraints aren't (typechecker) Types """ super().__init__() self._validate_types(*types) self.types = types def _instancecheck_impl(self, value, info: Info = NoInfo()): """ Does the type of the value match all of the expected types? 
""" for type in self.types: res = type.__instancecheck__(value, info) if not res: return res return info.wrap(True) def __str__(self): return "All[{}]".format("|".join(str(type) for type in self.types)) def _eq_impl(self, other): return len(other.types) == len(self.types) \ and all(other.types[i] == self.types[i] for i in range(len(self.types))) class Any(Type): """ Checks for the value to be of any type. """ def __instancecheck__(self, value, info: Info = NoInfo()): return info.wrap(True) def __str__(self): return "Any" def _eq_impl(self, other): return True class T(Type): """ Wrapper around a native type. """ def __init__(self, native_type): super().__init__() if not isinstance(native_type, type): raise ConstraintError("{} is not a native type".format(type)) self.native_type = native_type def _instancecheck_impl(self, value, info: Info = NoInfo()): """ Does the passed value be an instance of the wrapped native type? """ return info.errormsg_cond(isinstance(value, self.native_type), self, info) def __str__(self): return "T({})".format(self.native_type) def _eq_impl(self, other): return other.native_type == self.native_type class Optional(Either): """ Checks the value and checks that its either of native type None or of another Type constraint. Alias for Either(Exact(None), other_type) """ def __init__(self, other_type): """ :raises ConstraintError if other_type isn't a (typechecker) Types """ super().__init__(Exact(None), other_type) def __str__(self): return "Optional({})".format(self.types[1]) class Constraint(Type): """ Checks the passed value by an user defined constraint. """ def __init__(self, constraint, constrained_type: Type = Any(), description: str = None): """ :param constraint: function that returns True if the user defined constraint is satisfied :param constrained_type: Type that the constrain is applied on :param description: short description of the constraint (e.g. ">0") :raises ConstraintError if constrained_type isn't a (typechecker) Types """ super().__init__() self._validate_types(constrained_type) self.constraint = constraint self.constrained_type = constrained_type self.description = description def _instancecheck_impl(self, value, info: Info = NoInfo()): """ Checks the passed value to be of the constrained type and to adhere the user defined constraint. """ res = self.constrained_type.__instancecheck__(value, info) if not res: return res if not self.constraint(value): return info.errormsg(self) return info.wrap(True) def __str__(self): descr = self.description if self.description is None: #if isinstance(self.constraint, type(fn._)): # descr = str(self.constraint) #else: descr = "" return "{}:{}".format(self.constrained_type, descr) class NonErrorConstraint(Type): """ Checks the passed value by an user defined constraint that fails if it raise an error. """ def __init__(self, constraint, error_cls, constrained_type: Type = Any(), description: str = None): """ :param constraint: function that doesn't raise an error if the user defined constraint is satisfied :param error_cls: class of the errors the constraint method throws :param constrained_type: Type that the constrain is applied on :param description: short description of the constraint (e.g. 
">0") :raises ConstraintError if constrained_type isn't a (typechecker) Types """ super().__init__() self._validate_types(constrained_type) self.constraint = constraint self.error_cls = error_cls self.constrained_type = constrained_type self.description = description def _instancecheck_impl(self, value, info: Info = NoInfo()): """ Checks the passed value to be of the constrained type and to adhere the user defined constraint (that the method doesn't throw the user specified exception). """ res = self.constrained_type.__instancecheck__(value, info) if not res: return res try: self.constraint(value) except self.error_cls as err: return info.errormsg(self, msg=str(err)) return info.wrap(True) def __str__(self): descr = self.description if self.description is None: #if isinstance(self.constraint, type(fn._)): # descr = str(self.constraint) #else: descr = "" return "{}:{}".format(self.constrained_type, descr) class List(Type): """ Checks for the value to be a list with elements of a given type. """ def __init__(self, elem_type=Any()): """ :param elem_type: type of elements :param must_contain: the elements the value has to contain at least :raises ConstraintError if elem_type isn't a (typechecker) Types """ super().__init__() self._validate_types(elem_type) self.elem_type = elem_type def _instancecheck_impl(self, value, info: Info = NoInfo()): if not isinstance(value, list): return info.errormsg(self) for (i, elem) in enumerate(value): new_info = info.add_to_name("[{}]".format(i)) res = self.elem_type.__instancecheck__(elem, new_info) if not res: return res return info.wrap(True) def __str__(self): return "List({})".format(self.elem_type) def _eq_impl(self, other): return other.elem_type == self.elem_type class ListOrTuple(Type): """ Checks for the value to be a list or tuple with elements of a given type. """ def __init__(self, elem_type=Any()): """ :param elem_type: type of elements :param must_contain: the elements the value has to contain at least :raises ConstraintError if elem_type isn't a (typechecker) Types """ super().__init__() self._validate_types(elem_type) self.elem_type = elem_type def _instancecheck_impl(self, value, info: Info = NoInfo()): if not isinstance(value, T(list) | T(tuple)): return info.errormsg(self) for (i, elem) in enumerate(list(value)): new_info = info.add_to_name("[{}]".format(i)) res = self.elem_type.__instancecheck__(elem, new_info) if not res: return res return info.wrap(True) def __str__(self): return "ListOrTuple({})".format(self.elem_type) def _eq_impl(self, other): return other.elem_type == self.elem_type class Tuple(Type): """ Checks for the value to be a tuple (or a list) with elements of the given types. 
""" def __init__(self, *elem_types): """ :param elem_types: types of elements :raises ConstraintError if elem_type isn't a (typechecker) Types """ super().__init__() for elem_type in elem_types: self._validate_types(elem_type) self.elem_types = elem_types def _instancecheck_impl(self, value, info: Info = NoInfo()): if not (isinstance(value, list) or isinstance(value, tuple)) or len(self.elem_types) != len(value): return info.errormsg(self) if len(self.elem_types) == 0: return info.wrap(True) for (i, elem) in enumerate(value): new_info = info.add_to_name("[{}]".format(i)) res = self.elem_types[i].__instancecheck__(elem, new_info) if not res: return res return info.wrap(True) def __str__(self): return "Tuple({})".format(", ".join(str(t) for t in self.elem_types)) def _eq_impl(self, other): return len(other.elem_types) == len(self.elem_types) and \ all(a == b for (a, b) in itertools.product(self.elem_types, other.elem_types)) class _NonExistentVal(object): """ Helper class for NonExistent Type. """ def __str__(self): return "" def __repr__(self): return self.__str__() _non_existent_val = _NonExistentVal() class NonExistent(Type): """ Checks a key of a dictionary for existence if its associated value has this type. """ def _instancecheck_impl(self, value, info: Info): return info.errormsg_cond(type(value) == _NonExistentVal, self, "[value]") def __str__(self): return "non existent" def _eq_impl(self, other): return True class Dict(Type): """ Checks for the value to be a dictionary with expected keys and values satisfy given type constraints. """ def __init__(self, data: dict = None, all_keys=True, key_type: Type = Any(), value_type: Type = Any()): """ :param data: dictionary with the expected keys and the expected types of the associated values :param all_keys: does the type checking fail if more keys are present in the value than in data? 
:param key_type: expected Type of all dictionary keys :param value_type: expected Type of all dictionary values :raises ConstraintError if one of the given types isn't a (typechecker) Types """ super().__init__() self.data = data if data is not None else {} self._validate_types(*self.data.values()) self._validate_types(key_type, value_type) self.all_keys = all_keys self.key_type = key_type self.value_type = value_type def _instancecheck_impl(self, value, info: Info = NoInfo()): if not isinstance(value, dict): return info.errormsg(self) non_existent_val_num = 0 for key in self.data.keys(): if key in value: res = self.data[key].__instancecheck__(value[key], info.add_to_name("[{!r}]".format(key))) if not res: return res else: is_non_existent = self.data[key].__instancecheck__(_non_existent_val, info.add_to_name("[{!r}]".format(key))) non_existent_val_num += 1 if key not in value and not is_non_existent: info = info.add_to_name("[{!r}]".format(key)) return info.errormsg_non_existent(self) for key in value.keys(): ninfo = info.add_to_name("(key={!r})".format(key)) res = self.key_type.__instancecheck__(key, ninfo) if not res: return res for key in value.keys(): val = value[key] ninfo = info.add_to_name("[{!r}]".format(key)) res = self.value_type.__instancecheck__(val, ninfo) if not res: return res if self.all_keys and len(self.data) - non_existent_val_num != len(value): return info.errormsg_too_many(self, len(value), len(self.data)) return info.wrap(True) def __str__(self): fmt = "Dict({data}, keys={key_type}, values={value_type})" data_str = ", ".join("{!r}: {}".format(key, self.data[key]) for key in self.data) if self.all_keys: fmt = "Dict({{{data}}}, {all_keys}, keys={key_type}, values={value_type})" return fmt.format(data=data_str, all_keys=self.all_keys, key_type=self.key_type, value_type=self.value_type) def __getitem__(self, key) -> Type: """ Returns the Type of the keys value. """ if key in self.data: return self.data[key] if not self.all_keys and isinstance(key, self.key_type): return self.value_type return NonExistent() def __setitem__(self, key, value): """ Sets the Type of the keys values. """ if (key in self.data and isinstance(value, self.value_type)) or\ (isinstance(key, self.key_type) and isinstance(value, self.value_type)): self.data[key] = value else: raise ValueError("Key or value have wrong types") def get_description(self, key: str) -> str: """ Returns the description for the passed key or None if there isn't one. 
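A small sketch, assuming the Description modifier used throughout this module attaches the description string to a type::

    d = Dict({"runs": Int() // Description("Number of benchmarking runs")})
    d.get_description("runs")   # -> "Number of benchmarking runs"
    d["runs"]                   # -> the Int() type scheme itself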
:param key: passed key """ return self[key].description def _eq_impl(self, other) -> bool: return all(self.data[key] == other.data[key] and self.get_description(key) == other.get_description(key) for key in itertools.chain(self.data.keys(), other.data.keys())) def get_default(self) -> dict: default_dict = {} if self.default is not None: default_dict = self.default.default for key in self.data: if key not in default_dict: default_dict[key] = self[key].get_default() return default_dict def get_default_yaml(self, indent: int = 0, indentation: int = 4, str_list: bool = False, defaults = None) -> str: if len(self.data.keys()) == 0: ret = "!!map {}" return [ret] if str_list else ret if defaults is None: defaults = self.get_default() else: typecheck(defaults, self) strs = [] groups = { "simple": [], "misc": [] } for key in self.data: if isinstance(self.data[key], Dict): groups["misc"].append(key) else: groups["simple"].append(key) keys = sorted(groups["simple"]) + sorted(groups["misc"]) for i in range(0, len(keys)): #if i != 0: strs.append("") key = keys[i] if self.data[key].description is not None: comment_lines = self.data[key].description.split("\n") comment_lines = map(lambda x: "# " + x, comment_lines) strs.extend(comment_lines) key_yaml = yaml.dump(key).split("\n")[0] if len(self.data[key].get_default_yaml(str_list=True, defaults=defaults[key])) == 1 and \ (not isinstance(self.data[key], Dict) or len(self.data[key].data.keys()) == 0): value_yaml = self.data[key].get_default_yaml(defaults=defaults[key]) strs.append("{}: {}".format(key_yaml, value_yaml.strip())) else: value_yaml = self.data[key].get_default_yaml(1, indentation, str_list=True, defaults=defaults[key]) strs.append("{}:".format(key_yaml)) strs.extend(value_yaml) i_str = " " * indent * indentation ret_strs = list(map(lambda x: i_str + x, strs)) return ret_strs if str_list else "\n".join(ret_strs) class Int(Type): """ Checks for the value to be of type int and to adhere to some constraints. """ def __init__(self, constraint = None, range: range = None, description: str = None): """ :param constraint: user defined constrained function :param range. range (or list) that the value has to be part of :param description: description of the constraints """ super().__init__() self.constraint = constraint self.range = range self.description = description if range is not None and len(range) <= 20: self.completion_hints = { "zsh": "({})".format(" ".join(str(x) for x in range)), "fish": { "hint": list(self.range) } } def _instancecheck_impl(self, value, info: Info): if not isinstance(value, int) or (self.constraint is not None and not self.constraint(value)) \ or (self.range is not None and value not in self.range): return info.errormsg(self) return info.wrap(True) def __str__(self): arr = [] if self.description is not None: arr.append(self.description) else: if self.constraint is not None: descr = "" #if isinstance(self.constraint, type(fn._)): # descr = str(self.constraint) #else: descr = "" arr.append("constraint={}".format(descr)) if self.range is not None: arr.append("range={}".format(self.range)) return "Int({})".format(",".join(arr)) def _eq_impl(self, other): return other.constraint == self.constraint and other.range == self.range class StrList(Type, click.ParamType): """ A comma separated string list which contains elements from a fixed of allowed values. 
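A rough sketch of the intended composition (Exact is defined elsewhere in this module, the plugin names are only illustrative)::

    t = StrList() | Exact("nice") | Exact("preheat")
    isinstance(["nice"], t)                  # True
    isinstance(["unknown"], t)               # False, not an allowed value
    t.convert("nice,preheat", None, None)    # ["nice", "preheat"], as click would call it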
""" name = "coma_sep_str_list" def __init__(self): super().__init__() self.allowed_values = None def __or__(self, other): if isinstance(other, Exact) and isinstance(other.exp_value, Str()): if self.allowed_values is None: self.allowed_values = [other.exp_value] else: self.allowed_values.append(other.exp_value) return self return super().__or__(other) def _instancecheck_impl(self, value, info: Info): res = List(Str()).__instancecheck__(value, info) if not res: return info.errormsg(self, "Not a list of strings") if self.allowed_values is None or all(val in self.allowed_values for val in value): return info.wrap(True) return info.errormsg(self, "Does contain invalid elements") def convert(self, value, param, ctx): if isinstance(value, self): return value elif isinstance(value, str): value = str(value) return value.split(",") self.fail("{} is no valid comma separated string list".format(value), param, ctx) def __str__(self): if self.allowed_values is None: return "StrList()" else: return "StrList(allowed={})".format(repr(self.allowed_values)) def get_default_yaml(self, indents: int = 0, indentation: int = 4, str_list: bool = False, defaults = None) -> str: if defaults is None: defaults = self.get_default() else: typecheck(defaults, self) i_str = " " * indents * indentation ret_str = i_str + "[{}]".format(", ".join(defaults)) return [ret_str] if str_list else ret_str class Str(Type): def __init__(self, constraint = None): super().__init__() self.constraint = constraint def _instancecheck_impl(self, value, info: Info): if not isinstance(value, str): return info.errormsg(self) if self.constraint is not None and not self.constraint(value): return info.errormsg(self) return info.wrap(True) def __str__(self): if self.constraint is not None: return "Str({})".format(repr(self.constraint)) else: return "Str()" class FileName(Str): """ A valid file name. If the file doesn't exist, at least the parent directory must exist and the file must be creatable. """ def __init__(self, constraint = None, allow_std: bool = False, allow_non_existent: bool = True): super().__init__() self.constraint = constraint self.completion_hints = { "zsh": "_files", "fish": { "files": True } } self.allow_std = allow_std self.allow_non_existent = allow_non_existent def _instancecheck_impl(self, value, info: Info): if not isinstance(value, str) or value == "": return info.errormsg(self) if self.allow_std and value == "-" and (self.constraint is None or self.constraint(value)): return info.wrap(True) is_valid = True if os.path.exists(value): if os.path.isfile(value) and os.access(os.path.abspath(value), os.W_OK)\ and (self.constraint is None or self.constraint(value)): return info.wrap(True) return info.errormsg(self) if not self.allow_non_existent: return info.errormsg("File doesn't exist") abs_name = os.path.abspath(value) dir_name = os.path.dirname(abs_name) if os.path.exists(dir_name) and os.access(dir_name, os.EX_OK) and os.access(dir_name, os.W_OK) \ and (self.constraint is None or self.constraint(value)): return info.wrap(True) return info.errormsg(self) def __str__(self): if self.constraint is not None: return "FileName({}, allow_std={})".format(repr(self.constraint), self.allow_std) else: return "FileName(allow_std={})".format(self.allow_std) class ValidYamlFileName(Str): """ A valid file name that refers to a valid YAML file. 
""" def __init__(self, allow_non_existent: bool = False): super().__init__() self.completion_hints = { "zsh": "_files", "fish": { "files": True } } self.allow_non_existent = allow_non_existent def _instancecheck_impl(self, value, info: Info): if not isinstance(value, str): return info.errormsg(self, "isn't a string") if not os.path.exists(value): if not self.allow_non_existent or not isinstance(value, FileName()): return info.errormsg(self, "doesn't exist or hasn't an accessible parent directory") return info.wrap(True) if not os.path.isfile(value): return info.errormsg(self, "isn't a file") try: with open(value, "r") as f: yaml.load(f.readline()) except (IOError, yaml.YAMLError) as ex: return info.errormsg(self, "YAML parse error: " + str(ex)) return info.wrap(True) def __str__(self): return "ValidYamlFileName()" class DirName(Str): """ A valid directory name. If the directory doesn't exist, at least the parent directory must exist. """ def __init__(self, constraint = None): super().__init__() self.constraint = constraint self.completion_hints = { "zsh": "_directories", "fish": { "files": True } } def _instancecheck_impl(self, value, info: Info): if not isinstance(value, str): return info.errormsg(self) is_valid = True if os.path.exists(value): if os.path.isdir(value) and os.access(os.path.abspath(value), os.W_OK)\ and (self.constraint is None or self.constraint(value)): return info.wrap(True) return info.errormsg(self) abs_name = os.path.abspath(value) dir_name = os.path.dirname(abs_name) if os.path.exists(dir_name) and os.access(dir_name, os.EX_OK) and os.access(dir_name, os.W_OK) \ and (self.constraint is None or self.constraint(value)): return info.wrap(True) return info.errormsg(self) def __str__(self): if self.constraint is not None: return "DirName({})".format(repr(self.constraint)) else: return "DirName()" class BoolOrNone(Type, click.ParamType): """ Like Bool but with a third value none that declares that the value no boolean value. It has None as its default value (by default). """ name = "bool_or_none" def __init__(self): super().__init__() self.completion_hints = { "zsh": "(true, false, none)", "fish": { "hint": ["true", "false", "none"] } } self.default = None def _instancecheck_impl(self, value, info: Info): res = ExactEither(True, False, None).__instancecheck__(value, info) return info.errormsg_cond(self, bool(res), str(res)) def convert(self, value, param, ctx): if isinstance(value, self): return value elif isinstance(value, str): value = value.lower() if value == "true" : return True elif value == "false": return False elif value == "none": return None self.fail("{} is no valid bool or 'none'".format(value), param, ctx) def __str__(self): return "BoolOrNone()" class Bool(Type, click.ParamType): """ Like Bool but with a third value none that declares that the value no boolean value. It has None as its default value (by default). """ name = "bool" def __init__(self): super().__init__() self.completion_hints = { "zsh": "(true, false)", "fish": { "hint": ["true", "false"] } } def _instancecheck_impl(self, value, info: Info): res = ExactEither(True, False).__instancecheck__(value, info) return info.errormsg_cond(self, bool(res), str(res)) def __str__(self): return "Bool()" class ValidTimeSpan(Type, click.ParamType): """ A string that is parseable as timespan by pytimeparse. E.g. "32m" or "2h 32m". 
""" name = "valid_timespan" def __init__(self): super().__init__() def _instancecheck_impl(self, value, info: Info): res = Str().__instancecheck__(value, info) return info.errormsg_cond(self, res and pytimeparse.parse(value), value) def convert(self, value, param, ctx): if isinstance(value, self): return value self.fail("{} is no valid time span".format(value), param, ctx) def __str__(self): return "ValidTimespan()" def NaturalNumber(constraint = None): """ Matches all natural numbers (ints >= 0) that satisfy the optional user defined constrained. """ if constraint is not None: return Int(lambda x: x >= 0 and constraint(x)) return Int(lambda x: x >= 0) def PositiveInt(constraint = None): """ Matches all positive integers that satisfy the optional user defined constrained. """ if constraint is not None: return Int(lambda x: x > 0 and constraint(x)) return Int(lambda x: x > 0) def Float(constraint = None): """ Alias for Constraint(constraint, T(float)) or T(float) """ if constraint is not None: return Constraint(constraint, T(float)) return T(float) def FileNameOrStdOut(): """ A valid file name or "-" for standard out. """ return FileName(allow_std=True) def verbose_isinstance(value, type, value_name: str = None): """ Verbose version of isinstance that returns a InfoMsg object. :param value: value to check :param type: type or Type to check for :param value_name: name of the passed value (improves the error message) """ if not isinstance(type, Type): type = T(type) if not isinstance(value, type): return type.__instancecheck__(value, Info(value_name)) return InfoMsg(True) def typecheck(value, type, value_name: str = None): """ Like verbose_isinstance but raises an error if the value hasn't the expected type. :param value: passed value :param type: expected type of the value :param value_name: optional description of the value :raises TypeError """ if not isinstance(value, type): raise TypeError(str(verbose_isinstance(value, type, value_name))) def typecheck_locals(locals: dict = None, **variables: dict): """ Like typecheck but checks several variables for their associated expected type. The advantage against typecheck is that it sets the value descriptions properly. Example usage:: def func(a: str, b: int): typecheck_locals(locals(), a=Str(), b=Int()) :param locals: directory to get the variable values from :param variables: variable names with their associated expected types :raises TypeError """ if locals is None: locals = inspect.currentframe().f_back.f_locals typecheck(locals, Dict(all_keys=False, key_type=Str())) for var in variables: typecheck(locals[var], variables[var], value_name=var) """ # Hack: Implement a typed() decorator for runtime type testing class typed(object): def __init__(self, *args, **kwargs): print(args, kwargs) def __call__(self, f): def wrapped_f(*args, **kwargs): print(args, kwargs) print("dsf") f(*args, **kwargs) return wrapped_f @typed(3) def func(a, b, c, d = 4, *ds): return "abc" func(55) exit() """PKYm:H.'jjtemci/utils/__init__.py""" Package with utility modules. """ import click #@click.option("abc", type=str) #def asd(): # passPK#}6H8temci/utils/registry.pyfrom .settings import Settings from .util import Singleton from .typecheck import * class AbstractRegistry: """ An abstract registry. To create an own registry set the settings_key_path (type str), the use_key (type str), the use_list (type bool) and the default attribute (type (use_list ? list of strings : str). Important: Be sure to have a "_register = {}" line in your extending class. 
""" settings_key_path = "" use_key = None use_list = False default = [] registry = {} @classmethod def get_for_name(cls, name: str, *args, **kwargs): """ Creates a plugin with the given name. :param name: name of the registered class :return: object of the registered class :raises ValueError if there isn't such a class """ if name not in cls.registry: raise ValueError("No such registered class {}".format(name)) misc_settings = Settings()["/".join([cls.settings_key_path, name + "_misc"])] return cls.registry[name](misc_settings, *args, **kwargs) @classmethod def get_used(cls): """ Get the list of name of the used plugins (use_list=True) or the name of the used plugin (use_list=False). """ key = "/".join([cls.settings_key_path, cls.use_key]) if not Settings().has_key(key): return [] if cls.use_list else None if cls.use_list: plugin_allow_vals = {} active_list = Settings()[key].split(",") if not isinstance(Settings()[key], list) else Settings()[key] ret_list = [] for name in sorted(cls.registry.keys()): active_path = "{}_active".format("/".join([cls.settings_key_path, name])) active = Settings()[active_path] if active is None and name in active_list: ret_list.append(name) if active is True: ret_list.append(name) return ret_list else: return Settings()[key] @classmethod def register(cls, name: str, klass: type, misc_type: Type): """ Registers a new class. The constructor of the class gets as first argument the misc settings. :param name: common name of the registered class :param klass: actual class :param misc_type: type scheme of the {name}_misc settings :param misc_default: default value of the {name}_misc settings """ misc_default = misc_type.get_default() description = None if klass.__doc__ is not None: header = ""# "Description of {} (class {}):\n".format(name, klass.__qualname__) lines = str(klass.__doc__.strip()).split("\n") lines = map(lambda x: " " + x.strip(), lines) description = Description(header + "\n".join(lines)) klass.__description__ = description.description misc_type //= description else: klass.__description__ = "" Settings().modify_setting("{}_misc".format("/".join([cls.settings_key_path, name])), misc_type) use_key_path = "/".join([cls.settings_key_path, cls.use_key]) if cls.use_list: if not Settings().validate_key_path(use_key_path.split("/")) \ or isinstance(Settings().get_type_scheme(use_key_path), Any): t = (StrList() | Exact(name)) t.typecheck_default = False Settings().modify_setting(use_key_path, t // Default(cls.default)) else: use_key_list = Settings().get_type_scheme(use_key_path) assert isinstance(use_key_list, StrList) use_key_list |= Exact(name) use_key_list = Settings().get_type_scheme(use_key_path) use_key_list // Description("Possible plugins are: {}"\ .format(repr(sorted(use_key_list.allowed_values))[1:-1])) active_path = "{}_active".format("/".join([cls.settings_key_path, name])) if not Settings().validate_key_path(active_path.split("/")): Settings().modify_setting(active_path, BoolOrNone() // Default(None)) Settings().get_type_scheme(active_path) // Description("Enable: " + klass.__description__) else: if not Settings().validate_key_path(use_key_path.split("/")) \ or not isinstance(Settings().get_type_scheme(use_key_path), ExactEither): t = ExactEither(name) t.typecheck_default = False Settings().modify_setting(use_key_path, t // Default(cls.default)) else: Settings().modify_setting(use_key_path, Settings().get_type_scheme(use_key_path) | Exact(name)) t = Settings().get_type_scheme(use_key_path) t // Description("Possible plugins are: {}"\ 
.format(repr(sorted(t.exp_values))[1:-1])) cls.registry[name] = klass @classmethod def __getitem__(cls, name: str): """ Alias for get_for_name(name). """ return cls.get_for_name(name) @classmethod def get_class(cls, name: str): return cls.registry[name] def register(registry: type, name: str, misc_type: Type): """ Class decorator that calls the register method for the decorated method. :param registry: the registry class to register the class in :param name: common name of the registered class :param misc_type: type scheme of the {name}_misc settings (each dict key must have a default value) """ assert issubclass(registry, AbstractRegistry) def dec(klass): registry.register(name, klass, misc_type) return klass return decPKt7H% **temci/utils/mail.py""" Utilities to send mails. """ from temci.utils.settings import Settings import subprocess import typing as t import logging def hostname() -> str: return str(subprocess.check_output("hostname").strip())[2:-1] def send_mail(recipient: str, subject: str, content: str, attached_files: t.List[str] = None): from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText import smtplib if recipient == "": return sender = "" try: sender = "temci@" + hostname() except subprocess.CalledProcessError: sender = "temci@temci" try: attached_files = attached_files or [] msg = MIMEMultipart() msg["From"] = sender msg["To"] = recipient msg["Subject"] = subject msg.attach(MIMEText(content)) for file in attached_files: try: with open(file, "r") as f: msg_part = MIMEText(f.read()) msg_part.add_header('Content-Disposition', 'attachment', filename=file) msg.attach(msg_part) except IOError: pass smtp = smtplib.SMTP("localhost") smtp.sendmail(sender, recipient, msg.as_string()) smtp.quit() except BaseException as ex: logging.error(ex)PKDn:H4>>temci/utils/settings.pyimport yaml import copy import os, logging import click from temci.utils.util import recursive_exec_for_leafs, Singleton from temci.utils.typecheck import * import multiprocessing import typing as t def ValidCPUCoreNumber(): return Int(range=range(0, multiprocessing.cpu_count())) class SettingsError(ValueError): pass class Settings(metaclass=Singleton): """ Manages the Settings. 
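A small usage sketch (the keys refer to the type scheme below)::

    Settings()["run/min_runs"]           # read a setting via its "/" separated key
    Settings()["run/min_runs"] = 10      # set it, validated against the type scheme
    Settings().load_from_current_dir()   # overlay a temci.yaml from the working directory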
""" type_scheme = Dict({ "settings_file": Str() // Description("Additional settings file") // Default("") // CompletionHint(zsh=YAML_FILE_COMPLETION_HINT), "tmp_dir": Str() // Default("/tmp/temci") // Description("Used temporary directory"), "log_level": ExactEither("debug", "info", "warn", "error", "quiet") // Default("info") // Description("Logging level"), "stats": Dict({ "properties": ListOrTuple(Str()) // Default(["all"]) // CompletionHint(zsh="(" + " ".join(["ov-time", "cache-misses", "cycles", "task-clock", "instructions", "branch-misses", "cache-references", "all"]) + ")") // Description("Properties to use for reporting and null hypothesis tests"), "uncertainty_range": Tuple(Float(lambda x: x >= 0), Float(lambda x: x >= 0)) // Default((0.05, 0.15)) // Description("Range of p values that allow no conclusion.") }, all_keys=False), "report": Dict({ # "reporter": Str() // Default("console") // Description(), "in": Str() // Default("run_output.yaml") // Description("File that contains the benchmarking results") // CompletionHint(zsh=YAML_FILE_COMPLETION_HINT), }, all_keys=False), "run": Dict({ "discarded_blocks": NaturalNumber() // Description("First n blocks that are discarded") // Default(2), "min_runs": NaturalNumber() // Default(20) // Description("Minimum number of benchmarking runs"), "max_runs": NaturalNumber() // Default(100) // Description("Maximum number of benchmarking runs"), "runs": Int(lambda x: x >= -1) // Default(-1) // Description("if != -1 sets max and min runs to it's value"), "max_time": ValidTimeSpan() // Default("2h") // Description("Maximum time the whole benchmarking should take " "+- time to execute one block."), # in seconds "run_block_size": PositiveInt() // Default(5) // Description("Number of benchmarking runs that are done together"), "in": Str() // Default("input.exec.yaml") // Description("Input file with the program blocks to benchmark") // CompletionHint(zsh=YAML_FILE_COMPLETION_HINT), "out": Str() // Default("run_output.yaml") // Description("Output file for the benchmarking results") // CompletionHint(zsh=YAML_FILE_COMPLETION_HINT), "exec_plugins": Dict({ }), "cpuset": Dict({ "active": Bool() // Description("Use cpuset functionality?") // Default(True), "base_core_number": ValidCPUCoreNumber() // Description("Number of cpu cores for the base " "(remaining part of the) system") // Default(1), "parallel": Int(lambda x: x >= -1) // Description("0: benchmark sequential, " "> 0: benchmark parallel with n instances, " "-1: determine n automatically") // Default(0), "sub_core_number": ValidCPUCoreNumber() // Description("Number of cpu cores per parallel running program.") // Default(1) }), "disable_hyper_threading": Bool() // Default(False) // Description("Disable the hyper threaded cores. 
Good for cpu bound programs."), "show_report": Bool() // Default(True) // Description("Print console report if log_level=info"), "append": Bool() // Default(False) // Description("Append to the output file instead of overwriting by adding new run data blocks"), "shuffle": Bool() // Default(True) // Description("Randomize the order in which the program blocks are " "benchmarked."), "send_mail": Str() // Default("") // Description("If not empty, recipient of a mail after the benchmarking finished.") }), "build": Dict({ "rand": Dict({ "heap": NaturalNumber() // Default(0) // Description("0: don't randomize, > 0 randomize with paddings in range(0, x)"), "stack": NaturalNumber() // Default(0) // Description("0: don't randomize, > 0 randomize with paddings in range(0, x)"), "bss": Bool() // Default(False) // Description("Randomize the bss sub segments?"), "data": Bool() // Default(False) // Description("Randomize the data sub segments?"), "rodata": Bool() // Default(False) // Description("Randomize the rodata sub segments?"), "file_structure": Bool() // Default(False) // Description("Randomize the file structure.") }) // Description("Assembly randomization"), "in": Str() // Default("build.yaml") // Description("Input file with the program blocks to build") // CompletionHint(zsh=YAML_FILE_COMPLETION_HINT), "out": Str() // Default("run.exec.yaml") // Description("Resulting run config file") }) }, all_keys=False) config_file_name = "temci.yaml" def __init__(self): """ Inits a Settings singleton object and thereby loads the Settings files. It loads the settings files from the app folder (config.yaml) and the current working directory (temci.yaml) if they exist. :raises SettingsError if some of the settings aren't in the format described via the type_scheme class property """ self.prefs = copy.deepcopy(self.type_scheme.get_default()) res = self._validate_settings_dict(self.prefs, "default settings") if not res: self.prefs = copy.deepcopy(self.defaults) raise SettingsError(str(res)) self.load_from_config_dir() self.load_from_current_dir() self._setup() def _setup(self): """ Simple setup method that checks if basic directories exist and creates them if necessary. """ if not os.path.exists(self.prefs["tmp_dir"]): os.mkdir(self.prefs["tmp_dir"]) log_level = self["log_level"] logging.Logger.disabled = log_level == "quiet" logger = logging.getLogger() mapping = { "debug": logging.DEBUG, "info": logging.INFO, "warn": logging.WARNING, "error": logging.ERROR, "quiet": logging.ERROR } logger.setLevel(mapping[log_level]) def reset(self): """ Resets the current settings to the defaults. """ self.prefs = copy.deepcopy(self.type_scheme.get_default()) def _validate_settings_dict(self, data, description: str): """ Check whether the passed dictionary matches the settings type scheme. :param data: passed dictionary :param description: short description of the passed dictionary :return True like object if valid, else string like object which is the error message """ return verbose_isinstance(data, self.type_scheme, description) def load_file(self, file: str): """ Loads the settings from the settings yaml file. 
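A sketch of a possible settings file (the keys mirror the type scheme, the values are only illustrative)::

    run:
        min_runs: 10
        max_runs: 50
    stats:
        uncertainty_range: [0.05, 0.1]

Every leaf value of the loaded YAML tree is merged into the current settings and the merged result is validated as a whole.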
:param file: path to the file :raises SettingsError if the settings file is incorrect or doesn't exist """ tmp = copy.deepcopy(self.prefs) try: with open(file, 'r') as stream: map = yaml.load(stream) def func(key, path, value): self._set(path, value) recursive_exec_for_leafs(map, func) except (yaml.YAMLError, IOError) as err: self.prefs = tmp raise SettingsError(str(err)) res = self._validate_settings_dict(self.prefs, "settings with ones from file '{}'".format(file)) if not res: self.prefs = tmp raise SettingsError(str(res)) self._setup() def load_from_dir(self, dir: str): """ Loads the settings from the `config.yaml` file inside the passed directory. :param dir: path of the directory """ self.load_file(os.path.join(dir, "config.yaml")) def load_from_config_dir(self): """ Loads the config file from the application directory (e.g. in the users home folder). If it exists. """ conf = os.path.join(click.get_app_dir("temci"), "config.yaml") if os.path.exists(conf) and os.path.isfile(conf): self.load_file(conf) def load_from_current_dir(self): """ Loads the settings from the `temci.yaml` file from the current working directory if it exists. """ if os.path.exists(self.config_file_name) and os.path.isfile(self.config_file_name): self.load_file(self.config_file_name) def get(self, key: str): """ Get the setting with the given key. :param key: name of the setting :return value of the setting :raises SettingsError if the setting doesn't exist """ path = key.split("/") if not self.validate_key_path(path): raise SettingsError("No such setting {}".format(key)) data = self.prefs for sub in path: data = data[sub] return data def __getitem__(self, key: str): """ Alias for self.get(self, key). """ return self.get(key) def _set(self, path: list, value): tmp_pref = self.prefs tmp_type = self.type_scheme for key in path[0:-1]: if key not in tmp_pref: tmp_pref[key] = {} tmp_type[key] = Dict(all_keys=False, key_type=Str()) tmp_pref = tmp_pref[key] tmp_type = tmp_type[key] tmp_pref[path[-1]] = value if path[-1] not in tmp_type.data: tmp_type[path[-1]] = Any() // Default(value) if path == ["settings_file"] and value is not "": self.load_file(value) def set(self, key, value): """ Sets the setting key to the passed new value :param key: settings key :param value: new value :raises SettingsError if the setting isn't valid """ tmp = copy.deepcopy(self.prefs) path = key.split("/") self._set(path, value) res = self._validate_settings_dict(self.prefs, "settings with new setting ({}={!r})".format(key, value)) if not res: self.prefs = tmp raise SettingsError(str(res)) self._setup() def __setitem__(self, key: str, value): """ Alias for self.set(key, value). :raises SettingsError if the setting isn't valid """ self.set(key, value) def validate_key_path(self, path: list): """ Validates a path into in to the settings trees, :param path: list of sub keys :return Is this key path valid? """ tmp = self.prefs for item in path: if item not in tmp: return False tmp = tmp[item] return True def has_key(self, key: str) -> bool: return self.validate_key_path(key.split("/")) def modify_setting(self, key: str, type_scheme: Type): """ Modifies the setting with the given key and adds it if it doesn't exist. 
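A rough sketch (the key is hypothetical)::

    Settings().modify_setting("run/warmup_runs", NaturalNumber() // Default(3))
    Settings()["run/warmup_runs"]   # -> 3, the default of the newly added type scheme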
:param key: key of the setting :param type_scheme: Type of the setting :param default_value: default value of the setting :raises SettingsError if the settings domain (the key without the last element) doesn't exist :raises TypeError if the default value doesn't adhere the type scheme """ path = key.split("/") domain = "/".join(path[:-1]) if len(path) > 1 and not self.validate_key_path(path[:-1]) \ and not isinstance(self.get(domain), dict): raise SettingsError("Setting domain {} doesn't exist".format(domain)) tmp_typ = self.type_scheme tmp_prefs = self.prefs for subkey in path[:-1]: tmp_typ = tmp_typ[subkey] tmp_prefs = tmp_prefs[subkey] tmp_typ[path[-1]] = type_scheme if path[-1] in tmp_prefs: if type_scheme.typecheck_default: typecheck(tmp_prefs[path[-1]], type_scheme) tmp_typ[path[-1]] = type_scheme else: tmp_prefs[path[-1]] = type_scheme.get_default() def get_type_scheme(self, key: str) -> Type: """ Returns the type scheme of the given key. :param key: given key :return: type scheme :raises SettingsError if the setting with the given key doesn't exist """ if not self.validate_key_path(key.split("/")): raise SettingsError("Setting {} doesn't exist".format(key)) tmp_typ = self.type_scheme for subkey in key.split("/"): tmp_typ = tmp_typ[subkey] return tmp_typ def modify_type_scheme(self, key: str, modificator: t.Callable[[Type], Type]): """ Modifies the type scheme of the given key via a modificator function. :param key: given key :param modificator: gets the type scheme and returns its modified version :raises SettingsError if the setting with the given key doesn't exist """ if not self.validate_key_path(key.split("/")): raise SettingsError("Setting {} doesn't exist".format(key)) tmp_typ = self.type_scheme subkeys = key.split("/") for subkey in subkeys[:-1]: tmp_typ = tmp_typ[subkey] tmp_typ[subkeys[-1]] = modificator(tmp_typ[subkeys[-1]]) assert isinstance(tmp_typ[subkeys[-1]], Type) def get_default_value(self, key: str): """ Returns the default value of the given key. :param key: given key :return: default value :raises SettingsError if the setting with the given key doesn't exist """ if not self.validate_key_path(key.split("/")): raise SettingsError("Setting {} doesn't exist".format(key)) tmp_def = self.defaults for subkey in key.split("/"): tmp_def = tmp_def[subkey] return tmp_def def default(self, value, key: str): """ :param value: :param key: :return: """ if value is None: return self[key] typecheck(value, self.get_type_scheme(key)) return value def store_into_file(self, file_name): """ Stores the current settings into a yaml file with comments. :param file_name: name of the resulting file """ with open(file_name, "w") as f: print(self.type_scheme.get_default_yaml(defaults=self.prefs), file=f) def has_log_level(self, level: str) -> bool: levels = ["error", "warn", "info", "debug"] return levels.index(level) <= levels.index(self["log_level"])PK#}6H 7lGGtemci/utils/vcs.pyimport os, shutil, errno, subprocess, tarfile from .settings import Settings from os.path import abspath from temci.utils.typecheck import * import typing as t class VCSDriver: """ Abstract version control system driver class used to support different vcss. """ dir = "." branch = None id_type = Str()|Int() def __init__(self, dir=".", branch: str = None): """ Initializes the VCS driver for a given base directory. 
It also sets the current branch if it's defined in the Settings :param dir: base directory :param branch: used branch """ typecheck_locals(dir=Str(), branch=Optional(Str())) self._exec_command_cache = {} self._exec_err_code_cache = {} self.dir = os.path.abspath(dir) self.branch = branch or self.get_branch() @classmethod def get_suited_vcs(cls, mode="auto", dir=".", branch: str = None) -> 'VCSDriver': """ Chose the best suited vcs driver for the passed base directory and the passed mode. If mode is "auto" the best suited vcs driver is chosen. If mode is "git" or "file", the GitDriver or the FileDriver is chosen. If the chosen driver isn't applicable than a VCSError is raised. :param mode: passed mode :param dir: base directory :param branch: used branch :return: vcs driver for the base directory :raises VCSError if the selected driver isn't applicable """ typecheck_locals(mode=ExactEither("file", "git", "auto"), dir=Str(), branch=Optional(Str())) if mode is "file" and FileDriver.is_suited_for_dir(dir): return FileDriver(dir, branch) elif mode is "git" and GitDriver.is_suited_for_dir(dir): return GitDriver(dir, branch) elif mode is "auto" and FileDriver.is_suited_for_dir(dir): avcls = [cls for cls in [GitDriver, FileDriver] if cls.is_suited_for_dir(dir)] return avcls[0](dir, branch) else: raise NoSuchVCSError("No such vcs driver for mode {0} and directory {1}".format(mode, dir)) @classmethod def is_suited_for_dir(cls, dir=".") -> bool: """ Checks whether or not this vcs driver can work with the passed base directory. :param dir: passed base directory path """ raise NotImplementedError() def set_branch(self, new_branch: str): """ Sets the current branch and throws an error if the branch doesn't exist. :param new_branch: new branch to set :raises VCSError if new_branch doesn't exist """ raise NotImplementedError() def get_branch(self) -> t.Optional[str]: """ Gets the current branch. :return: current branch name :raises VCSError if something goes terribly wrong """ raise None def get_valid_branches(self) -> t.Optional[t.List[str]]: """ Gets the valid branches for the associated repository or None if the vcs doesn't support branches. """ return None def has_uncommitted(self) -> bool: """ Check for uncommitted changes in the repository. :return: """ raise NotImplementedError() def number_of_revisions(self): """ Number of committed revisions in the current branch (if branches are supported). :return number of revisions """ raise NotImplementedError() def validate_revision(self, id_or_num): """ Validate the existence of the referenced revision. :param id_or_num: id or number of the reverenced revision :return: does it exists? """ raise NotImplementedError() def get_info_for_revision(self, id_or_num): """ Get an info dict for the given commit (-1 and 'HEAD' represent the uncommitted changes). Structure of the info dict:: "commit_id"; …, "commit_message": …, "commit_number": …, "is_uncommitted": True/False, "is_from_other_branch": True/False, "branch": … # branch name or empty string if this commit belongs to no branch :param id_or_num: id or number of the commit :return info dict :raises VCSError if the number or id isn't valid """ raise NotImplementedError() def get_info_for_all_revisions(self, max: int = -1) -> t.List[t.Dict[str, t.Any]]: """ Get an info dict for all revisions. 
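A hedged sketch of typical usage (assuming the current directory is a git working copy)::

    driver = VCSDriver.get_suited_vcs(mode="auto", dir=".")
    for info in driver.get_info_for_all_revisions(max=10):
        print(info["commit_number"], info["commit_id"], info["commit_message"])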
A single info dict has the following structure:: "commit_id"; …, "commit_message": …, "commit_number": …, "is_uncommitted": True/False, "is_from_other_branch": True/False, "branch": … # branch name or empty string if this commit belongs to no branch :param max: if max isn't -1 it gives the maximum number of revision infos returned :return: list of info dicts """ info_dicts = [] if self.has_uncommitted() and (max >= 1 or max == -1): info_dicts.append(self.get_info_for_revision(-1)) if max != -1: max -= 1 num = self.number_of_revisions() if max != -1 and max < num: num = max for i in range(num): info_dicts.append(self.get_info_for_revision(i)) return info_dicts def copy_revision(self, id_or_num, sub_dir, dest_dirs): """ Copy the sub directory of the current vcs base directory into all of the destination directories. :param id_or_num: id or number of the revision (-1 and 'HEAD' represent the uncommitted changes) :param sub_dir: sub directory of the current vcs base directory relative to it :param dest_dirs: list of destination directories in which the content of the sub dir is placed or dest dir string :raises VCSError if something goes wrong while copying the directories """ raise NotImplementedError() def _copy_dir(self, src_dir: str, dest_dirs): """ Helper method to copy a directory to many destination directories. It also works if for files. :param src_dir: source directory relative to the current base directory :param dest_dirs: list of destination directories or just one destination directory string """ typecheck_locals(src_dir=Str(), dest_dirs=List(Str())|Str()) src_dir_path = os.path.abspath(os.path.join(self.dir, src_dir)) dest_dir_paths = [] if type(dest_dirs) is str: dest_dir_paths = [os.path.abspath(dest_dirs)] else: dest_dir_paths = [os.path.abspath(dest) for dest in dest_dirs] for dest in dest_dir_paths: try: shutil.rmtree(dest) shutil.copytree(src_dir_path, dest) except OSError as exc: try: if exc.errno == errno.ENOTDIR: shutil.copy(src_dir_path, dest) else: raise except OSError as exc2: raise VCSError(str(exc2)) def _exec_command(self, command, error: str = "Error executing {cmd}: {err}", cacheable: bool = False): """ Executes the given external command and returns the resulting output. :param command: given external command, list or string (uses /bin/sh) :param error: error message with can have a placeholder `cmd` for the command and `èrr` for stderr :param cacheable: can the result of the command be cached to reduce the number of needed calls? :return output as string :raises VCSError if the external command hasn't exit code 0 """ typecheck_locals(command=List()|Str(), error=Str(), cacheable=Bool()) args = command if isinstance(command, Str()): args = ["/bin/sh", "-c", command] args_str = "#~#".join(args) if cacheable and args_str in self._exec_command_cache: return self._exec_command_cache[args_str] proc = subprocess.Popen(args, cwd=abspath(self.dir), stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) out, err = proc.communicate() if proc.poll() > 0: msg = error.format(cmd=command, err=err) raise VCSError(msg) if cacheable: self._exec_command_cache[args_str] = str(out) return str(out) def _exec_err_code(self, command, cacheable=False): """ Executes the given external command and returns its error code. :param command: given external command (as string or list) :param cacheable: can the result of the command be cached to reduce the number of needed calls? 
:return error code of the command (or 0 if no error occurred) """ typecheck_locals(command=List(Str())|Str(), cacheable=Bool()) args = [] if isinstance(command, list): args = command else: args = ["/bin/sh", "-c", command] args_str = "#~#".join(args) if cacheable and args_str in self._exec_err_code_cache: return self._exec_err_code_cache[args_str] proc = subprocess.Popen(args, cwd=abspath(self.dir), universal_newlines=True, stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL) out, err = proc.communicate() err_code = proc.poll() if cacheable: self._exec_err_code_cache[args_str] = err_code return err_code class FileDriver(VCSDriver): """ The default driver, that works with plain old files and directories without any vcs. Therefore its also usable with every directory. It has only one revision: Number -1 or 'HEAD', the current directory content. This class is also a simple example implementation of a VCSDriver. """ @classmethod def is_suited_for_dir(cls, dir="."): typecheck_locals(dir=Str()) return os.path.exists(dir) and os.path.isdir(dir) def set_branch(self, new_branch: str): typecheck_locals(new_branch=Optional(Str())) if new_branch is None: return raise VCSError("No branch support in FileDriver") def get_branch(self): return None def has_uncommitted(self): return True def number_of_revisions(self): return 0 def validate_revision(self, id_or_num): return id_or_num == -1 or id_or_num == 'HEAD' def get_info_for_revision(self, id_or_num): typecheck_locals(id_or_num=self.id_type) if not self.validate_revision(id_or_num): raise NoSuchRevision(id_or_num) return { "commit_id": "", "commit_message": "", "commit_number": -1, "is_uncommitted": True, "is_from_other_branch": False, "branch": "" } def copy_revision(self, id_or_num, sub_dir, dest_dirs): typecheck_locals(id_or_num=self.id_type, dest_dirs=List(Str())|Str()) if not self.validate_revision(id_or_num): raise NoSuchRevision(id_or_num) self._copy_dir(sub_dir, dest_dirs) class GitDriver(VCSDriver): """ The driver for git repositories. """ def __init__(self, dir=".", branch: str = None): super().__init__(dir, branch) self.base_path = self._get_git_base_dir(dir) @classmethod def is_suited_for_dir(cls, dir="."): typecheck_locals(dir=Str()) return cls._get_git_base_dir(dir) is not None @classmethod def _get_git_base_dir(cls, dir=".") -> str: path = os.path.abspath(dir).split("/") if path[-1] == "": path = path[0:-1] for i in reversed(range(1, len(path) - 1)): sub_path = path[0:i] if os.path.isdir(os.path.join("/", os.path.join(*sub_path), ".git")): return os.path.join(*path[i:]) return None def get_branch(self): if self.branch is not None: return self.branch return self._exec_command("git rev-parse --abbrev-ref HEAD", error="Can't get current branch. 
Somethings wrong with the repository: {err}").strip() def set_branch(self, new_branch: str): typecheck_locals(new_branch=Str()) if new_branch is self.get_branch(): return out = self._exec_command("git branch --list".format(new_branch), cacheable=True) if new_branch not in out: raise VCSError("No such branch {}".format(new_branch)) self.branch = new_branch def get_valid_branches(self) -> t.Optional[t.List[str]]: res = self._exec_command("git branch --list", cacheable=True).split(" ") branches = [] for line in res: line = line.split("\n")[0].strip() if line != "": branches.append(line) return branches def has_uncommitted(self): return self._exec_err_code("git diff --cached --quiet", cacheable=True) == 1 def _list_of_commit_tuples(self): """ Executes `git log BRANCH` and parses it's output lines into tuples (hash, msg). :return list of tuples """ res = self._exec_command("git log --oneline {}".format(self.branch), cacheable=True).split("\n") list = [] for line in res: if len(line.strip()) > 0: list.append(line.strip().split(" ", 1)) return list def number_of_revisions(self): return len(self._list_of_commit_tuples()) def _commit_number_to_id(self, num): """ Returns a commit id for the given commit number and normalizes passed commit ids. :param num: commit number :return commit id (string) :raises VCSError if the commit number isn't valid """ typecheck_locals(num=self.id_type) if not isinstance(num, int): return self._normalize_commit_id(num) if num >= self.number_of_revisions() or num < -1: raise VCSError("{} isn't a valid commit number (they are counted from 0).".format(num)) cid, __ = self._list_of_commit_tuples()[num] return cid def _normalize_commit_id(self, id): """ Normalizes the given commit id. :return normalized commit id :raises VCSError if something goes wrong """ out = self._exec_command("git show {} | head -n 1".format(id), cacheable=True).strip() return out.split(" ")[1] def validate_revision(self, id_or_num): typecheck_locals(id_or_num=self.id_type) if id_or_num is -1 or id_or_num == "HEAD": return self.has_uncommitted() if isinstance(id_or_num, int) and id_or_num < -1: return False try: cid = self._commit_number_to_id(id_or_num) return self._exec_err_code("git show {} | head -n 1".format(cid), cacheable=True) == 0 except VCSError: return False def _get_branch_for_revision(self, id_or_num): if id_or_num == -1 or id_or_num == "HEAD": return self.get_branch() id = self._commit_number_to_id(id_or_num) out = self._exec_command("git branch --contains {}".format(id), cacheable=True) out = out.split("\n")[0].strip() return out.split(" ")[-1] def get_info_for_revision(self, id_or_num): typecheck_locals(id_or_num=self.id_type) if id_or_num == -1 or id_or_num == "HEAD": return { "commit_id": "HEAD", "commit_message": "Uncommitted changes", "commit_number": -1, "is_uncommitted": True, "is_from_other_branch": False, "branch": self._get_branch_for_revision(id_or_num) } cid = self._commit_number_to_id(id_or_num) line = self._exec_command("git show {} --oneline | head -n 1".format(cid), cacheable=True).strip() cid, msg = line.split(" ", 1) branch = self._get_branch_for_revision(id_or_num) cid = self._normalize_commit_id(cid) other_branch = True commit_number = -2 tuples = self._list_of_commit_tuples() for i in range(0, len(tuples)): if self._normalize_commit_id(tuples[i][0]) == cid: commit_number = i other_branch = False break return { "commit_id": cid, "commit_message": msg.strip(), "commit_number": commit_number, "is_uncommitted": False, "is_from_other_branch": other_branch, "branch": 
branch } def copy_revision(self, id_or_num, sub_dir: str, dest_dirs): typecheck_locals(id_or_num=self.id_type, dest_dirs=List(Str())|Str()) if isinstance(dest_dirs, str): dest_dirs = [dest_dirs] if id_or_num == -1 or id_or_num == "HEAD": self._copy_dir(sub_dir, dest_dirs) sub_dir = os.path.join(self.base_path, sub_dir) tar_file = os.path.abspath(os.path.join(Settings()["tmp_dir"], "tmp.tar")) cmd = "git archive --format tar --output {} {}".format(tar_file, self._commit_number_to_id(id_or_num)) self._exec_command(cmd) try: with tarfile.open(tar_file) as tar: for dest in dest_dirs: if sub_dir == ".": tar.extractall(os.path.abspath(dest)) else: subdir_and_files = [ tarinfo for tarinfo in tar.getmembers() if tarinfo.name.startswith(sub_dir + "/") or tarinfo.name is sub_dir ] tar.extractall(members=subdir_and_files, path=os.path.abspath(dest)) except tarfile.TarError as err: os.remove(tar_file) raise VCSError(str(err)) os.remove(tar_file) class VCSError(EnvironmentError): """ Error for everything that goes fataly wrong with vcs handling. """ pass class NoSuchVCSError(VCSError): pass class NoSuchRevision(VCSError): passPKt:H@@temci/utils/click_helper.pyimport logging import warnings import click from temci.utils.typecheck import * from temci.utils.settings import Settings, SettingsError from temci.utils.registry import AbstractRegistry import typing as t def type_scheme_option(option_name: str, type_scheme: Type, is_flag: bool = False, callback = None, short: str = None, with_default: bool = True, default = None): """ Is essentially a wrapper around click.option that works with type schemes. :param option_name: name of the option :param type_scheme: type scheme to use :param is_flag: is this option a "--ABC/--no-ABC" like flag :param callback: callback that is called with the parameter and the argument and has to return its argument :param short: short name of the option (ignored if flag=True) :param with_default: set a default value for the option if possible? 
:param default: default value (if with_default is true), default: default value of the type scheme """ __type_scheme = type_scheme __short = short help_text = type_scheme.description has_default = with_default default_value = default if with_default and not default_value: try: default_value = type_scheme.get_default() except ValueError: has_default = False def raw_type(_type): while isinstance(_type, Constraint) or isinstance(_type, NonErrorConstraint): _type = _type.constrained_type if not isinstance(_type, Type): return _type if isinstance(_type, T): return _type.native_type if isinstance(_type, Int): return int if isinstance(_type, Str): return str if isinstance(_type, ExactEither) and isinstance(_type.exp_values, List(T(type(_type.exp_values[0])))): return _type.exp_values[0] else: raise ValueError("type scheme {} (option {}) is not annotatable".format(str(type_scheme), option_name)) def func(decorated_func): used_raw_type = None multiple = False type_scheme = __type_scheme _type_scheme = type_scheme while isinstance(type_scheme, Either): type_scheme = type_scheme.types[0] while isinstance(type_scheme, Constraint) or isinstance(type_scheme, NonErrorConstraint): type_scheme = type_scheme.constrained_type if isinstance(type_scheme, List) or isinstance(type_scheme, ListOrTuple): multiple = True type_scheme = type_scheme.elem_type if isinstance(type_scheme, click.ParamType): used_raw_type = type_scheme elif isinstance(type_scheme, ExactEither): used_raw_type = click.Choice(type_scheme.exp_values) elif isinstance(type_scheme, Exact): used_raw_type = click.Choice(type_scheme.exp_value) elif isinstance(type_scheme, Tuple): used_raw_type = tuple([raw_type(x) for x in type_scheme.elem_types]) elif isinstance(type_scheme, Any): used_raw_type = object elif isinstance(type_scheme, T): used_raw_type = type_scheme.native_type elif isinstance(type_scheme, Str): used_raw_type = str else: used_raw_type = raw_type(type_scheme) option_args = { "type": used_raw_type, "callback": None, "multiple": multiple } if has_default: option_args["default"] = default_value option_args["show_default"] = True #else: # option_args["show_default"] = False if not isinstance(option_args["type"], click.ParamType): option_args["callback"] = validate(_type_scheme) if not isinstance(option_args["type"], Either(T(tuple), T(str))): option_args["type"] = raw_type(option_args["type"]) if callback is not None: if option_args["callback"] is None: option_args["callback"] = lambda ctx, param, value: callback(param, value) else: old_callback = option_args["callback"] option_args["callback"] = lambda ctx, param, value: callback(param, old_callback(ctx, param, value)) if is_flag: option_args["is_flag"] = True #print(type(option_args["callback"]), option_name, type_scheme) opt = None if help_text is not None: typecheck(help_text, Str()) option_args["help"] = help_text if is_flag: del(option_args["type"]) opt = click.option("--{name}/--no-{name}".format(name=option_name), **option_args)(decorated_func) if __short is not None: opt = click.option("--{}".format(option_name), "-" + __short, **option_args)(decorated_func) else: opt = click.option("--{}".format(option_name), **option_args)(decorated_func) return opt #print(type(f())) return func def validate(type_scheme): """ Creates a valid click option validator function that can be passed to click via the callback parameter. The validator function expects the type of the value to be the raw type of the type scheme. 
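A small sketch of how the returned callback plugs into click (the option name and default are only illustrative)::

    @click.command()
    @click.option("--runs", type=int, default=3, callback=validate(NaturalNumber()))
    def run(runs):
        ...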
:param type_scheme: type scheme the validator validates against :return: the validator function """ def func(ctx, param, value): param = param.human_readable_name param = param.replace("-", "") res = verbose_isinstance(value, type_scheme, value_name=param) if not res: raise click.BadParameter(str(res)) return value return func class CmdOption: """ Represents a command line option. """ def __init__(self, option_name, settings_key: str = None, type_scheme: Type = None, short: str = None, completion_hints: dict = None, is_flag: bool = None): """ Initializes a option either based on a setting (via settings key) or on a type scheme. If this is backed by a settings key, the setting is automatically set. If is_flag is None, it is set True if type_scheme is an instance of Bool() or BoolOrNone() :param option_name: name of the option :param settings_key: :param type_scheme: type scheme with default value :param short: short version of the option (ignored if is_flag=True) :param completion_hints: additional completion hints (dict with keys for each shell) :param is_flag: is the option a "--ABC/--no-ABC" flag like option? :return: """ typecheck(option_name, Str()) self.option_name = option_name #typecheck([settings_key, short], List(Str() | E(None))) self.settings_key = settings_key self.short = short self.completion_hints = completion_hints if (settings_key is None) == (type_scheme is None): raise ValueError("settings_key and type_scheme are both None (or not None)") self.type_scheme = Settings().get_type_scheme(settings_key) if settings_key is not None else type_scheme if type_scheme is not None and not isinstance(type_scheme, click.ParamType): self.callback = lambda x: None if settings_key is not None and not isinstance(self.type_scheme, click.ParamType): def callback(param: click.Option, val): try: Settings()[settings_key] = val except SettingsError as err: logging.error("Error while processing the passed value ({val}) of option {opt}: {msg}".format( val=repr(val), opt=option_name, msg=str(err) )) exit(1) self.callback = callback else: self.callback = None self.description = self.type_scheme.description.strip().split("\n")[0] self.has_description = self.description not in [None, ""] if not self.has_description: warnings.warn("Option {} is without documentation.".format(option_name)) self.has_default = True self.default = None try: self.default = self.type_scheme.get_default() except ValueError: self.has_default = False if settings_key: self.default = Settings()[settings_key] if hasattr(self.type_scheme, "completion_hints") and self.completion_hints is None: self.completion_hints = self.type_scheme.completion_hints self.is_flag = is_flag is True or (is_flag is None and type(self.type_scheme) in [Bool, BoolOrNone]) if self.is_flag: self.completion_hints = None self.short = None def callback(param, val): if val is not None: try: Settings()[settings_key] = val except SettingsError as err: logging.error("Error while processing the passed value ({val}) of option {opt}: {msg}".format( val=val, opt=option_name, msg=str(err) )) return val self.callback = callback self.has_completion_hints = self.completion_hints is not None self.has_short = short is not None def __lt__(self, other) -> bool: """ Compare by option_name. 
""" typecheck(other, CmdOption) return self.option_name < other.option_name def __str__(self) -> str: return self.option_name def __repr__(self) -> str: return "CmdOption({})".format(self.option_name) @classmethod def from_registry(cls, registry: type, name_prefix: str = None) -> 'CmdOptionList': """ Creates a list of CmdOption objects from an registry. It creates an activation flag (--OPT/--no-OPT) for each registered plugin and creates for each plugin preference an option with name OPT_PREF. Deeper nesting is intentionally not supported. :param registry: :param name_prefix: prefix of each option name (usable to avoid ambiguity problems) :return list of CmdOptions :rtype List[CmdOption] """ assert issubclass(registry, AbstractRegistry) typecheck_locals(name_prefix=Str()|E(None)) name_prefix = name_prefix if name_prefix is not None else "" ret_list = CmdOptionList() for plugin in registry.registry: active_key = "{}_active".format("/".join([registry.settings_key_path, plugin])) ret_list.append(CmdOption( option_name=name_prefix + plugin, settings_key=active_key )) misc_key = "{}_misc".format("/".join(registry.settings_key_path.split("/") + [plugin])) misc = Settings().get_type_scheme(misc_key) typecheck(misc, Dict) for misc_sub_key in misc.data: misc_sub = misc[misc_sub_key] if not isinstance(misc_sub, Dict): ret_list.append(CmdOption( option_name="{}{}_{}".format(name_prefix, plugin, misc_sub_key), settings_key="{}/{}".format(misc_key, misc_sub_key) )) return ret_list @classmethod def from_non_plugin_settings(cls, settings_domain: str, exclude: t.List[Str] = None, name_prefix: str = None) -> 'CmdOptionList': """ Creates a list of CmdOption object from all sub settings (in the settings domain). It excludes all sub settings that are either in the exclude list or end with "_active" or "_misc" (used for plugin settings). Also every setting that is of type Dict is ignored. :param settings_domain: settings domain to look into (or "" for the root domain) :param exclude: list of sub keys to exclude :return list of CmdOptions :rtype List[CmdOption] """ exclude = exclude or [] name_prefix = name_prefix or "" typecheck_locals(settings_domain=str, exclude=List(Str()), name_prefix=Str()) domain = Settings().type_scheme if settings_domain != "": domain = Settings().get_type_scheme(settings_domain) ret_list = [] for sub_key in domain.data: if sub_key not in exclude and all(not sub_key.endswith(suf) for suf in ["_active", "_misc"]) \ and not isinstance(domain[sub_key], Dict): ret_list.append(CmdOption( option_name=name_prefix + sub_key, settings_key=settings_domain + "/" + sub_key if settings_domain != "" else sub_key )) return CmdOptionList(*ret_list) class CmdOptionList: """ A simple list for CmdOptions that supports list flattening. """ def __init__(self, *options: t.Union[CmdOption, 'CmdOptionList']): self.options = [] for option in options: self.append(option) def append(self, options: t.Union[CmdOption, 'CmdOptionList']) -> 'CmdOptionList': """ Appends the passed CmdÖptionList or CmdOption and flattens the resulting list. :param options: CmdÖptionList or CmdOption :return self """ typecheck_locals(options=T(CmdOptionList)|T(CmdOption)) if isinstance(options, CmdOption): self.options.append(options) else: self.options.extend(options.options) return self def set_short(self, option_name: str, new_short: str) -> 'CmdOptionList': """ Sets the short option name of the included option with the passed name. 
    def set_short(self, option_name: str, new_short: str) -> 'CmdOptionList':
        """
        Sets the short option name of the included option with the passed name.

        :param option_name: name of the included option
        :param new_short: new short option name
        :return: self
        :raises IndexError: if the option with the passed name doesn't exist
        """
        self[option_name].short = new_short
        return self

    def __getitem__(self, key: t.Union[int, str]) -> CmdOption:
        """
        Get the included option with the passed name or at the passed index.

        :param key: option name or index
        :return: found cmd option
        :raises IndexError: if the option doesn't exist
        """
        if isinstance(key, int):
            return self.options[key]
        for option in self.options:
            if option.option_name == key:
                return option
        raise IndexError("No such key {!r}".format(key))

    def __iter__(self):
        return self.options.__iter__()

    def __str__(self) -> str:
        return str(self.options)

    def __repr__(self) -> str:
        return repr(self.options)


def cmd_option(option: t.Union[CmdOption, CmdOptionList], name_prefix: str = None):
    """
    Wrapper around click.option that works with CmdOption objects.
    If option is a list of CmdOptions, the type_scheme_option decorators are chained.
    Nested lists are supported in the same manner.

    :param option: CmdOption or (possibly nested) list of CmdOptions
    :param name_prefix: prefix of all option names
    :return: click.option(...) like decorator
    """
    typecheck(option, T(CmdOption) | T(CmdOptionList))
    name_prefix = name_prefix or ""
    typecheck(name_prefix, Str())
    if isinstance(option, CmdOption):
        return type_scheme_option(option_name=name_prefix + option.option_name,
                                  type_scheme=option.type_scheme,
                                  short=option.short,
                                  is_flag=option.is_flag,
                                  callback=option.callback,
                                  with_default=option.has_default,
                                  default=option.default)

    def func(f):
        for opt in sorted(option.options):
            f = cmd_option(opt, name_prefix)(f)
        return f
    return func

#@annotate(Dict({"count": Int(), "abc": Str(), "d": Dict({
#    "abc": NaturalNumber()
#})}), {"count": 3, "abc": "", "d": {"abc": 1}}, {"count": "Hilfe!!!"})
"""
(Dict({
    "abc": Int() // Default(4),
    "d": Dict({
        "sad": (CommaSepStringList() | Exact("f")) // Default("f")
    })
})
"""
"""
@click.command()
@type_scheme_option(Settings().type_scheme)
def cmd(**kwargs):
    def f(**kwargs):
        print(kwargs)
    return f
cmd()
"""
PKJ:HNSKtemci/utils/util.py
import os
import subprocess
import typing as t
import sys


def recursive_exec_for_leafs(data: dict, func, _path_prep=[]):
    """
    Executes the function for every leaf key (a key without any sub keys) of the data dict tree.

    :param data: dict tree
    :param func: function that gets passed the leaf key, the key path and the actual value
    """
    if not isinstance(data, dict):
        return
    for subkey in data.keys():
        if type(data[subkey]) is dict:
            recursive_exec_for_leafs(data[subkey], func, _path_prep=_path_prep + [subkey])
        else:
            func(subkey, _path_prep + [subkey], data[subkey])
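# Illustrative usage sketch (not part of the original module): the callback receives the leaf
# key, the full key path and the value. The data dict below is an assumed example:
#
#   recursive_exec_for_leafs({"run": {"runs": 100, "out": "result.yaml"}},
#                            lambda key, path, value: print("/".join(path), "=", value))
#   # prints: run/runs = 100
#   #         run/out = result.yaml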
def ensure_root(reason: str):
    """
    Throws an error if the user has no root privileges.

    :param reason: why do you need root privileges? Used to improve the error message.
    :raises EnvironmentError: if the current user has no root privileges
    """
    proc = subprocess.Popen(["/usr/bin/sudo", "-n", "/usr/bin/id"],
                            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    proc.communicate()
    if proc.poll() > 0:
        raise EnvironmentError("This program needs to be run with super user privileges: " + reason)


def get_cache_line_size(cache_level: int = None) -> t.Optional[int]:
    """
    Returns the cache line size of the cache on the given level.
    Level 0 and 1 are actually on the same level.

    :param cache_level: cache level; if None, the highest level cache is used
    :return: cache line size or None if the cache on the given level doesn't exist
    """
    if cache_level is None:
        cache_level = -1
        for path in os.listdir("/sys/devices/system/cpu/cpu0/cache/"):
            if path.startswith("index"):
                cache_level = max(cache_level, int(path.split("index")[1]))
        if cache_level == -1:
            return None
    level_dir = "/sys/devices/system/cpu/cpu0/cache/index" + str(cache_level)
    with open(level_dir + "/coherency_line_size") as f:
        return int(f.readline().strip())


def join_strs(strs: t.List[str], last_word: str = "and") -> str:
    """
    Joins the passed strings with ", ", except for the last two strings,
    which are joined with the passed word.
    """
    if len(strs) == 1:
        return strs[0]
    elif len(strs) > 1:
        return " {} ".format(last_word).join([", ".join(strs[0:-1]), strs[-1]])


allow_all_imports = False


def can_import(module: str) -> bool:
    """
    Can a module (like scipy or numpy) be imported without a severe and avoidable
    performance penalty?

    :param module: name of the module
    """
    if allow_all_imports:
        return True
    if module not in ["scipy", "numpy"]:
        return True
    if len(sys.argv) == 1 or sys.argv[1] in ["completion", "version", "assembler"]:
        return False
    return True


class Singleton(type):
    """
    Singleton meta class.
    See http://stackoverflow.com/a/6798042
    """
    _instances = {}

    def __call__(cls, *args, **kwargs):
        if cls not in cls._instances:
            cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
        return cls._instances[cls]


class InsertionTimeOrderedDict:
    """
    A dict whose elements are ordered by their insertion time.
    """

    def __init__(self):
        self._dict = {}
        self._keys = []

    def __delitem__(self, key):
        del self._dict[key]
        del self._keys[self._keys.index(key)]

    def __getitem__(self, key):
        return self._dict[key]

    def __setitem__(self, key, value):
        if key not in self._dict:
            self._keys.append(key)
        self._dict[key] = value

    def __iter__(self):
        return self._keys.__iter__()

    def values(self) -> t.List:
        return [self._dict[key] for key in self._keys]

    def keys(self) -> t.List:
        return self._keys

    def __len__(self):
        return len(self._keys)

    @classmethod
    def from_list(cls, items: t.Optional[list], key_func: t.Callable[[t.Any], t.Any]) -> 'InsertionTimeOrderedDict':
        """
        Creates an ordered dict out of a list of elements.

        :param items: list of elements
        :param key_func: function that returns a key for each passed list element
        :return: created ordered dict with the elements in the same order as in the passed list
        """
        if items is None:
            return InsertionTimeOrderedDict()
        ret = InsertionTimeOrderedDict()
        for item in items:
            ret[key_func(item)] = item
        return ret
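# Illustrative usage sketch (not part of the original module): from_list() keys the entries by
# whatever key_func returns and preserves the list order:
#
#   d = InsertionTimeOrderedDict.from_list(["ls", "ls -l"], key_func=lambda cmd: cmd)
#   list(d)        # -> ["ls", "ls -l"]
#   d["ls -l"]     # -> "ls -l"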
PKP;H3%%#temci-0.5.dist-info/DESCRIPTION.rst
temci
=====

An advanced benchmarking tool. Currently developed as my bachelor thesis.
Therefore expect some rough parts, but most of the features should (hopefully) work well.

Why should you use temci?
-------------------------

temci allows you to easily measure the execution time (and other properties) of programs and
compare them against each other, resulting in a pretty HTML5 based report.
Furthermore it sets up the environment to ensure benchmarking results with a low variance and
uses a kind of assembly randomisation to reduce the effect of caching.

Requirements
------------

- python3
- numpy and scipy
- perf (e.g. from the ubuntu package `linux-tools-generic`)
- super user rights (for benchmarking, although it's possible without)
- linux (other OSes aren't supported)
- tested with Fedora 23 and Ubuntu 15.10 (most distros with a kernel version >= 3.0 should work)
- gcc (for compiling a tool needed for `temci build`)
- kernel-devel packages (for compiling the kernel module to disable caches; not required, see below)
- (pdf)latex (for pdf report generation)

Testing
-------

The initial goal was to develop this application in a test driven way. But as testing is only
reasonable for code that doesn't interact with the UI or the OS, automatic testing is only
feasible for a small part of the code. Therefore the code is extensively tested by hand.

Installing
----------

First you have to install (if they aren't installed already):

- numpy and scipy (python packages)
- latex (most linux distros use texlive), if you want to output plots as pdfs

Then install temci itself, either via pip3:

```sh
pip3 install temci
```

Or from source: clone this repository locally and install it via

```sh
cd FOLDER_THIS_README_LIES_IN
pip3 install .
```

If you want to use the DisableCaches plugin or the `temci build` tool, you have to run

```sh
temci setup
```

To simplify using temci, enable tab completion for your favorite shell (bash and zsh are
supported) by adding the following line to your bash or zsh configuration file:

```sh
source `temci_completion [bash|zsh]`
```

To update the completion after an update (or after developing some plugins), run:

```sh
temci completion [bash|zsh]
```

It's a variant of `temci_completion` that rebuilds the completion files every time it's called.

Usage
-----

*Side note: This tool needs root privileges for most benchmarking features.*

There are currently two good ways to explore the features of temci:

1. Play around with temci using the provided tab completion for zsh (preferred) and bash
2. Look into the annotated settings file (it can be generated via `temci init settings`)

A user guide is planned, but not a priority, as it's not part of my bachelor thesis.

Getting started with simple benchmarking
----------------------------------------

*Or: How to benchmark a simple program called ls (a program is any valid shell code that is
executable by /bin/sh)*

There are two ways to benchmark a program: a short one and a long one.

The short one first. Just type:

```sh
temci short exec -wd "ls" --runs 100 --out out.yaml
```

Explanation:

- `short` is the category of small helper subprograms that allow using some temci features
  without config files
- `-wd` is the short option for `--without_description` and tells temci to use the program as
  its own description
- `ls` is the benchmarked program
- `--runs 100` is short for `--min_runs 100 --max_runs 100`
- `--min_runs 100` tells temci to benchmark `ls` at least 100 times (the default value is currently 20)
- `--max_runs 100` tells temci to benchmark `ls` at most 100 times (the default value is currently 100)
- setting min and max runs to different values only makes sense when comparing two or more
  programs via temci
- `--out out.yaml` tells temci to store the YAML result file as `out.yaml` (default is `result.yaml`)

Now the long one. Just type

```sh
temci init run_config
```

This lets you create a temci run config file using a textual interface (if you don't want to
create it entirely by hand).
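For orientation, a run config is a YAML list with one entry per benchmarked program block. The
exact schema is best taken from the output of `temci init run_config`; the following is only an
illustrative sketch with assumed field names, not a verbatim example:

```yaml
# hypothetical sketch - generate the real file with `temci init run_config`
- attributes:
    description: ls          # human readable description used in the report
  run_config:
    run_cmd: ls              # shell command that is benchmarked (field name assumed)
```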
To actually run the configuration, type:

```sh
temci exec [file you stored the run config in] --out out.yaml
```

Explanation:

- `exec` is the subprogram that takes a run config and benchmarks all the included program blocks
- `--out out.yaml` tells temci where to store the YAML file containing the benchmarking results

Now you have a YAML result file with the following structure:

```yaml
- attributes:
    description: ls
  data:
    …
    task-clock:
      - [first measurement for property task-clock]
      - …
    …
```

You can either create a report by parsing the YAML file yourself or by using the temci report tool.
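Parsing it yourself can be as simple as the following sketch (not part of temci), assuming the
structure shown above, that the `task-clock` property was measured, and that PyYAML (or the
required ruamel.yaml) is installed:

```python
import yaml  # PyYAML; ruamel.yaml offers a compatible safe_load as well

with open("out.yaml") as f:
    blocks = yaml.safe_load(f)

for block in blocks:
    description = block["attributes"]["description"]
    measurements = block["data"]["task-clock"]   # raw measurements for this property
    mean = sum(measurements) / len(measurements)
    print("{}: mean task-clock = {}".format(description, mean))
```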
To use the report tool, type:

```sh
temci report out.yaml --reporter html2 --html2_out ls_report
```

Explanation:

- `out.yaml` is the previously generated benchmarking result file
- `--reporter html2` tells temci to use the HTML2Reporter. This reporter creates a fancy HTML5
  based report in the folder `ls_report`. The main HTML file is named `report.html`.
  Other possible reporters are `html` and `console`. The default reporter is `html2`.
- `--html2_out` tells the HTML2Reporter the folder in which to place the report

Now you have a report on the performance of `ls`.

### How to go further from here

- Benchmark two programs against each other, either by adding another `-wd [other program]` to
  the command line or by appending to the run config file (also possible via `temci init run_config`)
- If using `temci short exec`: add a better description for the benchmarked program by using
  `-d [DESCRIPTION] [PROGRAM]` instead of `-wd`. `-d` is short for `--with_description`
- If using `temci init run_config`:
    - Choose another set of measured properties (e.g. to measure the LL1 cache misses)
    - Change the used runner. The default runner is `perf_stat` and uses `perf stat` to actually
      measure the program. Other possible runners are currently `rusage` and `spec`:
        - The `rusage` runner uses a small C wrapper around the `getrusage(2)` system call to
          measure things like the maximum resource usage (i.e. most things you get by using
          `time` or `/usr/bin/time -v`)
        - The `spec` runner gets its measurements by parsing a SPEC-benchmark-like result file.
          This allows using the SPEC benchmarks with temci.
- Append `--send_mail [your email address]` to get a mail after the benchmarking finished.
  This mail has the benchmarking result file as an attachment.

temci build usage
-----------------

Some random notes about using `temci build` that should later be turned into an actual description.

### Haskell support for assembly randomisation

To build Haskell projects randomized (or any other compiled language that is not directly
supported by gcc), you'll have to tell the compiler to use gcc or the GNU `as` tool.
This is e.g. possible with ghc's `-pgmc` option.

Fancy Plugins
-------------

### DisableCaches

Build it via `temci setup`. Needs the kernel development package of your distribution;
it's called `kernel-devel` on Fedora.

_Attention_: Everything takes very, very long. It might require a restart of your system.
Example for the slowdown: for a silly Haskell program (just printing `"sdf"`), the measured
task-clock went from just 1.4 seconds to 875.2 seconds. The speedup with caches is 62084%.

### StopStart

This plugin tries to stop most other processes on the system that aren't really needed.
By default, most processes that are children (or children's children, …) of a process whose
name ends with "dm" are stopped. This is a simple heuristic to stop all processes that are not
vital (i.e. that were created by some sort of display manager). SSH and X11 are stopped too.

The advantages of this plugin (which is used via the command line flag `--stop_start`):

- No one can start other programs on the system (via ssh or the user interface)
  => fewer other processes interfere with the benchmarking
- Processes like Firefox don't interfere with the benchmarking as they are stopped
- It reduces the variance of benchmarking results significantly

Disadvantages:

- You can't interact with the system (therefore use the send_mail option to get a mail after
  the benchmarking finished)
- Not all processes that could be safely stopped are stopped, as this decision is hard to make
- You can't stop the benchmarking, as all keyboard interaction is disabled (by stopping X11)

Stopping a process here means sending it a SIGSTOP signal and resuming it by sending a SIGCONT
signal later.

Why is temci called temci?
--------------------------

The problem in naming programs is that most good program names are already taken.
A good program or project name has (in my opinion) the following properties:

- it shouldn't be used on the relevant platforms (in this case: GitHub and PyPI)
- it should be short (no one wants to type long program names)
- it should be pronounceable
- it should have at least something to do with the program

temci is such a name. It's Lojban for time (i.e. the time duration between two moments or events).

Contributing
------------

Bug reports are highly appreciated. As this is the code for my bachelor thesis, actual code
contributions are problematic. Whole classes or modules (like plugins, reporters or runners)
can be contributed, as they pose no attribution problem (I can clearly state that a class was
written by XYZ). Other kinds of code contribution could pose problems for me.
PKP;H'In.$temci-0.5.dist-info/entry_points.txt
[console_scripts]
temci=temci.scripts.cli:cli_with_error_catching
temci_completion=temci.scripts.temci_completion:cli
PKP;HsL00!temci-0.5.dist-info/metadata.json
{"classifiers": ["Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3 :: Only", "Operating System :: POSIX :: Linux", "Topic :: System :: Benchmark", "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", "Development Status :: 3 - Alpha", "Environment :: Console", "Intended Audience :: Developers"], "extensions": {"python.commands": {"wrap_console": {"temci": "temci.scripts.cli:cli_with_error_catching", "temci_completion": "temci.scripts.temci_completion:cli"}}, "python.details": {"contacts": [{"email": "me@mostlynerdless.de", "name": "Johannes Bechberger", "role": "author"}], "document_names": {"description": "DESCRIPTION.rst"}}, "python.exports": {"console_scripts": {"temci": "temci.scripts.cli:cli_with_error_catching", "temci_completion": "temci.scripts.temci_completion:cli"}}}, "extras": [], "generator": "bdist_wheel (0.26.0)", "license": "GPLv3", "metadata_version": "2.0", "name": "temci", "platform": "linux", "run_requires": [{"requires": ["Click", "cairocffi", "cgroupspy", "cpuset-py3", "docopt", "fn", "humanfriendly", "jedi", "matplotlib", "prompt-toolkit", "ptpython", "pyaml", "pygments", "pytimeparse", "ruamel.yaml", "seaborn", "typing", "wcwidth"]}], "summary": "Advanced benchmarking tool", "version": "0.5"}
PKP;Hu!temci-0.5.dist-info/top_level.txt
temci
PKP;H}\\temci-0.5.dist-info/WHEEL
Wheel-Version: 1.0
Generator: bdist_wheel (0.26.0)
Root-Is-Purelib: true
Tag: py3-none-any
PKP;H#5[)[)temci-0.5.dist-info/METADATA
Metadata-Version: 2.0
Name: temci
Version: 0.5
Summary: Advanced benchmarking tool
Home-page: UNKNOWN
Author: Johannes Bechberger
Author-email: me@mostlynerdless.de
License: GPLv3
Platform: linux
Classifier: Programming Language :: Python :: 3.4
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Operating System :: POSIX :: Linux
Classifier: Topic :: System :: Benchmark
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
Classifier: Development Status :: 3 - Alpha
Classifier: Environment :: Console
Classifier: Intended Audience :: Developers
Requires-Dist: Click
Requires-Dist: cairocffi
Requires-Dist: cgroupspy
Requires-Dist: cpuset-py3
Requires-Dist: docopt
Requires-Dist: fn
Requires-Dist: humanfriendly
Requires-Dist: jedi
Requires-Dist: matplotlib
Requires-Dist: prompt-toolkit
Requires-Dist: ptpython
Requires-Dist: pyaml
Requires-Dist: pygments
Requires-Dist: pytimeparse
Requires-Dist: ruamel.yaml
Requires-Dist: seaborn
Requires-Dist: typing
Requires-Dist: wcwidth
PKP;H* temci-0.5.dist-info/RECORDtemci/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 temci/build/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 temci/build/assembly.py,sha256=U_HRUeeal7k2Dpo9rR2Iw9y69UCz0AShkwa-_t0P7GQ,15212 temci/build/build_processor.py,sha256=R0yvg4M5oxMU5TfMtBVpuhW8QbvI_I5Az9dE2mKHwHc,3022 temci/build/builder.py,sha256=iTfmPyi-d_XTXAeTO0Q_CeGMq593DpDjhNWeqrhAOMY,6194 temci/misc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 temci/misc/game.py,sha256=VmX4gIdOQ8SHVhec44_qWZIaJymfOPAfF1lbHDsy2G0,52184 temci/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 temci/run/cpuset.py,sha256=zLHfkYdnEkgFiVKUPqJDspkwzF2Qd2qZagodeBY4XIk,12009 temci/run/run_driver.py,sha256=Ue0k0bl9vhvzY88Jdhgj09icIzU05TqAlpJdS6lY7OA,25237 temci/run/run_driver_plugin.py,sha256=ToL6gpMPFpcTspDW-m9KQFRcEywvRinFf-VPW_RSVE8,16527 temci/run/run_processor.py,sha256=qD4mg-qwanC8yOXD0-bMqeiCI3XV_z8-sqKFVgzDu7k,9765 temci/run/run_worker_pool.py,sha256=qlhukYuVdKRntwtGQYftMqlAhPWzkV9ZIVTQLY7FEJg,9813 temci/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 temci/scripts/cli.py,sha256=hvy1zUxS9iYmHRDynakkgkCrvyyeHvibrpxA_UZ07ro,27895 temci/scripts/init.py,sha256=ATDRcZJtp1VcxhtnQZMznrsvoBTmMPM8XjzAOVKYZzQ,26428 temci/scripts/temci_completion.py,sha256=X2gFJ0li6jiNixPxPUHVEEOciuM0-1Owtn4APLWDz98,1803 temci/scripts/version.py,sha256=LA-K65CIDMmmDszb5FjM9fjskD17DBdv6CYuPQ8rmHo,238 temci/setup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 temci/setup/setup.py,sha256=-K2eAsGDWM8R0WHsbOK5mgKkwb30pLNexORL0oLXAkE,1629 temci/tester/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 temci/tester/report.py,sha256=DuGMIKYEex2l7TzNASJYE_2jIc6WQ82o7cDSmtwGm6g,92149 temci/tester/report_processor.py,sha256=H66d-91SGMLzahutnzIo2z5ITwb8zWv-QcZbgCt0n7U,341 temci/tester/rundata.py,sha256=lmP8kUsOcdar1xGuOE7LU8l2mjrzAvzN6xw3yWch9d4,14275 temci/tester/stats.py,sha256=deBDrQBzkLiMKoEAT_YXhRZwOC6XrvC-XJlVSgZBNhM,42472 temci/tester/testers.py,sha256=U-2vORMk0X4TJrONCeSzASVDTIuibJYUo50NQN6IuBU,5314 temci/utils/__init__.py,sha256=cKjhNAyfCDa6PWmEYmW58UnfvDLGlswaNHI9K7ZgsCA,106 temci/utils/click_helper.py,sha256=QAWBChZTFlUXfrNbO-edq-ipAp4y4oj-GKsPQ2E2h3w,16585 temci/utils/mail.py,sha256=rifewcPkiO03e4tGW1lKPTT77BlkX2cerC7IiiKFgIo,1322 temci/utils/registry.py,sha256=F6rxJ3aiHHfglE0sF34GUX2lYmHKSRv9PHT6DReYwjQ,5832 temci/utils/settings.py,sha256=3K3QP8cHpKoETL1ImBeenxZ8FVYN6zg1OIRH3eAm7rY,16103 temci/utils/typecheck.py,sha256=EkcRACi0VPMU6LOi3_em_xwDiPtDE0TmF-9I8-3ZeWM,42660 temci/utils/util.py,sha256=tIzCIOzOvpr4SySMZiMojpoLWH9FVea34sRCWxxlRdU,4541 temci/utils/vcs.py,sha256=UPAplsCm56LusWmW_7XLOI6nlp-t9bQ8b0Zjoxu8Zrc,18418 temci-0.5.dist-info/DESCRIPTION.rst,sha256=fo9oXabht4SQKIPTaLpP2Q68-jn5Xiq2bLYS-vD-xwg,9503 temci-0.5.dist-info/METADATA,sha256=kEBgerRr3KsHZxU7XzvQuaMXC2a8zIHXflTtDmb-_h0,10587 temci-0.5.dist-info/RECORD,, temci-0.5.dist-info/WHEEL,sha256=zX7PHtH_7K-lEzyK75et0UBa3Bj8egCBMXe1M4gc6SU,92 temci-0.5.dist-info/entry_points.txt,sha256=qNQWs9irNfW9OrBeqQ2TbCYP7yqt3bWt9JP28GefM2s,147 temci-0.5.dist-info/metadata.json,sha256=RtdK6AaQ5VkE0G6bsgUehDzHkRCTQ2RYksPtnZJNpkU,1328 temci-0.5.dist-info/top_level.txt,sha256=faR0yx2W7nCuUUnWI5cJ-tPTHrhT16dU_Se3P60PPLI,6 PK#}6Htemci/__init__.pyPK#}6H 9o/temci/tester/testers.pyPK8HK݉gg&temci/tester/report.pyPK8H7_O}temci/tester/stats.pyPK#}6H9d77j#temci/tester/rundata.pyPK#}6H{6UU 