PK!jlolaml/__init__.pyfrom pkg_resources import DistributionNotFound, get_distribution from .core import Run, run try: __version__ = get_distribution('lolaml').version except DistributionNotFound: __version__ = '(local)' __all__ = ["run", "Run"] PK!dВlolaml/__main__.py#!/usr/bin/env python """Package entry point.""" from lolaml.cli import cli if __name__ == '__main__': # pragma: no cover cli() PK! gZbb lolaml/cli.pyimport random from pathlib import Path import click import log import lolaml as lola @click.group() def cli(): log.init() """Lola - track, manage, and visualize your ML runs.""" @cli.command() def ui_flask(): """Start the local flask UI and in DEV mode.""" print("TODO not implemented yet") @cli.command() @click.option("-n", help="Number of runs to create.", default=10) def mkdata(n): """Create lola testdata under `testdata/`.""" path = Path("testdata") path.mkdir(exist_ok=True) print(f"# Creating (or adding) testdata. Check {path}") print() archs = ["lin_reg", "conv_net", "fully_connected", "rnn"] for i in range(n): with lola.run(path_prefix=path) as run: print(run.data.path) run.log_param("arch", random.choice(archs)) lr = random.random() run.log_param("lr", lr) epochs = random.randint(50, 100) run.log_param("epochs", epochs) for i in range(1, epochs + 1): run.log_metric("train_loss", 1 / i + random.random() / i, step=i) run.log_metric("val_loss", 1 / i + random.random() / i, step=i) @cli.command() @click.argument("path") def push(path): """Push all local runs to the server.""" print("TODO not implemented yet") if __name__ == '__main__': # pragma: no cover cli() PK!_Ƚyylolaml/config.py""" You can configure Lola with a ``.lola.toml`` configuration file. Lola always looks for ``.lola.toml`` in the current working dir. Code has precedence over configuration options. Here is an example of a ``.lola.toml`` configuration file that shows all possible configuration options:: [lola] # the remote location for uploading all artifacts to remote_location = "gs://somewhere # the location of the credentials to use for the remote location remote_credentials = "path/to/service_account.json" """ from pathlib import Path from typing import Any, Dict import log import tomlkit def load_lola_config( user_params: Dict[str, Any], ignore_config=False ) -> Dict[str, Any]: default_config = {"remote_location": "", "remote_credentials": ""} user_config: Dict = {} user_config_file = Path.cwd() / ".lola.toml" if user_config_file.is_file() and not ignore_config: log.info(f"Loading {user_config_file}...") with user_config_file.open() as f: user_config = tomlkit.parse(f.read())["lola"] config = {**default_config, **user_config, **user_params} return config PK!dgE E lolaml/core.py"""``Run`` is the main interface to log information about an experiment.""" import getpass import os import subprocess import sys import tempfile import uuid from datetime import datetime from pathlib import Path from typing import Any, Dict, Optional import log from lolaml.config import load_lola_config from lolaml.data import RunData from lolaml.remotes import RemoteStorage from lolaml.storage import JSONStorage from lolaml.utils import ls_files class Run: """ `Run` is the main interface to log information about an experiment. Use it as a context manager to start/stop the run (or instanciate it directly). Then use the ``log_*`` functions to log everything that is important for your experiment. Attributes: run_id: The id of the run. data (lolaml.data.RunData): The actual data that is being logged. Args: project: The name of the project this run belongs to (default project name is `default`). path_prefix: Where to store the artifacts. The final path is `{path_prefix}/{project}/{run_id}` If not specified, Lola will generate a temp path automatically: `{tmp_path}/{project}/{run_id}` log_git: Log git sha, status and diff if ``True``. log_call_info: Log general call info (``__file__`` and ``argv``) if ``True``. remote_location: Specify the remote bucket to upload the artifacts to. Don't upload anything if not specified. remote_credentials: The path to the credentials for the remote location. ignore_config: Don't read the `.lola.toml` if ``True``. wip_mode: Set "WIP" tag and don't upload any artifacts if ``True``. Useful during the development of an experiment. Example: >>> import lolaml as lola >>> with lola.Run(ignore_config=True) as run: # doctest:+ELLIPSIS ... run.log_param("lr", 0.1) ... run.log_tags("WIP", "RNN") ... run.log_metric("loss", .56, step=1) """ def __init__( self, project: str = "default", path_prefix: Optional[str] = None, log_git: bool = True, log_call_info: bool = True, remote_location: str = "", remote_credentials: str = "", ignore_config: bool = False, wip_mode: bool = False, ): assert isinstance(remote_location, str) run_id = str(uuid.uuid4()) log.info(f"# Starting lola run '{run_id}'") _user_config = {} if remote_credentials: _user_config["remote_credentials"] = remote_credentials if remote_location: _user_config["remote_location"] = remote_location config = load_lola_config(_user_config, ignore_config=ignore_config) self._storage = JSONStorage self._remote_storage = RemoteStorage.from_spec( config["remote_location"], config["remote_credentials"] ) if path_prefix is None: path_prefix = tempfile.mktemp(prefix=f"lolaml/{project}/") _path = Path(path_prefix) / project / str(run_id) _run_file = _path / "lola_run.json" _path.mkdir(parents=True, exist_ok=True) path: str = str(_path) run_file: str = str(_run_file) log.info(f" * Store artifacts under '{path}'") data = RunData( project=project, run_id=run_id, status="running", start_time=_now(), path=path, run_file=run_file, user=getpass.getuser(), remote_location=config["remote_location"], ) if log_git: data.git = { "sha": get_git_sha(), "status": get_git_status(), "diff": get_git_diff(), } if log_call_info: data.call_info = { "cwd": os.getcwd(), "__file__": __file__, "argv": list(sys.argv), } self.data = data self.wip_mode = wip_mode if self.wip_mode: self.log_tag("WIP") @property def path(self) -> str: return self.data.path @property def project(self) -> str: return self.data.project @property def run_id(self) -> str: return self.data.run_id @property def run_file(self) -> str: return self.data.run_file def summary(self) -> str: """Return a summary of the current run.""" return self.data.summary() def log_metric(self, name: str, value: Any, *, step: int = None) -> None: """ Log a metric (key/value) with an optional ``step``. Additionally the current time is logged. """ self.data.metrics.append( {"name": name, "value": value, "step": step, "ts": _now()} ) def log_metrics(self, metric_dict: Dict, *, step: int = None) -> None: """ Log a dict of metrics (key/value) with an optional ``step``. Additionally the current time is logged. """ now = _now() for name, value in metric_dict.items(): self.data.metrics.append( {"name": name, "value": value, "step": step, "ts": now} ) def log_tag(self, tag: str) -> None: """Log a tag.""" self.data.tags = list(set([*self.data.tags, tag])) def log_tags(self, *tags) -> None: """Log many tags.""" for tag in tags: self.log_tag(tag) def log_param(self, name: str, value: Any) -> None: """Log the parameter (key/value).""" self.data.params[name] = value def log_params(self, params: Dict[str, Any]) -> None: """Log many parameters (dict).""" for k, v in params.items(): self.log_param(k, v) def _log_artifact(self, path: str) -> None: """Log the artifacts under the given `path`.""" # TODO calc md5/sha/hashes # TODO mark special artifacts like images _path = Path(path) artifact_info: Dict[str, Any] = {} if _path.is_file(): stat = _path.stat() artifact_info = { "type": "file", "st_size": stat.st_size, "st_atime": stat.st_atime_ns, "st_mtime": stat.st_mtime_ns, "st_ctime": stat.st_ctime_ns, } self.data.artifacts[str(path)] = artifact_info elif _path.is_dir(): log.debug(f"Skipping {path}") else: self.data.artifacts[str(path)] = {} def _log_all_artifacts(self) -> None: """Log all artifacts under the current path.""" for p in ls_files(self.path): self._log_artifact(str(p)) def close(self, status="done"): """Write the current json representation to disk (and upload artifacts).""" assert status in ["done", "error", "running"], f"Status '{status} is invalid." self.data.status = status self.data.end_time = _now() self._log_all_artifacts() self._log_artifact(self.data.run_file) self._storage.write(self.data) if self._remote_storage and not self.wip_mode: self._remote_storage.upload(self.data) # Context manager def __enter__(self): return self def __exit__(self, exc_type, exc_value, traceback): if exc_type: # record error and write log self.close(status="error") else: self.close() def run(**kwargs): return Run(**kwargs) ######################################################################################## # HELPERS def _now() -> str: return str(datetime.now()) def _git_cmd(*args): try: return subprocess.check_output(args).strip().decode("utf-8") except subprocess.CalledProcessError as e: log.warning("Git error:", e) return str(e) except FileNotFoundError as e: log.warning("FileNotFoundError:", e) return str(e) def get_git_sha(): return _git_cmd("git", "rev-parse", "HEAD") def get_git_status(): return _git_cmd("git", "status") def get_git_diff(): return _git_cmd("git", "diff") PK! lolaml/data.pyfrom dataclasses import dataclass, field from typing import Any, Dict, List, Optional @dataclass class RunData: project: str # the name of the project run_id: str # the uuid for the run status: str # running, done, error path: str # the path for the artifacts run_file: str # the path to the json representation of the run user: str # the user who runs the experiment start_time: str # datetime str end_time: Optional[str] = None # datetime str metrics: List[Dict] = field(default_factory=list) params: Dict[str, Any] = field(default_factory=dict) tags: List[str] = field(default_factory=list) artifacts: Dict[str, Dict] = field(default_factory=dict) git: Optional[Dict[str, Any]] = None call_info: Optional[Dict[str, Any]] = None remote_location: str = "" def summary(self) -> str: """Return a summary string of the data.""" return f"""{'#' * 80} # LOLA RUN SUMMARY {self.run_id} - run_id: {self.run_id} - project: {self.project} - path: {self.path} - status: {self.status} - remote_location: {self.remote_location} - start_time: {self.start_time} - end_time: {self.end_time} - metrics: {len(self.metrics)} datapoint(s) - params: {len(self.params)} datapoint(s) - artifacts: {len(self.artifacts)} datapoint(s) {'#' * 80} """ PK! lolaml/incubator.py""" HERE BE DRAGONS. Everything in incubator.py is subject to breaking changes. Use at your own risk. """ import json from pathlib import Path from typing import Callable, Tuple, Union import log try: import pandas as pd except ImportError: print("You have to install pandas in order to use lolaml.incubator") try: import altair as alt except ImportError: print("You have to install altair in order to use lolaml.incubator") def read_json_runs(glob_pattern: str) -> Tuple: """Glob for the pattern and read the json runs.""" lola_jsons = sorted(list(Path().glob(glob_pattern))) log.info(f"# Loading {len(lola_jsons)} lola runs...") if len(lola_jsons) == 0: return None, None lola_runs = [json.load(file.open()) for file in lola_jsons] # df_metrics is a table of metrics df_metrics = pd.io.json.json_normalize( lola_runs, record_path="metrics", meta="run_id" )[["run_id", "name", "value", "step", "ts"]] # will contain one row for each run with all relevant overview information df_overview = pd.io.json.json_normalize(lola_runs) col_order = [ "project", "run_id", "status", "path", "user", "start_time", "end_time", # "git_sha", # "git_diff", # "git_status", *sorted( [col for col in df_overview.columns.tolist() if col.startswith("params.")] ), ] df_overview = df_overview[col_order] def _aggregate_metrics( df_metrics: pd.DataFrame, agg_fn: Union[Callable, str] = "max", agg_name: str = "max", ) -> pd.DataFrame: """Aggregate and reshape.""" # get the min values per group, ... df_agg = ( df_metrics.groupby(["run_id", "name"]) .agg({"value": agg_fn}) .reset_index() .pivot_table(values="value", index="run_id", columns="name") .reset_index() ) columns = df_agg.columns.to_list() df_agg = df_agg.rename( columns={ col: f"metric.{col}.{agg_name}" for col in columns if col != "run_id" } ) return df_agg metrics_max = _aggregate_metrics(df_metrics, agg_fn="max", agg_name="max") df_overview = df_overview.merge(metrics_max, on="run_id") metrics_min = _aggregate_metrics(df_metrics, agg_fn="min", agg_name="min") df_overview = df_overview.merge(metrics_min, on="run_id") return df_overview, df_metrics def _esc(name: str) -> str: r""" Escape altair field names that contain a dot. Dots, i.e. ".", in field names are interpreted as hierachical access, but quite often they are not. Examples: >>> _esc("foo.bar") == "foo\\.bar" True >>> _esc("foo_bar") == "foo_bar" True """ return name.replace(".", "\\.") def plot_overview(df_overview): plot_max = ( alt.Chart(df_overview) .mark_bar() .encode(x=alt.X("run_id:N"), y=alt.Y(_esc("metric.val_loss.max:Q"))) ) plot_min = ( alt.Chart(df_overview) .mark_bar() .encode(x=alt.X("run_id:N"), y=alt.Y(_esc("metric.val_loss.min:Q"))) ) return plot_min | plot_max def plot_metrics_siple(df_metrics): plot = ( alt.Chart(df_metrics) .mark_line() .encode(x="step:Q", y="value:Q", column="name", color=alt.Color("run_id:N")) .interactive() ) return plot PK!s͏\ \ lolaml/remotes.pyfrom dataclasses import dataclass from pathlib import Path import log from lolaml.data import RunData @dataclass class RemoteStorage: """ Store all artifacts in a remote bucket. Currently only google storage and local storage are supported. The local `` is stored under `///` TODO more docs """ remote_location: str remote_credentials: str @staticmethod def from_spec(remote: str, remote_credentials: str): if not remote: return None remote = str(remote) if remote.startswith("gs://"): if remote_credentials: return RemoteStorage(remote, remote_credentials) else: raise ValueError("Remote_credentials not set") else: return RemoteStorage(remote, remote_credentials) log.debug( "Set `remote` and `remote_credentials` to save data on a remote server" ) return None def _get_container(self): if self.remote_location.startswith("gs://"): from cloudstorage.drivers.google import GoogleStorageDriver _protocol, container_name = self.remote_location.split("://") container_name = container_name.strip("/") storage = GoogleStorageDriver(self.remote_credentials) container = storage.get_container(container_name) else: from cloudstorage.drivers.local import LocalDriver tmp = Path(self.remote_location) loc, name = str(tmp.parent), str(tmp.name) container = LocalDriver(loc).get_container(name) return container def upload(self, data: RunData): container = self._get_container() artifacts = list(data.artifacts.keys()) n = len(artifacts) log.info(f"# Starting uploading of {n} artifact(s)...") for i, artifact_path in enumerate(artifacts, start=1): log.info(f" * Uploading {i}/{n}: {artifact_path}") dst = self.artifact_destination(artifact_path, data.run_id, data.project) container.upload_blob(artifact_path, blob_name=dst) @staticmethod def artifact_destination(artifact_path: str, run_id: str, project: str): """ Create the remote artifact destination. Examples: >>> artifact_path = "/tmp/lolaml/73epbsfm/6caf409f-8e27-4d4c-80db-1b05d510601e/lola_run_6caf409f-8e27-4d4c-80db-1b05d510601e.json" >>> run_id = "6caf409f-8e27-4d4c-80db-1b05d510601e" >>> project = "default" >>> RemoteStorage.artifact_destination(artifact_path, run_id, project) 'default/6caf409f-8e27-4d4c-80db-1b05d510601e/lola_run_6caf409f-8e27-4d4c-80db-1b05d510601e.json' """ return str(Path(project) / artifact_path[artifact_path.find(run_id) :]) PK!l::lolaml/storage.pyimport json from dataclasses import asdict from pathlib import Path import log from lolaml.data import RunData class Storage: @staticmethod def write(data: RunData) -> None: raise NotADirectoryError() @staticmethod def read(path: str) -> RunData: raise NotADirectoryError() class JSONStorage(Storage): """JSONStorage stores a json representation of the `RunData`.""" @staticmethod def write(data: RunData) -> None: dst = data.run_file Path(dst).parent.mkdir(parents=True, exist_ok=True) log.info(f"# Storing run under '{dst}'") with open(dst, "w") as f: f.write(json.dumps(asdict(data))) @staticmethod def read(path: str) -> RunData: with open(path, "r") as f: return RunData(**json.loads(f.read())) PK!}; lolaml/templates/index.html The dashboard that does not deserve the name

Lola - Dashboard

Overview

Toggle columns: (TODO reset)
{% for item in table_column_names %} | {{ item["title"] }} {% endfor %}

Plots

{% for vega_spec in vega_specs %}
{% endfor %}
PK!ޙPb""lolaml/tests/__init__.py"""Unit tests for the package.""" PK!c{WWlolaml/tests/conftest.py"""Unit tests configuration file.""" import log def pytest_configure(config): """Disable verbose output when running tests.""" log.init(debug=True) terminal = config.pluginmanager.getplugin('terminal') class QuietReporter(terminal.TerminalReporter): # type: ignore """Reporter that only shows dots when running tests.""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.verbosity = 0 self.showlongtestinfo = False self.showfspath = False terminal.TerminalReporter = QuietReporter PK!W$:lolaml/utils.pyfrom pathlib import Path from typing import List def lprint(text: str) -> None: print(f"# LOLA: {text}") def ls(path: str) -> List[Path]: """Return a list of content under `path`.""" return [p for p in Path(path).rglob("*")] def ls_files(path: str) -> List[Path]: """Return a list of all files under `path`.""" return [p for p in Path(path).rglob("*") if p.is_file()] PK!H'lolaml-0.0.4.dist-info/entry_points.txtPK!Ҵlolaml-0.0.4.dist-info/LICENSEThe Prosperity Public License 2.0.0 Contributor: Stefan Otte Source Code: https://lolaml.readthedocs.io/en/latest/ This license lets you use and share this software for free, with a trial-length time limit on commercial use. Specifically: If you follow the rules below, you may do everything with this software that would otherwise infringe either the contributor's copyright in it, any patent claim the contributor can license that covers this software as of the contributor's latest contribution, or both. 1. You must limit use of this software in any manner primarily intended for or directed toward commercial advantage or private monetary compensation to a trial period of 32 consecutive calendar days. This limit does not apply to use in developing feedback, modifications, or extensions that you contribute back to those giving this license. 2. Ensure everyone who gets a copy of this software from you, in source code or any other form, gets the text of this license and the contributor and source code lines above. 3. Do not make any legal claim against anyone for infringing any patent claim they would infringe by using this software alone, accusing this software, with or without changes, alone or as part of a larger application. You are excused for unknowingly breaking rule 1 if you stop doing anything requiring this license within 30 days of learning you broke the rule. **This software comes as is, without any warranty at all. As far as the law allows, the contributor will not be liable for any damages related to this software or this license, for any kind of legal claim.** --- Licensor Signature (Ed25519): 07b241f2602b777d1d03254d60b83ba672fdd0af1202254041b0e829771fa670 68bc94ae23da60d7d7905f1fdfd955fcc7e3cbaef3f80902570253337b9f5f0a --- Agent Signature (Ed25519): 96b6e340480dd3fa4b0fb2109157134d0d014042f70758777c975fd014533643 c63135b393cc24b45c9e61e8e4bb1e565c043651902ab0a970d007b7b46c9004 PK!HڽTUlolaml-0.0.4.dist-info/WHEEL A н#Z;/"d&F[xzw@Zpy3Fv]\fi4WZ^EgM_-]#0(q7PK!HNlolaml-0.0.4.dist-info/METADATAXms۸_SJMReImb|QArDMm}bь:+AҏhDjI VzI/FUQPw3= kɍ(r6.x\Εv6yZ3ֻ+ qu*rsmU9^?yAW +Qt}Wp,Ah8JNχI/z#b&8 .uDa*Kc2{~l^*UR5XNP9]oɥxr7F*OG+|hCN-`Evmoϟ%TBf~;x-_jpHm`#2{ vHL$wXñ ӭ92SA;qϊJOk4߀DJ--!MyC_Qt낲9Ɨ(l,'0RФ "D6E!m|#s椤о8EhP+l!&a+Rr+)AlzF 0\T(ВW69ϒn^2X8B.ɔ;8 HЙޙ"y( j(7\O+Oo&rP'Q_9,܎_xm+Dw (fF`oC}Lx&;)Y"v mv" ?HpVvRj.?(P-C1'HDmԗ\bH(j6TSBΤI@&9T,(K b cu jdHA%+H>T؋Nkw |&NJtrʇN CTJ^q"2ﻚJ~'"tq|>_d\Y" cI̹{mWds-K毢:*B0M|taH>%-ar4O7 puiɼ o|\BpڻhP;`@J[ïh>oRi L-]m/Z R>Nۄ3wZ/8;"aM䋅Ɠk಩wrUS. j?_M.J=K,d+6)ݝ:?:cdz-D.~F{qƆG0c,>=;9GY6W'F %r[ n <Q=XXgnѥyP0ȡ MDjKE'8޴ 9 *EB0FWʭ|n8D,lmm!LJ?pMk(HfNöqx# ǽTL2z_`u7Hd_Е 1[n~{MhEUeT2_ qQr(T0#-zf &2' Xd0.d+n޴?ۃJơ xIWwW=h2wWRzPC, ޸Cj-ɹ73h炳a?o]U*3|W * pq<.=xEPK!H5pHIlolaml-0.0.4.dist-info/RECORDu˲Hy} X ouB $ $~}sܲ"r'NT1~GQQ(6yL?*w;n=ӑKnT2r(U/#1RѴ@؛0:0> nZ F*%ULEXSg\΅l:Jq7O_?)fа4@>za2/krG9AΓn&2'<0d?P}0"Ơə\?>"ҡc#Rs'Γ GLZ;9Omj $HW4}oMx)!}_sIE݇`4#eAY!uݒZϋLPzޝ1; %a@3Ij ;ƠIl%'APPiP*^Z>f&GZz| m}BA? o6dJ YcM7|?^3 #O#x1=M]#8)^,ƒz}>80mWKA,̜)+DK €5BeCֆUS-X;|١cblҢGxQߚY9jFMh^v[ xi0Y} v< kZ=v>F.X2we`*Cq%6ps;#E U/`=÷hQa/@ӑ38= Ox"yҪ} nuP-'|KI.% 뀭(?PK!jlolaml/__init__.pyPK!dВlolaml/__main__.pyPK! gZbb lolaml/cli.pyPK!_Ƚyydlolaml/config.pyPK!dgE E  lolaml/core.pyPK! |,lolaml/data.pyPK! ;2lolaml/incubator.pyPK!s͏\ \  @lolaml/remotes.pyPK!l::Klolaml/storage.pyPK!}; Ololaml/templates/index.htmlPK!ޙPb""\lolaml/tests/__init__.pyPK!c{WW>]lolaml/tests/conftest.pyPK!W$:_lolaml/utils.pyPK!H'alolaml-0.0.4.dist-info/entry_points.txtPK!Ҵalolaml-0.0.4.dist-info/LICENSEPK!HڽTUilolaml-0.0.4.dist-info/WHEELPK!HNBjlolaml-0.0.4.dist-info/METADATAPK!H5pHI[slolaml-0.0.4.dist-info/RECORDPKv