PK!jlolaml/__init__.pyfrom pkg_resources import DistributionNotFound, get_distribution from .core import Run, run try: __version__ = get_distribution('lolaml').version except DistributionNotFound: __version__ = '(local)' __all__ = ["run", "Run"] PK!dВlolaml/__main__.py#!/usr/bin/env python """Package entry point.""" from lolaml.cli import cli if __name__ == '__main__': # pragma: no cover cli() PK!rr lolaml/cli.pyimport random from pathlib import Path import click import log import lolaml as lola from lolaml.server import app @click.group() def cli(): log.init() """Lola - track, manage, and visualize your ML runs.""" @cli.command() def ui_flask(): """Start the local flask UI and in DEV mode.""" app.run(debug=True) @cli.command() @click.option("-n", help="Number of runs to create.", default=10) def mkdata(n): """Create lola testdata under `testdata/`.""" path = Path("testdata") path.mkdir(exist_ok=True) print(f"# Creating (or adding) testdata. Check {path}") print() archs = ["lin_reg", "conv_net", "fully_connected", "rnn"] for i in range(n): with lola.run(path_prefix=path) as run: print(run.data.path) run.log_param("arch", random.choice(archs)) lr = random.random() run.log_param("lr", lr) epochs = random.randint(50, 100) run.log_param("epochs", epochs) for i in range(1, epochs + 1): run.log_metric("train_loss", 1 / i + random.random() / i, step=i) run.log_metric("val_loss", 1 / i + random.random() / i, step=i) @cli.command() @click.argument("path") def push(path): """Push all local runs to the server.""" print("TODO not implemented yet") if __name__ == '__main__': # pragma: no cover cli() PK! 6sslolaml/config.py""" You can configure Lola with a `.lola.toml` configuration file. Lola always looks for `.lola.toml` in the current working dir. Code has precedence over configuration options. Here is an example of a `.lola.toml` configuration file that shows all possible configuration options:: [lola] # the remote location for uploading all artifacts to remote_location = "gs://somewhere # the location of the credentials to use for the remote location remote_credentials = "path/to/service_account.json" """ from pathlib import Path from typing import Any, Dict import log import tomlkit def load_lola_config( user_params: Dict[str, Any], ignore_config=False ) -> Dict[str, Any]: default_config = {"remote_location": "", "remote_credentials": ""} user_config: Dict = {} user_config_file = Path.cwd() / ".lola.toml" if user_config_file.is_file() and not ignore_config: log.info(f"Loading {user_config_file}...") with user_config_file.open() as f: user_config = tomlkit.parse(f.read())["lola"] config = {**default_config, **user_config, **user_params} return config PK! lolaml/core.py"""``Run`` is the main interface to log information about an experiment.""" import getpass import subprocess import sys import tempfile import uuid from datetime import datetime from pathlib import Path from typing import Any, Dict, Optional import log from lolaml.config import load_lola_config from lolaml.data import RunData from lolaml.remotes import RemoteStorage from lolaml.storage import JSONStorage from lolaml.utils import ls_files class Run: """ `Run` is the main interface to log information about an experiment. Use it as a context manager to start/stop the run (or instanciate it directly). Then use the ``log_*`` functions to log everything that is important for your experiment. Attributes: run_id: The id of the run. data (lolaml.data.RunData): The actual data that is being logged. Args: project: The name of the project this run belongs to (default project name is `default`). path_prefix: Where to store the artifacts. The run_id is prepended to the `path_prefix`. If not specified, Lola will generate a temp path automatically. log_git: Log git sha, status and diff if ``True``. log_call_info: Log general call info (``__file__`` and ``argv``) if ``True``. remote_location: Specify the remote bucket to upload the artifacts to. Don't upload anything if not specified. remote_credentials: The path to the credentials for the remote location. ignore_config: Don't read the `.lola.toml` if ``True``. Example: >>> import lolaml as lola >>> with lola.Run(ignore_config=True) as run: # doctest:+ELLIPSIS ... run.log_param("lr", 0.1) ... run.log_tags("WIP", "RNN") ... run.log_metric("loss", .56, step=1) """ def __init__( self, project: str = "default", path_prefix: Optional[str] = None, log_git: bool = True, log_call_info: bool = True, remote_location: Optional[str] = None, remote_credentials: Optional[str] = None, ignore_config=False, ): run_id = str(uuid.uuid4()) log.info(f"# Starting run '{run_id}'") _user_config = {} if remote_credentials is not None: _user_config["remote_credentials"] = remote_credentials if remote_location is not None: _user_config["remote_location"] = remote_location config = load_lola_config(_user_config, ignore_config=ignore_config) self._storage = JSONStorage self._remote_storage = RemoteStorage.from_spec( config["remote_location"], config["remote_credentials"] ) if path_prefix is None: path_prefix = tempfile.mktemp(prefix=f"lolaml/{project}/") log.debug( f" * No path_prefix set. Storing artifacts under '{path_prefix}'." ) _path = Path(path_prefix) / str(run_id) _path.mkdir(parents=True, exist_ok=True) path: str = str(_path) data = RunData( project=project, run_id=run_id, status="running", start_time=_now(), path=path, user=getpass.getuser(), ) if log_git: data.git = { "sha": get_git_sha(), "status": get_git_status(), "diff": get_git_diff(), } if log_call_info: data.call_info = {"__file__": __file__, "argv": list(sys.argv)} self.data = data @property def path(self) -> str: return self.data.path @property def run_id(self) -> str: return self.data.run_id def log_metric(self, name: str, value: Any, *, step: Optional[int] = None) -> None: """ Log a metric (key/value) with an optional step. Additionally the current time is logged. """ self.data.metrics.append( {"name": name, "value": value, "step": step, "ts": _now()} ) def log_tag(self, tag: str) -> None: """Log a tag.""" self.data.tags = list(set([*self.data.tags, tag])) def log_tags(self, *tags) -> None: """Log many tags.""" for tag in tags: self.log_tag(tag) def log_param(self, name: str, value: Any) -> None: """Log the parameter (key/value).""" self.data.params[name] = value def log_params(self, params: Dict[str, Any]) -> None: """Log many parameters (dict).""" for k, v in params.items(): self.log_param(k, v) def _log_artifact(self, path: str) -> None: """Log the artifacts under the given `path`.""" # TODO calc md5/sha/hashes # TODO mark special artifacts like images _path = Path(path) artifact_info: Dict[str, Any] = {} if _path.is_file(): stat = _path.stat() artifact_info = { "type": "file", "st_size": stat.st_size, "st_atime": stat.st_atime_ns, "st_mtime": stat.st_mtime_ns, "st_ctime": stat.st_ctime_ns, } self.data.artifacts[str(path)] = artifact_info elif _path.is_dir(): log.debug(f"Skipping {path}") else: self.data.artifacts[str(path)] = {} def _log_all_artifacts(self) -> None: """Log all artifacts under the current path.""" for p in ls_files(self.path): self._log_artifact(str(p)) def _finish(self) -> None: """Finish a successful run and write to disk.""" self.data.status = "done" self.data.end_time = _now() self._write() def _write(self): """Write the current json representation to disk (and upload all artifacts).""" self._log_all_artifacts() self._log_artifact(self._storage.log_file_path(self.path, self.run_id)) self._storage.write(self.data) if self._remote_storage: self._remote_storage.upload(self.data) # Context manager def __enter__(self): return self def __exit__(self, exc_type, exc_value, traceback): if exc_type: # record error and write log self.data.status = "error" self.data.end_time = _now() self._write() else: self._finish() def run(**kwargs): return Run(**kwargs) ######################################################################################## # HELPERS def _now() -> str: return str(datetime.now()) def _git_cmd(*args): try: return subprocess.check_output(args).strip().decode("utf-8") except subprocess.CalledProcessError as e: log.warning("Git error:", e) return str(e) except FileNotFoundError as e: log.warning("FileNotFoundError:", e) return str(e) def get_git_sha(): return _git_cmd("git", "rev-parse", "HEAD") def get_git_status(): return _git_cmd("git", "status") def get_git_diff(): return _git_cmd("git", "diff") PK!E.lolaml/data.pyfrom dataclasses import dataclass, field from typing import Any, Dict, List, Optional @dataclass class RunData: project: str # the name of the project run_id: str # the uuid for the run status: str # running, done, error path: str user: str start_time: str # datetime str end_time: Optional[str] = None # datetime str metrics: List[Dict] = field(default_factory=list) params: Dict[str, Any] = field(default_factory=dict) tags: List[str] = field(default_factory=list) artifacts: Dict[str, Dict] = field(default_factory=dict) git: Optional[Dict[str, Any]] = None call_info: Optional[Dict[str, Any]] = None PK!Λulolaml/incubator.py""" HERE BE DRAGONS. Everything in incubator.py is subject to breaking changes. Use at your own risk. """ import altair as alt import pandas as pd def _esc(name: str) -> str: r""" Escape altair field names that contain a dot. dots "." in field names are interpreted as hierachical access, but quite often they are not. Escape the field names. Examples: >>> _esc("foo.bar") == "foo\\.bar" True >>> _esc("foo_bar") == "foo_bar" True """ return name.replace(".", "\\.") def plot_overview(df_overview: pd.DataFrame) -> alt.Chart: plot_max = ( alt.Chart(df_overview) .mark_bar() .encode(x=alt.X("run_id:N"), y=alt.Y(_esc("metric.val_loss.max:Q"))) ) plot_min = ( alt.Chart(df_overview) .mark_bar() .encode(x=alt.X("run_id:N"), y=alt.Y(_esc("metric.val_loss.min:Q"))) ) return plot_min | plot_max def plot_metrics_siple(df_metrics: pd.DataFrame) -> alt.Chart: plot = ( alt.Chart(df_metrics) .mark_line() .encode(x="step:Q", y="value:Q", column="name", color=alt.Color("run_id:N")) .interactive() ) return plot PK!ƧE/ / lolaml/remotes.pyfrom dataclasses import dataclass from pathlib import Path import log from lolaml.data import RunData @dataclass class RemoteStorage: """ Store all artifacts in a remote bucket. Currently only google storage and local storage are supported. The local `` is stored under `///` TODO more docs """ remote_location: str remote_credentials: str @staticmethod def from_spec(remote: str, remote_credentials: str): if not remote: return None remote = str(remote) if remote.startswith("gs://"): if remote_credentials: return RemoteStorage(remote, remote_credentials) else: raise ValueError("Remote_credentials not set") else: return RemoteStorage(remote, remote_credentials) log.debug( "Set `remote` and `remote_credentials` to save data on a remote server" ) return None def _get_container(self): if self.remote_location.startswith("gs://"): from cloudstorage.drivers.google import GoogleStorageDriver _protocol, container_name = self.remote_location.split("://") container_name = container_name.strip("/") storage = GoogleStorageDriver(self.remote_credentials) container = storage.get_container(container_name) else: from cloudstorage.drivers.local import LocalDriver tmp = Path(self.remote_location) loc, name = str(tmp.parent), str(tmp.name) container = LocalDriver(loc).get_container(name) return container def upload(self, data: RunData): container = self._get_container() artifacts = list(data.artifacts.keys()) log.info(f"# Starting uploading of {len(artifacts)} artifact(s)...") for artifact_path in artifacts: log.debug(f" * Uploading {artifact_path}") dst = self.artifact_destination(artifact_path, data.run_id, data.project) container.upload_blob(artifact_path, blob_name=dst) @staticmethod def artifact_destination(artifact_path: str, run_id: str, project: str): """ Create the remote artifact destination. Examples: >>> artifact_path = "/tmp/lolaml/73epbsfm/6caf409f-8e27-4d4c-80db-1b05d510601e/lola_run_6caf409f-8e27-4d4c-80db-1b05d510601e.json" >>> run_id = "6caf409f-8e27-4d4c-80db-1b05d510601e" >>> project = "default" >>> RemoteStorage.artifact_destination(artifact_path, run_id, project) 'default/6caf409f-8e27-4d4c-80db-1b05d510601e/lola_run_6caf409f-8e27-4d4c-80db-1b05d510601e.json' """ return str(Path(project) / artifact_path[artifact_path.find(run_id) :]) PK!&alolaml/server.pyfrom flask import Flask, render_template from lolaml.incubator import plot_metrics_siple, plot_overview from lolaml.storage import read_json_runs app = Flask(__name__) @app.route("/") def index(): return """

helLOLA

Check out testdata """ @app.route("/") def glob(prefix): pattern = f"{prefix}/**/lola_run_*.json" df_overview, df_metrics = read_json_runs(pattern) return render_template( "index.html", table_data=df_overview.as_matrix().tolist(), table_column_names=[{"title": col} for col in df_overview.columns.to_list()], vega_specs=[ plot_overview(df_overview).to_dict(), plot_metrics_siple(df_metrics).to_dict(), ], ) PK!@; lolaml/storage.pyimport json from dataclasses import asdict from pathlib import Path from typing import Callable, Tuple, Union import log import pandas as pd from .data import RunData class Storage: @staticmethod def write(data: RunData) -> None: raise NotADirectoryError() @staticmethod def read(path: str) -> RunData: raise NotADirectoryError() class JSONStorage(Storage): """ JSONStorage stores a json representation of the `RunData`. The destination is `path / f"lola_run_{data.run_id}.json"`. """ @staticmethod def log_file_path(path, run_id) -> Path: return Path(path) / f"lola_run_{run_id}.json" @staticmethod def write(data: RunData) -> None: dst = JSONStorage.log_file_path(data.path, data.run_id) dst.parent.mkdir(parents=True, exist_ok=True) log.info(f"# Storing run under '{dst}'") with dst.open("w") as f: f.write(json.dumps(asdict(data))) @staticmethod def read(path: str) -> RunData: with open(path, "r") as f: return RunData(**json.loads(f.read())) # @staticmethod # def _date_handler(obj): # return obj.isoformat() if isinstance(obj, (datetime, date)) else None def read_json_runs(glob_pattern: str) -> Tuple[pd.DataFrame, pd.DataFrame]: """Glob for the pattern and read the json runs.""" lola_jsons = sorted(list(Path().glob(glob_pattern))) lola_runs = [json.load(file.open()) for file in lola_jsons] log.info(f"# Loading {len(lola_runs)} lola runs...") # df_metrics is a table of metrics df_metrics = pd.io.json.json_normalize( lola_runs, record_path="metrics", meta="run_id" )[["run_id", "name", "value", "step", "ts"]] # df_overview will contain one row for each run with all relevant overview information df_overview = pd.io.json.json_normalize(lola_runs) col_order = [ "project", "run_id", "path", "status", "start_time", "end_time", "git_sha", # "git_diff", # "git_status", # "metrics", *sorted( [col for col in df_overview.columns.tolist() if col.startswith("params.")] ), ] df_overview = df_overview[col_order] def _aggregate_metrics( df_metrics: pd.DataFrame, agg_fn: Union[Callable, str] = "max", agg_name: str = "max", ) -> pd.DataFrame: """Aggregate and reshape.""" # get the min values per group, ... df_agg = ( df_metrics.groupby(["run_id", "name"]) .agg({"value": agg_fn}) .reset_index() .pivot_table(values="value", index="run_id", columns="name") .reset_index() ) columns = df_agg.columns.to_list() df_agg = df_agg.rename( columns={ col: f"metric.{col}.{agg_name}" for col in columns if col != "run_id" } ) return df_agg metrics_max = _aggregate_metrics(df_metrics, agg_fn="max", agg_name="max") df_overview = df_overview.merge(metrics_max, on="run_id") metrics_min = _aggregate_metrics(df_metrics, agg_fn="min", agg_name="min") df_overview = df_overview.merge(metrics_min, on="run_id") return df_overview, df_metrics PK!}; lolaml/templates/index.html The dashboard that does not deserve the name

Lola - Dashboard

Overview

Toggle columns: (TODO reset)
{% for item in table_column_names %} | {{ item["title"] }} {% endfor %}

Plots

{% for vega_spec in vega_specs %}
{% endfor %}
PK!ޙPb""lolaml/tests/__init__.py"""Unit tests for the package.""" PK!c{WWlolaml/tests/conftest.py"""Unit tests configuration file.""" import log def pytest_configure(config): """Disable verbose output when running tests.""" log.init(debug=True) terminal = config.pluginmanager.getplugin('terminal') class QuietReporter(terminal.TerminalReporter): # type: ignore """Reporter that only shows dots when running tests.""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.verbosity = 0 self.showlongtestinfo = False self.showfspath = False terminal.TerminalReporter = QuietReporter PK!W$:lolaml/utils.pyfrom pathlib import Path from typing import List def lprint(text: str) -> None: print(f"# LOLA: {text}") def ls(path: str) -> List[Path]: """Return a list of content under `path`.""" return [p for p in Path(path).rglob("*")] def ls_files(path: str) -> List[Path]: """Return a list of all files under `path`.""" return [p for p in Path(path).rglob("*") if p.is_file()] PK!H'lolaml-0.0.2.dist-info/entry_points.txtPK!"lolaml-0.0.2.dist-info/LICENSE.rstPK!HڽTUlolaml-0.0.2.dist-info/WHEEL A н#Z;/"d&F[xzw@Zpy3Fv]\fi4WZ^EgM_-]#0(q7PK!H: lolaml-0.0.2.dist-info/METADATAXms۸_S:S+5W[J&׉o} QAyHz{glK] ^1Kjio WrLQ/DZFݨYFu9&7oHLynA 24Յw:yH鴓kŶSټ1..;TkY,dbL[=GQ3an2φQ7 1|ȖL,Ϲps=HJ ΰ5 \"Ydy02caCў#^"#LuTQ~| ˇbsM%*6WY k[AgX_!ve|\SJAI edaٳ< xxˇc)$T&W=Cqk ("x&A $pFkBH"xJ.Vh#\ȃ'LBGo?Osj(V31PRHkO\*<+\CvIS80wX$LzQ,f{ #6 ߑm̝3`*p@ZB0_z0Y |}jȼ!Asߠ]k@J&W ;SǠՖTTJէJ#5tq).WaH(Zv}0xiX5‘M蛁EĊK$ QѦ3@مgG\.nU6kD)= x8I''h4O'|XSӸ>+{}~w0dS vq흆~;&xx:I褩Iox?mjӅ>~]=܌JTRou8 E.5!aϞ"kNSW)@-=`R_bi=j~bό՚cB}˯l@gtPmBŎ& n_q*̤l1CzW>T [^p#08V&ʵ0nNA=$9oC#' Ge⡛aõQ64 %Ȕj "M?="{BL۩At7 ٲ9r*l5Ya,:rCOp!4ð^wnnU-VT$\xp?_0%nVI$;_\A.0s{U%9[]BlraJ.ذZm8G* IķubŅņ}ָ/ EYw `i}2xDPK!H -Slolaml-0.0.2.dist-info/RECORDԻJ|L M$,FZv9oUUUF)F?V$ɬLu] 3o.&C~3tԜSX!&E1q&?t $%p5/yF2?.Jޑ5j.29]ͺpjA~ #rVꪀ+BԱ6I_׬=‘b7ԂFEu]M6R{Y $8H6υPQRsI^R|/}6ze]xbWK^hvsQȾgcsڧ5YRi4kYB.'.l#?jk\407_HnP9g%e֖ԙS_j#9ݣе{"ңk|m-ym#*Aq?1Oh݆ߨ;8`S[Rdq=6ŔRӞlolaml/storage.pyPK!}; aKlolaml/templates/index.htmlPK!ޙPb""FYlolaml/tests/__init__.pyPK!c{WWYlolaml/tests/conftest.pyPK!W$:+\lolaml/utils.pyPK!H']lolaml-0.0.2.dist-info/entry_points.txtPK!")^lolaml-0.0.2.dist-info/LICENSE.rstPK!HڽTUi^lolaml-0.0.2.dist-info/WHEELPK!H: ^lolaml-0.0.2.dist-info/METADATAPK!H -SFhlolaml-0.0.2.dist-info/RECORDPKk