PK8L&&nyptune/__init__.py"""Nyptune hides a copy of your environment in your Jypyter notebooks so that other people can easily reproduce your work""" import time, os, platform __version__ = "0.0.1." + str(int(time.time())) from .magic import * def load_ipython_extension(ipy): ipy.register_magics(CacheMagics) PK8LڛA( nyptune/cache.pyfrom .handler.path import PathHandler from .handler.pickle import PickleHandler from .directory.local import LocalDirectory from pathlib import Path import json, io class Cache: @classmethod def default_handlers(cls): return [PathHandler(), PickleHandler()] def __init__( self, name="default", directory=LocalDirectory(Path.home() / ".nyptune"), secret=None, handlers=None, ): """Create a new instance of a Cache in the specified Directory.""" self.base_directory = directory self.secret = secret self.name = name self.handlers = handlers or self.default_handlers() def save(self): """Writes a listing of the currently cached files/metadata to NAME.json""" with self.directory.writer(self.namespace + ".json") as file: json.dump(self.metadata, io.TextIOWrapper(file)) @property def secret(self): return self._secret @secret.setter def secret(self, secret): self._secret = secret if secret: self.directory = EncryptedDirectory(self.base_directory, secret) else: self.directory = self.base_directory @property def name(self): return self.namespace @name.setter def name(self, name): self.namespace = name if self.directory.exists(name + ".json"): try: with self.directory.reader(name + ".json") as file: self.metadata = json.load(file) except: print("cannot read metadata, continuing") self.metadata = {} else: self.metadata = {} def is_cached(self, name): return name in self.metadata def retrieve(self, name): metadata = self.metadata[name] handler = [ h for h in self.handlers if h.__class__.__name__ == metadata["type"] ][0] with self.directory.reader(metadata["checksum"]) as file: return handler.deserialize(metadata, file) def cache(self, name, value, save=True): for handler in self.handlers: existing = self.metadata.get(name, {}) if handler.understands(value): if handler.checksum_matches_existing(value, existing): pass else: metadata = existing metadata["type"] = handler.__class__.__name__ metadata["quicksum"] = handler.quicksum(value) metadata["checksum"] = handler.checksum(value) with self.directory.writer(metadata["checksum"]) as file: more = handler.serialize(value, file) self.metadata[name] = {**metadata, **more} if save: self.save() return def invalidate(self, name): del (self.metadata[name]) PKLvܪF(F(nyptune/cli.pyfrom pathlib import Path from subprocess import * from tempfile import * import re, platform, os, json, sys, argparse, signal, urllib, tarfile, tempfile, glob, subprocess, io import urllib.request from .directory.local import LocalDirectory from .directory.encrypted import EncryptedDirectory # directory = def main(): parser = argparse.ArgumentParser() parser.add_argument( "--secret", metavar="SECRET", nargs="?", help="value of %%nyptune_secret in the related notebook", ) parser.add_argument( "--cache", metavar="PATH", nargs="?", default=str(Path.home() / ".nyptune"), help="an alternative location to ~/.nyptune", ) notebook = { "metavar": "NOTEBOOK", "type": str, "nargs": 1, "help": "the path to a nyptune-enabled notebook", } name = { "metavar": "NAME", "type": str, "nargs": 1, "help": "value of %%nyptune_name in the related notebook", } subparsers = parser.add_subparsers(help="a subcommand") init_parser = subparsers.add_parser( "init", help="initialize ipfs and add the save hook to the jupyter config file" ) init_parser.set_defaults(func=init) add_parser = subparsers.add_parser( "add", help="Makes nyptune aware of a notebook file you haven't yet opened and saved locally.", ) add_parser.add_argument("notebook", **notebook) add_parser.set_defaults(func=add) pull_parser = subparsers.add_parser( "pull", help="Pulls cached files from ipfs to local storage" ) # pull_parser.add_argument("name", **name) pull_parser.set_defaults(func=pull) push_parser = subparsers.add_parser( "push", help="Pushes cached files from local storage to ipfs" ) # push_parser.add_argument("name", **name) push_parser.set_defaults(func=push) pin_parser = subparsers.add_parser("pin", help="Makes cached files more permanent") pin_parser.set_defaults(func=pin) gc_parser = subparsers.add_parser( "gc", help="Removes unused files from the local cache" ) gc_parser.set_defaults(func=gc) recreate_parser = subparsers.add_parser( "recreate", help="Recreate the environment used to create a notebook" ) recreate_parser.set_defaults(func=recreate) recreate_parser.add_argument("notebook", **notebook) start_parser = subparsers.add_parser("start", help="starts ipfs") start_parser.set_defaults(func=start) stop_parser = subparsers.add_parser("stop", help="stops ipfs") stop_parser.set_defaults(func=stop) if len(sys.argv) == 1: parser.print_help() else: parsed = parser.parse_args() parsed.func(parsed) def _dir(parsed_args): args = vars(parsed_args) d = LocalDirectory(args["cache"]) if "secret" in args and args["secret"]: d = EncryptedDirectory(d, args["secret"]) return d def add(parsed_args): root = _dir(parsed_args) for path in root.glob("*.json"): with root.reader(path) as file: props = json.load(file) for name, details in props.items(): if not "ipfs" in details: proc = subprocess.run( [ ipfs(), "add", "--nocopy", str(Path.home() / ".nyptune" / details["checksum"]), ], stdout=subprocess.PIPE, ) result = proc.stdout.decode("utf-8") print(result) _, sig, _ = result.split() details["ipfs"] = sig with root.writer(path) as file: json.dump(props, io.TextIOWrapper(file)) def push(parsed_args): root = _dir(parsed_args) for path in root.glob("*.json"): with root.reader(path) as file: props = json.load(file) for name, details in props.items(): if not "ipfs" in details: proc = subprocess.run( [ ipfs(), "add", "--nocopy", str(Path.home() / ".nyptune" / details["checksum"]), ], stdout=subprocess.PIPE, ) result = proc.stdout.decode("utf-8") print(result) _, sig, _ = result.split() details["ipfs"] = sig with root.writer(path) as file: json.dump(props, io.TextIOWrapper(file)) def pull(parsed_args): root = _dir(parsed_args) for path in root.glob("*.json"): with root.reader(path) as file: props = json.load(file) for name, details in props.items(): if "ipfs" in details and not root.exists(details["checksum"]): proc = subprocess.run( [ ipfs(), "get", details["ipfs"], "-o", str(Path(parsed_args.cache) / details["checksum"]), ], stdout=subprocess.PIPE, ) result = proc.stdout.decode("utf-8") print(result) def gc(parsed_args): root = _dir(parsed_args) checksums = {} for path in root.glob("*.json"): with root.reader(path) as file: props = json.load(file) for name, details in props.items(): checksums[details["checksum"]] = True for path in root.glob("*"): if ( not path.startswith("_") and not path.endswith(".json") and not checksums.get(path) ): print("no reference to " + path + ", removing...") root.remove(path) def pin(parsed_args): print("not implemented") pass def start(parsed_args): if not Path(ipfs()).is_file(): init() pid = os.fork() if pid == 0: os.execl(ipfs(), "ipfs", "daemon") else: with open(Path(gettempdir()) / "nyptune.pid", "w") as file: file.write(str(pid) + "\n") def stop(parsed_args): pid_path = Path(gettempdir()) / "nyptune.pid" if pid_path.is_file(): with open(pid_path) as file: pid = file.read() os.kill(int(pid), signal.SIGTERM) pid_path.unlink() else: print("Nyptune daemon not running: no pid file found") def ipfs(): return str(Path(os.path.realpath(__file__)).parent / "go-ipfs" / "ipfs") def recreate(parsed_args): notebook = parsed_args.notebook[0] cache = Path(notebook).parent / ".nyptune" with open(notebook) as file: model = json.load(file) conda = "\n".join( [ line for line in model["metadata"]["magix"]["conda"] if line != "@EXPLICIT" ] ) with NamedTemporaryFile() as conda_env_yaml: conda_env_yaml.write(conda.encode("utf-8")) conda_env_yaml.flush() print(conda_env_yaml.name) result = run( [ "conda", "create", "-y", "--name", sys.argv[1], "--file", conda_env_yaml.name, ], encoding="utf-8", shell=False, ) if result.returncode != 0: result = run( [ "conda", "env", "update", "-y", "--name", sys.argv[1], "--file", conda_env_yaml.name, ], encoding="utf-8", shell=False, ) sig = model["metadata"]["magix"]["cache"][".nyptune"] run([ipfs(), "get", sig, "-o", cache], shell=False) with NamedTemporaryFile() as requirements: pip = "\n".join(model["metadata"]["magix"]["pip"]) requirements.write(pip.encode("utf-8")) requirements.flush() with NamedTemporaryFile() as script: s = [ "#!/bin/bash", "source activate " + sys.argv[1], "pip install -y -r " + requirements.name, "jupyter notebook", ] script.write("\n".join(s).encode("utf-8")) script.flush() os.chmod(script.name, 0o755) print("running " + script.name) print("\n".join(s)) os.execl(script.name, "jupyter") def init(parsed_args): config = Path.home() / ".jupyter" / "jupyter_notebook_config.py" if not config.is_file(): print("generating an empty jupyter config file") run(["jupyter", "notebook", "--generate-config"], encoding="utf-8", shell=False) with open(config, "r") as file: contents = file.read() if "nyptune" in contents: print("jupyter config file already mentions nyptune") else: with open(config, "a") as file: print("appending nyptune pre-save-hook to jupyter config") file.write( "\nfrom nyptune.jupyter import presave\nc.ContentsManager.pre_save_hook = presave\n" ) if "64" in platform.machine(): arch = "amd64" else: arch = "386" plat = platform.system().lower() version = "0.4.14" print("downloading ipfs") local = Path(tempfile.gettempdir()) / "go-ipfs.tar.gz" urllib.request.urlretrieve( f"https://dist.ipfs.io/go-ipfs/v{version}/go-ipfs_v{version}_{plat}-{arch}.tar.gz", local, ) with tarfile.open(local, "r|gz") as tar: tar.extractall(Path(os.path.realpath(__file__)).parent) print("initializing ipfs") run([ipfs(), "init"], encoding="utf-8", shell=False) run( [ipfs(), "config", "--json", "Experimental.FilestoreEnabled", "true"], encoding="utf-8", shell=False, ) PK8Lhv{{nyptune/jupyter.pyfrom subprocess import * from pathlib import Path import os, json def presave(path, model, contents_manager): if model["type"] == "notebook": namespace = "default" for cell in model["content"]["cells"]: lines = cell["source"].split("\n") for line in lines: if line.startswith("%nyptune_name") or line.startswith( "%%nyptune_name" ): namespace = line.split()[1] path = Path.home() / ".nyptune" / (namespace + ".metadata.json") nyptune = model["content"]["metadata"]["nyptune"] = {} if path.is_file(): with open(path, "r") as file: model["content"]["metadata"]["nyptune"]["cache"] = json.load(file) conda = run( ["conda", "list", "--explicit"], stdout=PIPE, encoding="utf-8", shell=False ) nyptune["conda"] = conda.stdout.split("\n") pip = run( ["pip", "list", "--format", "freeze"], stdout=PIPE, encoding="utf-8", shell=False, ) nyptune["pip"] = pip.stdout.split("\n") PK8L[5Znyptune/magic.pyimport pprint import json, base64 from subprocess import * from pathlib import Path from abc import ABC, abstractmethod from collections import namedtuple import pickle, os import hashlib from IPython.core.magic import * from .util import * from nyptune.cache import * from collections import namedtuple from collections import OrderedDict @magics_class class CacheMagics(Magics): def __init__(self, **kwargs): super(CacheMagics, self).__init__(**kwargs) cacheargs = kwargs.copy() del (cacheargs["shell"]) self._cache = Cache(**cacheargs) self.enabled = True @line_cell_magic def nyptune_name(self, line, cell=None): """Set the namespace used by this cache. This should be similar to the notebook name and will cause other notebooks with the same nyptune_name on this computer to share cache entries.""" self._cache.name = line.strip() @line_cell_magic def nyptune_secret(self, line, cell=None): """Optionally set the encryption key used by this cache, if desired.""" self._cache.secret = line.strip() @line_magic def caching(self, line): line = line.strip() if line == "on": self.enabled = True elif line == "off": self.enabled = False @line_magic def invalidate(self, line, cell=None): """Remove the names listed from the cache. Note that this does not clean up much disk space until you run `nyptune gc`.""" names = line.strip().split() for name in names: self._cache.invalidate(name) @line_cell_magic def recache(self, line, cell=None): """Recalculate and saves into cache this line/cell.""" self._cache(line, cell, overwrite=True) def checkpoint(self, *names): for name in names: value = self.shell.user_ns[name] self.shell.user_ns[name] = self._cache.cache(name, value) self._cache.save() def restore(self, *names): for name in names: self.shell.user_ns[name] = self._cache.retrieve(name) def is_cached(self, *names): for name in names: if not self._cache.is_cached(name): return False return True @line_cell_magic def cache(self, line, cell=None, overwrite=False): names = line.strip().split() if cell: m = hashlib.md5() m.update(cell.encode("utf-8")) cell_id = m.hexdigest() if self.enabled: if self.is_cached(*names) and not overwrite: self.restore(*names) if self._cache.is_cached(cell_id): return self._cache.retrieve(cell_id).result else: print( "Your variables were restored from cache, but we could not find the output of this cell in cache" ) return None else: output = self.shell.run_cell(cell) self._cache.cache(cell_id, output) self.checkpoint(*names) return output.result else: return self.shell.run_cell(cell) else: name = names[0] if self.enabled: if self.is_cached(name) and not overwrite: return self.restore(name) else: self.shell.run_cell(line) self.checkpoint(name) return self.shell.user_ns[name] else: self.shell.run_cell(line) return self.shell.user_ns[name] PK8Lxnyptune/util.pyimport pathlib, os, urllib, shutil from pathlib import Path from tqdm import tqdm import urllib.request def unlink_f(path): if Path(path).is_file(): os.unlink(path) def link_f(src, target): target = Path(target) if target.is_file() or target.is_symlink(): os.unlink(target) try: os.link(Path(src).resolve(), target) except: shutil.copy(Path(src).resolve(), target) class TqdmUpTo(tqdm): """Provides `update_to(n)` which uses `tqdm.update(delta_n)`.""" def update_to(self, b=1, bsize=1, tsize=None): """ b : int, optional Number of blocks transferred so far [default: 1]. bsize : int, optional Size of each block (in tqdm units) [default: 1]. tsize : int, optional Total size (in tqdm units). If [default: None] remains unchanged. """ if tsize is not None: self.total = tsize self.update(b * bsize - self.n) # will also set self.n = b * bsize def download(url, path): with TqdmUpTo(unit="B", unit_scale=True, miniters=1, desc=url.split("/")[-1]) as t: urllib.request.urlretrieve(url, path, reporthook=t.update_to, data=None) return Path(path) PK8Lnyptune/directory/__init__.pyPK8LN„nyptune/directory/base.pyfrom abc import ABC, abstractmethod class DirectoryBase(ABC): """Base is the abstract base class for Directories. A Directory exposes a subset of expected Posix functionality, in order to make it easier to implement. For example, you might have an S3-backed directory. """ @abstractmethod def glob(self, pattern): """Return a list of matching files """ pass @abstractmethod def remove(self, name): """Delete a file """ pass @abstractmethod def exists(self, name): """Return a boolean if the file with this name exists """ pass @abstractmethod def writer(self, name): """Return an write-mode IO for the file name. """ pass @abstractmethod def reader(self, name): """Return an read-mode IO for the file name. """ pass PK8Ls3nyptune/directory/encrypted.pyfrom .base import DirectoryBase import base64, hashlib, os, io from Crypto.Cipher import AES class EncryptedDirectory(DirectoryBase): def __init__(self, inner, secret): self.inner = inner if inner.exists("_salt"): with inner.reader("_salt") as file: salt = file.read() else: salt = os.urandom(16) with inner.writer("_salt") as file: file.write(salt) self.key = hashlib.pbkdf2_hmac("sha256", secret.encode("utf-8"), salt, 100000) def glob(self, pattern): return self.inner.glob(pattern) def remove(self, name): return self.inner.remove(name) def exists(self, name): return self.inner.exists(name) def writer(self, name): return EncryptedWriter(self.inner.writer(name), self.key) def reader(self, name): return io.BufferedReader(EncryptedReader(self.inner.reader(name), self.key)) class EncryptedWriter(io.BufferedIOBase): def __init__(self, inner, key): self.cipher = AES.new(key, AES.MODE_EAX) self.inner = inner def write(self, content): ciphertext, tag = self.cipher.encrypt_and_digest(content) [ inner.write(x) for x in ( cipher.nonce, tag, len(ciphertext).to_bytes(8, byteorder="big"), ciphertext, ) ] def close(self): return inner.close() class EncryptedReader(io.BufferedIOBase): def __init__(self, inner, key): self.cipher = AES.new(key, AES.MODE_EAX) self.inner = inner def read(self, size=-1): assert size == -1, "Not wrapped in a BufferedReader" nonce = inner.read(16) if len(nonce) == 0: return nonce tag = inner.read(16) size = int.from_bytes(inner.read(8), "big") ciphertext = inner.read(size) return self.cypher.decrypt_and_verify(ciphertext, tag) def close(self): return inner.close() PK8Lanyptune/directory/local.pyfrom .base import DirectoryBase import os, pathlib, fnmatch class LocalDirectory(DirectoryBase): """A simple local directory on the FileSystem. """ def __init__(self, path): self.root = pathlib.Path(path) os.makedirs(self.root, exist_ok=True) def glob(self, pattern): return [ file for file in os.listdir(self.root) if fnmatch.fnmatch(file, pattern) ] def remove(self, name): os.unlink(self.root / name) def exists(self, name): return (self.root / name).is_file() def writer(self, name): return open(self.root / name, "wb") def reader(self, name): return open(self.root / name, "rb") PK8Lnyptune/handler/__init__.pyPK8LΉznyptune/handler/base.pyfrom abc import ABC, abstractmethod class HandlerBase(ABC): """Base is the abstract base class for Handlers. A Handler should know how to cache and restore some type of object. """ @abstractmethod def understands(self, value): """Return a boolean indicating whether this handler knows what to do with the value given. """ pass @abstractmethod def checksum(self, value): """Return a unique identifier string for the value. Typically, this is a cryptographic hash of the full value. """ pass @abstractmethod def quicksum(self, value): """Return a mostly-unique identifier string for the value that can be used as a quick way to tell if a value has changed. Typically, this is something like the mtime+size of a file. """ pass def checksum_matches_existing(self, value, existing): if not existing: return False qs = self.quicksum(value) if qs and qs == existing["quicksum"]: return True else: cs = self.checksum(value) return cs == existing["checksum"] @abstractmethod def serialize(self, value, handle): """Serialize the value into a writable IO given by handle. Additionally, You may also return a JSON-serializable Dict, which may contain a **limited** amount of metadata, such as the encoding, etc. """ pass @abstractmethod def deserialize(self, metadata, handle): """Given the metadata (i.e. the return value of the corresponding serialize function) and a readable IO, return the rehydrated object. """ pass PK8Lw//nyptune/handler/handler_base.pyfrom .base import HandlerBase class PathHandler(HandlerBase): def understands(self, value): try: if isinstance(value, Path): return True elif type(value) is str and len(value) < 200 and Path(str(value)).is_file(): return True else: return False except: return False def quicksum(self, value): path = Path(str(value)) stats = os.stat(path) return str(stats.st_size) + str(stats.st_mtime) def checksum(self, value): path = Path(str(value)) with open(path, "rb") as file: m = hashlib.md5() block = b"whatever" while len(block) > 0: block = file.read(1 << 20) m.update(block) return m.hexdigest() def serialize(self, value, handle): path = Path(str(value)) with open(path, "rb") as file: m = hashlib.md5() block = b"whatever" while len(block) > 0: block = file.read(1 << 20) handle.write(block) return {"original": base64.b64encode(pickle.dumps(value)).decode("ASCII")} def deserialize(self, metadata, handle): original = pickle.loads(base64.b64decode(metadata["original"])) path = Path(str(ipython.user_ns[name])) with open(path, "wb") as file: block = b"whatever" while len(block) > 0: block = handle.read(1 << 20) file.write(block) return original PK8LZA>wwnyptune/handler/path.pyfrom .base import HandlerBase from pathlib import Path import os, hashlib, base64, pickle class PathHandler(HandlerBase): def understands(self, value): try: print(value) if isinstance(value, Path): return True elif type(value) is str and len(value) < 200 and Path(str(value)).is_file(): return True else: return False except: return False def quicksum(self, value): path = Path(str(value)) stats = os.stat(path) return str(stats.st_size) + str(stats.st_mtime) def checksum(self, value): path = Path(str(value)) with open(path, "rb") as file: m = hashlib.md5() block = b"whatever" while len(block) > 0: block = file.read(1 << 20) m.update(block) return m.hexdigest() def serialize(self, value, handle): path = Path(str(value)) with open(path, "rb") as file: m = hashlib.md5() block = b"whatever" while len(block) > 0: block = file.read(1 << 20) handle.write(block) return {"original": base64.b64encode(pickle.dumps(value)).decode("ASCII")} def deserialize(self, metadata, handle): original = pickle.loads(base64.b64decode(metadata["original"])) path = Path(str(original)) with open(path, "wb") as file: block = b"whatever" while len(block) > 0: block = handle.read(1 << 20) file.write(block) return original PK8Ljnyptune/handler/pickle.pyfrom .base import HandlerBase import pickle, hashlib from pathlib import Path class PickleHandler(HandlerBase): def understands(self, value): return True def quicksum(self, value): try: return str(hash(value)) except: return None def checksum(self, value): block = pickle.dumps(value) path = Path(str(value)) m = hashlib.md5() m.update(block) return m.hexdigest() def serialize(self, value, handle): pickle.dump(value, handle) return {} def deserialize(self, metadata, handle): return pickle.load(handle) PK!HJ+(,3nyptune-0.0.1.1527314895.dist-info/entry_points.txtN+I/N.,()ʫ,()Kz9Vy\\PK!HNO(nyptune-0.0.1.1527314895.dist-info/WHEEL HM K-*ϳR03rOK-J,/RH,zd&Y)r$[)T&UrPK!HCjN]+nyptune-0.0.1.1527314895.dist-info/METADATAEN0~}$MK4 P&ju5yz\YuU>he-ePME'MIY|z1.-|ЌVuEh?'k2h;Kk]@b 3wHn@hTA <:O]lq 8߉2X8.ȪjlTu~K<mUh?q'/a:p~`FA.KaF*^qP_^ΈoPK!HQ=2T)nyptune-0.0.1.1527314895.dist-info/RECORD?:| -䟂@`DA1D,gw[VI{yv8'MK\4iLϑa/x]Zpx^?VI%/^]oS2S(CE>quq_ΡUZMJۿԥnܡwG⿠0'U_9zIi1 DXi`m!fxnsέ##͛U!F&v8fUrNuӴgm:0<#h0Ȑ7_l %@(oi74,f_%u&eXJrK@>" s9Mh݌?m,GnTMȊw^"~ OY;]ڷ41tvL]]!f#nwq^s5y ྻc7ț*Pث+{=XAbn/Q6mS75?Z;r#326c/d"*o*??IJ1ZGN%Ĺ@LCu{ilkfyyn {o7nftEZu ^o.h1ąpJ'7fƝ\+O'r6] @5 m1sWJPK8L&&nyptune/__init__.pyPK8LڛA( Wnyptune/cache.pyPKLvܪF(F( nyptune/cli.pyPK8Lhv{{5nyptune/jupyter.pyPK8L[5Z3:nyptune/magic.pyPK8LxInyptune/util.pyPK8LNnyptune/directory/__init__.pyPK8LN„wwjnyptune/handler/path.pyPK8Ljzqnyptune/handler/pickle.pyPK!HJ+(,35tnyptune-0.0.1.1527314895.dist-info/entry_points.txtPK!HNO(tnyptune-0.0.1.1527314895.dist-info/WHEELPK!HCjN]+Bunyptune-0.0.1.1527314895.dist-info/METADATAPK!HQ=2T)vnyptune-0.0.1.1527314895.dist-info/RECORDPKj#z