PKACKTnbclean/__init__.py"""Tools for preprocessing and cleaning Jupyter Notebooks.""" __version__ = "0.2.1" from .clean import NotebookCleaner from .run import run_notebook_directory, run_notebook PKCK>;Qnbclean/clean.py"""Functions to assist with grading.""" import nbformat as nbf import os from nbgrader.preprocessors import ClearSolutions from .preprocessors import RemoveCells, ClearCells from .utils import _check_nb_file class NotebookCleaner(object): """Prepare Jupyter notebooks for distribution to students. Parameters ---------- ntbk : string | instance of NotebookNode The input notebook. """ def __init__(self, ntbk): self.ntbk = _check_nb_file(ntbk) self.preprocessors = [] def __repr__(self): s = "Number of preprocessors: {}\n---".format( len(self.preprocessors)) for pre in self.preprocessors: s += '\n' + str(pre) return s def clear(self, content=False, output=False, output_image=False, output_text=False, stderr=False, tag=None): """Clear the components of a notebook cell. Parameters ---------- content : bool Whether to clear the content of cells. output : bool Whether to clear the entire output of cells. output_text : bool Whether to clear the text output of cells. output_image : bool Whether to clear the image output of cells. stderr : bool Whether to clear the stderr of cells. tag : string | None Only apply clearing to cells with a certain tag. If None, apply clearing to all cells. """ if not any([output, output_image, output_text, content, stderr]): raise ValueError("At least of the clear options must be True.") # See if the cell matches the string pre = ClearCells(content=content, output=output, output_text=output_text, output_image=output_image, stderr=stderr, tag=str(tag)) self.ntbk = pre.preprocess(self.ntbk, {})[0] self.preprocessors.append(pre) return self def remove_cells(self, tag): """Remove cells that contain a specific string. Parameters ---------- match_text : str A string to search for in input cells. Any cells with the `match_text` inside will be removed. """ # See if the cell matches the string pre = RemoveCells(tag=tag) self.ntbk = pre.preprocess(self.ntbk, {})[0] self.preprocessors.append(pre) return self def replace_text(self, text_replace_begin=u'### SOLUTION BEGIN', text_replace_end=u'### SOLUTION END', replace_code=None, replace_md=None): """Create answer cells for students to fill out. This will remove all text after `match_string`. Students should then give their answers in this section. Alternatively, a markdown cell will replace the student answer cell Parameters ---------- text_replace_begin : str A string to search for in input cells. If the string is found, then anything between it and `text_replace_end` is removed. text_replace_end : str The ending delimiter for solution cells. replace_code : str | None Text to add to code solution cells. If None, `nbgrader` default is used. replace_md : str | None Text to add to markdown solution cells. If None, a default template will be used. """ kwargs = dict(begin_solution_delimeter=text_replace_begin, end_solution_delimeter=text_replace_end, enforce_metadata=False) if replace_code is not None: kwargs['code_stub'] = dict(python=replace_code) if replace_md is None: replace_md = ('---\n## Student Answer' '\n\n*Double-click and add your answer between the ' 'lines*\n\n---') kwargs['text_stub'] = replace_md pre = ClearSolutions(**kwargs) self.ntbk = pre.preprocess(self.ntbk, {})[0] self.preprocessors.append(pre) return self def save(self, path_save): """Save the notebook to disk. Parameters ---------- path_save : string The path for saving the file. """ dir_save = os.path.dirname(path_save) print('Saving to {}'.format(path_save)) if not os.path.isdir(dir_save): os.makedirs(dir_save) nbf.write(self.ntbk, path_save) PKCKSi&g g nbclean/preprocessors.pyfrom traitlets import Unicode, Bool from nbgrader.preprocessors import NbGraderPreprocessor class RemoveCells(NbGraderPreprocessor): """A helper class to remove cells from a notebook. This should not be used directly, instead, use the NotebookCleaner class. """ tag = Unicode("None") def preprocess(self, nb, resources): new_cells = [] for ii, cell in enumerate(nb['cells']): if self.tag != 'None': tags = cell['metadata'].get('tags', []) # Only keep the cell if the tag doesn't match if self.tag not in tags: new_cells.append(cell) nb['cells'] = new_cells return nb, resources class ClearCells(NbGraderPreprocessor): """A helper class to remove cells from a notebook. This should not be used directly, instead, use the NotebookCleaner class. """ output = Bool(True) output_image = Bool(False) output_text = Bool(False) content = Bool(False) stderr = Bool(True) tag = Unicode('None') def preprocess(self, nb, resources): for cell in nb['cells']: # Check to see whether we process this cell if self.tag != 'None': tags = cell['metadata'].get('tags', []) if self.tag not in tags: continue # Clear all cell output if self.output is True: if 'outputs' in cell.keys(): cell['outputs'] = [] # Clear cell text output if self.output_text is True: if 'outputs' in cell.keys(): for output in cell['outputs']: data = output.get('data', {}) for key in list(data.keys()): if 'text/' in key: data.pop(key) # Clear cell image output if self.output_image is True: if 'outputs' in cell.keys(): for output in cell['outputs']: data = output.get('data', {}) for key in list(data.keys()): if 'image/' in key: data.pop(key) # Clear cell content if self.content is True: cell['source'] = '' # Clear stdout if self.stderr is True: new_outputs = [] if 'outputs' not in cell.keys(): continue for output in cell['outputs']: name = output.get('name', None) if name != 'stderr': new_outputs.append(output) cell['outputs'] = new_outputs return nb, resources def __repr__(self): s = " Tag: {}".format(self.tag) return s PKݎCKu nbclean/run.pyimport nbformat as nbf import os import os.path as op from nbgrader.preprocessors import LimitOutput, Execute from .utils import _check_nb_file from glob import glob from tqdm import tqdm def run_notebook_directory(path, path_save=None, max_output_lines=1000, overwrite=False): """Run all the notebooks in a directory and save them somewhere else. Parameters ---------- path : str A path to a directory that contains jupyter notebooks. All notebooks in this folder ending in `.ipynb` will be run, and the outputs will be placed in `path_save`. This may optionally contain a wildcard matching ``.ipynb`` in which case only notebooks that match will be run. path_save : str | None A path to a directory to save the notebooks. If this doesn't exist, it will be created. If `None`, notebooks will not be saved. max_output_lines : int | None The maximum number of lines allowed in notebook outputs. overwrite : bool Whether to overwrite the output directory if it exists. Returns ------- notebooks : list A list of the `NotebookNode` instances, one for each notebook. """ if not op.exists(path): raise ValueError("You've specified an input path that doesn't exist") to_glob = op.join(path, '*.ipynb') if '.ipynb' not in path else path notebooks = glob(to_glob) # Execute notebooks outputs = [] for notebook in tqdm(notebooks): outputs.append(run_notebook(notebook, max_output_lines=max_output_lines)) # Now save them if path_save is not None: print('Saving {} notebooks to: {}'.format(len(notebooks), path_save)) if not op.exists(path_save): os.makedirs(path_save) elif overwrite is True: print('Overwriting output directory') for ifile in glob(path_save + '*-exe.ipynb'): os.remove(ifile) else: raise ValueError('path_save exists and overwrite is not True') for filename, notebook in zip(notebooks, outputs): this_name = op.basename(filename) left, right = this_name.split('.') left += '-exe' this_name = '.'.join([left, right]) nbf.write(notebook, op.join(path_save, this_name)) def run_notebook(ntbk, max_output_lines=1000): """Run the cells in a notebook and limit the output length. Parameters ---------- ntbk : string | instance of NotebookNode The input notebook. max_output_lines : int | None The maximum number of lines allowed in notebook outputs. """ ntbk = _check_nb_file(ntbk) preprocessors = [Execute()] if max_output_lines is not None: preprocessors.append(LimitOutput(max_lines=max_output_lines, max_traceback=max_output_lines)) for prep in preprocessors: ntbk, _ = prep.preprocess(ntbk, {}) return ntbk PKݎCKAggnbclean/utils.pyimport nbformat as nbf from nbformat.notebooknode import NotebookNode from copy import deepcopy def _check_nb_file(ntbk): if isinstance(ntbk, str): ntbk = nbf.read(ntbk, nbf.NO_CONVERT) elif not isinstance(ntbk, NotebookNode): raise TypeError('`ntbk` must be type string or `NotebookNode`') ntbk = deepcopy(ntbk) return ntbk PK\CK5\-nbclean/.ipynb_checkpoints/demo-checkpoint.pyimport matplotlib.pyplot as plt import numpy as np plt.ion() plt.scatter(*np.random.randn(2, 1000), c=np.random.randn(1000)) plt.show()PKCK DDnbclean/tests/test_nbclean.pyimport nbclean as nbc import pytest import os # We'll use the test notebook in `examples` path = os.path.dirname(__file__) path_notebook = path + '/../../examples/test_notebooks/test_notebook.ipynb' # Clear different parts of the notebook cells based on tags ntbk = nbc.NotebookCleaner(path_notebook) ntbk.clear(output=True, tag='hide_output') ntbk.clear(output=False, content=True, tag='hide_content') ntbk.clear(output=False, stderr=True, tag='hide_stderr') # Removing entire cells ntbk.remove_cells(tag='remove') # Replacing text text_replace_begin = '### SOLUTION BEGIN' text_replace_end = '### SOLUTION END' ntbk.replace_text(text_replace_begin, text_replace_end) def test_nbclean(): # Make sure we're testing for all of these TEST_KINDS = ['hide_output', 'hide_content', 'hide_stderr'] for kind in TEST_KINDS: assert any(kind in cell['metadata'].get('tags', []) for cell in ntbk.ntbk.cells) for cell in ntbk.ntbk.cells: # Tag removal tags = cell['metadata'].get('tags', None) if tags is None: continue if 'hide_output' in tags: assert len(cell['outputs']) == 0 if 'hide_content' in tags: assert len(cell['source']) == 0 if 'hide_stderr' in tags: assert all('stderr' != output.get('name', '') for output in cell['outputs']) assert 'remove' not in tags # Text replacing if "# First we'll create 'a'" in cell['source']: assert '### SOLUTION BEGIN' not in cell['source'] # Make sure final cell has all this stuff cell = ntbk.ntbk.cells[-1] assert len(cell['outputs']) != 0 assert any('stderr' == output.get('name', '') for output in cell['outputs']) assert len(cell['source']) != 0 if __name__ == '__main__': test_nbclean() PKaCK DD;nbclean/tests/.ipynb_checkpoints/test_nbclean-checkpoint.pyimport nbclean as nbc import pytest import os # We'll use the test notebook in `examples` path = os.path.dirname(__file__) path_notebook = path + '/../../examples/test_notebooks/test_notebook.ipynb' # Clear different parts of the notebook cells based on tags ntbk = nbc.NotebookCleaner(path_notebook) ntbk.clear(output=True, tag='hide_output') ntbk.clear(output=False, content=True, tag='hide_content') ntbk.clear(output=False, stderr=True, tag='hide_stderr') # Removing entire cells ntbk.remove_cells(tag='remove') # Replacing text text_replace_begin = '### SOLUTION BEGIN' text_replace_end = '### SOLUTION END' ntbk.replace_text(text_replace_begin, text_replace_end) def test_nbclean(): # Make sure we're testing for all of these TEST_KINDS = ['hide_output', 'hide_content', 'hide_stderr'] for kind in TEST_KINDS: assert any(kind in cell['metadata'].get('tags', []) for cell in ntbk.ntbk.cells) for cell in ntbk.ntbk.cells: # Tag removal tags = cell['metadata'].get('tags', None) if tags is None: continue if 'hide_output' in tags: assert len(cell['outputs']) == 0 if 'hide_content' in tags: assert len(cell['source']) == 0 if 'hide_stderr' in tags: assert all('stderr' != output.get('name', '') for output in cell['outputs']) assert 'remove' not in tags # Text replacing if "# First we'll create 'a'" in cell['source']: assert '### SOLUTION BEGIN' not in cell['source'] # Make sure final cell has all this stuff cell = ntbk.ntbk.cells[-1] assert len(cell['outputs']) != 0 assert any('stderr' == output.get('name', '') for output in cell['outputs']) assert len(cell['source']) != 0 if __name__ == '__main__': test_nbclean() PKݎCK 99nbclean-0.2.1.dist-info/LICENSEThe MIT License (MIT) Copyright (c) 2017 Chris Holdgraf Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. PK!H١Wdnbclean-0.2.1.dist-info/WHEEL HM K-*ϳR03rOK-J,/RH,Q0343 /, (-JLR()*M ILR(4KM̫#DPK!HiIm nbclean-0.2.1.dist-info/METADATAePAr0 @ѧ2m 3g'V2ܙNzv]퐕V/ɐU%e윊#M0P1D1%O?<01Fh#:ZlaԩCMS YuvdTwӣOe}k߭Xe)JX$ؐէ+\SJ+a<ũFڪr0XWc-B)zwx&b^Lb 93+p\ixKlMwE; PK!H\D*nbclean-0.2.1.dist-info/RECORDr@} Er* ]O2j*,] ΢9 ˪р}I8$e}&j.Z>xs#klT# (2F^oe _nyQP.T&rǃȦ.`_]+PKACKTnbclean/__init__.pyPKCK>;Qnbclean/clean.pyPKCKSi&g g nbclean/preprocessors.pyPKݎCKu nbclean/run.pyPKݎCKAgg*nbclean/utils.pyPK\CK5\-?,nbclean/.ipynb_checkpoints/demo-checkpoint.pyPKCK DD-nbclean/tests/test_nbclean.pyPKaCK DD;4nbclean/tests/.ipynb_checkpoints/test_nbclean-checkpoint.pyPKݎCK 99-<nbclean-0.2.1.dist-info/LICENSEPK!H١Wd@nbclean-0.2.1.dist-info/WHEELPK!HiIm 5Anbclean-0.2.1.dist-info/METADATAPK!H\D*vBnbclean-0.2.1.dist-info/RECORDPK D