PK9( M{bnbclean/__init__.py"""Tools for preprocessing and cleaning Jupyter Notebooks.""" __version__ = "0.3.2" from .clean import NotebookCleaner from .run import run_notebook_directory, run_notebook PK:( MY)$$nbclean/clean.py"""Functions to assist with grading.""" import nbformat as nbf import os from nbgrader.preprocessors import ClearSolutions from .preprocessors import RemoveCells, ClearCells, ConvertCells from .utils import _check_nb_file class NotebookCleaner(object): """Prepare Jupyter notebooks for distribution to students. Parameters ---------- ntbk : string | instance of NotebookNode The input notebook. """ def __init__(self, ntbk, verbose=False): self._verbose = verbose self.ntbk = _check_nb_file(ntbk) self.preprocessors = [] def __repr__(self): s = "Number of preprocessors: {}\n---".format( len(self.preprocessors)) for pre in self.preprocessors: s += '\n' + str(pre) return s def clear(self, kind, tag=None, search_text=None, clear=None): """Clear the components of a notebook cell. Parameters ---------- kind : string | list of strings The elements of the notebook you wish to clear. Must be one of: "content": the content of cells. "output": the entire output of cells. "output_text": the text output of cells. "output_image": the image output of cells. "stderr": the stderr of cells. If a list, must contain one or more of the above strings. tag : string | None Only apply clearing to cells with a certain tag. If None, apply clearing to all cells. search_text : str | None A string to search for within cells. Any cells with this string inside will be removed. """ ALLOWED_KINDS = ['content', 'output', 'output_text', 'output_image', 'stderr'] if isinstance(kind, str): kind = [kind] if not isinstance(kind, list) or len(kind) == 0: raise ValueError('kind must be a list of at least one string. All ' 'strings must be one of {}'.format(ALLOWED_KINDS)) if any(ii not in ALLOWED_KINDS for ii in kind): raise ValueError('Unknown kind found. kind must be one of {}'.format(ALLOWED_KINDS)) kwargs = {key: key in kind for key in ALLOWED_KINDS} # See if the cell matches the string pre = ClearCells(tag=str(tag), search_text=str(search_text), **kwargs) self.ntbk = pre.preprocess(self.ntbk, {})[0] self.preprocessors.append(pre) return self def remove_cells(self, tag=None, empty=False, search_text=None): """Remove cells that match a given tag. Parameters ---------- tag : str | None A string to search for in cell tags cells. Any cells with the tag inside will be removed. empty : bool Whether to remove any cell that is empty. search_text : str | None A string to search for within cells. Any cells with this string inside will be removed. """ # See if the cell matches the string tag = 'None' if tag is None else tag search_text = 'None' if search_text is None else search_text pre = RemoveCells(tag=tag, empty=empty, search_text=search_text) self.ntbk = pre.preprocess(self.ntbk, {})[0] self.preprocessors.append(pre) return self def create_tests(self, tag, oktest_path, base_dir): """Create tests for code cells that are tagged with `tag`. The cell source will be used as the code for the doctest that is created. This function assumes that `test_path` is a directory relative to the final notebook directory specified in `base_dir`. Tests are created using the oktest format. Parameters ---------- tag : str Cells tagged with this string will be converted into oktests test_path : str Path at which each oktests will be created. We assume this is a path relative to where the processed notebook will be stored. base_dir : str Path at which the processed notebook will be stored. """ pre = ConvertCells(tag=tag, oktest_path=oktest_path, base_dir=base_dir) self.ntbk = pre.preprocess(self.ntbk, {})[0] self.preprocessors.append(pre) return self def replace_text(self, text_replace_begin=u'### SOLUTION BEGIN', text_replace_end=u'### SOLUTION END', replace_code=None, replace_md=None): """Create answer cells for students to fill out. This will remove all text after `match_string`. Students should then give their answers in this section. Alternatively, a markdown cell will replace the student answer cell Parameters ---------- text_replace_begin : str A string to search for in input cells. If the string is found, then anything between it and `text_replace_end` is removed. text_replace_end : str The ending delimiter for solution cells. replace_code : str | None Text to add to code solution cells. If None, `nbgrader` default is used. replace_md : str | None Text to add to markdown solution cells. If None, a default template will be used. """ kwargs = dict(begin_solution_delimeter=text_replace_begin, end_solution_delimeter=text_replace_end, enforce_metadata=False) if replace_code is not None: kwargs['code_stub'] = dict(python=replace_code) if replace_md is None: replace_md = ('---\n## Student Answer' '\n\n*Double-click and add your answer between the ' 'lines*\n\n---') kwargs['text_stub'] = replace_md pre = ClearSolutions(**kwargs) self.ntbk = pre.preprocess(self.ntbk, {})[0] self.preprocessors.append(pre) return self def save(self, path_save): """Save the notebook to disk. Parameters ---------- path_save : string The path for saving the file. """ dir_save = os.path.dirname(path_save) if self._verbose is True: print('Saving to {}'.format(path_save)) # if we are saving to a subdirectory make sure it exists if dir_save and not os.path.exists(dir_save): os.makedirs(dir_save) nbf.write(self.ntbk, path_save) PK:( MР_  nbclean/preprocessors.pyimport hashlib import os from traitlets import Unicode, Bool from nbgrader.preprocessors import NbGraderPreprocessor class RemoveCells(NbGraderPreprocessor): """A helper class to remove cells from a notebook. This should not be used directly, instead, use the NotebookCleaner class. """ tag = Unicode("None") search_text = Unicode("None") empty = Bool(False) def preprocess(self, nb, resources): if self.tag == 'None' and self.empty is False and self.search_text == 'None': raise ValueError("One of `tag`, `empty`, or `search_text` must be used.") new_cells = [] for ii, cell in enumerate(nb['cells']): is_empty = len(cell['source']) == 0 if self.tag != 'None': if self.tag in cell['metadata'].get('tags', []): # Skip appending the cell if the tag matches if self.empty: if is_empty: continue else: continue elif self.search_text != 'None': if self.search_text in cell['source']: # Skip appending the cell if the tag matches if self.empty: if is_empty: continue else: continue elif self.empty and is_empty: continue # If we didn't trigger anything above, append the cell to keep it new_cells.append(cell) nb['cells'] = new_cells return nb, resources def __repr__(self): s = " Tag: {}".format(self.tag) if self.empty: s += ' | Remove if empty' return s class ConvertCells: """A helper class to convert cells in a notebook to oktests. This should not be used directly, instead, use the NotebookCleaner class. """ template = ''' test = { 'name': 'test_valid', 'points': 1, 'suites': [ { 'cases': [{'code': r""" %s """}, ] } ] } ''' def __init__(self, tag, oktest_path, base_dir): self.tag = tag # path at which to store oktests, this will be created as a subdirectory # of `base_dir` self.oktest_path = oktest_path if os.path.isabs(self.oktest_path): raise RuntimeError("Expected `oktest_path` to be a path relative" " to `base_dir`, got '%s' instead." % oktest_path) # path at which the notebook will be stored self.base_dir = base_dir def preprocess(self, nb, resources): os.makedirs(os.path.join(self.base_dir, self.oktest_path), exist_ok=True) new_cells = [] for cell in nb['cells']: cell_tags = cell['metadata'].get('tags', []) if self.tag in cell_tags and cell['cell_type'] == 'code': # convert cell to oktest source = cell['source'] h = hashlib.md5(source.encode('utf-8')).hexdigest()[:7] oktest = os.path.join(self.oktest_path, 'q-%s.py' % h) with open(os.path.join(self.base_dir, oktest), 'w') as f: lines = [" >>> " + l for l in source.split("\n") if l] f.write(self.template % '\n'.join(lines)) cell['source'] = 'check("%s")' % oktest # clear outputs and execution count cell['outputs'] = [] cell['execution_count'] = None new_cells.append(cell) nb['cells'] = new_cells return nb, resources def __repr__(self): s = " Tag: {}".format(self.tag) return s class ClearCells(NbGraderPreprocessor): """A helper class to remove cells from a notebook. This should not be used directly, instead, use the NotebookCleaner class. """ output = Bool(True) output_image = Bool(False) output_text = Bool(False) content = Bool(False) stderr = Bool(True) tag = Unicode('None') search_text = Unicode("None") def preprocess(self, nb, resources): for cell in nb['cells']: # Check to see whether we process this cell if self.tag != 'None': tags = cell['metadata'].get('tags', []) if self.tag not in tags: continue elif self.search_text != 'None': if self.search_text not in cell['source']: continue # Clear all cell output if self.output is True: if 'outputs' in cell.keys(): cell['outputs'] = [] # Clear cell text output if self.output_text is True: if 'outputs' in cell.keys(): for output in cell['outputs']: data = output.get('data', {}) for key in list(data.keys()): if 'text/' in key: data.pop(key) # Clear cell image output if self.output_image is True: if 'outputs' in cell.keys(): for output in cell['outputs']: data = output.get('data', {}) for key in list(data.keys()): if 'image/' in key: data.pop(key) # Clear cell content if self.content is True: cell['source'] = '' # Clear stdout if self.stderr is True: new_outputs = [] if 'outputs' not in cell.keys(): continue for output in cell['outputs']: name = output.get('name', None) if name != 'stderr': new_outputs.append(output) cell['outputs'] = new_outputs return nb, resources def __repr__(self): s = " Tag: {}".format(self.tag) return s PKJu nbclean/run.pyimport nbformat as nbf import os import os.path as op from nbgrader.preprocessors import LimitOutput, Execute from .utils import _check_nb_file from glob import glob from tqdm import tqdm def run_notebook_directory(path, path_save=None, max_output_lines=1000, overwrite=False): """Run all the notebooks in a directory and save them somewhere else. Parameters ---------- path : str A path to a directory that contains jupyter notebooks. All notebooks in this folder ending in `.ipynb` will be run, and the outputs will be placed in `path_save`. This may optionally contain a wildcard matching ``.ipynb`` in which case only notebooks that match will be run. path_save : str | None A path to a directory to save the notebooks. If this doesn't exist, it will be created. If `None`, notebooks will not be saved. max_output_lines : int | None The maximum number of lines allowed in notebook outputs. overwrite : bool Whether to overwrite the output directory if it exists. Returns ------- notebooks : list A list of the `NotebookNode` instances, one for each notebook. """ if not op.exists(path): raise ValueError("You've specified an input path that doesn't exist") to_glob = op.join(path, '*.ipynb') if '.ipynb' not in path else path notebooks = glob(to_glob) # Execute notebooks outputs = [] for notebook in tqdm(notebooks): outputs.append(run_notebook(notebook, max_output_lines=max_output_lines)) # Now save them if path_save is not None: print('Saving {} notebooks to: {}'.format(len(notebooks), path_save)) if not op.exists(path_save): os.makedirs(path_save) elif overwrite is True: print('Overwriting output directory') for ifile in glob(path_save + '*-exe.ipynb'): os.remove(ifile) else: raise ValueError('path_save exists and overwrite is not True') for filename, notebook in zip(notebooks, outputs): this_name = op.basename(filename) left, right = this_name.split('.') left += '-exe' this_name = '.'.join([left, right]) nbf.write(notebook, op.join(path_save, this_name)) def run_notebook(ntbk, max_output_lines=1000): """Run the cells in a notebook and limit the output length. Parameters ---------- ntbk : string | instance of NotebookNode The input notebook. max_output_lines : int | None The maximum number of lines allowed in notebook outputs. """ ntbk = _check_nb_file(ntbk) preprocessors = [Execute()] if max_output_lines is not None: preprocessors.append(LimitOutput(max_lines=max_output_lines, max_traceback=max_output_lines)) for prep in preprocessors: ntbk, _ = prep.preprocess(ntbk, {}) return ntbk PKJAggnbclean/utils.pyimport nbformat as nbf from nbformat.notebooknode import NotebookNode from copy import deepcopy def _check_nb_file(ntbk): if isinstance(ntbk, str): ntbk = nbf.read(ntbk, nbf.NO_CONVERT) elif not isinstance(ntbk, NotebookNode): raise TypeError('`ntbk` must be type string or `NotebookNode`') ntbk = deepcopy(ntbk) return ntbk PKLnbclean/tests/test_nbclean.pyimport nbclean as nbc import pytest import os # We'll use the test notebook in `examples` path = os.path.dirname(__file__) path_notebook = path + '/../../examples/test_notebooks/test_notebook.ipynb' HIDE_TEXT = '# HIDDEN' # Clear different parts of the notebook cells based on tags ntbk = nbc.NotebookCleaner(path_notebook) ntbk.clear(kind='output', tag='hide_output') ntbk.clear(kind='content', tag='hide_content') ntbk.clear(kind=['stderr'], tag='hide_stderr') with pytest.raises(ValueError): ntbk.clear(kind='foo') with pytest.raises(ValueError): ntbk.clear(kind=[]) # Removing entire cells ntbk.remove_cells(tag='remove') ntbk.remove_cells(tag='remove_if_empty', empty=True) ntbk.remove_cells(search_text=HIDE_TEXT) # Replacing text text_replace_begin = '### SOLUTION BEGIN' text_replace_end = '### SOLUTION END' ntbk.replace_text(text_replace_begin, text_replace_end) def test_nbclean(): # Make sure we're testing for all of these TEST_KINDS = ['hide_output', 'hide_content', 'hide_stderr'] for kind in TEST_KINDS: assert any(kind in cell['metadata'].get('tags', []) for cell in ntbk.ntbk.cells) for cell in ntbk.ntbk.cells: # Tag removal tags = cell['metadata'].get('tags', None) if tags is None: continue if 'hide_output' in tags: assert len(cell['outputs']) == 0 if 'hide_content' in tags: assert len(cell['source']) == 0 if 'hide_stderr' in tags: assert all('stderr' != output.get('name', '') for output in cell['outputs']) if 'remove_if_empty' in tags: assert len(cell['source']) != 0 assert 'remove' not in tags assert HIDE_TEXT not in cell['source'] # Text replacing if "# First we'll create 'a'" in cell['source']: assert '### SOLUTION BEGIN' not in cell['source'] # Make sure final cell has all this stuff cell = ntbk.ntbk.cells[-1] assert len(cell['outputs']) != 0 assert any('stderr' == output.get('name', '') for output in cell['outputs']) assert len(cell['source']) != 0 if __name__ == '__main__': test_nbclean() PK6J 99nbclean-0.3.2.dist-info/LICENSEThe MIT License (MIT) Copyright (c) 2017 Chris Holdgraf Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. PK!Hp!Qanbclean-0.3.2.dist-info/WHEEL HM K-*ϳR03rOK-J,/RH,zd&Y)r$[)T&UD"PK!HD0ag nbclean-0.3.2.dist-info/METADATAePAr0 @|*CЖ0Sh{vbx-c˝k:8jWYiۘ y QʡCT/>;$@d !D zL#(Oy^s#R- XN!ɦ):;y2i{rѧپV,3%hcT W蔱qƟ;'8ըXYUR~6)a2?/vsDLՋI,!!3;<0/p-^[2|NPK!HKnbclean-0.3.2.dist-info/RECORDu;@|~ 8Mcs 6@lB@04ʯ߭51PK9( M{bnbclean/__init__.pyPK:( MY)$$nbclean/clean.pyPK:( MР_  2nbclean/preprocessors.pyPKJu s3nbclean/run.pyPKJAgg큌?nbclean/utils.pyPKL!Anbclean/tests/test_nbclean.pyPK6J 99Inbclean-0.3.2.dist-info/LICENSEPK!Hp!QajNnbclean-0.3.2.dist-info/WHEELPK!HD0ag Nnbclean-0.3.2.dist-info/METADATAPK!HK8Pnbclean-0.3.2.dist-info/RECORDPK VR