PK 9(
M{b nbclean/__init__.py"""Tools for preprocessing and cleaning Jupyter Notebooks."""
__version__ = "0.3.2"
from .clean import NotebookCleaner
from .run import run_notebook_directory, run_notebook
PK :(
MY)$ $ nbclean/clean.py"""Functions to assist with grading."""
import nbformat as nbf
import os
from nbgrader.preprocessors import ClearSolutions
from .preprocessors import RemoveCells, ClearCells, ConvertCells
from .utils import _check_nb_file
class NotebookCleaner(object):
    """Prepare Jupyter notebooks for distribution to students.

    Every cleaning method runs one preprocessor over ``self.ntbk``, appends
    that preprocessor to ``self.preprocessors`` and returns ``self`` so that
    calls can be chained.

    Parameters
    ----------
    ntbk : string | instance of NotebookNode
        The input notebook.
    verbose : bool
        If True, print the destination path when the notebook is saved.
    """

    def __init__(self, ntbk, verbose=False):
        self._verbose = verbose
        self.ntbk = _check_nb_file(ntbk)
        # Preprocessors that have been applied so far, in order.
        self.preprocessors = []

    def __repr__(self):
        header = "Number of preprocessors: {}\n---".format(
            len(self.preprocessors))
        return '\n'.join([header] + [str(pre) for pre in self.preprocessors])

    def clear(self, kind, tag=None, search_text=None, clear=None):
        """Clear the components of a notebook cell.

        Parameters
        ----------
        kind : string | list of strings
            The elements of the notebook you wish to clear. Must be one of:

                "content": the content of cells.
                "output": the entire output of cells.
                "output_text": the text output of cells.
                "output_image": the image output of cells.
                "stderr": the stderr of cells.

            If a list, must contain one or more of the above strings.
        tag : string | None
            Only apply clearing to cells with a certain tag. If
            None, apply clearing to all cells.
        search_text : str | None
            A string to search for within cells. Only cells containing this
            string are cleared.
        clear : None
            Unused. Kept in the signature so existing callers keep working.

        Returns
        -------
        self : NotebookCleaner
            This instance, to allow chaining.

        Raises
        ------
        ValueError
            If `kind` is not a string or a non-empty list, or if it names a
            kind that is not listed above.
        """
        ALLOWED_KINDS = ['content', 'output', 'output_text',
                         'output_image', 'stderr']
        if isinstance(kind, str):
            kind = [kind]
        if not isinstance(kind, list) or len(kind) == 0:
            raise ValueError('kind must be a list of at least one string. All '
                             'strings must be one of {}'.format(ALLOWED_KINDS))
        if any(ii not in ALLOWED_KINDS for ii in kind):
            raise ValueError(
                'Unknown kind found. kind must be one of {}'.format(
                    ALLOWED_KINDS))
        # Set every flag explicitly so that only the requested kinds are
        # cleared, regardless of the preprocessor's own defaults.
        kwargs = {key: key in kind for key in ALLOWED_KINDS}

        # `str(None)` yields 'None', which is the "not set" value the
        # preprocessor compares against.
        pre = ClearCells(tag=str(tag), search_text=str(search_text), **kwargs)
        self.ntbk = pre.preprocess(self.ntbk, {})[0]
        self.preprocessors.append(pre)
        return self

    def remove_cells(self, tag=None, empty=False, search_text=None):
        """Remove cells that match a given tag, text, or that are empty.

        Parameters
        ----------
        tag : str | None
            A string to search for in cell tags. Any cells with the
            tag inside will be removed.
        empty : bool
            Whether to remove any cell that is empty.
        search_text : str | None
            A string to search for within cells. Any cells with this string
            inside will be removed.

        Returns
        -------
        self : NotebookCleaner
            This instance, to allow chaining.
        """
        # The preprocessor uses the string 'None' to mean "not set".
        tag = 'None' if tag is None else tag
        search_text = 'None' if search_text is None else search_text
        pre = RemoveCells(tag=tag, empty=empty, search_text=search_text)
        self.ntbk = pre.preprocess(self.ntbk, {})[0]
        self.preprocessors.append(pre)
        return self

    def create_tests(self, tag, oktest_path, base_dir):
        """Create tests for code cells that are tagged with `tag`.

        The cell source will be used as the code for the doctest that is
        created. This function assumes that `oktest_path` is a directory
        relative to the final notebook directory specified in `base_dir`.
        Tests are created using the oktest format.

        Parameters
        ----------
        tag : str
            Cells tagged with this string will be converted into oktests
        oktest_path : str
            Path at which each oktests will be created. We assume this is a
            path relative to where the processed notebook will be stored.
        base_dir : str
            Path at which the processed notebook will be stored.

        Returns
        -------
        self : NotebookCleaner
            This instance, to allow chaining.
        """
        pre = ConvertCells(tag=tag,
                           oktest_path=oktest_path,
                           base_dir=base_dir)
        self.ntbk = pre.preprocess(self.ntbk, {})[0]
        self.preprocessors.append(pre)
        return self

    def replace_text(self, text_replace_begin=u'### SOLUTION BEGIN',
                     text_replace_end=u'### SOLUTION END',
                     replace_code=None, replace_md=None):
        """Create answer cells for students to fill out.

        This will remove the text between `text_replace_begin` and
        `text_replace_end`. Students should then give their answers in this
        section. Alternatively, a markdown cell will replace the student
        answer cell.

        Parameters
        ----------
        text_replace_begin : str
            A string to search for in input cells. If the string is
            found, then anything between it and `text_replace_end` is removed.
        text_replace_end : str
            The ending delimiter for solution cells.
        replace_code : str | None
            Text to add to code solution cells. If None, `nbgrader`
            default is used.
        replace_md : str | None
            Text to add to markdown solution cells. If None, a default template
            will be used.

        Returns
        -------
        self : NotebookCleaner
            This instance, to allow chaining.
        """
        # NOTE: "delimeter" is spelled this way on purpose; these are the
        # keyword names handed to `ClearSolutions`.
        kwargs = dict(begin_solution_delimeter=text_replace_begin,
                      end_solution_delimeter=text_replace_end,
                      enforce_metadata=False)
        if replace_code is not None:
            kwargs['code_stub'] = dict(python=replace_code)
        if replace_md is None:
            replace_md = ('---\n## Student Answer'
                          '\n\n*Double-click and add your answer between the '
                          'lines*\n\n---')
        kwargs['text_stub'] = replace_md
        pre = ClearSolutions(**kwargs)
        self.ntbk = pre.preprocess(self.ntbk, {})[0]
        self.preprocessors.append(pre)
        return self

    def save(self, path_save):
        """Save the notebook to disk.

        Parameters
        ----------
        path_save : string
            The path for saving the file. Missing parent directories are
            created.
        """
        dir_save = os.path.dirname(path_save)
        if self._verbose is True:
            print('Saving to {}'.format(path_save))
        # If we are saving to a subdirectory make sure it exists.
        # `exist_ok` avoids failing when the directory appears between a
        # separate existence check and the creation call.
        if dir_save:
            os.makedirs(dir_save, exist_ok=True)
        nbf.write(self.ntbk, path_save)
PK :(
MР_ nbclean/preprocessors.pyimport hashlib
import os
from traitlets import Unicode, Bool
from nbgrader.preprocessors import NbGraderPreprocessor
class RemoveCells(NbGraderPreprocessor):
    """Preprocessor that drops whole cells from a notebook.

    This should not be used directly, instead, use the
    NotebookCleaner class.

    The string ``'None'`` is the "not set" value of `tag` and
    `search_text`. `tag` takes precedence: `search_text` is only consulted
    when no tag is set.
    """

    tag = Unicode("None")
    search_text = Unicode("None")
    empty = Bool(False)

    def _is_dropped(self, cell):
        """Return True when `cell` must be left out of the notebook."""
        blank = len(cell['source']) == 0
        if self.tag != 'None':
            selected = self.tag in cell['metadata'].get('tags', [])
        elif self.search_text != 'None':
            selected = self.search_text in cell['source']
        else:
            # No selector at all: emptiness alone decides.
            return self.empty and blank
        if not selected:
            return False
        # With `empty` set, a selected cell is only dropped if it is blank.
        return blank if self.empty else True

    def preprocess(self, nb, resources):
        """Filter `nb['cells']` and return ``(nb, resources)``.

        Raises ValueError when none of `tag`, `empty` or `search_text`
        has been configured.
        """
        nothing_requested = (self.tag == 'None'
                             and self.empty is False
                             and self.search_text == 'None')
        if nothing_requested:
            raise ValueError("One of `tag`, `empty`, or `search_text` must be used.")
        nb['cells'] = [cell for cell in nb['cells']
                       if not self._is_dropped(cell)]
        return nb, resources

    def __repr__(self):
        label = " Tag: {}".format(self.tag)
        return (label + ' | Remove if empty') if self.empty else label
class ConvertCells:
    """A helper class to convert cells in a notebook to oktests.

    This should not be used directly, instead, use the
    NotebookCleaner class.

    Parameters
    ----------
    tag : str
        Code cells carrying this tag are converted.
    oktest_path : str
        Directory, relative to `base_dir`, in which test files are written.
    base_dir : str
        Path at which the notebook will be stored.

    Raises
    ------
    RuntimeError
        If `oktest_path` is an absolute path.
    """

    # Skeleton of a generated test file; `%s` receives the doctest lines.
    template = '''
test = {
'name': 'test_valid',
'points': 1,
'suites': [
{
'cases': [{'code': r"""
%s
"""},
]
}
]
}
'''

    def __init__(self, tag, oktest_path, base_dir):
        self.tag = tag
        # path at which to store oktests, this will be created as a subdirectory
        # of `base_dir`
        self.oktest_path = oktest_path
        if os.path.isabs(self.oktest_path):
            raise RuntimeError("Expected `oktest_path` to be a path relative"
                               " to `base_dir`, got '%s' instead." % oktest_path)
        # path at which the notebook will be stored
        self.base_dir = base_dir

    def preprocess(self, nb, resources):
        """Write one test file per tagged code cell and rewrite the cell.

        Each matching cell has its source replaced by a ``check("...")``
        call pointing at the generated file, and its outputs and execution
        count are reset. Returns ``(nb, resources)``.
        """
        os.makedirs(os.path.join(self.base_dir, self.oktest_path),
                    exist_ok=True)
        for cell in nb['cells']:
            cell_tags = cell['metadata'].get('tags', [])
            if self.tag not in cell_tags or cell['cell_type'] != 'code':
                continue
            # convert cell to oktest
            source = cell['source']
            # The file name is derived from the cell source, so identical
            # sources map to the same test file.
            digest = hashlib.md5(source.encode('utf-8')).hexdigest()[:7]
            oktest = os.path.join(self.oktest_path, 'q-%s.py' % digest)
            # Write as UTF-8 explicitly (matching the encoding used for the
            # digest) instead of relying on the platform default encoding.
            with open(os.path.join(self.base_dir, oktest), 'w',
                      encoding='utf-8') as f:
                # Blank lines are skipped; every other line becomes a
                # doctest prompt line.
                lines = [" >>> " + line
                         for line in source.split("\n") if line]
                f.write(self.template % '\n'.join(lines))
            cell['source'] = 'check("%s")' % oktest
            # clear outputs and execution count
            cell['outputs'] = []
            cell['execution_count'] = None
        nb['cells'] = list(nb['cells'])
        return nb, resources

    def __repr__(self):
        return " Tag: {}".format(self.tag)
class ClearCells(NbGraderPreprocessor):
    """Preprocessor that blanks out selected parts of notebook cells.

    This should not be used directly, instead, use the
    NotebookCleaner class.

    The boolean traits choose what is cleared. `tag` and `search_text`
    restrict which cells are touched; the string ``'None'`` means "not
    set", and `search_text` is only consulted when no tag is set.
    """

    output = Bool(True)
    output_image = Bool(False)
    output_text = Bool(False)
    content = Bool(False)
    stderr = Bool(True)
    tag = Unicode('None')
    search_text = Unicode("None")

    def _is_targeted(self, cell):
        """Return True when `cell` passes the tag / search-text filter."""
        if self.tag != 'None':
            return self.tag in cell['metadata'].get('tags', [])
        if self.search_text != 'None':
            return self.search_text in cell['source']
        return True

    @staticmethod
    def _strip_mime(cell, fragment):
        """Drop every output `data` entry whose key contains `fragment`."""
        for output in cell['outputs']:
            bundle = output.get('data', {})
            for mime in [key for key in bundle.keys() if fragment in key]:
                bundle.pop(mime)

    def preprocess(self, nb, resources):
        """Clear the configured parts of each targeted cell in place.

        Returns ``(nb, resources)``.
        """
        for cell in nb['cells']:
            if not self._is_targeted(cell):
                continue
            # Cells without an 'outputs' entry have nothing output-related
            # to clear.
            has_outputs = 'outputs' in cell.keys()

            # Whole output
            if self.output is True and has_outputs:
                cell['outputs'] = []
            # Text representations inside output data
            if self.output_text is True and has_outputs:
                self._strip_mime(cell, 'text/')
            # Image representations inside output data
            if self.output_image is True and has_outputs:
                self._strip_mime(cell, 'image/')
            # Cell source
            if self.content is True:
                cell['source'] = ''
            # Outputs named 'stderr'
            if self.stderr is True and has_outputs:
                cell['outputs'] = [entry for entry in cell['outputs']
                                   if entry.get('name', None) != 'stderr']
        return nb, resources

    def __repr__(self):
        return " Tag: {}".format(self.tag)
PK Ju nbclean/run.pyimport nbformat as nbf
import os
import os.path as op
from nbgrader.preprocessors import LimitOutput, Execute
from .utils import _check_nb_file
from glob import glob
from tqdm import tqdm
def run_notebook_directory(path, path_save=None, max_output_lines=1000,
                           overwrite=False):
    """Run all the notebooks in a directory and save them somewhere else.

    Parameters
    ----------
    path : str
        A path to a directory that contains jupyter notebooks.
        All notebooks in this folder ending in `.ipynb` will be run,
        and the outputs will be placed in `path_save`. This may
        optionally contain a wildcard matching ``.ipynb`` in which
        case only notebooks that match will be run.
    path_save : str | None
        A path to a directory to save the notebooks. If this doesn't exist,
        it will be created. If `None`, notebooks will not be saved.
    max_output_lines : int | None
        The maximum number of lines allowed in notebook outputs.
    overwrite : bool
        Whether to overwrite the output directory if it exists.

    Returns
    -------
    notebooks : list
        A list of the `NotebookNode` instances, one for each notebook.

    Raises
    ------
    ValueError
        If `path` does not exist and matches no notebook, or if `path_save`
        already exists and `overwrite` is not True.
    """
    # A path mentioning '.ipynb' is used as a file name / glob pattern as-is;
    # anything else is treated as a directory of notebooks.
    to_glob = op.join(path, '*.ipynb') if '.ipynb' not in path else path
    notebooks = glob(to_glob)
    # A wildcard pattern is not itself an existing path, so only reject the
    # input when it neither exists nor matches anything.
    if not op.exists(path) and not notebooks:
        raise ValueError("You've specified an input path that doesn't exist")

    # Detect the output conflict before spending time executing notebooks.
    if (path_save is not None and op.exists(path_save)
            and overwrite is not True):
        raise ValueError('path_save exists and overwrite is not True')

    # Execute notebooks
    outputs = []
    for notebook in tqdm(notebooks):
        outputs.append(run_notebook(notebook,
                                    max_output_lines=max_output_lines))

    # Now save them
    if path_save is not None:
        print('Saving {} notebooks to: {}'.format(len(notebooks), path_save))
        if not op.exists(path_save):
            os.makedirs(path_save)
        elif overwrite is True:
            print('Overwriting output directory')
            # Join with a separator so the pattern looks inside `path_save`
            # rather than at siblings whose names start with it.
            for ifile in glob(op.join(path_save, '*-exe.ipynb')):
                os.remove(ifile)
        else:
            raise ValueError('path_save exists and overwrite is not True')

        for filename, notebook in zip(notebooks, outputs):
            # splitext keeps working for names with more than one dot.
            stem, ext = op.splitext(op.basename(filename))
            nbf.write(notebook, op.join(path_save, stem + '-exe' + ext))
    return outputs
def run_notebook(ntbk, max_output_lines=1000):
    """Execute a notebook and optionally truncate long outputs.

    Parameters
    ----------
    ntbk : string | instance of NotebookNode
        The input notebook.
    max_output_lines : int | None
        The maximum number of lines allowed in notebook outputs. If None,
        outputs are not limited.

    Returns
    -------
    ntbk : instance of NotebookNode
        The notebook after all preprocessing steps have been applied.
    """
    executed = _check_nb_file(ntbk)

    # Execution always runs; output limiting is appended only on request.
    pipeline = [Execute()]
    if max_output_lines is not None:
        limiter = LimitOutput(max_lines=max_output_lines,
                              max_traceback=max_output_lines)
        pipeline.append(limiter)

    for step in pipeline:
        executed = step.preprocess(executed, {})[0]
    return executed
PK JAg g nbclean/utils.pyimport nbformat as nbf
from nbformat.notebooknode import NotebookNode
from copy import deepcopy
def _check_nb_file(ntbk):
    """Return an independent copy of a notebook given as a path or a node.

    A string is read from disk with ``nbf.read`` (no version conversion);
    a `NotebookNode` is used as is. Either way the result is deep-copied
    before being returned. Any other type raises TypeError.
    """
    if isinstance(ntbk, str):
        loaded = nbf.read(ntbk, nbf.NO_CONVERT)
    elif isinstance(ntbk, NotebookNode):
        loaded = ntbk
    else:
        raise TypeError('`ntbk` must be type string or `NotebookNode`')
    return deepcopy(loaded)
PK L nbclean/tests/test_nbclean.pyimport nbclean as nbc
import pytest
import os
# NOTE: everything below runs at import time; `test_nbclean` then inspects
# the resulting module-level `ntbk`.
# We'll use the test notebook in `examples`
path = os.path.dirname(__file__)
path_notebook = path + '/../../examples/test_notebooks/test_notebook.ipynb'
# Marker text; cells containing it are removed further down.
HIDE_TEXT = '# HIDDEN'
# Clear different parts of the notebook cells based on tags
ntbk = nbc.NotebookCleaner(path_notebook)
ntbk.clear(kind='output', tag='hide_output')
ntbk.clear(kind='content', tag='hide_content')
ntbk.clear(kind=['stderr'], tag='hide_stderr')
# An unknown kind and an empty list of kinds must both be rejected.
with pytest.raises(ValueError):
    ntbk.clear(kind='foo')
with pytest.raises(ValueError):
    ntbk.clear(kind=[])
# Removing entire cells
ntbk.remove_cells(tag='remove')
ntbk.remove_cells(tag='remove_if_empty', empty=True)
ntbk.remove_cells(search_text=HIDE_TEXT)
# Replacing text
text_replace_begin = '### SOLUTION BEGIN'
text_replace_end = '### SOLUTION END'
ntbk.replace_text(text_replace_begin, text_replace_end)
def test_nbclean():
    """Check the module-level `ntbk` after the cleaning steps were applied."""
    # Make sure we're testing for all of these
    TEST_KINDS = ['hide_output', 'hide_content', 'hide_stderr']
    for kind in TEST_KINDS:
        assert any(kind in cell['metadata'].get('tags', [])
                   for cell in ntbk.ntbk.cells)
    for cell in ntbk.ntbk.cells:
        # Tag removal
        tags = cell['metadata'].get('tags', None)
        # Cells without tags are skipped entirely, including the text checks
        # further down in this loop body.
        if tags is None:
            continue
        if 'hide_output' in tags:
            assert len(cell['outputs']) == 0
        if 'hide_content' in tags:
            assert len(cell['source']) == 0
        if 'hide_stderr' in tags:
            assert all('stderr' != output.get('name', '')
                       for output in cell['outputs'])
        # Cells tagged this way are only removed when empty, so any that
        # remain must have content.
        if 'remove_if_empty' in tags:
            assert len(cell['source']) != 0
        assert 'remove' not in tags
        assert HIDE_TEXT not in cell['source']
        # Text replacing
        if "# First we'll create 'a'" in cell['source']:
            assert '### SOLUTION BEGIN' not in cell['source']
    # Make sure final cell has all this stuff
    cell = ntbk.ntbk.cells[-1]
    assert len(cell['outputs']) != 0
    assert any('stderr' == output.get('name', '') for output in cell['outputs'])
    assert len(cell['source']) != 0
# Allow running this file directly as a script, without a test runner.
if __name__ == '__main__':
    test_nbclean()
PK 6J
9 9 nbclean-0.3.2.dist-info/LICENSEThe MIT License (MIT)
Copyright (c) 2017 Chris Holdgraf
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
PK !Hp!Q a nbclean-0.3.2.dist-info/WHEELHM
K-*ϳR03rOK-J,/RH,zd&Y)r$[)T&UD" PK !HD0ag nbclean-0.3.2.dist-info/METADATAePAr0@|*CЖ0Sh{vbx-c˝k:8jWYiŪy QʡCT/>;$@d!DzL#(Oy^s#R-
XN!ɦ):;y2i{rѧپV,3%hcTW蔱qƟ;'8ըXYUR~6)a2?/vsDLՋI,!!3;<0/p-^[2|NPK !HK nbclean-0.3.2.dist-info/RECORDu;@|~8Mcs 6@lB@04ʯ߭51