PK zJV nbclean/__init__.py"""Tools for preprocessing Jupyter Notebooks."""
__version__ = "0.1"
from .clean import NotebookCleaner
from .run import run_notebook_directory, run_notebook
PK xJӧ9j j nbclean/clean.py"""Functions to assist with grading."""
import nbformat as nbf
import os
from nbgrader.preprocessors import ClearSolutions
from .preprocessors import RemoveCells, ClearCells
from .utils import _check_nb_file
class NotebookCleaner(object):
    """Prepare Jupyter notebooks for distribution to students.

    Each cleaning method applies one preprocessor to the stored notebook,
    records that preprocessor in ``self.preprocessors`` and returns ``self``
    so that calls can be chained.

    Parameters
    ----------
    ntbk : string | instance of NotebookNode
        The input notebook.
    """

    def __init__(self, ntbk):
        self.ntbk = _check_nb_file(ntbk)
        self.preprocessors = []

    def __repr__(self):
        header = "Number of preprocessors: {}\n---".format(
            len(self.preprocessors))
        lines = [header]
        lines.extend(str(pre) for pre in self.preprocessors)
        return '\n'.join(lines)

    def clear(self, output=False, content=False, stderr=False, tag=None):
        """Clear the components of a notebook cell.

        Parameters
        ----------
        output : bool
            Whether to clear the output of cells.
        content : bool
            Whether to clear the content of cells.
        stderr : bool
            Whether to clear the stderr of cells.
        tag : string | None
            Only apply clearing to cells with a certain tag.

        Returns
        -------
        self : NotebookCleaner
            This instance, to allow method chaining.

        Raises
        ------
        ValueError
            If none of `output`, `content` or `stderr` is truthy.
        """
        if not any([output, content, stderr]):
            raise ValueError(
                "At least one of the clear options must be True.")
        # ClearCells uses the string 'None' as its "no tag filter" sentinel,
        # so `str(None)` deliberately maps onto that sentinel.
        pre = ClearCells(output=output, content=content,
                         stderr=stderr, tag=str(tag))
        self.ntbk = pre.preprocess(self.ntbk, {})[0]
        self.preprocessors.append(pre)
        return self

    def remove_cells(self, tag):
        """Remove cells that carry a specific tag.

        Parameters
        ----------
        tag : str
            The tag to look for in each cell's ``tags`` metadata. Any cell
            whose tags include `tag` will be removed.

        Returns
        -------
        self : NotebookCleaner
            This instance, to allow method chaining.
        """
        pre = RemoveCells(tag=tag)
        self.ntbk = pre.preprocess(self.ntbk, {})[0]
        self.preprocessors.append(pre)
        return self

    def replace_text(self, text_replace_begin=u'### SOLUTION BEGIN',
                     text_replace_end=u'### SOLUTION END',
                     replace_code=None, replace_md=None):
        """Create answer cells for students to fill out.

        This will remove all text between `text_replace_begin` and
        `text_replace_end`. Students should then give their answers in this
        section. Alternatively, a markdown cell will replace the student
        answer cell.

        Parameters
        ----------
        text_replace_begin : str
            A string to search for in input cells. If the string is
            found, then anything between it and `text_replace_end` is removed.
        text_replace_end : str
            The ending delimiter for solution cells.
        replace_code : str | None
            Text to add to code solution cells. If None, `nbgrader`
            default is used.
        replace_md : str | None
            Text to add to markdown solution cells. If None, a default
            template will be used.

        Returns
        -------
        self : NotebookCleaner
            This instance, to allow method chaining.
        """
        # NOTE: "delimeter" is the spelling of the keyword arguments passed
        # to ClearSolutions here; do not "correct" it.
        kwargs = dict(begin_solution_delimeter=text_replace_begin,
                      end_solution_delimeter=text_replace_end,
                      enforce_metadata=False)
        if replace_code is not None:
            kwargs['code_stub'] = dict(python=replace_code)
        if replace_md is None:
            replace_md = ('---\n## Student Answer'
                          '\n\n*Double-click and add your answer between the '
                          'lines*\n\n---')
        kwargs['text_stub'] = replace_md
        pre = ClearSolutions(**kwargs)
        self.ntbk = pre.preprocess(self.ntbk, {})[0]
        self.preprocessors.append(pre)
        return self

    def save(self, path_save):
        """Save the notebook to disk.

        Parameters
        ----------
        path_save : string
            The path for saving the file. Missing parent directories are
            created.
        """
        dir_save = os.path.dirname(path_save)
        print('Saving to {}'.format(path_save))
        # A bare filename has an empty dirname; `os.makedirs('')` would raise,
        # so only create the directory when there is one to create.
        if dir_save and not os.path.isdir(dir_save):
            os.makedirs(dir_save)
        nbf.write(self.ntbk, path_save)
PK xJ>>E3 3 nbclean/preprocessors.pyfrom traitlets import Unicode, Bool
from nbgrader.preprocessors import NbGraderPreprocessor
class RemoveCells(NbGraderPreprocessor):
    """Preprocessor that drops tagged cells from a notebook.

    This should not be used directly, instead, use the
    NotebookCleaner class.
    """

    # The string 'None' (not the None object) marks "no tag configured".
    tag = Unicode("None")

    def preprocess(self, nb, resources):
        """Rebuild ``nb['cells']`` without the cells tagged with ``self.tag``.

        NOTE(review): when ``self.tag`` is left at the 'None' sentinel no
        cell is kept at all, i.e. the notebook ends up with an empty cell
        list. Confirm that this is the intended meaning of "no tag".

        Returns the ``(nb, resources)`` pair, with ``nb`` modified in place.
        """
        cells = nb['cells']
        if self.tag == 'None':
            kept = []
        else:
            # Only keep the cells whose tags do not include the target tag
            kept = [cell for cell in cells
                    if self.tag not in cell['metadata'].get('tags', [])]
        nb['cells'] = kept
        return nb, resources
class ClearCells(NbGraderPreprocessor):
    """Preprocessor that clears parts of notebook cells.

    Depending on its flags it empties cell outputs, empties cell source,
    and/or strips the outputs named 'stderr'. This should not be used
    directly, instead, use the NotebookCleaner class.
    """

    output = Bool(True)
    content = Bool(False)
    stderr = Bool(True)
    # The string 'None' (not the None object) means "apply to every cell".
    tag = Unicode('None')

    def preprocess(self, nb, resources):
        """Clear the selected components of each matching cell in place.

        Cells are skipped when a tag is configured and the cell's ``tags``
        metadata does not include it. Returns the ``(nb, resources)`` pair.
        """
        filter_by_tag = self.tag != 'None'
        for cell in nb['cells']:
            # Decide whether this cell is processed at all
            if filter_by_tag:
                if self.tag not in cell['metadata'].get('tags', []):
                    continue

            # Cells without an 'outputs' key have nothing to clear there
            has_outputs = 'outputs' in cell.keys()

            # Drop every output
            if self.output is True and has_outputs:
                cell['outputs'] = []

            # Blank the cell source
            if self.content is True:
                cell['source'] = ''

            # Keep only the outputs that are not named 'stderr'
            if self.stderr is True and has_outputs:
                cell['outputs'] = [
                    item for item in cell['outputs']
                    if item.get('name', None) != 'stderr'
                ]
        return nb, resources

    def __repr__(self):
        return " Tag: {}".format(self.tag)
PK yJu nbclean/run.pyimport nbformat as nbf
import os
import os.path as op
from nbgrader.preprocessors import LimitOutput, Execute
from .utils import _check_nb_file
from glob import glob
from tqdm import tqdm
def run_notebook_directory(path, path_save=None, max_output_lines=1000,
                           overwrite=False):
    """Run all the notebooks in a directory and save them somewhere else.

    Parameters
    ----------
    path : str
        A path to a directory that contains jupyter notebooks.
        All notebooks in this folder ending in `.ipynb` will be run,
        and the outputs will be placed in `path_save`. This may
        optionally contain a wildcard matching ``.ipynb`` in which
        case only notebooks that match will be run.
    path_save : str | None
        A path to a directory to save the notebooks. If this doesn't exist,
        it will be created. If `None`, notebooks will not be saved.
    max_output_lines : int | None
        The maximum number of lines allowed in notebook outputs.
    overwrite : bool
        Whether to overwrite the output directory if it exists. When True,
        existing ``*-exe.ipynb`` files in `path_save` are deleted first.

    Returns
    -------
    notebooks : list
        A list of the executed notebooks, one for each input notebook.

    Raises
    ------
    ValueError
        If `path` neither exists nor matches anything as a wildcard
        pattern, or if `path_save` exists and `overwrite` is not True.
    """
    # A wildcard pattern is not itself an existing path, so also accept any
    # pattern that matches at least one file.
    if not op.exists(path) and not glob(path):
        raise ValueError("You've specified an input path that doesn't exist")
    # Fail fast: detect an output-directory conflict before spending time
    # executing every notebook.
    if (path_save is not None and op.exists(path_save) and
            overwrite is not True):
        raise ValueError('path_save exists and overwrite is not True')

    to_glob = op.join(path, '*.ipynb') if '.ipynb' not in path else path
    notebooks = glob(to_glob)

    # Execute notebooks
    outputs = [run_notebook(notebook, max_output_lines=max_output_lines)
               for notebook in tqdm(notebooks)]

    # Now save them
    if path_save is not None:
        print('Saving {} notebooks to: {}'.format(len(notebooks), path_save))
        if not op.exists(path_save):
            os.makedirs(path_save)
        else:
            print('Overwriting output directory')
            # Join with a separator so files *inside* the directory match.
            for ifile in glob(op.join(path_save, '*-exe.ipynb')):
                os.remove(ifile)
        for filename, notebook in zip(notebooks, outputs):
            # splitext copes with names that contain more than one dot.
            stem, ext = op.splitext(op.basename(filename))
            nbf.write(notebook, op.join(path_save, stem + '-exe' + ext))
    return outputs
def run_notebook(ntbk, max_output_lines=1000):
    """Execute a notebook and optionally truncate its outputs.

    Parameters
    ----------
    ntbk : string | instance of NotebookNode
        The input notebook.
    max_output_lines : int | None
        The maximum number of lines allowed in notebook outputs. The same
        limit is used for tracebacks. If None, outputs are not limited.

    Returns
    -------
    ntbk : instance of NotebookNode
        The notebook after every preprocessor has been applied.
    """
    ntbk = _check_nb_file(ntbk)

    # Execution always runs first; output limiting is an optional second step.
    pipeline = [Execute()]
    if max_output_lines is not None:
        limiter = LimitOutput(max_lines=max_output_lines,
                              max_traceback=max_output_lines)
        pipeline.append(limiter)

    for step in pipeline:
        ntbk, _resources = step.preprocess(ntbk, {})
    return ntbk
PK xJAg g nbclean/utils.pyimport nbformat as nbf
from nbformat.notebooknode import NotebookNode
from copy import deepcopy
def _check_nb_file(ntbk):
    """Return a deep copy of a notebook given as a path or a NotebookNode.

    Parameters
    ----------
    ntbk : string | instance of NotebookNode
        A path read with ``nbf.read(..., nbf.NO_CONVERT)``, or a notebook
        object that is already in memory.

    Returns
    -------
    ntbk : instance of NotebookNode
        A deep copy, so later edits do not touch the caller's object.

    Raises
    ------
    TypeError
        If `ntbk` is neither a string nor a `NotebookNode`.
    """
    if not isinstance(ntbk, str):
        if not isinstance(ntbk, NotebookNode):
            raise TypeError('`ntbk` must be type string or `NotebookNode`')
        loaded = ntbk
    else:
        loaded = nbf.read(ntbk, nbf.NO_CONVERT)
    return deepcopy(loaded)
PK !H|&U