# Source code for hera_stats.automate

import numpy as np
import json
import os, copy, re

# Fail gracefully if the jupyter packages are not installed. Only ImportError
# is caught, so unrelated failures (e.g. KeyboardInterrupt, or a bug inside
# one of the imported packages) are not silently hidden.
try:
    import nbformat
    from nbconvert.preprocessors import ExecutePreprocessor, CellExecutionError
except ImportError:
    print("WARNING: hera_stats.automate: jupyter is not installed; jupyter notebook automation disabled.")

def jupyter_replace_tags(fname_tmpl, replace, outfile=None, overwrite=False,
                         verbose=False):
    """
    Search through a Jupyter notebook file for tagged lines, replace them with
    new values, and then save into a new Jupyter notebook.

    Tags work in a simple way: If any line in the notebook has an in-line
    comment of the form '# @tag', and a key 'tag' exists in the 'replace'
    dict, the entire line will be replaced with 'tag = "value"', where 'value'
    is the value of 'replace[tag]', which is assumed to be a string. Any
    expression that was on that line will be completely replaced.

    Only the first '@word' occurrence on a line is considered. The value is
    inserted verbatim between double quotes; no escaping is applied.

    Cells whose 'source' entry is a single multi-line string (rather than a
    list of lines) are also handled: the string is split into lines, tagged
    lines are replaced, and the result is stored back as a single string.

    Parameters
    ----------
    fname_tmpl : str
        Filename of input notebook that contains tags to be replaced.

    replace : dict
        Dictionary of tag:value pairs. The values will all be inserted as
        Python strings, and so the code in the Jupyter notebook should be
        prepared to do a type conversion if necessary.

    outfile : str, optional
        Filename to save the tag-replaced notebook to. If not specified, the
        updated JSON dict will be returned from this function. Default: None.

    overwrite : bool, optional
        If outfile is not None, whether to overwrite a notebook file if one
        with the same filename already exists. Default: False.

    verbose : bool, optional
        If True, print out tags as they are found. Default: False.

    Returns
    -------
    new_tree : JSON dict, optional
        If outfile=None, a dict containing the updated JSON data for the
        notebook is returned. Otherwise nothing is returned.

    Raises
    ------
    OSError
        If 'outfile' already exists and overwrite=False.
    """
    # Load Jupyter notebook as JSON file. The tree is freshly loaded and not
    # shared with anyone else, so it can be modified in place.
    with open(fname_tmpl, 'r') as f:
        new_tree = json.load(f)

    if verbose:
        print("jupyter_replace_tags(): Running on '{}'".format(fname_tmpl))

    # Compile the tag pattern once (raw string avoids the invalid '\w' escape)
    tag_pattern = re.compile(r"@\w+")

    # Loop over cells and replace tagged strings
    num_cells = 0
    replaced = 0
    for cell in new_tree['cells']:
        num_cells += 1

        # The notebook format allows 'source' to be either a list of lines or
        # one multi-line string; normalise to a list of lines for processing.
        source = cell['source']
        is_list = isinstance(source, list)
        lines = source if is_list else source.splitlines(True)

        # Loop over lines in cell
        for j, line in enumerate(lines):
            # Look for a tag, denoted by an '@' followed by word characters;
            # floating '@' symbols do not match and are ignored
            match = tag_pattern.search(line)
            if match is None:
                continue

            # Get tag name (only the first tag on the line is used)
            tag = match.group(0)[1:]

            # Check if tag exists in replace dict and then do replacement
            if tag in replace:
                if verbose:
                    print(" Found valid tag:", tag)
                replaced += 1
                lines[j] = "{} = \"{}\"\n".format(tag, replace[tag])
            elif verbose:
                print(" Found unmatched tag:", tag)

        # Store string-typed sources back in their original form
        if not is_list:
            cell['source'] = "".join(lines)

    # Report status
    if verbose:
        print(" Number of cells: %d" % num_cells)
        print(" Replacements made: %d" % replaced)

    # Either save or return notebook data
    if outfile is None:
        return new_tree

    if os.path.exists(outfile) and not overwrite:
        raise OSError(
            "File '{}' already exists and overwrite=False.".format(outfile))
    with open(outfile, 'w') as f:
        json.dump(new_tree, f)
def jupyter_run_notebook(tree=None, fname=None, outfile=None, rundir='.',
                         version=4, kernel='python3', timeout=600):
    """
    Run a Jupyter notebook programmatically. The notebook to run can be passed
    as either a filename or a dict derived from JSON data.

    If the notebook experiences an error, a CellExecutionError will be raised.
    The notebook will still be saved to disk even if it errors though.

    Parameters
    ----------
    tree : dict, optional
        Dict containing JSON tree representing a Jupyter notebook.

    fname : str, optional
        Filename of Jupyter notebook to load. Only one of 'tree' and 'fname'
        should be specified.

    outfile : str, optional
        File to store Jupyter notebook into after it has run. Default: None
        (no notebook file will be saved).

    rundir : str, optional
        Directory to run the script from. Default: '.' (current directory).

    version : int, optional
        Version of Jupyter notebooks to use. Default: 4.

    kernel : str, optional
        Name of Jupyter Python kernel to use. Default: 'python3'.

    timeout : int, optional
        Timeout passed to the notebook ExecutePreprocessor. Default: 600.

    Raises
    ------
    NotImplementedError
        If the jupyter packages (nbformat / nbconvert) could not be imported.
    ValueError
        If neither or both of 'tree' and 'fname' are specified.
    CellExecutionError
        If a cell in the notebook fails while executing.
    """
    # Check that the optional jupyter imports at the top of the module
    # succeeded; if they failed, these names were never defined.
    try:
        _required = (nbformat, ExecutePreprocessor)
    except NameError:
        raise NotImplementedError("The 'jupyter' package must be installed "
                                  "to use this function.")

    # Check for valid arguments: exactly one of 'tree' and 'fname' is needed
    if (tree is None) == (fname is None):
        raise ValueError("Must specify either 'tree' or 'fname'.")

    # Load Jupyter notebook as JSON file
    if fname is not None:
        with open(fname, 'r') as f:
            tree = json.load(f)

    # Create NotebookNode object needed for execution
    nb = nbformat.reads(json.dumps(tree), as_version=version)

    # Validate notebook
    nbformat.validate(nb)

    # Initialise notebook preprocessor object
    execp = ExecutePreprocessor(timeout=timeout, kernel_name=kernel)

    # Execute notebook; any error propagates to the caller, but the notebook
    # (including whatever output was produced) is still written to disk first
    try:
        execp.preprocess(nb, {'metadata': {'path': rundir}})
    finally:
        # Write notebook file to disk if outfile specified
        if outfile is not None:
            with open(outfile, mode='w') as f:
                nbformat.write(nb, f)