# Source code for hera_stats.automate

import numpy as np
import json
import os, copy, re

# Fail gracefully if jupyter package is not installed
try:
    import nbformat
    from nbconvert.preprocessors import ExecutePreprocessor, CellExecutionError
except:
    print("WARNING: hera_stats.automate: jupyter is not installed; jupyter notebook automation disabled.")

def jupyter_replace_tags(fname_tmpl, replace, outfile=None, overwrite=False,
                         verbose=False):
    """
    Search through a Jupyter notebook file for tagged lines, replace them with
    new values, and then save into a new Jupyter notebook.

    Tags work in a simple way: If any line in the notebook contains a tag of
    the form '@tag' (conventionally written as an in-line comment, '# @tag'),
    and a key 'tag' exists in the 'replace' dict, the entire line will be
    replaced with 'tag = "value"', where 'value' is 'replace[tag]' formatted
    as a string. Any expression that was on that line will be completely
    replaced.

    Note that every line of every cell is scanned (not only code cells), and
    that any '@' followed by word characters counts as a tag, wherever it
    appears on the line. If a line contains several tags, only the first one
    is considered.

    Parameters
    ----------
    fname_tmpl : str
        Filename of input notebook that contains tags to be replaced.

    replace : dict
        Dictionary of tag:value pairs. The values will all be inserted as
        Python strings (wrapped in double quotes, without any escaping), and
        so the code in the Jupyter notebook should be prepared to do a type
        conversion if necessary.

    outfile : str, optional
        Filename to save the tag-replaced notebook to. If not specified, the
        updated JSON dict will be returned from this function. Default: None.

    overwrite : bool, optional
        If outfile is not None, whether to overwrite a notebook file if one
        with the same filename already exists. Default: False.

    verbose : bool, optional
        If True, print out tags as they are found. Default: False.

    Returns
    -------
    new_tree : JSON dict, optional
        If outfile=None, a dict containing the updated JSON data for the
        notebook is returned. Otherwise nothing is returned.

    Raises
    ------
    OSError
        If 'outfile' already exists and overwrite=False.
    """
    # Load Jupyter notebook as JSON file. The tree is private to this call,
    # so it can safely be modified in place.
    with open(fname_tmpl, 'r') as f:
        new_tree = json.load(f)
    if verbose:
        print("jupyter_replace_tags(): Running on '{}'".format(fname_tmpl))

    # A tag is an '@' immediately followed by one or more word characters.
    # Compiled once, outside the loops.
    tag_pattern = re.compile(r"@\w+")

    # Loop over cells and replace tagged strings
    num_cells = 0
    replaced = 0
    for cell in new_tree['cells']:
        num_cells += 1

        # The notebook format allows 'source' to be either a list of lines
        # or a single multi-line string. Work on a list of lines in both
        # cases (keeping line endings) so that string-valued cells are
        # scanned line by line rather than character by character.
        source = cell['source']
        source_is_list = isinstance(source, list)
        lines = source if source_is_list else source.splitlines(True)
        cell_modified = False

        # Loop over lines in cell
        for j, line in enumerate(lines):

            # Look for a tag; lines with no '@', or with only a floating
            # '@' symbol, are left untouched
            match = tag_pattern.search(line)
            if match is None:
                continue

            # Get tag name without the leading '@' (only the first tag on
            # the line is used)
            tag = match.group(0)[1:]

            # Check if tag exists in replace dict and then do replacement
            if tag in replace:
                if verbose: print("  Found valid tag:", tag)
                replaced += 1
                lines[j] = "{} = \"{}\"\n".format(tag, replace[tag])
                cell_modified = True
            else:
                if verbose: print("  Found unmatched tag:", tag)

        # List-valued sources were edited in place; string-valued sources
        # must be reassembled, preserving their original (string) type
        if cell_modified and not source_is_list:
            cell['source'] = "".join(lines)

    # Report status
    if verbose:
        print("  Number of cells: %d" % num_cells)
        print("  Replacements made: %d" % replaced)

    # Either save or return notebook data
    if outfile is not None:
        if os.path.exists(outfile) and not overwrite:
            raise OSError(
                "File '{}' already exists and overwrite=False.".format(outfile))
        with open(outfile, 'w') as f:
            json.dump(new_tree, f)
    else:
        return new_tree


def jupyter_run_notebook(tree=None, fname=None, outfile=None, rundir='.',
                         version=4, kernel='python3', timeout=600):
    """
    Run a Jupyter notebook programatically. The notebook to run can be passed
    as either a filename or a dict derived from JSON data.

    If the notebook experiences an error, a CellExecutionError will be raised.
    The notebook will still be saved to disk even if it errors though.

    Parameters
    ----------
    tree : dict, optional
        Dict containing JSON tree representing a Jupyter notebook.

    fname : str, optional
        Filename of Jupyter notebook to load. Exactly one of 'tree' and
        'fname' must be specified.

    outfile : str, optional
        File to store Jupyter notebook into after it has run. Default: None
        (no notebook file will be saved).

    rundir : str, optional
        Directory to run the script from. Default: '.' (current directory).

    version : int, optional
        Version of the Jupyter notebook format to read the notebook as.
        Default: 4.

    kernel : str, optional
        Name of Jupyter Python kernel to use. Default: 'python3'.

    timeout : int, optional
        Value passed as the 'timeout' option of nbconvert's
        ExecutePreprocessor (see the nbconvert documentation for its exact
        meaning). Default: 600.

    Raises
    ------
    NotImplementedError
        If the optional Jupyter dependencies (nbformat, nbconvert) could not
        be imported when this module was loaded.
    ValueError
        If neither or both of 'tree' and 'fname' are specified.
    CellExecutionError
        If a cell of the notebook fails while executing.
    """
    # Check that the optional Jupyter dependencies are available. If their
    # import failed at module load time, these names are simply undefined.
    try:
        _ = (nbformat, ExecutePreprocessor)
    except NameError:
        raise NotImplementedError("The 'jupyter' package must be installed "
                                  "to use this function.")

    # Check for valid arguments: exactly one of tree/fname must be given
    if (tree is None) == (fname is None):
        raise ValueError("Must specify either 'tree' or 'fname'.")

    # Load Jupyter notebook as JSON file
    if fname is not None:
        with open(fname, 'r') as f:
            tree = json.load(f)

    # Create NotebookNode object needed for execution
    nb = nbformat.reads(json.dumps(tree), as_version=version)

    # Validate notebook
    nbformat.validate(nb)

    # Initialise notebook preprocessor object
    execp = ExecutePreprocessor(timeout=timeout, kernel_name=kernel)

    # Execute notebook. Any error (e.g. CellExecutionError) propagates to the
    # caller, but only after the notebook has been written out.
    try:
        execp.preprocess(nb, {'metadata': {'path': rundir}})
    finally:
        # Write notebook file to disk if outfile specified
        if outfile is not None:
            with open(outfile, mode='w') as f:
                nbformat.write(nb, f)