bokeh.plotting.save

Here are examples of the Python API bokeh.plotting.save, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.

21 Examples 7

3 Source : plot_apple_watch_data.py
with Apache License 2.0
from openPfizer

def save_plot(plot, title):
    """
    Write a bokeh plot to an HTML file inside ``apple_watch_plots/``.

    :param plot: bokeh plot object
    :param title: base name (without extension) used for the HTML file
    :return: None
    """
    destination = "apple_watch_plots/{}.html".format(title)
    # register the target file first; save() then writes to it
    output_file(destination)
    save(plot)

def plot_heart_rate(apple_watch):

0 Source : html_reporting.py
with Apache License 2.0
from allegroai

def report_html_periodic_table(logger, iteration=0):
    # type: (Logger, int) -> ()
    """
    Report an interactive (html) periodic table to the debug samples section.

    Builds a bokeh figure from the module-level ``elements`` table, saves it as
    ``periodic.html`` and hands that file to ``logger.report_media``.

    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    html_path = "periodic.html"
    output_file(html_path)

    periods = ["I", "II", "III", "IV", "V", "VI", "VII"]
    groups = [str(x) for x in range(1, 19)]

    # work on a copy so the shared ``elements`` table is left untouched
    table = elements.copy()
    table["atomic mass"] = table["atomic mass"].astype(str)
    table["group"] = table["group"].astype(str)
    # translate the numeric period into its roman numeral (used as y factor)
    table["period"] = [periods[x - 1] for x in table.period]
    # drop elements without a group as well as Lr and Lu
    table = table[table.group != "-"]
    table = table[table.symbol != "Lr"]
    table = table[table.symbol != "Lu"]

    cmap = {
        "alkali metal": "#a6cee3",
        "alkaline earth metal": "#1f78b4",
        "metal": "#d93b43",
        "halogen": "#999d9a",
        "metalloid": "#e08d49",
        "noble gas": "#eaeaea",
        "nonmetal": "#f1d4Af",
        "transition metal": "#599d7A",
    }
    source = ColumnDataSource(table)
    p = figure(
        plot_width=900,
        plot_height=500,
        title="Periodic Table (omitting LA and AC Series)",
        x_range=groups,
        y_range=list(reversed(periods)),
        toolbar_location=None,
        tools="hover",
    )
    p.rect(
        "group",
        "period",
        0.95,
        0.95,
        source=source,
        fill_alpha=0.6,
        # legend_field builds one legend entry per distinct value of the
        # "metal" column; legend_label would create a single entry that is
        # literally titled "metal" while the cells are coloured per category.
        legend_field="metal",
        color=factor_cmap(
            "metal", palette=list(cmap.values()), factors=list(cmap.keys())
        ),
    )

    # four text layers per cell: symbol, atomic number, name and atomic mass
    text_props = {"source": source, "text_align": "left", "text_baseline": "middle"}
    x = dodge("group", -0.4, range=p.x_range)
    r = p.text(x=x, y="period", text="symbol", **text_props)
    r.glyph.text_font_style = "bold"
    r = p.text(
        x=x, y=dodge("period", 0.3, range=p.y_range), text="atomic number", **text_props
    )
    r.glyph.text_font_size = "8pt"
    r = p.text(
        x=x, y=dodge("period", -0.35, range=p.y_range), text="name", **text_props
    )
    r.glyph.text_font_size = "5pt"
    r = p.text(
        x=x, y=dodge("period", -0.2, range=p.y_range), text="atomic mass", **text_props
    )
    r.glyph.text_font_size = "5pt"
    # placeholders for the omitted LA / AC series in group 3
    p.text(
        x=["3", "3"],
        y=["VI", "VII"],
        text=["LA", "AC"],
        text_align="center",
        text_baseline="middle",
    )
    p.hover.tooltips = [
        ("Name", "@name"),
        ("Atomic number", "@{atomic number}"),
        ("Atomic mass", "@{atomic mass}"),
        ("Type", "@metal"),
        ("CPK color", "$color[hex, swatch]:CPK"),
        ("Electronic configuration", "@{electronic configuration}"),
    ]
    # strip outline, grid and axis decoration
    p.outline_line_color = None
    p.grid.grid_line_color = None
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.axis.major_label_standoff = 0
    p.legend.orientation = "horizontal"
    p.legend.location = "top_center"
    save(p)
    logger.report_media("html", "periodic_html", iteration=iteration, local_path=html_path)


def report_html_groupby(logger, iteration=0):

0 Source : html_reporting.py
with Apache License 2.0
from allegroai

def report_html_groupby(logger, iteration=0):
    # type: (Logger, int) -> ()
    """
    Report a bokeh nested group-by bar chart (html) to the debug samples section.

    The chart is saved as ``bar_pandas_groupby_nested.html`` and that file is
    passed to ``logger.report_media``.

    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    html_path = "bar_pandas_groupby_nested.html"
    output_file(html_path)

    # cast to strings before grouping (cyl values are used as colour factors below)
    for column in ("cyl", "yr"):
        bokeh_df[column] = bokeh_df[column].astype(str)

    grouped = bokeh_df.groupby(by=["cyl", "mfr"])
    cylinder_factors = sorted(bokeh_df.cyl.unique())
    fill_by_cylinder = factor_cmap(
        "cyl_mfr", palette=Spectral5, factors=cylinder_factors, end=1
    )

    hover_rows = [("MPG", "@mpg_mean"), ("Cyl, Mfr", "@cyl_mfr")]
    chart = figure(
        plot_width=800,
        plot_height=300,
        title="Mean MPG by # Cylinders and Manufacturer",
        x_range=grouped,
        toolbar_location=None,
        tooltips=hover_rows,
    )
    chart.vbar(
        x="cyl_mfr",
        top="mpg_mean",
        width=1,
        source=grouped,
        line_color="white",
        fill_color=fill_by_cylinder,
    )

    # axis / frame cosmetics
    chart.y_range.start = 0
    chart.x_range.range_padding = 0.05
    chart.xgrid.grid_line_color = None
    chart.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
    chart.xaxis.major_label_orientation = 1.2
    chart.outline_line_color = None

    save(chart)
    logger.report_media(
        "html",
        "pandas_groupby_nested_html",
        iteration=iteration,
        local_path=html_path,
    )


def report_html_graph(logger, iteration=0):

0 Source : html_reporting.py
with Apache License 2.0
from allegroai

def report_html_graph(logger, iteration=0):
    # type: (Logger, int) -> ()
    """
    Report a bokeh graph (html) to the debug samples section.

    Draws a star graph (node 0 connected to every node) with the nodes placed
    evenly on the unit circle, saves it as ``graph.html`` and passes that file
    to ``logger.report_media``.

    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    html_path = "graph.html"
    # NOTE: the node colours come from Spectral8, so this count is expected to
    # stay in sync with the palette size.
    nodes = 8
    node_indices = list(range(nodes))
    plot = figure(
        title="Graph Layout Demonstration",
        x_range=(-1.1, 1.1),
        y_range=(-1.1, 1.1),
        tools="",
        toolbar_location=None,
    )
    graph = GraphRenderer()
    graph.node_renderer.data_source.add(node_indices, "index")
    graph.node_renderer.data_source.add(Spectral8, "color")
    graph.node_renderer.glyph = Ellipse(height=0.1, width=0.2, fill_color="color")
    # every edge starts at node 0 and ends at one of the nodes
    graph.edge_renderer.data_source.data = dict(start=[0] * nodes, end=node_indices)
    # start of layout code
    # spread the nodes evenly over a full turn; use ``nodes`` rather than a
    # repeated literal so the layout follows the node count
    circ = [i * 2 * math.pi / nodes for i in node_indices]
    x = [math.cos(i) for i in circ]
    y = [math.sin(i) for i in circ]
    graph_layout = dict(zip(node_indices, zip(x, y)))
    graph.layout_provider = StaticLayoutProvider(graph_layout=graph_layout)
    plot.renderers.append(graph)
    output_file(html_path)
    save(plot)
    logger.report_media("html", "Graph_html", iteration=iteration, local_path=html_path)


def report_html_image(logger, iteration=0):

0 Source : html_reporting.py
with Apache License 2.0
from allegroai

def report_html_image(logger, iteration=0):
    # type: (Logger, int) -> ()
    """
    Report a bokeh image plot (html) to the debug samples section.

    Renders sin(x) * cos(y) sampled on a regular grid, saves the figure as
    ``image.html`` and passes that file to ``logger.report_media``.

    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    html_path = "image.html"
    resolution = 500

    # sample the function on a resolution x resolution grid over [0, 10] x [0, 10]
    axis_x = np.linspace(0, 10, resolution)
    axis_y = np.linspace(0, 10, resolution)
    grid_x, grid_y = np.meshgrid(axis_x, axis_y)
    values = np.sin(grid_x) * np.cos(grid_y)

    fig = figure(tooltips=[("x", "$x"), ("y", "$y"), ("value", "@image")])
    fig.x_range.range_padding = 0
    fig.y_range.range_padding = 0
    # the image parameter takes a list of 2D arrays, hence the wrapping list
    fig.image(image=[values], x=0, y=0, dw=10, dh=10, palette="Spectral11", level="image")
    fig.grid.grid_line_width = 0.5

    output_file(html_path, title="image.py example")
    save(fig)
    logger.report_media("html", "Spectral_html", iteration=iteration, local_path=html_path)


def main():

0 Source : plots.py
with MIT License
from ArtesiaWater

    def interactive_plot(self,
                         savedir=None,
                         plot_columns=('stand_m_tov_nap',),
                         markers=('line',),
                         p=None,
                         plot_legend_names=('',),
                         plot_freq=(None,),
                         tmin=None,
                         tmax=None,
                         hoover_names=('Peil',),
                         hoover_date_format="%Y-%m-%d",
                         ylabel='m NAP',
                         plot_colors=('blue',),
                         add_filter_to_legend=False,
                         return_filename=False):
        """Create an interactive plot of the observations using bokeh.

        Todo:

        - add options for hovers, markers, linestyle

        Parameters
        ----------
        savedir : str, optional
            directory in which the plot is saved as '<name>.html'; the
            directory is created when missing. Nothing is saved when None.
        plot_columns : list of str, optional
            name of the column in the obs df that will be plotted with bokeh
        markers : list of str, optional
            type of markers that can be used for plot, 'line' (or 'l') and
            'circle' (or 'c') are supported
        p : bokeh.plotting.figure, optional
            reference to existing figure, if p is None a new figure is created
        plot_legend_names : list of str, optional
            legend in bokeh plot
        plot_freq : list of str, optional
            bokeh plot is resampled with this frequency to reduce the size,
            the first value of every interval is kept
        tmin : dt.datetime, optional
            start date for timeseries plot
        tmax : dt.datetime, optional
            end date for timeseries plot
        hoover_names : list of str, optional
            names will be displayed together with the plot_column value
            when hovering over plot
        hoover_date_format : str, optional
            date format to use when hovering over a plot
        ylabel : str, optional
            label on the y-axis, only applied when a new figure is created
        plot_colors : list of str, optional
            plot_colors used for the plots
        add_filter_to_legend : boolean, optional
            if True the attributes bovenkant_filter and onderkant_filter
            are added to the legend name
        return_filename : boolean, optional
            if True filename will be returned

        Returns
        -------
        fname_plot : str or bokeh plot
            filename of the bokeh plot or reference to bokeh plot

        Raises
        ------
        ValueError
            if there is no data in plot_columns between tmin and tmax
        NotImplementedError
            if an entry of markers is not one of 'line', 'l', 'circle', 'c'
        """

        from bokeh.plotting import figure, save
        from bokeh.models import ColumnDataSource, HoverTool
        from bokeh.resources import CDN

        # create plot dataframe
        plot_df = self._obj[tmin:tmax].copy()
        plot_df['date'] = plot_df.index.strftime(hoover_date_format)
        if plot_df.empty or plot_df[list(plot_columns)].isna().all().all():
            raise ValueError(
                '{} has no data between {} and {}'.format(self._obj.name, tmin, tmax))

        # create plot
        if p is None:
            p = figure(plot_width=600, plot_height=400, x_axis_type='datetime',
                       title='')
            p.yaxis.axis_label = ylabel

        # get x axis
        xcol = self._obj.index.name
        if xcol is None:
            xcol = 'index'

        # get color: repeat the given colors so every column has one
        if len(plot_colors) < len(plot_columns):
            plot_colors = list(plot_colors) * len(plot_columns)

        # base for hover tooltips, shared by all columns
        base_tooltips = [('date', "@date")]

        # plot multiple columns
        for i, column in enumerate(plot_columns):
            # legend name
            if add_filter_to_legend:
                lname = '{} {} (NAP {:.2f} - {:.2f})'.format(plot_legend_names[i], self._obj.name,
                                                             self._obj.onderkant_filter,
                                                             self._obj.bovenkant_filter)
            else:
                lname = '{} {}'.format(plot_legend_names[i], self._obj.name)

            # resample data
            column_df = plot_df[[column, 'date']]
            if plot_freq[i] is not None:
                column_df = column_df.resample(plot_freq[i]).first()
            source = ColumnDataSource(column_df)

            # pick the glyph method; both take the same arguments
            marker = markers[i]
            if marker in ('line', 'l'):
                draw = p.line
            elif marker in ('circle', 'c'):
                draw = p.circle
            else:
                raise NotImplementedError("marker '{}' invalid. Only line and "
                                          "circle are currently available".format(marker))
            renderer = draw(xcol, column, source=source, color=plot_colors[i],
                            legend_label=lname,
                            alpha=0.8, muted_alpha=0.2)

            # one hover tool per renderer so each shows its own column value
            column_tooltips = base_tooltips + [(hoover_names[i], "@{}".format(column))]
            hover = HoverTool(renderers=[renderer], tooltips=column_tooltips, mode='vline')
            p.add_tools(hover)

        p.legend.location = "top_left"
        p.legend.click_policy = "mute"

        # save plot
        if savedir is not None:
            os.makedirs(savedir, exist_ok=True)
            self._obj.iplot_fname = os.path.join(
                savedir, self._obj.name + '.html')
            save(p, self._obj.iplot_fname, resources=CDN, title=self._obj.name)

        if return_filename:
            # NOTE(review): iplot_fname is only assigned above when savedir is
            # given; with savedir=None this relies on an earlier assignment.
            return self._obj.iplot_fname
        else:
            return p

0 Source : dimens_reduction.py
with Mozilla Public License 2.0
from astrazeneca-cgr-publications

    def plot_interactive_viz(self, data, highlighted_genes, method, pos_label, neg_label, show_plot=False, save_plot=False):
        '''
        Draw an interactive scatter plot of a dimensionality reduction
        embedding (e.g. PCA, t-SNE), colouring points by their known-gene
        label and marking the selected genes.

        The HTML file is always written; the plot is opened when show_plot
        is set and an SVG export is made when save_plot is set.
        '''
        known_genes_highlight_color = '#31a354'
        label_to_color = {pos_label: '#ef3b2c', neg_label: '#bdbdbd'}

        # Base colour per point, derived from the known_gene label
        data['colors'] = data.known_gene.copy()
        data = data.replace({'colors': label_to_color})
        data = data.sort_values(by=[self.cfg.Y], ascending=True)

        for gene in ('PKD1', 'PKD2'):
            data.loc[data['Gene_Name'] == gene, 'colors'] = known_genes_highlight_color

        # Move the highlighted genes to the end of the frame and recolour them
        is_highlighted = data['Gene_Name'].isin(highlighted_genes)
        data = pd.concat([data[~is_highlighted], data.loc[is_highlighted, :]], axis=0)
        data.loc[data['Gene_Name'].isin(highlighted_genes), 'colors'] = '#252525'

        # Human readable Yes/No version of the label
        data['annotation'] = data.known_gene.copy()
        for label, text in ((pos_label, 'Yes'), (neg_label, 'No')):
            data.loc[data.annotation == label, 'annotation'] = text

        # Plot
        source = ColumnDataSource({
            'x': data['x'],
            'y': data['y'],
            'color': data['colors'],
            'content': data['Gene_Name'],
            'annot': data['annotation'],
        })

        interact_viz = figure(
            plot_width=900,
            plot_height=900,
            title=method,
            tools="pan,wheel_zoom,box_zoom,reset,hover,previewsave",
            x_axis_type=None,
            y_axis_type=None,
            min_border=1,
        )
        interact_viz.scatter(
            x='x',
            y='y',
            source=source,
            color='color',
            alpha=0.8,
            size=10,
            legend=method,
        )

        # hover tools
        hover = interact_viz.select(dict(type=HoverTool))
        hover.tooltips = [("gene", "@content")]
        interact_viz.legend.location = "top_left"

        html_name = method + "_interactive_plot.html"
        output_file(str(self.cfg.unsuperv_figs_out / html_name))
        save(interact_viz)

        if show_plot:
            show(interact_viz)

        if save_plot:
            interact_viz.output_backend = "svg"
            svg_name = method + '_interactive_plot.svg'
            export_svgs(interact_viz, filename=(self.cfg.unsuperv_figs_out / svg_name))

0 Source : plots.py
with Apache License 2.0
from awslabs

def mousover_plot(datadict, attr_x, attr_y, attr_color=None, attr_size=None, save_file=None, plot_title="",
                  point_transparency = 0.5, point_size=20, default_color="#2222aa", hidden_keys=None, show_plot=False):
    """ Produces dynamic scatter plot that can be interacted with by mousing over each point to see its label
        Args:
            datadict (dict): keys contain attributes, values of lists of data from each attribute to plot (each list index corresponds to datapoint).
                             The values of all extra keys in this dict are considered (string) labels to assign to datapoints when they are moused over.
                             Apply _formatDict() to any entries in datadict which are themselves dicts.
                             The dict is not modified; helper columns are added to an internal copy.
            attr_x (str): name of column in dataframe whose values are shown on x-axis (eg. 'latency'). Can be categorical or numeric values
            attr_y (str): name of column in dataframe whose values are shown on y-axis (eg. 'validation performance'). Must be numeric values.
            attr_color (str): name of column in dataframe whose values determine color of dots  (eg. one of the hyperparameters). Can be categorical or numeric values
            attr_size (str): name of column in dataframe whose values determine size of dots (eg. 'memory consumption'). Must be numeric values.
            save_file (str): where to save plot to (html) file (if None, plot is not saved)
            plot_title (str): Title of plot and html file
            point_transparency (float): alpha value of points, lower = more transparent
            point_size (int): size of points, higher = larger
            default_color (str): color of the points when attr_color is None
            hidden_keys (list[str]): which keys of datadict NOT to show labels for (None = show all). The list is not modified.
            show_plot (bool): whether to show plot
        Returns:
            None
        Raises:
            ValueError: if the lists in datadict do not all have the same length.
    """
    try:
        with warning_filter():
            import bokeh
            from bokeh.plotting import output_file, ColumnDataSource, show, figure, save
            from bokeh.models import HoverTool, CategoricalColorMapper, LinearColorMapper, Legend, LegendItem, ColorBar
            from bokeh.palettes import Category20
    except ImportError:
        warnings.warn('AutoGluon summary plots cannot be created because bokeh is not installed. To see plots, please do: "pip install bokeh==2.0.1"')
        return None

    n = len(datadict[attr_x])
    for key in datadict.keys():  # Check lengths are all the same
        if len(datadict[key]) != n:
            raise ValueError("Key %s in datadict has different length than %s" % (key, attr_x))

    # Helper columns / keys are added below: work on copies so neither the
    # caller's dict nor the caller's (or a shared default) list is mutated.
    datadict = dict(datadict)
    hidden_keys = [] if hidden_keys is None else list(hidden_keys)

    attr_x_is_string = any(isinstance(val, str) for val in datadict[attr_x])
    if attr_x_is_string:
        # distinct x-values in first-seen order, so the axis order is reproducible
        attr_x_levels = list(OrderedDict.fromkeys(datadict[attr_x]))
        level_to_index = {level: idx for idx, level in enumerate(attr_x_levels)}
        attr_x2 = attr_x + "___"  # this key must not already be in datadict.
        hidden_keys.append(attr_x2)
        datadict[attr_x2] = [level_to_index[category] for category in datadict[attr_x]]  # convert to ints

    if attr_color is not None:
        color_datavals = datadict[attr_color]
        attr_color_is_string = any(isinstance(val, str) for val in color_datavals)
        if attr_color_is_string:
            attr_color_levels = list(OrderedDict.fromkeys(color_datavals))
            colorpalette = Category20[20]
            # take every second palette entry, wrapping around when there are many levels
            color_mapper = CategoricalColorMapper(factors=attr_color_levels, palette=[colorpalette[2*i % len(colorpalette)] for i in range(len(attr_color_levels))])
        else:
            color_mapper = LinearColorMapper(palette='Magma256', low=min(color_datavals), high=max(color_datavals)*1.25)
        default_color = {'field': attr_color, 'transform': color_mapper}

    if attr_size is not None:  # different size for each point, ensure mean-size == point_size
        attr_size2 = attr_size + "____"
        hidden_keys.append(attr_size2)
        og_sizevals = np.array(datadict[attr_size])
        size_std = np.std(og_sizevals)
        if size_std > 0:
            sizevals = point_size + (og_sizevals - np.mean(og_sizevals))/size_std * (point_size/2)
        else:
            # all values identical: standardizing would divide by zero, use the plain point size
            sizevals = np.full(og_sizevals.shape, float(point_size))
        if np.min(sizevals) < 0:
            sizevals = -np.min(sizevals) + sizevals + 1.0
        datadict[attr_size2] = list(sizevals)
        point_size = attr_size2

    if save_file is not None:
        output_file(save_file, title=plot_title)
        print("Plot summary of models saved to file: %s" % save_file)

    source = ColumnDataSource(datadict)
    TOOLS="crosshair,pan,wheel_zoom,box_zoom,reset,hover,save"
    p = figure(title=plot_title, tools=TOOLS)
    # categorical x-values are plotted through their integer codes
    x_field = attr_x2 if attr_x_is_string else attr_x
    circ = p.circle(x_field, attr_y, line_color=default_color, line_alpha = point_transparency,
                    fill_color=default_color, fill_alpha=point_transparency, size=point_size, source=source)
    hover = p.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([(key,'@'+key+'{safe}') for key in datadict.keys() if key not in hidden_keys])
    # Format axes:
    p.xaxis.axis_label = attr_x
    p.yaxis.axis_label = attr_y
    if attr_x_is_string: # add x-ticks:
        p.xaxis.ticker = list(range(len(attr_x_levels)))
        p.xaxis.major_label_overrides = {i: level for i, level in enumerate(attr_x_levels)}

    # Legend additions:
    if attr_color is not None and attr_color_is_string:
        # each legend item points at the first datapoint that has the level
        legend_it = [LegendItem(label=level, renderers=[circ], index=color_datavals.index(level))
                     for level in attr_color_levels]
        p.add_layout(Legend(items=legend_it, location=(0, 0)), 'right')

    if attr_color is not None and not attr_color_is_string:
        color_bar = ColorBar(color_mapper=color_mapper, title = attr_color,
                             label_standoff=12, border_line_color=None, location=(0,0))
        p.add_layout(color_bar, 'right')

    if attr_size is not None:
        p.add_layout(Legend(items=[LegendItem(label='Size of points based on "'+attr_size + '"')]), 'below')

    if show_plot:
        show(p)
    elif save_file is not None:
        save(p)

0 Source : draw_heatmaps.py
with GNU Affero General Public License v3.0
from DennisSchmitz

def draw_heatmaps(df, outfile, title, taxonomic_rank, colour):
    """
    Draw heatmaps for the given input dataframe, to
    the specified file with the given title.
    """
    # If the sample contains only superkingdom information, use that:
    if taxonomic_rank == "superkingdom":
        # create source info
        # and set hovertool tooltip parameters
        samples = df["Sample_name"].astype(str)
        assigned = df["superkingdom"].astype(str)
        reads = df["reads"].astype(float)
        percent_of_total = df["Percentage"].astype(float)

        colors = len(reads) * colour  # multiply to make an equally long list

        max_load = max(percent_of_total)
        alphas = [min(x / float(max_load), 0.9) + 0.1 for x in percent_of_total]

        source = ColumnDataSource(
            data=dict(
                samples=samples,
                assigned=assigned,
                reads=reads,
                percent_of_total=percent_of_total,
                colors=colors,
                alphas=alphas,
            )
        )

        y_value = (assigned, "assigned")

    # Otherwise, create the usual heatmap input info for each
    # (relevant) taxonomic rank down to species.
    else:
        # Remove 'unclassified' taxa: NaN in dataframe
        df = df[df[taxonomic_rank].notnull()]

        # Check if the dataframe is empty
        if df.empty:
            # If so, warn the user and exit
            return (None, False)

        else:
            # If it is not empty, continue normally
            if (
                max(pd.DataFrame(df.groupby(["Sample_name", taxonomic_rank]).size())[0])
                > 3
            ):
                # if there are taxa with more than 3 contigs *in one sample*
                # the hover info boxes will be too many, so
                # aggregate statistics per taxon

                aggregated = True

                new_df = pd.DataFrame(
                    df.groupby(["Sample_name", taxonomic_rank]).size()
                ).reset_index()
                new_df = new_df.rename(columns={0: "Number_of_contigs"})

                min_df = pd.DataFrame(
                    df.groupby(["Sample_name", taxonomic_rank]).min()
                ).reset_index()
                max_df = pd.DataFrame(
                    df.groupby(["Sample_name", taxonomic_rank]).max()
                ).reset_index()
                sum_df = pd.DataFrame(
                    df.groupby(["Sample_name", taxonomic_rank]).sum()
                ).reset_index()
                avg_df = pd.DataFrame(
                    df.groupby(["Sample_name", taxonomic_rank]).mean()
                ).reset_index()

                for column in [
                    "Plus_reads",
                    "Minus_reads",
                    "Avg_fold",
                    "Length",
                    "Percentage",
                    "Nr_ORFs",
                ]:
                    min_df = min_df.rename(columns={column: "MIN_%s" % column})
                    max_df = max_df.rename(columns={column: "MAX_%s" % column})
                    sum_df = sum_df.rename(columns={column: "SUM_%s" % column})
                    avg_df = avg_df.rename(columns={column: "AVG_%s" % column})

                    new_df["MIN_%s" % column] = min_df["MIN_%s" % column]
                    new_df["MAX_%s" % column] = max_df["MAX_%s" % column]
                    new_df["SUM_%s" % column] = sum_df["SUM_%s" % column]
                    new_df["AVG_%s" % column] = avg_df["AVG_%s" % column]

                for stat in ["MIN", "MAX", "SUM", "AVG"]:
                    new_df["%s_reads" % stat] = (
                        new_df["%s_Minus_reads" % stat] + new_df["%s_Plus_reads" % stat]
                    )

                new_df["tax_name"] = min_df["tax_name"]
                new_df["taxon"] = min_df[taxonomic_rank]
                new_df["total_reads"] = df["read_pairs"]

                new_df = new_df.fillna(0)

                samples = new_df["Sample_name"].astype(str)
                nr_contigs = new_df["Number_of_contigs"].astype(int)
                assigned = new_df["tax_name"].astype(str)
                taxonomy = new_df["taxon"].astype(str)
                min_reads = new_df["MIN_reads"].astype(int)
                max_reads = new_df["MAX_reads"].astype(int)
                sum_reads = new_df["SUM_reads"].astype(int)
                avg_reads = new_df["AVG_reads"].astype(int)
                total_reads = new_df["total_reads"].astype(int)
                min_percentage = new_df["MIN_Percentage"].astype(float)
                max_percentage = new_df["MAX_Percentage"].astype(float)
                sum_percentage = new_df["SUM_Percentage"].astype(float)
                avg_percentage = new_df["AVG_Percentage"].astype(float)
                min_coverage = new_df["MIN_Avg_fold"].astype(int)
                max_coverage = new_df["MAX_Avg_fold"].astype(int)
                sum_coverage = new_df["SUM_Avg_fold"].astype(int)
                avg_coverage = new_df["AVG_Avg_fold"].astype(int)
                min_length = new_df["MIN_Length"].astype(int)
                max_length = new_df["MAX_Length"].astype(int)
                sum_length = new_df["SUM_Length"].astype(int)
                avg_length = new_df["AVG_Length"].astype(int)
                min_nr_orfs = new_df["MIN_Nr_ORFs"].astype(int)
                max_nr_orfs = new_df["MAX_Nr_ORFs"].astype(int)
                sum_nr_orfs = new_df["SUM_Nr_ORFs"].astype(int)
                avg_nr_orfs = new_df["AVG_Nr_ORFs"].astype(int)

                colors = len(samples) * colour

                max_load = max(avg_percentage)
                alphas = [min(x / float(max_load), 0.9) + 0.1 for x in avg_percentage]
                # scale darkness to the average percentage of reads

                source = ColumnDataSource(
                    data=dict(
                        samples=samples,
                        nr_contigs=nr_contigs,
                        assigned=assigned,
                        taxonomy=taxonomy,
                        min_reads=min_reads,
                        max_reads=max_reads,
                        sum_reads=sum_reads,
                        avg_reads=avg_reads,
                        total_reads=total_reads,
                        min_percentage=min_percentage,
                        max_percentage=max_percentage,
                        sum_percentage=sum_percentage,
                        avg_percentage=avg_percentage,
                        min_coverage=min_coverage,
                        max_coverage=max_coverage,
                        sum_coverage=sum_coverage,
                        avg_coverage=avg_coverage,
                        min_length=min_length,
                        max_length=max_length,
                        sum_length=sum_length,
                        avg_length=avg_length,
                        min_nr_orfs=min_nr_orfs,
                        max_nr_orfs=max_nr_orfs,
                        sum_nr_orfs=sum_nr_orfs,
                        avg_nr_orfs=avg_nr_orfs,
                        colors=colors,
                        alphas=alphas,
                    )
                )

            else:
                # no taxon has too many contigs assigned per sample,
                # so create a plot for everything

                aggregated = False

                df.fillna(0, inplace=True)

                samples = df["Sample_name"].astype(str)
                scaffolds = df["scaffold_name"].astype(str)
                assigned = df["tax_name"].astype(str)
                taxonomy = df[taxonomic_rank].astype(str)
                reads = df["reads"].astype(int)
                total_reads = df["read_pairs"].astype(int)
                percent_of_total = df["Percentage"].astype(float)
                coverage = df["Avg_fold"].astype(int)
                contig_length = df["Length"].astype(int)
                nr_orfs = df["Nr_ORFs"].astype(int)

                colors = len(reads) * colour  # multiply to make an equally long list

                max_load = max(percent_of_total)
                alphas = [min(x / float(max_load), 0.9) + 0.1 for x in percent_of_total]

                source = ColumnDataSource(
                    data=dict(
                        samples=samples,
                        scaffolds=scaffolds,
                        assigned=assigned,
                        taxonomy=taxonomy,
                        reads=reads,
                        total_reads=total_reads,
                        percent_of_total=percent_of_total,
                        coverage=coverage,
                        contig_length=contig_length,
                        nr_orfs=nr_orfs,
                        colors=colors,
                        alphas=alphas,
                    )
                )

            y_value = (taxonomy, "taxonomy")

    TOOLS = "hover, save, pan, box_zoom, wheel_zoom, reset"

    p = figure(
        title=title,
        # If desired, the sample can be displayed as "Run x, sample y"
        # -> uncomment the next line
        # x_range = [ "Run %s, sample %s" % (x.split('_')[0], x.split('_')[1]) for x in list(sorted(set(samples))) ],
        x_range=list(sorted(set(df["Sample_name"]))),
        y_range=list(
            reversed(sorted(set(y_value[0])))
        ),  # reverse to order 'from top to bottom'
        x_axis_location="above",
        toolbar_location="right",
        tools=TOOLS,
    )

    # Edit the size of the heatmap when there are many samples and/or taxa
    if len(set(samples)) > 20:
        p.plot_width = int(len(set(samples)) * 25)
    else:
        pass
    # Adjust heatmap sizes depending on the number of
    # taxa observed (not applicable for superkingdom heatmap)
    if taxonomic_rank != "superkingdom":
        if len(set(taxonomy)) > 100:
            p.plot_height = int(p.plot_height * 3)
            p.plot_width = int(p.plot_width * 1.5)
        elif len(set(taxonomy)) > 50:
            p.plot_height = int(p.plot_height * 2)
            p.plot_width = int(p.plot_width * 1.2)
        elif len(set(taxonomy)) > 25:
            p.plot_height = int(p.plot_height * 1.2)
        else:
            pass

        # And set tooltip depending on superkingdoms

        if aggregated:
            # An aggregated format requires a different hover tooltip
            p.select_one(HoverTool).tooltips = [
                ("Sample", "@samples"),
                ("Taxon", "@assigned"),
                ("Number of scaffolds", "@nr_contigs"),
                # ('-----', ""), # If you like a separator in the tooltip
                (
                    "Number of reads total (min, avg, max)",
                    "@sum_reads (@min_reads, @avg_reads, @max_reads)",
                ),
                (
                    "Scaffold length total (min, avg, max)",
                    "@sum_length (@min_length, @avg_length, @max_length)",
                ),
                (
                    "Number of ORFs total (min, avg, max)",
                    "@sum_nr_orfs (@min_nr_orfs, @avg_nr_orfs, @max_nr_orfs)",
                ),
                (
                    "Depth of coverage total (min, avg, max)",
                    "@sum_coverage (@min_coverage, @avg_coverage*, @max_coverage)",
                ),
                ("*", "darkness scaled to this number"),
            ]
        else:
            p.select_one(HoverTool).tooltips = [
                ("Sample", "@samples"),
                ("Scaffold", "@scaffolds"),
                ("Taxon", "@assigned"),
                ("Number of reads", "@reads (@percent_of_total % of sample total)"),
                ("Scaffold length", "@contig_length"),
                ("Number of ORFs", "@nr_orfs"),
                ("Average Depth of Coverage", "@coverage"),
            ]
    else:
        p.select_one(HoverTool).tooltips = [
            ("Sample", "@samples"),
            ("Taxon", "@assigned"),
            ("Number of reads", "@reads"),
            ("Percentage of total", "@percent_of_total %"),
        ]

    p.grid.grid_line_color = None
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    if len(set(assigned)) > 15:
        p.axis.major_label_text_font_size = "10pt"
    else:
        p.axis.major_label_text_font_size = "12pt"
    p.axis.major_label_standoff = 0
    p.xaxis.major_label_orientation = np.pi / 4
    p.title.text_color = colour[0]
    p.title.text_font_size = "16pt"
    p.title.align = "right"

    p.rect(
        "samples",
        y_value[1],
        1,
        1,
        source=source,
        color="colors",
        alpha="alphas",
        line_color=None,
    )

    panel = Panel(child=p, title=title.split()[1].title())
    # the .title() methods capitalises a string

    if taxonomic_rank == "superkingdom":
        # The superkingdom heatmap still requires a single output file
        output_file(outfile, title=title)
        save(p)
        print("The heatmap %s has been created and written to: %s" % (title, outfile))
        return None
    else:
        return (panel, True)


def main():

0 Source : draw_heatmaps.py
with GNU Affero General Public License v3.0
from DennisSchmitz

def main():
    """
    Main execution of the script.

    Parses the command-line arguments, merges the classification and read
    count tables, writes the per-superkingdom quantities and the taxonomic
    statistics files, and finally draws the heatmaps (superkingdoms, viruses,
    phages and bacteria).
    """
    # 1. Parse and show arguments
    arguments = parse_arguments()

    message = (
        "\n"
        "These are the arguments you have provided:\n"
        "  INPUT:\n"
        "classified = {0},\n"
        "numbers = {1}\n"
        "  OUTPUT:\n"
        "super = {2}\n"
        "virus = {3}\n"
        "phage = {4}\n"
        "bact = {5}\n"
        "super_quantities = {6}\n"
        "stats = {7}\n"
        "vir_stats = {8}\n"
        "phage_stats = {9}\n"
        "bact_stats = {10}\n"
        "  OPTIONAL PARAMETERS:\n"
        "colour = {11}\n".format(
            arguments.classified,
            arguments.numbers,
            arguments.super,
            arguments.virus,
            arguments.phage,
            arguments.bact,
            arguments.super_quantities,
            arguments.stats,
            arguments.vir_stats,
            arguments.phage_stats,
            arguments.bact_stats,
            arguments.colour,
        )
    )

    print(message)

    # 2. Read input files and make dataframes
    numbers_df = read_numbers(arguments.numbers)
    classifications_df = read_classifications(arguments.classified)

    merged_df = classifications_df.merge(
        numbers_df, left_on="Sample_name", right_on="Sample"
    )
    merged_df["Percentage"] = merged_df.reads / merged_df.read_pairs * 100

    # 3. Create chunks of information required for the heatmaps
    # 3.1. Aggregate superkingdom-rank information
    # Sum the reads and percentages per sample and superkingdom:
    superkingdom_sums = pd.DataFrame(
        merged_df.groupby(["Sample_name", "superkingdom"]).sum()[
            ["reads", "Percentage"]
        ]
    )
    # turn the MultiIndex ("Sample_name", "superkingdom") back into columns
    superkingdom_sums.reset_index(inplace=True)

    superkingdom_sums.to_csv(arguments.super_quantities, index=False)
    print("File %s has been created!" % arguments.super_quantities)

    # 3.2. Filter viruses from the table
    virus_df = filter_taxa(df=merged_df, taxon="Viruses", rank="superkingdom")
    # Remove the phages from the virus df to make less cluttered heatmaps
    virus_df = remove_taxa(df=virus_df, taxon=PHAGE_FAMILY_LIST, rank="family")

    # 3.3. Filter phages
    phage_df = filter_taxa(df=merged_df, taxon=PHAGE_FAMILY_LIST, rank="family")

    # 3.4. Filter bacteria
    bacterium_df = filter_taxa(df=merged_df, taxon="Bacteria", rank="superkingdom")

    # 4. Write taxonomic rank statistics to a file, for each chunk
    # 4.1. All taxa
    report_taxonomic_statistics(df=merged_df, outfile=arguments.stats)
    # 4.2. Viruses
    report_taxonomic_statistics(df=virus_df, outfile=arguments.vir_stats)
    # 4.3. Phages
    report_taxonomic_statistics(df=phage_df, outfile=arguments.phage_stats)
    # 4.4. Bacteria
    report_taxonomic_statistics(df=bacterium_df, outfile=arguments.bact_stats)

    # 5. Draw heatmaps for each chunk
    # 5.1. All taxa: superkingdoms (draw_heatmaps writes this file itself)
    draw_heatmaps(
        df=superkingdom_sums,
        outfile=arguments.super,
        title="Superkingdoms heatmap",
        taxonomic_rank="superkingdom",
        colour=arguments.colour,
    )

    # 5.2. Viruses: heatmaps for each rank below 'class'
    _save_heatmap_tabs(
        df=virus_df,
        ranks=RANKS[3:],
        outfile=arguments.virus,
        colour=arguments.colour,
        panel_label="Virus",
        page_label="Virus",
        lower_label="virus",
        group_label="Viruses",
        contig_label="virus",
    )

    # 5.3. Phages: heatmaps for each rank below 'class'
    _save_heatmap_tabs(
        df=phage_df,
        ranks=RANKS[3:],
        outfile=arguments.phage,
        colour=arguments.colour,
        panel_label="Phage",
        page_label="Phage",
        lower_label="phage",
        group_label="phages",
        contig_label="phage",
    )

    # 5.4. Bacteria: heatmaps for each rank below 'superkingdom'
    _save_heatmap_tabs(
        df=bacterium_df,
        ranks=RANKS[1:],
        outfile=arguments.bact,
        colour=arguments.colour,
        panel_label="Bacterium",
        page_label="Bacteria",
        lower_label="bacteria",
        group_label="bacteria",
        contig_label="bacterial",
    )


def _save_heatmap_tabs(
    df,
    ranks,
    outfile,
    colour,
    panel_label,
    page_label,
    lower_label,
    group_label,
    contig_label,
):
    """
    Draw one heatmap per taxonomic rank and write them to a single file.

    :param df: dataframe holding the contigs of one taxon group
    :param ranks: taxonomic ranks to draw a heatmap for
    :param outfile: path of the file to write
    :param colour: colour argument handed to draw_heatmaps
    :param panel_label: group name used in each heatmap title ("Virus")
    :param page_label: group name used in the page title ("Bacteria")
    :param lower_label: group name used in the warnings ("virus")
    :param group_label: group name used in the 'no contigs' warning ("Viruses")
    :param contig_label: word used in the placeholder file ("bacterial")
    :return: None
    """
    panels = []
    for rank in ranks:
        # draw_heatmaps returns a (panel, flag) pair for these ranks; the flag
        # is presumably falsy when there was no data -- verify against
        # draw_heatmaps
        (panel, has_data) = draw_heatmaps(
            df=df,
            outfile=None,
            title="%s %s heatmap" % (panel_label, rank),
            taxonomic_rank=rank,
            colour=colour,
        )

        if has_data:
            panels.append(panel)
        else:
            # no data: print a warning and do not add a nonsense panel
            print("No data for the current %s rank! (%s)" % (lower_label, rank))
            print(
                "\n---\nThere are no contigs for the given %s. No %s %s heatmap can be made.\n---\n"
                % (rank, lower_label, rank)
            )

    page_title = "%s heatmap" % page_label

    if len(panels) > 1:
        # multiple tabs: create figure with tabs
        output_file(outfile, title=page_title)
        save(Tabs(tabs=panels))
        print("The %s has been created and written to: %s" % (page_title, outfile))
    elif len(panels) == 1:
        # single tab: save that one panel on its own
        output_file(outfile, title=page_title)
        save(panels[0])
    else:
        # no tabs: warn user and write a placeholder so the output file exists
        print(
            "\n---\nThere are no contigs for %s in this sample! No %s heatmap is made.\n---\n"
            % (group_label, lower_label)
        )
        with open(outfile, "w") as handle:
            handle.write("No %s contigs found in the current dataset." % contig_label)


# EXECUTE script--------------------------------------------
if __name__ == "__main__":

0 Source : cli.py
with MIT License
from IQTLabs

def visualize(
    fasta,
    width,
    palette,
    color,
    hide,
    bar,
    title,
    separate,
    cols,
    link_x,
    link_y,
    output,
    offline,
    method,
    dimensions,
    skip,
    mode,
    legend_loc,
    output_backend,
    downsample,
):
    """
    Plot the sequences of one or more FASTA files as bokeh line figures.

    :param fasta: sequence of FASTA file paths (iterated, indexed and measured)
    :param width: line width of each plotted sequence
    :param palette: key into ``small_palettes`` selecting the colour family
    :param color: if falsy, every sequence is drawn in black
    :param hide: if truthy, legends are shown with click-to-hide enabled
    :param bar: show a tqdm progress bar when plotting more than one sequence
    :param title: figure title; also the HTML page title when saving
    :param separate: draw each sequence (or file) on its own figure in a grid
    :param cols: number of grid columns; 0 picks a roughly square layout
    :param link_x: share the x range with the previous figure
    :param link_y: share the y range with the previous figure
    :param output: output path; the plot is saved only when it ends in
        ``.html``, otherwise it is shown
    :param offline: embed bokeh resources inline when saving
    :param method: transformation method; must be a key of ``axis_labels``
    :param dimensions: (width, height) of each figure; defaults to (750, 500)
    :param skip: suppress confirmation prompts and informational messages
    :param mode: "seq", "file" or "auto" colouring/grouping mode
    :param legend_loc: legend location passed to bokeh
    :param output_backend: bokeh output backend passed to ``figure``
    :param downsample: keep every n-th transformed point when greater than 1
    :return: None
    :raises ValueError: if ``fasta`` is None
    """
    # a FASTA input is mandatory
    if fasta is None:
        raise ValueError("Must provide FASTA file.")

    # handle selecting the palette
    palette = small_palettes[palette]

    # handle setting the dimensions automatically if not specified
    if not dimensions:
        dimensions = (750, 500)

    if (
        len([record for _f in fasta for record in Fasta(_f, read_long_names=True)])
        > len(palette)
        and mode != "file"
    ):
        if len(fasta) > 1 and mode == "auto":
            if not skip:
                print(
                    "Visualizing each file in separate color. To override, provide mode selection."
                )
            mode = "file"
        else:
            print("Visualizing each sequence in black.")
            color = False
    elif mode == "auto":
        mode = "seq"

    # get all the sequences
    seqs = []
    color_counter = 0
    warned = False
    for _f in fasta:
        for seq in Fasta(_f, sequence_always_upper=True, read_long_names=True):
            seqs.append(
                Box(
                    color=palette[color_counter + 1 if color_counter > 2 else 3][
                        color_counter
                    ]
                    if color
                    else "black",
                    name=_f if mode == "file" else seq.name,
                    raw_seq=str(seq),
                )
            )

            # check the length of the seq (only ask once per run)
            if len(seq) > 10000 and not skip and not warned and downsample == 1:
                click.confirm(
                    "You are plotting a long sequence ({} bp). This may be very slow, although downsampling might help. "
                    "Do you want to continue?".format(len(seq)),
                    abort=True,
                )
                warned = True

            if mode == "seq":
                color_counter += 1
        if mode == "file":
            color_counter += 1

    # warn if plotting a large number of seqs
    if len(seqs) > 500 and not skip:
        click.confirm(
            "You are plotting a large number of sequences ({}). This may be very slow, although downsampling might help. "
            "Do you want to continue?".format(len(seqs)),
            abort=True,
        )

    # warn if using a bad method
    if (
        max([len(seq.raw_seq) for seq in seqs]) > 25
        and method in ["qi", "randic"]
        and not skip
    ):
        click.confirm(
            "This method is not well suited to a sequence of this length. "
            "Do you want to continue?",
            abort=True,
        )

    axis_labels = {
        "squiggle": {"x": "position (BP)", "y": None},
        "gates": {"x": "C-G axis", "y": "A-T axis"},
        "yau": {"x": None, "y": None},
        "yau-bp": {"x": "position (BP)", "y": None},
        "randic": {"x": "position (BP)", "y": "nucleotide"},
        "qi": {"x": "position (BP)", "y": "dinucleotide"},
    }

    # the number of figures to draw is either the number of sequences or files (or 1)
    if separate:
        # mode can still be "auto" here (many sequences in a single file), so
        # treat everything that is not "file" as per-sequence instead of
        # leaving fig_count unassigned
        fig_count = len(fasta) if mode == "file" else len(seqs)
    else:
        fig_count = 1

    fig = []
    for i in range(fig_count):

        # link the axes, if requested
        if i > 0 and link_x:
            x_range = fig[i - 1].x_range
        else:
            x_range = None
        if i > 0 and link_y:
            y_range = fig[i - 1].y_range
        else:
            y_range = None

        # the y axes for randic and qi are bases
        if method == "randic":
            y_range = ["A", "T", "G", "C"]
        elif method == "qi":
            y_range = [
                "AA",
                "AC",
                "AG",
                "AT",
                "CA",
                "CC",
                "CG",
                "CT",
                "GA",
                "GC",
                "GG",
                "GT",
                "TA",
                "TC",
                "TG",
                "TT",
            ]

        fig.append(
            figure(
                x_axis_label=axis_labels[method]["x"],
                y_axis_label=axis_labels[method]["y"],
                title=title,
                x_range=x_range,
                y_range=y_range,
                plot_width=dimensions[0],
                plot_height=dimensions[1],
                output_backend=output_backend,
            )
        )

    # show a progress bar if processing multiple files
    if len(seqs) > 1 and bar:
        _seqs = tqdm(seqs, unit=" seqs", leave=False)
    else:
        _seqs = seqs

    for i, seq in enumerate(_seqs):
        # perform the actual transformation
        transformed = transform(seq.raw_seq, method=method)
        if downsample > 1:
            transformed = (transformed[0][::downsample], transformed[1][::downsample])

        # figure (no pun intended) which figure to plot the data on
        if separate:
            if mode == "file":
                # in file mode seq.name holds the file path
                _fig = fig[fasta.index(seq.name)]
            else:
                _fig = fig[i]

            # add a title to the plot
            _fig.title = annotations.Title()
            if mode == "file":
                _fig.title.text = click.format_filename(seq.name, shorten=True)
            else:
                _fig.title.text = seq.name
        else:
            _fig = fig[0]
            _fig.title = annotations.Title()

            # if only plotting on one figure, set up the title
            if title:
                _fig.title.text = title
            elif len(seqs) > 1 and not title and len(fasta) == 1:
                _fig.title.text = click.format_filename(fasta[0], shorten=True)
            elif len(seqs) == 1:
                # if just plotting one sequence, title it with the name of the sequence
                _fig.title.text = seq.name

        # randic and qi methods have categorical y axes
        if method == "randic":
            y = list(seq.raw_seq)
        elif method == "qi":
            pairs = [seq.raw_seq[k : k + 2] for k in range(len(seq.raw_seq))]
            # the last slice is a single base; keep only complete dinucleotides
            y = [str(pair) for pair in pairs if len(pair) == 2]
        else:
            y = transformed[1]

        # figure out whether to add a legend
        if (separate or not color or mode == "file" or len(seqs) == 1) and not hide:
            legend = None
        else:
            legend = click.format_filename(seq.name, shorten=True)

        # optimization for comparing large FASTA files without hiding:
        # only the last sequence of each colour run gets a legend entry
        try:
            if mode == "file" and seqs[i + 1].color != seq.color and not separate:
                legend = click.format_filename(seq.name, shorten=True)
        except IndexError:
            # seq is the very last sequence
            if mode == "file" and not separate:
                legend = click.format_filename(seq.name, shorten=True)

        # do the actual plotting

        # set up the legend
        if legend is not None:
            _fig.line(
                x=transformed[0],
                y=y,
                line_width=width,
                legend_label=legend,
                color=seq.color,
            )
            _fig.legend.location = legend_loc
            if hide:
                _fig.legend.click_policy = "hide"
        else:
            _fig.line(x=transformed[0], y=y, line_width=width, color=seq.color)

    # clean up the tqdm bar (a plain list has no close())
    try:
        _seqs.close()
    except AttributeError:
        pass

    # lay out the figure
    if separate:
        plot = gridplot(
            fig,
            ncols=math.ceil(len(fig) ** 0.5) if cols == 0 else cols,
            toolbar_options=dict(logo=None),
        )  # note that 0 denotes the automatic default
    else:
        plot = fig[0]

    if output is not None and output.endswith(".html"):
        # use the user's title for the page, falling back to a default
        output_file(
            output, title=title if title is not None else "Squiggle Visualization"
        )
        save(plot, resources=INLINE if offline else None)
    else:
        show(plot)


if __name__ == "__main__":

0 Source : plot.py
with BSD 3-Clause "New" or "Revised" License
from Open-ET

    def _plot(self, FluxObj, ncols=1, output_type='save', out_file=None, 
            suptitle='', plot_width=1000, plot_height=450, 
            sizing_mode='scale_both', merge_tools=False, link_x=True, **kwargs): 
        """ 
        Private routine for aggregated validation plots that are used by
        the :meth:`.QaQc.plot` and :meth:`.Data.plot` methods.
        """
        # get daily and monthly time series with internal names, get units
        monthly = False
        if hasattr(FluxObj, 'monthly_df'):
            # will run correction as of now if it is a QaQc
            monthly = True
            monthly_df = FluxObj.monthly_df.rename(columns=FluxObj.inv_map) 
            # avoid plotting single point- errors out bokeh datetime axis, etc.
            for c in monthly_df.columns:
                if monthly_df[c].notna().sum()   <  = 1:
                    monthly_df.drop(c, axis=1, inplace=True)
            monthly_source = ColumnDataSource(monthly_df)

        # so that the correction is run, may change this
        FluxObj.df.head(); # if Data, need to access to calc vp/vpd 
        df = FluxObj.df.rename(columns=FluxObj.inv_map) 
        variables = FluxObj.variables
        units = FluxObj.units 
        # bokeh column sources for tooltips
        daily_source=ColumnDataSource(df)
        # for aggregating plots
        daily_line = []
        daily_scatter = []
        monthly_line = []
        monthly_scatter = []

        if output_type == 'save':
            output_file(out_file)

        def _get_units(plt_vars, units):
            """
            Helper function to figure out units for multivariate plots.
            If none of plt_vars exist return None, if multiple units are found
            print a warning that vars have different units. Returns string if 
            one or more units are found- first found if multiple. 
            """
            ret = [] 
            for v in plt_vars:
                unit = units.get(v, None)
                if unit is not None:
                    ret.append(unit)
            if len(ret) == 0:
                ret = None
            elif len(set(ret)) > 1:
                print(
                    'WARNING: variables: {} are not of the same units'.format(
                        ','.join(plt_vars)
                    )
                )
                ret = ret[0]
            elif len(set(ret)) == 1:
                ret = ret[0]

            return ret


        # run through each plot, daily then monthly versions
        #### 
        # energy balance time series plots
        #### 
        plt_vars = ['LE', 'H', 'Rn', 'G']
        colors = ['blue', 'red', 'black', 'green']
        title = 'Daily Surface Energy Balance Components'
        x_label = 'date'
        y_label = _get_units(plt_vars, units)
        fig = figure(
            x_axis_label=x_label, y_axis_label=y_label, title=title,
            width=plot_width, height=plot_height, name='energy_balance_daily'
        )
        fig = Plot.add_lines(
            fig, df, plt_vars, colors, x_label, daily_source, labels=plt_vars
        )
        if fig is not None:
            daily_line.append(fig)
        else:
            print(
                'Energy balance components time series grapths missing all '
                'variables'
            )
        if fig is not None and monthly:
            # same for monthly fig
            title = 'Monthly Surface Energy Balance Components'
            fig = figure(x_axis_label=x_label, y_axis_label=y_label,title=title,
                width=plot_width, height=plot_height, 
                name='energy_balance_monthly'
            )
            fig = Plot.add_lines(
                fig, monthly_df, plt_vars, colors, x_label, monthly_source,
                labels=plt_vars
            )
            monthly_line.append(fig)

        #### 
        # incoming shortwave and ASCE potential clear sky time series plots
        #### 
        plt_vars = ['sw_in', 'rso']
        # only plot if we have both
        if set(plt_vars).issubset(df.columns):
            labels = ['Station Rs', 'ASCE Rso']
            colors = ['black', 'red']
            title =\
                'Daily Incoming Shortwave (Rs) and ASCE Clear Sky Shortwave '+\
                'Radiation (Rso)'
            x_label = 'date'
            y_label = _get_units(plt_vars, units)
            fig = figure(x_axis_label=x_label, y_axis_label=y_label, 
                title=title, width=plot_width, height=plot_height, 
                name='Rs_daily'
            )
            fig = Plot.add_lines(
                fig, df, plt_vars, colors, x_label, daily_source, labels=labels
            )
            if fig is not None:
                daily_line.append(fig)
                ## same for monthly fig (removed for now)
                #title='Monthly Incoming Shortwave and ASCE Potential Radiation'
                #fig = figure(
                #    x_axis_label=x_label,y_axis_label=y_label,title=title,
                #    width=plot_width, height=plot_height
                #)
                #fig = Plot.add_lines(
                #    fig, monthly_df, plt_vars, colors, x_label, monthly_source,
                #    labels=labels
                #)
                #monthly_line.append(fig)
        else:
            print(
                'Shortwave and potential clear sky radiation time series '
                'grapths missing all variables'
            )

        #### 
        # multiple soil heat flux sensor time series plots
        #### 
        # keep user names for these in hover 
        g_re = re.compile('^[gG]_[\d+mean|corr]|G$')
        g_vars = [
            v for v in variables if g_re.match(v) and v in df.columns
        ]
        num_lines = len(g_vars)
        if num_lines > 1:
            rename_dict = {k:variables[k] for k in g_vars}
            tmp_df = df[g_vars].rename(columns=rename_dict)
            tmp_source = ColumnDataSource(tmp_df)
            plt_vars = list(rename_dict.values())
            colors = Viridis256[0:-1:int(256/num_lines)]
            title = 'Daily Soil Heat Flux (Multiple Sensors)'
            x_label = 'date'
            y_label = _get_units(g_vars, units)
            fig = figure(
                x_axis_label=x_label, y_axis_label=y_label, title=title,
                plot_width=plot_width, plot_height=plot_height, name='G_daily'
            )
            fig = Plot.add_lines(
                fig, tmp_df, plt_vars, colors, x_label, tmp_source, 
                labels=plt_vars
            )
            if fig is not None:
                daily_line.append(fig)
            if fig is not None and monthly:
                # same for monthly fig
                g_vars = [
                    v for v in variables if g_re.match(v) and v in \
                        monthly_df.columns
                ]
                num_lines = len(g_vars)
                if num_lines > 1:
                    tmp_df = monthly_df[g_vars].rename(columns=rename_dict)
                    tmp_source = ColumnDataSource(tmp_df)
                    title = 'Monthly Soil Heat Flux (Multiple Sensors)'
                    fig = figure(
                        x_axis_label=x_label, y_axis_label=y_label,title=title,
                        plot_width=plot_width, plot_height=plot_height, 
                        name='G_monthly'
                    )
                    fig = Plot.add_lines(
                        fig, tmp_df, plt_vars, colors, x_label, tmp_source,
                        labels=plt_vars
                    )
                    monthly_line.append(fig)
            # do not print warning if missing multiple soil moisture recordings

        #### 
        # radiation time series plots
        #### 
        plt_vars = ['sw_in', 'lw_in', 'sw_out', 'lw_out']
        colors = ['red', 'darkred', 'blue', 'navy']
        title = 'Daily Radiation Components'
        x_label = 'date'
        y_label = _get_units(plt_vars, units)
        fig = figure(
            x_axis_label=x_label, y_axis_label=y_label, title=title,
            width=plot_width, height=plot_height, name='radiation_daily'
        )
        fig = Plot.add_lines(
            fig, df, plt_vars, colors, x_label, daily_source, labels=plt_vars
        )
        if fig is not None:
            daily_line.append(fig)
        else:
            print(
                'Radiation components time series grapths missing all variables'
            )
        if fig is not None and monthly:
            # same for monthly fig
            title = 'Monthly Radiation Components'
            fig = figure(
                x_axis_label=x_label, y_axis_label=y_label, title=title,
                width=plot_width, height=plot_height, name='radiation_monthly'
            )
            fig = Plot.add_lines(
                fig, monthly_df, plt_vars, colors, x_label, monthly_source,
                labels=plt_vars
            )
            monthly_line.append(fig)


        #### 
        # temperature time series plot
        #### 
        plt_vars = ['t_max','t_avg','t_min','t_dew']
        colors = ['red','black','blue','green']
        title = 'Daily Average Air Temperature'
        x_label = 'date'
        y_label = _get_units(plt_vars, units)
        fig = figure(
            x_axis_label=x_label, y_axis_label=y_label, title=title,
            width=plot_width, height=plot_height, name='temp_daily'
        )
        fig = Plot.add_lines(
            fig, df, plt_vars, colors, x_label, daily_source, labels=plt_vars
        )
        if fig is not None:
            daily_line.append(fig)
        else:
            print(
                'Average air temperature time series grapths missing all '
                'variables'
            )
        if fig is not None and monthly:
            # same for monthly fig
            title = 'Monthly Average Air Temperature'
            fig = figure(
                x_axis_label=x_label, y_axis_label=y_label,title=title,
                width=plot_width, height=plot_height, name='temp_monthly'
            )
            fig = Plot.add_lines(
                fig, monthly_df, plt_vars, colors, x_label, monthly_source,
                labels=plt_vars
            )
            monthly_line.append(fig)

        #### 
        # vapor pressure time series plots
        #### 
        plt_vars = ['vp', 'vpd']
        colors = ['black', 'darkred']
        title = 'Daily Average Vapor Pressure and Deficit'
        x_label = 'date'
        y_label = _get_units(plt_vars, units)
        fig = figure(
            x_axis_label=x_label, y_axis_label=y_label, title=title,
            width=plot_width, height=plot_height, name='vap_press_daily'
        )
        fig = Plot.add_lines(
            fig, df, plt_vars, colors, x_label, daily_source, labels=plt_vars
        )
        if fig is not None:
            daily_line.append(fig)
        else:
            print('Vapor pressure time series grapths missing all variables')
        if fig is not None and monthly:
            # same for monthly fig
            title = 'Monthly Average Vapor Pressure'
            fig = figure(
                x_axis_label=x_label, y_axis_label=y_label, title=title,
                width=plot_width, height=plot_height, name='vap_press_monthly'
            )
            fig = Plot.add_lines(
                fig, monthly_df, plt_vars, colors, x_label, monthly_source,
                labels=plt_vars
            )
            monthly_line.append(fig)

        #### 
        # windspeed time series plot
        #### 
        plt_vars = ['ws']
        colors = ['black']
        title = 'Daily Average Windspeed'
        x_label = 'date'
        y_label = _get_units(plt_vars, units)
        fig = figure(
            x_axis_label=x_label, y_axis_label=y_label, title=title,
            width=plot_width, height=plot_height, name='wind_daily'
        )
        fig = Plot.add_lines(fig, df, plt_vars, colors, x_label, daily_source)
        if fig is not None:
            daily_line.append(fig)
        else:
            print('Windspeed time series grapths missing all variables')
        if fig is not None and monthly:
            # same for monthly fig
            title = 'Monthly Average Windspeed'
            fig = figure(
                x_axis_label=x_label, y_axis_label=y_label, title=title,
                width=plot_width, height=plot_height, name='wind_monthly'
            )
            fig = Plot.add_lines(
                fig, monthly_df, plt_vars, colors, x_label, monthly_source
            )
            monthly_line.append(fig)

        #### 
        # precipitation time series plots
        #### 
        plt_vars = ['ppt', 'gridMET_prcp']
        labels = ['station', 'gridMET']
        colors = ['black', 'red']
        title = 'Daily Station and gridMET Precipitation'
        x_label = 'date'
        y_label = _get_units(plt_vars, units)
        fig = figure(
            x_axis_label=x_label, y_axis_label=y_label, title=title,
            width=plot_width, height=plot_height, name='precip_daily'
        )
        fig = Plot.add_lines(
            fig, df, plt_vars, colors, x_label, daily_source, labels=labels
        )
        if fig is not None:
            daily_line.append(fig)
        else:
            print('Precipitation time series grapths missing all variables')
        if fig is not None and monthly:
            # same for monthly fig
            title = 'Monthly Station and gridMET Precipitation'
            fig = figure(
                x_axis_label=x_label, y_axis_label=y_label, title=title,
                width=plot_width, height=plot_height, name='precip_monthly'
            )
            fig = Plot.add_lines(
                fig, monthly_df, plt_vars, colors, x_label, monthly_source,
                labels=labels
            )
            monthly_line.append(fig)

        #### 
        # latent energy time series plots
        #### 
        plt_vars = ['LE', 'LE_corr', 'LE_user_corr']
        colors = ['black', 'red', 'darkorange']
        title = 'Daily Average Latent Energy Flux'
        x_label = 'date'
        y_label = _get_units(plt_vars, units)
        fig = figure(
            x_axis_label=x_label, y_axis_label=y_label, title=title,
            width=plot_width, height=plot_height, name='LE_daily'
        )
        fig = Plot.add_lines(
            fig, df, plt_vars, colors, x_label, daily_source, labels=plt_vars
        )
        if fig is not None:
            daily_line.append(fig)
        else:
            print('Latent energy time series grapths missing all variables')
        if fig is not None and monthly:
            # same for monthly fig
            title = 'Monthly Average Latent Energy Flux'
            fig = figure(
                x_axis_label=x_label, y_axis_label=y_label, title=title,
                width=plot_width, height=plot_height, name='LE_monthly'
            )
            fig = Plot.add_lines(
                fig, monthly_df, plt_vars, colors, x_label, monthly_source,
                labels=plt_vars
            )
            monthly_line.append(fig)

        #### 
        # ET time series plots
        #### 
        refET = 'ETr' if 'ETrF' in df.columns else 'ETo'
        plt_vars = ['ET', 'ET_corr', 'ET_user_corr', f'gridMET_{refET}']
        labels = plt_vars[0:3] + [refET]
        colors = ['black', 'red', 'darkorange', 'blue']
        title = 'Daily Evapotranspiration'
        x_label = 'date'
        y_label = _get_units(plt_vars, units)
        fig = figure(
            x_axis_label=x_label, y_axis_label=y_label, title=title,
            width=plot_width, height=plot_height, name='ET_daily'
        )
        fig = Plot.add_lines(
            fig, df, plt_vars, colors, x_label, daily_source, labels=labels
        )
        if 'ET_fill_val' in df.columns and fig is not None:
            # make gap fill values more visible
            Plot.line_plot(
                fig, 'date', 'ET_fill_val', daily_source, 'green', 
                label='ET_fill_val', line_width=3
            )

        if fig is not None:
            daily_line.append(fig)
        else:
            print(
                'Evapotranspiration time series grapths missing all variables'
            )
        if fig is not None and monthly:
            # same for monthly fig
            title = 'Monthly Evapotranspiration'
            fig = figure(
                x_axis_label=x_label, y_axis_label=y_label, title=title,
                width=plot_width, height=plot_height, name='ET_monthly'
            )
            fig = Plot.add_lines(
                fig, monthly_df, plt_vars, colors, x_label, monthly_source,
                labels=labels
            )
            monthly_line.append(fig)

        #### 
        # number gap filled days monthly time series plot
        #### 
        if monthly and 'ET_gap' in monthly_df.columns:
            txt = ''
            if 'ET_corr' in df.columns:
                txt = ' Corrected'
            title = 'Number of Gap Filled Days in{} Monthly ET'.format(txt)
            x_label = 'date'
            y_label = 'number of gap-filled days'
            fig = figure(
                x_axis_label=x_label, y_axis_label=y_label, title=title,
                width=plot_width, height=plot_height, name='ET_gaps'
            )
            x = 'date'
            y = 'ET_gap'
            color = 'black'
            Plot.line_plot(fig, x, y, monthly_source, color)
            monthly_line.append(fig)
        elif monthly:
            print('Monthly count of gap filled ET days plot missing variable')

        #### 
        # ETrF time series plots
        ####
        plt_vars = [f'{refET}F', f'{refET}F_filtered']
        colors = ['black', 'red']
        title = f'Daily Fraction of Reference ET ({refET}F)'
        x_label = 'date'
        y_label = _get_units(plt_vars, units)
        fig = figure(
            x_axis_label=x_label, y_axis_label=y_label, title=title,
            width=plot_width, height=plot_height, name=f'{refET}F_daily'
        )
        fig = Plot.add_lines(
            fig, df, plt_vars, colors, x_label, daily_source, labels=plt_vars
        )
        if fig is not None:
            daily_line.append(fig)
        else:
            print(
                'Fraction of reference ET time series grapths missing all '
                'variables'
            )
        if fig is not None and monthly:
            # same for monthly fig
            title = f'Monthly Fraction of Reference ET ({refET}F)'
            fig = figure(
                x_axis_label=x_label, y_axis_label=y_label, title=title,
                width=plot_width, height=plot_height, name=f'{refET}F_monthly'
            )
            fig = Plot.add_lines(
                fig, monthly_df, plt_vars, colors, x_label, monthly_source,
                labels=plt_vars
            )
            monthly_line.append(fig)

        #### 
        # energy balance ratio time series plots
        #### 
        plt_vars = ['ebr', 'ebr_corr', 'ebr_user_corr']
        colors = ['black', 'red', 'darkorange']
        title = 'Daily Energy Balance Ratio with Long-term Mean'
        x_label = 'date'
        y_label = _get_units(plt_vars, units)
        # add mean EBR for each time series in legend
        labels = []
        for i, v in enumerate(plt_vars):
            if v in df.columns:
                added_text = ': {}'.format(str(round(df[v].mean(),2)))
                labels.append(plt_vars[i] + added_text)
            else:
                labels.append(None)
        fig = figure(
            x_axis_label=x_label, y_axis_label=y_label, title=title,
            width=plot_width, height=plot_height, name='EBR_daily'
        )
        fig = Plot.add_lines(
            fig, df, plt_vars, colors, x_label, daily_source, labels=labels
        )
        if fig is not None:
            daily_line.append(fig)
        else:
            print(
                'Energy balance ratio time series grapths missing all '
                'variables'
            )
        if fig is not None and monthly:
            # same for monthly fig
            title = 'Monthly Energy Balance Ratio with Long-term Mean'
            # add mean for monthly EBRs to legend
            labels = []
            for i, v in enumerate(plt_vars):
                if v in monthly_df.columns:
                    added_text = ': {}'.format(
                        str(round(monthly_df[v].mean(),2))
                    )
                    labels.append(plt_vars[i] + added_text)
                else:
                    labels.append(None)
            fig = figure(
                x_axis_label=x_label, y_axis_label=y_label, title=title,
                width=plot_width, height=plot_height, name='EBR_monthly'
            )
            fig = Plot.add_lines(
                fig, monthly_df, plt_vars, colors, x_label, monthly_source,
                labels=labels
            )
            monthly_line.append(fig)

        #### 
        # energy balance closure scatter plots
        #### 
        title = 'Daily Energy Balance Closure, Energy Versus Flux with Slope '\
            'Through Origin'
        unit = _get_units(['LE', 'H', 'Rn', 'G'], units)
        y_label = 'LE + H ({})'.format(unit)
        x_label = 'Rn - G ({})'.format(unit)
        fig = figure(
            x_axis_label=x_label, y_axis_label=y_label, title=title,
            width=plot_width, height=plot_width, name='energy_vs_flux_daily'
        )
        y_vars = ['flux', 'flux_corr', 'flux_user_corr']
        colors = ['black', 'red', 'darkorange']
        labels = ['init', 'corr', 'user_corr']
        # add plot pairs to plot if they exist, add 1:1
        mins_maxs = []
        n_vars_fnd = 0
        for i, v in enumerate(y_vars):
            if v in df.columns and not df[v].isna().all():
                n_vars_fnd += 1
                if v == 'flux_corr' and 'energy_corr' in df.columns:
                    x_var = 'energy_corr'
                else:
                    x_var = 'energy'
                min_max = Plot.scatter_plot(
                    fig, x_var, v, daily_source, colors[i], label=labels[i]
                )
                if min_max is not None:
                    mins_maxs.append(min_max)
        if n_vars_fnd > 0:
            # add scaled one to one line
            mins_maxs = np.array(mins_maxs)
            if not pd.isna(mins_maxs).all():
                x_min = min(mins_maxs[:,0])
                x_max = max(mins_maxs[:,1])
                y_min = min(mins_maxs[:,2])
                y_max = max(mins_maxs[:,3])
                ax_min, ax_max = min([x_min,y_min]), max([x_max,y_max])
                ax_min -= 0.02*abs(ax_max-ax_min)
                ax_max += 0.02*abs(ax_max-ax_min)
                fig.x_range=Range1d(ax_min, ax_max)
                fig.y_range=Range1d(ax_min, ax_max)
                one2one_vals = np.arange(ax_min, ax_max,1)
                fig.line(
                    one2one_vals, one2one_vals, legend_label='1:1 line', 
                    color='black', line_dash='dashed'
                )
                daily_scatter.append(fig)
            if monthly:
                # same for monthly fig
                title = 'Monthly Energy Balance Closure, Energy Versus Flux '\
                    'with Slope Through Origin'
                fig = figure(
                    x_axis_label=x_label, y_axis_label=y_label, title=title,
                    width=plot_width, height=plot_width, 
                    name='energy_vs_flux_monthly'
                )
                mins_maxs = []
                for i, v in enumerate(y_vars):
                    if v in monthly_df.columns:
                        min_max = Plot.scatter_plot(
                            fig, 'energy', v, monthly_source, colors[i], 
                            label=labels[i]
                        )
                        if min_max is not None:
                            mins_maxs.append(min_max)
                mins_maxs = np.array(mins_maxs)
                # check if not all pairs are empty, if not plot 1:1
                if not pd.isna(mins_maxs).all():
                    x_min = min(mins_maxs[:,0])
                    x_max = max(mins_maxs[:,1])
                    y_min = min(mins_maxs[:,2])
                    y_max = max(mins_maxs[:,3])
                    ax_min, ax_max = min([x_min,y_min]), max([x_max,y_max])
                    ax_min -= 0.02*abs(ax_max-ax_min)
                    ax_max += 0.02*abs(ax_max-ax_min)
                    fig.x_range=Range1d(ax_min, ax_max)
                    fig.y_range=Range1d(ax_min, ax_max)
                    one2one_vals = np.arange(ax_min, ax_max,1)
                    fig.line(
                        one2one_vals, one2one_vals, legend_label='1:1 line', 
                        color='black', line_dash='dashed'
                    )
                    monthly_scatter.append(fig)
        else:
            print('Energy balance scatter grapths missing all variables')


        #### 
        # latent energy scatter plots
        #### 
        title = 'Daily Latent Energy, Initial Versus Corrected'
        unit = _get_units(['LE', 'LE_corr', 'LE_user_corr'], units)
        y_label = 'corrected ({})'.format(unit)
        x_label = 'initial ({})'.format(unit)
        fig = figure(
            x_axis_label=x_label, y_axis_label=y_label, title=title,
            width=plot_width, height=plot_width, name='LE_scatter_daily'
        )
        y_vars = ['LE_corr', 'LE_user_corr']
        colors = ['red', 'darkorange']
        labels = ['corr', 'user_corr']
        # add plot pairs to plot if they exist, add 1:1
        mins_maxs = []
        n_vars_fnd = 0
        for i, v in enumerate(y_vars):
            if v in df.columns and not df[v].isna().all():
                n_vars_fnd += 1
                min_max = Plot.scatter_plot(
                    fig, 'LE', v, daily_source, colors[i], label=labels[i]
                )
                mins_maxs.append(min_max)
        if n_vars_fnd > 0:
            # add scaled one to one line
            mins_maxs = np.array(mins_maxs)
            if not pd.isna(mins_maxs).all():
                x_min = min(mins_maxs[:,0])
                x_max = max(mins_maxs[:,1])
                y_min = min(mins_maxs[:,2])
                y_max = max(mins_maxs[:,3])
                ax_min, ax_max = min([x_min,y_min]), max([x_max,y_max])
                ax_min -= 0.02*abs(ax_max-ax_min)
                ax_max += 0.02*abs(ax_max-ax_min)
                fig.x_range=Range1d(ax_min, ax_max)
                fig.y_range=Range1d(ax_min, ax_max)
                one2one_vals = np.arange(ax_min, ax_max,1)
                fig.line(
                    one2one_vals, one2one_vals, legend_label='1:1 line', 
                    color='black', line_dash='dashed'
                )
                daily_scatter.append(fig)
            if monthly:
                # same for monthly fig
                title = 'Monthly Latent Energy, Initial Versus Corrected'
                fig = figure(
                    x_axis_label=x_label, y_axis_label=y_label, title=title,
                    width=plot_width, height=plot_width, 
                    name='LE_scatter_monthly'
                )
                mins_maxs = []
                for i, v in enumerate(y_vars):
                    if v in monthly_df.columns:
                        min_max = Plot.scatter_plot(
                            fig, 'LE', v, monthly_source, colors[i], 
                            label=labels[i]
                        )
                        if min_max is not None:
                            mins_maxs.append(min_max)
                mins_maxs = np.array(mins_maxs)
                # check if not all pairs are empty, if not plot 1:1
                if not pd.isna(mins_maxs).all():
                    x_min = min(mins_maxs[:,0])
                    x_max = max(mins_maxs[:,1])
                    y_min = min(mins_maxs[:,2])
                    y_max = max(mins_maxs[:,3])
                    ax_min, ax_max = min([x_min,y_min]), max([x_max,y_max])
                    ax_min -= 0.02*abs(ax_max-ax_min)
                    ax_max += 0.02*abs(ax_max-ax_min)
                    fig.x_range=Range1d(ax_min, ax_max)
                    fig.y_range=Range1d(ax_min, ax_max)
                    one2one_vals = np.arange(ax_min, ax_max,1)
                    fig.line(
                        one2one_vals, one2one_vals, legend_label='1:1 line', 
                        color='black', line_dash='dashed'
                    )
                    monthly_scatter.append(fig)
        else:
            print('Latent energy scatter grapths missing all variables')

        #### 
        # ET scatter plots
        #### 
        title = 'Daily Evapotranspiration, Initial Versus Corrected'
        unit = _get_units(['ET', 'ET_corr', 'ET_user_corr'], units)
        y_label = 'corrected ({})'.format(unit)
        x_label = 'initial ({})'.format(unit)
        fig = figure(
            x_axis_label=x_label, y_axis_label=y_label, title=title,
            width=plot_width, height=plot_width, name='ET_scatter_daily'
        )
        y_vars = ['ET_corr', 'ET_user_corr']
        colors = ['red', 'darkorange']
        labels = ['corr', 'user_corr']
        # add plot pairs to plot if they exist, add 1:1
        mins_maxs = []
        n_vars_fnd = 0
        for i, v in enumerate(y_vars):
            if v in df.columns and not df[v].isna().all():
                n_vars_fnd += 1
                min_max = Plot.scatter_plot(
                    fig, 'ET', v, daily_source, colors[i], label=labels[i]
                )
                mins_maxs.append(min_max)
        if n_vars_fnd > 0:
            # add scaled one to one line
            mins_maxs = np.array(mins_maxs)
            x_min = min(mins_maxs[:,0])
            x_max = max(mins_maxs[:,1])
            y_min = min(mins_maxs[:,2])
            y_max = max(mins_maxs[:,3])
            ax_min, ax_max = min([x_min,y_min]), max([x_max,y_max])
            ax_min -= 0.02*abs(ax_max-ax_min)
            ax_max += 0.02*abs(ax_max-ax_min)
            fig.x_range=Range1d(ax_min, ax_max)
            fig.y_range=Range1d(ax_min, ax_max)
            one2one_vals = np.arange(ax_min, ax_max,1)
            fig.line(
                one2one_vals, one2one_vals, legend_label='1:1 line', 
                color='black', line_dash='dashed'
            )
            daily_scatter.append(fig)
            if monthly:
                # same for monthly fig
                title = 'Monthly Evapotranspiration, Initial Versus Corrected'
                fig = figure(
                    x_axis_label=x_label, y_axis_label=y_label, title=title,
                    width=plot_width, height=plot_width, 
                    name='ET_scatter_monthly'
                )
                mins_maxs = []
                for i, v in enumerate(y_vars):
                    if v in monthly_df.columns:
                        min_max = Plot.scatter_plot(
                            fig, 'ET', v, monthly_source, colors[i], 
                            label=labels[i]
                        )
                        mins_maxs.append(min_max)
                mins_maxs = np.array(mins_maxs)
                # check if not all pairs are empty, if not plot 1:1
                if not pd.isna(mins_maxs).all():
                    x_min = min(mins_maxs[:,0])
                    x_max = max(mins_maxs[:,1])
                    y_min = min(mins_maxs[:,2])
                    y_max = max(mins_maxs[:,3])
                    ax_min, ax_max = min([x_min,y_min]), max([x_max,y_max])
                    ax_min -= 0.02*abs(ax_max-ax_min)
                    ax_max += 0.02*abs(ax_max-ax_min)
                    fig.x_range=Range1d(ax_min, ax_max)
                    fig.y_range=Range1d(ax_min, ax_max)
                    one2one_vals = np.arange(ax_min, ax_max,1)
                    fig.line(
                        one2one_vals, one2one_vals, legend_label='1:1 line', 
                        color='black', line_dash='dashed'
                    )
                    monthly_scatter.append(fig)
        else:
            print('Evapotranspiration scatter grapths missing all variables')

        #### 
        # multiple soil moisture time series plots
        #### 
        # keep user names for these in hover 
        theta_re = re.compile('theta_[\d+|mean]')
        theta_vars = [
            v for v in variables if theta_re.match(v) and v in df.columns
        ]
        num_lines = len(theta_vars)
        if num_lines > 0 and not df[theta_vars].isna().all().all():
            rename_dict = {k:variables[k] for k in theta_vars}
            tmp_df = df[theta_vars].rename(columns=rename_dict)
            tmp_source = ColumnDataSource(tmp_df)
            plt_vars = list(rename_dict.values())
            colors = Viridis256[0:-1:int(256/num_lines)]
            title = 'Daily Soil Moisture (Multiple Sensors)'
            x_label = 'date'
            y_label = _get_units(theta_vars, units)
            fig = figure(
                x_axis_label=x_label, y_axis_label=y_label, title=title,
                plot_width=plot_width, plot_height=plot_height, 
                name='theta_daily'
            )
            fig = Plot.add_lines(
                fig, tmp_df, plt_vars, colors, x_label, tmp_source, 
                labels=plt_vars
            )
            if fig is not None:
                daily_line.append(fig)
            theta_vars = [
                v for v in variables if theta_re.match(v) and v in\
                    df.columns
            ]
            if fig is not None and monthly and len(theta_vars) > 0:
                # same for monthly fig
                tmp_df = monthly_df[theta_vars].rename(columns=rename_dict)
                tmp_source = ColumnDataSource(tmp_df)
                title = 'Monthly Soil Moisture (Multiple Sensors)'
                fig = figure(
                    x_axis_label=x_label, y_axis_label=y_label, title=title,
                    plot_width=plot_width, plot_height=plot_height,
                    name='theta_monthly'
                )
                fig = Plot.add_lines(
                    fig, tmp_df, plt_vars, colors, x_label, tmp_source,
                    labels=plt_vars
                )
                monthly_line.append(fig)
            # do not print warning if missing multiple soil moisture recordings


        # Aggregate plots and output depending on options
        # remove None values in different figure groups 
        daily_line = list(filter(None, daily_line))
        daily_scatter = list(filter(None, daily_scatter))
        monthly_line = list(filter(None, monthly_line))
        monthly_scatter = list(filter(None, monthly_scatter))
        # link axes for time series plots
        if link_x:
            for each in daily_line:
                each.x_range = daily_line[0].x_range
            for each in monthly_line:
                each.x_range = monthly_line[0].x_range
        figs = daily_line + daily_scatter + monthly_line + monthly_scatter
        grid = gridplot(
            figs, ncols=ncols, plot_width=None, plot_height=None, 
            sizing_mode=sizing_mode, merge_tools=merge_tools, **kwargs
        )
        if output_type == 'show':
            show(column(Div(text=suptitle),grid))
        elif output_type == 'notebook':
            from bokeh.io import output_notebook
            output_notebook()
            show(column(Div(text=suptitle),grid))
        elif output_type == 'save':
            save(column(Div(text=suptitle),grid))
        elif output_type == 'return_figs':
            return figs
        elif output_type == 'return_grid':
            return grid

        reset_output()

0 Source : generate_plots.py
with Apache License 2.0
from oskopek

def export_plots(p: bokeh.plotting.figure,
                 filename: str,
                 title: str,
                 width: int = WIDTH,
                 height: int = HEIGHT,
                 box: bool = False,
                 show_title: bool = False,
                 y_range_start: Optional[float] = None,
                 y_range_end: Optional[float] = None) -> None:
    """Write a Bokeh figure to ``<filename>.html`` and ``<filename>.png``.

    The HTML page is saved first (with CDN resources), so it reflects the
    figure before the range/size overrides below are applied. The figure is
    then modified in place and exported as a PNG.

    Args:
        p: Figure to export. It is mutated: title, y-range, sizing mode,
            width/height and toolbar location may all be changed.
        filename: Output path without extension.
        title: Title of the saved HTML document.
        width: Width of the PNG in pixels.
        height: Height of the PNG in pixels (ignored when ``box`` is true).
        box: When true, the PNG is square with side ``width``.
        show_title: When false, the figure title is removed before saving.
        y_range_start: Optional lower bound for the y axis of the PNG.
        y_range_end: Optional upper bound for the y axis of the PNG.
    """
    # HTML
    if not show_title:
        p.title = None
    bokeh.plotting.save(p, title=title, filename=filename + ".html", resources=bokeh.resources.CDN)

    # PNG
    # Compare against None: a bound of 0.0 is a legitimate request and must
    # not be discarded by a truthiness test.
    if y_range_start is not None:
        p.y_range.start = y_range_start
    if y_range_end is not None:
        p.y_range.end = y_range_end

    set_font_size(p)
    p.sizing_mode = "fixed"
    p.width = width
    p.height = width if box else height
    p.toolbar_location = None
    # export_png resizes a Plot to the width/height it is given, so pass the
    # dimensions chosen above rather than the module defaults; otherwise the
    # ``width``/``height``/``box`` arguments would have no effect on the PNG.
    bokeh.io.export_png(p, filename=filename + ".png", height=p.height, width=p.width)

    # SVG:
    # p.output_backend = "svg"
    # bokeh.io.export_svgs(p, filename=filename + ".svg")
    #
    # os.system(f"inkscape --without-gui --export-pdf={filename}.pdf {filename}.svg")


def box_whiskers_plot(df: pd.DataFrame, out_folder: str, statistic: str = "ll", subtitle: str = "") -> None:

0 Source : utils.py
with Apache License 2.0
from oskopek

def export_plots(p: bokeh.plotting.figure,
                 filename: str,
                 title: str,
                 width: int = WIDTH,
                 height: int = HEIGHT,
                 box: bool = False,
                 show_title: bool = False,
                 y_range_start: Optional[float] = None,
                 y_range_end: Optional[float] = None,
                 x_range_start: Optional[float] = None,
                 x_range_end: Optional[float] = None) -> None:
    """Write a Bokeh figure to ``<filename>.html`` and ``<filename>.png``.

    The HTML page is saved first (with CDN resources), so it reflects the
    figure before the range/size overrides below are applied. The figure is
    then modified in place and exported as a PNG.

    Args:
        p: Figure to export. It is mutated: title, axis ranges, sizing mode,
            width/height and toolbar location may all be changed.
        filename: Output path without extension.
        title: Title of the saved HTML document.
        width: Width of the PNG in pixels (ignored when ``box`` is true).
        height: Height of the PNG in pixels.
        box: When true, the PNG is square with side ``height``.
        show_title: When false, the figure title is removed before saving.
        y_range_start: Optional lower bound for the y axis of the PNG.
        y_range_end: Optional upper bound for the y axis of the PNG.
        x_range_start: Optional lower bound for the x axis of the PNG.
        x_range_end: Optional upper bound for the x axis of the PNG.
    """
    # HTML
    if not show_title:
        p.title = None
    bokeh.plotting.save(p, title=title, filename=filename + ".html", resources=bokeh.resources.CDN)

    # PNG
    # Compare against None: a bound of 0.0 is a legitimate request and must
    # not be discarded by a truthiness test.
    if y_range_start is not None:
        p.y_range.start = y_range_start
    if y_range_end is not None:
        p.y_range.end = y_range_end
    if x_range_start is not None:
        p.x_range.start = x_range_start
    if x_range_end is not None:
        p.x_range.end = x_range_end

    set_font_size(p)
    p.sizing_mode = "fixed"
    p.width = height if box else width
    p.height = height
    p.toolbar_location = None
    # export_png resizes a Plot to the width/height it is given, so pass the
    # dimensions chosen above rather than the module defaults; otherwise the
    # ``width``/``height``/``box`` arguments would have no effect on the PNG.
    bokeh.io.export_png(p, filename=filename + ".png", height=p.height, width=p.width)

    # SVG:
    # p.output_backend = "svg"
    # bokeh.io.export_svgs(p, filename=filename + ".svg")
    #
    # os.system(f"inkscape --without-gui --export-pdf={filename}.pdf {filename}.svg")

0 Source : rca.py
with MIT License
from smartyal

def rca(functionNode):
    """Build an x-y correlation scatter plot and save it as an HTML file.

    For every annotation of type "time" that carries the selected tag, the
    time series of the first selected variable is read between the
    annotation's start and end time and reduced to one feature value with
    ``calc_feature``. That value is paired with the target value looked up
    at the midpoint of the annotation interval. All valid pairs are drawn as
    a Bokeh scatter plot, which is written (with inlined Bokeh resources) to
    the file named by the node's "outputFileName" child under
    ``web/customui``.

    :param functionNode: function node providing the logger, the progress
        node and the configuration children read below
    :return: True
    """
    logger = functionNode.get_logger()
    logger.info("==>>>> in rca (root cause analysis " + functionNode.get_browse_path())
    progressNode = functionNode.get_child("control").get_child("progress")
    progressNode.set_value(0.1)

    variables = functionNode.get_child("selectedVariables").get_leaves()
    tag = functionNode.get_child("selectedTags").get_value() #only one tag
    annotations = functionNode.get_child("annotations").get_leaves()
    feature = functionNode.get_child("selectedFeatures").get_value()
    algo = functionNode.get_child("selectedAlgorithms").get_value()  # read but not used below
    target = functionNode.get_child("selectedTarget").get_target()

    p=Progress(progressNode)
    p.set_divisor(len(annotations)/0.5)
    p.set_offset(0.1)
    #now create the data as x-y

    results = {"x":[],"y":[]}
    # only the first selected variable is analysed
    var = variables[0]
    #now iterate over all annotations of the matching type and create feature
    for idx,anno in enumerate(annotations):
        p.set_progress(idx)
        if (anno.get_child("type").get_value() == "time") and (tag in anno.get_child("tags").get_value()):
            startTime =anno.get_child("startTime").get_value()
            endTime = anno.get_child("endTime").get_value()
            data = var.get_time_series(startTime,endTime)
            #now create the feature
            feat = calc_feature(data["values"],feature)
            # target is sampled at the midpoint of the annotation interval
            targetValue = get_target(target,(date2secs(startTime)+date2secs(endTime))/2)
            # NOTE(review): the truthiness test also rejects a legitimate
            # value of exactly 0 - confirm whether that is intended.
            if feat and targetValue and numpy.isfinite(feat) and numpy.isfinite(targetValue):
                results["x"].append(feat)
                results["y"].append(targetValue)
            else:
                # get_name must be called; interpolating the bare attribute
                # would log the bound-method repr instead of the name
                logger.warning(f"no result for {var.get_name()} @ {startTime}, anno:{tag}, feat:{feat}, target: {target}")

    #now we have all the x-y


    progressNode.set_value(0.7)
    fig = figure(title = "x-y Correlation Plot "+var.get_name(),
            tools=[PanTool(), WheelZoomTool(),ResetTool(),SaveTool()],
            plot_height=300,
            x_axis_label=feature+"("+var.get_name()+") @ "+tag,
            y_axis_label=target.get_name())
    fig.toolbar.logo = None
    curdoc().theme = Theme(json=themes.darkTheme)
    fig.xaxis.major_label_text_color = themes.darkTickColor
    fig.yaxis.major_label_text_color = themes.darkTickColor


    fig.scatter(x=results["x"], y=results["y"], size=5, fill_color="#d9b100", marker="o")
    fileName = functionNode.get_child("outputFileName").get_value()
    filePath = os.path.join(myDir,'./../web/customui/'+fileName)
    progressNode.set_value(0.8)
    output_file(filePath,mode="inline")#inline: put the bokeh .js into this html, otherwise the default cdn will be taken, might cause CORS problems)
    save(fig)


    #print(results)

    return True
#
 

def data_cleaning(annotations,order=None,logger=None):

0 Source : rca.py
with MIT License
from smartyal

def rca2(functionNode):
    """
    Root cause analysis: regress a target on features taken from annotations.

    For every entry below "selection", each selected variable is cut at every
    annotation of type "time" that carries one of the selected tags. The
    selected features are calculated on the values inside the annotation and
    collected in one table column per variable/tag/feature combination. The
    target is sampled at the centre time of each annotation.

    The table is written to an .npz file, scaled with StandardScaler, split
    with train_test_split and fitted with LassoCV (algorithm "lasso") or
    LinearRegression (anything else). An HTML report is written to the
    "report" child and a bokeh plot of true versus predicted target values is
    saved to web/customui/<outputFileName>.

    :param functionNode: the function node; its children "control",
        "annotations", "annotationsOrder", "selectedAlgorithm",
        "selectedTarget", "selection", "outputFileName" and "report" are used
    :return: True
    """
    logger = functionNode.get_logger()
    logger.info("==>>>> in rca2 (root cause analysis " + functionNode.get_browse_path())
    progressNode = functionNode.get_child("control").get_child("progress")
    progressNode.set_value(0.1)

    # the report is an html fragment; the style div is closed before writing it
    report = '<i>REPORT</i><br><div style="font-size:85%">'

    annotations = functionNode.get_child("annotations").get_leaves()
    order = functionNode.get_child("annotationsOrder").get_value()
    logger.debug("filtering by order")
    # data_cleaning hands back a list of processes, each one a list of annotations
    annotations = data_cleaning(annotations, order=order, logger=logger)
    report += f"found {len(annotations)} valid processes <br>"

    # for now, flatten them out
    annotations = [subprocess for process in annotations for subprocess in process]

    algo = functionNode.get_child("selectedAlgorithm").get_value()
    target = functionNode.get_child("selectedTarget").get_target()

    progressNode.set_value(0.3)
    # now we are building up the table by iterating all the children in "selection"
    entries = functionNode.get_child("selection").get_children()

    table = {"target": []}
    firstVariable = True

    for entry in entries:
        logger.debug(f"entry {entry.get_name()}")
        # each entry is a combination of variables, tags and features
        variables = entry.get_child("selectedVariables").get_targets()
        tags = entry.get_child("selectedTags").get_value()
        features = entry.get_child("selectedFeatures").get_value()
        for var in variables:
            logger.debug(f"processing variable: {var.get_name()} with tags {tags} and features {features}")
            for tag in tags:
                row = 0
                for anno in annotations:
                    if anno.get_child("type").get_value() != "time":
                        continue
                    if tag not in anno.get_child("tags").get_value():
                        continue

                    startTime = anno.get_child("startTime").get_value()
                    endTime = anno.get_child("endTime").get_value()

                    # Look up the target first: an annotation without a usable
                    # target must not contribute features, otherwise the feature
                    # columns get longer than the target column.
                    # NOTE(review): the truthiness test also rejects a target of
                    # exactly 0 - confirm what get_target returns for "no value".
                    targetValue = get_target(target, (date2secs(startTime) + date2secs(endTime)) / 2)
                    if not targetValue:
                        logger.warning(f"no corrrect target value for {startTime} - {endTime}")
                        continue

                    data = var.get_time_series(startTime, endTime)["values"]
                    # we take only the values "inside" the annotation
                    if len(data) > 2:
                        data = data[1:-1]

                    # now create the features, one column per variable/tag/feature
                    for feature in features:
                        columnName = var.get_name() + "_" + tag + "_" + feature
                        table.setdefault(columnName, []).append(calc_feature(data, feature))

                    if firstVariable:
                        # for the first variable we also write the target
                        table["target"].append(targetValue)
                    elif table["target"][row] != targetValue:
                        # for all others we make sure we have the same target value for that case (sanity check)
                        logger.warning(f'problem target {table["target"][row]} !=> {targetValue}')
                    row += 1

                firstVariable = False

    # now we have the table, try a model
    progressNode.set_value(0.5)

    if algo == "lasso":
        reg = linear_model.LassoCV()
        report += " using lasso Regression with auto-hyperparams <br>"
    else:
        # default
        report += " using linear Regression <br>"
        reg = linear_model.LinearRegression()

    columnNames = []
    dataTable = []
    saveDict = {}
    for name, column in table.items():
        saveDict[name] = numpy.asarray(column, dtype=numpy.float64)
        if name == "target":
            continue
        dataTable.append(column)
        columnNames.append(name)

    # keep the raw table next to the plot, numpy.savez appends ".npz"
    # for loading:
    #   get = numpy.load(name+".npz")
    #   for name in get.files:
    #       data = get[name]
    fileName = functionNode.get_child("outputFileName").get_value()
    filePath = os.path.join(myDir, './../web/customui/' + fileName.split(".")[0])
    numpy.savez(filePath, **saveDict)

    # one row per annotation, one column per feature
    x = numpy.asarray(dataTable).T
    scaler = StandardScaler()
    scaler.fit(x)
    x = scaler.transform(x)

    y = table["target"]
    x_train, x_test, y_train, y_test = train_test_split(x, y)
    reg.fit(x_train, y_train)

    y_hat = reg.predict(x_test)
    y_repeat = reg.predict(x_train)

    # check over/underfitting
    r_train = r2_score(y_train, y_repeat)
    r_test = r2_score(y_test, y_hat)
    report += "R<sup>2</sup> train= %.4g, R<sup>2</sup> test = %.4g <br>" % (r_train, r_test)

    # correlation of every single feature column with the target
    pearsons = [pearsonr(col, y)[0] for col in x.T]

    # and finally the correlations between y and yhat
    y_pearson_train = pearsonr(y_train, y_repeat)[0]
    y_pearson_test = pearsonr(y_test, y_hat)[0]
    report += "pearsonCorr y/y_hat train:%.4g , test:%.4g <br>" % (y_pearson_train, y_pearson_test)

    report += "regression coefficients, pearsons correlations:<br>"
    for col, coef, pear in zip(columnNames, reg.coef_, pearsons):
        report += "    %s:%.4g,   %.4g <br>" % (col, coef, pear)

    # write report
    progressNode.set_value(0.8)
    report += "</div>"  # close the style div
    functionNode.get_child("report").set_value(report)

    # make a plot
    hover1 = HoverTool(tooltips=[('x,y', '$x,$y')], mode='mouse')
    hover1.point_policy = 'snap_to_data'
    hover1.line_policy = "nearest"
    tools = [PanTool(), WheelZoomTool(), BoxZoomTool(), ResetTool(), SaveTool(), hover1]
    title = "prediction results on " + algo
    fig = figure(title=title, tools=tools, plot_height=300, plot_width=400)
    fig.toolbar.logo = None

    curdoc().theme = Theme(json=themes.darkTheme)
    fig.xaxis.major_label_text_color = themes.darkTickColor
    fig.yaxis.major_label_text_color = themes.darkTickColor
    fig.xaxis.axis_label = target.get_name()
    fig.xaxis.axis_label_text_color = "white"
    fig.yaxis.axis_label = "predicted Values for " + target.get_name()
    fig.yaxis.axis_label_text_color = "white"
    fig.circle(y_train, y_repeat, size=4, line_color="white", fill_color="white", name="train", legend_label="train")
    fig.circle(y_test, y_hat, line_color="#d9b100", fill_color="#d9b100", size=4, name="test", legend_label="test")

    # dashed diagonal: a perfect prediction would lie on this line
    mini = min([min(y_train), min(y_repeat), min(y_test), min(y_hat)])
    maxi = max([max(y_train), max(y_repeat), max(y_test), max(y_hat)])
    fig.line([mini, maxi], [mini, maxi], line_color="grey", line_dash="dashed")

    filePath = os.path.join(myDir, './../web/customui/' + fileName)
    fig.legend.location = "top_left"
    # inline: put the bokeh .js into the html instead of loading it from the cdn
    output_file(filePath, mode="inline")
    save(fig)

    return True




def prepare_annos_filter(functionNode):

0 Source : varstatistics.py
with MIT License
from smartyal

def varstatistics(functionNode):
    """
    Plot value histograms (and per-annotation trends) of the selected variables.

    The variables referenced by "variable" are evaluated inside the
    startTime/endTime of the referenced widget. If "annotations" holds tags,
    only values inside "time" annotations carrying one of these tags (and lying
    inside the widget's time range) are used, and a second "trends" figure is
    added showing, per annotation, the 1%/25%/50%/75%/99% quantiles as a box
    plot plus a line through the means.

    The result is saved as a self-contained html file under
    web/customui/<fileName>.

    :param functionNode: the function node; its children "control", "variable",
        "widget", "bins", "annotations" and "fileName" are used
    :return: True if the plot was written, False if there was nothing to plot
        (give_up() is called with the reason in that case)
    """
    logger = functionNode.get_logger()
    logger.info("==>>>> statistics " + functionNode.get_browse_path())
    progressNode = functionNode.get_child("control").get_child("progress")
    progressNode.set_value(0)

    varNodes = functionNode.get_child("variable").get_targets()
    widget = functionNode.get_child("widget").get_target()
    bins = functionNode.get_child("bins").get_value()
    tags = functionNode.get_child("annotations").get_value()
    startTime = date2secs(widget.get_child("startTime").get_value())
    endTime = date2secs(widget.get_child("endTime").get_value())

    # one info dict per variable, keyed by the node id
    varInfos = {node.get_id(): {"node": node} for node in varNodes}

    # first 30% progress: collect the matching annotations
    prog = Progress(progressNode)
    progressNode.set_value(0.1)
    prog.set_offset(0.1)

    allAnnos = []
    if tags:
        allAnnoNodes = widget.get_child("hasAnnotation.annotations").get_leaves()
        prog.set_divisor(len(allAnnoNodes) / 0.2)
        for index, node in enumerate(allAnnoNodes):
            prog.set_progress(index)
            if node.get_child("type").get_value() != "time":
                continue
            thisTags = node.get_child("tags").get_value()
            if not any(tag in tags for tag in thisTags):
                continue
            anno = {child.get_name(): child.get_value() for child in node.get_children()}
            # take this anno only if it is inside the current start/end time
            insideRange = (date2secs(anno["startTime"]) >= startTime
                           and date2secs(anno["endTime"]) <= endTime)
            if insideRange and anno["startTime"] < anno["endTime"]:
                allAnnos.append(anno)
        if not allAnnos:
            give_up(functionNode, "no matching annotations in selected time")
            return False

    progressNode.set_value(0.3)

    logger.debug(f"statistics annotations to look at: {len(allAnnos)}")
    prog.set_offset(0.3)
    totalAnnos = max(len(allAnnos), 1)
    totalCount = len(varInfos) * totalAnnos

    prog.set_divisor(totalCount / 0.3)
    totalValids = 0
    for varIndex, info in enumerate(varInfos.values()):
        if tags:
            # concatenate the values of all annotations
            values = numpy.asarray([], dtype=numpy.float64)
            for annoIndex, anno in enumerate(allAnnos):
                thisValues = info["node"].get_time_series(anno["startTime"], anno["endTime"])["values"]
                values = numpy.append(values, thisValues)
                prog.set_progress(varIndex * totalAnnos + annoIndex)
        else:
            values = info["node"].get_time_series(startTime, endTime)["values"]

        # numpy.histogram cannot derive a bin range from data containing
        # nan/inf, so only the finite values are binned
        values = numpy.asarray(values, dtype=numpy.float64)
        finiteValues = values[numpy.isfinite(values)]
        valids = len(finiteValues)
        totalValids += valids
        hist, edges = numpy.histogram(finiteValues, bins=bins)
        if valids:
            hist = hist / valids  # normalize to relative frequencies
        info["hist"] = hist
        info["edges"] = edges

    if totalValids == 0:
        give_up(functionNode, "all Variables are have no data in the time and annotations selected")
        return False

    progressNode.set_value(0.6)

    # make the histogram plot
    hover1 = HoverTool(tooltips=[('x,y', '$x,$y')], mode='mouse')
    hover1.point_policy = 'snap_to_data'
    hover1.line_policy = "nearest"
    tools = [PanTool(), WheelZoomTool(), BoxZoomTool(), ResetTool(), SaveTool(), hover1]

    title = "Statistics of " + str([info["node"].get_name() for info in varInfos.values()])
    if tags:
        title = title + " in annotation: " + str(tags)

    fig = figure(title=title, tools=tools, plot_height=300)
    fig.toolbar.logo = None

    curdoc().theme = Theme(json=themes.darkTheme)
    fig.xaxis.major_label_text_color = themes.darkTickColor
    fig.yaxis.major_label_text_color = themes.darkTickColor

    # cycle through the palette so that more variables than colors do not fail
    palette = themes.darkLineColors
    for index, info in enumerate(varInfos.values()):
        col = palette[index % len(palette)]
        edges = info["edges"]
        fig.quad(top=info["hist"], bottom=0, left=edges[:-1], right=edges[1:],
                 fill_color=col, line_color=col, alpha=0.8, legend_label=info["node"].get_name())

    fig.legend.location = "top_left"
    fileName = functionNode.get_child("fileName").get_value()
    filePath = os.path.join(myDir, './../web/customui/' + fileName)

    # now make the trend box plot, but only for tags:
    # for each variable we create statistics per annotation and prepare the data
    # {"node":Node(), "boxLower":[], "boxUpper":[], "median":[], "time":[],
    #  "limitUpper":[], "limitLower":[], "mean":[]}
    if tags:
        boxPlots = []
        for info in varInfos.values():
            box = {"node": info["node"], "boxLower": [], "boxUpper": [], "median": [], "time": [],
                   "limitUpper": [], "limitLower": [], "mean": []}
            for anno in allAnnos:
                data = info["node"].get_time_series(anno["startTime"], anno["endTime"])
                annoValues = data["values"]
                if len(annoValues):
                    # remove the nan
                    annoValues = annoValues[numpy.isfinite(annoValues)]
                if not len(annoValues):
                    continue

                # make the statistics; the box is placed at the median time
                # NOTE(review): *1000 presumably converts seconds to the
                # milliseconds bokeh uses for datetime axes - confirm
                box["time"].append(numpy.median(data["__time"]) * 1000)
                box["limitLower"].append(numpy.quantile(annoValues, 0.01))
                box["limitUpper"].append(numpy.quantile(annoValues, 0.99))
                box["boxLower"].append(numpy.quantile(annoValues, 0.25))
                box["boxUpper"].append(numpy.quantile(annoValues, 0.75))
                box["median"].append(numpy.median(annoValues))
                box["mean"].append(numpy.mean(annoValues))
            boxPlots.append(box)

        tickFormat = "%Y-%m-%d-T%H:%M:%S"
        custom = """var local = moment(value).tz('UTC'); return local.format();"""

        hover = HoverTool(
            tooltips=[('date', '@x{%F}')],
            formatters={'@x': CustomJSHover(code=custom)},
            mode='mouse'
        )
        hover.point_policy = 'snap_to_data'
        hover.line_policy = "nearest"
        tools = [PanTool(), BoxZoomTool(), WheelZoomTool(), ResetTool(), hover, SaveTool()]

        fig2 = figure(title="trends", tools=tools, plot_height=300, x_axis_type='datetime')
        fig2.xaxis.major_label_text_color = themes.darkTickColor
        fig2.yaxis.major_label_text_color = themes.darkTickColor

        progressNode.set_value(0.7)

        fig2.xaxis.formatter = DatetimeTickFormatter(years=tickFormat, days=tickFormat, months=tickFormat,
                                                     hours=tickFormat, hourmin=tickFormat, minutes=tickFormat,
                                                     minsec=tickFormat, seconds=tickFormat)
        fig2.toolbar.logo = None

        for index, box in enumerate(boxPlots):
            # each box is for one variable
            col = palette[index % len(palette)]
            times = box["time"]
            median = numpy.asarray(box["median"])

            # whiskers from the 1%/99% limits to the box
            fig2.segment(times, box["limitUpper"], times, box["boxUpper"], line_color=col)
            fig2.segment(times, box["limitLower"], times, box["boxLower"], line_color=col)

            # upper box: from the median to the 75% quantile, fixed width of 20 screen pixels
            sizUpper = numpy.asarray(box["boxUpper"]) - median
            fig2.rect(x=times, y=median + sizUpper / 2, width_units='screen', width=20,
                      height=sizUpper, fill_color=col, line_color="black")

            # lower box: from the 25% quantile to the median
            sizLower = median - numpy.asarray(box["boxLower"])
            fig2.rect(x=times, y=median - sizLower / 2, width_units='screen', width=20,
                      height=sizLower, fill_color=col, line_color="black")

            # sort data by time for the line through the means
            x = numpy.asarray(times)
            y = numpy.asarray(box["mean"])
            order = numpy.argsort(x)
            fig2.line(x[order], y[order], line_color=col)

        progressNode.set_value(0.8)

    # inline: put the bokeh .js into this html, otherwise the default cdn will be taken, might cause CORS problems
    output_file(filePath, mode="inline")
    if tags:
        save(layout([[fig], [fig2]]))
    else:
        save(fig)

    return True

0 Source : plot.py
with Apache License 2.0
from WSWUP

def daily_comparison(input_csv, out_dir=None, year_filter=None):
    """
    Compare daily weather station data from
    `PyWeatherQAQC <https://github.com/WSWUP/pyWeatherQAQC>`_ with gridMET
    for each month in year specified.

    The :func:`daily_comparison` function produces HTML files with time series
    and scatter plots of station versus gridMET climate variables. It uses the
    `bokeh <https://bokeh.pydata.org/en/latest/>`_ module to create interactive
    plots, e.g. they can be zoomed in/out and panned. Separate plot files are
    created for each month.

    Arguments:
        input_csv (str): path to input CSV file containing paired station/
            gridMET metadata. This file is created by running
            :mod:`gridwxcomp.prep_input` followed by
            :mod:`gridwxcomp.download_gridmet_opendap`.

    Keyword Arguments:
        out_dir (str or None): default None. Directory below which the
            "daily_comp_plots" folder with the comparison plots is created,
            if None the current working directory is used.
        year_filter (str or None): default None. Single year YYYY or range
            YYYY-YYYY

    Returns:
        None

    Example:
        The :func:`daily_comparison` function will generate HTML files with
        bokeh plots for paired climate variables, e.g. etr_mm, eto_mm,
        u2_ms, tmin_c, tmax_c, srad_wm2, ea_kpa, and Ko (dew point depression).

        From the command line, use the "plot" command with the
        ``[-t, --plot-type]`` option set to station-grid-comp and
        the ``[-f, --freq]`` option left as default ("daily"),

        .. code-block:: sh

            $ gridwxcomp plot merged_input.csv -t station-grid-comp -o comp_plots_2016 -y 2016

        or within Python,

        >>> from gridwxcomp.plot import daily_comparison
        >>> daily_comparison('merged_input.csv', 'comp_plots_2016', '2016')

        Both methods result in monthly HTML
        `bokeh <https://bokeh.pydata.org/en/latest/>`_ plots being saved to
        "comp_plots_2016/daily_comp_plots/STATION_ID/" where "STATION_ID"
        is the station ID as found in the input CSV file (spaces removed).
        A file is saved for each month with the station ID, month, and year
        in the file name.

    Note:
        If a month holds five or fewer rows of data the plot for that
        month will not be created. Stations whose station or gridMET file
        is not given or does not exist are skipped.

    """
    if not out_dir:
        out_dir = os.getcwd()

    if not os.path.isdir(out_dir):
        print('{} does not exist, creating directory'.format(out_dir))
        os.makedirs(out_dir)

    year = year_filter
    logging.info('\nProcessing Year: {}'.format(year))

    # Import Station/GRIDMET meta data
    paired_data = pd.read_csv(input_csv, sep=',')

    # Everything needed per compared variable; the order of all lists below
    # must match (STATION/gridMET pairs and their plot labels).
    station_vars = ['TMin (C)', 'TMax (C)', 'wx_Ko_c', 'Rs (w/m2)',
                    'ws_2m (m/s)', 'Vapor Pres (kPa)', 'RHAvg (%)',
                    'Precip (mm)', 'ETo (mm)', 'ETr (mm)']
    gridmet_vars = ['tmin_c', 'tmax_c', 'grid_Ko_c', 'srad_wm2', 'u2_ms',
                    'ea_kpa', 'rh_avg', 'prcp_mm', 'eto_mm', 'etr_mm']
    # figure titles
    title_list = ['TMin', 'TMax', 'Ko', 'Rs', 'WS 2m',
                  'ea', 'RH', 'Prcp', 'ETo', 'ETr']
    # timeseries y labels
    ts_ylabel_list = ['TMin (C)', 'TMax (C)', 'Ko (C)', 'Rs (w/m2)',
                      'WS 2m (m/s)', 'ea (kPa)', 'Avg RH (%)',
                      'Prcp (mm)', 'ETo (mm)', 'ETr (mm)']
    # scatter x labels
    xlabel_list = ['Station TMin (C)', 'Station TMax (C)',
                   'Station Ko (C)', 'Station Rs (w/m2)',
                   'Station WS 2m (m/s)', 'Station ea (kPa)',
                   'Station RH (%)', 'Station Prcp (mm)',
                   'Station ETo (mm)', 'Station ETr (mm)']
    # scatter y labels
    ylabel_list = ['gridMET TMin (C)', 'gridMET TMax (C)',
                   'gridMET Ko (C)', 'gridMET Rs (w/m2)',
                   'gridMET WS 2m (m/s)', 'gridMET ea (kPa)',
                   'gridMET RH (%)', 'gridMET Prcp (mm)',
                   'gridMET ETo (mm)', 'gridMET ETr (mm)']
    legendx = 'Station'
    legendy = 'gridMET'

    # Loop through each station/gridmet pair
    for index, row in paired_data.iterrows():
        station_path = row.STATION_FILE_PATH
        logging.info('\nStation: {}'.format(row.STATION_ID))

        # Check if station path is given in input
        if pd.isnull(station_path):
            logging.info('Station path is not given. Skipping.')
            continue

        # Skip if file does not exist
        if not os.path.exists(station_path):
            logging.info('SKIPPING {}. NO STATION FILE FOUND.'.format(
                station_path))
            continue

        # pyweather QAQC format if excel
        if station_path.endswith(('.xlsx', '.xls')):
            station_data = pd.read_excel(
                station_path, sheet_name='Corrected Data',
                parse_dates=True, index_col=0)
        else:
            station_data = pd.read_csv(
                station_path, parse_dates=True, index_col=0)

        # Filter years
        if year:
            station_data, year_str = parse_yr_filter(
                station_data, year, label=row.STATION_ID)
        else:
            start_yr = int(station_data.index.year.min())
            end_yr = int(station_data.index.year.max())
            year_str = '{}_{}'.format(start_yr, end_yr)

        # Import GRIDMET data, skip if the file is not given (empty CSV cell
        # is read as NaN, which os.path.exists cannot handle) or missing
        grid_path = row.GRID_FILE_PATH
        if pd.isnull(grid_path) or not os.path.exists(grid_path):
            print('SKIPPING {}. NO GRIDMET FILE FOUND.'.format(grid_path))
            continue

        grid_data = pd.read_csv(grid_path, sep=',', parse_dates=True,
                                index_col='date')

        # Add Tdew to gridmet dataset Teten's equation ASCE REF-ET
        #  supporting equations Appendix 2-1
        grid_data['tdew_c'] = (116.91 + 237.3 * np.log(grid_data.ea_kpa)) /\
                              (16.78 - np.log(grid_data.ea_kpa))

        # Calculate Tmin - Tdew = Ko for both Station and GridMET
        # Dew Point Depression
        grid_data['grid_Ko_c'] = grid_data.tmin_c - grid_data.tdew_c
        station_data['wx_Ko_c'] = station_data['TMin (C)'] - \
                                  station_data['TDew (C)']

        # grid RH Avg calc
        # Saturated Vapor Pressure
        grid_data['tavg_c'] = (grid_data.tmin_c + grid_data.tmax_c) / 2
        grid_data['e_sat_kpa'] = 0.6108 * np.exp(
            (17.27 * grid_data.tavg_c) /
            (grid_data.tavg_c + 237.3))
        # Average RH (%)
        grid_data['rh_avg'] = (grid_data.ea_kpa / grid_data.e_sat_kpa) * 100

        # Combine station and gridMET dataframes (only plotting variables)
        merged = pd.concat([
            station_data[station_vars], grid_data[gridmet_vars]], axis=1
        )

        # station ID without spaces is used for folder and file names
        station_id = str(row.STATION_ID).replace(" ", "")
        out_folder = os.path.join(out_dir, 'daily_comp_plots', station_id)

        for month in range(1, 13):
            logging.info('Month: {}'.format(month))
            monthly_data = merged[merged.index.month == month]

            if len(monthly_data.index) <= 5:
                logging.info('Skipping. Less than 5 observations in '
                             'month.')
                continue

            # Create output folder if it doesn't exist
            if not os.path.exists(out_folder):
                os.makedirs(out_folder)

            # Output to HTML file
            out_file_path = os.path.join(
                out_folder,
                '{}_{:02}_{}.html'.format(station_id, month, year_str))
            output_file(out_file_path)

            # list of [timeseries, scatter] figure pairs, one per variable
            figure_list = []
            # x range of the first timeseries figure actually created in this
            # month; all later ones link to it. Tracking the range (instead
            # of testing for the first variable) keeps this working when the
            # first variable has no data and is skipped.
            shared_x_range = None

            # loop through and create figures for each variable using vars
            # and plot labels from lists above
            for x_var, y_var, title, ts_ylabel, xlabel, ylabel in zip(
                    station_vars, gridmet_vars, title_list, ts_ylabel_list,
                    xlabel_list, ylabel_list):

                # lstsq cannot have nans (drop nas for each var separately)
                monthly_data2 = monthly_data[[x_var, y_var]].dropna()

                # keep the dates as a column and switch to an integer index
                monthly_data2['date'] = monthly_data2.index
                monthly_data2.index.name = ''
                monthly_data2.reset_index(inplace=True)

                if monthly_data2.empty:
                    logging.info("Skipping {}. No Data.".format(x_var))
                    continue

                x_vals = monthly_data2[x_var]
                y_vals = monthly_data2[y_var]

                # Timeseries plot
                if shared_x_range is None:
                    # Initial timeseries plot establishes xrange for link axes
                    p1 = figure(plot_width=800, plot_height=400,
                                title=title, x_axis_type="datetime",
                                y_axis_label=ts_ylabel)
                    shared_x_range = p1.x_range
                else:
                    p1 = figure(plot_width=800, plot_height=400,
                                title=title, x_axis_type="datetime",
                                y_axis_label=ts_ylabel,
                                x_range=shared_x_range)
                p1.line(monthly_data2.index, x_vals, color="navy",
                        alpha=0.5, legend_label=legendx, line_width=2)
                p1.line(monthly_data2.index, y_vals, color="red",
                        alpha=0.5, legend_label=legendy, line_width=2)
                # the x values are row numbers, label the ticks with dates
                p1.xaxis.major_label_overrides = {
                    pos: date.strftime('%Y %b %d') for pos, date in enumerate(
                        pd.to_datetime(monthly_data2.date)
                    )
                }

                # 1 to 1 Plot
                # Regression through Zero
                # https://stackoverflow.com/questions/9990789/how-to-force-
                # zero-interception-in-linear-regression/9994484#9994484
                m = np.linalg.lstsq(x_vals.values.reshape(-1, 1),
                                    y_vals, rcond=None)[0][0]

                # common axis extent of both variables, padded by 2
                data_min = np.min([y_vals, x_vals])
                data_max = np.max([y_vals, x_vals])
                axis_lo = int(data_min - 2)
                axis_hi = int(data_max + 2)
                r_x = list(range(axis_lo, int(data_max + 3)))
                r_y = [step * m for step in r_x]

                # Plots
                p2 = figure(plot_width=400, plot_height=400,
                            x_axis_label=xlabel, y_axis_label=ylabel,
                            title='Slope Through Zero: m = {}'.format(
                                round(m, 4)))
                p2.circle(x_vals, y_vals, size=15, color="navy", alpha=0.5)
                p2.line([axis_lo, axis_hi], [axis_lo, axis_hi],
                        color="black", legend_label='1 to 1 line')
                p2.line(r_x, r_y, color="red", legend_label='Reg thru zero')
                p2.legend.location = "top_left"

                # Append [p1, p2] to figure_list (create list of lists)
                figure_list.append([p1, p2])

            # Plot all figures in list
            fig = gridplot(figure_list, toolbar_location="left")

            # Save the figure
            save(fig)

def monthly_comparison(input_csv, out_dir=None, day_limit=10):

0 Source : plot.py
with Apache License 2.0
from WSWUP

def monthly_comparison(input_csv, out_dir=None, day_limit=10):
    """
    Compare monthly average weather station data from
    `PyWeatherQAQC <https://github.com/WSWUP/pyWeatherQAQC>`_ with gridMET.

    The :func:`monthly_comparison` function produces HTML files with time series
    and scatter plots of station versus gridMET climate variables of monthly
    mean data (monthly totals for precipitation). It uses the
    `bokeh <https://bokeh.pydata.org/en/latest/>`_ module to create
    interactive plots, e.g. they can be zoomed in/out and panned.

    Arguments:
        input_csv (str): path to input CSV file containing
            paired station/gridMET metadata. This file is
            created by running :mod:`gridwxcomp.prep_input` followed by
            :mod:`gridwxcomp.download_gridmet_opendap`. The columns
            ``STATION_ID``, ``STATION_FILE_PATH`` and ``GRID_FILE_PATH``
            are read from it.

    Keyword Arguments:
        out_dir (str): default None. Parent directory for the comparison
            plots; the current working directory is used if not given.
            Plots are always written to a "monthly_comp_plots" subdirectory
            of ``out_dir``.
        day_limit (int): default 10. Number of paired days per month that must
            exist for variable to be plotted. Monthly means/sums built from
            fewer days are set to NaN.

    Returns:
        None

    Example:
        The :func:`monthly_comparison` function will generate HTML files with
        bokeh plots for paired climate variable, e.g. etr_mm,
        eto_mm, u2_ms, tmin_c, tmax_c, srad_wm2, ea_kpa, and Ko (dew point
        depression).

        From the command line, use the "plot" command with the
        ``[-t, --plot-type]`` option set to station-grid-comp and
        the ``[-f, --freq]`` option set to "monthly",

        .. code-block:: sh

            $ gridwxcomp plot merged_input.csv -t station-grid-comp --freq monthly -o monthly_plots

        or within Python,

        >>> from gridwxcomp.plot import monthly_comparison
        >>> monthly_comparison('merged_input.csv', 'monthly_plots')

        Both methods result in monthly HTML bokeh plots being saved
        to "monthly_plots/monthly_comp_plots/" which contains a plot file
        for each station as found in the input CSV file. If ``out_dir``
        keyword argument or ``[-o, --out-dir]`` command line option is not
        given the plots will be saved to a directory named
        "monthly_comp_plots" in the current working directory.

    Note:
        If there are less than 2 months of data the plot for that
        station will not be created. Stations whose station or gridMET
        file is missing are skipped.

    """
    if not out_dir:
        out_dir = os.getcwd()

    if not os.path.isdir(out_dir):
        print('{} does not exist, creating directory'.format(out_dir))
        os.makedirs(out_dir)

    # Import station/gridMET paired metadata
    paired_data = pd.read_csv(input_csv, sep=',')

    # List of variables to compare (STATION/gridMET ORDER SHOULD MATCH)
    station_vars = ['TMin (C)', 'TMax (C)', 'wx_Ko_c', 'Rs (w/m2)',
                    'ws_2m (m/s)', 'Vapor Pres (kPa)', 'RHAvg (%)',
                    'Precip (mm)', 'ETo (mm)', 'ETr (mm)']

    gridmet_vars = ['tmin_c', 'tmax_c', 'grid_Ko_c', 'srad_wm2', 'u2_ms',
                    'ea_kpa', 'rh_avg', 'prcp_mm', 'eto_mm', 'etr_mm']

    # Plot labels, one entry per variable pair (same order as the lists
    # above). They are constant, so they are built once for all stations.
    title_list = ['TMin: Monthly Average', 'TMax: Monthly Average',
                  'Ko: Monthly Average',
                  'Rs: Monthly Average: Monthly Average',
                  'WS 2m: Monthly Average',
                  'ea: Monthly Average',
                  'RH Avg: Monthly Average', 'Prcp: Monthly Total',
                  'ETo: Monthly Average', 'ETr: Monthly Average']
    # timeseries y label list
    ts_ylabel_list = ['TMin (C)', 'TMax (C)', 'Ko (C)', 'Rs (w/m2)',
                      'WS 2m (m/s)', 'ea (kPa)', 'Avg RH (%)',
                      'Prcp (mm)',
                      'ETo (mm)', 'ETr (mm)']
    # scatter xlabel list
    xlabel_list = ['Station TMin (C)', 'Station TMax (C)',
                   'Station Ko (C)', 'Station Rs (w/m2)',
                   'Station WS 2m (m/s)', 'Station ea (kPa)',
                   'Station RH (%)', 'Station Prcp (mm)',
                   'Station ETo (mm)', 'Station ETr (mm)']
    # scatter ylabel list
    ylabel_list = ['gridMET TMin (C)', 'gridMET TMax (C)',
                   'gridMET Ko (C)', 'gridMET Rs (w/m2)',
                   'gridMET WS 2m (m/s)', 'gridMET ea (kPa)',
                   'gridMET RH (%)', 'gridMET Prcp (mm)',
                   'gridMET ETo (mm)', 'gridMET ETr (mm)']
    # monthly statistic plotted for each variable (precip is a total)
    stat_list = ['mean', 'mean', 'mean', 'mean',
                 'mean', 'mean', 'mean', 'sum',
                 'mean', 'mean']
    # legend entries of the two time series lines
    legendx = 'Station'
    legendy = 'gridMET'

    # Loop through each station/gridmet pair
    for _, row in paired_data.iterrows():

        logging.info('\nStation: {}'.format(row.STATION_ID))
        station_path = row.STATION_FILE_PATH

        # Check if station path is given in input
        if pd.isnull(station_path):
            logging.info('Station path is not given. Skipping.')
            continue

        # Skip if station file does not exist
        if not os.path.exists(station_path):
            logging.info('SKIPPING {}. NO STATION FILE FOUND.'.format(
                station_path))
            continue

        # Excel workbooks ('.xlx' kept for backward compatibility) hold the
        # data in the 'Corrected Data' sheet, anything else is read as CSV
        if station_path.endswith(('.xlsx', '.xls', '.xlx')):
            station_data = pd.read_excel(station_path, index_col=0,
                    parse_dates=True, sheet_name='Corrected Data')
        else:
            station_data = pd.read_csv(station_path, index_col=0,
                    parse_dates=True)

        station_start = station_data.index.date.min()
        station_end = station_data.index.date.max()

        # Import gridMET data, skip if the file does not exist
        grid_path = row.GRID_FILE_PATH
        if not os.path.exists(grid_path):
            print('SKIPPING {}. NO GRIDMET FILE FOUND.'.format(grid_path))
            continue

        grid_data = pd.read_csv(grid_path, sep=',', parse_dates=True,
                                index_col='date')

        # prevent plotting gaps when time periods differ: keep only the
        # period covered by both datasets
        start_date = max(station_start, grid_data.index.date.min())
        end_date = min(station_end, grid_data.index.date.max())

        # Add Tdew to gridmet dataset Teten's equation ASCE REF-ET
        #  supporting equations Appendix 2-1
        grid_data['tdew_c'] = (116.91 + 237.3 * np.log(grid_data.ea_kpa)) /\
                              (16.78 - np.log(grid_data.ea_kpa))

        # Calculate Tmin - Tdew = Ko for both Station and GridMET
        # Dew Point Depression
        grid_data['grid_Ko_c'] = grid_data.tmin_c - grid_data.tdew_c

        station_data['wx_Ko_c'] = station_data['TMin (C)'] - \
                                  station_data['TDew (C)']

        # grid RH Avg calc
        # Saturated Vapor Pressure
        grid_data['tavg_c'] = (grid_data.tmin_c + grid_data.tmax_c) / 2
        grid_data['e_sat_kpa'] = 0.6108 * np.exp(
            (17.27 * grid_data.tavg_c) / (grid_data.tavg_c + 237.3))
        # Average RH (%)
        grid_data['rh_avg'] = (grid_data.ea_kpa / grid_data.e_sat_kpa) * 100

        # Combine station and gridMET dataframes (only plotting variables)
        merged = pd.concat([station_data[station_vars],
                            grid_data[gridmet_vars]], axis=1)
        merged = merged.loc[start_date:end_date]

        # remove all pairs where one var missing (rows dropped by dropna
        # come back as NaN in both columns through index alignment)
        for (x_var, y_var) in zip(station_vars, gridmet_vars):
            merged[[x_var, y_var]] = merged[[x_var, y_var]].dropna()

        # Monthly averages including count
        monthly = merged.groupby([lambda x: x.year, lambda x: x.month]).agg(
            ['mean', 'sum', 'count'])

        # Remove months with less than day_limit days in the statistic.
        # Each (variable, statistic) column is addressed explicitly so the
        # mask lands on the existing columns of that variable.
        for v in monthly.columns.levels[0]:
            too_few_days = monthly.loc[:, (v, 'count')] < day_limit
            for agg_name in ('mean', 'sum'):
                monthly.loc[too_few_days, (v, agg_name)] = np.nan

        # Rebuild Index DateTime (15th of each month)
        monthly['year'] = monthly.index.get_level_values(0).values
        monthly['month'] = monthly.index.get_level_values(1).values
        monthly.index = pd.to_datetime(
            monthly.year * 10000 + monthly.month * 100 + 15,
            format='%Y%m%d')

        if len(monthly.index) < 2:
            logging.info('Skipping. Less than 2 months of observations.')
            continue

        # Output Folder
        out_folder = os.path.join(out_dir, 'monthly_comp_plots')

        # Create path if it doesn't exist
        if not os.path.exists(out_folder):
            os.makedirs(out_folder)

        # Output to HTML file; only the file name is formatted so braces
        # in out_dir cannot break the path
        out_file_path = os.path.join(
            out_folder, '{}.html'.format(row.STATION_ID.replace(" ", "")))
        output_file(out_file_path)

        # empty list to append figures to
        figure_list = []
        # x range of the first time series that is actually drawn; every
        # later time series reuses it so the x axes are linked
        shared_x_range = None

        # loop through and create figures for each variable using vars
        # and plot labels from lists above
        for (x_var, y_var, title, ts_ylabel, xlabel, ylabel, stat) in zip(
                station_vars, gridmet_vars, title_list, ts_ylabel_list,
                xlabel_list, ylabel_list, stat_list):

            # lstsq cannot have nans (drop nas for each var separately)
            monthly2 = monthly[[x_var, y_var]].dropna()

            if monthly2.empty:
                logging.info("Skipping {}. No Data.".format(x_var))
                continue

            # Timeseries plot; linked to the first one once it exists
            ts_kwargs = dict(plot_width=800, plot_height=400,
                             x_axis_type="datetime", title=title,
                             y_axis_label=ts_ylabel)
            if shared_x_range is not None:
                ts_kwargs['x_range'] = shared_x_range
            p1 = figure(**ts_kwargs)
            if shared_x_range is None:
                shared_x_range = p1.x_range

            p1.line(monthly.index.to_pydatetime(),
                    monthly[x_var, stat], color="navy", alpha=0.5,
                    legend_label=legendx, line_width=2)
            p1.line(monthly.index.to_pydatetime(),
                    monthly[y_var, stat], color="red", alpha=0.5,
                    legend_label=legendy, line_width=2)

            # 1 to 1 Plot
            # Regression through Zero
            # https://stackoverflow.com/questions/9990789/how-to-force-
            # zero-interception-in-linear-regression/9994484#9994484
            x_vals = monthly2[x_var, stat]
            y_vals = monthly2[y_var, stat]
            m = np.linalg.lstsq(x_vals.values.reshape(-1, 1),
                                y_vals, rcond=None)[0][0]

            # integer plot extents padded around the joint data range
            data_min = np.min([y_vals, x_vals])
            data_max = np.max([y_vals, x_vals])
            line_lo = int(data_min - 2)
            line_hi = int(data_max + 2)
            r_x, r_y = zip(*((i, i * m) for i in range(
                line_lo, int(data_max + 3), 1)))

            # Plots
            p2 = figure(plot_width=400, plot_height=400,
                        x_axis_label=xlabel, y_axis_label=ylabel,
                        title='Slope Through Zero: m = {}'.format(
                            round(m, 4)))
            p2.circle(x_vals, y_vals, size=15, color="navy", alpha=0.5)
            p2.line([line_lo, line_hi], [line_lo, line_hi],
                    color="black", legend_label='1 to 1 line')
            p2.line(r_x, r_y, color="red", legend_label='Reg thru zero')
            p2.legend.location = "top_left"

            # Append [p1, p2] to figure_list (create list of lists)
            figure_list.append([p1, p2])

        # Nothing to lay out if every variable was skipped
        if not figure_list:
            logging.info('Skipping plot. No variables with data.')
            continue

        # Plot all figures in list
        fig = gridplot(figure_list, toolbar_location="left")

        # Save the figure
        save(fig)

def station_bar_plot(summary_csv, layer, out_dir=None, x_label=None, 

0 Source : plot.py
with Apache License 2.0
from WSWUP

def station_bar_plot(summary_csv, layer, out_dir=None, x_label=None, 
        y_label=None, title=None, subtitle=None, year_subtitle=True):
    """
    Produce an interactive bar chart comparing multiple climate stations to each
    other for a particular variable, e.g. bias ratios or interpolated residuals.
    
    Arguments:
        summary_csv (str): path to summary CSV produced by either :func:`gridwxcomp.calc_bias_ratios`
            or by :func:`gridwxcomp.interpolate`. Should contain ``layer`` 
            data for plot. Values of -999 are read as missing.
        layer (str): name of variable to plot.
    
    Keyword Arguments:
        out_dir (str or None): default None. Output directory path, default is 
            'station_bar_plots' in parent directory of ``summary_csv``.
        x_label (str or None): default None. Label for x-axis.
        y_label (str or None): default None. Label for y-axis, defaults to 
            ``layer``.
        title (str or None): default None. Title of plot.
        subtitle (str, list, or None): default None. Additional subtitle(s) 
            for plot.
        year_subtitle (bool): default True. If true print subtitle on plot with
            the max year range used for station data, e.g. 'years: 1995-2005'.
            Requires the 'start_year' and 'end_year' columns; the subtitle is
            skipped with a message if they are missing.

    Returns:
        None
            
    Example:
        Let's say we want to compare the mean growing season bias ratios of 
        reference evapotranspiration (ETr) for the selection of stations we 
        used to calculate bias ratios. The summary CSV file containing the 
        ratios should be first created using :func:`gridwxcomp.calc_bias_ratios`.
        
        >>> from gridwxcomp.plot import station_bar_plot
        >>> # path to summary CSV with station data
        >>> in_file = 'monthly_ratios/etr_mm_summary_all_yrs.csv'
        >>> layer = 'growseason_mean' 
        >>> station_bar_plot(in_file, layer)
        
        The resulting file will be saved using the layer name as a file name::
        
            'monthly_ratios/station_bar_plots/growseason_mean.html'
            
        The plot file will contain the mean growing season bias ratios 
        of ETr for each station, sorted from smallest to largest values. 
        
        This function may also be used for any numerical data in the summary CSV
        files that are created by :func:`gridwxcomp.interpolate` in addition to
        those created by :func:`gridwxcomp.calc_bias_ratios`. The main  
        requirement is that ``summary_csv`` must contain the column 'STATION_ID'
        and the ``layer`` keyword argument.
        
    Raises:
        FileNotFoundError: if ``summary_csv`` is not found.
        KeyError: if ``layer`` does not exist as a column name in ``summary_csv``.
        
    """
    summary_path = Path(summary_csv)
    if not summary_path.is_file():
        err_msg = '\n{} is not a valid path to a summary CSV file!'.\
                format(summary_csv)
        raise FileNotFoundError(err_msg)
        
    df = pd.read_csv(summary_csv, na_values=[-999])
    
    if layer not in df.columns:
        err_msg = '\nColumn {} was not found in {}'.format(layer, summary_csv)
        raise KeyError(err_msg)
    
    # bars are drawn in ascending order of the plotted variable
    df.sort_values(layer, inplace=True)
    df.index.name = 'dummy_name' # fix internal to bokeh- reset_index 
    source = ColumnDataSource(df)
    # hover tooltip with station and value; the @{...} form also works for
    # column names that contain spaces
    tooltips = [
        ("station", "@STATION_ID"),
        ("value", "@{{{}}}".format(layer)), 
    ]
    hover = models.HoverTool(tooltips=tooltips)
    
    if not y_label:
        y_label = layer
    # save next to the summary CSV in 'station_bar_plots' if not specified
    if not out_dir:
        out_dir = summary_path.parent/'station_bar_plots'
    else:
        out_dir = Path(out_dir)
    if not out_dir.is_dir():
        print('\n{}\nDoes not exist, making directory'.format(
            out_dir.absolute()))
        out_dir.mkdir(parents=True, exist_ok=True)

    out_file = out_dir/'{}.html'.format(layer)
    print('\nCreating station bar plot for variable: ', layer,
         '\nUsing data from file: ', summary_path.absolute())
    
    output_file(out_file)
    
    # only pass the x-axis label when one was requested so the figure
    # default is kept otherwise
    figure_kwargs = dict(x_range=df.STATION_ID, y_axis_label=y_label,
                         title=title)
    if x_label:
        figure_kwargs['x_axis_label'] = x_label
    p = figure(**figure_kwargs)
    p.vbar(x='STATION_ID', top=layer, width=0.8, source=source)
    p.xaxis.major_label_orientation = pi/2
    p.add_tools(hover, models.BoxSelectTool())
    
    if year_subtitle:
        if 'start_year' in df.columns and 'end_year' in df.columns:
            # add data range (years start to end) as subtitle 
            min_yr = int(df.start_year.min())
            max_yr = int(df.end_year.max())
            if min_yr == max_yr:
                year_str = 'year: {}'.format(min_yr)
            else:
                year_str = 'years: {}-{}'.format(min_yr, max_yr)
            # caution note if not all stations use full year range
            full_range = (df.end_year == max_yr).all() and \
                    (df.start_year == min_yr).all()
            if not full_range:
                year_str = '{} (less years exist for some stations)'.\
                        format(year_str)
                
            p.add_layout(
                models.Title(text=year_str, text_font_style="italic"), 
                'above')
        else:
            print('\nColumns start_year and end_year were not found in',
                  summary_csv, '\nSkipping year subtitle')
    # add arbitrary number of custom subtitles as lines above plot 
    if isinstance(subtitle, (list, tuple)):
        for st in subtitle:
            p.add_layout(models.Title(text=st, text_font_style="italic"), 
                    'above')
    elif subtitle:
        p.add_layout(models.Title(text=subtitle, text_font_style="italic"), 
                'above')
        
    save(p)
    print('\nPlot saved to: ', out_file.absolute())


def arg_parse():

0 Source : test_mini_analyses.py
with BSD 3-Clause "New" or "Revised" License
from XENONnT

    def test_bokeh_selector(self):
        """Build a 2D selection histogram and check it can be saved.

        The figure is written once with ``bokeh.plotting.save`` and once
        with ``straxen.bokeh_to_wiki``; after each write the HTML file must
        exist and is removed again.
        """
        from straxen.analyses.bokeh_waveform_plot import DataSelectionHist

        html_path = 'test_data_selector.html'

        def _expect_file_then_remove():
            # the file was just written: it must exist, and be gone after
            # cleaning up
            self.assertTrue(os.path.exists(html_path))
            os.remove(html_path)
            self.assertFalse(os.path.exists(html_path))

        peaks = self.st.get_array(nt_test_run_id, 'peak_basics')
        selector = DataSelectionHist('ds')
        hist_options = dict(
            bins=50,
            hist_range=((0, 200), (0, 2000)),
            log_color_scale=True,
            clim=(10, None),
            undeflow_color='white',
        )
        fig = selector.histogram2d(peaks, peaks['area'], peaks['area'],
                                   **hist_options)

        import bokeh.plotting as bklt
        bklt.save(fig, html_path)
        _expect_file_then_remove()

        # Also test if we can write it to the wiki, first without and then
        # with an explicit file name
        straxen.bokeh_to_wiki(fig)
        straxen.bokeh_to_wiki(fig, html_path)
        _expect_file_then_remove()

    @unittest.skipIf(not straxen.utilix_is_configured(),