Here are examples of the Python API bokeh.plotting.save, taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
21 Examples
3
Source : plot_apple_watch_data.py
with Apache License 2.0
from openPfizer
with Apache License 2.0
from openPfizer
def save_plot(plot, title):
    """
    Saves plot as ``apple_watch_plots/<title>.html``

    :param plot: bokeh plot object
    :param title: file name (without the ``.html`` extension) for plot
    :return: None
    """
    import os

    # Make sure the target directory exists so that writing the html file
    # does not fail when the function is used in a fresh working directory.
    os.makedirs("apple_watch_plots", exist_ok=True)
    output_file("apple_watch_plots/{}.html".format(title))
    save(plot)
def plot_heart_rate(apple_watch):
0
Source : html_reporting.py
with Apache License 2.0
from allegroai
with Apache License 2.0
from allegroai
def report_html_periodic_table(logger, iteration=0):
    # type: (Logger, int) -> ()
    """
    reporting interactive (html) of periodic table to debug samples section

    The figure is written to ``periodic.html`` and that file is then passed to
    ``logger.report_media``.

    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    output_file("periodic.html")
    periods = ["I", "II", "III", "IV", "V", "VI", "VII"]
    groups = [str(x) for x in range(1, 19)]

    # Work on a copy so the shared ``elements`` table is not modified.
    table = elements.copy()
    table["atomic mass"] = table["atomic mass"].astype(str)
    table["group"] = table["group"].astype(str)
    # Translate the numeric period into the roman numeral used on the y axis.
    table["period"] = [periods[x - 1] for x in table.period]
    # Drop rows without a group as well as Lr and Lu.
    table = table[table.group != "-"]
    table = table[table.symbol != "Lr"]
    table = table[table.symbol != "Lu"]

    cmap = {
        "alkali metal": "#a6cee3",
        "alkaline earth metal": "#1f78b4",
        "metal": "#d93b43",
        "halogen": "#999d9a",
        "metalloid": "#e08d49",
        "noble gas": "#eaeaea",
        "nonmetal": "#f1d4Af",
        "transition metal": "#599d7A",
    }
    source = ColumnDataSource(table)

    p = figure(
        plot_width=900,
        plot_height=500,
        title="Periodic Table (omitting LA and AC Series)",
        x_range=groups,
        y_range=list(reversed(periods)),
        toolbar_location=None,
        tools="hover",
    )
    p.rect(
        "group",
        "period",
        0.95,
        0.95,
        source=source,
        fill_alpha=0.6,
        # ``legend_field`` groups the legend by the values of the "metal"
        # column; ``legend_label`` would create a single literal "metal" entry.
        legend_field="metal",
        color=factor_cmap(
            "metal", palette=list(cmap.values()), factors=list(cmap.keys())
        ),
    )

    text_props = {"source": source, "text_align": "left", "text_baseline": "middle"}
    x = dodge("group", -0.4, range=p.x_range)

    # Element symbol.
    r = p.text(x=x, y="period", text="symbol", **text_props)
    r.glyph.text_font_style = "bold"

    # Atomic number, shifted by +0.3 relative to the cell centre.
    r = p.text(
        x=x, y=dodge("period", 0.3, range=p.y_range), text="atomic number", **text_props
    )
    r.glyph.text_font_size = "8pt"

    # Element name, shifted by -0.35.
    r = p.text(
        x=x, y=dodge("period", -0.35, range=p.y_range), text="name", **text_props
    )
    r.glyph.text_font_size = "5pt"

    # Atomic mass, shifted by -0.2.
    r = p.text(
        x=x, y=dodge("period", -0.2, range=p.y_range), text="atomic mass", **text_props
    )
    r.glyph.text_font_size = "5pt"

    # Placeholders for the omitted LA and AC series.
    p.text(
        x=["3", "3"],
        y=["VI", "VII"],
        text=["LA", "AC"],
        text_align="center",
        text_baseline="middle",
    )

    p.hover.tooltips = [
        ("Name", "@name"),
        ("Atomic number", "@{atomic number}"),
        ("Atomic mass", "@{atomic mass}"),
        ("Type", "@metal"),
        ("CPK color", "$color[hex, swatch]:CPK"),
        ("Electronic configuration", "@{electronic configuration}"),
    ]

    p.outline_line_color = None
    p.grid.grid_line_color = None
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.axis.major_label_standoff = 0
    p.legend.orientation = "horizontal"
    p.legend.location = "top_center"

    save(p)
    logger.report_media("html", "periodic_html", iteration=iteration, local_path="periodic.html")
def report_html_groupby(logger, iteration=0):
0
Source : html_reporting.py
with Apache License 2.0
from allegroai
with Apache License 2.0
from allegroai
def report_html_groupby(logger, iteration=0):
    # type: (Logger, int) -> ()
    """
    reporting bokeh groupby (html) to debug samples section

    Draws the mean MPG per (cylinders, manufacturer) group as a bar chart,
    saves it to ``bar_pandas_groupby_nested.html`` and reports that file.

    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    html_path = "bar_pandas_groupby_nested.html"
    output_file(html_path)

    # Cast cyl and yr to strings (the cyl values are used as colour factors below).
    bokeh_df.cyl = bokeh_df.cyl.astype(str)
    bokeh_df.yr = bokeh_df.yr.astype(str)

    by_cyl_and_mfr = bokeh_df.groupby(by=["cyl", "mfr"])
    cylinder_factors = sorted(bokeh_df.cyl.unique())
    bar_colors = factor_cmap(
        "cyl_mfr", palette=Spectral5, factors=cylinder_factors, end=1
    )

    hover_rows = [("MPG", "@mpg_mean"), ("Cyl, Mfr", "@cyl_mfr")]
    fig = figure(
        plot_width=800,
        plot_height=300,
        title="Mean MPG by # Cylinders and Manufacturer",
        x_range=by_cyl_and_mfr,
        toolbar_location=None,
        tooltips=hover_rows,
    )
    fig.vbar(
        x="cyl_mfr",
        top="mpg_mean",
        width=1,
        source=by_cyl_and_mfr,
        line_color="white",
        fill_color=bar_colors,
    )

    # Axis and frame cosmetics.
    fig.y_range.start = 0
    fig.x_range.range_padding = 0.05
    fig.xgrid.grid_line_color = None
    fig.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
    fig.xaxis.major_label_orientation = 1.2
    fig.outline_line_color = None

    save(fig)
    logger.report_media(
        "html",
        "pandas_groupby_nested_html",
        iteration=iteration,
        local_path=html_path,
    )
def report_html_graph(logger, iteration=0):
0
Source : html_reporting.py
with Apache License 2.0
from allegroai
with Apache License 2.0
from allegroai
def report_html_graph(logger, iteration=0):
    # type: (Logger, int) -> ()
    """
    reporting bokeh graph (html) to debug samples section

    Builds an eight-node graph whose edges all start at node 0, places the
    nodes on the unit circle, saves the plot to ``graph.html`` and reports it.

    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    nodes = 8
    node_indices = list(range(nodes))

    plot = figure(
        title="Graph Layout Demonstration",
        x_range=(-1.1, 1.1),
        y_range=(-1.1, 1.1),
        tools="",
        toolbar_location=None,
    )

    graph = GraphRenderer()
    node_source = graph.node_renderer.data_source
    node_source.add(node_indices, "index")
    node_source.add(Spectral8, "color")
    graph.node_renderer.glyph = Ellipse(height=0.1, width=0.2, fill_color="color")
    # Every edge starts at node 0 and ends at one of the nodes.
    graph.edge_renderer.data_source.data = dict(start=[0] * nodes, end=node_indices)

    # start of layout code
    angles = [i * 2 * math.pi / 8 for i in node_indices]
    graph_layout = {
        index: (math.cos(angle), math.sin(angle))
        for index, angle in zip(node_indices, angles)
    }
    graph.layout_provider = StaticLayoutProvider(graph_layout=graph_layout)

    plot.renderers.append(graph)

    html_path = "graph.html"
    output_file(html_path)
    save(plot)
    logger.report_media("html", "Graph_html", iteration=iteration, local_path=html_path)
def report_html_image(logger, iteration=0):
0
Source : html_reporting.py
with Apache License 2.0
from allegroai
with Apache License 2.0
from allegroai
def report_html_image(logger, iteration=0):
    # type: (Logger, int) -> ()
    """
    reporting bokeh image (html) to debug samples section

    Renders ``sin(x) * cos(y)`` on a 500 x 500 grid over [0, 10] x [0, 10],
    saves the figure to ``image.html`` and reports that file.

    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    # First html
    samples = 500
    axis_x = np.linspace(0, 10, samples)
    axis_y = np.linspace(0, 10, samples)
    grid_x, grid_y = np.meshgrid(axis_x, axis_y)
    values = np.sin(grid_x) * np.cos(grid_y)

    hover_rows = [("x", "$x"), ("y", "$y"), ("value", "@image")]
    fig = figure(tooltips=hover_rows)
    fig.x_range.range_padding = 0
    fig.y_range.range_padding = 0

    # must give a vector of image data for image parameter
    fig.image(image=[values], x=0, y=0, dw=10, dh=10, palette="Spectral11", level="image")
    fig.grid.grid_line_width = 0.5

    html_path = "image.html"
    output_file(html_path, title="image.py example")
    save(fig)
    logger.report_media("html", "Spectral_html", iteration=iteration, local_path=html_path)
def main():
0
Source : plots.py
with MIT License
from ArtesiaWater
with MIT License
from ArtesiaWater
def interactive_plot(self,
                     savedir=None,
                     plot_columns=('stand_m_tov_nap',),
                     markers=('line',),
                     p=None,
                     plot_legend_names=('',),
                     plot_freq=(None,),
                     tmin=None,
                     tmax=None,
                     hoover_names=('Peil',),
                     hoover_date_format="%Y-%m-%d",
                     ylabel='m NAP',
                     plot_colors=('blue',),
                     add_filter_to_legend=False,
                     return_filename=False):
    """Create an interactive plot of the observations using bokeh.

    Todo:
        - add options for hoovers, markers, linestyle

    Parameters
    ----------
    savedir : str, optional
        directory used for the folium map and bokeh plots; it is created
        when it does not exist. If None the plot is not written to disk.
    plot_columns : list of str, optional
        name of the column in the obs df that will be plotted with bokeh
    markers : list of str, optional
        type of markers that can be used for plot, 'line' (or 'l') and
        'circle' (or 'c') are supported
    p : bokeh.plotting.figure, optional
        reference to existing figure, if p is None a new figure is created
    plot_legend_names : list of str, optional
        legend in bokeh plot
    plot_freq : list of str, optional
        bokeh plot is resampled with this frequency to reduce the size
    tmin : dt.datetime, optional
        start date for timeseries plot
    tmax : dt.datetime, optional
        end date for timeseries plot
    hoover_names : list of str, optional
        names will be displayed together with the plot_column value
        when hovering over plot
    hoover_date_format : str, optional
        date format to use when hovering over a plot
    ylabel : str, optional
        label on the y-axis
    plot_colors : list of str, optional
        plot_colors used for the plots; repeated when fewer colors than
        plot_columns are given
    add_filter_to_legend : boolean, optional
        if True the attributes bovenkant_filter and onderkant_filter
        are added to the legend name
    return_filename : boolean, optional
        if True filename will be returned. The filename is only assigned
        by this call when savedir is given.

    Returns
    -------
    fname_plot : str or bokeh plot
        filename of the bokeh plot or reference to bokeh plot

    Raises
    ------
    ValueError
        if there is no data between tmin and tmax
    NotImplementedError
        if an unsupported marker type is requested
    """
    from bokeh.plotting import figure
    from bokeh.models import ColumnDataSource, HoverTool
    from bokeh.plotting import save
    from bokeh.resources import CDN

    # create plot dataframe
    plot_df = self._obj[tmin:tmax].copy()
    plot_df['date'] = plot_df.index.strftime(hoover_date_format)
    if plot_df.empty or plot_df[list(plot_columns)].isna().all().all():
        raise ValueError(
            '{} has no data between {} and {}'.format(self._obj.name, tmin, tmax))

    # create plot
    if p is None:
        p = figure(plot_width=600, plot_height=400, x_axis_type='datetime',
                   title='')
        p.yaxis.axis_label = ylabel

    # get x axis
    xcol = self._obj.index.name
    if xcol is None:
        xcol = 'index'

    # get color
    if len(plot_colors) < len(plot_columns):
        plot_colors = list(plot_colors) * len(plot_columns)

    # get base for hoover tooltips
    plots = []
    tooltips = [('date', "@date")]

    # plot multiple columns
    for i, column in enumerate(plot_columns):
        # legend name
        if add_filter_to_legend:
            lname = '{} {} (NAP {:.2f} - {:.2f})'.format(plot_legend_names[i], self._obj.name,
                                                         self._obj.onderkant_filter,
                                                         self._obj.bovenkant_filter)
        else:
            lname = '{} {}'.format(plot_legend_names[i], self._obj.name)

        # resample data
        if plot_freq[i] is None:
            source = ColumnDataSource(plot_df[[column, 'date']])
        else:
            source = ColumnDataSource(
                plot_df[[column, 'date']].resample(plot_freq[i]).first())

        # plot data
        if markers[i] in ['line', 'l']:
            plots.append(p.line(xcol, column, source=source, color=plot_colors[i],
                                legend_label=lname,
                                alpha=0.8, muted_alpha=0.2))
        elif markers[i] in ['circle', 'c']:
            plots.append(p.circle(xcol, column, source=source, color=plot_colors[i],
                                  legend_label=lname,
                                  alpha=0.8, muted_alpha=0.2))
        else:
            # the two literals are concatenated, so the separating space
            # has to be part of the first one
            raise NotImplementedError("marker '{}' invalid. Only line and "
                                      "circle are currently available".format(markers[i]))

        # add columns to hoover tooltips; each renderer gets its own hover tool
        tooltips_p = tooltips.copy()
        tooltips_p.append((hoover_names[i], "@{}".format(column)))
        hover = HoverTool(renderers=[plots[i]], tooltips=tooltips_p, mode='vline')
        p.add_tools(hover)

    p.legend.location = "top_left"
    p.legend.click_policy = "mute"

    # save plot
    if savedir is not None:
        # exist_ok avoids the race between checking for and creating the directory
        os.makedirs(savedir, exist_ok=True)
        self._obj.iplot_fname = os.path.join(
            savedir, self._obj.name + '.html')
        save(p, self._obj.iplot_fname, resources=CDN, title=self._obj.name)

    if return_filename:
        return self._obj.iplot_fname
    else:
        return p
0
Source : dimens_reduction.py
with Mozilla Public License 2.0
from astrazeneca-cgr-publications
with Mozilla Public License 2.0
from astrazeneca-cgr-publications
def plot_interactive_viz(self, data, highlighted_genes, method, pos_label, neg_label, show_plot=False, save_plot=False):
    '''
    Plot an interactive dimensionality reduction embedding (e.g. PCA, t-SNE)
    with label annotation for selected data points.

    The html plot is always written to ``self.cfg.unsuperv_figs_out``; it is
    additionally shown when `show_plot` is set and exported as svg when
    `save_plot` is set.
    '''
    known_genes_highlight_color = '#31a354'
    label_colors = {pos_label: '#ef3b2c', neg_label: '#bdbdbd'}

    # Highlight genes of interest
    data['colors'] = data.known_gene.copy()
    data = data.replace({'colors': label_colors})
    data = data.sort_values(by=[self.cfg.Y], ascending=True)
    for gene in ('PKD1', 'PKD2'):
        data.loc[data['Gene_Name'] == gene, 'colors'] = known_genes_highlight_color

    # Move the highlighted genes to the end of the frame.
    is_highlighted = data['Gene_Name'].isin(highlighted_genes)
    selected_gene_rows = data.loc[is_highlighted, :]
    remaining_rows = data[~data.Gene_Name.isin(highlighted_genes)]
    data = pd.concat([remaining_rows, selected_gene_rows], axis=0)
    data.loc[data['Gene_Name'].isin(highlighted_genes), 'colors'] = '#252525'

    # Human-readable version of the known-gene label.
    data['annotation'] = data.known_gene.copy()
    for label, text in ((pos_label, 'Yes'), (neg_label, 'No')):
        data.loc[data.annotation == label, 'annotation'] = text

    # Plot
    point_columns = dict(
        x=data['x'],
        y=data['y'],
        color=data['colors'],
        content=data['Gene_Name'],
        annot=data['annotation'],
    )
    source = ColumnDataSource(point_columns)

    interact_viz = figure(plot_width=900, plot_height=900,
                          title=method, tools="pan,wheel_zoom,box_zoom,reset,hover,previewsave",
                          x_axis_type=None, y_axis_type=None, min_border=1)
    interact_viz.scatter(x='x', y='y',
                         source=source,
                         color='color',
                         alpha=0.8, size=10,
                         legend=method)

    # hover tools
    hover = interact_viz.select(dict(type=HoverTool))
    hover.tooltips = [("gene", "@content")]
    interact_viz.legend.location = "top_left"

    html_name = method + "_interactive_plot.html"
    output_file(str(self.cfg.unsuperv_figs_out / html_name))
    save(interact_viz)

    if show_plot:
        show(interact_viz)

    if save_plot:
        interact_viz.output_backend = "svg"
        svg_name = method + '_interactive_plot.svg'
        export_svgs(interact_viz, filename=(self.cfg.unsuperv_figs_out / svg_name))
0
Source : plots.py
with Apache License 2.0
from awslabs
with Apache License 2.0
from awslabs
def mousover_plot(datadict, attr_x, attr_y, attr_color=None, attr_size=None, save_file=None, plot_title="",
                  point_transparency=0.5, point_size=20, default_color="#2222aa", hidden_keys=None, show_plot=False):
    """ Produces dynamic scatter plot that can be interacted with by mousing over each point to see its label

        Note: helper columns are added to `datadict` in place (``attr_x + "___"`` for
        string-valued x data, ``attr_size + "____"`` when `attr_size` is given).

        Args:
            datadict (dict): keys contain attributes, values of lists of data from each attribute to plot (each list index corresponds to datapoint).
                             The values of all extra keys in this dict are considered (string) labels to assign to datapoints when they are moused over.
                             Apply _formatDict() to any entries in datadict which are themselves dicts.
            attr_x (str): name of column in dataframe whose values are shown on x-axis (eg. 'latency'). Can be categorical or numeric values
            attr_y (str): name of column in dataframe whose values are shown on y-axis (eg. 'validation performance'). Must be numeric values.
            attr_size (str): name of column in dataframe whose values determine size of dots (eg. 'memory consumption'). Must be numeric values.
            attr_color (str): name of column in dataframe whose values determine color of dots  (eg. one of the hyperparameters). Can be categorical or numeric values
            save_file (str): where to save plot to (html) file (if None, plot is not saved)
            plot_title (str): Title of plot and html file
            point_transparency (float): alpha value of points, lower = more transparent
            point_size (int): size of points, higher = larger
            default_color (str): color of the points when `attr_color` is None
            hidden_keys (list[str]): which keys of datadict NOT to show labels for. The list passed in is not modified.
            show_plot (bool): whether to show plot

        Returns:
            None. A warning is issued and None is returned early when bokeh cannot be imported.

        Raises:
            ValueError: if the lists in `datadict` do not all have the same length.
    """
    try:
        with warning_filter():
            import bokeh
            from bokeh.plotting import output_file, ColumnDataSource, show, figure, save
            from bokeh.models import HoverTool, CategoricalColorMapper, LinearColorMapper, Legend, LegendItem, ColorBar
            from bokeh.palettes import Category20
    except ImportError:
        warnings.warn('AutoGluon summary plots cannot be created because bokeh is not installed. To see plots, please do: "pip install bokeh==2.0.1"')
        return None

    # Work on a private copy: the helper keys appended below must neither leak
    # into the caller's list nor accumulate across calls.
    hidden_keys = [] if hidden_keys is None else list(hidden_keys)

    n = len(datadict[attr_x])
    for key in datadict.keys():  # Check lengths are all the same
        if len(datadict[key]) != n:
            raise ValueError("Key %s in datadict has different length than %s" % (key, attr_x))

    attr_x_is_string = any(isinstance(val, str) for val in datadict[attr_x])
    if attr_x_is_string:
        # First-seen order keeps the mapping between int-indices and x-values
        # reproducible between runs (set iteration order is not).
        attr_x_levels = list(OrderedDict.fromkeys(datadict[attr_x]))
        og_x_vals = datadict[attr_x][:]
        attr_x2 = attr_x + "___"  # this key must not already be in datadict.
        hidden_keys.append(attr_x2)
        datadict[attr_x2] = [attr_x_levels.index(category) for category in og_x_vals]  # convert to ints

    attr_color_is_string = False
    if attr_color is not None:
        color_datavals = datadict[attr_color]
        attr_color_is_string = any(isinstance(val, str) for val in color_datavals)
        if attr_color_is_string:
            attr_color_levels = list(OrderedDict.fromkeys(color_datavals))
            colorpalette = Category20[20]
            # Every second palette entry is used, wrapping around the palette.
            color_mapper = CategoricalColorMapper(
                factors=attr_color_levels,
                palette=[colorpalette[2*i % len(colorpalette)] for i in range(len(attr_color_levels))])
        else:
            color_mapper = LinearColorMapper(palette='Magma256', low=min(color_datavals), high=max(color_datavals)*1.25)
        default_color = {'field': attr_color, 'transform': color_mapper}

    if attr_size is not None:  # different size for each point, ensure mean-size == point_size
        attr_size2 = attr_size + "____"
        hidden_keys.append(attr_size2)
        og_sizevals = np.array(datadict[attr_size])
        size_std = np.std(og_sizevals)
        if size_std > 0:
            sizevals = point_size + (og_sizevals - np.mean(og_sizevals))/size_std * (point_size/2)
        else:
            # All values identical: avoid dividing by zero and draw every point at point_size.
            sizevals = np.full(len(og_sizevals), float(point_size))
        if np.min(sizevals) < 0:
            sizevals = -np.min(sizevals) + sizevals + 1.0
        datadict[attr_size2] = list(sizevals)
        point_size = attr_size2

    if save_file is not None:
        output_file(save_file, title=plot_title)
        print("Plot summary of models saved to file: %s" % save_file)

    source = ColumnDataSource(datadict)
    TOOLS = "crosshair,pan,wheel_zoom,box_zoom,reset,hover,save"
    p = figure(title=plot_title, tools=TOOLS)
    x_column = attr_x2 if attr_x_is_string else attr_x
    circ = p.circle(x_column, attr_y, line_color=default_color, line_alpha=point_transparency,
                    fill_color=default_color, fill_alpha=point_transparency, size=point_size, source=source)
    hover = p.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([(key, '@'+key+'{safe}') for key in datadict.keys() if key not in hidden_keys])

    # Format axes:
    p.xaxis.axis_label = attr_x
    p.yaxis.axis_label = attr_y
    if attr_x_is_string:  # add x-ticks:
        p.xaxis.ticker = list(range(len(attr_x_levels)))
        p.xaxis.major_label_overrides = {i: level for i, level in enumerate(attr_x_levels)}

    # Legend additions:
    if attr_color is not None and attr_color_is_string:
        # One legend item per level, pointing at the first datapoint with that level.
        legend_it = [
            LegendItem(label=level, renderers=[circ], index=datadict[attr_color].index(level))
            for level in attr_color_levels
        ]
        p.add_layout(Legend(items=legend_it, location=(0, 0)), 'right')

    if attr_color is not None and not attr_color_is_string:
        color_bar = ColorBar(color_mapper=color_mapper, title=attr_color,
                             label_standoff=12, border_line_color=None, location=(0, 0))
        p.add_layout(color_bar, 'right')

    if attr_size is not None:
        p.add_layout(Legend(items=[LegendItem(label='Size of points based on "'+attr_size + '"')]), 'below')

    if show_plot:
        show(p)
    elif save_file is not None:
        save(p)
0
Source : draw_heatmaps.py
with GNU Affero General Public License v3.0
from DennisSchmitz
with GNU Affero General Public License v3.0
from DennisSchmitz
def draw_heatmaps(df, outfile, title, taxonomic_rank, colour):
    """
    Draw a heatmap for the given input dataframe with the given title.

    For the "superkingdom" rank the heatmap is saved to `outfile` and None is
    returned. For every other rank nothing is written: the function returns
    ``(panel, True)``, or ``(None, False)`` when no row has a value for the
    requested rank.
    """
    is_superkingdom = taxonomic_rank == "superkingdom"
    stat_names = ("MIN", "MAX", "SUM", "AVG")

    if is_superkingdom:
        # The sample contains only superkingdom information, use that:
        # create source info and set hovertool tooltip parameters
        samples = df["Sample_name"].astype(str)
        assigned = df["superkingdom"].astype(str)
        reads = df["reads"].astype(float)
        percent_of_total = df["Percentage"].astype(float)
        colors = len(reads) * colour  # multiply to make an equally long list
        max_load = max(percent_of_total)
        alphas = [min(x / float(max_load), 0.9) + 0.1 for x in percent_of_total]
        source = ColumnDataSource(
            data=dict(
                samples=samples,
                assigned=assigned,
                reads=reads,
                percent_of_total=percent_of_total,
                colors=colors,
                alphas=alphas,
            )
        )
        y_value = (assigned, "assigned")
    else:
        # Otherwise, create the usual heatmap input info for each
        # (relevant) taxonomic rank down to species.
        # Remove 'unclassified' taxa: NaN in dataframe
        df = df[df[taxonomic_rank].notnull()]
        if df.empty:
            # Nothing to draw for this rank
            return (None, False)

        grouped = df.groupby(["Sample_name", taxonomic_rank])
        contig_counts = pd.DataFrame(grouped.size())
        # if there are taxa with more than 3 contigs *in one sample*
        # the hover info boxes will be too many, so
        # aggregate statistics per taxon
        aggregated = max(contig_counts[0]) > 3

        if aggregated:
            new_df = contig_counts.reset_index()
            new_df = new_df.rename(columns={0: "Number_of_contigs"})
            stat_frames = {
                "MIN": pd.DataFrame(grouped.min()).reset_index(),
                "MAX": pd.DataFrame(grouped.max()).reset_index(),
                "SUM": pd.DataFrame(grouped.sum()).reset_index(),
                "AVG": pd.DataFrame(grouped.mean()).reset_index(),
            }
            measured = [
                "Plus_reads",
                "Minus_reads",
                "Avg_fold",
                "Length",
                "Percentage",
                "Nr_ORFs",
            ]
            # Prefix the measured columns of every statistics frame ...
            for stat in stat_names:
                stat_frames[stat] = stat_frames[stat].rename(
                    columns={column: "%s_%s" % (stat, column) for column in measured}
                )
            # ... and copy them over, column by column.
            for column in measured:
                for stat in stat_names:
                    prefixed = "%s_%s" % (stat, column)
                    new_df[prefixed] = stat_frames[stat][prefixed]
            for stat in stat_names:
                new_df["%s_reads" % stat] = (
                    new_df["%s_Minus_reads" % stat] + new_df["%s_Plus_reads" % stat]
                )
            new_df["tax_name"] = stat_frames["MIN"]["tax_name"]
            new_df["taxon"] = stat_frames["MIN"][taxonomic_rank]
            new_df["total_reads"] = df["read_pairs"]
            new_df = new_df.fillna(0)

            columns = dict(
                samples=new_df["Sample_name"].astype(str),
                nr_contigs=new_df["Number_of_contigs"].astype(int),
                assigned=new_df["tax_name"].astype(str),
                taxonomy=new_df["taxon"].astype(str),
            )
            for stat in stat_names:
                columns["%s_reads" % stat.lower()] = new_df["%s_reads" % stat].astype(int)
            columns["total_reads"] = new_df["total_reads"].astype(int)
            for field, suffix, dtype in (
                ("percentage", "Percentage", float),
                ("coverage", "Avg_fold", int),
                ("length", "Length", int),
                ("nr_orfs", "Nr_ORFs", int),
            ):
                for stat in stat_names:
                    columns["%s_%s" % (stat.lower(), field)] = new_df[
                        "%s_%s" % (stat, suffix)
                    ].astype(dtype)

            samples = columns["samples"]
            assigned = columns["assigned"]
            taxonomy = columns["taxonomy"]
            avg_percentage = columns["avg_percentage"]
            columns["colors"] = len(samples) * colour
            max_load = max(avg_percentage)
            # scale darkness to the average percentage of reads
            columns["alphas"] = [
                min(x / float(max_load), 0.9) + 0.1 for x in avg_percentage
            ]
            source = ColumnDataSource(data=columns)
        else:
            # no taxon has too many contigs assigned per sample,
            # so create a plot for everything
            df.fillna(0, inplace=True)
            samples = df["Sample_name"].astype(str)
            scaffolds = df["scaffold_name"].astype(str)
            assigned = df["tax_name"].astype(str)
            taxonomy = df[taxonomic_rank].astype(str)
            reads = df["reads"].astype(int)
            total_reads = df["read_pairs"].astype(int)
            percent_of_total = df["Percentage"].astype(float)
            coverage = df["Avg_fold"].astype(int)
            contig_length = df["Length"].astype(int)
            nr_orfs = df["Nr_ORFs"].astype(int)
            colors = len(reads) * colour  # multiply to make an equally long list
            max_load = max(percent_of_total)
            alphas = [min(x / float(max_load), 0.9) + 0.1 for x in percent_of_total]
            source = ColumnDataSource(
                data=dict(
                    samples=samples,
                    scaffolds=scaffolds,
                    assigned=assigned,
                    taxonomy=taxonomy,
                    reads=reads,
                    total_reads=total_reads,
                    percent_of_total=percent_of_total,
                    coverage=coverage,
                    contig_length=contig_length,
                    nr_orfs=nr_orfs,
                    colors=colors,
                    alphas=alphas,
                )
            )
        y_value = (taxonomy, "taxonomy")

    TOOLS = "hover, save, pan, box_zoom, wheel_zoom, reset"
    p = figure(
        title=title,
        # If desired, the sample can be displayed as "Run x, sample y"
        # -> uncomment the next line
        # x_range = [ "Run %s, sample %s" % (x.split('_')[0], x.split('_')[1]) for x in list(sorted(set(samples))) ],
        x_range=sorted(set(df["Sample_name"])),
        # reverse to order 'from top to bottom'
        y_range=sorted(set(y_value[0]), reverse=True),
        x_axis_location="above",
        toolbar_location="right",
        tools=TOOLS,
    )

    # Edit the size of the heatmap when there are many samples and/or taxa
    sample_count = len(set(samples))
    if sample_count > 20:
        p.plot_width = int(sample_count * 25)

    if is_superkingdom:
        tooltips = [
            ("Sample", "@samples"),
            ("Taxon", "@assigned"),
            ("Number of reads", "@reads"),
            ("Percentage of total", "@percent_of_total %"),
        ]
    else:
        # Adjust heatmap sizes depending on the number of
        # taxa observed (not applicable for superkingdom heatmap)
        taxon_count = len(set(taxonomy))
        if taxon_count > 100:
            p.plot_height = int(p.plot_height * 3)
            p.plot_width = int(p.plot_width * 1.5)
        elif taxon_count > 50:
            p.plot_height = int(p.plot_height * 2)
            p.plot_width = int(p.plot_width * 1.2)
        elif taxon_count > 25:
            p.plot_height = int(p.plot_height * 1.2)

        if aggregated:
            # An aggregated format requires a different hover tooltip
            tooltips = [
                ("Sample", "@samples"),
                ("Taxon", "@assigned"),
                ("Number of scaffolds", "@nr_contigs"),
                # ('-----', ""), # If you like a separator in the tooltip
                (
                    "Number of reads total (min, avg, max)",
                    "@sum_reads (@min_reads, @avg_reads, @max_reads)",
                ),
                (
                    "Scaffold length total (min, avg, max)",
                    "@sum_length (@min_length, @avg_length, @max_length)",
                ),
                (
                    "Number of ORFs total (min, avg, max)",
                    "@sum_nr_orfs (@min_nr_orfs, @avg_nr_orfs, @max_nr_orfs)",
                ),
                (
                    "Depth of coverage total (min, avg, max)",
                    "@sum_coverage (@min_coverage, @avg_coverage*, @max_coverage)",
                ),
                ("*", "darkness scaled to this number"),
            ]
        else:
            tooltips = [
                ("Sample", "@samples"),
                ("Scaffold", "@scaffolds"),
                ("Taxon", "@assigned"),
                ("Number of reads", "@reads (@percent_of_total % of sample total)"),
                ("Scaffold length", "@contig_length"),
                ("Number of ORFs", "@nr_orfs"),
                ("Average Depth of Coverage", "@coverage"),
            ]
    p.select_one(HoverTool).tooltips = tooltips

    p.grid.grid_line_color = None
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    # Smaller axis labels when many taxa have to fit
    p.axis.major_label_text_font_size = "10pt" if len(set(assigned)) > 15 else "12pt"
    p.axis.major_label_standoff = 0
    p.xaxis.major_label_orientation = np.pi / 4
    p.title.text_color = colour[0]
    p.title.text_font_size = "16pt"
    p.title.align = "right"
    p.rect(
        "samples",
        y_value[1],
        1,
        1,
        source=source,
        color="colors",
        alpha="alphas",
        line_color=None,
    )

    # the .title() method capitalises the second word of the title
    panel = Panel(child=p, title=title.split()[1].title())

    if not is_superkingdom:
        return (panel, True)

    # The superkingdom heatmap still requires a single output file
    output_file(outfile, title=title)
    save(p)
    print("The heatmap %s has been created and written to: %s" % (title, outfile))
    return None
def main():
0
Source : draw_heatmaps.py
with GNU Affero General Public License v3.0
from DennisSchmitz
with GNU Affero General Public License v3.0
from DennisSchmitz
def _save_rank_heatmaps(df, ranks, outfile, colour, labels):
    """
    Draw one heatmap per taxonomic rank and write the result to `outfile`.

    `labels` supplies the group-specific wording used in titles and messages:
    "singular" (panel titles), "heading" (output title), "group" (messages),
    "in_sample" and "adjective" (no-data messages).

    More than one panel is saved as a tabbed figure, exactly one panel is
    saved on its own, and without any panel a short text note is written to
    `outfile` instead.
    """
    panels = []
    for rank in ranks:
        (panel, has_data) = draw_heatmaps(
            df=df,
            outfile=None,
            title="%s %s heatmap" % (labels["singular"], rank),
            taxonomic_rank=rank,
            colour=colour,
        )
        # Check if there was data to make a panel
        if has_data:
            panels.append(panel)
        # if there was no data, print a warning and do not add nonsense panel
        else:
            print("No data for the current %s rank! (%s)" % (labels["group"], rank))
            print(
                "\n---\nThere are no contigs for the given %s. No %s %s heatmap can be made.\n---\n"
                % (rank, labels["group"], rank)
            )

    heading = "%s heatmap" % labels["heading"]
    if len(panels) > 1:
        # multiple tabs: create figure with tabs
        output_file(outfile, title=heading)
        tabs = Tabs(tabs=panels)
        save(tabs)
        print("The %s has been created and written to: %s" % (heading, outfile))
    elif len(panels) == 1:
        # single tab: create regular figure
        output_file(outfile, title=heading)
        save(panels[0])
    else:
        # no tabs: warn user that no heatmap can be made
        print(
            "\n---\nThere are no contigs for %s in this sample! No %s heatmap is made.\n---\n"
            % (labels["in_sample"], labels["group"])
        )
        with open(outfile, "w") as handle:
            handle.write(
                "No %s contigs found in the current dataset." % labels["adjective"]
            )


def main():
    """
    Main execution of the script
    """
    # 1. Parse and show arguments
    arguments = parse_arguments()
    message = (
        "\n"
        "These are the arguments you have provided:\n"
        " INPUT:\n"
        "classified = {0},\n"
        "numbers = {1}\n"
        " OUTPUT:\n"
        "super = {2}\n"
        "virus = {3}\n"
        "phage = {4}\n"
        "bact = {5}\n"
        "super_quantities = {6}\n"
        "stats = {7}\n"
        "vir_stats = {8}\n"
        "phage_stats = {9}\n"
        "bact_stats = {10}\n"
        " OPTIONAL PARAMETERS:\n"
        "colour = {11}\n".format(
            arguments.classified,
            arguments.numbers,
            arguments.super,
            arguments.virus,
            arguments.phage,
            arguments.bact,
            arguments.super_quantities,
            arguments.stats,
            arguments.vir_stats,
            arguments.phage_stats,
            arguments.bact_stats,
            arguments.colour,
        )
    )
    print(message)

    # 2. Read input files and make dataframes
    numbers_df = read_numbers(arguments.numbers)
    classifications_df = read_classifications(arguments.classified)
    merged_df = classifications_df.merge(
        numbers_df, left_on="Sample_name", right_on="Sample"
    )
    merged_df["Percentage"] = merged_df.reads / merged_df.read_pairs * 100

    # 3. Create chunks of information required for the heatmaps
    # 3.1. Aggregate superkingdom-rank information
    # Count the percentages of Archaea, Bacteria, Eukaryota and Viruses per sample:
    superkingdom_sums = pd.DataFrame(
        merged_df.groupby(["Sample_name", "superkingdom"]).sum()[
            ["reads", "Percentage"]
        ]
    )
    superkingdom_sums.reset_index(
        inplace=True
    )  # to use MultiIndex "Sample_name" and "superkingdom" as columns
    superkingdom_sums.to_csv(arguments.super_quantities, index=False)
    print("File %s has been created!" % arguments.super_quantities)

    # 3.2. Filter viruses from the table
    virus_df = filter_taxa(df=merged_df, taxon="Viruses", rank="superkingdom")
    # Remove the phages from the virus df to make less cluttered heatmaps
    virus_df = remove_taxa(df=virus_df, taxon=PHAGE_FAMILY_LIST, rank="family")
    # 3.3. Filter phages
    phage_df = filter_taxa(df=merged_df, taxon=PHAGE_FAMILY_LIST, rank="family")
    # 3.4. Filter bacteria
    bacterium_df = filter_taxa(df=merged_df, taxon="Bacteria", rank="superkingdom")

    # 4. Write taxonomic rank statistics to a file, for each chunk
    # 4.1. All taxa
    report_taxonomic_statistics(df=merged_df, outfile=arguments.stats)
    # 4.2. Viruses
    report_taxonomic_statistics(df=virus_df, outfile=arguments.vir_stats)
    # 4.3. Phages
    report_taxonomic_statistics(df=phage_df, outfile=arguments.phage_stats)
    # 4.4. Bacteria
    report_taxonomic_statistics(df=bacterium_df, outfile=arguments.bact_stats)

    # 5. Draw heatmaps for each chunk
    # 5.1. All taxa: superkingdoms
    draw_heatmaps(
        df=superkingdom_sums,
        outfile=arguments.super,
        title="Superkingdoms heatmap",
        taxonomic_rank="superkingdom",
        colour=arguments.colour,
    )

    # 5.2. Viruses: heatmaps for each rank below 'class'
    _save_rank_heatmaps(
        df=virus_df,
        ranks=RANKS[3:],
        outfile=arguments.virus,
        colour=arguments.colour,
        labels={
            "singular": "Virus",
            "heading": "Virus",
            "group": "virus",
            "in_sample": "Viruses",
            "adjective": "virus",
        },
    )

    # 5.3. Phages: heatmaps for each rank below 'class'
    _save_rank_heatmaps(
        df=phage_df,
        ranks=RANKS[3:],
        outfile=arguments.phage,
        colour=arguments.colour,
        labels={
            "singular": "Phage",
            "heading": "Phage",
            "group": "phage",
            "in_sample": "phages",
            "adjective": "phage",
        },
    )

    # 5.4. Bacteria: heatmaps for each rank below 'superkingdom'
    _save_rank_heatmaps(
        df=bacterium_df,
        ranks=RANKS[1:],
        outfile=arguments.bact,
        colour=arguments.colour,
        labels={
            "singular": "Bacterium",
            "heading": "Bacteria",
            "group": "bacteria",
            "in_sample": "bacteria",
            "adjective": "bacterial",
        },
    )
# EXECUTE script--------------------------------------------
if __name__ == "__main__":
0
Source : cli.py
with MIT License
from IQTLabs
with MIT License
from IQTLabs
def visualize(
    fasta,
    width,
    palette,
    color,
    hide,
    bar,
    title,
    separate,
    cols,
    link_x,
    link_y,
    output,
    offline,
    method,
    dimensions,
    skip,
    mode,
    legend_loc,
    output_backend,
    downsample,
):
    """Transform the sequences of one or more FASTA files and plot them.

    :param fasta: sequence of FASTA file paths (it is iterated, indexed and
        measured with ``len``); ``None`` is rejected
    :param width: line width of every plotted sequence
    :param palette: key into ``small_palettes`` selecting the colour set
    :param color: when falsy every sequence is drawn in black
    :param hide: when truthy legends are always drawn and clicking an entry
        hides the corresponding line
    :param bar: show a ``tqdm`` progress bar when more than one sequence
    :param title: figure title (single-figure layout) and HTML page title
    :param separate: draw one figure per sequence/file in a grid
    :param cols: number of grid columns; ``0`` picks ``ceil(sqrt(n))``
    :param link_x: share the x range with the previous figure
    :param link_y: share the y range with the previous figure
    :param output: output path; only paths ending in ``.html`` are saved,
        anything else (including ``None``) opens the plot with ``show``
    :param offline: embed the Bokeh resources inline in the saved HTML
    :param method: transformation name, one of the ``axis_labels`` keys
    :param dimensions: ``(width, height)`` of each figure, default (750, 500)
    :param skip: suppress the confirmation prompts and informational output
    :param mode: ``"seq"``, ``"file"`` or ``"auto"`` (resolved below)
    :param legend_loc: legend location forwarded to Bokeh
    :param output_backend: Bokeh output backend forwarded to ``figure``
    :param downsample: keep every ``downsample``-th transformed point
    :raises ValueError: if ``fasta`` is ``None``
    :return: None
    """
    # check filetype
    if fasta is None:
        raise ValueError("Must provide FASTA file.")

    # handle selecting the palette
    palette = small_palettes[palette]

    # handle setting the dimensions automatically if not specified
    if not dimensions:
        dimensions = (750, 500)

    record_count = len(
        [record for _f in fasta for record in Fasta(_f, read_long_names=True)]
    )
    if record_count > len(palette) and mode != "file":
        if len(fasta) > 1 and mode == "auto":
            if not skip:
                print(
                    "Visualizing each file in separate color. To override, provide mode selection."
                )
            mode = "file"
        else:
            print("Visualizing each sequence in black.")
            color = False
            # "auto" must still resolve to a concrete mode: the separate-figure
            # layout below only knows "seq" and "file" and would otherwise
            # leave fig_count/_fig unbound.
            if mode == "auto":
                mode = "seq"
    elif mode == "auto":
        mode = "seq"

    # get all the sequences
    seqs = []
    color_counter = 0
    warned = False
    for _f in fasta:
        for seq in Fasta(_f, sequence_always_upper=True, read_long_names=True):
            if color:
                # palettes are keyed by size and the smallest one has 3 colours
                palette_size = color_counter + 1 if color_counter > 2 else 3
                seq_color = palette[palette_size][color_counter]
            else:
                seq_color = "black"
            seqs.append(
                Box(
                    color=seq_color,
                    name=_f if mode == "file" else seq.name,
                    raw_seq=str(seq),
                )
            )

            # check the length of the seq (ask at most once)
            if len(seq) > 10000 and not skip and not warned and downsample == 1:
                click.confirm(
                    "You are plotting a long sequence ({} bp). This may be very slow, although downsampling might help. "
                    "Do you want to continue?".format(len(seq)),
                    abort=True,
                )
                warned = True

            if mode == "seq":
                color_counter += 1
        if mode == "file":
            color_counter += 1

    # warn if plotting a large number of seqs
    if len(seqs) > 500 and not skip:
        click.confirm(
            "You are plotting a large number of sequences ({}). This may be very slow, although downsampling might help. "
            "Do you want to continue?".format(len(seqs)),
            abort=True,
        )

    # warn if using a bad method
    if (
        max([len(seq.raw_seq) for seq in seqs]) > 25
        and method in ["qi", "randic"]
        and not skip
    ):
        click.confirm(
            "This method is not well suited to a sequence of this length. "
            "Do you want to continue?",
            abort=True,
        )

    axis_labels = {
        "squiggle": {"x": "position (BP)", "y": None},
        "gates": {"x": "C-G axis", "y": "A-T axis"},
        "yau": {"x": None, "y": None},
        "yau-bp": {"x": "position (BP)", "y": None},
        "randic": {"x": "position (BP)", "y": "nucleotide"},
        "qi": {"x": "position (BP)", "y": "dinucleotide"},
    }

    # the number of figures to draw is either the number of sequences or files (or 1)
    if separate:
        if mode == "seq":
            fig_count = len(seqs)
        elif mode == "file":
            fig_count = len(fasta)
    else:
        fig_count = 1

    fig = []
    for i in range(fig_count):
        # link the axes, if requested
        if i > 0 and link_x:
            x_range = fig[i - 1].x_range
        else:
            x_range = None
        if i > 0 and link_y:
            y_range = fig[i - 1].y_range
        else:
            y_range = None

        # the y axes for randic and qi are bases
        if method == "randic":
            y_range = ["A", "T", "G", "C"]
        elif method == "qi":
            y_range = [
                "AA", "AC", "AG", "AT",
                "CA", "CC", "CG", "CT",
                "GA", "GC", "GG", "GT",
                "TA", "TC", "TG", "TT",
            ]

        fig.append(
            figure(
                x_axis_label=axis_labels[method]["x"],
                y_axis_label=axis_labels[method]["y"],
                title=title,
                x_range=x_range,
                y_range=y_range,
                plot_width=dimensions[0],
                plot_height=dimensions[1],
                output_backend=output_backend,
            )
        )

    # show a progress bar if processing multiple files
    if len(seqs) > 1 and bar:
        _seqs = tqdm(seqs, unit=" seqs", leave=False)
    else:
        _seqs = seqs

    for i, seq in enumerate(_seqs):
        # perform the actual transformation
        transformed = transform(seq.raw_seq, method=method)
        if downsample > 1:
            transformed = (transformed[0][::downsample], transformed[1][::downsample])

        # figure (no pun intended) which figure to plot the data on
        if separate:
            if mode == "seq":
                _fig = fig[i]
            elif mode == "file":
                _fig = fig[fasta.index(seq.name)]

            # add a title to the plot
            _fig.title = annotations.Title()
            if mode == "seq":
                _fig.title.text = seq.name
            elif mode == "file":
                _fig.title.text = click.format_filename(seq.name, shorten=True)
        else:
            _fig = fig[0]
            _fig.title = annotations.Title()

            # if only plotting on one figure, set up the title
            if title:
                _fig.title.text = title
            elif len(seqs) > 1 and not title and len(fasta) == 1:
                _fig.title.text = click.format_filename(fasta[0], shorten=True)
            elif len(seqs) == 1:
                # if just plotting one sequence, title it with the name of the sequence
                _fig.title.text = seq.name

        # randic and qi method's have categorical y axes
        if method == "randic":
            y = list(seq.raw_seq)
        elif method == "qi":
            pairs = [seq.raw_seq[k : k + 2] for k in range(len(seq.raw_seq))]
            # the final slice is a single base; keep only full dinucleotides
            y = [str(pair) for pair in pairs if len(pair) == 2]
        else:
            y = transformed[1]

        # figure out whether to add a legend
        if (separate or not color or mode == "file" or len(seqs) == 1) and not hide:
            legend = None
        else:
            legend = click.format_filename(seq.name, shorten=True)

        # optimization for comparing large FASTA files without hiding:
        # in file mode only the last sequence of each file gets a legend entry
        try:
            if mode == "file" and seqs[i + 1].color != seq.color and not separate:
                legend = click.format_filename(seq.name, shorten=True)
        except IndexError:
            if mode == "file" and not separate:
                legend = click.format_filename(seq.name, shorten=True)

        # do the actual plotting
        if legend is not None:
            _fig.line(
                x=transformed[0],
                y=y,
                line_width=width,
                legend_label=legend,
                color=seq.color,
            )
            # set up the legend
            _fig.legend.location = legend_loc
            if hide:
                _fig.legend.click_policy = "hide"
        else:
            _fig.line(x=transformed[0], y=y, line_width=width, color=seq.color)

    # clean up the tqdm bar (a plain list has no close())
    try:
        _seqs.close()
    except AttributeError:
        pass

    # lay out the figure
    if separate:
        plot = gridplot(
            fig,
            ncols=math.ceil(len(fig) ** 0.5) if cols == 0 else cols,
            toolbar_options=dict(logo=None),
        )  # note that 0 denotes the automatic default
    else:
        plot = fig[0]

    if output is not None and output.endswith(".html"):
        # use the caller's title for the page and only fall back to the
        # generic one when none was given (the two were previously swapped)
        output_file(
            output, title=title if title is not None else "Squiggle Visualization"
        )
        save(plot, resources=INLINE if offline else None)
    else:
        show(plot)
if __name__ == "__main__":
0
Source : plot.py
with BSD 3-Clause "New" or "Revised" License
from Open-ET
with BSD 3-Clause "New" or "Revised" License
from Open-ET
def _plot(self, FluxObj, ncols=1, output_type='save', out_file=None,
suptitle='', plot_width=1000, plot_height=450,
sizing_mode='scale_both', merge_tools=False, link_x=True, **kwargs):
"""
Private routine for aggregated validation plots that are used by
the :meth:`.QaQc.plot` and :meth:`.Data.plot` methods.
"""
# NOTE(review): this listing has lost its leading indentation, so the block
# nesting cannot be read off the text; the comments added below describe
# only what the individual statements do.
#
# Arguments as used in this routine:
#   FluxObj      object providing ``df``, ``inv_map``, ``variables``,
#                ``units`` and, optionally, ``monthly_df``
#   ncols, sizing_mode, merge_tools, **kwargs
#                forwarded to ``gridplot``
#   output_type  'show', 'notebook', 'save', 'return_figs' or 'return_grid'
#   out_file     passed to ``output_file`` when output_type is 'save'
#   suptitle     text of the ``Div`` stacked above the grid
#   plot_width, plot_height
#                figure size; scatter figures use plot_width for both sides
#   link_x       share the x range of the first daily/monthly line figure
# get daily and monthly time series with internal names, get units
monthly = False
if hasattr(FluxObj, 'monthly_df'):
# will run correction as of now if it is a QaQc
monthly = True
monthly_df = FluxObj.monthly_df.rename(columns=FluxObj.inv_map)
# avoid plotting single point- errors out bokeh datetime axis, etc.
for c in monthly_df.columns:
# NOTE(review): "< =" is not a valid Python operator; presumably "<=" was
# intended -- confirm against the upstream file.
if monthly_df[c].notna().sum() < = 1:
monthly_df.drop(c, axis=1, inplace=True)
monthly_source = ColumnDataSource(monthly_df)
# so that the correction is run, may change this
FluxObj.df.head(); # if Data, need to access to calc vp/vpd
df = FluxObj.df.rename(columns=FluxObj.inv_map)
variables = FluxObj.variables
units = FluxObj.units
# bokeh column sources for tooltips
daily_source=ColumnDataSource(df)
# for aggregating plots
daily_line = []
daily_scatter = []
monthly_line = []
monthly_scatter = []
if output_type == 'save':
output_file(out_file)
def _get_units(plt_vars, units):
"""
Helper function to figure out units for multivariate plots.
If none of plt_vars exist return None, if multiple units are found
print a warning that vars have different units. Returns string if
one or more units are found- first found if multiple.
"""
ret = []
for v in plt_vars:
unit = units.get(v, None)
if unit is not None:
ret.append(unit)
if len(ret) == 0:
ret = None
elif len(set(ret)) > 1:
print(
'WARNING: variables: {} are not of the same units'.format(
','.join(plt_vars)
)
)
ret = ret[0]
elif len(set(ret)) == 1:
ret = ret[0]
return ret
# run through each plot, daily then monthly versions
####
# energy balance time series plots
####
plt_vars = ['LE', 'H', 'Rn', 'G']
colors = ['blue', 'red', 'black', 'green']
title = 'Daily Surface Energy Balance Components'
x_label = 'date'
y_label = _get_units(plt_vars, units)
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_height, name='energy_balance_daily'
)
# the result of Plot.add_lines is compared against None throughout this
# routine before a figure is collected
fig = Plot.add_lines(
fig, df, plt_vars, colors, x_label, daily_source, labels=plt_vars
)
if fig is not None:
daily_line.append(fig)
else:
print(
'Energy balance components time series grapths missing all '
'variables'
)
if fig is not None and monthly:
# same for monthly fig
title = 'Monthly Surface Energy Balance Components'
fig = figure(x_axis_label=x_label, y_axis_label=y_label,title=title,
width=plot_width, height=plot_height,
name='energy_balance_monthly'
)
fig = Plot.add_lines(
fig, monthly_df, plt_vars, colors, x_label, monthly_source,
labels=plt_vars
)
monthly_line.append(fig)
####
# incoming shortwave and ASCE potential clear sky time series plots
####
plt_vars = ['sw_in', 'rso']
# only plot if we have both
if set(plt_vars).issubset(df.columns):
labels = ['Station Rs', 'ASCE Rso']
colors = ['black', 'red']
title =\
'Daily Incoming Shortwave (Rs) and ASCE Clear Sky Shortwave '+\
'Radiation (Rso)'
x_label = 'date'
y_label = _get_units(plt_vars, units)
fig = figure(x_axis_label=x_label, y_axis_label=y_label,
title=title, width=plot_width, height=plot_height,
name='Rs_daily'
)
fig = Plot.add_lines(
fig, df, plt_vars, colors, x_label, daily_source, labels=labels
)
if fig is not None:
daily_line.append(fig)
## same for monthly fig (removed for now)
#title='Monthly Incoming Shortwave and ASCE Potential Radiation'
#fig = figure(
# x_axis_label=x_label,y_axis_label=y_label,title=title,
# width=plot_width, height=plot_height
#)
#fig = Plot.add_lines(
# fig, monthly_df, plt_vars, colors, x_label, monthly_source,
# labels=labels
#)
#monthly_line.append(fig)
else:
print(
'Shortwave and potential clear sky radiation time series '
'grapths missing all variables'
)
####
# multiple soil heat flux sensor time series plots
####
# keep user names for these in hover
# NOTE(review): the pattern is not a raw string, and "[\d+mean|corr]" is a
# character class (one character out of that set), not the alternatives
# "mean"/"corr" -- TODO confirm the intended match.
g_re = re.compile('^[gG]_[\d+mean|corr]|G$')
g_vars = [
v for v in variables if g_re.match(v) and v in df.columns
]
num_lines = len(g_vars)
if num_lines > 1:
# map internal names to the values stored in ``variables`` for display
rename_dict = {k:variables[k] for k in g_vars}
tmp_df = df[g_vars].rename(columns=rename_dict)
tmp_source = ColumnDataSource(tmp_df)
plt_vars = list(rename_dict.values())
# take palette entries at a stride of int(256/num_lines)
colors = Viridis256[0:-1:int(256/num_lines)]
title = 'Daily Soil Heat Flux (Multiple Sensors)'
x_label = 'date'
y_label = _get_units(g_vars, units)
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
plot_width=plot_width, plot_height=plot_height, name='G_daily'
)
fig = Plot.add_lines(
fig, tmp_df, plt_vars, colors, x_label, tmp_source,
labels=plt_vars
)
if fig is not None:
daily_line.append(fig)
if fig is not None and monthly:
# same for monthly fig
g_vars = [
v for v in variables if g_re.match(v) and v in \
monthly_df.columns
]
num_lines = len(g_vars)
if num_lines > 1:
tmp_df = monthly_df[g_vars].rename(columns=rename_dict)
tmp_source = ColumnDataSource(tmp_df)
title = 'Monthly Soil Heat Flux (Multiple Sensors)'
fig = figure(
x_axis_label=x_label, y_axis_label=y_label,title=title,
plot_width=plot_width, plot_height=plot_height,
name='G_monthly'
)
fig = Plot.add_lines(
fig, tmp_df, plt_vars, colors, x_label, tmp_source,
labels=plt_vars
)
monthly_line.append(fig)
# do not print warning if missing multiple soil moisture recordings
####
# radiation time series plots
####
plt_vars = ['sw_in', 'lw_in', 'sw_out', 'lw_out']
colors = ['red', 'darkred', 'blue', 'navy']
title = 'Daily Radiation Components'
x_label = 'date'
y_label = _get_units(plt_vars, units)
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_height, name='radiation_daily'
)
fig = Plot.add_lines(
fig, df, plt_vars, colors, x_label, daily_source, labels=plt_vars
)
if fig is not None:
daily_line.append(fig)
else:
print(
'Radiation components time series grapths missing all variables'
)
if fig is not None and monthly:
# same for monthly fig
title = 'Monthly Radiation Components'
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_height, name='radiation_monthly'
)
fig = Plot.add_lines(
fig, monthly_df, plt_vars, colors, x_label, monthly_source,
labels=plt_vars
)
monthly_line.append(fig)
####
# temperature time series plot
####
plt_vars = ['t_max','t_avg','t_min','t_dew']
colors = ['red','black','blue','green']
title = 'Daily Average Air Temperature'
x_label = 'date'
y_label = _get_units(plt_vars, units)
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_height, name='temp_daily'
)
fig = Plot.add_lines(
fig, df, plt_vars, colors, x_label, daily_source, labels=plt_vars
)
if fig is not None:
daily_line.append(fig)
else:
print(
'Average air temperature time series grapths missing all '
'variables'
)
if fig is not None and monthly:
# same for monthly fig
title = 'Monthly Average Air Temperature'
fig = figure(
x_axis_label=x_label, y_axis_label=y_label,title=title,
width=plot_width, height=plot_height, name='temp_monthly'
)
fig = Plot.add_lines(
fig, monthly_df, plt_vars, colors, x_label, monthly_source,
labels=plt_vars
)
monthly_line.append(fig)
####
# vapor pressure time series plots
####
plt_vars = ['vp', 'vpd']
colors = ['black', 'darkred']
title = 'Daily Average Vapor Pressure and Deficit'
x_label = 'date'
y_label = _get_units(plt_vars, units)
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_height, name='vap_press_daily'
)
fig = Plot.add_lines(
fig, df, plt_vars, colors, x_label, daily_source, labels=plt_vars
)
if fig is not None:
daily_line.append(fig)
else:
print('Vapor pressure time series grapths missing all variables')
if fig is not None and monthly:
# same for monthly fig
title = 'Monthly Average Vapor Pressure'
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_height, name='vap_press_monthly'
)
fig = Plot.add_lines(
fig, monthly_df, plt_vars, colors, x_label, monthly_source,
labels=plt_vars
)
monthly_line.append(fig)
####
# windpseed time series plot
####
plt_vars = ['ws']
colors = ['black']
title = 'Daily Average Windspeed'
x_label = 'date'
y_label = _get_units(plt_vars, units)
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_height, name='wind_daily'
)
fig = Plot.add_lines(fig, df, plt_vars, colors, x_label, daily_source)
if fig is not None:
daily_line.append(fig)
else:
print('Windspeed time series grapths missing all variables')
if fig is not None and monthly:
# same for monthly fig
title = 'Monthly Average Windspeed'
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_height, name='wind_monthly'
)
fig = Plot.add_lines(
fig, monthly_df, plt_vars, colors, x_label, monthly_source
)
monthly_line.append(fig)
####
# precipitation time series plots
####
plt_vars = ['ppt', 'gridMET_prcp']
labels = ['station', 'gridMET']
colors = ['black', 'red']
title = 'Daily Station and gridMET Precipitation'
x_label = 'date'
y_label = _get_units(plt_vars, units)
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_height, name='precip_daily'
)
fig = Plot.add_lines(
fig, df, plt_vars, colors, x_label, daily_source, labels=labels
)
if fig is not None:
daily_line.append(fig)
else:
print('Precipitation time series grapths missing all variables')
if fig is not None and monthly:
# same for monthly fig
title = 'Monthly Station and gridMET Precipitation'
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_height, name='precip_monthly'
)
fig = Plot.add_lines(
fig, monthly_df, plt_vars, colors, x_label, monthly_source,
labels=labels
)
monthly_line.append(fig)
####
# latent energy time series plots
####
plt_vars = ['LE', 'LE_corr', 'LE_user_corr']
colors = ['black', 'red', 'darkorange']
title = 'Daily Average Latent Energy Flux'
x_label = 'date'
y_label = _get_units(plt_vars, units)
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_height, name='LE_daily'
)
fig = Plot.add_lines(
fig, df, plt_vars, colors, x_label, daily_source, labels=plt_vars
)
if fig is not None:
daily_line.append(fig)
else:
print('Latent energy time series grapths missing all variables')
if fig is not None and monthly:
# same for monthly fig
title = 'Monthly Average Latent Energy Flux'
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_height, name='LE_monthly'
)
fig = Plot.add_lines(
fig, monthly_df, plt_vars, colors, x_label, monthly_source,
labels=plt_vars
)
monthly_line.append(fig)
####
# ET time series plots
####
# the reference ET name follows from whether an 'ETrF' column is present
refET = 'ETr' if 'ETrF' in df.columns else 'ETo'
plt_vars = ['ET', 'ET_corr', 'ET_user_corr', f'gridMET_{refET}']
labels = plt_vars[0:3] + [refET]
colors = ['black', 'red', 'darkorange', 'blue']
title = 'Daily Evapotranspiration'
x_label = 'date'
y_label = _get_units(plt_vars, units)
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_height, name='ET_daily'
)
fig = Plot.add_lines(
fig, df, plt_vars, colors, x_label, daily_source, labels=labels
)
if 'ET_fill_val' in df.columns and fig is not None:
# make gap fill values more visible
Plot.line_plot(
fig, 'date', 'ET_fill_val', daily_source, 'green',
label='ET_fill_val', line_width=3
)
if fig is not None:
daily_line.append(fig)
else:
print(
'Evapotranspiration time series grapths missing all variables'
)
if fig is not None and monthly:
# same for monthly fig
title = 'Monthly Evapotranspiration'
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_height, name='ET_monthly'
)
fig = Plot.add_lines(
fig, monthly_df, plt_vars, colors, x_label, monthly_source,
labels=labels
)
monthly_line.append(fig)
####
# number gap filled days monthly time series plot
####
if monthly and 'ET_gap' in monthly_df.columns:
txt = ''
if 'ET_corr' in df.columns:
txt = ' Corrected'
title = 'Number of Gap Filled Days in{} Monthly ET'.format(txt)
x_label = 'date'
y_label = 'number of gap-filled days'
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_height, name='ET_gaps'
)
x = 'date'
y = 'ET_gap'
color = 'black'
Plot.line_plot(fig, x, y, monthly_source, color)
monthly_line.append(fig)
elif monthly:
print('Monthly count of gap filled ET days plot missing variable')
####
# ETrF time series plots
####
plt_vars = [f'{refET}F', f'{refET}F_filtered']
colors = ['black', 'red']
title = f'Daily Fraction of Reference ET ({refET}F)'
x_label = 'date'
y_label = _get_units(plt_vars, units)
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_height, name=f'{refET}F_daily'
)
fig = Plot.add_lines(
fig, df, plt_vars, colors, x_label, daily_source, labels=plt_vars
)
if fig is not None:
daily_line.append(fig)
else:
print(
'Fraction of reference ET time series grapths missing all '
'variables'
)
if fig is not None and monthly:
# same for monthly fig
title = f'Monthly Fraction of Reference ET ({refET}F)'
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_height, name=f'{refET}F_monthly'
)
fig = Plot.add_lines(
fig, monthly_df, plt_vars, colors, x_label, monthly_source,
labels=plt_vars
)
monthly_line.append(fig)
####
# energy balance ratio time series plots
####
plt_vars = ['ebr', 'ebr_corr', 'ebr_user_corr']
colors = ['black', 'red', 'darkorange']
title = 'Daily Energy Balance Ratio with Long-term Mean'
x_label = 'date'
y_label = _get_units(plt_vars, units)
# add mean EBR for each time series in legend
labels = []
for i, v in enumerate(plt_vars):
if v in df.columns:
added_text = ': {}'.format(str(round(df[v].mean(),2)))
labels.append(plt_vars[i] + added_text)
else:
labels.append(None)
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_height, name='EBR_daily'
)
fig = Plot.add_lines(
fig, df, plt_vars, colors, x_label, daily_source, labels=labels
)
if fig is not None:
daily_line.append(fig)
else:
print(
'Energy balance ratio time series grapths missing all '
'variables'
)
if fig is not None and monthly:
# same for monthly fig
title = 'Monthly Energy Balance Ratio with Long-term Mean'
# add mean for monthly EBRs to legend
labels = []
for i, v in enumerate(plt_vars):
if v in monthly_df.columns:
added_text = ': {}'.format(
str(round(monthly_df[v].mean(),2))
)
labels.append(plt_vars[i] + added_text)
else:
labels.append(None)
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_height, name='EBR_monthly'
)
fig = Plot.add_lines(
fig, monthly_df, plt_vars, colors, x_label, monthly_source,
labels=labels
)
monthly_line.append(fig)
####
# energy balance closure scatter plots
####
title = 'Daily Energy Balance Closure, Energy Versus Flux with Slope '\
'Through Origin'
unit = _get_units(['LE', 'H', 'Rn', 'G'], units)
y_label = 'LE + H ({})'.format(unit)
x_label = 'Rn - G ({})'.format(unit)
# height is taken from plot_width, so the scatter figures are square
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_width, name='energy_vs_flux_daily'
)
y_vars = ['flux', 'flux_corr', 'flux_user_corr']
colors = ['black', 'red', 'darkorange']
labels = ['init', 'corr', 'user_corr']
# add plot pairs to plot if they exist, add 1:1
mins_maxs = []
n_vars_fnd = 0
for i, v in enumerate(y_vars):
if v in df.columns and not df[v].isna().all():
n_vars_fnd += 1
if v == 'flux_corr' and 'energy_corr' in df.columns:
x_var = 'energy_corr'
else:
x_var = 'energy'
# the value returned by Plot.scatter_plot is indexed below as
# columns 0..3 = x min, x max, y min, y max
min_max = Plot.scatter_plot(
fig, x_var, v, daily_source, colors[i], label=labels[i]
)
if min_max is not None:
mins_maxs.append(min_max)
if n_vars_fnd > 0:
# add scaled one to one line
mins_maxs = np.array(mins_maxs)
if not pd.isna(mins_maxs).all():
x_min = min(mins_maxs[:,0])
x_max = max(mins_maxs[:,1])
y_min = min(mins_maxs[:,2])
y_max = max(mins_maxs[:,3])
ax_min, ax_max = min([x_min,y_min]), max([x_max,y_max])
# pad both ends by 2% of the span; the second pad is computed after
# ax_min has already been lowered
ax_min -= 0.02*abs(ax_max-ax_min)
ax_max += 0.02*abs(ax_max-ax_min)
fig.x_range=Range1d(ax_min, ax_max)
fig.y_range=Range1d(ax_min, ax_max)
one2one_vals = np.arange(ax_min, ax_max,1)
fig.line(
one2one_vals, one2one_vals, legend_label='1:1 line',
color='black', line_dash='dashed'
)
daily_scatter.append(fig)
if monthly:
# same for monthly fig
title = 'Monthly Energy Balance Closure, Energy Versus Flux '\
'with Slope Through Origin'
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_width,
name='energy_vs_flux_monthly'
)
mins_maxs = []
for i, v in enumerate(y_vars):
if v in monthly_df.columns:
min_max = Plot.scatter_plot(
fig, 'energy', v, monthly_source, colors[i],
label=labels[i]
)
if min_max is not None:
mins_maxs.append(min_max)
mins_maxs = np.array(mins_maxs)
# check if not all pairs are empty, if not plot 1:1
if not pd.isna(mins_maxs).all():
x_min = min(mins_maxs[:,0])
x_max = max(mins_maxs[:,1])
y_min = min(mins_maxs[:,2])
y_max = max(mins_maxs[:,3])
ax_min, ax_max = min([x_min,y_min]), max([x_max,y_max])
ax_min -= 0.02*abs(ax_max-ax_min)
ax_max += 0.02*abs(ax_max-ax_min)
fig.x_range=Range1d(ax_min, ax_max)
fig.y_range=Range1d(ax_min, ax_max)
one2one_vals = np.arange(ax_min, ax_max,1)
fig.line(
one2one_vals, one2one_vals, legend_label='1:1 line',
color='black', line_dash='dashed'
)
monthly_scatter.append(fig)
else:
print('Energy balance scatter grapths missing all variables')
####
# latent energy scatter plots
####
title = 'Daily Latent Energy, Initial Versus Corrected'
unit = _get_units(['LE', 'LE_corr', 'LE_user_corr'], units)
y_label = 'corrected ({})'.format(unit)
x_label = 'initial ({})'.format(unit)
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_width, name='LE_scatter_daily'
)
y_vars = ['LE_corr', 'LE_user_corr']
colors = ['red', 'darkorange']
labels = ['corr', 'user_corr']
# add plot pairs to plot if they exist, add 1:1
mins_maxs = []
n_vars_fnd = 0
for i, v in enumerate(y_vars):
if v in df.columns and not df[v].isna().all():
n_vars_fnd += 1
min_max = Plot.scatter_plot(
fig, 'LE', v, daily_source, colors[i], label=labels[i]
)
# NOTE(review): unlike the energy balance section, min_max is appended
# here without a "is not None" check -- confirm this is intended.
mins_maxs.append(min_max)
if n_vars_fnd > 0:
# add scaled one to one line
mins_maxs = np.array(mins_maxs)
if not pd.isna(mins_maxs).all():
x_min = min(mins_maxs[:,0])
x_max = max(mins_maxs[:,1])
y_min = min(mins_maxs[:,2])
y_max = max(mins_maxs[:,3])
ax_min, ax_max = min([x_min,y_min]), max([x_max,y_max])
ax_min -= 0.02*abs(ax_max-ax_min)
ax_max += 0.02*abs(ax_max-ax_min)
fig.x_range=Range1d(ax_min, ax_max)
fig.y_range=Range1d(ax_min, ax_max)
one2one_vals = np.arange(ax_min, ax_max,1)
fig.line(
one2one_vals, one2one_vals, legend_label='1:1 line',
color='black', line_dash='dashed'
)
daily_scatter.append(fig)
if monthly:
# same for monthly fig
title = 'Monthly Latent Energy, Initial Versus Corrected'
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_width,
name='LE_scatter_monthly'
)
mins_maxs = []
for i, v in enumerate(y_vars):
if v in monthly_df.columns:
min_max = Plot.scatter_plot(
fig, 'LE', v, monthly_source, colors[i],
label=labels[i]
)
if min_max is not None:
mins_maxs.append(min_max)
mins_maxs = np.array(mins_maxs)
# check if not all pairs are empty, if not plot 1:1
if not pd.isna(mins_maxs).all():
x_min = min(mins_maxs[:,0])
x_max = max(mins_maxs[:,1])
y_min = min(mins_maxs[:,2])
y_max = max(mins_maxs[:,3])
ax_min, ax_max = min([x_min,y_min]), max([x_max,y_max])
ax_min -= 0.02*abs(ax_max-ax_min)
ax_max += 0.02*abs(ax_max-ax_min)
fig.x_range=Range1d(ax_min, ax_max)
fig.y_range=Range1d(ax_min, ax_max)
one2one_vals = np.arange(ax_min, ax_max,1)
fig.line(
one2one_vals, one2one_vals, legend_label='1:1 line',
color='black', line_dash='dashed'
)
monthly_scatter.append(fig)
else:
print('Latent energy scatter grapths missing all variables')
####
# ET scatter plots
####
title = 'Daily Evapotranspiration, Initial Versus Corrected'
unit = _get_units(['ET', 'ET_corr', 'ET_user_corr'], units)
y_label = 'corrected ({})'.format(unit)
x_label = 'initial ({})'.format(unit)
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_width, name='ET_scatter_daily'
)
y_vars = ['ET_corr', 'ET_user_corr']
colors = ['red', 'darkorange']
labels = ['corr', 'user_corr']
# add plot pairs to plot if they exist, add 1:1
mins_maxs = []
n_vars_fnd = 0
for i, v in enumerate(y_vars):
if v in df.columns and not df[v].isna().all():
n_vars_fnd += 1
min_max = Plot.scatter_plot(
fig, 'ET', v, daily_source, colors[i], label=labels[i]
)
mins_maxs.append(min_max)
if n_vars_fnd > 0:
# add scaled one to one line
# NOTE(review): the other scatter sections check
# "not pd.isna(mins_maxs).all()" before indexing; that check is absent
# here -- TODO confirm whether it was omitted on purpose.
mins_maxs = np.array(mins_maxs)
x_min = min(mins_maxs[:,0])
x_max = max(mins_maxs[:,1])
y_min = min(mins_maxs[:,2])
y_max = max(mins_maxs[:,3])
ax_min, ax_max = min([x_min,y_min]), max([x_max,y_max])
ax_min -= 0.02*abs(ax_max-ax_min)
ax_max += 0.02*abs(ax_max-ax_min)
fig.x_range=Range1d(ax_min, ax_max)
fig.y_range=Range1d(ax_min, ax_max)
one2one_vals = np.arange(ax_min, ax_max,1)
fig.line(
one2one_vals, one2one_vals, legend_label='1:1 line',
color='black', line_dash='dashed'
)
daily_scatter.append(fig)
if monthly:
# same for monthly fig
title = 'Monthly Evapotranspiration, Initial Versus Corrected'
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
width=plot_width, height=plot_width,
name='ET_scatter_monthly'
)
mins_maxs = []
for i, v in enumerate(y_vars):
if v in monthly_df.columns:
min_max = Plot.scatter_plot(
fig, 'ET', v, monthly_source, colors[i],
label=labels[i]
)
mins_maxs.append(min_max)
mins_maxs = np.array(mins_maxs)
# check if not all pairs are empty, if not plot 1:1
if not pd.isna(mins_maxs).all():
x_min = min(mins_maxs[:,0])
x_max = max(mins_maxs[:,1])
y_min = min(mins_maxs[:,2])
y_max = max(mins_maxs[:,3])
ax_min, ax_max = min([x_min,y_min]), max([x_max,y_max])
ax_min -= 0.02*abs(ax_max-ax_min)
ax_max += 0.02*abs(ax_max-ax_min)
fig.x_range=Range1d(ax_min, ax_max)
fig.y_range=Range1d(ax_min, ax_max)
one2one_vals = np.arange(ax_min, ax_max,1)
fig.line(
one2one_vals, one2one_vals, legend_label='1:1 line',
color='black', line_dash='dashed'
)
monthly_scatter.append(fig)
else:
print('Evapotranspiration scatter grapths missing all variables')
####
# multiple soil moisture time series plots
####
# keep user names for these in hover
# NOTE(review): not a raw string, and "[\d+|mean]" is a character class
# (one character after "theta_"), not the word "mean" -- TODO confirm.
theta_re = re.compile('theta_[\d+|mean]')
theta_vars = [
v for v in variables if theta_re.match(v) and v in df.columns
]
num_lines = len(theta_vars)
if num_lines > 0 and not df[theta_vars].isna().all().all():
rename_dict = {k:variables[k] for k in theta_vars}
tmp_df = df[theta_vars].rename(columns=rename_dict)
tmp_source = ColumnDataSource(tmp_df)
plt_vars = list(rename_dict.values())
colors = Viridis256[0:-1:int(256/num_lines)]
title = 'Daily Soil Moisture (Multiple Sensors)'
x_label = 'date'
y_label = _get_units(theta_vars, units)
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
plot_width=plot_width, plot_height=plot_height,
name='theta_daily'
)
fig = Plot.add_lines(
fig, tmp_df, plt_vars, colors, x_label, tmp_source,
labels=plt_vars
)
if fig is not None:
daily_line.append(fig)
# NOTE(review): this filter tests membership in df.columns although the
# result is used to index monthly_df below; the soil heat flux section
# filters on monthly_df.columns instead -- TODO confirm which is intended.
theta_vars = [
v for v in variables if theta_re.match(v) and v in\
df.columns
]
if fig is not None and monthly and len(theta_vars) > 0:
# same for monthly fig
tmp_df = monthly_df[theta_vars].rename(columns=rename_dict)
tmp_source = ColumnDataSource(tmp_df)
title = 'Monthly Soil Moisture (Multiple Sensors)'
fig = figure(
x_axis_label=x_label, y_axis_label=y_label, title=title,
plot_width=plot_width, plot_height=plot_height,
name='theta_monthly'
)
fig = Plot.add_lines(
fig, tmp_df, plt_vars, colors, x_label, tmp_source,
labels=plt_vars
)
monthly_line.append(fig)
# do not print warning if missing multiple soil moisture recordings
# Aggregate plots and output depending on options
# remove None values in different figure groups
daily_line = list(filter(None, daily_line))
daily_scatter = list(filter(None, daily_scatter))
monthly_line = list(filter(None, monthly_line))
monthly_scatter = list(filter(None, monthly_scatter))
# link axes for time series plots
if link_x:
for each in daily_line:
each.x_range = daily_line[0].x_range
for each in monthly_line:
each.x_range = monthly_line[0].x_range
# final order in the grid: daily lines, daily scatters, monthly lines,
# monthly scatters
figs = daily_line + daily_scatter + monthly_line + monthly_scatter
grid = gridplot(
figs, ncols=ncols, plot_width=None, plot_height=None,
sizing_mode=sizing_mode, merge_tools=merge_tools, **kwargs
)
# 'show', 'notebook' and 'save' wrap the grid in a column under a Div that
# holds suptitle; the two 'return_*' types hand back figs or grid instead
if output_type == 'show':
show(column(Div(text=suptitle),grid))
elif output_type == 'notebook':
from bokeh.io import output_notebook
output_notebook()
show(column(Div(text=suptitle),grid))
elif output_type == 'save':
save(column(Div(text=suptitle),grid))
elif output_type == 'return_figs':
return figs
elif output_type == 'return_grid':
return grid
# NOTE(review): presumably runs only for the non-returning output types,
# since both 'return_*' branches return before this line -- confirm nesting.
reset_output()
0
Source : generate_plots.py
with Apache License 2.0
from oskopek
with Apache License 2.0
from oskopek
def export_plots(p: bokeh.plotting.figure,
                 filename: str,
                 title: str,
                 width: int = WIDTH,
                 height: int = HEIGHT,
                 box: bool = False,
                 show_title: bool = False,
                 y_range_start: Optional[float] = None,
                 y_range_end: Optional[float] = None) -> None:
    """Write ``p`` to ``<filename>.html`` and export it to ``<filename>.png``.

    The figure is modified in place (title, y range, sizing, toolbar).

    :param p: bokeh figure to export
    :param filename: output path without extension
    :param title: title of the saved HTML page
    :param width: width of the exported PNG
    :param height: height of the exported PNG, ignored when ``box`` is set
    :param box: export a square image whose side is ``width``
    :param show_title: keep the figure title; it is removed otherwise
    :param y_range_start: lower y bound for the PNG, ``None`` leaves it as is
    :param y_range_end: upper y bound for the PNG, ``None`` leaves it as is
    :return: None
    """
    # HTML
    if not show_title:
        p.title = None
    bokeh.plotting.save(p, title=title, filename=filename + ".html", resources=bokeh.resources.CDN)

    # PNG
    # Compare against None so that an explicit bound of 0 is applied too.
    if y_range_start is not None:
        p.y_range.start = y_range_start
    if y_range_end is not None:
        p.y_range.end = y_range_end
    set_font_size(p)

    png_width = width
    png_height = width if box else height
    p.sizing_mode = "fixed"
    p.width = png_width
    p.height = png_height
    p.toolbar_location = None
    # export_png resizes a plot to the width/height it is given, so pass the
    # requested size rather than the module defaults (which made ``width``,
    # ``height`` and ``box`` ineffective).
    bokeh.io.export_png(p, filename=filename + ".png", height=png_height, width=png_width)

    # SVG:
    # p.output_backend = "svg"
    # bokeh.io.export_svgs(p, filename=filename + ".svg")
    #
    # os.system(f"inkscape --without-gui --export-pdf={filename}.pdf {filename}.svg")
def box_whiskers_plot(df: pd.DataFrame, out_folder: str, statistic: str = "ll", subtitle: str = "") -> None:
0
Source : utils.py
with Apache License 2.0
from oskopek
with Apache License 2.0
from oskopek
def export_plots(p: bokeh.plotting.figure,
                 filename: str,
                 title: str,
                 width: int = WIDTH,
                 height: int = HEIGHT,
                 box: bool = False,
                 show_title: bool = False,
                 y_range_start: Optional[float] = None,
                 y_range_end: Optional[float] = None,
                 x_range_start: Optional[float] = None,
                 x_range_end: Optional[float] = None) -> None:
    """Save ``p`` as ``<filename>.html`` and export it as ``<filename>.png``.

    The HTML file is written first, before any range or size changes are
    applied; the PNG is exported afterwards from the resized, toolbar-less
    figure. The figure ``p`` is modified in place.

    Args:
        p: Bokeh figure to export.
        filename: Output path without extension.
        title: Title of the HTML document.
        width: Width of the PNG in pixels (ignored when ``box`` is set).
        height: Height of the PNG in pixels.
        box: If true, export a square PNG of ``height`` x ``height``.
        show_title: If false, the figure title is removed before saving.
        y_range_start: Optional lower bound of the y axis for the PNG.
        y_range_end: Optional upper bound of the y axis for the PNG.
        x_range_start: Optional lower bound of the x axis for the PNG.
        x_range_end: Optional upper bound of the x axis for the PNG.
    """
    # HTML
    if not show_title:
        p.title = None
    bokeh.plotting.save(p, title=title, filename=filename + ".html", resources=bokeh.resources.CDN)

    # PNG
    # Compare against None so that a bound of 0 is honoured.
    if y_range_start is not None:
        p.y_range.start = y_range_start
    if y_range_end is not None:
        p.y_range.end = y_range_end
    if x_range_start is not None:
        p.x_range.start = x_range_start
    if x_range_end is not None:
        p.x_range.end = x_range_end
    set_font_size(p)
    p.sizing_mode = "fixed"
    p.width = height if box else width
    p.height = height
    p.toolbar_location = None
    # Export with the size configured above; passing the module constants
    # here would override ``width``/``height``/``box``.
    bokeh.io.export_png(p, filename=filename + ".png", height=p.height, width=p.width)

    # SVG:
    # p.output_backend = "svg"
    # bokeh.io.export_svgs(p, filename=filename + ".svg")
    #
    # os.system(f"inkscape --without-gui --export-pdf={filename}.pdf {filename}.svg")
0
Source : rca.py
with MIT License
from smartyal
with MIT License
from smartyal
def rca(functionNode):
    """
    Root cause analysis as an x-y correlation plot.

    For every "time" annotation that carries the selected tag, the selected
    feature is computed on the first selected variable over the annotation's
    time span and paired with the target value at the middle of that span.
    The resulting points are drawn as a scatter plot and saved as a
    stand-alone html file named by the "outputFileName" child.

    :param functionNode: the function node holding the configuration children
        (control.progress, selectedVariables, selectedTags, annotations,
        selectedFeatures, selectedAlgorithms, selectedTarget, outputFileName)
    :return: True
    """
    logger = functionNode.get_logger()
    logger.info("==>>>> in rca (root cause analysis " + functionNode.get_browse_path())
    progressNode = functionNode.get_child("control").get_child("progress")
    progressNode.set_value(0.1)

    variables = functionNode.get_child("selectedVariables").get_leaves()
    tag = functionNode.get_child("selectedTags").get_value() #only one tag
    annotations = functionNode.get_child("annotations").get_leaves()
    feature = functionNode.get_child("selectedFeatures").get_value()
    # NOTE: the algorithm selection is read but not used by this function
    algo = functionNode.get_child("selectedAlgorithms").get_value()
    target = functionNode.get_child("selectedTarget").get_target()

    p=Progress(progressNode)
    p.set_divisor(len(annotations)/0.5)
    p.set_offset(0.1)

    #now create the data as x-y
    results = {"x":[],"y":[]}
    var = variables[0] # only the first selected variable is analysed

    #now iterate over all annotations of the matching type and create feature
    for idx,anno in enumerate(annotations):
        p.set_progress(idx)
        isTimeAnno = anno.get_child("type").get_value() == "time"
        if not (isTimeAnno and (tag in anno.get_child("tags").get_value())):
            continue
        startTime = anno.get_child("startTime").get_value()
        endTime = anno.get_child("endTime").get_value()
        data = var.get_time_series(startTime,endTime)
        #now create the feature
        feat = calc_feature(data["values"],feature)
        targetValue = get_target(target,(date2secs(startTime)+date2secs(endTime))/2)
        # NOTE: the truthiness test also rejects values that are exactly 0
        if feat and targetValue and numpy.isfinite(feat) and numpy.isfinite(targetValue):
            results["x"].append(feat)
            results["y"].append(targetValue)
        else:
            # get_name must be called; formatting the bound method itself
            # only prints its repr
            logger.warning(f"no result for {var.get_name()} @ {startTime}, anno:{tag}, feat:{feat}, target: {target}")

    #now we have all the x-y
    progressNode.set_value(0.7)
    fig = figure(title = "x-y Correlation Plot "+var.get_name(),
                 tools=[PanTool(), WheelZoomTool(),ResetTool(),SaveTool()],
                 plot_height=300,
                 x_axis_label=feature+"("+var.get_name()+") @ "+tag,
                 y_axis_label=target.get_name())
    fig.toolbar.logo = None
    curdoc().theme = Theme(json=themes.darkTheme)
    fig.xaxis.major_label_text_color = themes.darkTickColor
    fig.yaxis.major_label_text_color = themes.darkTickColor
    fig.scatter(x=results["x"], y=results["y"], size=5, fill_color="#d9b100", marker="o")

    fileName = functionNode.get_child("outputFileName").get_value()
    filePath = os.path.join(myDir,'./../web/customui/'+fileName)
    progressNode.set_value(0.8)
    #inline: put the bokeh .js into this html, otherwise the default cdn will be taken, might cause CORS problems
    output_file(filePath,mode="inline")
    save(fig)
    #print(results)
    return True
#
def data_cleaning(annotations,order=None,logger=None):
0
Source : rca.py
with MIT License
from smartyal
with MIT License
from smartyal
def rca2(functionNode):
# Root cause analysis via regression.
# Builds a table of features (one column per variable/tag/feature combination,
# one entry per matching "time" annotation) plus a "target" column, stores the
# table as an .npz file, fits a regression (LassoCV when "selectedAlgorithm" is
# "lasso", LinearRegression otherwise) on the standardized features, writes an
# html report string into the "report" child and saves a scatter plot of the
# real vs. predicted target values to the file named by "outputFileName".
# Returns True.
# NOTE(review): the indentation of this listing was lost; the code lines below
# are kept exactly as found and only comments were added.
logger = functionNode.get_logger()
logger.info("==>>>> in rca2 (root cause analysis " + functionNode.get_browse_path())
progressNode = functionNode.get_child("control").get_child("progress")
progressNode.set_value(0.1)
m=functionNode.get_model()
# the report is collected as one html string and written to the "report" child at the end
report = ' < i>REPORT < /i> < br> < div style="font-size:85%">'
annotations = functionNode.get_child("annotations").get_leaves()
#order = ["Step"+str(no) for no in range(1,19)]
#order = ["Phase"+str(no) for no in range(3,28)]
order = functionNode.get_child("annotationsOrder").get_value()
logger.debug("filtering by order")
annotations = data_cleaning( annotations ,order = order,logger=logger) #Step1,Step2,...Step18 list of lists
report+=(f"found {len(annotations)} valid processes < br>")
#for now, flatten them out
annotations = [subprocess for process in annotations for subprocess in process]
# NOTE(review): "selectedAlgorithm" is read here and again further below before it is first used
algo = functionNode.get_child("selectedAlgorithm").get_value()
target = functionNode.get_child("selectedTarget").get_target()
progressNode.set_value(0.3)
#now we are building up the table by iterating all the children in "selection"
entries = functionNode.get_child("selection").get_children()
# column name -> list of values; "target" holds the target value per annotation
table = {"target":[]}
firstVariable = True
for entry in entries:
logger.debug(f"entry {entry.get_name()}")
#each entry is a combination of variable, tags and feature
vars = entry.get_child("selectedVariables").get_targets()
tags = entry.get_child("selectedTags").get_value()
features = entry.get_child("selectedFeatures").get_value()
#iterate over variables
for var in vars:
logger.debug(f"processing variable: {var.get_name()} with tags {tags} and features {features}")
#columnName = var.get_name()+str(tags)+m.getRandomId()
for tag in tags:
# row indexes into table["target"] for the sanity check below
row = 0
#table[columnName]=[]# make a column
for idx,anno in enumerate(annotations):
# only annotations of type "time" are considered
if anno.get_child("type").get_value() != "time":
continue
if tag in anno.get_child("tags").get_value():
startTime =anno.get_child("startTime").get_value()
endTime = anno.get_child("endTime").get_value()
data = var.get_time_series(startTime,endTime)["values"]
#we take only the values "inside" the annotation
# (the first and the last sample are dropped when there are more than two)
if len(data)>2:
data =data[1:-1]
#now create the features
for feature in features:
feat = calc_feature(data,feature)
columnName = var.get_name()+"_"+tag+"_"+feature
if not columnName in table:
table[columnName]=[]
# NOTE(review): the feature is appended before the target value is checked, so a
# feature column can presumably get longer than table["target"] - confirm
table[columnName].append(feat)
# the target is evaluated at the middle of the annotation
targetValue = get_target(target,(date2secs(startTime)+date2secs(endTime))/2)
if targetValue:
if firstVariable:
#for the first variable we also write the target
table["target"].append(targetValue)
else:
#for all others we make sure we have the same target value for that case (sanity check)
if table["target"][row] != targetValue:
logger.warning(f'problem target {table["target"][row]} !=> {targetValue}')
row=row+1
else:
logger.warning(f"no corrrect target value for {startTime} - {endTime}")
firstVariable = False
#now we have the table, plot it
# NOTE(review): json is only referenced by the commented-out print below
import json
#print(json.dumps(table,indent=2))
progressNode.set_value(0.5)
#try a model
algo = functionNode.get_child("selectedAlgorithm").get_value()
if algo=="lasso":
reg = linear_model.LassoCV()
report +=" using lasso Regression with auto-hyperparams < br>"
else:
#default
report +=" using linear Regression < br>"
reg = linear_model.LinearRegression() #try ridge, lasso
# split the table into the feature columns (dataTable/columnNames) and keep
# a float64 copy of every column, including the target, for the .npz file
columnNames = []
dataTable = []
saveDict = {}
for k,v in table.items():
saveDict[k]=numpy.asarray(v,dtype=numpy.float64)
if k=="target":
continue
dataTable.append(v)
columnNames.append(k)
fileName = functionNode.get_child("outputFileName").get_value()
# the .npz file uses the output file name up to its first "."
filePath = os.path.join(myDir,'./../web/customui/'+fileName.split(".")[0])
numpy.savez(filePath, **saveDict)
#for loading:
# get = numpy.load(name+".npz")
# for name in get.files:
# data = get[name]
# dataTable holds one entry per column; transpose so that each row is one sample
dataTable = numpy.asarray(dataTable)
x=dataTable.T
# standardize the features before fitting
scaler = StandardScaler()
scaler.fit(x)
x=scaler.transform(x)
y=table["target"]
x_train, x_test, y_train, y_test = train_test_split(x, y)
reg.fit(x_train,y_train)
#print(reg.coef_)
y_hat= reg.predict(x_test)
y_repeat = reg.predict(x_train)
#print(f"predict: {y_hat} vs real: {y_test}")
#check over/underfitting
r_train = r2_score(y_train, y_repeat)
r_test = r2_score(y_test,y_hat)
report+="R < sup>2 < /sup> train= %.4g, R < sup>2 < /sup> test = %.4g < br>"%(r_train,r_test)
# correlation of every (standardized) feature column with the target
pearsons = []
for col in x.T:
pearsons.append(pearsonr(col, y)[0])
#and finally the correlations between y and yhat
y_pearson_train = pearsonr(y_train, y_repeat)[0]
y_pearson_test = pearsonr(y_test,y_hat)[0]
report+="pearsonCorr y/y_hat train:%.4g , test:%.4g < br>"%(y_pearson_train,y_pearson_test)
report +="regression coefficients, pearsons correlations: < br>"
for col,coef,pear in zip(columnNames,reg.coef_,pearsons):
report+=" %s:%.4g, %.4g < br>"%(col,coef,pear)
#write report
progressNode.set_value(0.8)
# NOTE(review): the comment says "close" but the appended literal is not a
# closing tag - confirm whether a closing div was intended
report+=" < div>"#close the style div
functionNode.get_child("report").set_value(report)
#make a plot
hover1= HoverTool(tooltips=[ ( 'x,y','$x,$y')],mode='mouse')
hover1.point_policy='snap_to_data'
hover1.line_policy = "nearest"
tools = [PanTool(), WheelZoomTool(),BoxZoomTool(),ResetTool(),SaveTool(),hover1]
title = "prediction results on "+functionNode.get_child("selectedAlgorithm").get_value()
fig = figure(title = title,tools=tools,plot_height=300,plot_width=400)
fig.toolbar.logo = None
curdoc().theme = Theme(json=themes.darkTheme)
fig.xaxis.major_label_text_color = themes.darkTickColor
fig.yaxis.major_label_text_color = themes.darkTickColor
fig.xaxis.axis_label= target.get_name()
fig.xaxis.axis_label_text_color = "white"
fig.yaxis.axis_label="predicted Values for "+target.get_name()
fig.yaxis.axis_label_text_color="white"
# real target value on x, predicted value on y; train and test points in different colors
fig.circle(y_train,y_repeat,size=4,line_color="white",fill_color="white",name="train",legend_label="train")
fig.circle(y_test,y_hat,line_color="#d9b100",fill_color="#d9b100",size=4,name="test",legend_label="test")
# dashed diagonal over the full value range: points on it are predicted exactly
mini = min([min(y_train),min(y_repeat),min(y_test),min(y_hat)])
maxi = max([max(y_train),max(y_repeat),max(y_test),max(y_hat)])
fig.line([mini,maxi],[mini,maxi],line_color="grey",line_dash="dashed")
fileName = functionNode.get_child("outputFileName").get_value()
filePath = os.path.join(myDir,'./../web/customui/'+fileName)
fig.legend.location = "top_left"
# mode="inline" embeds the bokeh resources into the html file
output_file(filePath,mode="inline")
save(fig)
return True
def prepare_annos_filter(functionNode):
0
Source : varstatistics.py
with MIT License
from smartyal
with MIT License
from smartyal
def varstatistics(functionNode):
    """
    Create a histogram plot (and, when annotation tags are selected, a
    box-plot style trend plot) for the selected variables and save it as a
    stand-alone html file named by the "fileName" child.

    Without tags the histogram covers the widget's current start/end time.
    With tags it covers the values inside all "time" annotations that carry
    one of the tags and lie completely inside the widget's start/end time;
    in that case a second figure shows, per annotation, the 1%/25%/75%/99%
    quantiles, the median and the mean of each variable.

    :param functionNode: the function node holding the configuration children
        (control.progress, variable, widget, bins, annotations, fileName)
    :return: True on success, False if no matching annotation or no finite
        data was found (give_up() is called in that case)
    """
    logger = functionNode.get_logger()
    logger.info("==>>>> statistics " + functionNode.get_browse_path())
    progressNode = functionNode.get_child("control").get_child("progress")
    progressNode.set_value(0)
    #functionNode.get_child("control.signal").set_value(None)

    varNodes = functionNode.get_child("variable").get_targets()
    widget = functionNode.get_child("widget").get_target()
    bins = functionNode.get_child("bins").get_value()
    tags = functionNode.get_child("annotations").get_value()
    startTime = date2secs(widget.get_child("startTime").get_value())
    endTime = date2secs(widget.get_child("endTime").get_value())

    # variable id -> {"node": Node, later also "hist" and "edges"}
    variables = {node.get_id():{"node":node} for node in varNodes}

    #first 30% progress: collect the annotations
    prog = Progress(progressNode)
    progressNode.set_value(0.1)
    prog.set_offset(0.1)
    allAnnos = []
    if tags:
        allAnnoNodes = widget.get_child("hasAnnotation.annotations").get_leaves()
        prog.set_divisor(len(allAnnoNodes)/0.2)
        for index,node in enumerate(allAnnoNodes):
            prog.set_progress(index)
            if node.get_child("type").get_value() != "time":
                continue
            thisTags = node.get_child("tags").get_value()
            if not any(tag in tags for tag in thisTags):
                continue
            anno = {child.get_name():child.get_value() for child in node.get_children()}
            #take this anno only if it is inside the current start/end time
            if date2secs(anno["startTime"])>=startTime and date2secs(anno["endTime"])<=endTime and (anno["startTime"] < anno["endTime"]):
                allAnnos.append(anno)
        if allAnnos == []:
            give_up(functionNode,"no matching annotations in selected time")
            return False

    progressNode.set_value(0.3)
    logger.debug(f"statistics annotations to look at: {len(allAnnos)}")
    prog.set_offset(0.3)
    totalAnnos = max(len(allAnnos),1)
    totalCount = len(variables)*totalAnnos
    prog.set_divisor(totalCount/0.3)

    # histogram per variable
    totalValids = 0
    for varIndex,info in enumerate(variables.values()):
        if tags:
            #iterate over all start and end times
            values = numpy.asarray([],dtype=numpy.float64)
            for annoIndex,anno in enumerate(allAnnos):
                thisValues = info["node"].get_time_series(anno["startTime"],anno["endTime"])["values"]
                values = numpy.append(values,thisValues)
                prog.set_progress(varIndex*totalAnnos+annoIndex)
        else:
            values = info["node"].get_time_series(startTime,endTime)["values"]
        values = numpy.asarray(values)
        # numpy.histogram raises on nan/inf when it has to detect the range
        # itself, so only the finite samples are binned
        finiteValues = values[numpy.isfinite(values)]
        totalValids += len(finiteValues)
        hist, edges = numpy.histogram(finiteValues, bins=bins)
        if len(values):
            hist = hist/len(values) #normalize by the number of samples
        info["hist"]=hist
        info["edges"]=edges

    #make a plot
    if totalValids == 0:
        give_up(functionNode,"all Variables are have no data in the time and annotations selected")
        return False
    progressNode.set_value(0.6)

    hover1= HoverTool(tooltips=[ ( 'x,y','$x,$y')],mode='mouse')
    hover1.point_policy='snap_to_data'
    hover1.line_policy = "nearest"
    tools = [PanTool(), WheelZoomTool(),BoxZoomTool(),ResetTool(),SaveTool(),hover1]
    title = "Statistics of "+str([info["node"].get_name() for info in variables.values()])
    if tags:
        title = title + " in annotation: "+ str(tags )
    fig = figure(title = title,tools=tools,plot_height=300)
    fig.toolbar.logo = None
    curdoc().theme = Theme(json=themes.darkTheme)
    fig.xaxis.major_label_text_color = themes.darkTickColor
    fig.yaxis.major_label_text_color = themes.darkTickColor
    for index,info in enumerate(variables.values()):
        col = themes.darkLineColors[index]
        hist = info["hist"]
        edges = info["edges"]
        fig.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
                 fill_color=col, line_color=col, alpha=0.8,legend_label=info["node"].get_name())
    fig.legend.location = "top_left"

    fileName = functionNode.get_child("fileName").get_value()
    filePath = os.path.join(myDir,'./../web/customui/'+fileName)

    # now make the trend box plot, but only for tags
    # for each variable we create statistics for the annotations and prepare the data
    # {"node":Node(), "boxLower":[], "boxUpper", "mean", "limitUpper", "limitLower"}
    if tags:
        boxPlots = []
        for var in variables:
            info={"node":variables[var]["node"],"boxLower":[],"boxUpper":[],"median":[],"time":[],"limitUpper":[],"limitLower":[],"mean":[]}
            for anno in allAnnos:
                data = info["node"].get_time_series(anno["startTime"],anno["endTime"])
                if len(data["values"]):
                    #remove the nan
                    data["values"] = data["values"][numpy.isfinite(data["values"])]
                if len(data["values"]):
                    #make the statistics; the time stamps are scaled by 1000 for the datetime axis
                    info["time"].append(numpy.median(data["__time"])*1000)
                    info["limitLower"].append(numpy.quantile(data["values"],0.01))
                    info["limitUpper"].append(numpy.quantile(data["values"],0.99))
                    info["boxLower"].append(numpy.quantile(data["values"],0.25))
                    info["boxUpper"].append(numpy.quantile(data["values"],0.75))
                    info["median"].append(numpy.median(data["values"]))
                    info["mean"].append(numpy.mean(data["values"]))
            boxPlots.append(info)

        timeFormat = "%Y-%m-%d-T%H:%M:%S"
        custom = """var local = moment(value).tz('UTC'); return local.format();"""#%self.server.get_settings()["timeZone"]
        hover = HoverTool(
            tooltips=[ ( 'date','@x{%F}')],
            formatters={ '@x' : CustomJSHover(code=custom)
                         },
            mode='mouse'
        )
        hover.point_policy='snap_to_data'
        hover.line_policy = "nearest"
        tools = [PanTool(), BoxZoomTool(),WheelZoomTool(),ResetTool(),hover,SaveTool()]
        fig2 = figure(title = "trends",tools=tools,plot_height=300,x_axis_type='datetime')
        fig2.xaxis.major_label_text_color = themes.darkTickColor
        fig2.yaxis.major_label_text_color = themes.darkTickColor
        progressNode.set_value(0.7)
        fig2.xaxis.formatter=DatetimeTickFormatter(years=timeFormat,days=timeFormat,months=timeFormat,hours=timeFormat,hourmin=timeFormat,minutes=timeFormat,minsec=timeFormat,seconds=timeFormat)
        fig2.toolbar.logo = None

        # The boxes use a fixed width of 20 screen pixels. The former
        # data-based width was never used and its min()/max() raised on an
        # empty time list, so it was dropped.
        for index,info in enumerate(boxPlots):
            #each info is for one variable
            col = themes.darkLineColors[index]
            # whiskers from the 1%/99% limits to the box
            fig2.segment(info["time"],info["limitUpper"],info["time"],info["boxUpper"],line_color=col)
            fig2.segment(info["time"],info["limitLower"],info["time"],info["boxLower"],line_color=col)
            #upper box: from the median to the 75% quantile
            sizUpper = numpy.asarray(info["boxUpper"])-numpy.asarray(info["median"])
            medUpper = numpy.asarray(info["median"])+sizUpper/2
            fig2.rect(x=info["time"],y=medUpper,width_units='screen',width=20,height=sizUpper,fill_color=col,line_color="black")
            #lower box: from the 25% quantile to the median
            sizLower = numpy.asarray(info["median"])-numpy.asarray(info["boxLower"])
            medLower = numpy.asarray(info["median"])-sizLower/2
            fig2.rect(x=info["time"],y=medLower,width_units='screen',width=20,height=sizLower,fill_color=col,line_color="black")
            #sort data for line
            x = numpy.asarray(info["time"])
            y = numpy.asarray(info["mean"])
            order = numpy.argsort(x)
            fig2.line(x[order],y[order],line_color=col)
        progressNode.set_value(0.8)

    #inline: put the bokeh .js into this html, otherwise the default cdn will be taken, might cause CORS problems
    output_file(filePath,mode="inline")
    if tags:
        save(layout([[fig],[fig2]]))
    else:
        save(fig)
    return True
0
Source : plot.py
with Apache License 2.0
from WSWUP
with Apache License 2.0
from WSWUP
def daily_comparison(input_csv, out_dir=None, year_filter=None):
    """
    Compare daily weather station data from
    `PyWeatherQAQC <https://github.com/WSWUP/pyWeatherQAQC>`_ with gridMET
    for each month in year specified.

    The :func:`daily_comparison` function produces HTML files with time series
    and scatter plots of station versus gridMET climate variables. It uses the
    `bokeh <https://bokeh.pydata.org/en/latest/>`_ module to create interactive
    plots, e.g. they can be zoomed in/out and panned. Separate plot files are
    created for each month of a single year.

    Arguments:
        input_csv (str): path to input CSV file containing paired station/
            gridMET metadata. This file is created by running
            :mod:`gridwxcomp.prep_input` followed by
            :mod:`gridwxcomp.download_gridmet_opendap`.

    Keyword Arguments:
        out_dir (str or None): default None. Directory to save comparison
            plots, if None save to "daily_comp_plots" in current directory.
        year_filter (str or None): default None. Single year YYYY or range
            YYYY-YYYY

    Returns:
        None

    Example:
        The :func:`daily_comparison` function will generate HTML files with
        bokeh plots for paired climate variables, e.g. etr_mm, eto_mm,
        u2_ms, tmin_c, tmax_c, srad_wm2, ea_kpa, and Ko (dew point depression).
        Monthly plots are created for a single year.

        From the command line, use the "plot" command with the
        ``[-t, --plot-type]`` option set to station-grid-comp and
        the ``[-f, --freq]`` option left as default ("daily"),

        .. code-block:: sh

            $ gridwxcomp plot merged_input.csv -t station-grid-comp -o comp_plots_2016 -y 2016

        or within Python,

        >>> from gridwxcomp.plot import daily_comparison
        >>> daily_comparison('merged_input.csv', 'comp_plots_2016', '2016')

        Both methods result in monthly HTML
        `bokeh <https://bokeh.pydata.org/en/latest/>`_ plots being saved to
        "comp_plots_2016/STATION_ID/" where "STATION_ID" is the station ID as
        found in the input CSV file. A file is saved for each month with the
        station ID, month, and year in the file name.
        If ``out_dir`` keyword argument or ``[-o, --out-dir]`` command line
        option is not given the plots will be saved to a directory named
        "daily_comp_plots".

    Note:
        If there are less than five days of data in a month the plot for that
        month will not be created.

    """
    if not out_dir:
        out_dir = os.getcwd()

    if not os.path.isdir(out_dir):
        print('{} does not exist, creating directory'.format(out_dir))
        os.makedirs(out_dir)

    year = year_filter
    logging.info('\nProcessing Year: {}'.format(year))

    # Import Station/GRIDMET meta data
    paired_data = pd.read_csv(input_csv, sep=',')

    # List of variables to compare (STATION/gridMET ORDER SHOULD MATCH)
    station_vars = ['TMin (C)', 'TMax (C)', 'wx_Ko_c', 'Rs (w/m2)',
                    'ws_2m (m/s)', 'Vapor Pres (kPa)', 'RHAvg (%)',
                    'Precip (mm)', 'ETo (mm)', 'ETr (mm)']
    gridmet_vars = ['tmin_c', 'tmax_c', 'grid_Ko_c', 'srad_wm2', 'u2_ms',
                    'ea_kpa', 'rh_avg', 'prcp_mm', 'eto_mm', 'etr_mm']

    # Plot labels, in the same order as the variable lists above
    title_list = ['TMin', 'TMax', 'Ko', 'Rs', 'WS 2m',
                  'ea', 'RH', 'Prcp', 'ETo', 'ETr']
    ts_ylabel_list = ['TMin (C)', 'TMax (C)', 'Ko (C)', 'Rs (w/m2)',
                      'WS 2m (m/s)', 'ea (kPa)', 'Avg RH (%)',
                      'Prcp (mm)',
                      'ETo (mm)', 'ETr (mm)']
    xlabel_list = ['Station TMin (C)', 'Station TMax (C)',
                   'Station Ko (C)', 'Station Rs (w/m2)',
                   'Station WS 2m (m/s)', 'Station ea (kPa)',
                   'Station RH (%)', 'Station Prcp (mm)',
                   'Station ETo (mm)', 'Station ETr (mm)']
    ylabel_list = ['gridMET TMin (C)', 'gridMET TMax (C)',
                   'gridMET Ko (C)', 'gridMET Rs (w/m2)',
                   'gridMET WS 2m (m/s)', 'gridMET ea (kPa)',
                   'gridMET RH (%)', 'gridMET Prcp (mm)',
                   'gridMET ETo (mm)', 'gridMET ETr (mm)']
    legendx = 'Station'
    legendy = 'gridMET'

    # Loop through each station/gridmet pair
    for _, row in paired_data.iterrows():
        station_path = row.STATION_FILE_PATH
        logging.info('\nStation: {}'.format(row.STATION_ID))

        # Check if station path is given in input
        if pd.isnull(station_path):
            logging.info('Station path is not given. Skipping.')
            continue

        # Skip if file does not exist
        if not os.path.exists(station_path):
            logging.info('SKIPPING {}. NO STATION FILE FOUND.'.format(
                station_path))
            continue

        # pyweather QAQC format if excel
        if station_path.endswith(('.xlsx','xlx')):
            station_data = pd.read_excel(station_path,
                sheet_name='Corrected Data', parse_dates=True, index_col=0)
        else:
            station_data = pd.read_csv(station_path,
                parse_dates=True, index_col=0)

        # Filter years
        if year:
            station_data, year_str = parse_yr_filter(
                station_data, year, label=row.STATION_ID)
        else:
            start_yr = int(station_data.index.year.min())
            end_yr = int(station_data.index.year.max())
            year_str = '{}_{}'.format(start_yr, end_yr)

        # Import GRIDMET Data
        grid_path = row.GRID_FILE_PATH
        # Skip if GRIDMET file does not exist
        if not os.path.exists(grid_path):
            print('SKIPPING {}. NO GRIDMET FILE FOUND.'.format(grid_path))
            continue
        grid_data = pd.read_csv(grid_path, sep=',',parse_dates=True,
            index_col='date')

        # Add Tdew to gridmet dataset Teten's equation ASCE REF-ET
        # supporting equations Appendix 2-1
        grid_data['tdew_c'] = (116.91 + 237.3 * np.log(grid_data.ea_kpa)) /\
                              (16.78 - np.log(grid_data.ea_kpa))

        # Calculate Tmin - Tdew = Ko for both Station and GridMET
        # Dew Point Depression
        grid_data['grid_Ko_c'] = grid_data.tmin_c - grid_data.tdew_c
        station_data['wx_Ko_c'] = station_data['TMin (C)'] - \
            station_data['TDew (C)']

        # grid RH Avg calc
        # Saturated Vapor Pressure
        grid_data['tavg_c'] = (grid_data.tmin_c + grid_data.tmax_c) / 2
        grid_data['e_sat_kpa'] = 0.6108 * np.exp(
            (17.27 * grid_data.tavg_c) /
            (grid_data.tavg_c + 237.3))
        # Average RH (%)
        grid_data['rh_avg'] = (grid_data.ea_kpa / grid_data.e_sat_kpa) * 100

        # Combine station and gridMET dataframes (only plotting variables)
        merged = pd.concat([
            station_data[station_vars], grid_data[gridmet_vars]], axis=1
        )

        station_id = row.STATION_ID.replace(" ", "")
        out_folder = os.path.join(out_dir, 'daily_comp_plots', station_id)

        for month in range(1,13):
            logging.info('Month: {}'.format(month))
            monthly_data = merged[merged.index.month==month]

            if len(monthly_data.index) <= 5:
                logging.info('Skipping. Less than 5 observations in '
                             'month.')
                continue

            # Create output folder if it doesn't exist
            if not os.path.exists(out_folder):
                os.makedirs(out_folder)

            # Output to HTML file; only the file name is a format template
            out_file_name = '{}_{:02}_{}.html'.format(
                station_id, month, year_str)
            output_file(os.path.join(out_folder, out_file_name))

            # list of [timeseries, scatter] figure pairs, one per variable
            figure_list = []
            # x range of the first time series figure actually created in
            # this month; later time series figures link to it. Tracking it
            # explicitly keeps the link correct when the first variable has
            # no data and is skipped.
            shared_x_range = None

            for x_var, y_var, title, ts_ylabel, xlabel, ylabel in zip(
                    station_vars, gridmet_vars, title_list, ts_ylabel_list,
                    xlabel_list, ylabel_list):

                # lstsq cannot have nans (drop nas for each var separately)
                monthly_data2 = monthly_data[[x_var, y_var]]
                monthly_data2 = monthly_data2.dropna()
                monthly_data2['date'] = monthly_data2.index
                monthly_data2.index.name=''
                monthly_data2.reset_index(inplace=True)

                if monthly_data2.empty:
                    logging.info("Skipping {}. No Data.".format(x_var))
                    continue

                # Timeseries plot
                ts_kwargs = dict(plot_width=800, plot_height=400,
                                 title=title, x_axis_type="datetime",
                                 y_axis_label=ts_ylabel)
                if shared_x_range is not None:
                    ts_kwargs['x_range'] = shared_x_range
                p1 = figure(**ts_kwargs)
                if shared_x_range is None:
                    shared_x_range = p1.x_range

                p1.line(monthly_data2.index,
                        monthly_data2[x_var], color="navy",
                        alpha=0.5, legend_label=legendx,line_width=2)
                p1.line(monthly_data2.index,
                        monthly_data2[y_var], color="red",
                        alpha=0.5, legend_label=legendy,line_width=2)
                # the x values are row positions; label them with the dates
                p1.xaxis.major_label_overrides = {
                    pos: date.strftime('%Y %b %d')
                    for pos, date in enumerate(
                        pd.to_datetime(monthly_data2.date))
                }

                # 1 to 1 Plot
                # Regression through Zero
                # https://stackoverflow.com/questions/9990789/how-to-force-
                # zero-interception-in-linear-regression/9994484#9994484
                m = np.linalg.lstsq(monthly_data2[x_var].values.reshape(-1,1),
                                    monthly_data2[y_var], rcond=None)[0][0]

                # common extent of both variables, padded
                pair_values = [monthly_data2[y_var], monthly_data2[x_var]]
                data_min = np.min(pair_values)
                data_max = np.max(pair_values)
                line_start = int(data_min - 2)
                line_end = int(data_max + 2)
                r_x, r_y = zip(*((v, v*m) for v in range(
                    line_start, int(data_max + 3), 1)))

                # Plots
                p2 = figure(plot_width=400, plot_height=400,
                            x_axis_label = xlabel, y_axis_label = ylabel,
                            title = 'Slope Through Zero: m = {}'.format(
                                round(m,4)))
                p2.circle(monthly_data2[x_var], monthly_data2[y_var],
                          size=15, color="navy", alpha=0.5)
                p2.line([line_start, line_end], [line_start, line_end],
                        color = "black", legend_label = '1 to 1 line')
                p2.line(r_x, r_y, color="red", legend_label = 'Reg thru zero')
                p2.legend.location = "top_left"

                # Append [p1, p2] to figure_list (create list of lists)
                figure_list.append([p1, p2])

            # Plot all figures in list
            fig = gridplot(figure_list, toolbar_location="left")
            # Save the figure
            save(fig)
def monthly_comparison(input_csv, out_dir=None, day_limit=10):
0
Source : plot.py
with Apache License 2.0
from WSWUP
with Apache License 2.0
from WSWUP
def monthly_comparison(input_csv, out_dir=None, day_limit=10):
"""
Compare monthly average weather station data from
`PyWeatherQAQC < https://github.com/WSWUP/pyWeatherQAQC>`_ with gridMET.
The :func:`monthly_comparison` function produces HTML files with time series
and scatter plots of station versus gridMET climate variables of monthly
mean data. It uses the `bokeh < https://bokeh.pydata.org/en/latest/>`_ module
to create interactive plots, e.g. they can be zoomed in/out and panned.
Arguments:
input_csv (str): path to input CSV file containing
paired station/gridMET metadata. This file is
created by running :mod:`gridwxcomp.prep_input` followed by
:mod:`gridwxcomp.download_gridmet_opendap`.
Keyword Arguments:
out_dir (str): default None. Directory to save comparison plots.
day_limit (int): default 10. Number of paired days per month that must
exist for variable to be plotted.
Returns:
None
Example:
The :func:`monthly_comparison` function will generate HTML files with
bokeh plots for paired climate variable, e.g. etr_mm,
eto_mm, u2_ms, tmin_c, tmax_c, srad_wm2, ea_kpa, and Ko (dew point
depression).
From the command line, use the "plot" command with the
``[-t, --plot-type]`` option set to station-grid-comp and
the ``[-f, --freq]`` option set to "monthly",
.. code-block:: sh
$ gridwxcomp plot merged_input.csv -t station-grid-comp -freq monthly -o monthly_plots
or within Python,
>>> from gridwxcomp.plot import monthly_comparison
>>> monthly_comparison('merged_input.csv', 'monthly_plots')
Both methods result in monthly HTML bokeh plots being saved
to "monthly_plots/" which contains a plot file for each station
as found in the input CSV file. If ``out_dir`` keyword argument or
``[-o, --out-dir]`` command line option is not given the plots will
be saved to a directory named "monthly_comp_plots".
Note:
If there are less than 2 months of data the plot for that
station will not be created.
"""
if not out_dir:
out_dir = os.getcwd()
if not os.path.isdir(out_dir):
print('{} does not exist, creating directory'.format(out_dir))
os.makedirs(out_dir)
# # Import Station/GRIDMET meta data shapefile
paired_data = pd.read_csv(input_csv, sep=',')
# List of variables to compare (STATION/gridMET ORDER SHOULD MATCH)
station_vars = ['TMin (C)', 'TMax (C)', 'wx_Ko_c', 'Rs (w/m2)',
'ws_2m (m/s)', 'Vapor Pres (kPa)', 'RHAvg (%)',
'Precip (mm)', 'ETo (mm)', 'ETr (mm)']
gridmet_vars = ['tmin_c', 'tmax_c', 'grid_Ko_c', 'srad_wm2', 'u2_ms',
'ea_kpa', 'rh_avg', 'prcp_mm', 'eto_mm', 'etr_mm']
# # Limit row processing range (testing)
# start = 0
# end = 1
#Loop through each station/gridmet pair
for index, row in paired_data.iterrows():
# # Limit iteration during development
# if index < start:
# continue
# if index >= end:
# break
# clear previous datasets
grid_data = []
station_data = []
start_date = []
end_date = []
logging.info('\nStation: {}'.format(row.STATION_ID))
station_path = row.STATION_FILE_PATH
# Check is station path is given in input
if pd.isnull(station_path):
logging.info('Station path is not given. Skipping.')
continue
# Skip If FILE DOES NOT EXIST
if not os.path.exists(station_path):
logging.info('SKIPPING {}. NO STATION FILE FOUND.'.format(
station_path))
continue
else:
if station_path.endswith(('.xlsx','.xlx')):
station_data = pd.read_excel(station_path, index_col=0,
parse_dates=True, sheet_name='Corrected Data')
else:
station_data = pd.read_csv(station_path, index_col=0,
parse_dates=True)
start_date.append(station_data.index.date.min())
end_date.append(station_data.index.date.max())
# Import GRIDMET Data
grid_path = row.GRID_FILE_PATH
# Skip if GRIDMET FILE DOES NOT EXIST
if not os.path.exists(grid_path):
print('SKIPPING {}. NO GRIDMET FILE FOUND.'.format(grid_path))
continue
else:
grid_data = pd.read_csv(grid_path, sep=',',parse_dates=True,
index_col='date')
start_date.append(grid_data.index.date.min())
end_date.append(grid_data.index.date.max())
# prevent plotting gaps when time periods differ
start_date = max(start_date)
end_date = min(end_date)
# Filter to specific year
# grid_data = grid_data[grid_data['year'] == year]
# Add Tdew to gridmet dataset Teten's equation ASCE REF-ET
# supporting equations Appendix 2-1
grid_data['tdew_c'] = (116.91 + 237.3 * np.log(grid_data.ea_kpa)) /\
(16.78 - np.log(grid_data.ea_kpa))
# Calculate Tmin - Tdew = Ko for both Station and GridMET
# Dew Point Depression
grid_data['grid_Ko_c'] = grid_data.tmin_c - grid_data.tdew_c
station_data['wx_Ko_c'] = station_data['TMin (C)'] - \
station_data['TDew (C)']
# grid RH Avg calc
# Saturated Vapor Pressure
grid_data['tavg_c'] = (grid_data.tmin_c + grid_data.tmax_c )/2
grid_data['e_sat_kpa'] = 0.6108*np.exp((17.27*grid_data.tavg_c)/
(grid_data.tavg_c+237.3))
# Average RH (%)
grid_data['rh_avg'] = (grid_data.ea_kpa/grid_data.e_sat_kpa)*100
# Combine station and gridMET dataframes (only plotting variables)
merged = pd.concat([station_data[station_vars],
grid_data[gridmet_vars]], axis=1
)
merged = merged.loc[start_date:end_date]
station_vars = ['TMin (C)', 'TMax (C)', 'wx_Ko_c', 'Rs (w/m2)',
'ws_2m (m/s)', 'Vapor Pres (kPa)', 'RHAvg (%)', 'Precip (mm)',
'ETo (mm)', 'ETr (mm)']
gridmet_vars = ['tmin_c', 'tmax_c', 'grid_Ko_c', 'srad_wm2',
'u2_ms', 'ea_kpa', 'rh_avg', 'prcp_mm', 'eto_mm', 'etr_mm']
# remove all pairs where one var missing
for (x_var, y_var) in zip(station_vars,gridmet_vars):
merged[[x_var, y_var]] = merged[[x_var, y_var]].dropna()
# Monthly averages including count
monthly = merged.groupby([lambda x: x.year, lambda x: x.month]).agg(
['mean', 'sum' ,'count'])
# Remove months with Less Than XX Days in average
var_names = list(monthly.columns.levels)[0]
for v in var_names:
mask = monthly.loc[:,(v,'count')] < day_limit
monthly.loc[mask,('sum', 'mean')] = np.nan
# Rebuild Index DateTime
monthly['year'] = monthly.index.get_level_values(0).values
monthly['month'] = monthly.index.get_level_values(1).values
monthly.index = pd.to_datetime(
monthly.year * 10000 + monthly.month * 100 + 15,
format='%Y%m%d')
if len(monthly.index) < 2:
logging.info('Skipping. Less than 2 months of observations.')
continue
# Output Folder
out_folder = os.path.join(out_dir, 'monthly_comp_plots')
# '{}'.format(
# row.STATION_ID.replace(" ","")))
# Create path if it doesn't exist
if not os.path.exists(out_folder):
os.makedirs(out_folder)
# Output to HTML file
out_file_path = os.path.join(out_folder, '{}.html')\
.format(row.STATION_ID.replace(" ", ""))
output_file(out_file_path)
# list of x variables
x_var_list= station_vars
# list of y variables
y_var_list= gridmet_vars
# title list
title_list= ['TMin: Monthly Average', 'TMax: Monthly Average',
'Ko: Monthly Average' ,
'Rs: Monthly Average: Monthly Average',
'WS 2m: Monthly Average',
'ea: Monthly Average',
'RH Avg: Monthly Average', 'Prcp: Monthly Total',
'ETo: Monthly Average', 'ETr: Monthly Average']
# timeseries y label list
ts_ylabel_list = ['TMin (C)', 'TMax (C)', 'Ko (C)', 'Rs (w/m2)',
'WS 2m (m/s)', 'ea (kPa)', 'Avg RH (%)',
'Prcp (mm)',
'ETo (mm)', 'ETr (mm)']
# scatter xlabel list
xlabel_list= ['Station TMin (C)', 'Station TMax (C)',
'Station Ko (C)','Station Rs (w/m2)',
'Station WS 2m (m/s)', 'Station ea (kPa)',
'Station RH (%)', 'Station Prcp (mm)',
'Station ETo (mm)', 'Station ETr (mm)']
# scatter ylabel list
ylabel_list=['gridMET TMin (C)', 'gridMET TMax (C)',
'gridMET Ko (C)','gridMET Rs (w/m2)',
'gridMET WS 2m (m/s)', 'gridMET ea (kPa)',
'gridMET RH (%)', 'gridMET Prcp (mm)',
'gridMET ETo (mm)', 'gridMET ETr (mm)']
stat_list = ['mean','mean','mean','mean',
'mean','mean','mean', 'sum',
'mean','mean']
# legendx list
legendx_list = ['Station'] * len(title_list)
# legend y list
legendy_list = ['gridMET'] * len(title_list)
# empty list to append figures to
figure_list = []
# loop through and create figures for each variable using vars
# and plot labels from lists above
for i, (x_var, y_var, title, ts_ylabel, xlabel, ylabel, legendx,
legendy, stat) in enumerate(zip(x_var_list, y_var_list,
title_list, ts_ylabel_list,
xlabel_list, ylabel_list,
legendx_list, legendy_list,
stat_list)):
# lstsq cannot have nans (drop nas for each var separately)
monthly2 = monthly[[x_var, y_var]]
monthly2 = monthly2.dropna()
if monthly2.empty:
logging.info("Skipping {}. No Data.".format(x_var))
continue
if i == 0:
# Initial timeseries plot to establish xrange for link axes
p1 = figure(plot_width=800, plot_height=400,
x_axis_type="datetime",title = title,
y_axis_label = ts_ylabel)
p1.line(monthly.index.to_pydatetime(),
monthly[x_var, stat], color="navy",
alpha=0.5, legend_label=legendx,line_width=2)
p1.line(monthly.index.to_pydatetime(),
monthly[y_var, stat], color="red",
alpha=0.5, legend_label=legendy,line_width=2)
else:
# Timeseries plots after first pass
p1 = figure(plot_width=800, plot_height=400,
x_axis_type="datetime",title = title,
y_axis_label = ts_ylabel,
x_range=p1.x_range)
p1.line(monthly.index.to_pydatetime(),
monthly[x_var, stat], color="navy", alpha=0.5,
legend_label=legendx,line_width=2)
p1.line(monthly.index.to_pydatetime(),
monthly[y_var, stat], color="red", alpha=0.5,
legend_label=legendy,line_width=2)
# 1 to 1 Plot
# Regression through Zero
# https://stackoverflow.com/questions/9990789/how-to-force-
# zero-interception-in-linear-regression/9994484#9994484
m = np.linalg.lstsq(monthly2[x_var, stat].values.reshape(-1,1),
monthly2[y_var, stat], rcond=None)[0][0]
r_x, r_y = zip(*((i, i*m ) for i in range(
int(np.min([monthly2[y_var, stat],
monthly2[x_var, stat]])-2),
int(np.max([monthly2[y_var, stat],
monthly2[x_var, stat]])+3),1)))
# Plots
p2 = figure(plot_width=400, plot_height=400,
x_axis_label = xlabel, y_axis_label = ylabel,
title = 'Slope Through Zero: m = {}'.format(
round(m,4)))
p2.circle(monthly2[x_var, stat], monthly2[y_var, stat],
size=15, color="navy", alpha=0.5)
p2.line([int(np.min([monthly2[y_var, stat],
monthly2[x_var, stat]])-2),int(np.max(
[monthly2[y_var, stat],monthly2[x_var, stat]])+2)],
[int(np.min([monthly2[y_var, stat],
monthly2[x_var, stat]])-2),int(np.max(
[monthly2[y_var, stat],monthly2[x_var, stat]])+2)],
color = "black", legend_label = '1 to 1 line')
p2.line(r_x, r_y, color="red", legend_label = 'Reg thru zero')
p2.legend.location = "top_left"
# Append [p1, p2] to figure_list (create list of lists)
figure_list.append([p1, p2])
# Plot all figures in list
fig = gridplot(figure_list, toolbar_location="left")
# Save the figure
save(fig)
def station_bar_plot(summary_csv, layer, out_dir=None, x_label=None,
0
Source : plot.py
with Apache License 2.0
from WSWUP
with Apache License 2.0
from WSWUP
def station_bar_plot(summary_csv, layer, out_dir=None, x_label=None,
        y_label=None, title=None, subtitle=None, year_subtitle=True):
    """
    Produce an interactive bar chart comparing multiple climate stations to
    each other for a particular variable, e.g. bias ratios or interpolated
    residuals.

    The summary CSV is read, sorted by ``layer`` (ascending), and drawn as a
    vertical bar chart with one bar per 'STATION_ID'. The chart is written
    to ``<out_dir>/<layer>.html``.

    Arguments:
        summary_csv (str): path to summary CSV produced by either
            :func:`gridwxcomp.calc_bias_ratios` or by
            :func:`gridwxcomp.interpolate`. Should contain ``layer``
            data for plot.
        layer (str): name of variable (CSV column) to plot.

    Keyword Arguments:
        out_dir (str or None): default None. Output directory path, default
            is 'station_bar_plots' in parent directory of ``summary_csv``.
        x_label (str or None): default None. Label for x-axis.
        y_label (str or None): default None. Label for y-axis, defaults to
            ``layer``.
        title (str or None): default None. Title of plot.
        subtitle (str, list, or None): default None. Additional subtitle(s)
            for plot.
        year_subtitle (bool): default True. If true print subtitle on plot
            with the max year range used for station data, e.g.
            'years: 1995-2005'. Requires the columns 'start_year' and
            'end_year' in ``summary_csv``; if either is missing the year
            subtitle is skipped and a notice is printed.

    Returns:
        None

    Example:
        Let's say we want to compare the mean growing season bias ratios of
        reference evapotranspiration (ETr) for the selection of stations we
        used to calculate bias ratios. The summary CSV file containing the
        ratios should be first created using
        :func:`gridwxcomp.calc_bias_ratios`.

        >>> from gridwxcomp.plot import station_bar_plot
        >>> # path to summary CSV with station data
        >>> in_file = 'monthly_ratios/etr_mm_summary_all_yrs.csv'
        >>> layer = 'growseason_mean'
        >>> station_bar_plot(in_file, layer)

        The resulting file will be saved using the layer name as a file
        name::

            'monthly_ratios/station_bar_plots/growseason_mean.html'

        The plot file will contain the mean growing season bias ratios
        of ETr for each station, sorted from smallest to largest values.

        This function may also be used for any numerical data in the summary
        CSV files that are created by :func:`gridwxcomp.interpolate` in
        addition to those created by :func:`gridwxcomp.calc_bias_ratios`.
        The main requirement is that ``summary_csv`` must contain the column
        'STATION_ID' and the ``layer`` keyword argument.

    Raises:
        FileNotFoundError: if ``summary_csv`` is not found.
        KeyError: if ``layer`` does not exist as a column name in
            ``summary_csv``.
    """
    summary_path = Path(summary_csv)
    if not summary_path.is_file():
        err_msg = '\n{} is not a valid path to a summary CSV file!'.\
            format(summary_csv)
        raise FileNotFoundError(err_msg)

    # -999 is treated as the missing-value marker when reading the summary
    df = pd.read_csv(summary_csv, na_values=[-999])
    if layer not in df.columns:
        err_msg = '\nColumn {} was not found in {}'.format(layer, summary_csv)
        raise KeyError(err_msg)

    df = df.sort_values(layer)
    df.index.name = 'dummy_name'  # fix internal to bokeh- reset_index
    source = ColumnDataSource(df)

    # hover tooltip with station and value; the braces around the column
    # name keep the tooltip working for column names that contain spaces
    tooltips = [
        ("station", "@STATION_ID"),
        ("value", "@{{{}}}".format(layer)),
    ]
    hover = models.HoverTool(tooltips=tooltips)

    if not y_label:
        y_label = layer

    # save next to the summary CSV in 'station_bar_plots' if not specified
    if not out_dir:
        out_dir = summary_path.parent/'station_bar_plots'
    else:
        out_dir = Path(out_dir)

    if not out_dir.is_dir():
        print('\n{}\nDoes not exist, making directory'.format(
            out_dir.absolute()))
        out_dir.mkdir(parents=True, exist_ok=True)

    out_file = out_dir/'{}.html'.format(layer)
    print('\nCreating station bar plot for variable: ', layer,
        '\nUsing data from file: ', summary_path.absolute())

    output_file(out_file)

    # x_label was previously accepted but never applied to the figure
    p = figure(x_range=df.STATION_ID, x_axis_label=x_label,
        y_axis_label=y_label, title=title)
    p.vbar(x='STATION_ID', top=layer, width=0.8, source=source)
    p.xaxis.major_label_orientation = pi/2  # vertical station labels
    p.add_tools(hover, models.BoxSelectTool())

    if year_subtitle:
        if {'start_year', 'end_year'}.issubset(df.columns):
            # add data range (years start to end) as subtitle
            min_yr = int(df['start_year'].min())
            max_yr = int(df['end_year'].max())
            if min_yr == max_yr:
                year_str = 'year: {}'.format(min_yr)
            else:
                year_str = 'years: {}-{}'.format(min_yr, max_yr)
            # caution note if not all stations use full year range
            full_range = (df['end_year'] == max_yr).all() and \
                (df['start_year'] == min_yr).all()
            if not full_range:
                year_str = '{} (less years exist for some stations)'.\
                    format(year_str)
            p.add_layout(
                models.Title(text=year_str, text_font_style="italic"),
                'above')
        else:
            # summary files without year columns can still be plotted
            print('\nColumns start_year and end_year were not found, '
                'skipping year range subtitle')

    # add arbitrary number of custom subtitles as lines above plot
    if isinstance(subtitle, (list, tuple)):
        for st in subtitle:
            p.add_layout(models.Title(text=st, text_font_style="italic"),
                'above')
    elif subtitle:
        p.add_layout(models.Title(text=subtitle, text_font_style="italic"),
            'above')

    save(p)
    print('\nPlot saved to: ', out_file.absolute())
def arg_parse():
0
Source : test_mini_analyses.py
with BSD 3-Clause "New" or "Revised" License
from XENONnT
with BSD 3-Clause "New" or "Revised" License
from XENONnT
def test_bokeh_selector(self):
    """
    Check the bokeh data selector histogram and both ways of exporting it.

    A 2D histogram of peak area against peak area is built, saved with
    bokeh directly, and then written through ``straxen.bokeh_to_wiki``.
    After each export the output file must exist, and it is removed again
    so the test leaves nothing behind.
    """
    from straxen.analyses.bokeh_waveform_plot import DataSelectionHist

    peaks = self.st.get_array(nt_test_run_id, 'peak_basics')
    selector = DataSelectionHist('ds')
    histogram_options = dict(
        bins=50,
        hist_range=((0, 200), (0, 2000)),
        log_color_scale=True,
        clim=(10, None),
        # keyword spelling is that of the called API; keep as is
        undeflow_color='white',
    )
    fig = selector.histogram2d(peaks,
                               peaks['area'],
                               peaks['area'],
                               **histogram_options)

    import bokeh.plotting as bklt

    save_as = 'test_data_selector.html'

    def _expect_file_then_remove():
        # the export must have produced the file; delete it afterwards
        self.assertTrue(os.path.exists(save_as))
        os.remove(save_as)
        self.assertFalse(os.path.exists(save_as))

    bklt.save(fig, save_as)
    _expect_file_then_remove()

    # Also test if we can write it to the wiki
    straxen.bokeh_to_wiki(fig)
    straxen.bokeh_to_wiki(fig, save_as)
    _expect_file_then_remove()
@unittest.skipIf(not straxen.utilix_is_configured(),