ui.py
# ******************************************************************************
# Copyright 2017-2019 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
import base64
import io
import os
import json
from os.path import join
import pandas as pd
import numpy as np
from bokeh import layouts
from bokeh.document import Document
from bokeh.layouts import widgetbox, row, column, layout
from bokeh.models import Div, CustomJS, HoverTool, Title, ColumnDataSource
from bokeh.models.widgets import DataTable, TableColumn, TextInput
from bokeh.models.widgets import HTMLTemplateFormatter, RadioButtonGroup
from bokeh.models.widgets import Dropdown
from bokeh.models.widgets import Tabs, Panel
from bokeh.plotting import Figure, figure
from bokeh.server.server import Server
from bokeh.transform import dodge
from bokeh.core.properties import value
from tornado.web import StaticFileHandler
from nlp_architect import LIBRARY_ROOT
from nlp_architect.models.absa import LEXICONS_OUT
from nlp_architect.models.absa.train.acquire_terms import AcquireTerms
from nlp_architect.models.absa.train.train import TrainSentiment
from sentiment_solution import SENTIMENT_OUT, SentimentSolution
SOLUTION_DIR = join(LIBRARY_ROOT, "solutions/absa_solution/")
POLARITIES = ("POS", "NEG")
# pylint: disable=global-variable-undefined
def serve_absa_ui() -> None:
"""Main function for serving UI application."""
def _doc_modifier(doc: Document) -> None:
grid = _create_ui_components()
doc.add_root(grid)
print("Opening Bokeh application on http://localhost:5006/")
server = Server(
{"/": _doc_modifier},
websocket_max_message_size=5000 * 1024 * 1024,
extra_patterns=[
(
"/style/(.*)",
StaticFileHandler,
{"path": os.path.normpath(join(SOLUTION_DIR, "/style"))},
)
],
)
server.start()
server.io_loop.add_callback(server.show, "/")
server.io_loop.start()
def _create_header(train_dropdown, inference_dropdown, text_status) -> layouts.Row:
"""Utility function for creating and styling the header row in the UI layout."""
architect_logo = Div(
text=' < a href="https://intellabs.github.io/nlp-architect"> < img border="0" '
'src="style/nlp_architect.jpg" width="200"> < /a> by Intel® AI Lab',
style={
"margin-left": "500px",
"margin-top": "20px",
"font-size": "110%",
"text-align": "center",
},
)
css_link = Div(
text=" < link rel='stylesheet' type='text/css' href='style/lexicon_manager.css'>",
style={"font-size": "0%"},
)
js_script = Div(text=" < input type='file' id='inputOS' hidden='true'>")
title = Div(
text="ABSApp",
style={
"font-size": "300%",
"color": "royalblue",
"font-weight": "bold",
"margin-left": "500px",
},
)
return row(
column(
row(children=[train_dropdown, lexicons_dropdown, inference_dropdown], width=500),
row(text_status),
),
css_link,
js_script,
widgetbox(title, width=900, height=84),
widgetbox(architect_logo, width=400, height=84),
)
def empty_table(*headers):
return ColumnDataSource(data={header: 19 * [""] for header in headers})
def _create_ui_components() -> (Figure, ColumnDataSource): # pylint: disable=too-many-statements
global asp_table_source, asp_filter_src, op_table_source, op_filter_src
global stats, aspects, tabs, lexicons_dropdown
stats = pd.DataFrame(columns=["Quantity", "Score"])
aspects = pd.Series([])
def new_col_data_src():
return ColumnDataSource({"file_contents": [], "file_name": []})
large_text = HTMLTemplateFormatter(template=""" < div> < %= value %> < /div>""")
def data_column(title):
return TableColumn(
field=title, title=' < span class="header">' + title + " < /span>", formatter=large_text
)
asp_table_columns = [
data_column("Term"),
data_column("Alias1"),
data_column("Alias2"),
data_column("Alias3"),
]
op_table_columns = [data_column("Term"), data_column("Score"), data_column("Polarity")]
asp_table_source = empty_table("Term", "Alias1", "Alias2", "Alias3")
asp_filter_src = empty_table("Term", "Alias1", "Alias2", "Alias3")
asp_src = new_col_data_src()
op_table_source = empty_table("Term", "Score", "Polarity", "Polarity")
op_filter_src = empty_table("Term", "Score", "Polarity", "Polarity")
op_src = new_col_data_src()
asp_table = DataTable(
source=asp_table_source,
selectable="checkbox",
columns=asp_table_columns,
editable=True,
width=600,
height=500,
)
op_table = DataTable(
source=op_table_source,
selectable="checkbox",
columns=op_table_columns,
editable=True,
width=600,
height=500,
)
asp_examples_box = _create_examples_table()
op_examples_box = _create_examples_table()
asp_layout = layout([[asp_table, asp_examples_box]])
op_layout = layout([[op_table, op_examples_box]])
asp_tab = Panel(child=asp_layout, title="Aspect Lexicon")
op_tab = Panel(child=op_layout, title="Opinion Lexicon")
tabs = Tabs(tabs=[asp_tab, op_tab], width=700, css_classes=["mytab"])
lexicons_menu = [("Open", "open"), ("Save", "save")]
lexicons_dropdown = Dropdown(
label="Edit Lexicons",
button_type="success",
menu=lexicons_menu,
width=140,
height=31,
css_classes=["mybutton"],
)
train_menu = [("Parsed Data", "parsed"), ("Raw Data", "raw")]
train_dropdown = Dropdown(
label="Extract Lexicons",
button_type="success",
menu=train_menu,
width=162,
height=31,
css_classes=["mybutton"],
)
inference_menu = [("Parsed Data", "parsed"), ("Raw Data", "raw")]
inference_dropdown = Dropdown(
label="Classify",
button_type="success",
menu=inference_menu,
width=140,
height=31,
css_classes=["mybutton"],
)
text_status = TextInput(
value="Select training data", title="Train Run Status:", css_classes=["statusText"]
)
text_status.visible = False
train_src = new_col_data_src()
infer_src = new_col_data_src()
with open(join(SOLUTION_DIR, "dropdown.js")) as f:
args = dict(
clicked=lexicons_dropdown,
asp_filter=asp_filter_src,
op_filter=op_filter_src,
asp_src=asp_src,
op_src=op_src,
tabs=tabs,
text_status=text_status,
train_src=train_src,
infer_src=infer_src,
train_clicked=train_dropdown,
infer_clicked=inference_dropdown,
opinion_lex_generic="",
)
code = f.read()
args["train_clicked"] = train_dropdown
train_dropdown.js_on_change("value", CustomJS(args=args, code=code))
args["train_clicked"] = inference_dropdown
inference_dropdown.js_on_change("value", CustomJS(args=args, code=code))
args["clicked"] = lexicons_dropdown
lexicons_dropdown.js_on_change("value", CustomJS(args=args, code=code))
def update_filter_source(table_source, filter_source):
df = table_source.to_df()
sel_inx = sorted(table_source.selected.indices)
df = df.iloc[sel_inx, 1:]
new_source = ColumnDataSource(df)
filter_source.data = new_source.data
def update_examples_box(data, examples_box, old, new):
examples_box.source.data = {"Examples": []}
unselected = list(set(old) - set(new))
selected = list(set(new) - set(old))
if len(selected) < = 1 and len(unselected) < = 1:
examples_box.source.data.update(
{
"Examples": [str(data.iloc[unselected[0], i]) for i in range(4, 24)]
if len(unselected) != 0
else [str(data.iloc[selected[0], i]) for i in range(4, 24)]
}
)
def asp_selected_change(_, old, new):
global asp_filter_src, asp_table_source, aspects_data
update_filter_source(asp_table_source, asp_filter_src)
update_examples_box(aspects_data, asp_examples_box, old, new)
def op_selected_change(_, old, new):
global op_filter_src, op_table_source, opinions_data
update_filter_source(op_table_source, op_filter_src)
update_examples_box(opinions_data, op_examples_box, old, new)
def read_csv(file_src, headers=False, index_cols=False, readCSV=True):
if readCSV:
raw_contents = file_src.data["file_contents"][0]
if len(raw_contents.split(",")) == 1:
b64_contents = raw_contents
else:
# remove the prefix that JS adds
b64_contents = raw_contents.split(",", 1)[1]
file_contents = base64.b64decode(b64_contents)
return pd.read_csv(
io.BytesIO(file_contents),
encoding="ISO-8859-1",
keep_default_na=False,
na_values={None},
engine="python",
index_col=index_cols,
header=0 if headers else None,
)
return file_src
def read_parsed_files(file_content, file_name):
try:
# remove the prefix that JS adds
b64_contents = file_content.split(",", 1)[1]
file_content = base64.b64decode(b64_contents)
with open(SENTIMENT_OUT / file_name, "w") as json_file:
data_dict = json.loads(file_content.decode("utf-8"))
json.dump(data_dict, json_file)
except Exception as e:
print(str(e))
# pylint: disable=unused-argument
def train_file_callback(attr, old, new):
global train_data
SENTIMENT_OUT.mkdir(parents=True, exist_ok=True)
train = TrainSentiment(parse=True, rerank_model=None)
if len(train_src.data["file_contents"]) == 1:
train_data = read_csv(train_src, index_cols=0)
file_name = train_src.data["file_name"][0]
raw_data_path = SENTIMENT_OUT / file_name
train_data.to_csv(raw_data_path, header=False)
print("Running_SentimentTraining on data...")
train.run(data=raw_data_path)
else:
f_contents = train_src.data["file_contents"]
f_names = train_src.data["file_name"]
raw_data_path = SENTIMENT_OUT / train_src.data["file_name"][0].split("/")[0]
if not os.path.exists(raw_data_path):
os.makedirs(raw_data_path)
for f_content, f_name in zip(f_contents, f_names):
read_parsed_files(f_content, f_name)
print("Running_SentimentTraining on data...")
train.run(parsed_data=raw_data_path)
text_status.value = "Lexicon extraction completed"
with io.open(AcquireTerms.acquired_aspect_terms_path, "r") as fp:
aspect_data_csv = fp.read()
file_data = base64.b64encode(str.encode(aspect_data_csv))
file_data = file_data.decode("utf-8")
asp_src.data = {"file_contents": [file_data], "file_name": ["nameFile.csv"]}
out_path = LEXICONS_OUT / "generated_opinion_lex_reranked.csv"
with io.open(out_path, "r") as fp:
opinion_data_csv = fp.read()
file_data = base64.b64encode(str.encode(opinion_data_csv))
file_data = file_data.decode("utf-8")
op_src.data = {"file_contents": [file_data], "file_name": ["nameFile.csv"]}
def show_analysis() -> None:
global stats, aspects, plot, source, tabs
plot, source = _create_plot()
events_table = _create_events_table()
# pylint: disable=unused-argument
def _events_handler(attr, old, new):
_update_events(events_table, events_type.active)
# Toggle display of in-domain / All aspect mentions
events_type = RadioButtonGroup(labels=["All Events", "In-Domain Events"], active=0)
analysis_layout = layout([[plot], [events_table]])
# events_type display toggle disabled
# analysis_layout = layout([[plot],[events_type],[events_table]])
analysis_tab = Panel(child=analysis_layout, title="Analysis")
tabs.tabs.insert(2, analysis_tab)
tabs.active = 2
events_type.on_change("active", _events_handler)
source.selected.on_change("indices", _events_handler) # pylint: disable=no-member
# pylint: disable=unused-argument
def infer_file_callback(attr, old, new):
# run inference on input data and current aspect/opinion lexicons in view
global infer_data, stats, aspects
SENTIMENT_OUT.mkdir(parents=True, exist_ok=True)
df_aspect = pd.DataFrame.from_dict(asp_filter_src.data)
aspect_col_list = ["Term", "Alias1", "Alias2", "Alias3"]
df_aspect = df_aspect[aspect_col_list]
df_aspect.to_csv(SENTIMENT_OUT / "aspects.csv", index=False, na_rep="NaN")
df_opinion = pd.DataFrame.from_dict(op_filter_src.data)
opinion_col_list = ["Term", "Score", "Polarity", "isAcquired"]
df_opinion = df_opinion[opinion_col_list]
df_opinion.to_csv(SENTIMENT_OUT / "opinions.csv", index=False, na_rep="NaN")
solution = SentimentSolution()
if len(infer_src.data["file_contents"]) == 1:
infer_data = read_csv(infer_src, index_cols=0)
file_name = infer_src.data["file_name"][0]
raw_data_path = SENTIMENT_OUT / file_name
infer_data.to_csv(raw_data_path, header=False)
print("Running_SentimentInference on data...")
text_status.value = "Running classification on data..."
stats = solution.run(
data=raw_data_path,
aspect_lex=SENTIMENT_OUT / "aspects.csv",
opinion_lex=SENTIMENT_OUT / "opinions.csv",
)
else:
f_contents = infer_src.data["file_contents"]
f_names = infer_src.data["file_name"]
raw_data_path = SENTIMENT_OUT / infer_src.data["file_name"][0].split("/")[0]
if not os.path.exists(raw_data_path):
os.makedirs(raw_data_path)
for f_content, f_name in zip(f_contents, f_names):
read_parsed_files(f_content, f_name)
print("Running_SentimentInference on data...")
text_status.value = "Running classification on data..."
stats = solution.run(
parsed_data=raw_data_path,
aspect_lex=SENTIMENT_OUT / "aspects.csv",
opinion_lex=SENTIMENT_OUT / "opinions.csv",
)
aspects = pd.read_csv(SENTIMENT_OUT / "aspects.csv", encoding="utf-8")["Term"]
text_status.value = "Classification completed"
show_analysis()
# pylint: disable=unused-argument
def asp_file_callback(attr, old, new):
global aspects_data, asp_table_source
aspects_data = read_csv(asp_src, headers=True)
# Replaces None values by empty string
aspects_data = aspects_data.fillna("")
new_source = ColumnDataSource(aspects_data)
asp_table_source.data = new_source.data
asp_table_source.selected.indices = list(range(len(aspects_data)))
# pylint: disable=unused-argument
def op_file_callback(attr, old, new):
global opinions_data, op_table_source, lexicons_dropdown, df_opinion_generic
df = read_csv(op_src, headers=True)
# Replaces None values by empty string
df = df.fillna("")
# Placeholder for generic opinion lexicons from the given csv file
df_opinion_generic = df[df["isAcquired"] == "N"]
# Update the argument value for the callback customJS
lexicons_dropdown.js_property_callbacks.get("change:value")[0].args[
"opinion_lex_generic"
] = df_opinion_generic.to_dict(orient="list")
opinions_data = df[df["isAcquired"] == "Y"]
new_source = ColumnDataSource(opinions_data)
op_table_source.data = new_source.data
op_table_source.selected.indices = list(range(len(opinions_data)))
# pylint: disable=unused-argument
def txt_status_callback(attr, old, new):
print("Status: " + new)
text_status.on_change("value", txt_status_callback)
asp_src.on_change("data", asp_file_callback)
# pylint: disable=no-member
asp_table_source.selected.on_change("indices", asp_selected_change)
op_src.on_change("data", op_file_callback)
op_table_source.selected.on_change("indices", op_selected_change) # pylint: disable=no-member
train_src.on_change("data", train_file_callback)
infer_src.on_change("data", infer_file_callback)
return layout([[_create_header(train_dropdown, inference_dropdown, text_status)], [tabs]])
def _create_events_table() -> DataTable:
"""Utility function for creating and styling the events table."""
formatter = HTMLTemplateFormatter(
template="""
< style>
.AS_POS {color: #0000FF; font-weight: bold;}
.AS_NEG {color: #0000FF; font-weight: bold;}
.OP_POS {color: #1aaa0d; font-style: bold;}
.OP_NEG {color: #f40000;font-style: bold;}
.NEG_POS {font-style: italic;}
.NEG_NEG {color: #f40000; font-style: italic;}
.INT_POS {color: #1aaa0d; font-style: italic;}
.INT_NEG {color: #f40000; font-style: italic;}
< /style>
< %= value %>"""
)
columns = [
TableColumn(field="POS_events", title="Positive Examples", formatter=formatter),
TableColumn(field="NEG_events", title="Negative Examples", formatter=formatter),
]
return DataTable(
source=ColumnDataSource(),
columns=columns,
height=400,
index_position=None,
width=2110,
sortable=False,
editable=True,
reorderable=False,
)
def _create_plot() -> (Figure, ColumnDataSource):
"""Utility function for creating and styling the bar plot."""
global source, aspects, stats
pos_counts, neg_counts = (
[stats.loc[(asp, pol, False), "Quantity"] for asp in aspects] for pol in POLARITIES
)
np.seterr(divide="ignore")
source = ColumnDataSource(
data={
"aspects": aspects,
"POS": pos_counts,
"NEG": neg_counts,
"log-POS": np.log2(pos_counts),
"log-NEG": np.log2(neg_counts),
}
)
np.seterr(divide="warn")
p = figure(
plot_height=145,
sizing_mode="scale_width",
x_range=aspects,
toolbar_location="right",
tools="save, tap",
)
rs = [
p.vbar(
x=dodge("aspects", -0.207, range=p.x_range),
top="log-POS",
width=0.4,
source=source,
color="limegreen",
legend=value("POS"),
name="POS",
),
p.vbar(
x=dodge("aspects", 0.207, range=p.x_range),
top="log-NEG",
width=0.4,
source=source,
color="orangered",
legend=value("NEG"),
name="NEG",
),
]
for r in rs:
p.add_tools(
HoverTool(tooltips=[("Aspect", "@aspects"), (r.name, "@" + r.name)], renderers=[r])
)
p.add_layout(
Title(text=" " * 7 + "Sentiment Count (log scale)", align="left", text_font_size="23px"),
"left",
)
p.yaxis.ticker = []
p.y_range.start = 0
p.xgrid.grid_line_color = None
p.xaxis.major_label_text_font_size = "20pt"
p.legend.label_text_font_size = "20pt"
return p, source
def _update_events(events: DataTable, in_domain: bool) -> None:
"""Utility function for updating the content of the events table."""
i = source.selected.indices
events.source.data.update(
{
pol + "_events": stats.loc[aspects[i[0]], pol, in_domain]["Sent_1":].replace(np.nan, "")
if i
else []
for pol in POLARITIES
}
)
def _create_examples_table() -> DataTable:
"""Utility function for creating and styling the events table."""
formatter = HTMLTemplateFormatter(
template="""
< style>
.AS {color: #0000FF; font-weight: bold;}
.OP {color: #0000FF; font-weight: bold;}
< /style>
< div> < %= value %> < /div>"""
)
columns = [
TableColumn(
field="Examples", title=' < span class="header">Examples < /span>', formatter=formatter
)
]
empty_source = ColumnDataSource()
empty_source.data = {"Examples": []}
return DataTable(
source=empty_source,
columns=columns,
height=500,
index_position=None,
width=1500,
sortable=False,
editable=False,
reorderable=False,
header_row=True,
)
if __name__ == "__main__":
serve_absa_ui()