from snaplogic.tool import SLTool as slt
slt.ensure("bokeh", "0.13.0")
# Imports
import os
from collections import Counter
import pandas as pd
from bokeh.io import output_file, save
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.transform import factor_cmap
from bokeh.plotting import figure
# Global Variables
# Dataset as a pandas DataFrame; stays None until a document containing a
# "group" key has been processed by snaplogic_process().
df = None
# Request documents waiting to be plotted. Requests are queued here and only
# drained once `df` is available.
request_queue = []
# File that bokeh renders each plot into; it is read back after every save().
# NOTE(review): `tmp_root` is not defined in this file -- presumably it is
# injected by the SnapLogic script runtime; confirm.
tmp_path = os.path.join(tmp_root, "viz.html")
# Register tmp_path as the bokeh output target used by later save() calls.
output_file(tmp_path)
# This function will be executed once before consuming the data.
def snaplogic_init():
    """Initialization hook; this script needs no setup, so it returns None."""
    return None
def _render_html(plot):
    """Save *plot* with bokeh and return the generated HTML document text."""
    # save() writes to the file registered via output_file(tmp_path).
    save(plot)
    with open(tmp_path, "r") as tmp_file:
        return tmp_file.read()


def _plot_numeric(original_field, display_field, options):
    """Build the plot of a numeric column against the "Churn" categories."""
    values = df[original_field]
    # Every row is drawn as a thin, almost transparent bar whose height is
    # 1% of the column maximum, so overlapping bars reveal value density.
    bar_height = values.max() / 100
    p = figure(x_axis_label="Churn", y_axis_label=display_field,
               x_range=df["Churn"].unique(), plot_width=600, plot_height=580)
    p.vbar(df["Churn"], top=values + bar_height, bottom=values,
           width=0.7, line_color=None, fill_alpha=0.008)
    p.y_range.start = options[0]
    return p


def _plot_categorical(original_field):
    """Build the grouped bar chart of row counts per (Churn, field value)."""
    churn_unique = df["Churn"].unique()
    col_unique = df[original_field].unique()
    # Count rows for every (churn, value) pair in a single pass.
    counter = Counter(zip(df["Churn"], df[original_field]))
    palette = ["#c9d9d3", "#718dbf", "#e84d60"]
    x = [(churn_value, col_value)
         for churn_value in churn_unique
         for col_value in col_unique]
    source = ColumnDataSource(
        data=dict(x=x, counts=[counter[point] for point in x]))
    p = figure(x_axis_label="Churn", y_axis_label="Count",
               plot_width=600, plot_height=580, x_range=FactorRange(*x))
    # start=1, end=2 makes the color depend on the second factor level
    # (the field value) rather than on the Churn value.
    p.vbar(x="x", top="counts", width=0.9, source=source, line_color=None,
           fill_color=factor_cmap("x", palette=palette, factors=col_unique,
                                  start=1, end=2))
    p.y_range.start = 0
    p.x_range.range_padding = 0.1
    p.xaxis.major_label_orientation = 1
    return p


# This function will be executed on each document from the upstream snap.
def snaplogic_process(row):
    """Consume one upstream document and return the resulting output documents.

    A document containing a "group" key is treated as the dataset: it is
    loaded into the global ``df`` and a drop document is emitted for it.
    Any other document is treated as a plot request and queued; it is read
    for the keys "original_field", "display_field", "type" and "options".
    Queued requests are rendered as soon as the dataset is available.

    Args:
        row: The input document (a dict-like object).

    Returns:
        A list of output documents: ``{"viz": <html>}`` for each rendered
        request, ``{"viz": "The request is not valid."}`` for each request
        that failed, and drop documents for ignored inputs.
    """
    global df
    output_list = []
    is_dataset = "group" in row
    if is_dataset:
        # Load dataset and convert into DataFrame.
        df = pd.DataFrame(row["group"])
        # Replace 1 and 0 in $SeniorCitizen field with "Yes" and "No".
        df["SeniorCitizen"] = df["SeniorCitizen"].replace(
            {"1": "Yes", "0": "No"})
    else:
        request_queue.append(row)

    if df is not None:
        while request_queue:
            request = request_queue.pop(0)
            try:
                original_field = request["original_field"]
                display_field = request["display_field"]
                field_type = request["type"]
                options = request["options"]
                if field_type == "number":
                    # Plot numeric field.
                    p = _plot_numeric(original_field, display_field, options)
                    output_list.append({"viz": _render_html(p)})
                elif field_type == "dropdown":
                    # Plot categorical field.
                    p = _plot_categorical(original_field)
                    output_list.append({"viz": _render_html(p)})
                else:
                    # Ignore invalid field type.
                    output_list.append(slt.get_drop_doc())
            except Exception:
                # A malformed request must not abort the stream, but
                # KeyboardInterrupt/SystemExit are allowed to propagate.
                output_list.append({"viz": "The request is not valid."})

    if is_dataset:
        # Do not output if the input document is a dataset. This is needed in
        # order to preserve lineage property.
        output_list.append(slt.get_drop_doc())
    return output_list
# This function will be executed after consuming all documents from the upstream snap.
def snaplogic_final():
    """Finalization hook; nothing is emitted at end of stream, so it returns None."""
    return None