Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Paste code macro
language: python
from snaplogic.tool import SLTool as slt

# NOTE(review): presumably makes bokeh 0.13.0 available in the runtime before the
# bokeh imports below are executed -- confirm against the SLTool documentation.
slt.ensure("bokeh", "0.13.0")

# Imports
import os
from collections import Counter
import pandas as pd
from bokeh.io import output_file, save
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.transform import factor_cmap
from bokeh.plotting import figure

# Global Variables
# Dataset as a DataFrame; stays None until a document containing "group" has been processed.
df = None
# Request documents received so far that have not been plotted yet
# (filled and drained by snaplogic_process).
request_queue = []
# Scratch HTML file that every plot is rendered into and then read back from.
# NOTE(review): tmp_root is not defined in this script; presumably it is injected
# by the SnapLogic runtime -- confirm.
tmp_path = os.path.join(tmp_root, "viz.html")
# Register the scratch file as bokeh's output target so that save(p) can be
# called later without an explicit filename.
output_file(tmp_path)


def snaplogic_init():
    """Hook executed once before any data is consumed.

    Nothing needs to be prepared up front, so the hook produces no output.
    """
    return


def snaplogic_process(row):
    """Process one document from the upstream snap.

    A document containing a "group" key is treated as the dataset and is
    loaded into the module-level ``df``. Any other document is treated as a
    visualization request and is appended to ``request_queue``. Once a
    dataset is available, every queued request is rendered in arrival order.

    Args:
        row: The incoming document. A dataset document carries its records
            under "group"; a request document is read for the keys
            "original_field", "display_field", "type" and "options".

    Returns:
        A list of output documents: ``{"viz": <html>}`` for each rendered
        request, ``{"viz": "The request is not valid."}`` for each request
        that raised while being handled, and ``slt.get_drop_doc()`` for each
        request whose type is neither "number" nor "dropdown" as well as for
        the dataset document itself.
    """
    global df
    output_list = []
    is_dataset = "group" in row

    if is_dataset:
        # Load dataset and convert into DataFrame.
        df = pd.DataFrame(row["group"])
        # Replace "1" and "0" in $SeniorCitizen field with "Yes" and "No".
        df["SeniorCitizen"] = df["SeniorCitizen"].replace({"1": "Yes", "0": "No"})
    else:
        request_queue.append(row)

    # Requests can only be served once the dataset has arrived; until then
    # they simply stay in the queue.
    if df is not None:
        while request_queue:
            request = request_queue.pop(0)
            try:
                # All four keys are required, whatever the field type is.
                original_field = request["original_field"]
                display_field = request["display_field"]
                field_type = request["type"]
                options = request["options"]

                # Plot numeric field.
                if field_type == "number":
                    values = df[original_field]
                    # Every row is drawn as a thin bar from its value up to
                    # value + max/100 with a very low fill alpha; presumably
                    # so that dense value ranges show up darker -- confirm.
                    bar_height = values.max() / 100
                    p = figure(x_axis_label="Churn", y_axis_label=display_field,
                               x_range=df["Churn"].unique(), plot_width=600, plot_height=580)
                    p.vbar(df["Churn"],
                           top=values + bar_height,
                           bottom=values,
                           width=0.7, line_color=None, fill_alpha=0.008)
                    p.y_range.start = options[0]
                    output_list.append({"viz": _render_to_html(p)})

                # Plot categorical field.
                elif field_type == "dropdown":
                    churn_unique = df["Churn"].unique()
                    col_unique = df[original_field].unique()
                    # Number of rows for every (churn value, field value) pair.
                    counter = Counter(zip(df["Churn"], df[original_field]))
                    palette = ["#c9d9d3", "#718dbf", "#e84d60"]
                    x = [(churn_value, col_value) for churn_value in churn_unique for col_value in col_unique]
                    source = ColumnDataSource(data=dict(x=x, counts=[counter[point] for point in x]))
                    p = figure(x_axis_label="Churn", y_axis_label="Count", plot_width=600, plot_height=580,
                               x_range=FactorRange(*x))
                    # start=1, end=2 colors each bar by the second element of
                    # its (churn, field) factor, i.e. by the field value.
                    p.vbar(x="x", top="counts", width=0.9, source=source, line_color=None,
                           fill_color=factor_cmap("x", palette=palette, factors=col_unique, start=1, end=2))
                    p.y_range.start = 0
                    p.x_range.range_padding = 0.1
                    p.xaxis.major_label_orientation = 1
                    output_list.append({"viz": _render_to_html(p)})

                # Ignore invalid field type.
                else:
                    output_list.append(slt.get_drop_doc())

            except Exception:
                # Any failure while handling a request is reported back as an
                # invalid request. Exception (rather than a bare except) is
                # caught so SystemExit and KeyboardInterrupt still propagate.
                output_list.append({"viz": "The request is not valid."})

    if is_dataset:
        # Do not output if the input document is a dataset. This is needed in order to preserve lineage property.
        output_list.append(slt.get_drop_doc())

    return output_list


def _render_to_html(plot):
    """Save ``plot`` with bokeh and return the HTML text written to ``tmp_path``."""
    save(plot)
    with open(tmp_path, "r") as tmp_file:
        return tmp_file.read()


def snaplogic_final():
    """Hook executed after all documents from the upstream snap have been consumed.

    There is nothing left to flush at that point, so the hook produces no output.
    """
    return

Downloads

...

title: Important Steps to Successfully Reuse Pipelines

...

Multiexcerpt include macro
name: download_instructions
page: OpenAPI

Attachments
patterns: *.slp, *.zip