Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Paste code macro
language: python
from snaplogic.tool import SLTool as slt

# NOTE(review): presumably makes sure bokeh 0.13.0 is available before the
# bokeh imports below run -- confirm against the SLTool documentation.
slt.ensure("bokeh", "0.13.0")

# Imports
import os
from collections import Counter
import pandas as pd
from bokeh.io import output_file, save
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.transform import factor_cmap
from bokeh.plotting import figure

# Global Variables
# Most recently received dataset as a DataFrame; stays None until a document
# containing a "group" key has been processed.
df = None
# Visualization requests that arrived but have not been rendered yet. They are
# held here until a dataset is available.
request_queue = []
# File that every plot is saved to and then read back from as HTML.
# NOTE(review): tmp_root is not defined in this script -- presumably it is
# injected by the SnapLogic runtime; confirm.
tmp_path = os.path.join(tmp_root, "viz.html")
# Make bokeh's save() write to tmp_path.
output_file(tmp_path)


def snaplogic_init():
    """Hook executed once before any data is consumed; it does no setup."""
    return


def _render_html(plot):
    """Save ``plot`` to the shared ``tmp_path`` file and return the HTML text."""
    save(plot)
    with open(tmp_path, "r") as tmp_file:
        return tmp_file.read()


def _build_numeric_plot(data, original_field, display_field, options):
    """Build the plot of a numeric field against the "Churn" categories."""
    values = data[original_field]
    # Every row is drawn as a thin slab whose height is 1% of the field's
    # maximum, starting at the row's own value. The slabs are almost
    # transparent, so overlapping rows presumably read as darker regions.
    slab_height = values.max() / 100
    plot = figure(x_axis_label="Churn", y_axis_label=display_field,
                  x_range=data["Churn"].unique(), plot_width=600, plot_height=580)
    plot.vbar(data["Churn"],
              top=values + slab_height,
              bottom=values,
              width=0.7, line_color=None, fill_alpha=0.008)
    # The first option is used as the lower bound of the y axis.
    plot.y_range.start = options[0]
    return plot


def _build_categorical_plot(data, original_field):
    """Build the grouped bar chart of row counts per (Churn, field value)."""
    churn_unique = data["Churn"].unique()
    col_unique = data[original_field].unique()
    # Count rows per (churn, value) pair in a single pass. Iterating the
    # columns directly does not depend on the DataFrame having a 0..n-1 index.
    counter = Counter(zip(data["Churn"], data[original_field]))
    palette = ["#c9d9d3", "#718dbf", "#e84d60"]
    x = [(churn_value, col_value) for churn_value in churn_unique for col_value in col_unique]
    source = ColumnDataSource(data=dict(x=x, counts=[counter[point] for point in x]))
    plot = figure(x_axis_label="Churn", y_axis_label="Count", plot_width=600, plot_height=580,
                  x_range=FactorRange(*x))
    # start=1, end=2 makes the color depend on the second element of each
    # (churn, value) factor, i.e. on the field value rather than on churn.
    plot.vbar(x="x", top="counts", width=0.9, source=source, line_color=None,
              fill_color=factor_cmap("x", palette=palette, factors=col_unique, start=1, end=2))
    plot.y_range.start = 0
    plot.x_range.range_padding = 0.1
    plot.xaxis.major_label_orientation = 1
    return plot


# This function will be executed on each document from the upstream snap.
def snaplogic_process(row):
    """Handle one input document and return the list of output documents.

    A document containing a "group" key is treated as the dataset: it replaces
    the global DataFrame. Any other document is treated as a visualization
    request and queued. Once a dataset is available, every queued request is
    rendered in arrival order.

    A request is read through the keys "original_field", "display_field",
    "type" and "options". Type "number" and type "dropdown" each produce
    {"viz": <html>}; any other type produces a drop document. A request that
    raises while being handled produces {"viz": "The request is not valid."}.

    Args:
        row: The incoming document (accessed like a dict).

    Returns:
        A list with one entry per request handled during this call, plus a
        trailing drop document when ``row`` itself was a dataset.
    """
    global df
    output_list = []
    is_dataset = "group" in row

    if is_dataset:
        # Load dataset and convert into DataFrame.
        df = pd.DataFrame(row["group"])
        # Replace "1" and "0" in $SeniorCitizen field with "Yes" and "No".
        df["SeniorCitizen"] = df["SeniorCitizen"].replace({"1": "Yes", "0": "No"})
    else:
        request_queue.append(row)

    if df is not None:
        while request_queue:
            try:
                request = request_queue.pop(0)
                # All four keys are read up front so that a request missing any
                # of them is reported as invalid regardless of its type.
                original_field = request["original_field"]
                display_field = request["display_field"]
                field_type = request["type"]
                options = request["options"]

                # Plot numeric field.
                if field_type == "number":
                    plot = _build_numeric_plot(df, original_field, display_field, options)
                    output_list.append({"viz": _render_html(plot)})

                # Plot categorical field.
                elif field_type == "dropdown":
                    plot = _build_categorical_plot(df, original_field)
                    output_list.append({"viz": _render_html(plot)})

                # Ignore invalid field type.
                else:
                    output_list.append(slt.get_drop_doc())

            # Catch Exception rather than everything, so KeyboardInterrupt and
            # SystemExit are not swallowed and reported as a bad request.
            except Exception:
                output_list.append({"viz": "The request is not valid."})

    if is_dataset:
        # Do not output if the input document is a dataset. This is needed in order to preserve lineage property.
        output_list.append(slt.get_drop_doc())

    return output_list


def snaplogic_final():
    """Hook executed after all upstream documents are consumed; emits nothing."""
    return

Downloads

Note
title: Important Steps to Successfully Reuse Pipelines
  1. Download and import the Pipeline into SnapLogic.
  2. Configure Snap accounts as applicable.
  3. Provide Pipeline parameters as applicable.

Attachments
patterns: *.slp, *.zip