From 4fa90105b0e153585054f40df0b5833e0e2ebabb Mon Sep 17 00:00:00 2001 From: Dmitry Pavlov Date: Tue, 10 Sep 2019 18:15:32 +0000 Subject: [PATCH 1/7] ... --- .../importschema-checkpoint.ipynb | 223 ++++++++++++++++++ test | 0 2 files changed, 223 insertions(+) create mode 100644 .ipynb_checkpoints/importschema-checkpoint.ipynb create mode 100644 test diff --git a/.ipynb_checkpoints/importschema-checkpoint.ipynb b/.ipynb_checkpoints/importschema-checkpoint.ipynb new file mode 100644 index 00000000..b9293796 --- /dev/null +++ b/.ipynb_checkpoints/importschema-checkpoint.ipynb @@ -0,0 +1,223 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import v3io_frames as v3f\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "from pyspark.sql import SparkSession\n", + "spark = SparkSession.builder.appName(\"Iguazio file access notebook\").config(\"hive.metastore.uris\", \"thrift://hive:9083\").enableHiveSupport().getOrCreate()" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+------------+\n", + "|databaseName|\n", + "+------------+\n", + "| default|\n", + "| test|\n", + "+------------+\n", + "\n" + ] + } + ], + "source": [ + "spark.sql(\"show databases\").show()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------+---------+-----------+\n", + "|database|tableName|isTemporary|\n", + "+--------+---------+-----------+\n", + "| test| a| false|\n", + "| test| b| false|\n", + "| test| example| false|\n", + "| test| example2| false|\n", + "+--------+---------+-----------+\n", + "\n" + ] + } + ], + "source": [ + "spark.sql(\"show tables in test\").show()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "def createTable(tableName, df):\n", + " cols = df.dtypes\n", + " createScript = \"CREATE EXTERNAL TABLE test.\" + tableName + \"(\"\n", + " colArray = []\n", + " for colName, colType in cols:\n", + " colArray.append(colName.replace(\" \", \"_\") + \" \" + colType)\n", + " createColsScript = \", \".join(colArray )\n", + " \n", + " script = createScript + createColsScript + \") STORED AS PARQUET LOCATION '\" + dataDir + \"'\"\n", + " print(script)\n", + " return script\n", + " \n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "def createTableInUserDirectory(dirName):\n", + " dataDir = \"/User/\" + dirName + \"/\"\n", + " print(dataDir)\n", + " #dataDir = \"/User/v3io/bigdata/User/sample\"\n", + " df = spark.read.parquet(dataDir)\n", + " sqlScript = createTable(dirName, df)\n", + " spark.sql(sqlScript)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DataFrame[id: bigint, diagnosis: string, radius_mean: double, texture_mean: double, perimeter_mean: double, area_mean: double, smoothness_mean: double, compactness_mean: double, concavity_mean: double, concave_points_mean: double, symmetry_mean: double, fractal_dimension_mean: double, radius_se: double, texture_se: double, perimeter_se: double, area_se: double, smoothness_se: double, compactness_se: double, concavity_se: 
double, concave_points_se: double, symmetry_se: double, fractal_dimension_se: double, radius_worst: double, texture_worst: double, perimeter_worst: double, area_worst: double, smoothness_worst: double, compactness_worst: double, concavity_worst: double, concave_points_worst: double, symmetry_worst: double, fractal_dimension_worst: double]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "spark.sql(\"select * from test.example2\")" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/User/examples/\n" + ] + }, + { + "ename": "AnalysisException", + "evalue": "'Unable to infer schema for Parquet. It must be specified manually.;'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mPy4JJavaError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/spark/python/pyspark/sql/utils.py\u001b[0m in \u001b[0;36mdeco\u001b[0;34m(*a, **kw)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mpy4j\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprotocol\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPy4JJavaError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py\u001b[0m in \u001b[0;36mget_return_value\u001b[0;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[1;32m 327\u001b[0m \u001b[0;34m\"An error occurred while calling {0}{1}{2}.\\n\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 328\u001b[0;31m format(target_id, \".\", name), value)\n\u001b[0m\u001b[1;32m 329\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mPy4JJavaError\u001b[0m: An error occurred while calling o207.parquet.\n: org.apache.spark.sql.AnalysisException: Unable to infer schema for Parquet. 
It must be specified manually.;\n\tat org.apache.spark.sql.execution.datasources.DataSource$$anonfun$9.apply(DataSource.scala:207)\n\tat org.apache.spark.sql.execution.datasources.DataSource$$anonfun$9.apply(DataSource.scala:207)\n\tat scala.Option.getOrElse(Option.scala:121)\n\tat org.apache.spark.sql.execution.datasources.DataSource.getOrInferFileFormatSchema(DataSource.scala:206)\n\tat org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:392)\n\tat org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:239)\n\tat org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:227)\n\tat org.apache.spark.sql.DataFrameReader.parquet(DataFrameReader.scala:622)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.lang.reflect.Method.invoke(Method.java:498)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\n\tat py4j.Gateway.invoke(Gateway.java:282)\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.GatewayConnection.run(GatewayConnection.java:238)\n\tat java.lang.Thread.run(Thread.java:748)\n", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mAnalysisException\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mdir\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlistdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"/User/v3io/bigdata/User\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mdir\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\".\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mcreateTableInUserDirectory\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdir\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36mcreateTableInUserDirectory\u001b[0;34m(dirName)\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataDir\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m#dataDir = \"/User/v3io/bigdata/User/sample\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mspark\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparquet\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataDir\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0msqlScript\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreateTable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdirName\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mspark\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msql\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msqlScript\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/spark/python/pyspark/sql/readwriter.py\u001b[0m in \u001b[0;36mparquet\u001b[0;34m(self, *paths)\u001b[0m\n\u001b[1;32m 301\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'name'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'string'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'year'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'int'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'month'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'int'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'day'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'int'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 302\u001b[0m \"\"\"\n\u001b[0;32m--> 303\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_df\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jreader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparquet\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_to_seq\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_spark\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpaths\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 304\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 305\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mignore_unicode_prefix\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 1255\u001b[0m \u001b[0manswer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgateway_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1256\u001b[0m return_value = get_return_value(\n\u001b[0;32m-> 1257\u001b[0;31m answer, self.gateway_client, self.target_id, self.name)\n\u001b[0m\u001b[1;32m 1258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1259\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mtemp_arg\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtemp_args\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/spark/python/pyspark/sql/utils.py\u001b[0m in \u001b[0;36mdeco\u001b[0;34m(*a, **kw)\u001b[0m\n\u001b[1;32m 67\u001b[0m e.java_exception.getStackTrace()))\n\u001b[1;32m 68\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'org.apache.spark.sql.AnalysisException: '\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 69\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mAnalysisException\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m': 
'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstackTrace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 70\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'org.apache.spark.sql.catalyst.analysis'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mAnalysisException\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m': '\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstackTrace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAnalysisException\u001b[0m: 'Unable to infer schema for Parquet. It must be specified manually.;'" + ] + } + ], + "source": [ + "#currentDir = os.getcwd()\n", + "#print(currentDir)\n", + "#dirs = os.walk(\"/User\")\n", + "for dir in os.listdir(\"/User/v3io/bigdata/User\"):\n", + " if not dir.startswith(\".\") :\n", + " createTableInUserDirectory(dir)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['.ipynb_checkpoints', 'examples', 'sample']" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "os.listdir(\"/User/v3io/bigdata/User\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/test b/test new file mode 100644 index 00000000..e69de29b From 59fa05c9e8695f1e9cf7b85b30899d09c844738c Mon Sep 17 00:00:00 2001 From: Dmitry Pavlov Date: Fri, 13 Sep 2019 19:01:37 +0000 Subject: [PATCH 2/7] finished notebook --- getting-started/importschema.ipynb | 204 +++++++++++++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 getting-started/importschema.ipynb diff --git a/getting-started/importschema.ipynb b/getting-started/importschema.ipynb new file mode 100644 index 00000000..790019e3 --- /dev/null +++ b/getting-started/importschema.ipynb @@ -0,0 +1,204 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# This notebook is to help automatically import parquet schema to hive" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Below is import of all needed dependencies. And in this sell you should pass path where parquet files located. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/examples/parquet_example\n" + ] + } + ], + "source": [ + "# import pandas as pd\n", + "# import v3io_frames as v3f\n", + "import os\n", + "\n", + "# Set path where parquet files located. They can be nested in folder. \n", + "filepath = os.path.join('/examples/parquet_example')\n", + "print(filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here is creating of spark context with hive support." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from pyspark.sql import SparkSession\n", + "spark = SparkSession.builder.appName(\"Import parquet schema to hive\").config(\"hive.metastore.uris\", \"thrift://hive:9083\").enableHiveSupport().getOrCreate()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define function below for getting sql script needed for creating table in hive using dataframe types as columns to table" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def getCreateTableScript(databaseName, tableName, df):\n", + " cols = df.dtypes\n", + " createScript = \"CREATE EXTERNAL TABLE \" + databaseName + \".\" + tableName + \"(\"\n", + " colArray = []\n", + " for colName, colType in cols:\n", + " colArray.append(colName.replace(\" \", \"_\") + \" \" + colType)\n", + " createColsScript = \", \".join(colArray )\n", + " \n", + " script = createScript + createColsScript + \") STORED AS PARQUET LOCATION '\" + tableName + \"'\"\n", + " print(script)\n", + " return script\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "#define main function for creating table where arqument 'path' is path to parquet files \n", + "def createTable(databaseName, tableName, path): \n", + " df = spark.read.parquet(path)\n", + " sqlScript = getCreateTableScript(databaseName, tableName, df)\n", + " spark.sql(sqlScript)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Here is an example where you provide a path to a folder with parquet files and their are uploaded\n", + "\n", + "Write here name of database. 
Database will create if it doesn't exist.\n", + "In this sell code goes over all files and dirs in provided path and using them for creating table.\n", + "File should be ended with .parquet format\n", + "Directory (in which stored parquet files) should be started with \".\"" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "ename": "AnalysisException", + "evalue": "'org.apache.hadoop.hive.ql.metadata.HiveException: MetaException(message:Unable to create database path file:/User/spark-warehouse/dima.db, failed to create database dima);'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mPy4JJavaError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/spark/python/pyspark/sql/utils.py\u001b[0m in \u001b[0;36mdeco\u001b[0;34m(*a, **kw)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mpy4j\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprotocol\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPy4JJavaError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py\u001b[0m in \u001b[0;36mget_return_value\u001b[0;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[1;32m 327\u001b[0m \u001b[0;34m\"An error occurred while calling {0}{1}{2}.\\n\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 328\u001b[0;31m format(target_id, \".\", name), value)\n\u001b[0m\u001b[1;32m 329\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mPy4JJavaError\u001b[0m: An error occurred while calling o40.sql.\n: org.apache.spark.sql.AnalysisException: org.apache.hadoop.hive.ql.metadata.HiveException: MetaException(message:Unable to create database path file:/User/spark-warehouse/dima.db, failed to create database dima);\n\tat org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:106)\n\tat org.apache.spark.sql.hive.HiveExternalCatalog.doCreateDatabase(HiveExternalCatalog.scala:163)\n\tat org.apache.spark.sql.catalyst.catalog.ExternalCatalog.createDatabase(ExternalCatalog.scala:69)\n\tat org.apache.spark.sql.catalyst.catalog.SessionCatalog.createDatabase(SessionCatalog.scala:207)\n\tat org.apache.spark.sql.execution.command.CreateDatabaseCommand.run(ddl.scala:70)\n\tat org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)\n\tat org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)\n\tat org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)\n\tat org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:190)\n\tat org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:190)\n\tat org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3254)\n\tat 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)\n\tat org.apache.spark.sql.Dataset.withAction(Dataset.scala:3253)\n\tat org.apache.spark.sql.Dataset.(Dataset.scala:190)\n\tat org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:75)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:641)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.lang.reflect.Method.invoke(Method.java:498)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\n\tat py4j.Gateway.invoke(Gateway.java:282)\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.GatewayConnection.run(GatewayConnection.java:238)\n\tat java.lang.Thread.run(Thread.java:748)\nCaused by: org.apache.hadoop.hive.ql.metadata.HiveException: MetaException(message:Unable to create database path file:/User/spark-warehouse/dima.db, failed to create database dima)\n\tat org.apache.hadoop.hive.ql.metadata.Hive.createDatabase(Hive.java:312)\n\tat org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$createDatabase$1.apply$mcV$sp(HiveClientImpl.scala:303)\n\tat org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$createDatabase$1.apply(HiveClientImpl.scala:303)\n\tat org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$createDatabase$1.apply(HiveClientImpl.scala:303)\n\tat org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$withHiveState$1.apply(HiveClientImpl.scala:272)\n\tat org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:210)\n\tat org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:209)\n\tat org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:255)\n\tat org.apache.spark.sql.hive.client.HiveClientImpl.createDatabase(HiveClientImpl.scala:302)\n\tat org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$doCreateDatabase$1.apply$mcV$sp(HiveExternalCatalog.scala:164)\n\tat org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$doCreateDatabase$1.apply(HiveExternalCatalog.scala:164)\n\tat org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$doCreateDatabase$1.apply(HiveExternalCatalog.scala:164)\n\tat org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)\n\t... 
26 more\nCaused by: MetaException(message:Unable to create database path file:/User/spark-warehouse/dima.db, failed to create database dima)\n\tat org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$create_database_result$create_database_resultStandardScheme.read(ThriftHiveMetastore.java:14412)\n\tat org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$create_database_result$create_database_resultStandardScheme.read(ThriftHiveMetastore.java:14380)\n\tat org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$create_database_result.read(ThriftHiveMetastore.java:14314)\n\tat org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:86)\n\tat org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_create_database(ThriftHiveMetastore.java:625)\n\tat org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.create_database(ThriftHiveMetastore.java:612)\n\tat org.apache.hadoop.hive.metastore.HiveMetaStoreClient.createDatabase(HiveMetaStoreClient.java:644)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.lang.reflect.Method.invoke(Method.java:498)\n\tat org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:156)\n\tat com.sun.proxy.$Proxy13.createDatabase(Unknown Source)\n\tat org.apache.hadoop.hive.ql.metadata.Hive.createDatabase(Hive.java:306)\n\t... 38 more\n", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mAnalysisException\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mdatabaseName\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"dima\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mspark\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msql\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"CREATE DATABASE IF NOT EXISTS \"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mdatabaseName\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfileOrDir\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlistdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/spark/python/pyspark/sql/session.py\u001b[0m in \u001b[0;36msql\u001b[0;34m(self, sqlQuery)\u001b[0m\n\u001b[1;32m 708\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mRow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf1\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf2\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34mu'row1'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mRow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf1\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf2\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34mu'row2'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mRow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf1\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mf2\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34mu'row3'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 709\u001b[0m \"\"\"\n\u001b[0;32m--> 710\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jsparkSession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msql\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msqlQuery\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_wrapped\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 711\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 712\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0msince\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m2.0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 1255\u001b[0m \u001b[0manswer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgateway_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1256\u001b[0m return_value = get_return_value(\n\u001b[0;32m-> 1257\u001b[0;31m answer, self.gateway_client, self.target_id, self.name)\n\u001b[0m\u001b[1;32m 1258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1259\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mtemp_arg\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtemp_args\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/spark/python/pyspark/sql/utils.py\u001b[0m in \u001b[0;36mdeco\u001b[0;34m(*a, **kw)\u001b[0m\n\u001b[1;32m 67\u001b[0m e.java_exception.getStackTrace()))\n\u001b[1;32m 68\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'org.apache.spark.sql.AnalysisException: '\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 69\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mAnalysisException\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m': '\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstackTrace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 70\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'org.apache.spark.sql.catalyst.analysis'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mAnalysisException\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m': '\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mstackTrace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAnalysisException\u001b[0m: 'org.apache.hadoop.hive.ql.metadata.HiveException: MetaException(message:Unable to create database path file:/User/spark-warehouse/dima.db, failed to create database dima);'" + ] + } + ], + "source": [ + "databaseName = \"hive\"\n", + "spark.sql(\"CREATE DATABASE IF NOT EXISTS \" + databaseName)\n", + "\n", + "\n", + "for fileOrDir in os.listdir(filepath):\n", + " if fileOrDir.endswith(\".parquet\") :\n", + " createTable(databaseName, fileOrDir.split(\".parquet\")[0], filepath + fileOrDir)\n", + " elif not fileOrDir.startswith(\".\") :\n", + " createTable(databaseName, fileOrDir, filepath + fileOrDir + \"/*\")\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test how it works" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# test how the tables were saved\n", + "spark.sql(\"show tables in \" + databaseName).show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# test how saving to table works\n", + "tableName = \"example1\"\n", + "spark.sql(\"select * from \" + databaseName + \".\" + tableName)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 8a20d03e6dbada1525fa0361b439bf050ece9998 Mon Sep 17 00:00:00 2001 From: Dmitry Pavlov Date: Fri, 13 Sep 2019 19:06:06 +0000 Subject: [PATCH 3/7] removed useless code --- test | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 test diff --git a/test b/test deleted file mode 100644 index e69de29b..00000000 From 8413fa19f159aff58fee637b2825eacc87e06ec9 Mon Sep 17 00:00:00 2001 From: Dmitry Pavlov Date: Fri, 13 Sep 2019 18:47:02 -0400 Subject: [PATCH 4/7] fixing commit --- getting-started/importschema.ipynb | 212 ------------------------ getting-started/parquettohive.ipynb | 246 ++++++++++++++++++++++++++++ 2 files changed, 246 insertions(+), 212 deletions(-) delete mode 100644 getting-started/importschema.ipynb create mode 100644 getting-started/parquettohive.ipynb diff --git a/getting-started/importschema.ipynb b/getting-started/importschema.ipynb deleted file mode 100644 index ea1d2fda..00000000 --- a/getting-started/importschema.ipynb +++ /dev/null @@ -1,212 +0,0 @@ -{ - "cells": [ - { - - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# This notebook is to help automatically import parquet schema to hive" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Below is import of all needed dependencies. And in this sell you should pass path where parquet files located. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/examples/parquet_example\n" - ] - } - ], - "source": [ - "# import pandas as pd\n", - "# import v3io_frames as v3f\n", - "import os\n", - "\n", - "# Set path where parquet files located. They can be nested in folder. \n", - "filepath = os.path.join('/examples/parquet_example')\n", - "print(filepath)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here is creating of spark context with hive support." - - ] - }, - { - "cell_type": "code", - - "execution_count": 2, - - "metadata": {}, - "outputs": [], - "source": [ - "from pyspark.sql import SparkSession\n", - "spark = SparkSession.builder.appName(\"Import parquet schema to hive\").config(\"hive.metastore.uris\", \"thrift://hive:9083\").enableHiveSupport().getOrCreate()" - ] - }, - { - - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Define function below for getting sql script needed for creating table in hive using dataframe types as columns to table" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "def getCreateTableScript(databaseName, tableName, df):\n", - - " cols = df.dtypes\n", - " createScript = \"CREATE EXTERNAL TABLE \" + databaseName + \".\" + tableName + \"(\"\n", - " colArray = []\n", - " for colName, colType in cols:\n", - " colArray.append(colName.replace(\" \", \"_\") + \" \" + colType)\n", - " createColsScript = \", \".join(colArray )\n", - " \n", - " script = createScript + createColsScript + \") STORED AS PARQUET LOCATION '\" + tableName + \"'\"\n", - " print(script)\n", - " return script\n", - " " - ] - }, - { - "cell_type": "code", - - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "#define main function for creating table where arqument 'path' is path to parquet files \n", - "def createTable(databaseName, tableName, path): \n", - " df = spark.read.parquet(path)\n", - " sqlScript = getCreateTableScript(databaseName, tableName, df)\n", - - " spark.sql(sqlScript)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Here is an example where you provide a path to a folder with parquet files and their are uploaded\n", - "\n", - "Write here name of database. 
Database will create if it doesn't exist.\n", - "In this sell code goes over all files and dirs in provided path and using them for creating table.\n", - "File should be ended with .parquet format\n", - "Directory (in which stored parquet files) should be started with \".\"" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "ename": "AnalysisException", - "evalue": "'org.apache.hadoop.hive.ql.metadata.HiveException: MetaException(message:Unable to create database path file:/User/spark-warehouse/dima.db, failed to create database dima);'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mPy4JJavaError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/spark/python/pyspark/sql/utils.py\u001b[0m in \u001b[0;36mdeco\u001b[0;34m(*a, **kw)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mpy4j\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprotocol\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPy4JJavaError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py\u001b[0m in \u001b[0;36mget_return_value\u001b[0;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[1;32m 327\u001b[0m \u001b[0;34m\"An error occurred while calling {0}{1}{2}.\\n\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 328\u001b[0;31m format(target_id, \".\", name), value)\n\u001b[0m\u001b[1;32m 329\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mPy4JJavaError\u001b[0m: An error occurred while calling o40.sql.\n: org.apache.spark.sql.AnalysisException: org.apache.hadoop.hive.ql.metadata.HiveException: MetaException(message:Unable to create database path file:/User/spark-warehouse/dima.db, failed to create database dima);\n\tat org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:106)\n\tat org.apache.spark.sql.hive.HiveExternalCatalog.doCreateDatabase(HiveExternalCatalog.scala:163)\n\tat org.apache.spark.sql.catalyst.catalog.ExternalCatalog.createDatabase(ExternalCatalog.scala:69)\n\tat org.apache.spark.sql.catalyst.catalog.SessionCatalog.createDatabase(SessionCatalog.scala:207)\n\tat org.apache.spark.sql.execution.command.CreateDatabaseCommand.run(ddl.scala:70)\n\tat org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)\n\tat org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)\n\tat org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)\n\tat org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:190)\n\tat org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:190)\n\tat org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3254)\n\tat 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)\n\tat org.apache.spark.sql.Dataset.withAction(Dataset.scala:3253)\n\tat org.apache.spark.sql.Dataset.(Dataset.scala:190)\n\tat org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:75)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:641)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.lang.reflect.Method.invoke(Method.java:498)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\n\tat py4j.Gateway.invoke(Gateway.java:282)\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.GatewayConnection.run(GatewayConnection.java:238)\n\tat java.lang.Thread.run(Thread.java:748)\nCaused by: org.apache.hadoop.hive.ql.metadata.HiveException: MetaException(message:Unable to create database path file:/User/spark-warehouse/dima.db, failed to create database dima)\n\tat org.apache.hadoop.hive.ql.metadata.Hive.createDatabase(Hive.java:312)\n\tat org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$createDatabase$1.apply$mcV$sp(HiveClientImpl.scala:303)\n\tat org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$createDatabase$1.apply(HiveClientImpl.scala:303)\n\tat org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$createDatabase$1.apply(HiveClientImpl.scala:303)\n\tat org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$withHiveState$1.apply(HiveClientImpl.scala:272)\n\tat org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:210)\n\tat org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:209)\n\tat org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:255)\n\tat org.apache.spark.sql.hive.client.HiveClientImpl.createDatabase(HiveClientImpl.scala:302)\n\tat org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$doCreateDatabase$1.apply$mcV$sp(HiveExternalCatalog.scala:164)\n\tat org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$doCreateDatabase$1.apply(HiveExternalCatalog.scala:164)\n\tat org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$doCreateDatabase$1.apply(HiveExternalCatalog.scala:164)\n\tat org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)\n\t... 
26 more\nCaused by: MetaException(message:Unable to create database path file:/User/spark-warehouse/dima.db, failed to create database dima)\n\tat org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$create_database_result$create_database_resultStandardScheme.read(ThriftHiveMetastore.java:14412)\n\tat org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$create_database_result$create_database_resultStandardScheme.read(ThriftHiveMetastore.java:14380)\n\tat org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$create_database_result.read(ThriftHiveMetastore.java:14314)\n\tat org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:86)\n\tat org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_create_database(ThriftHiveMetastore.java:625)\n\tat org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.create_database(ThriftHiveMetastore.java:612)\n\tat org.apache.hadoop.hive.metastore.HiveMetaStoreClient.createDatabase(HiveMetaStoreClient.java:644)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.lang.reflect.Method.invoke(Method.java:498)\n\tat org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:156)\n\tat com.sun.proxy.$Proxy13.createDatabase(Unknown Source)\n\tat org.apache.hadoop.hive.ql.metadata.Hive.createDatabase(Hive.java:306)\n\t... 38 more\n", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mAnalysisException\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mdatabaseName\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"dima\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mspark\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msql\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"CREATE DATABASE IF NOT EXISTS \"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mdatabaseName\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfileOrDir\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlistdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/spark/python/pyspark/sql/session.py\u001b[0m in \u001b[0;36msql\u001b[0;34m(self, sqlQuery)\u001b[0m\n\u001b[1;32m 708\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mRow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf1\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf2\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34mu'row1'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mRow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf1\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf2\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34mu'row2'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mRow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf1\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mf2\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34mu'row3'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 709\u001b[0m \"\"\"\n\u001b[0;32m--> 710\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jsparkSession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msql\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msqlQuery\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_wrapped\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 711\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 712\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0msince\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m2.0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 1255\u001b[0m \u001b[0manswer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgateway_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1256\u001b[0m return_value = get_return_value(\n\u001b[0;32m-> 1257\u001b[0;31m answer, self.gateway_client, self.target_id, self.name)\n\u001b[0m\u001b[1;32m 1258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1259\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mtemp_arg\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtemp_args\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/spark/python/pyspark/sql/utils.py\u001b[0m in \u001b[0;36mdeco\u001b[0;34m(*a, **kw)\u001b[0m\n\u001b[1;32m 67\u001b[0m e.java_exception.getStackTrace()))\n\u001b[1;32m 68\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'org.apache.spark.sql.AnalysisException: '\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 69\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mAnalysisException\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m': '\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstackTrace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 70\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'org.apache.spark.sql.catalyst.analysis'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mAnalysisException\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m': '\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mstackTrace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mAnalysisException\u001b[0m: 'org.apache.hadoop.hive.ql.metadata.HiveException: MetaException(message:Unable to create database path file:/User/spark-warehouse/dima.db, failed to create database dima);'" - ] - } - ], - "source": [ - "databaseName = \"hive\"\n", - "spark.sql(\"CREATE DATABASE IF NOT EXISTS \" + databaseName)\n", - "\n", - "\n", - "for fileOrDir in os.listdir(filepath):\n", - " if fileOrDir.endswith(\".parquet\") :\n", - " createTable(databaseName, fileOrDir.split(\".parquet\")[0], filepath + fileOrDir)\n", - " elif not fileOrDir.startswith(\".\") :\n", - " createTable(databaseName, fileOrDir, filepath + fileOrDir + \"/*\")\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Test how it works" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# test how the tables were saved\n", - "spark.sql(\"show tables in \" + databaseName).show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# test how saving to table works\n", - "tableName = \"example1\"\n", - "spark.sql(\"select * from \" + databaseName + \".\" + tableName)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/getting-started/parquettohive.ipynb b/getting-started/parquettohive.ipynb new file mode 100644 index 00000000..2aa418d9 --- /dev/null +++ b/getting-started/parquettohive.ipynb @@ -0,0 +1,246 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# This notebook is to help automatically import parquet schema to hive" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Below is import of all needed dependencies. And in this sell you should pass path where parquet files located. " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here is creating of spark context with hive support." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pyspark.sql import SparkSession\n",
+    "spark = SparkSession.builder.appName(\"Import parquet schema to hive\").config(\"hive.metastore.uris\", \"thrift://hive:9083\").enableHiveSupport().getOrCreate()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The function below generates the SQL script for creating a Hive table, using the DataFrame's column names and types as the table columns."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def getCreateTableScript(databaseName, tableName, df):\n",
+    "    cols = df.dtypes\n",
+    "    createScript = \"CREATE EXTERNAL TABLE \" + databaseName + \".\" + tableName + \"(\"\n",
+    "    colArray = []\n",
+    "    for colName, colType in cols:\n",
+    "        colArray.append(colName.replace(\" \", \"_\") + \" \" + colType)\n",
+    "    createColsScript = \", \".join(colArray )\n",
+    "    \n",
+    "    script = createScript + createColsScript + \") STORED AS PARQUET LOCATION '\" + tableName + \"'\"\n",
+    "    print(script)\n",
+    "    return script\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define the main function for creating a table; the argument 'path' is the path to the parquet files.\n",
+    "def createTable(databaseName, tableName, path): \n",
+    "    df = spark.read.parquet(path)\n",
+    "    sqlScript = getCreateTableScript(databaseName, tableName, df)\n",
+    "    spark.sql(sqlScript)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## One file example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CREATE EXTERNAL TABLE test.table_from_single_file(id bigint, diagnosis string, radius_mean double, texture_mean double, perimeter_mean double, area_mean double, smoothness_mean double, compactness_mean double, concavity_mean double, concave_points_mean double, symmetry_mean double, fractal_dimension_mean double, radius_se double, texture_se double, perimeter_se double, area_se double, smoothness_se double, compactness_se double, concavity_se double, concave_points_se double, symmetry_se double, fractal_dimension_se double, radius_worst double, texture_worst double, perimeter_worst double, area_worst double, smoothness_worst double, compactness_worst double, concavity_worst double, concave_points_worst double, symmetry_worst double, fractal_dimension_worst double) STORED AS PARQUET LOCATION 'table_from_single_file'\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Set the path to a specific parquet file.\n",
+    "my_parquet_file_path = os.path.join('v3io://bigdata/examples/example1.parquet')\n",
+    "createTable(\"test\",\"table_from_single_file\",my_parquet_file_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## One folder example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CREATE EXTERNAL TABLE test.table_from_dir(id bigint, diagnosis string, radius_mean double, texture_mean double, perimeter_mean double, area_mean double, smoothness_mean double, compactness_mean double, concavity_mean double, concave_points_mean double, symmetry_mean double, fractal_dimension_mean double, radius_se double, texture_se double, perimeter_se double, area_se double, smoothness_se double, compactness_se 
double, concavity_se double, concave_points_se double, symmetry_se double, fractal_dimension_se double, radius_worst double, texture_worst double, perimeter_worst double, area_worst double, smoothness_worst double, compactness_worst double, concavity_worst double, concave_points_worst double, symmetry_worst double, fractal_dimension_worst double) STORED AS PARQUET LOCATION 'table_from_dir'\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Set the path to a folder that contains parquet files.\n",
+    "folder_path = os.path.join('v3io://users/admin/examples/parquet_examples/dir1/*')\n",
+    "createTable(\"test\",\"table_from_dir\",folder_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Multiple files and folders example\n",
+    "\n",
+    "Specify the name of the database and the path to a folder that contains all the parquet files (or folders of parquet files). The database must already exist.\n",
+    "The code in this cell iterates over all the files and directories in the provided path and creates a table from each of them.\n",
+    "Files must end with the .parquet extension.\n",
+    "Directories (in which the parquet files are stored) must not start with \".\"; hidden directories are skipped.\n",
+    "The name of the directory or file is used as the table name."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CREATE EXTERNAL TABLE test.dir1(id bigint, diagnosis string, radius_mean double, texture_mean double, perimeter_mean double, area_mean double, smoothness_mean double, compactness_mean double, concavity_mean double, concave_points_mean double, symmetry_mean double, fractal_dimension_mean double, radius_se double, texture_se double, perimeter_se double, area_se double, smoothness_se double, compactness_se double, concavity_se double, concave_points_se double, symmetry_se double, fractal_dimension_se double, radius_worst double, texture_worst double, perimeter_worst double, area_worst double, smoothness_worst double, compactness_worst double, concavity_worst double, concave_points_worst double, symmetry_worst double, fractal_dimension_worst double) STORED AS PARQUET LOCATION 'dir1'\n",
+      "CREATE EXTERNAL TABLE test.example1(id bigint, diagnosis string, radius_mean double, texture_mean double, perimeter_mean double, area_mean double, smoothness_mean double, compactness_mean double, concavity_mean double, concave_points_mean double, symmetry_mean double, fractal_dimension_mean double, radius_se double, texture_se double, perimeter_se double, area_se double, smoothness_se double, compactness_se double, concavity_se double, concave_points_se double, symmetry_se double, fractal_dimension_se double, radius_worst double, texture_worst double, perimeter_worst double, area_worst double, smoothness_worst double, compactness_worst double, concavity_worst double, concave_points_worst double, symmetry_worst double, fractal_dimension_worst double) STORED AS PARQUET LOCATION 'example1'\n"
+     ]
+    }
+   ],
+   "source": [
+    "databaseName = \"test\"\n",
+    "filepath = \"/v3io/users/admin/examples/parquet_examples\"\n",
+    "\n",
+    "for fileOrDir in os.listdir(filepath):\n",
+    "    if fileOrDir.endswith(\".parquet\") :\n",
+    "        createTable(databaseName, fileOrDir.split(\".parquet\")[0], filepath.replace(\"/v3io/\", \"v3io://\", 1) + \"/\" + fileOrDir)\n",
+    "    elif not fileOrDir.startswith(\".\") :\n",
+    "        createTable(databaseName, fileOrDir, filepath.replace(\"/v3io/\", \"v3io://\", 1) + \"/\" + fileOrDir + \"/*\")\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    
"# Test how it works" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+------------+\n", + "|databaseName|\n", + "+------------+\n", + "| default|\n", + "| test|\n", + "+------------+\n", + "\n", + "+--------+---------+-----------+\n", + "|database|tableName|isTemporary|\n", + "+--------+---------+-----------+\n", + "+--------+---------+-----------+\n", + "\n" + ] + } + ], + "source": [ + "# test how the tables were saved\n", + "#spark.sql(\"drop database test CASCADE\")\n", + "spark.sql(\"drop table \" + databaseName + \".example1\")\n", + "spark.sql(\"show databases\").show()\n", + "spark.sql(\"show tables in \" + databaseName).show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# test how saving to table works\n", + "tableName = \"example1\"\n", + "spark.sql(\"select * from \" + databaseName + \".\" + tableName)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From dd020acd97b98ed530064f06b7b2a6582c1045e9 Mon Sep 17 00:00:00 2001 From: Sharon Lifshitz Date: Sun, 15 Sep 2019 18:08:01 +0300 Subject: [PATCH 5/7] [DOC] Frames GS MB doc review (#1) (v2.3.0 outputs) [IG-12272 IG-12092] --- getting-started/frames.ipynb | 788 +++++++++++++++++++++++------------ 1 file changed, 515 insertions(+), 273 deletions(-) diff --git a/getting-started/frames.ipynb b/getting-started/frames.ipynb index 71f03f7f..e0097fb7 100644 --- a/getting-started/frames.ipynb +++ b/getting-started/frames.ipynb @@ -4,28 +4,60 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Using Iguazio Frames Library for High-Performance Data Access \n", - "iguazio `v3io_frames` is a streaming oriented multi-model (generic) data API which allow high-speed data loading and storing
\n", - "frames currently support iguazio key/value, time-series, and streaming data models (called backends), additional backends will be added.\n", + "# Using the V3IO Frames Library for High-Performance Data Access \n", "\n", - "For detailed description of the Frames API go to https://github.com/v3io/frames\n", + "- [Overview](#frames-overview)\n", + "- [Initialization](#frames-init)\n", + "- [Working with NoSQL Tables (\"kv\" Backend)](#frames-kv)\n", + "- [Working with Time-Series Databases (\"tsdb\" Backend)](#frames-tsdb)\n", + "- [Working with Streams (\"stream\" Backend)](#frames-stream)\n", + "- [Cleanup](#frames-cleanup)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Overview\n", "\n", - "to use frames you first create a `client` and provide it the session and credential details, the client is used to for 5 basic operations:\n", + "[V3IO Frames](https://github.com/v3io/frames) (**\"Frames\"**) is a multi-model open-source data-access library, developed by Iguazio, which provides a unified high-performance DataFrame API for loading, storing, and accessing data in the data store of the Iguazio Data Science Platform (**\"the platform**).\n", + "Frames currently supports the NoSQL (key/value), stream, and time-series (TSDB) data models via its \"kv\", \"stream\", and \"tsdb\" backends.\n", + "\n", + "To use Frames, you first need to import the **v3io_frames** library and create and initialize a client object — an instance of the`Client` class.
\n", + "The `Client` class features the following object methods for supporting basic data operations:\n", + "\n", + "- `create` — create a new NoSQL or TSDB table or a stream (\"the backend\").\n", + "- `delete` — delete the backend.\n", + "- `read` — read data from the backend (as a pandas DataFrame or DataFrame iterator).\n", + "- `write` — write one or more DataFrames to the backend.\n", + "- `execute` — execute a command on the backend. Each backend may support multiple commands.\n", + "\n", + "\n", + "For a detailed description of the Frames API, see the [Frames documentation](https://github.com/v3io/frames/blob/development/README.md).
\n", + "For more help and usage details, use the internal API help — `.?` in Jupyter Notebook or `print(..__doc__)`.
\n", + "For example, the following command returns information about the read operation for a client object named `client`:\n", "```\n", - " create - create a new time-series table or a stream \n", - " delete - delete the table or stream\n", - " read - read data from the backend (as pandas DataFrame or dataFrame iterator)\n", - " write - write one or more DataFrames into the backend\n", - " execute - execute a command on the backend, each backend may support multiple commands \n", - "``` \n", + "client.read?\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Initialization\n", "\n", - "Content:\n", - "- [Working with key/value and SQL data](kv)\n", - "- [Working with Time-series data](#tsdb)\n", - "- [Working with Streams](#stream)\n", + "To use V3IO Frames, first ensure that your platform tenant has a shared tenant-wide instance of the V3IO Frames service.\n", + "This can be done by a platform service administrator from the **Services** dashboard page.
\n", + "Then, import the required libraries and create a Frames client object (an instance of the `Client` class), as demonstrated in the following code, which creates a client object named `client`.\n", "\n", - "The following sections describe how to use frames, for more help and details use the internal documentation, e.g. run the following command\n", - "``` client.read?```\n" + "> **Note:**\n", + "> - The client constructor's `container` parameter is set to `\"users\"` for accessing data in the platform's \"users\" data container.\n", + "> - Because no authentication credentials are passed to the constructor, Frames will use the access token that's assigned to the `V3IO_ACCESS_KEY` environment variable.\n", + "> The platform's Jupyter Notebook service defines this variable automatically and initializes it to an access token for the running user of the service.\n", + "> You can pass different credentials by using the constructor's `token` parameter (platform access token) or `user` and `password` parameters (platform username and password)." ] }, { @@ -37,23 +69,72 @@ "import pandas as pd\n", "import v3io_frames as v3f\n", "import os\n", - "client = v3f.Client('framesd:8081', container='users')" + "\n", + "# Create a Frames client\n", + "client = v3f.Client(\"framesd:8081\", container=\"users\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Working with NoSQL Tables (\"kv\" Backend)\n", + "\n", + "This section demonstrates how to use the `\"kv\"` Frames backend to write and read NoSQL data in the platform.\n", + "\n", + "- [Initialization](#frames-kv-init)\n", + "- [Load Data from Amazon S3](frames-kv-load-data-s3)\n", + "- [Write to a NoSQL Table](#frames-kv-write)\n", + "- [Read from the Table Using an SQL Query](#frames-kv-read-sql-query)\n", + "- [Read from the Table Using the Frames API](#frames-kv-read-frames-api)\n", + " - [Read Using a Single DataFrame](#frames-kv-read-frames-api-single-df)\n", + " - [Read Using a DataFrame Iterator (Streaming)](#frames-kv-read-frames-api-df-iterator)\n", + "- [Write Data Using an Update Expression](#frames-kv-write-update-expression)\n", + " - [Use the Write Method to Perform a Batch Update](#frames-kv-write-expression-batch-update)\n", + " - [Use the Update Method's Execute Command to Update a Single Item](#frames-kv-write-expression--singe-item-update-w-execute-update-cmd)\n", + "- [Delete the NoSQL Table](#frames-kv-delete)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - "## Working with key/value and SQL data\n", + "\n", + "### Initialization\n", "\n", - "### Load data from Amazon S3" + "Start out by defining table-path variables that will be used in the tutorial's code examples.
\n", + "The table path (`table`) is relative to the configured parent data container; see [Write to a NoSQL Table](#frames-kv-write)." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, + "outputs": [], + "source": [ + "# Relative path to the NoSQL table within the parent platform data container\n", + "table = os.path.join(os.getenv(\"V3IO_USERNAME\") + \"/examples/bank\")\n", + "\n", + "# Full path to the NoSQL table for SQL queries (platform Presto data-path syntax);\n", + "# use the same data container as used for the Frames client (\"users\")\n", + "sql_table_path = 'v3io.users.\"' + table + '\"'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "### Load Data from Amazon S3\n", + "\n", + "Read a file from an Amazon Simple Storage (S3) bucket into a Frames pandas DataFrame." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, "outputs": [ { "data": { @@ -216,15 +297,14 @@ "4 unknown 5 may 226 1 -1 0 unknown no " ] }, - "execution_count": 2, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# read S3 file into a data frame and show its data & metadata\n", - "tablename = os.path.join(os.getenv('V3IO_USERNAME')+'/examples/bank')\n", - "df = pd.read_csv('https://s3.amazonaws.com/iguazio-sample-data/bank.csv', sep=';')\n", + "# Read an AWS S3 file into a DataFrame and show its data and metadata\n", + "df = pd.read_csv(\"https://s3.amazonaws.com/iguazio-sample-data/bank.csv\", sep=\";\")\n", "df.head()" ] }, @@ -232,33 +312,42 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Write data frames into the database using a single command\n", - "data is streamed into the database via fast NoSQL APIs, note the backend is `kv`
\n", - "the input data can be a single dataframe or a dataframe iterator (for streaming)" + "\n", + "### Write to a NoSQL Table\n", + "\n", + "Use the `write` method of the Frames client with the `\"kv\"` backend to write the data that was read in the previous step to a NoSQL table.
\n", + "The mandatory `table` parameter specifies the relative table path within the data container that was configured for the Frames client (see the [main initialization](#frames-init) step).\n", + "In the following example, the relative table path is set by using the `table` variable that was defined in the [\"kv\" backend initialization](#frames-kv-init) step.
\n", + "The `dfs` parameter can be set either to a single DataFrame (as done in the following example) or to multiple DataFrames — either as a DataFrame iterator or as a list of DataFrames." ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ - "out = client.write('kv', tablename, df)" + "out = client.write(\"kv\", table=table, dfs=df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Read from the Database with DB side SQL\n", - "offload data filtering, grouping, joins, etc to a scale-out high speed DB engine
\n", - "Note that we're using a V3IO_USERNAME as environment variable as therefore we need to define the string for the \"From\" section
\n", - "The from convention is select * from v3io..\"path\"" + "\n", + "### Read from the Table Using an SQL Query\n", + "\n", + "You can run SQL queries on your NoSQL table (using Presto) to offload data filtering, grouping, joins, etc. to a scale-out high-speed database engine.\n", + "\n", + "> **Note:** To query a table in a platform data container, the table path in the `from` section of the SQL query should be of the format `v3io..\"/path/to/table\"`.\n", + "> See [Presto Data Paths](https://www.iguazio.com/docs/tutorials/latest-release/getting-started/fundamentals/#data-paths-presto) in the platform documentation.\n", + "> In the following example, the path is set by using the `sql_table_path` variable that was defined in the [\"kv\" backend initialization](#frames-kv-init) step.\n", + "> Unless you changed the code, this variable translates to `v3io.users.\"/examples/bank\"`; for example, `v3io.users.\"iguazio/examples/bank\"` for user \"iguazio\"." ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -295,68 +384,83 @@ " no\n", " secondary\n", " 0\n", - " yes\n", + " no\n", " unknown\n", - " 249\n", + " 219\n", " married\n", " no\n", - " 19317\n", - " aug\n", - " cellular\n", - " 1\n", - " yes\n", + " 26452\n", + " jul\n", + " telephone\n", + " 2\n", + " no\n", " retired\n", - " 4\n", - " 68\n", + " 15\n", + " 75\n", " -1\n", " \n", " \n", " no\n", " secondary\n", " 0\n", - " no\n", + " yes\n", " unknown\n", - " 219\n", + " 249\n", " married\n", " no\n", - " 26452\n", - " jul\n", - " telephone\n", - " 2\n", - " no\n", + " 19317\n", + " aug\n", + " cellular\n", + " 1\n", + " yes\n", " retired\n", - " 15\n", - " 75\n", + " 4\n", + " 68\n", " -1\n", " \n", "" ], "text/plain": [ - "[('no', 'secondary', 0, 'yes', 'unknown', 249, 'married', 'no', 19317, 'aug', 'cellular', 1, 'yes', 'retired', 4, 68, -1),\n", - " ('no', 'secondary', 0, 'no', 'unknown', 219, 'married', 'no', 26452, 'jul', 'telephone', 2, 'no', 'retired', 15, 75, -1)]" + "[('no', 'secondary', 0, 'no', 'unknown', 219, 'married', 'no', 26452, 'jul', 'telephone', 2, 'no', 'retired', 15, 75, -1),\n", + " ('no', 'secondary', 0, 'yes', 'unknown', 249, 'married', 'no', 19317, 'aug', 'cellular', 1, 'yes', 'retired', 4, 68, -1)]" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "table_path = os.path.join('v3io.users.\"'+os.getenv('V3IO_USERNAME')+'/examples/bank\"')\n", - "%sql select * from $table_path where balance > 10000" + "%sql select * from $sql_table_path where balance > 10000" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Read the data through frames API\n", - "the frames API returns a dataframe or a dataframe iterator (a stream)
" + "\n", + "### Read from the Table Using the Frames API\n", + "\n", + "Use the `read` method of the Frames client with the `\"kv\"` backend to read data from your NoSQL table.
\n", + "The `read` method can return a DataFrame or a DataFrame iterator (a stream), as demonstrated in the following examples.\n", + "\n", + "- [Read Using a Single DataFrame](#frames-kv-read-frames-api-single-df)\n", + "- [Read Using a DataFrame Iterator (Streaming)](#frames-kv-read-frames-api-df-iterator)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Read Using a Single DataFrame\n", + "\n", + "The following example uses a single command to read data from the NoSQL table into a DataFrame." ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -380,26 +484,26 @@ " \n", " \n", " \n", - " housing\n", - " contact\n", - " education\n", - " loan\n", + " age\n", + " balance\n", " campaign\n", - " pdays\n", - " poutcome\n", + " contact\n", + " day\n", " default\n", - " balance\n", " duration\n", - " previous\n", + " education\n", + " housing\n", " job\n", + " loan\n", " marital\n", " month\n", - " day\n", - " age\n", + " pdays\n", + " poutcome\n", + " previous\n", " y\n", " \n", " \n", - " __name\n", + " index\n", " \n", " \n", " \n", @@ -422,22 +526,22 @@ " \n", " \n", " 75\n", - " no\n", - " telephone\n", - " secondary\n", - " no\n", + " 75.0\n", + " 26452.0\n", " 2.0\n", - " -1.0\n", - " unknown\n", + " telephone\n", + " 15.0\n", " no\n", - " 26452.0\n", " 219.0\n", - " 0.0\n", + " secondary\n", + " no\n", " retired\n", + " no\n", " married\n", " jul\n", - " 15.0\n", - " 75.0\n", + " -1.0\n", + " unknown\n", + " 0.0\n", " no\n", " \n", " \n", @@ -445,22 +549,22 @@ "" ], "text/plain": [ - " housing contact education loan campaign pdays poutcome default \\\n", - "__name \n", - "75 no telephone secondary no 2.0 -1.0 unknown no \n", + " age balance campaign contact day default duration education \\\n", + "index \n", + "75 75.0 26452.0 2.0 telephone 15.0 no 219.0 secondary \n", "\n", - " balance duration previous job marital month day age y \n", - "__name \n", - "75 26452.0 219.0 0.0 retired married jul 15.0 75.0 no " + " housing job loan marital month pdays poutcome previous y \n", + "index \n", + "75 no retired no married jul -1.0 unknown 0.0 no " ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df = client.read(backend='kv', table=tablename, filter=\"balance>20000\")\n", + "df = client.read(backend=\"kv\", table=table, filter=\"balance > 20000\")\n", "df.head(8)" ] }, @@ -468,36 +572,36 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Read the data as a stream iterator\n", - "to use iterator and allow cuncurent data movement and processing add `iterator=True`, you will need to iterate over the returned value or use `concat`\n", - "iterators work with all backends (not just stream), they allow streaming when placed as an input to write functions which support iterators as input" + "\n", + "#### Read Using a DataFrame Iterator (Streaming)\n", + "\n", + "The following example uses a DataFrame iterator to stream data from the NoSQL table into multiple DataFrames and allow concurrent data movement and processing.
\n", + "The example sets the `iterator` parameter to `True` to receive a DataFrame iterator (instead of the default single DataFrame), and then iterates the DataFrames in the returned iterator; you can also use `concat` instead of iterating the DataFrames.\n", + "\n", + "> **Note:** Iterators work with all Frames backends and can be used as input to write functions that support this, such as the `write` method of the Frames client." ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " balance campaign marital default loan contact y age \\\n", - "__name \n", - "75 26452.0 2.0 married no no telephone no 75.0 \n", + " age balance campaign contact day default duration education \\\n", + "index \n", + "75 75.0 26452.0 2.0 telephone 15.0 no 219.0 secondary \n", "\n", - " duration previous day housing pdays education job poutcome \\\n", - "__name \n", - "75 219.0 0.0 15.0 no -1.0 secondary retired unknown \n", - "\n", - " month \n", - "__name \n", - "75 jul \n" + " housing job loan marital month pdays poutcome previous y \n", + "index \n", + "75 no retired no married jul -1.0 unknown 0.0 no \n" ] } ], "source": [ - "dfs = client.read(backend='kv', table=tablename, filter=\"balance>20000\", iterator=True)\n", + "dfs = client.read(backend=\"kv\", table=table, filter=\"balance > 20000\", iterator=True)\n", "for df in dfs:\n", " print(df.head())" ] @@ -506,108 +610,139 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Batch updates with expression\n", - "in many cases we want to update specific column values or update a column using an expression (e.g. counter = counter + x)
\n", - "when using the key/value backend it can run an expression against each of the rows (specified in the index), and use the dataframe columns as parameters
\n", - "columns are specified using `{}`, e.g. specifing `expression=\"packets=packets+{pkt};bytes=bytes+{bytes};last_update={mytime}\"` will add the data in `pkt` and `bytes` column from the input dataframe to the `packets` and `bytes` columns in the row and set the `last_update` field to `mytime`. the rows are selected based on the input dataframe index" + "\n", + "### Write Data Using an Update Expression\n", + "\n", + "In many cases, it's useful to update specific attributes (columns) by using an update expression (for example, `counter = counter + 1`).\n", + "The `write` method and the `update` command of the `execute` method of the Frames client support an optional `expression` parameter for the `\"kv\"` backend, which can be set to a [platform update expression](https://www.iguazio.com/docs/reference/latest-release/expressions/update-expression/).\n", + "The difference is that `write` applies the expression to all the DataFrame items (rows) while `update` applies the expression only to a single item, as explained in the following examples.\n", + "\n", + "In Frames update expressions, attributes (columns) in the written DataFrame are embedded within curly braces (`{ATTRIBUTE}`); attributes in the target table are specified simply by their names (`ATTRIBUTE`), as with all platform expressions.\n", + "For example, `expression=\"packets=packets+{pkt}; bytes=bytes+{bytes}; last_update={mytime}\"` updates the values of the `packets` and `bytes` attributes in the table item by adding to their current values the values of the `pkt` and `bytes` DataFrame columns, and sets the value of the `last_update` attribute in the table item to the value of the `mytime` DataFrame column (creating the attribute if it doesn't already exist in the table item).\n", + "\n", + "> **Note:**\n", + "> - When setting the expression parameter, Frames doesn't update the table schema (unlike in standard writes).\n", + "> - Both the `write` method and the `update` command of the `execute` method also support an optional `condition` parameter for the `\"kv\"` backend.
\n", + "> This parameter can be set to a [platform condition expression](https://www.iguazio.com/docs/reference/latest-release/expressions/condition-expression/) to perform a conditional update — i.e., only update or create new items if specific conditions are met.\n", + "> Note that when the condition expression references a non-existing attribute, the condition evaluates to `false`.\n", + "\n", + "- [Use the Write Method to Perform a Batch Update](#frames-kv-write-expression-batch-update)\n", + "- [Use the Update Method's Execute Command to Update a Single Item](#frames-kv-write-expression-single-item-update-w-execute-update-cmd)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Use the Write Method to Perform a Batch Update\n", + "\n", + "The `write` method applies the update expression of the `expression` parameter to all items in the DataFrame (\"batch\" update); i.e., all table items (rows) whose primary-key attribute (index-column) values match those of the DataFrame items are updated, and items that don't exist in the table are created." ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# example: creating a new column which reflect the delta between the old `balance` column and the one provided in df (should result in 0 since df didnt change)\n", - "out = client.write('kv', tablename, df, expression='balance_delta=balance-{balance}')" + "# Add a new \"balance_delta\" attribute (column) to all table items (rows) and set its value to the difference (delta) between the\n", + "# current value of the \"balance\" attribute in the table and the value provided for this attribute in the DataFrame.\n", + "# Because the value of \"balance\" in the DataFrame wasn't modified since it was written to the table, the attribute value that is written to table (for all items) should be 0.\n", + "out = client.write(\"kv\", table, df, expression=\"balance_delta = balance - {balance}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Making a single row update using execute command\n", - "The use of `condition` is optional and allow to implement safe/conditional transactions " + "\n", + "#### Use the Update Method's Execute Command to Update a Single Item\n", + "\n", + "The `update` command of the `execute` method updates or creates a single item whose primary-key attribute (index-column) value is specified in the command's `key` parameter, as demonstrated in the following example.\n", + "The example also uses the optional `condition` parameter to perform the update only if the specified condition is met." ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: []" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "client.execute('kv',tablename,'update', args={'key':'44', 'expression': 'age=44', 'condition':'balance>0'})" + "# Conditionally update the table item whose primary-key attribute (index-column) value is 44 (`key`) and\n", + "# set its \"age\" attribute to 44, provided the value of the item's \"balance\" attribute is greater than 0.\n", + "client.execute(\"kv\", table, \"update\", args={\"key\": \"44\", \"expression\": \"age=44\", \"condition\": \"balance > 0\"})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Delete the table\n", - "note: in kv (NoSQL) tabels there is no need to create a table before using it" + "\n", + "### Delete the NoSQL Table\n", + "\n", + "Use the `delete` method of the Frames client with the `\"kv\"` backend to delete the NoSQL table that was used in the previous steps." ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ - "client.delete('kv',table=tablename)" + "# Delete the `table` NoSQL table\n", + "client.delete(\"kv\", table)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - "## Working with time-series data" + "\n", + "## Working with Time-Series Databases (\"tsdb\" Backend)\n", + "\n", + "This section demonstrates how to use the `\"tsdb\"` Frames backend to create a time-series database (TSDB) table in the platform, ingest data into the table, and read from the table (i.e., submit TSDB queries).\n", + "\n", + "- [Initialization](#frames-tsdb-init)\n", + "- [Create a TSDB Table](#frames-tsdb-create)\n", + "- [Write to the TSDB Table](#frames-tsdb-write)\n", + "- [Read from the TSDB Table](#frames-tsdb-read)\n", + "- [Delete the TSDB Table](#frames-tsdb-delete)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Note that the tsdb table example will be created under the root of the \"users\" container" + "\n", + "### Initialization\n", + "\n", + "Start out by defining a TSDB table-path variable that will be used in the tutorial's code examples.
\n", + "The table path (`tsdb_table`) is relative to the configured parent data container; see [Create a TSDB Table](#frames-tsdb-create)." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Relative path to the TSDB table within the parent platform data container\n", + "tsdb_table = os.path.join(os.getenv(\"V3IO_USERNAME\") + \"/examples/tsdb_tab\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "### Create a TSDB Table\n", + "\n", + "Use the `create` method of the Frames client with the `\"tsdb\"` backend to create a new TSDB table.
\n", + "The mandatory `table` parameter specifies the relative table path within the data container that was configured for the Frames client (see the [main initialization](#frames-init) step).\n", + "In the following example, the relative table path is set by using the `tsdb_table` variable that was defined in the [\"tsdb\" backend initialization](#frames-tsdb-init) step.
\n", + "You can optionally use the `attrs` parameter to provide additional arguments.\n", + "For example, you can set the `rate` argument to the TSDB’s metric-samples ingestion rate (`\"[0-9]+/[smh]\"`; for example, `1/s`); the rate should be calculated according to the slowest expected ingestion rate." ] }, { @@ -616,8 +751,33 @@ "metadata": {}, "outputs": [], "source": [ - "# create a time series table, rate specifies the typical ingestion rate (e.g. one sample per minute)\n", - "client.create(backend='tsdb', table='tsdb_tab',attrs={'rate':'1/m'})" + "# Create a new TSDB table; ingestion rate = one sample per minute (\"1/m\")\n", + "client.create(backend=\"tsdb\", table=tsdb_table, attrs={\"rate\": \"1/m\"})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "### Write to the TSDB Table\n", + "\n", + "Use the `write` method of the Frames client with the `\"tsdb\"` backend to ingest data from a pandas DataFrame into your TSDB table.
\n", + "The primary-key attribute of platform TSDB tables (i.e., the DataFrame index column) must hold the sample time of the data (displayed as `time` in read outputs).
\n", + "In addition, TSDB table items (rows) can optionally have sub-index columns (attributes) that are called labels.\n", + "You can add labels to TSDB table items in one of two ways; you can also combine these methods:\n", + "\n", + "- Use the `labels` dictionary parameter of the `write` method to add labels to all the written metric-sample table items (DataFrame rows) — `{\"