diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 169a6707..a4610fad 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,14 +17,6 @@ jobs: requirements: ['-r requirements.txt'] include: # historical requirements - - name: "Minimum install_requires versions" - requirements: numpy~=1.12.0 pandas~=0.24.0 SQLAlchemy~=1.2.19 psycopg2~=2.7.0 PyMySQL==1.0.2 - pytest_flags: --ignore=siuba/dply/forcats.py siuba - python-version: 3.6 - - name: "2019-late dependencies" - requirements: numpy==1.17.4 pandas==0.24.2 SQLAlchemy==1.2.19 psycopg2==2.8.4 PyMySQL==1.0.2 - pytest_flags: --ignore=siuba/dply/forcats.py siuba - python-version: 3.6 - name: "2020-early dependencies" requirements: numpy==1.17.4 pandas~=0.25.3 SQLAlchemy~=1.3.11 psycopg2~=2.8.4 PyMySQL==1.0.2 pytest_flags: --ignore=siuba/dply/forcats.py siuba @@ -37,6 +29,14 @@ jobs: python-version: 3.8 requirements: numpy~=1.19.1 pandas~=1.1.0 SQLAlchemy~=1.4.13 psycopg2~=2.8.5 PyMySQL==1.0.2 latest: true + - name: "2022-early dependencies" + python-version: 3.8 + requirements: numpy~=1.22.0 pandas~=1.3.5 SQLAlchemy~=1.4.29 psycopg2-binary~=2.9.3 PyMySQL==1.0.2 + latest: true + - name: "2022-early dependencies (Python 3.10)" + python-version: 3.10.1 + requirements: numpy~=1.22.0 pandas~=1.3.5 SQLAlchemy~=1.4.29 psycopg2-binary~=2.9.3 PyMySQL==1.0.2 + latest: true steps: - uses: actions/checkout@v2 diff --git a/examples/examples-duckdb.ipynb b/examples/examples-duckdb.ipynb new file mode 100644 index 00000000..e568f373 --- /dev/null +++ b/examples/examples-duckdb.ipynb @@ -0,0 +1,2906 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# DuckDB\n" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# NBVAL_IGNORE_OUTPUT\n", + "from sqlalchemy import sql\n", + "from 
sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKey, Sequence, DateTime\n", + "from datetime import datetime\n", + "from sqlalchemy import create_engine\n", + "import os\n", + "\n", + "# To use DuckDB with siuba, just follow these steps! \n", + "# 1. pip install duckdb\n", + "# 2. pip install duckdb_engine (install the DuckDB SQLAlchemy driver)\n", + "\n", + "# For more details on DuckDB, visit https://www.duckdb.org\n", + "\n", + "# This creates an in-memory duckdb, but a file-based one can also be created by replacing :memory: with a path to the file\n", + "engine = create_engine(\"duckdb:///:memory:\")\n", + "# engine = create_engine('duckdb:////path/to/duck.db')\n", + "\n", + "metadata = MetaData()\n", + "# The sequence is needed because SQLAlchemy defaults to using the legacy PostgreSQL data type of Serial, which DuckDB does not support\n", + "# See the readme of duckdb_engine for details: https://github.com/Mause/duckdb_engine\n", + "user_id_seq = Sequence('user_id_seq')\n", + "users = Table('users', metadata,\n", + " Column('id', Integer, user_id_seq, server_default=user_id_seq.next_value(), primary_key=True),\n", + " Column('name', String),\n", + " Column('fullname', String),\n", + ")\n", + "\n", + "# The sequence is needed because SQLAlchemy defaults to using the legacy PostgreSQL data type of Serial, which DuckDB does not support\n", + "# See the readme of duckdb_engine for details: https://github.com/Mause/duckdb_engine\n", + "\n", + "# A DateTime field was also added in order to test date logic\n", + "address_id_seq = Sequence('address_id_seq')\n", + "addresses = Table('addresses', metadata,\n", + " Column('id', Integer, address_id_seq,server_default=address_id_seq.next_value(), primary_key=True),\n", + " Column('user_id', None, ForeignKey('users.id')),\n", + " Column('email_address', String, nullable=False),\n", + " Column('update_dt', DateTime)\n", + " )\n", + "\n", + "metadata.drop_all(engine)\n", + "metadata.create_all(engine)\n", 
+ "\n", + "conn = engine.connect()\n", + "\n", + "ins = users.insert().values(name='jack', fullname='Jack Jones')\n", + "result = conn.execute(ins)\n", + "\n", + "\n", + "ins = users.insert()\n", + "conn.execute(ins, id=2, name='wendy', fullname='Wendy Williams')\n", + "\n", + "\n", + "conn.execute(addresses.insert(), [\n", + " {'user_id': 1, 'email_address' : 'jack@yahoo.com', 'update_dt':datetime.now()},\n", + " {'user_id': 1, 'email_address' : 'jack@msn.com', 'update_dt':datetime.now()},\n", + " {'user_id': 2, 'email_address' : 'www@www.org', 'update_dt':datetime.now()},\n", + " {'user_id': 2, 'email_address' : 'wendy@aol.com', 'update_dt':datetime.now()},\n", + "])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Big Example" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT anon_1.id, anon_1.user_id, anon_1.email_address, anon_1.update_dt, anon_1.num \n", + "FROM (SELECT id, user_id, email_address, update_dt, num, min(anon_2.id) OVER (PARTITION BY anon_2.user_id) AS win1, anon_2.id > min(anon_2.id) OVER (PARTITION BY anon_2.user_id) AS win2 \n", + "FROM (SELECT id, user_id, email_address, update_dt, dense_rank() OVER (PARTITION BY addresses.user_id ORDER BY addresses.id) AS num \n", + "FROM addresses) AS anon_2) AS anon_1 \n", + "WHERE anon_1.win2 AND (anon_1.email_address LIKE 'jack' || '%')\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\siuba\\sql\\translate.py:24: SiubaSqlRuntimeWarning: \n", + "dense_rank sql translation defaults na_option to None. To return identical result as pandas, use na_option = 'keep'.\n", + "\n", + "This warning only displays once per function\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iduser_idemail_addressupdate_dtnum
021jack@msn.com2022-01-19 08:12:01.0206802
\n", + "
" + ], + "text/plain": [ + " id user_id email_address update_dt num\n", + "0 2 1 jack@msn.com 2022-01-19 08:12:01.020680 2" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#If on Windows, be sure to pip install pyreadline\n", + "from siuba import *\n", + "from siuba.sql.verbs import LazyTbl, collect, show_query\n", + "from siuba.sql.dply.vector import dense_rank\n", + "import siuba.meta_hook.sqlalchemy.sql.functions as F\n", + "\n", + "from sqlalchemy import sql\n", + "\n", + "tbl_addresses = LazyTbl(conn, addresses)\n", + "tbl_users = LazyTbl(conn, users)\n", + "\n", + "#tbl_addresses >> mutate(_, num = dense_rank(_.id)) >> show_query(_)\n", + "q = (tbl_addresses\n", + " >> group_by(\"user_id\")\n", + " >> mutate(num = dense_rank(_.id))\n", + " >> filter(\n", + " _.id > _.id.min(),\n", + " _.email_address.str.startswith(\"jack\")\n", + " )\n", + " >> ungroup()\n", + " >> show_query(simplify = True)\n", + " >> collect()\n", + " )\n", + "\n", + "q" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Mutate" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT addresses.id, addresses.user_id, addresses.email_address, addresses.update_dt, dense_rank() OVER (ORDER BY addresses.id) + 1 AS rank \n", + "FROM addresses\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\siuba\\sql\\translate.py:24: SiubaSqlRuntimeWarning: \n", + "dense_rank sql translation defaults na_option to None. To return identical result as pandas, use na_option = 'keep'.\n", + "\n", + "This warning only displays once per function\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "
# Source: lazy query\n",
+       "# DB Conn: Engine(duckdb:///:memory:)\n",
+       "# Preview:\n",
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iduser_idemail_addressupdate_dtrank
011jack@yahoo.com2022-01-19 08:12:01.0206802
121jack@msn.com2022-01-19 08:12:01.0206803
232www@www.org2022-01-19 08:12:01.0206804
342wendy@aol.com2022-01-19 08:12:01.0206805
\n", + "

# .. may have more rows

" + ], + "text/plain": [ + "# Source: lazy query\n", + "# DB Conn: Engine(duckdb:///:memory:)\n", + "# Preview:\n", + " id user_id email_address update_dt rank\n", + "0 1 1 jack@yahoo.com 2022-01-19 08:12:01.020680 2\n", + "1 2 1 jack@msn.com 2022-01-19 08:12:01.020680 3\n", + "2 3 2 www@www.org 2022-01-19 08:12:01.020680 4\n", + "3 4 2 wendy@aol.com 2022-01-19 08:12:01.020680 5\n", + "# .. may have more rows" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "q = (tbl_addresses\n", + " >> mutate(rank = dense_rank(_.id) + 1)\n", + " >> show_query()\n", + " )\n", + "q" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT addresses.id, addresses.user_id, addresses.email_address, addresses.update_dt, addresses.id > dense_rank() OVER (PARTITION BY addresses.user_id, addresses.user_id, addresses.user_id ORDER BY addresses.id) + 1 AS rank \n", + "FROM addresses\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\siuba\\sql\\translate.py:24: SiubaSqlRuntimeWarning: \n", + "dense_rank sql translation defaults na_option to None. To return identical result as pandas, use na_option = 'keep'.\n", + "\n", + "This warning only displays once per function\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "
# Source: lazy query\n",
+       "# DB Conn: Engine(duckdb:///:memory:)\n",
+       "# Preview:\n",
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iduser_idemail_addressupdate_dtrank
032www@www.org2022-01-19 08:12:01.020680True
142wendy@aol.com2022-01-19 08:12:01.020680True
211jack@yahoo.com2022-01-19 08:12:01.020680False
321jack@msn.com2022-01-19 08:12:01.020680False
\n", + "

# .. may have more rows

" + ], + "text/plain": [ + "# Source: lazy query\n", + "# DB Conn: Engine(duckdb:///:memory:)\n", + "# Preview:\n", + " id user_id email_address update_dt rank\n", + "0 3 2 www@www.org 2022-01-19 08:12:01.020680 True\n", + "1 4 2 wendy@aol.com 2022-01-19 08:12:01.020680 True\n", + "2 1 1 jack@yahoo.com 2022-01-19 08:12:01.020680 False\n", + "3 2 1 jack@msn.com 2022-01-19 08:12:01.020680 False\n", + "# .. may have more rows" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "q = (tbl_addresses\n", + " >> group_by(\"user_id\")\n", + " >> mutate(rank = _.id > dense_rank(_.id) + 1)\n", + " >> show_query()\n", + " )\n", + "q" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT anon_1.email, anon_1.is_mikey, CAST(anon_1.is_mikey AS INTEGER) + 1 AS mikey2 \n", + "FROM (SELECT anon_2.email AS email, (anon_2.email LIKE 'mikey' || '%') AS is_mikey \n", + "FROM (SELECT addresses.email_address AS email \n", + "FROM addresses) AS anon_2) AS anon_1\n" + ] + }, + { + "data": { + "text/html": [ + "
# Source: lazy query\n",
+       "# DB Conn: Engine(duckdb:///:memory:)\n",
+       "# Preview:\n",
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
emailis_mikeymikey2
0jack@yahoo.comFalse1
1jack@msn.comFalse1
2www@www.orgFalse1
3wendy@aol.comFalse1
\n", + "

# .. may have more rows

" + ], + "text/plain": [ + "# Source: lazy query\n", + "# DB Conn: Engine(duckdb:///:memory:)\n", + "# Preview:\n", + " email is_mikey mikey2\n", + "0 jack@yahoo.com False 1\n", + "1 jack@msn.com False 1\n", + "2 www@www.org False 1\n", + "3 wendy@aol.com False 1\n", + "# .. may have more rows" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# rename and first mutate in same query,\n", + "# second mutate is outer query (since uses to prev col)\n", + "# Need to convert the boolean variable to an integer in order for DuckDB to be able to add 1 to it\n", + "q = (tbl_addresses\n", + " >> select(_.email == _.email_address)\n", + " >> mutate(is_mikey = _.email.str.startswith(\"mikey\"), mikey2 = _.is_mikey.astype(int) + 1)\n", + " >> show_query()\n", + " )\n", + "q" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Filter" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT anon_1.id, anon_1.user_id, anon_1.email_address, anon_1.update_dt \n", + "FROM (SELECT addresses.id AS id, addresses.user_id AS user_id, addresses.email_address AS email_address, addresses.update_dt AS update_dt \n", + "FROM addresses) AS anon_1 \n", + "WHERE anon_1.id > 1\n" + ] + }, + { + "data": { + "text/html": [ + "
# Source: lazy query\n",
+       "# DB Conn: Engine(duckdb:///:memory:)\n",
+       "# Preview:\n",
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iduser_idemail_addressupdate_dt
021jack@msn.com2022-01-19 08:12:01.020680
132www@www.org2022-01-19 08:12:01.020680
242wendy@aol.com2022-01-19 08:12:01.020680
\n", + "

# .. may have more rows

" + ], + "text/plain": [ + "# Source: lazy query\n", + "# DB Conn: Engine(duckdb:///:memory:)\n", + "# Preview:\n", + " id user_id email_address update_dt\n", + "0 2 1 jack@msn.com 2022-01-19 08:12:01.020680\n", + "1 3 2 www@www.org 2022-01-19 08:12:01.020680\n", + "2 4 2 wendy@aol.com 2022-01-19 08:12:01.020680\n", + "# .. may have more rows" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "q = (tbl_addresses\n", + " >> filter(_.id > 1)\n", + " >> show_query()\n", + " )\n", + "q" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT anon_1.id, anon_1.user_id, anon_1.email_address, anon_1.update_dt \n", + "FROM (SELECT addresses.id AS id, addresses.user_id AS user_id, addresses.email_address AS email_address, addresses.update_dt AS update_dt \n", + "FROM addresses) AS anon_1 \n", + "WHERE anon_1.id > 1\n" + ] + }, + { + "data": { + "text/html": [ + "
# Source: lazy query\n",
+       "# DB Conn: Engine(duckdb:///:memory:)\n",
+       "# Preview:\n",
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iduser_idemail_addressupdate_dt
021jack@msn.com2022-01-19 08:12:01.020680
132www@www.org2022-01-19 08:12:01.020680
242wendy@aol.com2022-01-19 08:12:01.020680
\n", + "

# .. may have more rows

" + ], + "text/plain": [ + "# Source: lazy query\n", + "# DB Conn: Engine(duckdb:///:memory:)\n", + "# Preview:\n", + " id user_id email_address update_dt\n", + "0 2 1 jack@msn.com 2022-01-19 08:12:01.020680\n", + "1 3 2 www@www.org 2022-01-19 08:12:01.020680\n", + "2 4 2 wendy@aol.com 2022-01-19 08:12:01.020680\n", + "# .. may have more rows" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "q = (tbl_addresses\n", + " >> group_by(\"user_id\")\n", + " >> filter(_.id > 1)\n", + " >> show_query()\n", + " )\n", + "q" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT anon_1.id, anon_1.user_id, anon_1.email_address, anon_1.update_dt \n", + "FROM (SELECT anon_2.id AS id, anon_2.user_id AS user_id, anon_2.email_address AS email_address, anon_2.update_dt AS update_dt, dense_rank() OVER (PARTITION BY anon_2.user_id, anon_2.user_id ORDER BY anon_2.id) AS win1, dense_rank() OVER (PARTITION BY anon_2.user_id, anon_2.user_id ORDER BY anon_2.id) > 1 AS win2 \n", + "FROM (SELECT addresses.id AS id, addresses.user_id AS user_id, addresses.email_address AS email_address, addresses.update_dt AS update_dt \n", + "FROM addresses) AS anon_2) AS anon_1 \n", + "WHERE anon_1.win2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\siuba\\sql\\translate.py:24: SiubaSqlRuntimeWarning: \n", + "dense_rank sql translation defaults na_option to None. To return identical result as pandas, use na_option = 'keep'.\n", + "\n", + "This warning only displays once per function\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iduser_idemail_addressupdate_dt
042wendy@aol.com2022-01-19 08:12:01.020680
121jack@msn.com2022-01-19 08:12:01.020680
\n", + "
" + ], + "text/plain": [ + " id user_id email_address update_dt\n", + "0 4 2 wendy@aol.com 2022-01-19 08:12:01.020680\n", + "1 2 1 jack@msn.com 2022-01-19 08:12:01.020680" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "q = (tbl_addresses\n", + " >> group_by(\"user_id\")\n", + " >> filter(dense_rank(_.id) > 1)\n", + " >> show_query()\n", + " >> collect()\n", + " )\n", + "\n", + "q" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summarize" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT addresses.user_id, avg(addresses.id) AS avg_id \n", + "FROM addresses GROUP BY addresses.user_id\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_idavg_id
011.5
123.5
\n", + "
" + ], + "text/plain": [ + " user_id avg_id\n", + "0 1 1.5\n", + "1 2 3.5" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "q = (tbl_addresses\n", + " >> group_by(\"user_id\")\n", + " >> summarize(avg_id = _.id.mean())\n", + " >> show_query()\n", + " >> collect()\n", + " )\n", + "\n", + "q" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT avg(anon_1.id2) AS m_id \n", + "FROM (SELECT addresses.id AS id, addresses.user_id AS user_id, addresses.email_address AS email_address, addresses.update_dt AS update_dt, addresses.id + 1 AS id2 \n", + "FROM addresses) AS anon_1\n" + ] + }, + { + "data": { + "text/html": [ + "
# Source: lazy query\n",
+       "# DB Conn: Engine(duckdb:///:memory:)\n",
+       "# Preview:\n",
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
m_id
03.5
\n", + "

# .. may have more rows

" + ], + "text/plain": [ + "# Source: lazy query\n", + "# DB Conn: Engine(duckdb:///:memory:)\n", + "# Preview:\n", + " m_id\n", + "0 3.5\n", + "# .. may have more rows" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "q = (tbl_addresses >> mutate(_, id2 = _.id + 1) >> summarize(_, m_id = _.id2.mean())) >> show_query()\n", + "q" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Count" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT anon_1.user_id, anon_1.id, count(*) AS n \n", + "FROM (SELECT addresses.id AS id, addresses.user_id AS user_id, addresses.email_address AS email_address, addresses.update_dt AS update_dt \n", + "FROM addresses) AS anon_1 GROUP BY anon_1.user_id, anon_1.id ORDER BY n DESC\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_ididn
0111
1121
2231
3241
\n", + "
" + ], + "text/plain": [ + " user_id id n\n", + "0 1 1 1\n", + "1 1 2 1\n", + "2 2 3 1\n", + "3 2 4 1" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "q = (tbl_addresses\n", + " >> group_by(\"user_id\")\n", + " >> count(_.id)\n", + " >> show_query()\n", + " >> collect()\n", + ")\n", + "\n", + "q" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Joins" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT anon_1.id, anon_1.user_id, anon_1.email_address, anon_1.update_dt, anon_2.fullname, anon_2.name \n", + "FROM (SELECT addresses.id AS id, addresses.user_id AS user_id, addresses.email_address AS email_address, addresses.update_dt AS update_dt \n", + "FROM addresses) AS anon_1 LEFT OUTER JOIN (SELECT users.id AS id, users.name AS name, users.fullname AS fullname \n", + "FROM users) AS anon_2 ON anon_1.user_id = anon_2.id\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iduser_idemail_addressupdate_dtfullnamename
011jack@yahoo.com2022-01-19 08:12:01.020680Jack Jonesjack
121jack@msn.com2022-01-19 08:12:01.020680Jack Jonesjack
232www@www.org2022-01-19 08:12:01.020680Wendy Williamswendy
342wendy@aol.com2022-01-19 08:12:01.020680Wendy Williamswendy
\n", + "
" + ], + "text/plain": [ + " id user_id email_address update_dt fullname \\\n", + "0 1 1 jack@yahoo.com 2022-01-19 08:12:01.020680 Jack Jones \n", + "1 2 1 jack@msn.com 2022-01-19 08:12:01.020680 Jack Jones \n", + "2 3 2 www@www.org 2022-01-19 08:12:01.020680 Wendy Williams \n", + "3 4 2 wendy@aol.com 2022-01-19 08:12:01.020680 Wendy Williams \n", + "\n", + " name \n", + "0 jack \n", + "1 jack \n", + "2 wendy \n", + "3 wendy " + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# TODO: not executable like this, how to get first SELECT out of parens?\n", + "# E.g. can use users.join, etc..\n", + "q = (tbl_addresses\n", + " >> left_join(_, tbl_users, {\"user_id\": \"id\"})\n", + " >> show_query()\n", + " >> collect()\n", + " )\n", + "\n", + "q" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## case_when" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT addresses.id, addresses.user_id, addresses.email_address, addresses.update_dt, CASE WHEN (addresses.id > 20) THEN 0 WHEN (addresses.id > 1) THEN 1 ELSE addresses.id END AS label \n", + "FROM addresses\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iduser_idemail_addressupdate_dtlabel
011jack@yahoo.com2022-01-19 08:12:01.0206801
121jack@msn.com2022-01-19 08:12:01.0206801
232www@www.org2022-01-19 08:12:01.0206801
342wendy@aol.com2022-01-19 08:12:01.0206801
\n", + "
" + ], + "text/plain": [ + " id user_id email_address update_dt label\n", + "0 1 1 jack@yahoo.com 2022-01-19 08:12:01.020680 1\n", + "1 2 1 jack@msn.com 2022-01-19 08:12:01.020680 1\n", + "2 3 2 www@www.org 2022-01-19 08:12:01.020680 1\n", + "3 4 2 wendy@aol.com 2022-01-19 08:12:01.020680 1" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "## TODO: fix sql case_when statements\n", + "q = (tbl_addresses\n", + " >> mutate(\n", + " label = case_when(_, {\n", + " _.id > 20: 0,\n", + " _.id > 1: 1,\n", + " True: _.id\n", + " })\n", + " )\n", + " >> show_query()\n", + " >> collect()\n", + " )\n", + "\n", + "q" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT anon_1.id, anon_1.user_id, anon_1.email_address, anon_1.update_dt \n", + "FROM (SELECT addresses.id AS id, addresses.user_id AS user_id, addresses.email_address AS email_address, addresses.update_dt AS update_dt \n", + "FROM addresses) AS anon_1 \n", + "WHERE CASE WHEN (anon_1.id > 20) THEN true WHEN (anon_1.id > 1) THEN false ELSE true END\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iduser_idemail_addressupdate_dt
011jack@yahoo.com2022-01-19 08:12:01.020680
\n", + "
" + ], + "text/plain": [ + " id user_id email_address update_dt\n", + "0 1 1 jack@yahoo.com 2022-01-19 08:12:01.020680" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "## TODO: fix sql case_when statements\n", + "# works, but better to just use filter normally...\n", + "q = (tbl_addresses\n", + " >> filter(\n", + " case_when(_, {\n", + " _.id > 20: True,\n", + " _.id > 1: False,\n", + " True: True\n", + " })\n", + " )\n", + " >> show_query()\n", + " >> collect()\n", + " )\n", + "\n", + "q" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT addresses.id, addresses.user_id, addresses.email_address, addresses.update_dt, CASE WHEN (addresses.id > avg(addresses.id) OVER (PARTITION BY addresses.user_id)) THEN 0 WHEN (addresses.id > 20) THEN 1 ELSE addresses.id END AS label \n", + "FROM addresses\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iduser_idemail_addressupdate_dtlabel
011jack@yahoo.com2022-01-19 08:12:01.0206801
121jack@msn.com2022-01-19 08:12:01.0206800
232www@www.org2022-01-19 08:12:01.0206803
342wendy@aol.com2022-01-19 08:12:01.0206800
\n", + "
" + ], + "text/plain": [ + " id user_id email_address update_dt label\n", + "0 1 1 jack@yahoo.com 2022-01-19 08:12:01.020680 1\n", + "1 2 1 jack@msn.com 2022-01-19 08:12:01.020680 0\n", + "2 3 2 www@www.org 2022-01-19 08:12:01.020680 3\n", + "3 4 2 wendy@aol.com 2022-01-19 08:12:01.020680 0" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "q = (tbl_addresses\n", + " >> group_by(\"user_id\")\n", + " >> mutate(\n", + " label = case_when(_, {\n", + " _.id > _.id.mean(): 0,\n", + " _.id > 20: 1,\n", + " True: _.id\n", + " })\n", + " )\n", + " >> show_query()\n", + " >> collect()\n", + " )\n", + "\n", + "q" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "█─'__call__'\n", + "├─\n", + "├─_\n", + "└─█─''\n", + " └─█─'__call__'\n", + " ├─\n", + " └─{_.id > 1: 'yeah', True: 'no'}" + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# NBVAL_IGNORE_OUTPUT\n", + "case_when(_, {_.id > 1: \"yeah\", True: \"no\"})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## if_else" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT addresses.id, addresses.user_id, addresses.email_address, addresses.update_dt, CASE WHEN (dense_rank() OVER (ORDER BY addresses.id) > 1) THEN 'yes' ELSE 'no' END AS big_id \n", + "FROM addresses\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\siuba\\sql\\translate.py:24: SiubaSqlRuntimeWarning: \n", + "dense_rank sql translation defaults na_option to None. 
To return identical result as pandas, use na_option = 'keep'.\n", + "\n", + "This warning only displays once per function\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "
# Source: lazy query\n",
+       "# DB Conn: Engine(duckdb:///:memory:)\n",
+       "# Preview:\n",
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iduser_idemail_addressupdate_dtbig_id
011jack@yahoo.com2022-01-19 08:12:01.020680no
121jack@msn.com2022-01-19 08:12:01.020680yes
232www@www.org2022-01-19 08:12:01.020680yes
342wendy@aol.com2022-01-19 08:12:01.020680yes
\n", + "

# .. may have more rows

" + ], + "text/plain": [ + "# Source: lazy query\n", + "# DB Conn: Engine(duckdb:///:memory:)\n", + "# Preview:\n", + " id user_id email_address update_dt big_id\n", + "0 1 1 jack@yahoo.com 2022-01-19 08:12:01.020680 no\n", + "1 2 1 jack@msn.com 2022-01-19 08:12:01.020680 yes\n", + "2 3 2 www@www.org 2022-01-19 08:12:01.020680 yes\n", + "3 4 2 wendy@aol.com 2022-01-19 08:12:01.020680 yes\n", + "# .. may have more rows" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "q = (tbl_addresses\n", + " >> mutate(big_id = if_else(dense_rank(_.id) > 1, \"yes\", \"no\"))\n", + " >> show_query()\n", + " )\n", + "q" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Head" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT addresses.id, addresses.user_id, addresses.email_address, addresses.update_dt \n", + "FROM addresses \n", + " LIMIT 3\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iduser_idemail_addressupdate_dt
011jack@yahoo.com2022-01-19 08:12:01.020680
121jack@msn.com2022-01-19 08:12:01.020680
232www@www.org2022-01-19 08:12:01.020680
\n", + "
" + ], + "text/plain": [ + " id user_id email_address update_dt\n", + "0 1 1 jack@yahoo.com 2022-01-19 08:12:01.020680\n", + "1 2 1 jack@msn.com 2022-01-19 08:12:01.020680\n", + "2 3 2 www@www.org 2022-01-19 08:12:01.020680" + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(tbl_addresses\n", + " >> head(3)\n", + " >> show_query()\n", + " >> collect()\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Rename" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT addresses.id AS id2, addresses.user_id, addresses.email_address, addresses.update_dt \n", + "FROM addresses\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
id2user_idemail_addressupdate_dt
011jack@yahoo.com2022-01-19 08:12:01.020680
121jack@msn.com2022-01-19 08:12:01.020680
232www@www.org2022-01-19 08:12:01.020680
342wendy@aol.com2022-01-19 08:12:01.020680
\n", + "
" + ], + "text/plain": [ + " id2 user_id email_address update_dt\n", + "0 1 1 jack@yahoo.com 2022-01-19 08:12:01.020680\n", + "1 2 1 jack@msn.com 2022-01-19 08:12:01.020680\n", + "2 3 2 www@www.org 2022-01-19 08:12:01.020680\n", + "3 4 2 wendy@aol.com 2022-01-19 08:12:01.020680" + ] + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(tbl_addresses\n", + " >> rename(id2 = \"id\")\n", + " >> show_query()\n", + " >> collect()\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Distinct" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT DISTINCT addresses.user_id, addresses.user_id + 1 AS user_id2 \n", + "FROM addresses\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_iduser_id2
012
123
\n", + "
" + ], + "text/plain": [ + " user_id user_id2\n", + "0 1 2\n", + "1 2 3" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(tbl_addresses\n", + " >> distinct(_.user_id, user_id2 = _.user_id + 1)\n", + " >> show_query()\n", + " >> collect()\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT DISTINCT addresses.email_address, dense_rank() OVER (PARTITION BY addresses.user_id ORDER BY addresses.user_id) AS user_id2 \n", + "FROM addresses\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\siuba\\sql\\translate.py:24: SiubaSqlRuntimeWarning: \n", + "dense_rank sql translation defaults na_option to None. To return identical result as pandas, use na_option = 'keep'.\n", + "\n", + "This warning only displays once per function\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
email_addressuser_id2
0jack@yahoo.com1
1jack@msn.com1
2www@www.org1
3wendy@aol.com1
\n", + "
" + ], + "text/plain": [ + " email_address user_id2\n", + "0 jack@yahoo.com 1\n", + "1 jack@msn.com 1\n", + "2 www@www.org 1\n", + "3 wendy@aol.com 1" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(tbl_addresses\n", + " >> group_by(\"user_id\")\n", + " >> distinct(_.email_address, user_id2 = dense_rank(_.user_id))\n", + " >> show_query()\n", + " >> collect()\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Technical" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Translating symbolic function calls" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_idn
012
122
\n", + "
" + ], + "text/plain": [ + " user_id n\n", + "0 1 2\n", + "1 2 2" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from siuba.dply.vector import n\n", + "\n", + "tbl_addresses \\\n", + " >> group_by(\"user_id\") \\\n", + " >> summarize(n = n(_)) \\\n", + " >> collect()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Translating str methods" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Translating dt methods" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iduser_idemail_addresshour
011jack@yahoo.com15
121jack@msn.com15
232www@www.org15
342wendy@aol.com15
\n", + "
" + ], + "text/plain": [ + " id user_id email_address hour\n", + "0 1 1 jack@yahoo.com 15\n", + "1 2 1 jack@msn.com 15\n", + "2 3 2 www@www.org 15\n", + "3 4 2 wendy@aol.com 15" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "# pd.read_sql(\"\"\"select * from information_schema.tables\"\"\",conn)\n", + "# pd.read_sql(\"\"\"select * from pragma_table_info('addresses')\"\"\",conn)\n", + "# pd.read_sql(\"\"\"select * from addresses\"\"\",conn)\n", + "pd.read_sql(\"\"\"SELECT addresses.id, addresses.user_id, addresses.email_address, EXTRACT(hour FROM current_timestamp) AS hour \n", + "FROM addresses\"\"\",conn)" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT addresses.id, addresses.user_id, addresses.email_address, addresses.update_dt, EXTRACT(hour FROM addresses.update_dt) AS hour \n", + "FROM addresses\n" + ] + }, + { + "data": { + "text/html": [ + "
# Source: lazy query\n",
+       "# DB Conn: Engine(duckdb:///:memory:)\n",
+       "# Preview:\n",
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iduser_idemail_addressupdate_dthour
011jack@yahoo.com2022-01-19 08:12:01.0206808
121jack@msn.com2022-01-19 08:12:01.0206808
232www@www.org2022-01-19 08:12:01.0206808
342wendy@aol.com2022-01-19 08:12:01.0206808
\n", + "

# .. may have more rows

" + ], + "text/plain": [ + "# Source: lazy query\n", + "# DB Conn: Engine(duckdb:///:memory:)\n", + "# Preview:\n", + " id user_id email_address update_dt hour\n", + "0 1 1 jack@yahoo.com 2022-01-19 08:12:01.020680 8\n", + "1 2 1 jack@msn.com 2022-01-19 08:12:01.020680 8\n", + "2 3 2 www@www.org 2022-01-19 08:12:01.020680 8\n", + "3 4 2 wendy@aol.com 2022-01-19 08:12:01.020680 8\n", + "# .. may have more rows" + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "q = tbl_addresses >> mutate(hour = _.update_dt.dt.hour) >> show_query()\n", + "q" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## SQL escapes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Window functions" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT anon_1.id, anon_1.user_id, anon_1.email_address, anon_1.update_dt, sum(anon_1.user_id) OVER (ORDER BY anon_1.id DESC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumsum \n", + "FROM (SELECT addresses.id AS id, addresses.user_id AS user_id, addresses.email_address AS email_address, addresses.update_dt AS update_dt \n", + "FROM addresses ORDER BY addresses.id DESC) AS anon_1 ORDER BY cumsum\n" + ] + }, + { + "data": { + "text/html": [ + "
# Source: lazy query\n",
+       "# DB Conn: Engine(duckdb:///:memory:)\n",
+       "# Preview:\n",
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iduser_idemail_addressupdate_dtcumsum
042wendy@aol.com2022-01-19 08:12:01.0206802
132www@www.org2022-01-19 08:12:01.0206804
221jack@msn.com2022-01-19 08:12:01.0206805
311jack@yahoo.com2022-01-19 08:12:01.0206806
\n", + "

# .. may have more rows

" + ], + "text/plain": [ + "# Source: lazy query\n", + "# DB Conn: Engine(duckdb:///:memory:)\n", + "# Preview:\n", + " id user_id email_address update_dt cumsum\n", + "0 4 2 wendy@aol.com 2022-01-19 08:12:01.020680 2\n", + "1 3 2 www@www.org 2022-01-19 08:12:01.020680 4\n", + "2 2 1 jack@msn.com 2022-01-19 08:12:01.020680 5\n", + "3 1 1 jack@yahoo.com 2022-01-19 08:12:01.020680 6\n", + "# .. may have more rows" + ] + }, + "execution_count": 97, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from siuba.dply.vector import desc\n", + "(tbl_addresses\n", + " >> arrange(desc(_.id))\n", + " >> mutate(cumsum = _.user_id.cumsum())\n", + " >> arrange(_.cumsum)\n", + " >> show_query()\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Misc" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## postgres specific" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iduser_idemail_addressid2
011jack@yahoo.com1.0
121jack@msn.com2.0
232www@www.org3.0
342wendy@aol.com4.0
\n", + "
" + ], + "text/plain": [ + " id user_id email_address id2\n", + "0 1 1 jack@yahoo.com 1.0\n", + "1 2 1 jack@msn.com 2.0\n", + "2 3 2 www@www.org 3.0\n", + "3 4 2 wendy@aol.com 4.0" + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#import pandas as pd\n", + "pd.read_sql(\"\"\"SELECT addresses.id, addresses.user_id, addresses.email_address, round(CAST(addresses.id AS NUMERIC), 2) AS id2 \n", + "FROM addresses\"\"\",conn)" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT addresses.id, addresses.user_id, addresses.email_address, addresses.update_dt, round(CAST(addresses.id AS NUMERIC), 2) AS id2 \n", + "FROM addresses\n" + ] + }, + { + "ename": "NotImplementedError", + "evalue": "Not implemented Error: ROUND(DECIMAL, INTEGER) with non-constant precision is not supported", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mC:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\duckdb_engine\\__init__.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, statement, parameters, context)\u001b[0m\n\u001b[0;32m 92\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 93\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstatement\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparameters\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 94\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mRuntimeError\u001b[0m \u001b[1;32mas\u001b[0m 
\u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mRuntimeError\u001b[0m: Not implemented Error: ROUND(DECIMAL, INTEGER) with non-constant precision is not supported", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[1;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_24600/4260062141.py\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m (tbl_addresses\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[1;33m>>\u001b[0m \u001b[0mmutate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mid2\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mid\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mround\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;33m>>\u001b[0m \u001b[0mshow_query\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;33m>>\u001b[0m \u001b[0mcollect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m )\n", + "\u001b[1;32mC:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\siuba\\dply\\verbs.py\u001b[0m in \u001b[0;36m__rrshift__\u001b[1;34m(self, x)\u001b[0m\n\u001b[0;32m 97\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mPipeable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcalls\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcalls\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 98\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 99\u001b[1;33m \u001b[1;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 100\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 101\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mC:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\siuba\\dply\\verbs.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, x)\u001b[0m\n\u001b[0;32m 102\u001b[0m \u001b[0mres\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 103\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mf\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcalls\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 104\u001b[1;33m \u001b[0mres\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mres\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 105\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mres\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 106\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mC:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\siuba\\siu.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, x)\u001b[0m\n\u001b[0;32m 200\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0moperator\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetitem\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minst\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0mrest\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 201\u001b[0m \u001b[1;32melif\u001b[0m 
\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfunc\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m\"__call__\"\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 202\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minst\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0mrest\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 203\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 204\u001b[0m \u001b[1;31m# in normal case, get method to call, and then call it\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mC:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\siuba\\dply\\verbs.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 196\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mdispatch_func\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mNoArgs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mstrip_kwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 197\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 198\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mdispatch_func\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0mstrip_args\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mstrip_kwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 199\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 200\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + 
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\functools.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kw)\u001b[0m\n\u001b[0;32m 875\u001b[0m '1 positional argument')\n\u001b[0;32m 876\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 877\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mdispatch\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__class__\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 878\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 879\u001b[0m \u001b[0mfuncname\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'__name__'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'singledispatch function'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mC:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\siuba\\sql\\verbs.py\u001b[0m in \u001b[0;36m_collect\u001b[1;34m(__data, as_df)\u001b[0m\n\u001b[0;32m 432\u001b[0m \u001b[0msql_db\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_FixedSqlDatabase\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mconn\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 433\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 434\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msql_db\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread_sql\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m__data\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlast_op\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 435\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 436\u001b[0m 
\u001b[1;32mreturn\u001b[0m \u001b[0mconn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m__data\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlast_op\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mC:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\pandas\\io\\sql.py\u001b[0m in \u001b[0;36mread_query\u001b[1;34m(self, sql, index_col, coerce_float, parse_dates, params, chunksize, dtype)\u001b[0m\n\u001b[0;32m 1577\u001b[0m \u001b[0margs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_convert_params\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1578\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1579\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1580\u001b[0m \u001b[0mcolumns\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mkeys\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1581\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mC:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\siuba\\sql\\utils.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 46\u001b[0m \u001b[1;32mclass\u001b[0m \u001b[0m_FixedSqlDatabase\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_pd_sql\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSQLDatabase\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 47\u001b[0m \u001b[1;32mdef\u001b[0m 
\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 48\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconnectable\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 49\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 50\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mC:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\sqlalchemy\\engine\\base.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, statement, *multiparams, **params)\u001b[0m\n\u001b[0;32m 1287\u001b[0m )\n\u001b[0;32m 1288\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1289\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mmeth\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmultiparams\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_EMPTY_EXECUTION_OPTS\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1290\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1291\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_execute_function\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmultiparams\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mexecution_options\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mC:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\sqlalchemy\\sql\\elements.py\u001b[0m in \u001b[0;36m_execute_on_connection\u001b[1;34m(self, connection, multiparams, params, execution_options, _force)\u001b[0m\n\u001b[0;32m 323\u001b[0m ):\n\u001b[0;32m 324\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0m_force\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msupports_execution\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 325\u001b[1;33m return connection._execute_clauseelement(\n\u001b[0m\u001b[0;32m 326\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmultiparams\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mexecution_options\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 327\u001b[0m )\n", + "\u001b[1;32mC:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\sqlalchemy\\engine\\base.py\u001b[0m in \u001b[0;36m_execute_clauseelement\u001b[1;34m(self, elem, multiparams, params, execution_options)\u001b[0m\n\u001b[0;32m 1479\u001b[0m \u001b[0mlinting\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdialect\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcompiler_linting\u001b[0m \u001b[1;33m|\u001b[0m \u001b[0mcompiler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mWARN_LINTING\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1480\u001b[0m )\n\u001b[1;32m-> 1481\u001b[1;33m ret = self._execute_context(\n\u001b[0m\u001b[0;32m 1482\u001b[0m \u001b[0mdialect\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1483\u001b[0m 
\u001b[0mdialect\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecution_ctx_cls\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_init_compiled\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mC:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\sqlalchemy\\engine\\base.py\u001b[0m in \u001b[0;36m_execute_context\u001b[1;34m(self, dialect, constructor, statement, parameters, execution_options, *args, **kw)\u001b[0m\n\u001b[0;32m 1843\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1844\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mBaseException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1845\u001b[1;33m self._handle_dbapi_exception(\n\u001b[0m\u001b[0;32m 1846\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstatement\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparameters\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcursor\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcontext\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1847\u001b[0m )\n", + "\u001b[1;32mC:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\sqlalchemy\\engine\\base.py\u001b[0m in \u001b[0;36m_handle_dbapi_exception\u001b[1;34m(self, e, statement, parameters, cursor, context)\u001b[0m\n\u001b[0;32m 2028\u001b[0m )\n\u001b[0;32m 2029\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2030\u001b[1;33m \u001b[0mutil\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mraise_\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mexc_info\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mwith_traceback\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mexc_info\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 
2031\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2032\u001b[0m \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mC:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\sqlalchemy\\util\\compat.py\u001b[0m in \u001b[0;36mraise_\u001b[1;34m(***failed resolving arguments***)\u001b[0m\n\u001b[0;32m 205\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 206\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 207\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mexception\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 208\u001b[0m \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 209\u001b[0m \u001b[1;31m# credit to\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mC:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\sqlalchemy\\engine\\base.py\u001b[0m in \u001b[0;36m_execute_context\u001b[1;34m(self, dialect, constructor, statement, parameters, execution_options, *args, **kw)\u001b[0m\n\u001b[0;32m 1800\u001b[0m \u001b[1;32mbreak\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1801\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mevt_handled\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1802\u001b[1;33m self.dialect.do_execute(\n\u001b[0m\u001b[0;32m 1803\u001b[0m \u001b[0mcursor\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstatement\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparameters\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcontext\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1804\u001b[0m )\n", + "\u001b[1;32mC:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\duckdb_engine\\__init__.py\u001b[0m in 
\u001b[0;36mdo_execute\u001b[1;34m(self, cursor, statement, parameters, context)\u001b[0m\n\u001b[0;32m 129\u001b[0m \u001b[0mcontext\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mPGExecutionContext\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 130\u001b[0m ) -> None:\n\u001b[1;32m--> 131\u001b[1;33m \u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstatement\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparameters\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcontext\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 132\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 133\u001b[0m def do_executemany(\n", + "\u001b[1;32mC:\\ProgramData\\Anaconda3\\envs\\new_duckdb\\lib\\site-packages\\duckdb_engine\\__init__.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, statement, parameters, context)\u001b[0m\n\u001b[0;32m 94\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mRuntimeError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 95\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Not implemented Error\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 96\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 97\u001b[0m 
\u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 98\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mNotImplementedError\u001b[0m: Not implemented Error: ROUND(DECIMAL, INTEGER) with non-constant precision is not supported" + ] + } + ], + "source": [ + "(tbl_addresses\n", + " >> mutate(id2 = _.id.round(2))\n", + " >> show_query()\n", + " >> collect()\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## autoload table w/ sqlalchemy" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT users.id, users.name, users.fullname, users.id + 1 AS id2 \n", + "FROM users\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamefullnameid2
01jackJack Jones2
12wendyWendy Williams3
\n", + "
" + ], + "text/plain": [ + " id name fullname id2\n", + "0 1 jack Jack Jones 2\n", + "1 2 wendy Wendy Williams 3" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import sqlalchemy\n", + "\n", + "metadata2 = MetaData()\n", + "\n", + "#Since we are using an in memory DuckDB that is specific to the connection, need to autoload_with=conn not =engine\n", + "users2 = sqlalchemy.Table('users', metadata2, autoload = True, autoload_with = conn)\n", + "tbl_users2 = LazyTbl(conn, users2)\n", + "\n", + "(tbl_users\n", + " >> mutate(id2 = _.id + 1)\n", + " >> show_query()\n", + " >> collect()\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## auto table from string" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT users.id, users.name, users.fullname, users.id + 1 AS id2 \n", + "FROM users\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamefullnameid2
01jackJack Jones2
12wendyWendy Williams3
\n", + "
" + ], + "text/plain": [ + " id name fullname id2\n", + "0 1 jack Jack Jones 2\n", + "1 2 wendy Wendy Williams 3" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import sqlalchemy\n", + "\n", + "metadata3 = MetaData()\n", + "\n", + "tbl_users3 = LazyTbl(conn, \"users\")\n", + "\n", + "(tbl_users\n", + " >> mutate(id2 = _.id + 1)\n", + " >> show_query()\n", + " >> collect()\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## LazyTbl repr" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
# Source: lazy query\n",
+       "# DB Conn: Engine(duckdb:///:memory:)\n",
+       "# Preview:\n",
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamefullnameid2
01jackJack Jones2
12wendyWendy Williams3
\n", + "

# .. may have more rows

" + ], + "text/plain": [ + "# Source: lazy query\n", + "# DB Conn: Engine(duckdb:///:memory:)\n", + "# Preview:\n", + " id name fullname id2\n", + "0 1 jack Jack Jones 2\n", + "1 2 wendy Wendy Williams 3\n", + "# .. may have more rows" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tbl_users >> mutate(id2 = _.id + 1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "165px" + }, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/requirements-dev.txt b/requirements-dev.txt index c8db4e45..48a5f324 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -61,7 +61,7 @@ pytest==5.3.5 python-dateutil==2.8.1 pytz==2020.1 PyYAML==5.3.1 -pyzmq==19.0.0 +pyzmq==22.3.0 requests==2.24.0 scipy==1.5.2 six==1.14.0 diff --git a/requirements-test.txt b/requirements-test.txt index 6f5d0100..e08925e9 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -13,8 +13,8 @@ jsonschema==3.2.0 jupyter-client==6.0.0 jupyter-core==4.6.3 more-itertools==8.2.0 -nbformat==5.0.4 -nbval==0.9.5 +nbformat==5.1.3 +nbval==0.9.6 packaging==20.3 parso==0.6.2 pexpect==4.8.0 @@ -22,13 +22,13 @@ 
pickleshare==0.7.5 pluggy==0.13.1 prompt-toolkit==3.0.3 ptyprocess==0.6.0 -py==1.8.1 +py==1.11.0 Pygments==2.5.2 pyparsing==2.4.6 pyrsistent==0.15.7 -pytest==5.3.5 +pytest==6.2.5 python-dateutil==2.8.1 -pyzmq==19.0.0 +pyzmq==22.3.0 six==1.14.0 sortedcontainers==2.1.0 tornado==6.0.4 diff --git a/setup.py b/setup.py index ee1a6823..e05918c3 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ VERSION = str(ast.literal_eval(_version_re.search( f.read().decode('utf-8')).group(1))) -with open('README.md') as f: +with open('README.md', encoding="utf-8") as f: README = f.read() # setup ----------------------------------------------------------------------- diff --git a/siuba/__init__.py b/siuba/__init__.py index efc7c258..e9f771d6 100644 --- a/siuba/__init__.py +++ b/siuba/__init__.py @@ -1,5 +1,5 @@ # version --------------------------------------------------------------------- -__version__ = "0.0.25" +__version__ = "1.0.0a3" # default imports-------------------------------------------------------------- from .siu import _, Lam diff --git a/siuba/dply/forcats.py b/siuba/dply/forcats.py index f34f572f..55c00fee 100644 --- a/siuba/dply/forcats.py +++ b/siuba/dply/forcats.py @@ -16,6 +16,10 @@ def fct_reorder(fct, x, func = np.median, desc = False) -> pd.Categorical: func: function run over all values within a level of the categorical. desc: whether to sort in descending order. + Notes that NaN categories can't be ordered. When func returns NaN, sorting + is always done with NaNs last. + + Examples: >>> fct_reorder(['a', 'a', 'b'], [4, 3, 2]) ['a', 'a', 'b'] @@ -34,11 +38,11 @@ def fct_reorder(fct, x, func = np.median, desc = False) -> pd.Categorical: x_vals = x.values if isinstance(x, pd.Series) else x s = pd.Series(x_vals, index = fct) - # for each cat, calc agg func, make values of ordered the codes + # sort groups by calculated agg func. 
note that groupby uses dropna=True by default, + # but that's okay, since pandas categoricals can't order the NA category ordered = s.groupby(level = 0).agg(func).sort_values(ascending = not desc) - ordered[:] = np.arange(len(ordered)) - codes = ordered[s.index.values] - return pd.Categorical.from_codes(codes, list(ordered.index)) + + return pd.Categorical(fct, categories=ordered.index) # fct_recode ------------------------------------------------------------------ diff --git a/siuba/dply/vector.py b/siuba/dply/vector.py index ce02d18f..86c32acc 100644 --- a/siuba/dply/vector.py +++ b/siuba/dply/vector.py @@ -270,19 +270,19 @@ def coalesce(x, *args): *args: other Series that are the same length as x, or a scalar Examples: - >>> x = pd.Series([1., None, None]) + >>> x = pd.Series([1.1, None, None]) >>> abc = pd.Series(['a', 'b', None]) >>> xyz = pd.Series(['x', 'y', 'z']) >>> coalesce(x, abc) - 0 1 + 0 1.1 1 b 2 None dtype: object >>> coalesce(x, abc, xyz) - 0 1 - 1 b - 2 z + 0 1.1 + 1 b + 2 z dtype: object """ diff --git a/siuba/dply/verbs.py b/siuba/dply/verbs.py index f8b0d7f5..4c367a56 100644 --- a/siuba/dply/verbs.py +++ b/siuba/dply/verbs.py @@ -59,9 +59,14 @@ def install_pd_siu(): setattr(DataFrameGroupBy, method_name, f) DataFrameGroupBy._repr_html_ = _repr_grouped_df_html_ + DataFrameGroupBy.__repr__ = _repr_grouped_df_console_ def _repr_grouped_df_html_(self): - return "

(grouped data frame)

" + self._selected_obj._repr_html_() + "
" + return "

(grouped data frame)

" + self.obj._repr_html_() + "
" + +def _repr_grouped_df_console_(self): + return "(grouped data frame)\n" + repr(self.obj) + # TODO: should be a subclass of Call? class Pipeable: @@ -992,8 +997,18 @@ def nest(__data, *args, key = "data"): g_df = __data.groupby(grp_keys) splitter = g_df.grouper._get_splitter(g_df.obj[nest_keys]) + # TODO: iterating over splitter now only produces 1 item (the dataframe) + # check backwards compat + def _extract_subdf_pandas_1_3(entry): + # in pandas < 1.3, splitter.__iter__ returns tuple entries (ii, df) + if isinstance(entry, tuple): + return entry[1] + + # in pandas 1.3, each entry is just the dataframe + return entry + result_index = g_df.grouper.result_index - nested_dfs = [x for ii, x in splitter] + nested_dfs = [_extract_subdf_pandas_1_3(x) for x in splitter] out = pd.DataFrame({key: nested_dfs}, index = result_index).reset_index() @@ -1095,9 +1110,16 @@ def semi_join(left, right = None, on = None): on_cols, right_on = map(list, zip(*on.items())) right = right[right_on].rename(dict(zip(right_on, on_cols))) elif on is None: - on_cols = set(left.columns).intersection(set(right.columns)) + warnings.warn( + "No on column passed to join. " + "Inferring join columns instead using shared column names." + ) + + on_cols = list(set(left.columns).intersection(set(right.columns))) if not len(on_cols): - raise Exception("No joining column specified, and no shared column names") + raise Exception("No join column specified, and no shared column names") + + warnings.warn("Detected shared columns: %s" % on_cols) elif isinstance(on, str): on_cols = [on] else: diff --git a/siuba/experimental/datetime.py b/siuba/experimental/datetime.py index 4138f521..f6fc0441 100644 --- a/siuba/experimental/datetime.py +++ b/siuba/experimental/datetime.py @@ -104,7 +104,7 @@ def _get_series_dispatcher(f, x): >>> per = pd.PeriodIndex([a_date], freq = "S") >>> floor_date(per, "M") - PeriodIndex(['2020-02'], dtype='period[M]', freq='M') + PeriodIndex(['2020-02'], dtype='period[M]'... 
""" diff --git a/siuba/experimental/pd_groups/groupby.py b/siuba/experimental/pd_groups/groupby.py index e1cd3722..cc200e51 100644 --- a/siuba/experimental/pd_groups/groupby.py +++ b/siuba/experimental/pd_groups/groupby.py @@ -7,8 +7,12 @@ from pandas import Series from pandas.api.types import is_scalar -from pandas.core.groupby import SeriesGroupBy -from pandas.core import algorithms +from pandas.core.groupby import SeriesGroupBy, DataFrameGroupBy + +try: + from pandas.core.algorithms import take_1d +except ImportError: + from pandas.core.array_algos.take import take_1d # Custom SeriesGroupBy class ================================================== @@ -99,6 +103,7 @@ def broadcast_agg(groupby, result, obj): raise NotImplementedError() + @broadcast_agg.register(GroupByAgg) def _broadcast_agg_gba(groupby): """ @@ -113,9 +118,11 @@ def _broadcast_agg_gba(groupby): src = groupby._orig_obj ids, _, ngroup = groupby._orig_grouper.group_info - out = algorithms.take_1d(groupby.obj._values, ids) + out = take_1d(groupby.obj._values, ids) - return Series(out, index=src.index, name=src.name) + # Note: reductions like siuba.dply.vector.n(_) map DataFrameGroupBy -> GroupByAgg, + # so the underlying object is a DataFrame, and does not have a .name attribute. 
+ return Series(out, index=src.index, name=getattr(src, "name", None)) @broadcast_agg.register(SeriesGroupBy) def _broadcast_agg_sgb(groupby): diff --git a/siuba/experimental/pd_groups/test_pd_groups.py b/siuba/experimental/pd_groups/test_pd_groups.py index 65508389..f6b46b7f 100644 --- a/siuba/experimental/pd_groups/test_pd_groups.py +++ b/siuba/experimental/pd_groups/test_pd_groups.py @@ -41,6 +41,7 @@ f_min = method_agg_op('min', is_property = False, accessor = None) f_add = method_el_op2('add', is_property = False, accessor = None) f_abs = method_el_op('abs', is_property = False, accessor = None) +f_df_size = lambda x: GroupByAgg.from_result(x.size(), x) # GroupByAgg is liskov substitutable, so check that our functions operate # like similarly substitutable subtypes. This means that... @@ -78,6 +79,9 @@ def test_grouped_translator_methods(f_op, f_dst, cls_result): (lambda g: f_min(g.x), lambda g: g.x.transform('min')), (lambda g: f_min(f_min(g.x)), lambda g: g.x.transform('min')), (lambda g: f_abs(f_min(g.x)), lambda g: g.x.transform('min').abs()), + + # Note that there's no way to transform a DF method, so use an arbitrary column + (lambda g: f_df_size(g), lambda g: g.x.transform('size')), ]) def test_agg_groupby_broadcasted_equal_to_transform(f_op, f_dst): g = data_default.groupby('g') @@ -91,6 +95,20 @@ def test_agg_groupby_broadcasted_equal_to_transform(f_op, f_dst): assert_series_equal(broadcasted, dst, check_names = False) +# Test generic functions ====================================================== + +def test_fast_mutate_basic(): + # sanity check of https://github.com/machow/siuba/issues/355 + from siuba.siu import _ + + res_df = data_default.groupby("g") >> fast_mutate(num = _.x / _.y * 100) + + res = res_df.num + dst = data_default.x / data_default.y * 100 + + assert_series_equal(res.obj, dst, check_names=False) + + # Test user-defined functions ================================================= from .dialect import fast_mutate, fast_summarize, 
fast_filter, _transform_args diff --git a/siuba/meta_hook.py b/siuba/meta_hook.py index f4fa607b..4d10a2f0 100644 --- a/siuba/meta_hook.py +++ b/siuba/meta_hook.py @@ -1,3 +1,10 @@ +""" +DEPRECATED. + +Note that this module was experimental, and created very early in siuba's development. +You should not rely on it for anything important. +""" + from importlib.abc import Loader, MetaPathFinder from importlib.machinery import ModuleSpec from importlib.util import find_spec @@ -55,7 +62,8 @@ def exec_module(self, module): #self.orig_loader.exec_module(self.orig_module) #for k,v in self.orig_module.__dict__.items(): - for k,v in self.orig_module.__dict__.items(): + all_items = list(self.orig_module.__dict__.items()) + for k,v in all_items: if k.startswith('_'): module.__dict__[k] = v else: diff --git a/siuba/ops/generics.py b/siuba/ops/generics.py index 5fd54937..27b0c624 100644 --- a/siuba/ops/generics.py +++ b/siuba/ops/generics.py @@ -5,7 +5,6 @@ ops_infix = Namespace( __add__ = operation('__add__', 'elwise', 2), __and__ = operation('__and__', 'elwise', 2), - __div__ = operation('__div__', 'elwise', 2), __eq__ = operation('__eq__', 'elwise', 2), __floordiv__ = operation('__floordiv__', 'elwise', 2), __ge__ = operation('__ge__', 'elwise', 2), @@ -22,7 +21,6 @@ __pow__ = operation('__pow__', 'elwise', 2), __radd__ = operation('__radd__', 'elwise', 2), __rand__ = operation('__rand__', 'elwise', 2), - __rdiv__ = operation('__rdiv__', 'elwise', 2), __rfloordiv__ = operation('__rfloordiv__', 'elwise', 2), __rmod__ = operation('__rmod__', 'elwise', 2), __rmul__ = operation('__rmul__', 'elwise', 2), diff --git a/siuba/ops/support/examples.yml b/siuba/ops/support/examples.yml index f38ef08e..79041bbf 100644 --- a/siuba/ops/support/examples.yml +++ b/siuba/ops/support/examples.yml @@ -2,7 +2,6 @@ T: _.T __add__: _ + _ __and__: _ & _ __array__: _.__array__() -__div__: _.__div__(_) __eq__: _ == _ __floordiv__: _ // _ __ge__: _ >= _ @@ -19,7 +18,6 @@ __pos__: +_ __pow__: _**_ 
__radd__: _ + _ __rand__: _ & _ -__rdiv__: _.__rdiv__(_) __rfloordiv__: _ // _ __rmod__: _ % _ __rmul__: _ * _ diff --git a/siuba/sql/dialects/base.py b/siuba/sql/dialects/base.py index edeb0222..ae2d92aa 100644 --- a/siuba/sql/dialects/base.py +++ b/siuba/sql/dialects/base.py @@ -157,7 +157,6 @@ def req_bool(f): # infix ---- __add__ = sql_colmeth("__add__"), __and__ = req_bool(sql_colmeth("__and__")), - __div__ = sql_colmeth("__div__"), __eq__ = sql_colmeth("__eq__"), __floordiv__ = sql_func_floordiv, __ge__ = sql_colmeth("__ge__"), @@ -174,7 +173,6 @@ def req_bool(f): __pow__ = sql_not_impl(), __radd__ = sql_colmeth("__radd__"), __rand__ = req_bool(sql_colmeth("__rand__")), - __rdiv__ = sql_colmeth("__rdiv__"), __rfloordiv__ = lambda x, y: sql_func_floordiv(y, x), __rmod__ = sql_colmeth("__rmod__"), __rmul__ = sql_colmeth("__rmul__"), @@ -193,8 +191,8 @@ def req_bool(f): add = sql_colmeth("__add__"), #and = - div = sql_colmeth("__div__"), - divide = sql_colmeth("__div__"), + div = sql_colmeth("__truediv__"), + divide = sql_colmeth("__truediv__"), #divmod = eq = sql_colmeth("__eq__"), #floordiv = sql_colmeth("__floordiv__"), @@ -208,7 +206,7 @@ def req_bool(f): ne = sql_colmeth("__ne__"), pow = sql_not_impl(), radd = sql_colmeth("__radd__"), - rdiv = sql_colmeth("__rdiv__"), + rdiv = sql_colmeth("__rtruediv__"), #rdivmod = #rfloordiv = sql_colmeth("__pow__"), rmod = sql_colmeth("__rmod__"), diff --git a/siuba/sql/dialects/mysql.py b/siuba/sql/dialects/mysql.py index 3bce5d07..abb1043a 100644 --- a/siuba/sql/dialects/mysql.py +++ b/siuba/sql/dialects/mysql.py @@ -73,11 +73,9 @@ def sql_func_between(col, left, right, inclusive=True): # copied from postgres. MYSQL does true division over ints by default, # but it does not produce double precision. 
- __div__ = sql_func_truediv, div = sql_func_truediv, divide = sql_func_truediv, rdiv = lambda x,y: sql_func_truediv(y, x), - __rdiv__ = lambda x, y: sql_func_truediv(y, x), __truediv__ = sql_func_truediv, truediv = sql_func_truediv, diff --git a/siuba/sql/dialects/postgresql.py b/siuba/sql/dialects/postgresql.py index 2b17225d..8b80e7c7 100644 --- a/siuba/sql/dialects/postgresql.py +++ b/siuba/sql/dialects/postgresql.py @@ -72,11 +72,9 @@ def sql_func_truediv(x, y): # infix and infix methods ---- - __div__ = sql_func_truediv, div = sql_func_truediv, divide = sql_func_truediv, rdiv = lambda x,y: sql_func_truediv(y, x), - __rdiv__ = lambda x, y: sql_func_truediv(y, x), __truediv__ = sql_func_truediv, truediv = sql_func_truediv, diff --git a/siuba/sql/verbs.py b/siuba/sql/verbs.py index 3a2c9425..35841211 100644 --- a/siuba/sql/verbs.py +++ b/siuba/sql/verbs.py @@ -7,6 +7,7 @@ """ +import warnings from siuba.dply.verbs import ( singledispatch2, @@ -936,7 +937,7 @@ def _semi_join(left, right = None, on = None, *args, sql_on = None): right_sel = right.last_op.alias() # handle arguments ---- - on = _validate_join_arg_on(on, sql_on) + on = _validate_join_arg_on(on, sql_on, left_sel, right_sel) # create join conditions ---- bool_clause = _create_join_conds(left_sel, right_sel, on) @@ -962,7 +963,7 @@ def _anti_join(left, right = None, on = None, *args, sql_on = None): right_sel = right.last_op.alias() # handle arguments ---- - on = _validate_join_arg_on(on, sql_on) + on = _validate_join_arg_on(on, sql_on, left, right) # create join conditions ---- bool_clause = _create_join_conds(left_sel, right_sel, on) @@ -981,7 +982,7 @@ def _raise_if_args(args): if len(args): raise NotImplemented("*args is reserved for future arguments (e.g. 
suffix)") -def _validate_join_arg_on(on, sql_on = None): +def _validate_join_arg_on(on, sql_on = None, lhs = None, rhs = None): # handle sql on case if sql_on is not None: if on is not None: @@ -991,12 +992,34 @@ def _validate_join_arg_on(on, sql_on = None): # handle general cases if on is None: - raise NotImplementedError("on arg currently cannot be None (default) for SQL") + # TODO: currently, we check for lhs and rhs tables to indicate whether + # a verb supports inferring columns. Otherwise, raise an error. + if lhs is not None and rhs is not None: + # TODO: consolidate with duplicate logic in pandas verb code + warnings.warn( + "No on column passed to join. " + "Inferring join columns instead using shared column names." + ) + + on_cols = list(set(lhs.columns.keys()).intersection(set(rhs.columns.keys()))) + + if not on_cols: + raise ValueError( + "No join column specified, or shared column names in join." + ) + + # trivial dict mapping shared names to themselves + warnings.warn("Detected shared columns: %s" % on_cols) + on = dict(zip(on_cols, on_cols)) + + else: + raise NotImplementedError("on arg currently cannot be None (default) for SQL") elif isinstance(on, str): on = {on: on} elif isinstance(on, (list, tuple)): on = dict(zip(on, on)) + if not isinstance(on, Mapping): raise TypeError("on must be a Mapping (e.g. 
dict)") diff --git a/siuba/tests/conftest.py b/siuba/tests/conftest.py index f9646460..52d33ea6 100644 --- a/siuba/tests/conftest.py +++ b/siuba/tests/conftest.py @@ -1,5 +1,5 @@ import pytest -from .helpers import assert_equal_query, PandasBackend, SqlBackend, data_frame +from .helpers import assert_equal_query, PandasBackend, SqlBackend, BigqueryBackend, data_frame def pytest_addoption(parser): parser.addoption( @@ -10,7 +10,7 @@ def pytest_addoption(parser): pytest.param(lambda: SqlBackend("postgresql"), id = "postgresql", marks=pytest.mark.postgresql), pytest.param(lambda: SqlBackend("mysql"), id = "mysql", marks=pytest.mark.mysql), pytest.param(lambda: SqlBackend("sqlite"), id = "sqlite", marks=pytest.mark.sqlite), - pytest.param(lambda: SqlBackend("bigquery"), id = "bigquery", marks=pytest.mark.bigquery), + pytest.param(lambda: BigqueryBackend("bigquery"), id = "bigquery", marks=pytest.mark.bigquery), pytest.param(lambda: PandasBackend("pandas"), id = "pandas", marks=pytest.mark.pandas) ] diff --git a/siuba/tests/helpers.py b/siuba/tests/helpers.py index 6cab99cd..567f03b1 100644 --- a/siuba/tests/helpers.py +++ b/siuba/tests/helpers.py @@ -1,4 +1,5 @@ import sqlalchemy as sqla +import uuid from siuba.sql import LazyTbl from siuba.dply.verbs import ungroup, collect @@ -114,7 +115,7 @@ def load_df(self, df = None, **kwargs): table_name = self.unique_table_name() - return copy_to_sql(df, self.unique_table_name(), self.engine) + return copy_to_sql(df, table_name, self.engine) def load_cached_df(self, df): import hashlib @@ -129,6 +130,25 @@ def load_cached_df(self, df): return res +class BigqueryBackend(SqlBackend): + @classmethod + def unique_table_name(cls): + return "siuba_{}".format(uuid.uuid4()) + + def load_df(self, df = None, **kwargs): + df = super().load_df(df, **kwargs) + + # since we are using uuids, set table to expire after 1 day, so we can + # easily inspect the tables, but also ensure cleanup + self.engine.execute(""" + ALTER TABLE `{table_name}` 
+ SET OPTIONS ( + expiration_timestamp=TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL 1 DAY) + ) + """.format(table_name=df.tbl.name)) + + return df + def robust_multiple_sort(df, by): """Sort a DataFrame on multiple columns, slower but more reliable than df.sort_values diff --git a/siuba/tests/test_dply_forcats.py b/siuba/tests/test_dply_forcats.py index bac3a743..15a37244 100644 --- a/siuba/tests/test_dply_forcats.py +++ b/siuba/tests/test_dply_forcats.py @@ -34,6 +34,12 @@ def test_fct_reorder_custom_func(): assert_fct_equal(res, dst) +def test_fct_reorder_na_fct(): + import numpy as np + res = fct_reorder([None, 'x', 'y'], [4, 3, 2], np.max) + dst = Categorical([None, 'x', 'y'], ['y', 'x']) + + assert_fct_equal(res, dst) # fct_recode ------------------------------------------------------------------ diff --git a/siuba/tests/test_dply_series_methods.py b/siuba/tests/test_dply_series_methods.py index 32942540..93557739 100644 --- a/siuba/tests/test_dply_series_methods.py +++ b/siuba/tests/test_dply_series_methods.py @@ -263,15 +263,17 @@ def test_pandas_grouped_frame_fast_mutate(entry): res = fast_mutate(gdf, result = call_expr) dst = mutate(gdf, result = call_expr) - # TODO: apply mark to skip failing tests, rather than downcast - # pandas grouped aggs, when not using cython, _try_cast back to original type - # but since mutate uses apply, it doesn't :/. Currently only affects median func. - dst_obj = dst.obj + # TODO: apply mark to skip failing tests, rather than casting? 
+ # in pandas 1.2, grouped agg returns int, ungrouped agg returns float + # in pandas 1.3, grouped agg returns float, same as ungrouped agg + # (the difference is because the grouped agg in 1.2 did not use cython, + # and tries casting back to the original column dtype) + res_obj = res.obj if str_expr == '_.x.median()': - dst_obj['result'] = dst_obj['result'].astype(gdf.x.obj.dtype) + res_obj['result'] = res_obj['result'].astype(float) assert isinstance(dst, DataFrameGroupBy) - assert_frame_equal(res.obj, dst_obj) + assert_frame_equal(res_obj, dst.obj) @pytest.mark.skip_backend('sqlite') @@ -324,7 +326,7 @@ def test_pandas_grouped_frame_fast_summarize(agg_entry): # pandas grouped aggs, when not using cython, _try_cast back to original type # but since summarize uses apply, it doesn't :/. Currently only affects median func. if str_expr == '_.x.median()': - dst['result'] = dst['result'].astype(gdf.x.obj.dtype) + res['result'] = res['result'].astype(float) assert_frame_equal(res, dst) diff --git a/siuba/tests/test_verb_join.py b/siuba/tests/test_verb_join.py index 9b64ac40..20cb39b5 100644 --- a/siuba/tests/test_verb_join.py +++ b/siuba/tests/test_verb_join.py @@ -169,6 +169,27 @@ def test_semi_join_no_cross(backend, df1, df2): DF1.iloc[:1,] ) +def test_semi_join_no_on_arg(backend, df1): + df_ii = backend.load_df(data_frame(ii = [1,1])) + + with pytest.warns(UserWarning) as record: + assert_equal_query( + df1, + semi_join(_, df_ii), + DF1.iloc[:1,] + ) + + assert "No on column passed to join." in record[0].message.args[0] + assert "['ii']" in record[1].message.args[0] + +def test_semi_join_no_on_arg_fail(backend, df1): + df_ii = backend.load_df(data_frame(ZZ = [1,1])) + + with pytest.raises(Exception) as excinfo: + collect(semi_join(df1, df_ii)) + + assert "No join column specified" in str(excinfo.value) + def test_basic_anti_join_on_map(backend, df1, df2): assert_frame_sort_equal(