feat: Support for Arrow PyCapsule interface #23

Merged: 4 commits, Aug 13, 2024
src/quak/__init__.py (6 additions, 1 deletion)

@@ -1,5 +1,6 @@
 """An anywidget for data that talks like a duck."""
 
+from ._util import has_pycapsule_stream_interface
 from ._version import __version__
 from ._widget import Widget
@@ -18,7 +19,11 @@ def format(self, obj, include=None, exclude=None):
         # special case for duckdb relations
         if isinstance(obj, duckdb.DuckDBPyRelation):
             obj = obj.arrow()
-        if is_arrow_ipc(obj) or is_dataframe_api_obj(obj):
+        if (
+            has_pycapsule_stream_interface(obj)
+            or is_arrow_ipc(obj)
+            or is_dataframe_api_obj(obj)
+        ):
             obj = Widget(obj)
         return super().format(obj, include, exclude)
src/quak/_util.py (10 additions, 0 deletions)

@@ -9,6 +9,16 @@
 import pyarrow as pa

+def has_pycapsule_stream_interface(obj: object) -> bool:
+    """
+    Check if an object exports an Arrow C Stream via the Arrow PyCapsule Interface.
+
+    https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+    """
+    return hasattr(obj, "__arrow_c_stream__")
+
+
 def is_dataframe_api_obj(obj: object) -> DataFrameObject:
     """Check if an object has a dataframe API."""
     method = getattr(obj, "__dataframe__", None)
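As an aside (not part of the diff): the check is plain duck typing on the `__arrow_c_stream__` dunder, so any producer implementing the PyCapsule interface passes. A minimal sketch, assuming a recent pyarrow (14+), whose tables export the dunder:

```python
import pyarrow as pa

tbl = pa.table({"a": [1, 2, 3]})

# pyarrow tables export an Arrow C Stream via the PyCapsule interface...
assert hasattr(tbl, "__arrow_c_stream__")

# ...while arbitrary Python objects do not, so they fall through to the
# IPC / dataframe-protocol checks instead.
assert not hasattr({"a": [1, 2, 3]}, "__arrow_c_stream__")
```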
src/quak/_widget.py (17 additions, 2 deletions)

@@ -6,13 +6,16 @@

 import anywidget
 import duckdb
+import pyarrow as pa
 import traitlets
 
 from ._util import (
     arrow_table_from_dataframe_protocol,
     arrow_table_from_ipc,
     get_columns,
+    has_pycapsule_stream_interface,
     is_arrow_ipc,
+    is_dataframe_api_obj,
     table_to_ipc,
 )
@@ -37,10 +40,22 @@ def __init__(self, data, *, table: str = "df"):
             conn = data
         else:
             conn = duckdb.connect(":memory:")
Review thread on `conn = duckdb.connect(":memory:")`:

@kylebarron (Collaborator, Author):

Do you know if the DuckDB :memory: DB will spill to disk? Or for very large input would it just crash the process?

@manzt (Owner):

> Do you know if the DuckDB :memory: DB will spill to disk? Or for very large input would it just crash the process?

I believe it spills to disk.

@kylebarron (Collaborator, Author):

I thought I was talking to someone recently who asserted that :memory: in particular didn't spill to disk, because it doesn't have a path to store a local database. But I'm not sure.

Comment (passerby):

Hi, just stopping by: in-memory databases will spill to disk. DuckDB will create/use a .tmp folder in the current working directory; a database file is not required to spill to disk.

You can test this out by setting a relatively low memory limit and creating a temp table that exceeds this limit, as sketched below. `SELECT * FROM duckdb_temporary_files()` will show you which temporary files were created to back the temporary table.
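A runnable version of that test (a hypothetical sketch, not from the thread; the table name and sizes are made up, and exact spill behavior may vary by DuckDB version):

```python
import duckdb

conn = duckdb.connect(":memory:")
conn.execute("SET memory_limit = '10MB'")

# Create a temp table comfortably larger than the 10MB limit so DuckDB
# is forced to back it with temporary files on disk.
conn.execute(
    "CREATE TEMP TABLE big AS "
    "SELECT range AS i, repeat('x', 100) AS payload FROM range(1000000)"
)

# Lists the temporary files (in the .tmp folder) backing the temp table.
print(conn.execute("SELECT * FROM duckdb_temporary_files()").fetchall())
```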

-        if is_arrow_ipc(data):
+        if has_pycapsule_stream_interface(data):
+            # NOTE: for now we materialize the input into an in-memory Arrow table,
+            # so that we can perform repeated queries on it. In the future, it may
+            # be better to keep this Arrow stream non-materialized in Python and
+            # create a new DuckDB table from the stream.
+            # arrow_table = pa.RecordBatchReader.from_stream(data)
+            arrow_table = pa.table(data)
Review thread on lines +48 to +49:
@kylebarron (Collaborator, Author), Jul 26, 2024:

This is not ideal, because pa.table materializes the entire stream in memory at once. I tried with the RecordBatchReader, but I think you might need to adjust your queries to actually create a table instead of querying the stream directly. I assume you run multiple queries on the input... and presumably the first query exhausts the input stream, so the second query has no data.

If I uncomment the RecordBatchReader line above instead, I get:

[screenshot: the widget footer reports 11k rows, but the table body renders empty]

Note that it shows 11k rows in the bottom right, but then doesn't display anything. So I think you need to persist the Arrow stream input manually.
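The exhaustion is easy to reproduce in isolation (a minimal sketch, not from the PR, assuming pyarrow 14+ for `RecordBatchReader.from_stream`):

```python
import pyarrow as pa

tbl = pa.table({"x": list(range(11_000))})

# Wrap the table in a stream, as the commented-out line in the diff would.
reader = pa.RecordBatchReader.from_stream(tbl)

print(reader.read_all().num_rows)  # 11000: the first read drains the stream
print(reader.read_all().num_rows)  # 0: a second read finds nothing left
```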

@manzt (Owner), Jul 26, 2024:

Yeah, I figured this is not ideal... I just wasn't sure how to wire up DuckDB to query something consistently. I'm guessing something like CREATE VIEW over the input stream might allow us to register tables that DuckDB can continuously query.
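For a fully materialized Arrow table, the registration route already behaves like a view (a minimal sketch of the idea, not code from the PR): `conn.register` sets up a replacement scan, so each query re-reads the in-memory table without copying it.

```python
import duckdb
import pyarrow as pa

conn = duckdb.connect(":memory:")
tbl = pa.table({"x": [1, 2, 3]})

# Register the in-memory table; DuckDB re-scans it on every query,
# so repeated queries all see the full data.
conn.register("df", tbl)
print(conn.execute("SELECT sum(x) FROM df").fetchone())    # (6,)
print(conn.execute("SELECT count(*) FROM df").fetchone())  # (3,)
```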

@kylebarron (Collaborator, Author), Jul 26, 2024:

I'm not 100% certain, but I don't think you could always use CREATE VIEW here for Arrow streams. Reading https://duckdb.org/docs/api/python/data_ingestion#directly-accessing-dataframes-and-arrow-objects makes me think the first time you query the view, it will exhaust the input stream.

I think we're running into the same issue as in ibis: ibis-project/ibis#9663 (comment). In particular, the presence of the dunder method doesn't tell you whether the input data is already materialized or a stream. If you know that the input data is an in-memory table, then you can call __arrow_c_stream__ multiple times and get the full data each time. In that case you'd prefer to make a DuckDB view, because then you don't need to copy the input data. But if the input is a stream, then you would need to persist it into a full DuckDB table for ongoing queries, as in the sketch below.
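One way to handle the stream case (a hypothetical sketch, not what this PR merged; the helper and registration names are made up): drain the stream once into a real DuckDB table, then run all subsequent queries against that table.

```python
import duckdb
import pyarrow as pa

def persist_stream(conn: duckdb.DuckDBPyConnection, data, table: str) -> None:
    """Drain a one-shot Arrow C Stream into a persistent DuckDB table."""
    reader = pa.RecordBatchReader.from_stream(data)
    # Registering the reader lets DuckDB scan it exactly once; CTAS drains
    # it into a table that can then be queried any number of times.
    conn.register("_arrow_stream", reader)
    conn.execute(f'CREATE TABLE "{table}" AS SELECT * FROM _arrow_stream')
    conn.unregister("_arrow_stream")

conn = duckdb.connect(":memory:")
persist_stream(conn, pa.table({"x": [1, 2, 3]}), "df")
print(conn.execute('SELECT count(*) FROM "df"').fetchone())  # (3,)
```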

+        elif is_arrow_ipc(data):
             arrow_table = arrow_table_from_ipc(data)
-        else:
+        elif is_dataframe_api_obj(data):
             arrow_table = arrow_table_from_dataframe_protocol(data)
+        else:
+            raise ValueError(
+                "input must be a DuckDB connection, DataFrame-like, an Arrow IPC "
+                "table, or an Arrow object exporting the Arrow C Stream interface."
+            )
         conn.register(table, arrow_table)
         self._conn = conn
         super().__init__(