From 1f5199192fe5ac8984a13fdaf9799c788a1c7ea3 Mon Sep 17 00:00:00 2001
From: liquidcarbon <akscrps@gmail.com>
Date: Mon, 4 Nov 2024 18:06:14 -0700
Subject: [PATCH] update README

---
 README.md | 48 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 4e3c65e..fe7b21d 100644
--- a/README.md
+++ b/README.md
@@ -69,7 +69,7 @@ class IsotopeData(af.Dataset):
     mass = af.VectorF64("Isotope Mass (Da)")
     abundance = af.VectorF64("Relative natural abundance")
 
-IsotopesData.z
+IsotopeData.z
 # DescriptorType Int8 of len 0  # Atomic Number (Z)
 # Series([], dtype: Int8)
 
@@ -77,6 +77,9 @@ IsotopeData().pl  # show fields and types
 # shape: (0, 4)
 # symbol  z  mass abundance
 #    str i8   f64       f64
+
+IsotopeData.LOCATION  # new in v0.4
+# Location(folder=PosixPath('.'), file='IsotopeData_export.csv', partition_by=[])
 ```
 
 The class attributes are instantiated Vector objects of zero length.  Using the [desciptor pattern](https://docs.python.org/3/howto/descriptor.html), they are replaced with actual data arrays on building the instance.
@@ -173,6 +176,49 @@ print(data_from_parquet.pl.dtypes)
 # [String, Int8, Float64, Float64]
 ```
 
+#### 7. Bonus: Partitions
+
+The special attribute `LOCATION` helps you write the data where you want, how you want it.
+
+On calling `af.Dataset.partition()`, you'll get the formatted list of Hive-style partitions and the datasets broken up accordingly.
+
+This is en route to `af.Dataset.save()`, which in all likelihood won't be done since there are far too many ways to handle this.
+
+```python
+class PartitionedIsotopeData(af.Dataset):
+    symbol = af.VectorObject("Element")
+    z = af.VectorI8("Atomic Number (Z)")
+    mass = af.VectorF64("Isotope Mass (Da)")
+    abundance = af.VectorF64("Relative natural abundance")
+    LOCATION = af.Location(folder="mydata", file="isotopes.csv", partition_by=["z"])
+
+url = "https://raw.githubusercontent.com/liquidcarbon/chembiodata/main/isotopes.csv"
+data_from_sql = PartitionedIsotopeData.build(query=f"FROM '{url}'", rename=True)
+paths, partitions = data_from_sql.partition()
+paths[:3], partitions[:3]
+
+# (['mydata/z=1/isotopes.csv',
+#   'mydata/z=2/isotopes.csv',
+#   'mydata/z=3/isotopes.csv'],
+#  [Dataset PartitionedIsotopeData of shape (3, 4)
+#   symbol = ['H', 'H', 'H']
+#   z = [1, 1, 1]
+#   mass = [1.007825, 2.014102, 3.016049]
+#   abundance = [0.999885, 0.000115, 0.0],
+#   Dataset PartitionedIsotopeData of shape (2, 4)
+#   symbol = ['He', 'He']
+#   z = [2, 2]
+#   mass = [3.016029, 4.002603]
+#   abundance = [1e-06, 0.999999],
+#   Dataset PartitionedIsotopeData of shape (2, 4)
+#   symbol = ['Li', 'Li']
+#   z = [3, 3]
+#   mass = [6.015123, 7.016003]
+#   abundance = [0.0759, 0.9241]])
+```
+
+
+
 
 
 ## Motivation