From 449dd341b51baac3b14afa66fc861ba789c31483 Mon Sep 17 00:00:00 2001 From: "ben.sless" Date: Fri, 21 May 2021 10:05:47 +0300 Subject: [PATCH 1/6] Add read-values and write-values read-values dispatches to the ReadValues protocol. It returns an iterator via an ObjectReader derived from the supplied mapper. The returned iterator is reified in a manner similar to Eduction to support reduction and sequence construction over it. write-values relies on two protocols - WriteValues for the output destination, similarly to WriteValue, and WriteAll for the type being written, which can be an array or an Iterable. It writes an array or iterable to destination via a SequenceWriter. Importantly, write-values disables automatic flushing on serialization to get good performance. --- src/clj/jsonista/core.clj | 129 +++++++++++++++++++++++++++++++++++++- 1 file changed, 127 insertions(+), 2 deletions(-) diff --git a/src/clj/jsonista/core.clj b/src/clj/jsonista/core.clj index 649c998..f6064c6 100644 --- a/src/clj/jsonista/core.clj +++ b/src/clj/jsonista/core.clj @@ -60,13 +60,13 @@ RatioSerializer FunctionalKeywordSerializer) (com.fasterxml.jackson.core JsonGenerator$Feature JsonFactory) (com.fasterxml.jackson.databind - JsonSerializer ObjectMapper module.SimpleModule + JsonSerializer ObjectMapper SequenceWriter SerializationFeature DeserializationFeature Module) (com.fasterxml.jackson.databind.module SimpleModule) (java.io InputStream Writer File OutputStream DataOutput Reader) (java.net URL) (com.fasterxml.jackson.datatype.jsr310 JavaTimeModule) - (java.util List Map Date) + (java.util List Map Date Iterator) (clojure.lang Keyword Ratio Symbol))) (defn- ^Module clojure-module @@ -194,6 +194,38 @@ (-read-value [this ^ObjectMapper mapper] (.readValue mapper this ^Class Object))) +(defprotocol ReadValues + (-read-values [this mapper])) + +(extend-protocol ReadValues + + (Class/forName "[B") + (-read-values [this ^ObjectMapper mapper] + (.readValues (.readerFor mapper ^Class 
Object) ^bytes this)) + + nil + (-read-values [_ _]) + + File + (-read-values [this ^ObjectMapper mapper] + (.readValues (.readerFor mapper ^Class Object) this)) + + URL + (-read-values [this ^ObjectMapper mapper] + (.readValues (.readerFor mapper ^Class Object) this)) + + String + (-read-values [this ^ObjectMapper mapper] + (.readValues (.readerFor mapper ^Class Object) this)) + + Reader + (-read-values [this ^ObjectMapper mapper] + (.readValues (.readerFor mapper ^Class Object) this)) + + InputStream + (-read-values [this ^ObjectMapper mapper] + (.readValues (.readerFor mapper ^Class Object) this))) + (defprotocol WriteValue (-write-value [this value mapper])) @@ -214,6 +246,50 @@ (-write-value [this value ^ObjectMapper mapper] (.writeValue mapper this value))) +(defprotocol WriteAll + (-write-all [this ^SequenceWriter writer])) + +(extend-protocol WriteAll + + (Class/forName "[Ljava.lang.Object;") + (-write-all [this ^SequenceWriter w] + (.writeAll w ^"[Ljava.lang.Object;" this)) + + Iterable + (-write-all [this ^SequenceWriter w] + (.writeAll w this))) + +(defprotocol WriteValues + (-write-values [this values mapper])) + +(defmacro ^:private -write-values* + [this value mapper] + `(doto ^SequenceWriter + (-write-all + ~value + (-> ~mapper + (.writerFor Object) + (.without SerializationFeature/FLUSH_AFTER_WRITE_VALUE) + (.writeValuesAsArray ~this))) + (.close))) + +(extend-protocol WriteValues + File + (-write-values [this value ^ObjectMapper mapper] + (-write-values* this value mapper)) + + OutputStream + (-write-values [this value ^ObjectMapper mapper] + (-write-values* this value mapper)) + + DataOutput + (-write-values [this value ^ObjectMapper mapper] + (-write-values* this value mapper)) + + Writer + (-write-values [this value ^ObjectMapper mapper] + (-write-values* this value mapper))) + ;; ;; public api ;; @@ -259,3 +335,52 @@ (-write-value to object default-object-mapper)) ([to object ^ObjectMapper mapper] (-write-value to object mapper))) + +(defn- 
wrap-values + [^Iterator iterator] + (reify + Iterable + (iterator [this] iterator) + Iterator + (hasNext [this] (.hasNext iterator)) + (next [this] (.next iterator)) + (remove [this] (.remove iterator)) + clojure.lang.IReduceInit + (reduce [_ f val] + (loop [ret val] + (if (.hasNext iterator) + (let [ret (f ret (.next iterator))] + (if (reduced? ret) + @ret + (recur ret))) + ret))) + clojure.lang.Sequential)) + +(defn read-values + "Decodes a sequence of values from a JSON as an iterator + from anything that satisfies [[ReadValue]] protocol. + By default, File, URL, String, Reader and InputStream are supported. + + The returned object is an Iterable, Iterator and IReduceInit. + It can be reduced on via [[reduce]] and turned into a lazy sequence + via [[iterator-seq]]. + + To configure, pass in an ObjectMapper created with [[object-mapper]], + see [[object-mapper]] docstring for the available options." + ([object] + (wrap-values (-read-values object default-object-mapper))) + ([object ^ObjectMapper mapper] + (wrap-values (-read-values object mapper)))) + +(defn write-values + "Encode values as JSON and write using the provided [[WriteValue]] instance. + By default, File, OutputStream, DataOutput and Writer are supported. + + By default, values can be an array or an Iterable. + + To configure, pass in an ObjectMapper created with [[object-mapper]], + see [[object-mapper]] docstring for the available options." 
+ ([to object] + (-write-values to object default-object-mapper)) + ([to object ^ObjectMapper mapper] + (-write-values to object mapper))) From cb29c827c0ba0371e518a25cb049c28eb97dadd0 Mon Sep 17 00:00:00 2001 From: Ben Sless Date: Tue, 25 May 2021 16:26:01 +0300 Subject: [PATCH 2/6] Align read-values with read-value for nil --- src/clj/jsonista/core.clj | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/clj/jsonista/core.clj b/src/clj/jsonista/core.clj index f6064c6..67b74d5 100644 --- a/src/clj/jsonista/core.clj +++ b/src/clj/jsonista/core.clj @@ -338,23 +338,24 @@ (defn- wrap-values [^Iterator iterator] - (reify - Iterable - (iterator [this] iterator) - Iterator - (hasNext [this] (.hasNext iterator)) - (next [this] (.next iterator)) - (remove [this] (.remove iterator)) - clojure.lang.IReduceInit - (reduce [_ f val] - (loop [ret val] - (if (.hasNext iterator) - (let [ret (f ret (.next iterator))] - (if (reduced? ret) - @ret - (recur ret))) - ret))) - clojure.lang.Sequential)) + (when iterator + (reify + Iterable + (iterator [this] iterator) + Iterator + (hasNext [this] (.hasNext iterator)) + (next [this] (.next iterator)) + (remove [this] (.remove iterator)) + clojure.lang.IReduceInit + (reduce [_ f val] + (loop [ret val] + (if (.hasNext iterator) + (let [ret (f ret (.next iterator))] + (if (reduced? 
ret) + @ret + (recur ret))) + ret))) + clojure.lang.Sequential))) (defn read-values "Decodes a sequence of values from a JSON as an iterator From 455ced1ef8f274055c36f7ed8a17074887297976 Mon Sep 17 00:00:00 2001 From: Ben Sless Date: Tue, 25 May 2021 16:27:17 +0300 Subject: [PATCH 3/6] Add tests for read-values and write-values --- test/jsonista/core_test.clj | 76 +++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/test/jsonista/core_test.clj b/test/jsonista/core_test.clj index 9eff2d6..5cfaeaa 100644 --- a/test/jsonista/core_test.clj +++ b/test/jsonista/core_test.clj @@ -243,6 +243,33 @@ (testing "Reader" (is (= original (j/read-value (InputStreamReader. (str->input-stream input-string)))))))) +(deftest read-values-types + (let [original [{"ok" 1}] + input-string (j/write-value-as-string original) + file (tmp-file)] + (spit file input-string) + + (testing "nil" + (is (= nil (j/read-values nil)))) + + (testing "byte-array" + (is (= original (j/read-values (j/write-value-as-bytes original))))) + + (testing "File" + (is (= original (j/read-values file)))) + + (testing "URL" + (is (= original (j/read-values (.toURL file))))) + + (testing "String" + (is (= original (j/read-values input-string)))) + + (testing "InputStream" + (is (= original (j/read-values (str->input-stream input-string))))) + + (testing "Reader" + (is (= original (j/read-values (InputStreamReader. (str->input-stream input-string)))))))) + (deftest write-value-types (let [original {"ok" 1} expected (j/write-value-as-string original) @@ -268,3 +295,52 @@ (is (= expected (slurp file))) (.delete file)))) +(deftest write-values-types + (let [original [{"ok" 1}] + expected (j/write-value-as-string original) + file (tmp-file)] + + (testing "File" + (j/write-values file original) + (is (= expected (slurp file))) + (.delete file)) + + (testing "OutputStream" + (j/write-values (FileOutputStream. 
file) original) + (is (= expected (slurp file))) + (.delete file)) + + (testing "DataOutput" + (j/write-values (RandomAccessFile. file "rw") original) + (is (= expected (slurp file))) + (.delete file)) + + (testing "Writer" + (j/write-values (FileWriter. file) original) + (is (= expected (slurp file))) + (.delete file)))) + +(deftest read-values-iteration + (let [original [{"ok" 1}] + ^java.util.Iterator it (j/read-values (j/write-value-as-bytes original))] + (is (instance? java.util.Iterator it)) + (is (.hasNext it)) + (is (= (first original) (.next it))) + (is (false? (.hasNext it))))) + +(deftest read-values-reduction + (let [original [{"ok" 1}] + ^java.util.Iterator it (j/read-values (j/write-value-as-bytes original)) + xf (map #(update % "ok" inc))] + (is (= (into [] xf original) (into [] xf it))))) + +(deftest write-values-iterable + (let [original [{"ok" 1}] + xf (map #(update % "ok" inc)) + expected (j/write-value-as-string (into [] xf original)) + file (tmp-file) + eduction (->Eduction xf original)] + + (j/write-values file eduction) + (is (= expected (slurp file))) + (.delete file))) From a9b5ecf71ee26a7ac8ee4cc4d45d772a82934fa4 Mon Sep 17 00:00:00 2001 From: Joel Kaasinen Date: Mon, 16 Sep 2024 17:24:48 +0300 Subject: [PATCH 4/6] feat: :close means JsonGenerator$Feature/AUTO_CLOSE_TARGET --- src/clj/jsonista/core.clj | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/clj/jsonista/core.clj b/src/clj/jsonista/core.clj index 9afff1b..282403f 100644 --- a/src/clj/jsonista/core.clj +++ b/src/clj/jsonista/core.clj @@ -131,6 +131,7 @@ | `:date-format` | string for custom date formatting. 
default: `yyyy-MM-dd'T'HH:mm:ss'Z'` | | `:encode-key-fn` | true to coerce keyword keys to strings, false to leave them as keywords, or a function to provide custom coercion (default: true) | | `:encoders` | a map of custom encoders where keys should be types and values should be encoder functions | + | `:close` | close OutputStreams & other closeable targets after write-value (default: true) | Encoder functions take two parameters: the value to be encoded and a JsonGenerator object. The function should call JsonGenerator methods to emit @@ -138,8 +139,8 @@ | Decoding options | | | ------------------- | -------------------------------------------------------------- | - | `:decode-key-fn` | true to coerce keys to keywords, false to leave them as strings, or a function to provide custom coercion (default: false) | - | `:bigdecimals` | true to decode doubles as BigDecimals (default: false) |" + | `:decode-key-fn` | true to coerce keys to keywords, false to leave them as strings, or a function to provide custom coercion (default: false) | + | `:bigdecimals` | true to decode doubles as BigDecimals (default: false) |" ([] (object-mapper {})) ([options] (let [factory (:factory options) @@ -158,7 +159,8 @@ (:strip-nils options) (.setSerializationInclusion JsonInclude$Include/NON_NULL) (:strip-empties options) (.setSerializationInclusion JsonInclude$Include/NON_EMPTY) (:do-not-fail-on-empty-beans options) (.disable SerializationFeature/FAIL_ON_EMPTY_BEANS) - (:escape-non-ascii options) (doto (-> .getFactory (.enable JsonGenerator$Feature/ESCAPE_NON_ASCII)))))] + (:escape-non-ascii options) (doto (-> .getFactory (.enable JsonGenerator$Feature/ESCAPE_NON_ASCII))) + (contains? 
options :close) (.configure JsonGenerator$Feature/AUTO_CLOSE_TARGET (boolean (:close options)))))] (doseq [module (:modules options)] (.registerModule mapper module)) (.disable mapper SerializationFeature/WRITE_DATES_AS_TIMESTAMPS) From a85537c83e7dccd85785714b008982e41735a8bf Mon Sep 17 00:00:00 2001 From: Joel Kaasinen Date: Tue, 17 Sep 2024 15:02:22 +0300 Subject: [PATCH 5/6] feat: separate write-values and write-values-as-array --- src/clj/jsonista/core.clj | 52 +++++++++++++++++++++++++++---------- test/jsonista/core_test.clj | 38 ++++++++++++++++++++++----- 2 files changed, 69 insertions(+), 21 deletions(-) diff --git a/src/clj/jsonista/core.clj b/src/clj/jsonista/core.clj index 2599368..e93a3ae 100644 --- a/src/clj/jsonista/core.clj +++ b/src/clj/jsonista/core.clj @@ -280,35 +280,46 @@ (.writeAll w this))) (defprotocol WriteValues - (-write-values [this values mapper])) + (-write-values [this values mapper]) + (-write-values-as-array [this values mapper])) (defmacro ^:private -write-values* - [this value mapper] + [method this value mapper] `(doto ^SequenceWriter (-write-all ~value (-> ~mapper (.writerFor Object) + (.withRootValueSeparator "\n") (.without SerializationFeature/FLUSH_AFTER_WRITE_VALUE) - (.writeValuesAsArray ~this))) + (. 
~method ~this))) (.close))) + (extend-protocol WriteValues File (-write-values [this value ^ObjectMapper mapper] - (-write-values* this value mapper)) + (-write-values* writeValues this value mapper)) + (-write-values-as-array [this value ^ObjectMapper mapper] + (-write-values* writeValuesAsArray this value mapper)) OutputStream (-write-values [this value ^ObjectMapper mapper] - (-write-values* this value mapper)) + (-write-values* writeValues this value mapper)) + (-write-values-as-array [this value ^ObjectMapper mapper] + (-write-values* writeValuesAsArray this value mapper)) DataOutput (-write-values [this value ^ObjectMapper mapper] - (-write-values* this value mapper)) + (-write-values* writeValues this value mapper)) + (-write-values-as-array [this value ^ObjectMapper mapper] + (-write-values* writeValuesAsArray this value mapper)) Writer (-write-values [this value ^ObjectMapper mapper] - (-write-values* this value mapper))) + (-write-values* writeValues this value mapper)) + (-write-values-as-array [this value ^ObjectMapper mapper] + (-write-values* writeValuesAsArray this value mapper))) ;; ;; public api @@ -394,14 +405,27 @@ (wrap-values (-read-values object mapper)))) (defn write-values - "Encode values as JSON and write using the provided [[WriteValue]] instance. - By default, File, OutputStream, DataOutput and Writer are supported. + "Encodes a sequence of values as JSON, separating values with a line return. + By default, `to` can be a File, OutputStream, DataOutput or Writer. - By default, values can be an array or an Iterable. + By default, `values` can be an array or an Iterable. To configure, pass in an ObjectMapper created with [[object-mapper]], see [[object-mapper]] docstring for the available options." 
- ([to object] - (-write-values to object default-object-mapper)) - ([to object ^ObjectMapper mapper] - (-write-values to object mapper))) + ([to values] + (-write-values to values default-object-mapper)) + ([to values ^ObjectMapper mapper] + (-write-values to values mapper))) + +(defn write-values-as-array + "Encodes a sequence of values as a JSON array. + By default, `to` can be a File, OutputStream, DataOutput or Writer. + + By default, `values` can be an array or an Iterable. + + To configure, pass in an ObjectMapper created with [[object-mapper]], + see [[object-mapper]] docstring for the available options." + ([to values] + (-write-values-as-array to values default-object-mapper)) + ([to values ^ObjectMapper mapper] + (-write-values-as-array to values mapper))) diff --git a/test/jsonista/core_test.clj b/test/jsonista/core_test.clj index 6f05620..e0dc128 100644 --- a/test/jsonista/core_test.clj +++ b/test/jsonista/core_test.clj @@ -305,28 +305,41 @@ (.delete file)))) (deftest write-values-types - (let [original [{"ok" 1}] - expected (j/write-value-as-string original) + (let [original [{"ok" 1} {"ok" 2}] + expected-array (j/write-value-as-string original) + expected-lines (str/join "\n" (mapv j/write-value-as-string original)) file (tmp-file)] (testing "File" (j/write-values file original) - (is (= expected (slurp file))) + (is (= expected-lines (slurp file))) + (.delete file) + (j/write-values-as-array file original) + (is (= expected-array (slurp file))) (.delete file)) (testing "OutputStream" (j/write-values (FileOutputStream. file) original) - (is (= expected (slurp file))) + (is (= expected-lines (slurp file))) + (.delete file) + (j/write-values-as-array (FileOutputStream. file) original) + (is (= expected-array (slurp file))) (.delete file)) (testing "DataOutput" (j/write-values (RandomAccessFile. file "rw") original) - (is (= expected (slurp file))) + (is (= expected-lines (slurp file))) + (.delete file) + (j/write-values-as-array (RandomAccessFile. 
file "rw") original) + (is (= expected-array (slurp file))) (.delete file)) (testing "Writer" (j/write-values (FileWriter. file) original) - (is (= expected (slurp file))) + (is (= expected-lines (slurp file))) + (.delete file) + (j/write-values-as-array (FileWriter. file) original) + (is (= expected-array (slurp file))) (.delete file)))) (deftest read-values-iteration @@ -344,12 +357,23 @@ (is (= (into [] xf original) (into [] xf it))))) (deftest write-values-iterable + (let [original [{"ok" 1} {"ok" 2}] + xf (map #(update % "ok" inc)) + expected "{\"ok\":2}\n{\"ok\":3}" + file (tmp-file) + eduction (->Eduction xf original)] + + (j/write-values file eduction) + (is (= expected (slurp file))) + (.delete file))) + +(deftest write-values-as-array-iterable (let [original [{"ok" 1}] xf (map #(update % "ok" inc)) expected (j/write-value-as-string (into [] xf original)) file (tmp-file) eduction (->Eduction xf original)] - (j/write-values file eduction) + (j/write-values-as-array file eduction) (is (= expected (slurp file))) (.delete file))) From 82e72cfe0d19de002e33403e740a6b0b4c43f5a4 Mon Sep 17 00:00:00 2001 From: Joel Kaasinen Date: Mon, 16 Sep 2024 17:25:55 +0300 Subject: [PATCH 6/6] doc: streaming.md --- README.md | 4 +++ docs/streaming.md | 72 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 docs/streaming.md diff --git a/README.md b/README.md index bf55d4b..db295f0 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,10 @@ includes both reading and writing support. In simple [perf tests](https://github.com/metosin/jsonista/blob/master/test/jsonista/json_perf_test.clj), tagged JSON is much faster than EDN or Transit. +## Streaming + +See [docs/streaming.md](docs/streaming.md). 
+ ## Performance * All standard encoders and decoders are written in Java diff --git a/docs/streaming.md b/docs/streaming.md new file mode 100644 index 0000000..6030e61 --- /dev/null +++ b/docs/streaming.md @@ -0,0 +1,72 @@ +# Streaming JSON with Jsonista + +## JSON Lines (aka JSONL) + +Sometimes you want to store a stream of JSON objects in a file. This is common for things like logging. +This pattern is often called [JSON Lines](https://jsonlines.org/). + +### Writing + +```clj +(jsonista.core/write-values (io/output-stream "/tmp/foo.json") [{"foo" 1} {"bar" 1}]) +``` + +For actual streaming, use a lazy sequence or an eduction instead of a +vector. For example: + +```clj +(jsonista.core/write-values + (io/output-stream "/tmp/foo.json") + (eduction (map (fn [i] {:i i})) (range 100))) +``` + +Alternatively, you can write the values one at a time with `jsonista.core/write-value`, using an object mapper created with `:close false` so that the writer stays open between calls: + +```clj +(let [obj-mapper (jsonista.core/object-mapper {:close false})] + (with-open [out (io/output-stream "/tmp/foo.json") + wrt (io/writer out)] + (jsonista.core/write-value wrt {"foo" 1} obj-mapper) + (.write wrt "\n") + (jsonista.core/write-value wrt {"bar" 1} obj-mapper))) +``` + +### Reading + +```clj +(into [] (jsonista.core/read-values (io/input-stream "/tmp/foo.json"))) +``` + +## Top-level array + +Instead of putting each value on its own line, sometimes you just want +a big JSON array, but don't want to keep all of the data in memory at +once. + +### Writing + +Use `jsonista.core/write-values-as-array`, which works just like `jsonista.core/write-values`. + +### Reading + +Use `jsonista.core/read-values`; it autodetects the format. + +## An array inside an object + +Sometimes you need to stream an array that sits inside an object. 
For this, it's best to drop down to the Jackson [JsonParser API](https://javadoc.io/static/com.fasterxml.jackson.core/jackson-core/2.18.0-rc1/com/fasterxml/jackson/core/JsonParser.html): + +```clj +(let [input "{\"foo\": 1, \"bars\": [{\"bar\": 2},{\"bar\": 3}], \"close\": \"end\"}" + obj-mapper (jsonista.core/object-mapper)] + (with-open [rdr (java.io.StringReader. input)] + (let [p (.. obj-mapper getFactory (createParser rdr))] + ;; position cursor to start of first entry in "bars" + (.nextToken p) ; START_OBJECT + (.nextToken p) ; FIELD_NAME "foo" + (.nextToken p) ; VALUE_NUMBER_INT 1 + (.nextToken p) ; FIELD_NAME "bars" + (.nextToken p) ; START_ARRAY + (.nextToken p) ; START_OBJECT + ;; grab all entries, ignore rest of input + (doall (iterator-seq (.readValuesAs p Object)))))) +```