Skip to content

Commit

Permalink
Merge pull request #491 from gnieh/json/chunk-acc
Browse files Browse the repository at this point in the history
Bring JSON parser on par with circe-fs2
  • Loading branch information
satabin authored Jul 17, 2023
2 parents 70c7443 + 66f34e8 commit 136b493
Show file tree
Hide file tree
Showing 23 changed files with 700 additions and 178 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,11 @@ jobs:

- name: Make target directories
if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main')
run: mkdir -p json/.native/target json/play/.jvm/target text/native/target cbor-json/native/target finite-state/native/target target .js/target documentation/target cbor/js/target finite-state/js/target text/js/target benchmarks/.jvm/target json/play/.js/target json/.jvm/target xml/scala-xml/.native/target csv/jvm/target xml/.jvm/target xml/.js/target cbor/native/target json/circe/.native/target finite-state/jvm/target cbor-json/js/target cbor/jvm/target csv/native/target json/circe/.jvm/target .jvm/target csv/js/target csv/generic/jvm/target .native/target text/jvm/target xml/.native/target json/diffson/.native/target json/diffson/.js/target cbor-json/jvm/target json/interpolators/.jvm/target json/.js/target json/interpolators/.js/target csv/generic/js/target json/circe/.js/target json/diffson/.jvm/target xml/scala-xml/.js/target csv/generic/native/target xml/scala-xml/.jvm/target json/interpolators/.native/target project/target
run: mkdir -p json/.native/target json/play/.jvm/target text/native/target cbor-json/native/target finite-state/native/target scalafix/rules/target target .js/target documentation/target cbor/js/target finite-state/js/target text/js/target scalafix/output/target benchmarks/.jvm/target json/play/.js/target json/.jvm/target xml/scala-xml/.native/target csv/jvm/target xml/.jvm/target xml/.js/target cbor/native/target json/circe/.native/target finite-state/jvm/target scalafix/tests/target cbor-json/js/target cbor/jvm/target csv/native/target json/circe/.jvm/target .jvm/target scalafix/input/target csv/js/target csv/generic/jvm/target .native/target text/jvm/target xml/.native/target json/diffson/.native/target json/diffson/.js/target cbor-json/jvm/target json/interpolators/.jvm/target json/.js/target json/interpolators/.js/target csv/generic/js/target json/circe/.js/target json/diffson/.jvm/target xml/scala-xml/.js/target csv/generic/native/target xml/scala-xml/.jvm/target json/interpolators/.native/target project/target

- name: Compress target directories
if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main')
run: tar cf targets.tar json/.native/target json/play/.jvm/target text/native/target cbor-json/native/target finite-state/native/target target .js/target documentation/target cbor/js/target finite-state/js/target text/js/target benchmarks/.jvm/target json/play/.js/target json/.jvm/target xml/scala-xml/.native/target csv/jvm/target xml/.jvm/target xml/.js/target cbor/native/target json/circe/.native/target finite-state/jvm/target cbor-json/js/target cbor/jvm/target csv/native/target json/circe/.jvm/target .jvm/target csv/js/target csv/generic/jvm/target .native/target text/jvm/target xml/.native/target json/diffson/.native/target json/diffson/.js/target cbor-json/jvm/target json/interpolators/.jvm/target json/.js/target json/interpolators/.js/target csv/generic/js/target json/circe/.js/target json/diffson/.jvm/target xml/scala-xml/.js/target csv/generic/native/target xml/scala-xml/.jvm/target json/interpolators/.native/target project/target
run: tar cf targets.tar json/.native/target json/play/.jvm/target text/native/target cbor-json/native/target finite-state/native/target scalafix/rules/target target .js/target documentation/target cbor/js/target finite-state/js/target text/js/target scalafix/output/target benchmarks/.jvm/target json/play/.js/target json/.jvm/target xml/scala-xml/.native/target csv/jvm/target xml/.jvm/target xml/.js/target cbor/native/target json/circe/.native/target finite-state/jvm/target scalafix/tests/target cbor-json/js/target cbor/jvm/target csv/native/target json/circe/.jvm/target .jvm/target scalafix/input/target csv/js/target csv/generic/jvm/target .native/target text/jvm/target xml/.native/target json/diffson/.native/target json/diffson/.js/target cbor-json/jvm/target json/interpolators/.jvm/target json/.js/target json/interpolators/.js/target csv/generic/js/target json/circe/.js/target json/diffson/.jvm/target xml/scala-xml/.js/target csv/generic/native/target xml/scala-xml/.jvm/target json/interpolators/.native/target project/target

- name: Upload target directories
if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main')
Expand Down
2 changes: 1 addition & 1 deletion .scalafmt.conf
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ rewrite.rules = [
prefercurlyfors
]

runner.dialect = scala213
runner.dialect = scala213source3
fileOverride {
"glob:**/src/*/scala-3/**" {
runner.dialect = scala3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,15 @@ class JsonParserBenchmarks {
.unsafeRunSync()

@Benchmark
def parseJsonFs2DataValues() =
def parseJsonFs2DataParse() =
jsonStream
.through(ast.parse)
.compile
.drain
.unsafeRunSync()

@Benchmark
def parseJsonFs2DataTokensValues() =
jsonStream
.through(tokens)
.through(ast.values)
Expand Down
36 changes: 35 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,8 @@ lazy val documentation = project
"com.beachape" %% "enumeratum" % "1.7.0",
"org.gnieh" %% "diffson-circe" % diffsonVersion,
"io.circe" %% "circe-generic-extras" % circeExtrasVersion,
"co.fs2" %% "fs2-io" % fs2Version
"co.fs2" %% "fs2-io" % fs2Version,
"io.circe" %% "circe-fs2" % "0.14.1"
),
scalacOptions += "-Ymacro-annotations"
)
Expand Down Expand Up @@ -564,6 +565,39 @@ lazy val benchmarks = crossProject(JVMPlatform)
)
.dependsOn(csv, scalaXml, jsonCirce)

// Project rooted at `scalafix/input`. The Scalafix plugin is disabled on it and it
// depends on the JVM build of `jsonCirce`. Its Compile settings (source directories,
// classpath, scalac options, Scala version) are read by `scalafixTests` below.
lazy val scalafixInput = (project in file("scalafix/input"))
.disablePlugins(ScalafixPlugin)
.dependsOn(jsonCirce.jvm)

// Project rooted at `scalafix/output`. The Scalafix plugin is disabled on it and it
// depends on the JVM build of `jsonCirce`. Its Compile source directories are used as
// the testkit output source directories by `scalafixTests` below.
lazy val scalafixOutput = (project in file("scalafix/output"))
.disablePlugins(ScalafixPlugin)
.dependsOn(jsonCirce.jvm)

// Project rooted at `scalafix/rules`, with the Scalafix plugin disabled.
// It adds `scalafix-core` as a library dependency, at the version reported by the
// sbt-scalafix build info (`_root_` disambiguates from any local `scalafix` name).
lazy val scalafixRules = (project in file("scalafix/rules"))
.disablePlugins(ScalafixPlugin)
.settings(
libraryDependencies +=
"ch.epfl.scala" %%
"scalafix-core" %
_root_.scalafix.sbt.BuildInfo.scalafixVersion
)

// Project rooted at `scalafix/tests`, with the Scalafix testkit plugin enabled.
// Every testkit key is wired to the Compile configuration of a sibling project:
// expected sources come from `scalafixOutput`; input sources, classpath, scalac
// options and Scala version come from `scalafixInput`.
lazy val scalafixTests = (project in file("scalafix/tests"))
.settings(
// expected result of applying the rules
scalafixTestkitOutputSourceDirectories :=
(scalafixOutput / Compile / sourceDirectories).value,
// sources the rules are applied to
scalafixTestkitInputSourceDirectories :=
(scalafixInput / Compile / sourceDirectories).value,
scalafixTestkitInputClasspath :=
(scalafixInput / Compile / fullClasspath).value,
scalafixTestkitInputScalacOptions :=
(scalafixInput / Compile / scalacOptions).value,
scalafixTestkitInputScalaVersion :=
(scalafixInput / Compile / scalaVersion).value
)
.dependsOn(scalafixInput, scalafixRules)
.enablePlugins(ScalafixTestkitPlugin)

// Utils

def onScala2[T](version: String)(values: => List[T]): List[T] = PartialFunction
Expand Down
25 changes: 23 additions & 2 deletions documentation/docs/json/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,18 +111,39 @@ root.index(1).!

To handle Json ASTs, you can use the types and pipes available in the `fs2.data.json.ast` package.

JSON ASTs can be built if you provide an implicit [`Builder[Json]`][builder-api] to the `values` pipe. The `Builder[Json]` typeclass describes how JSON ASTs of type `Json` are built from streams.
#### Parsing a stream to values

If you are not interested in tokens, but instead want to parse the input stream into a stream of AST values, you can use the `ast.parse` pipe, provided you have an implicit [`Builder[Json]`][builder-api] in scope. The `Builder[Json]` typeclass describes how JSON ASTs of type `Json` are built from events generated by the parser.

```scala mdoc:compile-only
import ast._

trait SomeJsonType

implicit val builder: Builder[SomeJsonType] = ???

Stream.emit(input).covary[Fallible].through(parse)
```

The resulting stream emits all top-level JSON values parsed; in our example, the two objects are emitted.

#### From `Token`s to values

JSON ASTs can be built from an existing token stream, provided you have an implicit [`Builder[Json]`][builder-api], using the `values` pipe.

```scala mdoc:compile-only
import ast._

trait SomeJsonType

implicit val builder: Builder[SomeJsonType] = ???

stream.through(values[Fallible, SomeJsonType])
```

The `asts` stream emits all top-level JSON values parsed, in our example, the two objects are emitted.
**Note:** even though this snippet is equivalent in result to the one using `ast.parse`, it is less efficient. If you are only interested in the values, you should always use `ast.parse`.

#### From values to `Token`s

If you provide an implicit [`Tokenizer[Json]`][tokenizer-api], which describes how a JSON AST is transformed into JSON tokens, you can apply transformations to the JSON stream. For instance, you can apply a function `fun` to all values in the `fields3` array by using this code:

Expand Down
52 changes: 42 additions & 10 deletions documentation/docs/json/libraries.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,20 @@ Examples on this page use the following input:
```scala mdoc
import fs2.{Fallible, Stream}
import fs2.data.json._
import fs2.data.json.literals._
import fs2.data.json.jsonpath._
import fs2.data.json.jsonpath.literals._

val stream = json"""{
def input[F[_]] = Stream.emit("""{
"field1": 0,
"field2": "test",
"field3": [1, 2, 3]
}
{
"field1": 2,
"field3": []
}"""
}""").covary[F]

val stream = input[Fallible].through(tokens)

val sel = jsonpath"$$.field3[*]"
```
Expand All @@ -41,19 +42,16 @@ For instance both examples from the [core module documentation][json-doc] with c

```scala mdoc:nest
import fs2.data.json.circe._
import io.circe._

val asts = stream.through(ast.values[Fallible, Json])
asts.compile.toList
val asts = input[Fallible].through(ast.parse)
asts.map(_.spaces2).compile.toList
```

You can use `filter.values` to select only the values matching the JSONPath and deserialize them using the builder.

```scala mdoc:nest
import fs2.data.json.circe._

import io.circe._

import cats.effect._
import cats.syntax.all._
import cats.effect.unsafe.implicits.global
Expand Down Expand Up @@ -86,7 +84,6 @@ case class Wrapped(test: Int)
```scala mdoc:nest
import fs2.data.json.selector._
import fs2.data.json.circe._
import io.circe._

val values = stream.through(codec.deserialize[Fallible, Data])
values.compile.toList
Expand All @@ -104,7 +101,6 @@ Dropping values can be done similarly.

```scala mdoc:nest
import fs2.data.json.circe._
import io.circe._
import cats.syntax.all._

val f1 = root.field("field1").compile
Expand All @@ -113,6 +109,40 @@ val transformed = stream.through(codec.transformOpt(f1, (i: Int) => (i > 0).guar
transformed.compile.to(collector.pretty())
```

#### Migrating from `circe-fs2`

If you were using [`circe-fs2`][circe-fs2] to emit streams of `Json` values, you can easily switch to `fs2-data-json-circe`. Just replace your usages of `stringStreamParser` or `byteStreamParser` with `fs2.data.json.ast.parse`.

For instance if you had this code:

```scala mdoc:nest
import io.circe.fs2._

import cats.effect._

input[SyncIO]
.through(stringStreamParser)
.map(_.spaces2)
.compile
.toList
.unsafeRunSync()
```

You can replace it with:

```scala mdoc:nest
import fs2.data.json._
import fs2.data.json.circe._

input[Fallible]
.through(ast.parse)
.map(_.spaces2)
.compile
.toList
```

If you were using `byteStreamParser`, please refer to the [`fs2.data.text` package documentation][text] to see how to decode the byte stream.

### Play! JSON

Module: [![Maven Central](https://img.shields.io/maven-central/v/org.gnieh/fs2-data-json-play_2.13.svg)](https://mvnrepository.com/artifact/org.gnieh/fs2-data-json-play_2.13)
Expand All @@ -124,3 +154,5 @@ It also provides `Deserializer` for types with a `Reads` instance and `Serialize
[json-doc]: /documentation/json/
[circe]: https://circe.github.io/circe/
[play-json]: https://www.playframework.com/
[circe-fs2]: https://github.com/circe/circe-fs2
[text]: /documentation/#decoding-textual-inputs
32 changes: 32 additions & 0 deletions json/src/main/scala-2.12/fs2/data/package.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Copyright 2023 Lucas Satabin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package fs2

import scala.collection.immutable.VectorBuilder
import scala.collection.mutable.ListBuffer

package object data {

  /** Adds an `addOne` method to `VectorBuilder`, implemented by delegating to `+=`.
    *
    * NOTE(review): this file sits under the `scala-2.12` source directory, so it
    * presumably exists to let code written against the 2.13 `addOne` API compile
    * on 2.12 — confirm.
    */
  implicit class VectorBuilderOps[T](val builder: VectorBuilder[T]) extends AnyVal {
    // appends `t` and yields whatever `+=` yields
    def addOne(t: T) = builder.+=(t)
  }

  /** Adds an `addOne` method to `ListBuffer`, implemented by delegating to `+=`. */
  implicit class ListBufferOps[T](val buffer: ListBuffer[T]) extends AnyVal {
    // appends `t` and yields whatever `+=` yields
    def addOne(t: T) = buffer.+=(t)
  }

}
24 changes: 21 additions & 3 deletions json/src/main/scala/fs2/data/json/ast/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@
*/

package fs2
package data.json

import internals.{TokenSelector, ValueParser}
package data
package json

import cats.syntax.all._

import text.{AsCharBuffer, CharLikeChunks}
import internals.{TokenSelector, ValueParser, JsonTokenParser, LegacyTokenParser, BuilderChunkAccumulator, State}

package object ast {

/** Transforms a stream of token into another one. The transformation function `f` is
Expand Down Expand Up @@ -84,6 +86,22 @@ package object ast {
def values[F[_], Json](implicit F: RaiseThrowable[F], builder: Builder[Json]): Pipe[F, Token, Json] =
ValueParser.pipe[F, Json]

/** Parses a stream of characters into a stream of Json values.
  *
  * The parser implementation is selected from the runtime class of the
  * `CharLikeChunks` instance: a `JsonTokenParser` started in
  * `State.BeforeValue` when the instance is an `AsCharBuffer`, and a
  * `LegacyTokenParser` otherwise. In both cases values are assembled by a
  * `BuilderChunkAccumulator` wrapping the implicit `builder`.
  *
  * Parser and accumulator are created inside `Stream.suspend`, so fresh
  * instances are allocated each time the resulting stream is evaluated.
  */
def parse[F[_], T, Json](implicit
    F: RaiseThrowable[F],
    T: CharLikeChunks[F, T],
    builder: Builder[Json]): Pipe[F, T, Json] = { input =>
  T match {
    case charBuffer: AsCharBuffer[F, T] =>
      Stream.suspend {
        val parser =
          new JsonTokenParser[F, T, Json](input, new BuilderChunkAccumulator(builder))(F, charBuffer)
        parser.go_(State.BeforeValue).stream
      }
    case _ =>
      Stream.suspend {
        val parser = new LegacyTokenParser[F, T, Json](input)
        parser.parse(new BuilderChunkAccumulator(builder)).stream
      }
  }
}

/** Transforms a stream of Json values into a stream of Json tokens.
*
* This operation is the opposite of `values`.
Expand Down
Loading

0 comments on commit 136b493

Please sign in to comment.