From 100946421e13b422067bd75609ae98c129addc0e Mon Sep 17 00:00:00 2001
From: Lucas Satabin
Date: Thu, 22 Jun 2023 19:02:47 +0200
Subject: [PATCH 01/10] Abstract over accumulator chunk

This abstraction makes it possible to create chunks of various types out of an
input stream. One possible accumulator is the `Token` accumulator, which
generates the token stream. Leveraging this abstraction, we can then implement
a pipe that builds AST values directly, without going through an intermediate
`Token` representation.
---
 .../data/json/internal/ChunkAccumulator.scala | 72 +++++++++++
 .../data/json/internal/JsonTokenParser.scala  | 113 +++++++++---------
 .../json/internal/LegacyTokenParser.scala     | 77 ++++++------
 .../json/internal/TokenChunkAccumulator.scala | 70 +++++++++++
 .../fs2/data/json/internal/TokenParser.scala  | 5 +-
 .../scala/fs2/data/json/JsonParsertest.scala  | 3 +-
 6 files changed, 245 insertions(+), 95 deletions(-)
 create mode 100644 json/src/main/scala/fs2/data/json/internal/ChunkAccumulator.scala
 create mode 100644 json/src/main/scala/fs2/data/json/internal/TokenChunkAccumulator.scala

diff --git a/json/src/main/scala/fs2/data/json/internal/ChunkAccumulator.scala b/json/src/main/scala/fs2/data/json/internal/ChunkAccumulator.scala
new file mode 100644
index 000000000..c718cab97
--- /dev/null
+++ b/json/src/main/scala/fs2/data/json/internal/ChunkAccumulator.scala
@@ -0,0 +1,72 @@
+package fs2
+package data
+package json
+package internals
+
+/** A chunk accumulator handles the events emitted by the JSON parser.
+ * It is an abstraction representing some accumulation of results
+ * that will be emitted in the chunk returned by the `chunk` method.
+ *
+ * When calling `flush`, all the fully constructed values are cleared from the
+ * chunk under construction, but results still being built are kept.
+ * This makes it possible to accumulate values across upstream chunk boundaries.
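+ *
+ * For illustration, a minimal usage sketch with the `Token` accumulator
+ * added in this change (the input events are made up):
+ * {{{
+ * val acc: ChunkAccumulator[Token] = new TokenChunkAccumulator
+ * acc.startObject().key("a").nullValue().endObject()
+ * acc.chunk() // Chunk(StartObject, Key("a"), NullValue, EndObject)
+ * acc.flush() // clears the already constructed values before the next upstream chunk
+ * }}}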
+ */ +private[internals] trait ChunkAccumulator[Res] { + + def startObject(): this.type + + def key(key: String): this.type + + def endObject(): this.type + + def startArray(): this.type + + def endArray(): this.type + + def nullValue(): this.type + + def trueValue(): this.type + + def falseValue(): this.type + + def numberValue(value: String): this.type + + def stringValue(value: String): this.type + + def chunk(): Chunk[Res] + + def flush(): this.type + +} + +private[internals] object ChunkAccumulator { + + def empty[Res]: ChunkAccumulator[Res] = new ChunkAccumulator[Res] { + + override def startObject(): this.type = this + + override def key(key: String): this.type = this + + override def endObject(): this.type = this + + override def startArray(): this.type = this + + override def endArray(): this.type = this + + override def nullValue(): this.type = this + + override def trueValue(): this.type = this + + override def falseValue(): this.type = this + + override def numberValue(value: String): this.type = this + + override def stringValue(value: String): this.type = this + + override def chunk(): Chunk[Res] = Chunk.empty + + override def flush(): this.type = this + + } + +} diff --git a/json/src/main/scala/fs2/data/json/internal/JsonTokenParser.scala b/json/src/main/scala/fs2/data/json/internal/JsonTokenParser.scala index c292c5b34..cc1975bc1 100644 --- a/json/src/main/scala/fs2/data/json/internal/JsonTokenParser.scala +++ b/json/src/main/scala/fs2/data/json/internal/JsonTokenParser.scala @@ -22,22 +22,26 @@ package internals import fs2.data.text.AsCharBuffer import scala.annotation.switch -import scala.collection.immutable.VectorBuilder import TokenParser._ -private class JsonTokenParser[F[_], T](s: Stream[F, T])(implicit F: RaiseThrowable[F], T: AsCharBuffer[F, T]) { +private class JsonTokenParser[F[_], T, Res](s: Stream[F, T], private[this] final val chunkAcc: ChunkAccumulator[Res])( + implicit + F: RaiseThrowable[F], + T: AsCharBuffer[F, T]) { private[this] var context = T.create(s) - private[this] final val chunkAcc = new VectorBuilder[Token] + + private[this] def emitChunk[T]() = + Pull.output(chunkAcc.chunk()) // the opening quote has already been read - private final def string_(key: Boolean, acc: StringBuilder): Pull[F, Token, Unit] = + private final def string_(key: Boolean, acc: StringBuilder): Pull[F, Res, Unit] = if (T.needsPull(context)) { T.appendMarked(context, acc) - emitChunk(chunkAcc) >> T.pullNext(context).flatMap { + emitChunk() >> T.pullNext(context).flatMap { case Some(context) => this.context = context - chunkAcc.clear() + chunkAcc.flush() string_(key, acc) case None => Pull.raiseError[F](new JsonException("unexpected end of input")) } @@ -46,9 +50,8 @@ private class JsonTokenParser[F[_], T](s: Stream[F, T])(implicit F: RaiseThrowab case '"' => T.appendMarked(context, acc) val res = acc.result() - val token = if (key) Token.Key(res) else Token.StringValue(res) + if (key) chunkAcc.key(res) else chunkAcc.stringValue(res) T.advance(context) - chunkAcc += token Pull.done case '\\' => T.appendMarked(context, acc) @@ -59,18 +62,18 @@ private class JsonTokenParser[F[_], T](s: Stream[F, T])(implicit F: RaiseThrowab T.advance(context) string_(key, acc) } else { - emitChunk(chunkAcc) >> Pull.raiseError[F](new JsonException(s"invalid string character '$c'")) + emitChunk() >> Pull.raiseError[F](new JsonException(s"invalid string character '$c'")) } } } - private final def slowString_(key: Boolean, state: Int, unicode: Int, acc: StringBuilder): Pull[F, Token, Unit] = { + 
private final def slowString_(key: Boolean, state: Int, unicode: Int, acc: StringBuilder): Pull[F, Res, Unit] = { if (T.needsPull(context)) { T.appendMarked(context, acc) - emitChunk(chunkAcc) >> T.pullNext(context).flatMap { + emitChunk() >> T.pullNext(context).flatMap { case Some(context) => this.context = context - chunkAcc.clear() + chunkAcc.flush() slowString_(key, state, unicode, acc) case None => Pull.raiseError[F](new JsonException("unexpected end of input")) } @@ -91,15 +94,14 @@ private class JsonTokenParser[F[_], T](s: Stream[F, T])(implicit F: RaiseThrowab case 't' => slowString_(key, StringState.Normal, 0, acc.append('\t')) case 'u' => slowString_(key, StringState.Expect4Unicode, 0, acc) case _ => - emitChunk(chunkAcc) >> Pull.raiseError[F](new JsonException(s"unknown escaped character '$c'")) + emitChunk() >> Pull.raiseError[F](new JsonException(s"unknown escaped character '$c'")) } case StringState.Normal => if (c == '"') { T.appendMarked(context, acc) val res = acc.result() - val token = if (key) Token.Key(res) else Token.StringValue(res) + if (key) chunkAcc.key(res) else chunkAcc.stringValue(res) T.advance(context) - chunkAcc += token Pull.done } else if (c == '\\') { T.appendMarked(context, acc) @@ -109,7 +111,7 @@ private class JsonTokenParser[F[_], T](s: Stream[F, T])(implicit F: RaiseThrowab T.advance(context) slowString_(key, StringState.Normal, 0, acc) } else - emitChunk(chunkAcc) >> Pull.raiseError[F](new JsonException(s"invalid string character '$c'")) + emitChunk() >> Pull.raiseError[F](new JsonException(s"invalid string character '$c'")) case n /* StringState.ExpectNUnicode */ => val cidx = hexa.indexOf(c.toLower.toInt) if (cidx >= 0) { @@ -123,13 +125,13 @@ private class JsonTokenParser[F[_], T](s: Stream[F, T])(implicit F: RaiseThrowab slowString_(key, n - 1, unicode1, acc) } } else { - emitChunk(chunkAcc) >> Pull.raiseError[F](new JsonException("malformed escaped unicode sequence")) + emitChunk() >> Pull.raiseError[F](new JsonException("malformed escaped unicode sequence")) } } } } - private final def number_(state: Int, acc: StringBuilder): Pull[F, Token, Unit] = { + private final def number_(state: Int, acc: StringBuilder): Pull[F, Res, Unit] = { def step(c: Char, state: Int): Int = (c: @switch) match { case '-' => @@ -191,17 +193,18 @@ private class JsonTokenParser[F[_], T](s: Stream[F, T])(implicit F: RaiseThrowab if (T.needsPull(context)) { T.appendMarked(context, acc) - emitChunk(chunkAcc) >> T.pullNext(context).flatMap { + emitChunk() >> T.pullNext(context).flatMap { case Some(context) => this.context = context - chunkAcc.clear() + chunkAcc.flush() number_(state, acc) case None => this.context = T.create(Stream.empty) - chunkAcc.clear() - if (NumberState.isFinal(state)) - Pull.output1(Token.NumberValue(acc.result())) - else + chunkAcc.flush() + if (NumberState.isFinal(state)) { + chunkAcc.numberValue(acc.result()) + Pull.done + } else Pull.raiseError[F](new JsonException("unexpected end of input")) } } else { @@ -210,10 +213,10 @@ private class JsonTokenParser[F[_], T](s: Stream[F, T])(implicit F: RaiseThrowab case NumberState.Invalid => if (NumberState.isFinal(state)) { T.appendMarked(context, acc) - chunkAcc += Token.NumberValue(acc.result()) + chunkAcc.numberValue(acc.result()) Pull.done } else - emitChunk(chunkAcc) >> Pull.raiseError[F](new JsonException(s"invalid number character '$c'")) + emitChunk() >> Pull.raiseError[F](new JsonException(s"invalid number character '$c'")) case state => T.advance(context) number_(state, acc) @@ -221,13 +224,13 @@ 
private class JsonTokenParser[F[_], T](s: Stream[F, T])(implicit F: RaiseThrowab } } - private final def keyword_(expected: String, eidx: Int, elen: Int, token: Token): Pull[F, Token, Unit] = { + private final def keyword_(expected: String, eidx: Int, elen: Int, accumulate: () => ChunkAccumulator[Res]): Pull[F, Res, Unit] = { if (T.needsPull(context)) { - emitChunk(chunkAcc) >> T.pullNext(context).flatMap { + emitChunk() >> T.pullNext(context).flatMap { case Some(context) => this.context = context - chunkAcc.clear() - keyword_(expected, eidx, elen, token) + chunkAcc.flush() + keyword_(expected, eidx, elen, accumulate) case None => Pull.raiseError[F](new JsonException("unexpected end of input")) } } else { @@ -235,24 +238,24 @@ private class JsonTokenParser[F[_], T](s: Stream[F, T])(implicit F: RaiseThrowab if (c == expected.charAt(eidx)) { if (eidx == elen - 1) { T.advance(context) - chunkAcc += token + accumulate() Pull.done } else { T.advance(context) - keyword_(expected, eidx + 1, elen, token) + keyword_(expected, eidx + 1, elen, accumulate) } } else { - emitChunk(chunkAcc) >> Pull.raiseError[F](new JsonException(s"unexpected character '$c' (expected $expected)")) + emitChunk() >> Pull.raiseError[F](new JsonException(s"unexpected character '$c' (expected $expected)")) } } } - private final def value_(state: Int)(implicit F: RaiseThrowable[F]): Pull[F, Token, Unit] = + private final def value_(state: Int)(implicit F: RaiseThrowable[F]): Pull[F, Res, Unit] = if (T.needsPull(context)) { - emitChunk(chunkAcc) >> T.pullNext(context).flatMap { + emitChunk() >> T.pullNext(context).flatMap { case Some(context) => this.context = context - chunkAcc.clear() + chunkAcc.flush() value_(state) case None => Pull.raiseError[F](new JsonException("unexpected end of input")) } @@ -261,15 +264,15 @@ private class JsonTokenParser[F[_], T](s: Stream[F, T])(implicit F: RaiseThrowab (c: @switch) match { case '{' => T.advance(context) - chunkAcc += Token.StartObject + chunkAcc.startObject() Pull.suspend(go_(State.BeforeObjectKey)) case '[' => T.advance(context) - chunkAcc += Token.StartArray + chunkAcc.startArray() Pull.suspend(go_(State.BeforeArrayValue)) - case 't' => keyword_("true", 0, 4, Token.TrueValue) - case 'f' => keyword_("false", 0, 5, Token.FalseValue) - case 'n' => keyword_("null", 0, 4, Token.NullValue) + case 't' => keyword_("true", 0, 4, chunkAcc.trueValue) + case 'f' => keyword_("false", 0, 5, chunkAcc.falseValue) + case 'n' => keyword_("null", 0, 4, chunkAcc.nullValue) case '"' => T.advance(context) T.mark(context) @@ -277,20 +280,20 @@ private class JsonTokenParser[F[_], T](s: Stream[F, T])(implicit F: RaiseThrowab case '-' | '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => T.mark(context) number_(NumberState.NumberStart, new StringBuilder(numberBufferCapacity)) - case _ => emitChunk(chunkAcc) >> Pull.raiseError[F](new JsonException(s"unexpected '$c'")) + case _ => emitChunk() >> Pull.raiseError[F](new JsonException(s"unexpected '$c'")) } } - final def go_(state: Int): Pull[F, Token, Unit] = { + final def go_(state: Int): Pull[F, Res, Unit] = { if (T.needsPull(context)) { - emitChunk(chunkAcc) >> T.pullNext(context).flatMap { + emitChunk() >> T.pullNext(context).flatMap { case Some(context) => this.context = context - chunkAcc.clear() + chunkAcc.flush() go_(state) case None => this.context = T.create(Stream.empty) - chunkAcc.clear() + chunkAcc.flush() Pull.done } } else { @@ -311,10 +314,10 @@ private class JsonTokenParser[F[_], T](s: Stream[F, T])(implicit F: RaiseThrowab 
string_(true, new StringBuilder(keyBufferCapacity)) >> go_(State.AfterObjectKey) case '}' => T.advance(context) - chunkAcc += Token.EndObject + chunkAcc.endObject() Pull.done case _ => - emitChunk(chunkAcc) >> Pull.raiseError[F](new JsonException(s"unexpected '$c' before object key")) + emitChunk() >> Pull.raiseError[F](new JsonException(s"unexpected '$c' before object key")) } case State.ExpectObjectKey => (c: @switch) match { @@ -323,7 +326,7 @@ private class JsonTokenParser[F[_], T](s: Stream[F, T])(implicit F: RaiseThrowab T.mark(context) string_(true, new StringBuilder(keyBufferCapacity)) >> go_(State.AfterObjectKey) case _ => - emitChunk(chunkAcc) >> Pull.raiseError[F](new JsonException(s"unexpected '$c' before object key")) + emitChunk() >> Pull.raiseError[F](new JsonException(s"unexpected '$c' before object key")) } case State.AfterObjectKey => (c: @switch) match { @@ -331,7 +334,7 @@ private class JsonTokenParser[F[_], T](s: Stream[F, T])(implicit F: RaiseThrowab T.advance(context) go_(State.BeforeObjectValue) case c => - emitChunk(chunkAcc) >> Pull.raiseError[F](new JsonException(s"unexpected '$c' after object key")) + emitChunk() >> Pull.raiseError[F](new JsonException(s"unexpected '$c' after object key")) } case State.BeforeObjectValue => value_(State.AfterObjectValue) >> go_(State.AfterObjectValue) @@ -342,10 +345,10 @@ private class JsonTokenParser[F[_], T](s: Stream[F, T])(implicit F: RaiseThrowab go_(State.ExpectObjectKey) case '}' => T.advance(context) - chunkAcc += Token.EndObject + chunkAcc.endObject() Pull.done case c => - emitChunk(chunkAcc) >> Pull.raiseError[F](new JsonException(s"unexpected '$c' after object value")) + emitChunk() >> Pull.raiseError[F](new JsonException(s"unexpected '$c' after object value")) } case State.ExpectArrayValue => value_(State.AfterArrayValue) >> go_(State.AfterArrayValue) @@ -353,7 +356,7 @@ private class JsonTokenParser[F[_], T](s: Stream[F, T])(implicit F: RaiseThrowab (c: @switch) match { case ']' => T.advance(context) - chunkAcc += Token.EndArray + chunkAcc.endArray() Pull.done case _ => value_(State.AfterArrayValue) >> go_(State.AfterArrayValue) @@ -362,13 +365,13 @@ private class JsonTokenParser[F[_], T](s: Stream[F, T])(implicit F: RaiseThrowab (c: @switch) match { case ']' => T.advance(context) - chunkAcc += Token.EndArray + chunkAcc.endArray() Pull.done case ',' => T.advance(context) go_(State.ExpectArrayValue) case c => - emitChunk(chunkAcc) >> Pull.raiseError[F](new JsonException(s"unexpected '$c' after array value")) + emitChunk() >> Pull.raiseError[F](new JsonException(s"unexpected '$c' after array value")) } } } diff --git a/json/src/main/scala/fs2/data/json/internal/LegacyTokenParser.scala b/json/src/main/scala/fs2/data/json/internal/LegacyTokenParser.scala index 064a8ca63..ff6aed95b 100644 --- a/json/src/main/scala/fs2/data/json/internal/LegacyTokenParser.scala +++ b/json/src/main/scala/fs2/data/json/internal/LegacyTokenParser.scala @@ -20,24 +20,27 @@ package json package internals import fs2.data.text.CharLikeChunks -import scala.collection.immutable.VectorBuilder + import scala.annotation.switch -private[json] class LegacyTokenParser[F[_], T](s: Stream[F, T])(implicit +private[json] class LegacyTokenParser[F[_], T, Res](s: Stream[F, T])(implicit F: RaiseThrowable[F], val T: CharLikeChunks[F, T]) { - private[this] final def empty = (T.create(Stream.empty), new VectorBuilder[Token]) + private[this] final def empty = (T.create(Stream.empty), ChunkAccumulator.empty[Res]) private[this] final val keyAcc = new 
StringBuilder(TokenParser.keyBufferCapacity) + private[this] def emitChunk(chunkAcc: ChunkAccumulator[Res]) = + Pull.output(chunkAcc.chunk()) + // the opening quote has already been read private final def string_(context: T.Context, key: Boolean, acc: StringBuilder, - chunkAcc: VectorBuilder[Token]): Pull[F, Token, (T.Context, VectorBuilder[Token])] = + chunkAcc: ChunkAccumulator[Res]): Pull[F, Res, (T.Context, ChunkAccumulator[Res])] = if (T.needsPull(context)) { emitChunk(chunkAcc) >> T.pullNext(context).flatMap { case Some(context) => - chunkAcc.clear() + chunkAcc.flush() string_(context, key, acc, chunkAcc) case None => Pull.raiseError[F](new JsonException("unexpected end of input")) } @@ -46,8 +49,8 @@ private[json] class LegacyTokenParser[F[_], T](s: Stream[F, T])(implicit case '"' => val res = acc.result() acc.clear() - val token = if (key) Token.Key(res) else Token.StringValue(res) - Pull.pure((T.advance(context), chunkAcc += token)) + if (key) chunkAcc.key(res) else chunkAcc.stringValue(res) + Pull.pure((T.advance(context), chunkAcc)) case '\\' => slowString_(T.advance(context), key, StringState.SeenBackslash, 0, acc, chunkAcc) case c => @@ -63,11 +66,11 @@ private[json] class LegacyTokenParser[F[_], T](s: Stream[F, T])(implicit state: Int, unicode: Int, acc: StringBuilder, - chunkAcc: VectorBuilder[Token]): Pull[F, Token, (T.Context, VectorBuilder[Token])] = { + chunkAcc: ChunkAccumulator[Res]): Pull[F, Res, (T.Context, ChunkAccumulator[Res])] = { if (T.needsPull(context)) { emitChunk(chunkAcc) >> T.pullNext(context).flatMap { case Some(context) => - chunkAcc.clear() + chunkAcc.flush() slowString_(context, key, state, unicode, acc, chunkAcc) case None => Pull.raiseError[F](new JsonException("unexpected end of input")) } @@ -92,8 +95,8 @@ private[json] class LegacyTokenParser[F[_], T](s: Stream[F, T])(implicit if (c == '"') { val res = acc.result() acc.clear() - val token = if (key) Token.Key(res) else Token.StringValue(res) - Pull.pure((T.advance(context), chunkAcc += token)) + if (key) chunkAcc.key(res) else chunkAcc.stringValue(res) + Pull.pure((T.advance(context), chunkAcc)) } else if (c == '\\') slowString_(T.advance(context), key, StringState.SeenBackslash, 0, acc, chunkAcc) else if (c >= 0x20 && c <= 0x10ffff) @@ -124,7 +127,7 @@ private[json] class LegacyTokenParser[F[_], T](s: Stream[F, T])(implicit private final def number_(context: T.Context, state: Int, acc: StringBuilder, - chunkAcc: VectorBuilder[Token]): Pull[F, Token, (T.Context, VectorBuilder[Token])] = { + chunkAcc: ChunkAccumulator[Res]): Pull[F, Res, (T.Context, ChunkAccumulator[Res])] = { def step(c: Char, state: Int): Int = (c: @switch) match { case '-' => @@ -187,11 +190,11 @@ private[json] class LegacyTokenParser[F[_], T](s: Stream[F, T])(implicit if (T.needsPull(context)) { emitChunk(chunkAcc) >> T.pullNext(context).flatMap { case Some(context) => - chunkAcc.clear() + chunkAcc.flush() number_(context, state, acc, chunkAcc) case None => if (NumberState.isFinal(state)) - Pull.output1(Token.NumberValue(acc.result())).as(empty) + Pull.pure(T.create(Stream.empty), chunkAcc.numberValue(acc.result())) else Pull.raiseError[F](new JsonException("unexpected end of input")) } @@ -200,7 +203,7 @@ private[json] class LegacyTokenParser[F[_], T](s: Stream[F, T])(implicit (step(c, state): @switch) match { case NumberState.Invalid => if (NumberState.isFinal(state)) - Pull.pure((context, chunkAcc += Token.NumberValue(acc.result()))) + Pull.pure((context, chunkAcc.numberValue(acc.result()))) else emitChunk(chunkAcc) >> 
Pull.raiseError[F](new JsonException(s"invalid number character '$c'")) case state => @@ -213,34 +216,34 @@ private[json] class LegacyTokenParser[F[_], T](s: Stream[F, T])(implicit expected: String, eidx: Int, elen: Int, - token: Token, - chunkAcc: VectorBuilder[Token]): Pull[F, Token, (T.Context, VectorBuilder[Token])] = { + accumulate: ChunkAccumulator[Res] => ChunkAccumulator[Res], + chunkAcc: ChunkAccumulator[Res]): Pull[F, Res, (T.Context, ChunkAccumulator[Res])] = { if (T.needsPull(context)) { emitChunk(chunkAcc) >> T.pullNext(context).flatMap { case Some(context) => - chunkAcc.clear() - keyword_(context, expected, eidx, elen, token, chunkAcc) + chunkAcc.flush() + keyword_(context, expected, eidx, elen, accumulate, chunkAcc) case None => Pull.raiseError[F](new JsonException("unexpected end of input")) } } else { val c = T.current(context) if (c == expected.charAt(eidx)) { if (eidx == elen - 1) - Pull.pure((T.advance(context), chunkAcc += token)) + Pull.pure((T.advance(context), accumulate(chunkAcc))) else - keyword_(T.advance(context), expected, eidx + 1, elen, token, chunkAcc) + keyword_(T.advance(context), expected, eidx + 1, elen, accumulate, chunkAcc) } else { emitChunk(chunkAcc) >> Pull.raiseError[F](new JsonException(s"unexpected character '$c' (expected $expected)")) } } } - private final def value_(context: T.Context, state: Int, chunkAcc: VectorBuilder[Token])(implicit - F: RaiseThrowable[F]): Pull[F, Token, (T.Context, VectorBuilder[Token])] = + private final def value_(context: T.Context, state: Int, chunkAcc: ChunkAccumulator[Res])(implicit + F: RaiseThrowable[F]): Pull[F, Res, (T.Context, ChunkAccumulator[Res])] = if (T.needsPull(context)) { emitChunk(chunkAcc) >> T.pullNext(context).flatMap { case Some(context) => - chunkAcc.clear() + chunkAcc.flush() value_(context, state, chunkAcc) case None => Pull.raiseError[F](new JsonException("unexpected end of input")) } @@ -248,12 +251,12 @@ private[json] class LegacyTokenParser[F[_], T](s: Stream[F, T])(implicit val c = T.current(context) (c: @switch) match { case '{' => - Pull.suspend(go_(T.advance(context), State.BeforeObjectKey, chunkAcc += Token.StartObject)) + Pull.suspend(go_(T.advance(context), State.BeforeObjectKey, chunkAcc.startObject())) case '[' => - Pull.suspend(go_(T.advance(context), State.BeforeArrayValue, chunkAcc += Token.StartArray)) - case 't' => keyword_(context, "true", 0, 4, Token.TrueValue, chunkAcc) - case 'f' => keyword_(context, "false", 0, 5, Token.FalseValue, chunkAcc) - case 'n' => keyword_(context, "null", 0, 4, Token.NullValue, chunkAcc) + Pull.suspend(go_(T.advance(context), State.BeforeArrayValue, chunkAcc.startArray())) + case 't' => keyword_(context, "true", 0, 4, _.trueValue(), chunkAcc) + case 'f' => keyword_(context, "false", 0, 5, _.falseValue(), chunkAcc) + case 'n' => keyword_(context, "null", 0, 4, _.nullValue(), chunkAcc) case '"' => string_(T.advance(context), false, new StringBuilder(TokenParser.stringBufferCapacity), chunkAcc) case '-' | '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => number_(context, NumberState.NumberStart, new StringBuilder(TokenParser.numberBufferCapacity), chunkAcc) @@ -263,11 +266,11 @@ private[json] class LegacyTokenParser[F[_], T](s: Stream[F, T])(implicit private final def go_(context: T.Context, state: Int, - chunkAcc: VectorBuilder[Token]): Pull[F, Token, (T.Context, VectorBuilder[Token])] = { + chunkAcc: ChunkAccumulator[Res]): Pull[F, Res, (T.Context, ChunkAccumulator[Res])] = { if (T.needsPull(context)) { emitChunk(chunkAcc) >> 
T.pullNext(context).flatMap { case Some(context) => - chunkAcc.clear() + chunkAcc.flush() go_(context, state, chunkAcc) case None => Pull.pure(empty) @@ -288,7 +291,7 @@ private[json] class LegacyTokenParser[F[_], T](s: Stream[F, T])(implicit string_(T.advance(context), true, keyAcc, chunkAcc) .flatMap { case (context, chunkAcc) => go_(context, State.AfterObjectKey, chunkAcc) } case '}' => - Pull.pure((T.advance(context), chunkAcc += Token.EndObject)) + Pull.pure((T.advance(context), chunkAcc.endObject())) case _ => emitChunk(chunkAcc) >> Pull.raiseError[F](new JsonException(s"unexpected '$c' before object key")) } @@ -314,7 +317,7 @@ private[json] class LegacyTokenParser[F[_], T](s: Stream[F, T])(implicit case ',' => go_(T.advance(context), State.ExpectObjectKey, chunkAcc) case '}' => - Pull.pure((T.advance(context), chunkAcc += Token.EndObject)) + Pull.pure((T.advance(context), chunkAcc.endObject())) case c => emitChunk(chunkAcc) >> Pull.raiseError[F](new JsonException(s"unexpected '$c' after object value")) } @@ -324,7 +327,7 @@ private[json] class LegacyTokenParser[F[_], T](s: Stream[F, T])(implicit case State.BeforeArrayValue => (c: @switch) match { case ']' => - Pull.pure((T.advance(context), chunkAcc += Token.EndArray)) + Pull.pure((T.advance(context), chunkAcc.endArray())) case _ => value_(context, State.AfterArrayValue, chunkAcc) .flatMap { case (context, chunkAcc) => go_(context, State.AfterArrayValue, chunkAcc) } @@ -332,7 +335,7 @@ private[json] class LegacyTokenParser[F[_], T](s: Stream[F, T])(implicit case State.AfterArrayValue => (c: @switch) match { case ']' => - Pull.pure((T.advance(context), chunkAcc += Token.EndArray)) + Pull.pure((T.advance(context), chunkAcc.endArray())) case ',' => go_(T.advance(context), State.ExpectArrayValue, chunkAcc) case c => @@ -343,7 +346,7 @@ private[json] class LegacyTokenParser[F[_], T](s: Stream[F, T])(implicit } } - def parse: Pull[F, Token, Unit] = - go_(T.create(s), State.BeforeValue, new VectorBuilder).void + def parse(chunkAcc: ChunkAccumulator[Res]): Pull[F, Res, Unit] = + go_(T.create(s), State.BeforeValue, chunkAcc).void } diff --git a/json/src/main/scala/fs2/data/json/internal/TokenChunkAccumulator.scala b/json/src/main/scala/fs2/data/json/internal/TokenChunkAccumulator.scala new file mode 100644 index 000000000..aea3fdd3e --- /dev/null +++ b/json/src/main/scala/fs2/data/json/internal/TokenChunkAccumulator.scala @@ -0,0 +1,70 @@ +package fs2 +package data +package json +package internals + +import scala.collection.mutable.ListBuffer + +private[json] final class TokenChunkAccumulator extends ChunkAccumulator[Token] { + + private[this] final val chunkAcc: ListBuffer[Token] = new ListBuffer + + override def startObject(): this.type = { + chunkAcc.addOne(Token.StartObject) + this + } + + override def key(key: String): this.type = { + chunkAcc.addOne(Token.Key(key)) + this + } + + override def endObject(): this.type = { + chunkAcc.addOne(Token.EndObject) + this + } + + override def startArray(): this.type = { + chunkAcc.addOne(Token.StartArray) + this + } + + override def endArray(): this.type = { + chunkAcc.addOne(Token.EndArray) + this + } + + override def nullValue(): this.type = { + chunkAcc.addOne(Token.NullValue) + this + } + + override def trueValue(): this.type = { + chunkAcc.addOne(Token.TrueValue) + this + } + + override def falseValue(): this.type = { + chunkAcc.addOne(Token.FalseValue) + this + } + + override def numberValue(value: String): this.type = { + chunkAcc.addOne(Token.NumberValue(value)) + this + } + + override 
def stringValue(value: String): this.type = { + chunkAcc.addOne(Token.StringValue(value)) + this + } + + override def chunk(): Chunk[Token] = + Chunk.seq(chunkAcc.result()) + + override def flush(): this.type = { + chunkAcc.clear() + this + } + +} diff --git a/json/src/main/scala/fs2/data/json/internal/TokenParser.scala b/json/src/main/scala/fs2/data/json/internal/TokenParser.scala index a738f028d..44ce4639d 100644 --- a/json/src/main/scala/fs2/data/json/internal/TokenParser.scala +++ b/json/src/main/scala/fs2/data/json/internal/TokenParser.scala @@ -41,9 +41,10 @@ private[json] object TokenParser { def pipe[F[_], T](implicit F: RaiseThrowable[F], T: CharLikeChunks[F, T]): Pipe[F, T, Token] = { s => T match { case asCharBuffer: AsCharBuffer[F, T] => - Stream.suspend(new JsonTokenParser[F, T](s)(F, asCharBuffer).go_(State.BeforeValue).stream) + Stream.suspend( + new JsonTokenParser[F, T, Token](s, new TokenChunkAccumulator)(F, asCharBuffer).go_(State.BeforeValue).stream) case _ => - Stream.suspend(new LegacyTokenParser[F, T](s).parse.stream) + Stream.suspend(new LegacyTokenParser[F, T, Token](s).parse(new TokenChunkAccumulator).stream) } } diff --git a/json/src/test/scala/fs2/data/json/JsonParsertest.scala b/json/src/test/scala/fs2/data/json/JsonParsertest.scala index a23fb175d..03cd914fa 100644 --- a/json/src/test/scala/fs2/data/json/JsonParsertest.scala +++ b/json/src/test/scala/fs2/data/json/JsonParsertest.scala @@ -24,6 +24,7 @@ import io.file.{Files, Flags, Path} import cats.effect._ import weaver._ +import fs2.data.json.internals.TokenChunkAccumulator sealed trait Expectation object Expectation { @@ -96,7 +97,7 @@ abstract class JsonParserTest[Json](implicit builder: Builder[Json]) extends Sim .through(fs2.text.utf8.decode) contentStream - .through(new json.internals.LegacyTokenParser(_).parse.stream) + .through(new json.internals.LegacyTokenParser(_).parse(new TokenChunkAccumulator).stream) .through(ast.values) .compile .toList From ecbd9000a23ae848921087a0e6f941bb9cf4ce55 Mon Sep 17 00:00:00 2001 From: Lucas Satabin Date: Sat, 24 Jun 2023 16:40:24 +0200 Subject: [PATCH 02/10] Add an accumulator that builds an AST directly Inspired by the `Facade` abstraction from jawn, we can build the AST directly, without emitting intermediate tokens, which makes it faster. 
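For illustration, a minimal usage sketch of the resulting `ast.parse` pipe (a
sketch only, assuming the circe `Builder` from the `fs2-data-json-circe`
module is in scope; the input literal is made up):

```scala
import fs2.{Fallible, Stream}
import fs2.data.json.ast
import fs2.data.json.circe._
import io.circe.Json

// characters are parsed straight into AST values, no intermediate Token stream
val values: Stream[Fallible, Json] =
  Stream.emit("""{"a": 1, "b": [true, null]}""").covary[Fallible].through(ast.parse)
```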
--- .../benchmarks/JsonParserBenchmarks.scala | 10 +- .../scala/fs2/data/json/ast/package.scala | 24 ++- .../internal/BuilderChunkAccumulator.scala | 147 ++++++++++++++++++ .../data/json/internal/ChunkAccumulator.scala | 16 ++ .../data/json/internal/JsonTokenParser.scala | 34 ++-- .../scala/fs2/data/json/internal/State.scala | 2 +- .../json/internal/TokenChunkAccumulator.scala | 22 ++- .../scala/fs2/data/json/JsonParsertest.scala | 71 +++------ 8 files changed, 255 insertions(+), 71 deletions(-) create mode 100644 json/src/main/scala/fs2/data/json/internal/BuilderChunkAccumulator.scala diff --git a/benchmarks/src/main/scala/fs2/data/benchmarks/JsonParserBenchmarks.scala b/benchmarks/src/main/scala/fs2/data/benchmarks/JsonParserBenchmarks.scala index 6553fb26f..3c057c213 100644 --- a/benchmarks/src/main/scala/fs2/data/benchmarks/JsonParserBenchmarks.scala +++ b/benchmarks/src/main/scala/fs2/data/benchmarks/JsonParserBenchmarks.scala @@ -38,7 +38,15 @@ class JsonParserBenchmarks { .unsafeRunSync() @Benchmark - def parseJsonFs2DataValues() = + def parseJsonFs2DataParse() = + jsonStream + .through(ast.parse) + .compile + .drain + .unsafeRunSync() + + @Benchmark + def parseJsonFs2DataTokensValues() = jsonStream .through(tokens) .through(ast.values) diff --git a/json/src/main/scala/fs2/data/json/ast/package.scala b/json/src/main/scala/fs2/data/json/ast/package.scala index 847b21d3c..717ebdc95 100644 --- a/json/src/main/scala/fs2/data/json/ast/package.scala +++ b/json/src/main/scala/fs2/data/json/ast/package.scala @@ -15,12 +15,14 @@ */ package fs2 -package data.json - -import internals.{TokenSelector, ValueParser} +package data +package json import cats.syntax.all._ +import text.{AsCharBuffer, CharLikeChunks} +import internals.{TokenSelector, ValueParser, JsonTokenParser, LegacyTokenParser, BuilderChunkAccumulator, State} + package object ast { /** Transforms a stream of token into another one. The transformation function `f` is @@ -84,6 +86,22 @@ package object ast { def values[F[_], Json](implicit F: RaiseThrowable[F], builder: Builder[Json]): Pipe[F, Token, Json] = ValueParser.pipe[F, Json] + /** Parses a stream of characters into a stream of Json values. */ + def parse[F[_], T, Json](implicit + F: RaiseThrowable[F], + T: CharLikeChunks[F, T], + builder: Builder[Json]): Pipe[F, T, Json] = { s => + T match { + case asCharBuffer: AsCharBuffer[F, T] => + Stream.suspend( + new JsonTokenParser[F, T, Json](s, new BuilderChunkAccumulator(builder))(F, asCharBuffer) + .go_(State.BeforeValue) + .stream) + case _ => + Stream.suspend(new LegacyTokenParser[F, T, Json](s).parse(new BuilderChunkAccumulator(builder)).stream) + } + } + /** Transforms a stream of Json values into a stream of Json tokens. * * This operation is the opposite of `values`. diff --git a/json/src/main/scala/fs2/data/json/internal/BuilderChunkAccumulator.scala b/json/src/main/scala/fs2/data/json/internal/BuilderChunkAccumulator.scala new file mode 100644 index 000000000..3a9ccd49d --- /dev/null +++ b/json/src/main/scala/fs2/data/json/internal/BuilderChunkAccumulator.scala @@ -0,0 +1,147 @@ +/* + * Copyright 2023 Lucas Satabin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package fs2 +package data +package json +package internals + +import scala.collection.immutable.{TreeMap, VectorBuilder} +import scala.collection.mutable.ListBuffer + +import ast.Builder + +/** A chunk accumulator that allows for building a stream of AST values + * for the provided [[ast.Builder Builder]]. + * + * The design is inspired by the jawn `Facade` and `FContext`. + */ +private[json] final class BuilderChunkAccumulator[Json](builder: Builder[Json]) extends ChunkAccumulator[Json] { + + private[this] final val chunkAcc: VectorBuilder[Json] = new VectorBuilder + + private trait Context { + def string(s: String): Unit + def value(v: Json): Unit + def finish(): Json + } + + private def toplevelContext(): Context = + new Context { + override def string(s: String): Unit = chunkAcc.addOne(builder.makeString(s)) + override def value(v: Json): Unit = chunkAcc.addOne(v) + override def finish(): Json = builder.makeNull + } + private def arrayContext(): Context = + new Context { + private[this] val vs = ListBuffer.empty[Json] + override def string(s: String): Unit = vs.addOne(builder.makeString(s)) + override def value(v: Json): Unit = vs.addOne(v) + override def finish(): Json = builder.makeArray(vs) + + } + private def objectContext(): Context = + new Context { + private[this] var key: String = null + private[this] var vs = TreeMap.empty[String, Json] + override def string(s: String): Unit = key = s + override def value(v: Json): Unit = { + vs = vs.updated(key, v) + key = null + } + override def finish(): Json = builder.makeObject(vs) + } + + private[this] var context: Context = toplevelContext() + private[this] var stack: List[Context] = Nil + + override def startObject(): this.type = { + stack = context :: stack + context = objectContext() + this + } + + override def key(key: String): this.type = { + context.string(key) + this + } + + override def endObject(): this.type = + if (stack.isEmpty) { + chunkAcc.addOne(context.finish()) + context = toplevelContext() + this + } else { + val v = context.finish() + context = stack.head + context.value(v) + stack = stack.tail + this + } + + override def startArray(): this.type = { + stack = context :: stack + context = arrayContext() + this + } + + override def endArray(): this.type = + if (stack.isEmpty) { + chunkAcc.addOne(context.finish()) + context = toplevelContext() + this + } else { + val v = context.finish() + context = stack.head + context.value(v) + stack = stack.tail + this + } + + override def nullValue(): this.type = { + context.value(builder.makeNull) + this + } + + override def trueValue(): this.type = { + context.value(builder.makeTrue) + this + } + + override def falseValue(): this.type = { + context.value(builder.makeFalse) + this + } + + override def numberValue(value: String): this.type = { + context.value(builder.makeNumber(value)) + this + } + + override def stringValue(value: String): this.type = { + context.value(builder.makeString(value)) + this + } + + override def chunk(): Chunk[Json] = + Chunk.vector(chunkAcc.result()) + + override def flush(): this.type = { + chunkAcc.clear() + this + } + +} diff 
--git a/json/src/main/scala/fs2/data/json/internal/ChunkAccumulator.scala b/json/src/main/scala/fs2/data/json/internal/ChunkAccumulator.scala index c718cab97..381584c40 100644 --- a/json/src/main/scala/fs2/data/json/internal/ChunkAccumulator.scala +++ b/json/src/main/scala/fs2/data/json/internal/ChunkAccumulator.scala @@ -1,3 +1,19 @@ +/* + * Copyright 2023 Lucas Satabin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package fs2 package data package json diff --git a/json/src/main/scala/fs2/data/json/internal/JsonTokenParser.scala b/json/src/main/scala/fs2/data/json/internal/JsonTokenParser.scala index cc1975bc1..f32567ca9 100644 --- a/json/src/main/scala/fs2/data/json/internal/JsonTokenParser.scala +++ b/json/src/main/scala/fs2/data/json/internal/JsonTokenParser.scala @@ -25,10 +25,9 @@ import scala.annotation.switch import TokenParser._ -private class JsonTokenParser[F[_], T, Res](s: Stream[F, T], private[this] final val chunkAcc: ChunkAccumulator[Res])( - implicit - F: RaiseThrowable[F], - T: AsCharBuffer[F, T]) { +private[json] class JsonTokenParser[F[_], T, Res]( + s: Stream[F, T], + private[this] final val chunkAcc: ChunkAccumulator[Res])(implicit F: RaiseThrowable[F], T: AsCharBuffer[F, T]) { private[this] var context = T.create(s) private[this] def emitChunk[T]() = @@ -224,7 +223,10 @@ private class JsonTokenParser[F[_], T, Res](s: Stream[F, T], private[this] final } } - private final def keyword_(expected: String, eidx: Int, elen: Int, accumulate: () => ChunkAccumulator[Res]): Pull[F, Res, Unit] = { + private final def keyword_(expected: String, + eidx: Int, + elen: Int, + accumulate: () => ChunkAccumulator[Res]): Pull[F, Res, Unit] = { if (T.needsPull(context)) { emitChunk() >> T.pullNext(context).flatMap { case Some(context) => @@ -250,13 +252,13 @@ private class JsonTokenParser[F[_], T, Res](s: Stream[F, T], private[this] final } } - private final def value_(state: Int)(implicit F: RaiseThrowable[F]): Pull[F, Res, Unit] = + private final def value_()(implicit F: RaiseThrowable[F]): Pull[F, Res, Unit] = if (T.needsPull(context)) { emitChunk() >> T.pullNext(context).flatMap { case Some(context) => this.context = context chunkAcc.flush() - value_(state) + value_() case None => Pull.raiseError[F](new JsonException("unexpected end of input")) } } else { @@ -292,9 +294,13 @@ private class JsonTokenParser[F[_], T, Res](s: Stream[F, T], private[this] final chunkAcc.flush() go_(state) case None => - this.context = T.create(Stream.empty) - chunkAcc.flush() - Pull.done + if (state == State.BeforeValue) { + this.context = T.create(Stream.empty) + chunkAcc.flush() + Pull.done + } else { + Pull.raiseError(JsonException("unexpected end of input")) + } } } else { val c = T.current(context) @@ -305,7 +311,7 @@ private class JsonTokenParser[F[_], T, Res](s: Stream[F, T], private[this] final case _ => (state: @switch) match { case State.BeforeValue => - value_(state) >> go_(State.BeforeValue) + value_() >> go_(State.BeforeValue) case State.BeforeObjectKey => (c: @switch) match { 
case '"' => @@ -337,7 +343,7 @@ private class JsonTokenParser[F[_], T, Res](s: Stream[F, T], private[this] final emitChunk() >> Pull.raiseError[F](new JsonException(s"unexpected '$c' after object key")) } case State.BeforeObjectValue => - value_(State.AfterObjectValue) >> go_(State.AfterObjectValue) + value_() >> go_(State.AfterObjectValue) case State.AfterObjectValue => (c: @switch) match { case ',' => @@ -351,7 +357,7 @@ private class JsonTokenParser[F[_], T, Res](s: Stream[F, T], private[this] final emitChunk() >> Pull.raiseError[F](new JsonException(s"unexpected '$c' after object value")) } case State.ExpectArrayValue => - value_(State.AfterArrayValue) >> go_(State.AfterArrayValue) + value_() >> go_(State.AfterArrayValue) case State.BeforeArrayValue => (c: @switch) match { case ']' => @@ -359,7 +365,7 @@ private class JsonTokenParser[F[_], T, Res](s: Stream[F, T], private[this] final chunkAcc.endArray() Pull.done case _ => - value_(State.AfterArrayValue) >> go_(State.AfterArrayValue) + value_() >> go_(State.AfterArrayValue) } case State.AfterArrayValue => (c: @switch) match { diff --git a/json/src/main/scala/fs2/data/json/internal/State.scala b/json/src/main/scala/fs2/data/json/internal/State.scala index 22dcd04f7..3083e5383 100644 --- a/json/src/main/scala/fs2/data/json/internal/State.scala +++ b/json/src/main/scala/fs2/data/json/internal/State.scala @@ -16,7 +16,7 @@ package fs2.data.json.internals -private[internals] object State { +private[json] object State { final val BeforeValue = 0 final val BeforeObjectKey = 1 final val ExpectObjectKey = 2 diff --git a/json/src/main/scala/fs2/data/json/internal/TokenChunkAccumulator.scala b/json/src/main/scala/fs2/data/json/internal/TokenChunkAccumulator.scala index aea3fdd3e..a1b43a5a2 100644 --- a/json/src/main/scala/fs2/data/json/internal/TokenChunkAccumulator.scala +++ b/json/src/main/scala/fs2/data/json/internal/TokenChunkAccumulator.scala @@ -1,13 +1,29 @@ +/* + * Copyright 2023 Lucas Satabin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package fs2 package data package json package internals -import scala.collection.mutable.ListBuffer +import scala.collection.immutable.VectorBuilder private[json] final class TokenChunkAccumulator extends ChunkAccumulator[Token] { - private[this] final val chunkAcc: ListBuffer[Token] = new ListBuffer + private[this] final val chunkAcc: VectorBuilder[Token] = new VectorBuilder override def startObject(): this.type = { chunkAcc.addOne(Token.StartObject) @@ -60,7 +76,7 @@ private[json] final class TokenChunkAccumulator extends ChunkAccumulator[Token] } override def chunk(): Chunk[Token] = - Chunk.seq(chunkAcc.result()) + Chunk.vector(chunkAcc.result()) override def flush(): this.type = { chunkAcc.clear() diff --git a/json/src/test/scala/fs2/data/json/JsonParsertest.scala b/json/src/test/scala/fs2/data/json/JsonParsertest.scala index 03cd914fa..c9980c6ab 100644 --- a/json/src/test/scala/fs2/data/json/JsonParsertest.scala +++ b/json/src/test/scala/fs2/data/json/JsonParsertest.scala @@ -18,13 +18,12 @@ package fs2 package data package json -import ast._ -import io.file.{Files, Flags, Path} - import cats.effect._ - +import fs2.data.json.internals.{BuilderChunkAccumulator, TokenChunkAccumulator} import weaver._ -import fs2.data.json.internals.TokenChunkAccumulator + +import ast._ +import io.file.{Files, Flags, Path} sealed trait Expectation object Expectation { @@ -37,7 +36,7 @@ abstract class JsonParserTest[Json](implicit builder: Builder[Json]) extends Sim private val testFileDir = Path("json/src/test/resources/test-parsing/") - test("Standard test suite files should be parsed correctly") { + private def standardTests(parsingPipe: Pipe[IO, String, Json]): IO[Expectations] = Files[IO] .list(testFileDir) .evalMap { path => @@ -55,12 +54,9 @@ abstract class JsonParserTest[Json](implicit builder: Builder[Json]) extends Sim .through(fs2.text.utf8.decode) contentStream - .through(tokens) - .through(ast.values) + .through(parsingPipe) .compile - .toList - .flatMap(l => - if (l.size == 1) IO.pure(l.head) else IO.raiseError(new Exception("a single value is expected"))) + .onlyOrError .attempt .flatMap(actual => expectation match { @@ -77,48 +73,25 @@ abstract class JsonParserTest[Json](implicit builder: Builder[Json]) extends Sim } .compile .foldMonoid + + test("Standard test suite files should be parsed correctly") { + standardTests( + _.through(tokens) + .through(ast.values)) } - test("Standard test suite files should be parsed correctly with legacy parser") { - Files[IO] - .list(testFileDir) - .evalMap { path => - val expectation = - if (path.fileName.toString.startsWith("y_")) - Expectation.Valid - else if (path.fileName.toString.startsWith("n_")) - Expectation.Invalid - else - Expectation.ImplementationDefined + test("Standard test suite files should be parsed directly correctly") { + standardTests(_.through(ast.parse)) + } - val contentStream = - Files[IO] - .readAll(path, 1024, Flags.Read) - .through(fs2.text.utf8.decode) + test("Standard test suite files should be parsed correctly with legacy parser") { + standardTests( + _.through(new json.internals.LegacyTokenParser(_).parse(new TokenChunkAccumulator).stream) + .through(ast.values)) + } - contentStream - .through(new json.internals.LegacyTokenParser(_).parse(new TokenChunkAccumulator).stream) - .through(ast.values) - .compile - .toList - .flatMap(l => - if (l.size == 1) IO.pure(l.head) else IO.raiseError(new Exception("a single value is expected"))) - .attempt - .flatMap(actual => - expectation match { - case Expectation.Valid | 
Expectation.ImplementationDefined => - contentStream.compile.string.map { rawExpected => - val expected = parse(rawExpected) - expect(actual.isRight == expected.isRight) and (if (actual.isRight) - expect(actual == expected) - else success) - } - case Expectation.Invalid => - IO.pure(expect(actual.isLeft, path.toString)) - }) - } - .compile - .foldMonoid + test("Standard test suite files should be parsed directly correctly with legacy parser") { + standardTests(_.through(new json.internals.LegacyTokenParser(_).parse(new BuilderChunkAccumulator(builder)).stream)) } def parse(content: String): Either[Throwable, Json] From 3ba90b325fb487e1e4d1569ec63bbbdb45eac26a Mon Sep 17 00:00:00 2001 From: Lucas Satabin Date: Mon, 26 Jun 2023 18:53:13 +0200 Subject: [PATCH 03/10] Avoid walking twice through accumulated values --- .../fs2/data/json/internal/ValueParser.scala | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/json/src/main/scala/fs2/data/json/internal/ValueParser.scala b/json/src/main/scala/fs2/data/json/internal/ValueParser.scala index 069919264..587ace279 100644 --- a/json/src/main/scala/fs2/data/json/internal/ValueParser.scala +++ b/json/src/main/scala/fs2/data/json/internal/ValueParser.scala @@ -19,13 +19,13 @@ package data package json package internals -import ast._ +import scala.collection.mutable.ListBuffer -import scala.collection.immutable.VectorBuilder +import ast._ private[json] object ValueParser { - private def pullArray[F[_], Json](chunk: Chunk[Token], idx: Int, rest: Stream[F, Token], acc: VectorBuilder[Json])( + private def pullArray[F[_], Json](chunk: Chunk[Token], idx: Int, rest: Stream[F, Token], acc: ListBuffer[Json])( implicit F: RaiseThrowable[F], builder: Builder[Json]): Pull[F, Nothing, Result[F, Json]] = @@ -37,7 +37,7 @@ private[json] object ValueParser { } else { chunk(idx) match { case Token.EndArray => - Pull.pure(Some((chunk, idx + 1, rest, builder.makeArray(acc.result())))) + Pull.pure(Some((chunk, idx + 1, rest, builder.makeArray(acc)))) case _ => Pull.suspend(pullValue(chunk, idx, rest).flatMap { case Some((chunk, idx, rest, json)) => pullArray(chunk, idx, rest, acc += json) @@ -49,7 +49,7 @@ private[json] object ValueParser { private def pullObject[F[_], Json](chunk: Chunk[Token], idx: Int, rest: Stream[F, Token], - acc: VectorBuilder[(String, Json)])(implicit + acc: ListBuffer[(String, Json)])(implicit F: RaiseThrowable[F], builder: Builder[Json]): Pull[F, Nothing, Result[F, Json]] = if (idx >= chunk.size) { @@ -60,7 +60,7 @@ private[json] object ValueParser { } else { chunk(idx) match { case Token.EndObject => - Pull.pure(Some((chunk, idx + 1, rest, builder.makeObject(acc.result())))) + Pull.pure(Some((chunk, idx + 1, rest, builder.makeObject(acc)))) case Token.Key(key) => pullValue(chunk, idx + 1, rest).flatMap { case Some((chunk, idx, rest, json)) => pullObject(chunk, idx, rest, acc += (key -> json)) @@ -86,8 +86,8 @@ private[json] object ValueParser { case Token.NullValue => Pull.pure(Some((chunk, idx + 1, rest, builder.makeNull))) case Token.StringValue(s) => Pull.pure(Some((chunk, idx + 1, rest, builder.makeString(s)))) case Token.NumberValue(s) => Pull.pure(Some((chunk, idx + 1, rest, builder.makeNumber(s)))) - case Token.StartArray => pullArray(chunk, idx + 1, rest, new VectorBuilder) - case Token.StartObject => pullObject(chunk, idx + 1, rest, new VectorBuilder) + case Token.StartArray => pullArray(chunk, idx + 1, rest, new ListBuffer) + case Token.StartObject => pullObject(chunk, idx + 1, rest, new ListBuffer) case 
token => Pull.raiseError[F](new JsonException(s"malformed json (unexpected $token)")) } }

From 4c29e68b0e35b57af0d5f61f7090814bb1556f61 Mon Sep 17 00:00:00 2001
From: Lucas Satabin
Date: Mon, 26 Jun 2023 22:41:07 +0200
Subject: [PATCH 04/10] Make it compile with Scala 2.12
---
 .../main/scala-2.12/fs2/data/package.scala | 32 +++++++++++++++
 .../data/json/internal/ChunkAccumulator.scala | 2 +-
 .../json/internal/LegacyTokenParser.scala | 2 +-
 3 files changed, 34 insertions(+), 2 deletions(-)
 create mode 100644 json/src/main/scala-2.12/fs2/data/package.scala

diff --git a/json/src/main/scala-2.12/fs2/data/package.scala b/json/src/main/scala-2.12/fs2/data/package.scala
new file mode 100644
index 000000000..236521db3
--- /dev/null
+++ b/json/src/main/scala-2.12/fs2/data/package.scala
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2023 Lucas Satabin
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package fs2
+
+import scala.collection.immutable.VectorBuilder
+import scala.collection.mutable.ListBuffer
+
+package object data {
+
+  implicit class VectorBuilderOps[T](val builder: VectorBuilder[T]) extends AnyVal {
+    def addOne(t: T) = builder += t
+  }
+
+  implicit class ListBufferOps[T](val buffer: ListBuffer[T]) extends AnyVal {
+    def addOne(t: T) = buffer += t
+  }
+
+}

diff --git a/json/src/main/scala/fs2/data/json/internal/ChunkAccumulator.scala b/json/src/main/scala/fs2/data/json/internal/ChunkAccumulator.scala
index 381584c40..71bec0375 100644
--- a/json/src/main/scala/fs2/data/json/internal/ChunkAccumulator.scala
+++ b/json/src/main/scala/fs2/data/json/internal/ChunkAccumulator.scala
@@ -27,7 +27,7 @@ package internals
 * chunk under construction, but results still being built are kept.
 * This makes it possible to accumulate values across upstream chunk boundaries.
*/ -private[internals] trait ChunkAccumulator[Res] { +private[json] trait ChunkAccumulator[Res] { def startObject(): this.type diff --git a/json/src/main/scala/fs2/data/json/internal/LegacyTokenParser.scala b/json/src/main/scala/fs2/data/json/internal/LegacyTokenParser.scala index ff6aed95b..8dd201769 100644 --- a/json/src/main/scala/fs2/data/json/internal/LegacyTokenParser.scala +++ b/json/src/main/scala/fs2/data/json/internal/LegacyTokenParser.scala @@ -194,7 +194,7 @@ private[json] class LegacyTokenParser[F[_], T, Res](s: Stream[F, T])(implicit number_(context, state, acc, chunkAcc) case None => if (NumberState.isFinal(state)) - Pull.pure(T.create(Stream.empty), chunkAcc.numberValue(acc.result())) + Pull.pure((T.create(Stream.empty), chunkAcc.numberValue(acc.result()))) else Pull.raiseError[F](new JsonException("unexpected end of input")) } From bf9860a850c8e7be6a1a2e2a8d0e5dde02425b7e Mon Sep 17 00:00:00 2001 From: Lucas Satabin Date: Sun, 2 Jul 2023 20:34:21 +0200 Subject: [PATCH 05/10] Add scalafix rule to migrate to `parse` --- build.sbt | 33 +++++++++++++++ project/build.properties | 2 +- project/plugins.sbt | 2 + .../src/main/scala/test/TokensValues.scala | 12 ++++++ .../src/main/scala/test/TokensValues.scala | 9 +++++ .../META-INF/services/scalafix.v1.Rule | 1 + .../rules/src/main/scala/fix/JsonParse.scala | 40 +++++++++++++++++++ .../tests/src/test/scala/fix/RuleSuite.scala | 8 ++++ 8 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 scalafix/input/src/main/scala/test/TokensValues.scala create mode 100644 scalafix/output/src/main/scala/test/TokensValues.scala create mode 100644 scalafix/rules/src/main/resources/META-INF/services/scalafix.v1.Rule create mode 100644 scalafix/rules/src/main/scala/fix/JsonParse.scala create mode 100644 scalafix/tests/src/test/scala/fix/RuleSuite.scala diff --git a/build.sbt b/build.sbt index 3dd4774e5..c81204a35 100644 --- a/build.sbt +++ b/build.sbt @@ -562,6 +562,39 @@ lazy val benchmarks = crossProject(JVMPlatform) ) .dependsOn(csv, scalaXml, jsonCirce) +lazy val scalafixInput = (project in file("scalafix/input")) + .disablePlugins(ScalafixPlugin) + .dependsOn(jsonCirce.jvm) + +lazy val scalafixOutput = (project in file("scalafix/output")) + .disablePlugins(ScalafixPlugin) + .dependsOn(jsonCirce.jvm) + +lazy val scalafixRules = (project in file("scalafix/rules")) + .disablePlugins(ScalafixPlugin) + .settings( + libraryDependencies += + "ch.epfl.scala" %% + "scalafix-core" % + _root_.scalafix.sbt.BuildInfo.scalafixVersion + ) + +lazy val scalafixTests = (project in file("scalafix/tests")) + .settings( + scalafixTestkitOutputSourceDirectories := + (scalafixOutput / Compile / sourceDirectories).value, + scalafixTestkitInputSourceDirectories := + (scalafixInput / Compile / sourceDirectories).value, + scalafixTestkitInputClasspath := + (scalafixInput / Compile / fullClasspath).value, + scalafixTestkitInputScalacOptions := + (scalafixInput / Compile / scalacOptions).value, + scalafixTestkitInputScalaVersion := + (scalafixInput / Compile / scalaVersion).value + ) + .dependsOn(scalafixInput, scalafixRules) + .enablePlugins(ScalafixTestkitPlugin) + // Utils def onScala2[T](version: String)(values: => List[T]): List[T] = PartialFunction diff --git a/project/build.properties b/project/build.properties index 40b3b8e7b..3c0b78a7c 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=1.9.0 +sbt.version=1.9.1 diff --git a/project/plugins.sbt b/project/plugins.sbt index b81b5557d..afca5803a 100644 --- 
a/project/plugins.sbt +++ b/project/plugins.sbt @@ -22,3 +22,5 @@ addSbtPlugin("org.scoverage" % "sbt-scoverage" % "2.0.8") addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.4.14") addSbtPlugin("org.portable-scala" % "sbt-scala-native-crossproject" % "1.3.1") + +addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.11.0") diff --git a/scalafix/input/src/main/scala/test/TokensValues.scala b/scalafix/input/src/main/scala/test/TokensValues.scala new file mode 100644 index 000000000..f53d455c9 --- /dev/null +++ b/scalafix/input/src/main/scala/test/TokensValues.scala @@ -0,0 +1,12 @@ +/* +rule = json-parse +*/ +package test + +import fs2._ +import fs2.data.json._ + +object Parser { + def parse[Json](s: String)(implicit builder: ast.Builder[Json]) = + Stream.emit(s).covary[Fallible].through(tokens).through(ast.values).compile.onlyOrError +} diff --git a/scalafix/output/src/main/scala/test/TokensValues.scala b/scalafix/output/src/main/scala/test/TokensValues.scala new file mode 100644 index 000000000..920a2287f --- /dev/null +++ b/scalafix/output/src/main/scala/test/TokensValues.scala @@ -0,0 +1,9 @@ +package test + +import fs2._ +import fs2.data.json._ + +object Parser { + def parse[Json](s: String)(implicit builder: ast.Builder[Json]) = + Stream.emit(s).covary[Fallible].through(ast.parse).compile.onlyOrError +} diff --git a/scalafix/rules/src/main/resources/META-INF/services/scalafix.v1.Rule b/scalafix/rules/src/main/resources/META-INF/services/scalafix.v1.Rule new file mode 100644 index 000000000..92ee8acc7 --- /dev/null +++ b/scalafix/rules/src/main/resources/META-INF/services/scalafix.v1.Rule @@ -0,0 +1 @@ +fix.JsonParse diff --git a/scalafix/rules/src/main/scala/fix/JsonParse.scala b/scalafix/rules/src/main/scala/fix/JsonParse.scala new file mode 100644 index 000000000..02f766d6a --- /dev/null +++ b/scalafix/rules/src/main/scala/fix/JsonParse.scala @@ -0,0 +1,40 @@ +package fix + +import scalafix.v1._ +import scala.meta._ + +class JsonParse extends SemanticRule("json-parse") { + + private val TokensMatcher = SymbolMatcher.normalized("fs2/data/json/package.tokens().") + private val ValuesMatcher = SymbolMatcher.normalized("fs2/data/json/ast/package.values().") + private val tokensSymbol = Symbol("fs2/data/json/package.tokens().") + private val valuesSymbol = Symbol("fs2/data/json/ast/package.values().") + + private def containsImport(importer: Importer)(implicit doc: SemanticDocument): Boolean = + doc.tree + .collect { + case i: Importer if i.importees.intersect(importer.importees) == importer.importees => + true + case _ => + false + } + .exists(identity) + + private def addFs2DataJsonImport(implicit doc: SemanticDocument): Patch = + if (containsImport(importer"fs2.data.json._")) + Patch.empty + else + Patch.addGlobalImport(importer"fs2.data.json._") + + override def fix(implicit doc: SemanticDocument): Patch = + doc.tree.collect { + // case t @ q"$_.through($tokens).through($values)" if TokensMatcher.matches(tokens) && ValuesMatcher.matches(values) => + case t @ Term.Apply(Term.Select(Term.Apply(Term.Select(base, Term.Name("through")), List(TokensMatcher(_))), + Term.Name("through")), + List(ValuesMatcher(_))) => + Patch.removeGlobalImport(tokensSymbol) + + Patch.removeGlobalImport(valuesSymbol) + + addFs2DataJsonImport + Patch.replaceTree(t, s"""$base.through(ast.parse)""") + }.asPatch + +} diff --git a/scalafix/tests/src/test/scala/fix/RuleSuite.scala b/scalafix/tests/src/test/scala/fix/RuleSuite.scala new file mode 100644 index 000000000..5ca361bc6 --- /dev/null +++ 
b/scalafix/tests/src/test/scala/fix/RuleSuite.scala
@@ -0,0 +1,8 @@
+package fix
+
+import org.scalatest.funsuite.AnyFunSuiteLike
+import scalafix.testkit.AbstractSemanticRuleSuite
+
+class RuleSuite extends AbstractSemanticRuleSuite with AnyFunSuiteLike {
+  runAllTests()
+}

From 2244e094bfef8ba4efd466b9279f95ae21f12ca8 Mon Sep 17 00:00:00 2001
From: Lucas Satabin
Date: Sun, 2 Jul 2023 20:57:39 +0200
Subject: [PATCH 06/10] Document new `parse` pipe

---
 documentation/docs/json/index.md     | 25 +++++++++++++++++++++++--
 documentation/docs/json/libraries.md | 11 ++++++-----
 2 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/documentation/docs/json/index.md b/documentation/docs/json/index.md
index 048793801..31c59dea7 100644
--- a/documentation/docs/json/index.md
+++ b/documentation/docs/json/index.md
@@ -111,7 +111,9 @@ root.index(1).!
 
 To handle Json ASTs, you can use the types and pipes available in the `fs2.data.json.ast` package.
 
-JSON ASTs can be built if you provide an implicit [`Builder[Json]`][builder-api] to the `values` pipe. The `Builder[Json]` typeclass describes how JSON ASTs of type `Json` are built from streams.
+#### Parsing a stream to values
+
+If you are not interested in tokens, but instead want to parse the input stream into a stream of AST values, you can use the `ast.parse` pipe, provided you have an implicit [`Builder[Json]`][builder-api] in scope. The `Builder[Json]` typeclass describes how JSON ASTs of type `Json` are built from events generated by the parser.
 
 ```scala mdoc:compile-only
 import ast._
@@ -119,10 +121,29 @@ import ast._
 trait SomeJsonType
 
 implicit val builder: Builder[SomeJsonType] = ???
+
+Stream.emit(input).covary[Fallible].through(parse)
+```
+
+The resulting stream emits all parsed top-level JSON values; in our example, the two objects are emitted.
+
+#### From `Token`s to values
+
+JSON ASTs can be built from an existing token stream, provided you have an implicit [`Builder[Json]`][builder-api], using the `values` pipe.
+
+```scala mdoc:compile-only
+import ast._
+
+trait SomeJsonType
+
+implicit val builder: Builder[SomeJsonType] = ???
+
 stream.through(values[Fallible, SomeJsonType])
 ```
 
-The `asts` stream emits all top-level JSON values parsed, in our example, the two objects are emitted.
+**Note:** even though this snippet is equivalent in result to the one using `ast.parse`, it is less efficient; if you are only interested in the values, you should always use `ast.parse`.
+
+#### From values to `Token`s
 
 If you provide an implicit [`Tokenizer[Json]`][tokenizer-api], which describes how a JSON AST is transformed into JSON tokens, you can apply transformations to the JSON stream.
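For illustration, here is a minimal sketch of such a round-trip (decoding values, transforming them, and serializing them back to tokens). It assumes the `ast.tokenize` pipe and a hypothetical `transformValue` function; it is not part of the patch above.

```scala
import ast._

trait SomeJsonType

implicit val builder: Builder[SomeJsonType] = ???
implicit val tokenizer: Tokenizer[SomeJsonType] = ???

// hypothetical transformation applied to every decoded value
def transformValue(json: SomeJsonType): SomeJsonType = ???

// decode the token stream into values, transform them,
// and serialize the results back into a token stream
stream
  .through(values[Fallible, SomeJsonType])
  .map(transformValue)
  .through(tokenize[Fallible, SomeJsonType])
```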
For instance, you can apply a function `fun` to all values in the `field3` array by using this code:

diff --git a/documentation/docs/json/libraries.md b/documentation/docs/json/libraries.md
index 4160e7c75..b5c744a6d 100644
--- a/documentation/docs/json/libraries.md
+++ b/documentation/docs/json/libraries.md
@@ -15,11 +15,10 @@ Examples on this page use the following input:
 ```scala mdoc
 import fs2.{Fallible, Stream}
 import fs2.data.json._
-import fs2.data.json.literals._
 import fs2.data.json.jsonpath._
 import fs2.data.json.jsonpath.literals._
 
-val stream = json"""{
+val input = Stream.emit("""{
   "field1": 0,
   "field2": "test",
   "field3": [1, 2, 3]
@@ -27,7 +26,9 @@ val stream = json"""{
 {
   "field1": 2,
   "field3": []
-}"""
+}""").covary[Fallible]
+
+val stream = input.through(tokens)
 
 val sel = jsonpath"$$.field3[*]"
 ```
@@ -43,8 +44,8 @@ For instance both examples from the [core module documentation][json-doc] with c
 ```scala mdoc:nest
 import fs2.data.json.circe._
 import io.circe._
 
-val asts = stream.through(ast.values[Fallible, Json])
-asts.compile.toList
+val asts = input.through(ast.parse)
+asts.map(_.spaces2).compile.toList
 ```
 
 You can use `filter.values` to select only the values matching the JSONPath and deserialize them using the builder.

From 3f3abbb51a2922403e691237e2379ad0da81041d Mon Sep 17 00:00:00 2001
From: Lucas Satabin
Date: Sun, 2 Jul 2023 21:09:49 +0200
Subject: [PATCH 07/10] Fix workflow

---
 .github/workflows/ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b56d66d64..de82f7565 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -98,11 +98,11 @@ jobs:
 
       - name: Make target directories
         if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main')
-        run: mkdir -p json/.native/target json/play/.jvm/target text/native/target cbor-json/native/target finite-state/native/target target .js/target documentation/target cbor/js/target finite-state/js/target text/js/target benchmarks/.jvm/target json/play/.js/target json/.jvm/target xml/scala-xml/.native/target csv/jvm/target xml/.jvm/target xml/.js/target cbor/native/target json/circe/.native/target finite-state/jvm/target cbor-json/js/target cbor/jvm/target csv/native/target json/circe/.jvm/target .jvm/target csv/js/target csv/generic/jvm/target .native/target text/jvm/target xml/.native/target json/diffson/.native/target json/diffson/.js/target cbor-json/jvm/target json/interpolators/.jvm/target json/.js/target json/interpolators/.js/target csv/generic/js/target json/circe/.js/target json/diffson/.jvm/target xml/scala-xml/.js/target csv/generic/native/target xml/scala-xml/.jvm/target json/interpolators/.native/target project/target
+        run: mkdir -p json/.native/target json/play/.jvm/target text/native/target cbor-json/native/target finite-state/native/target scalafix/rules/target target .js/target documentation/target cbor/js/target finite-state/js/target text/js/target scalafix/output/target benchmarks/.jvm/target json/play/.js/target json/.jvm/target xml/scala-xml/.native/target csv/jvm/target xml/.jvm/target xml/.js/target cbor/native/target json/circe/.native/target finite-state/jvm/target scalafix/tests/target cbor-json/js/target cbor/jvm/target csv/native/target json/circe/.jvm/target .jvm/target scalafix/input/target csv/js/target csv/generic/jvm/target .native/target text/jvm/target xml/.native/target json/diffson/.native/target json/diffson/.js/target cbor-json/jvm/target
json/interpolators/.jvm/target json/.js/target json/interpolators/.js/target csv/generic/js/target json/circe/.js/target json/diffson/.jvm/target xml/scala-xml/.js/target csv/generic/native/target xml/scala-xml/.jvm/target json/interpolators/.native/target project/target - name: Compress target directories if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main') - run: tar cf targets.tar json/.native/target json/play/.jvm/target text/native/target cbor-json/native/target finite-state/native/target target .js/target documentation/target cbor/js/target finite-state/js/target text/js/target benchmarks/.jvm/target json/play/.js/target json/.jvm/target xml/scala-xml/.native/target csv/jvm/target xml/.jvm/target xml/.js/target cbor/native/target json/circe/.native/target finite-state/jvm/target cbor-json/js/target cbor/jvm/target csv/native/target json/circe/.jvm/target .jvm/target csv/js/target csv/generic/jvm/target .native/target text/jvm/target xml/.native/target json/diffson/.native/target json/diffson/.js/target cbor-json/jvm/target json/interpolators/.jvm/target json/.js/target json/interpolators/.js/target csv/generic/js/target json/circe/.js/target json/diffson/.jvm/target xml/scala-xml/.js/target csv/generic/native/target xml/scala-xml/.jvm/target json/interpolators/.native/target project/target + run: tar cf targets.tar json/.native/target json/play/.jvm/target text/native/target cbor-json/native/target finite-state/native/target scalafix/rules/target target .js/target documentation/target cbor/js/target finite-state/js/target text/js/target scalafix/output/target benchmarks/.jvm/target json/play/.js/target json/.jvm/target xml/scala-xml/.native/target csv/jvm/target xml/.jvm/target xml/.js/target cbor/native/target json/circe/.native/target finite-state/jvm/target scalafix/tests/target cbor-json/js/target cbor/jvm/target csv/native/target json/circe/.jvm/target .jvm/target scalafix/input/target csv/js/target csv/generic/jvm/target .native/target text/jvm/target xml/.native/target json/diffson/.native/target json/diffson/.js/target cbor-json/jvm/target json/interpolators/.jvm/target json/.js/target json/interpolators/.js/target csv/generic/js/target json/circe/.js/target json/diffson/.jvm/target xml/scala-xml/.js/target csv/generic/native/target xml/scala-xml/.jvm/target json/interpolators/.native/target project/target - name: Upload target directories if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main') From 1d8edd7a9984e7314967c8264158a0d675236c84 Mon Sep 17 00:00:00 2001 From: Lucas Satabin Date: Sun, 2 Jul 2023 21:21:37 +0200 Subject: [PATCH 08/10] Add migration guide from `circe-fs2` --- build.sbt | 3 +- documentation/docs/json/libraries.md | 46 ++++++++++++++++++++++------ 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/build.sbt b/build.sbt index c81204a35..723477927 100644 --- a/build.sbt +++ b/build.sbt @@ -533,7 +533,8 @@ lazy val documentation = project "com.beachape" %% "enumeratum" % "1.7.0", "org.gnieh" %% "diffson-circe" % diffsonVersion, "io.circe" %% "circe-generic-extras" % circeExtrasVersion, - "co.fs2" %% "fs2-io" % fs2Version + "co.fs2" %% "fs2-io" % fs2Version, + "io.circe" %% "circe-fs2" % "0.14.1" ), scalacOptions += "-Ymacro-annotations" ) diff --git a/documentation/docs/json/libraries.md b/documentation/docs/json/libraries.md index b5c744a6d..69a62cfac 100644 --- a/documentation/docs/json/libraries.md +++ 
b/documentation/docs/json/libraries.md
@@ -18,7 +18,7 @@ import fs2.data.json._
 import fs2.data.json.jsonpath._
 import fs2.data.json.jsonpath.literals._
 
-val input = Stream.emit("""{
+def input[F[_]] = Stream.emit("""{
   "field1": 0,
   "field2": "test",
   "field3": [1, 2, 3]
@@ -26,9 +26,9 @@
 {
   "field1": 2,
   "field3": []
-}""").covary[Fallible]
+}""").covary[F]
 
-val stream = input.through(tokens)
+val stream = input[Fallible].through(tokens)
 
 val sel = jsonpath"$$.field3[*]"
 ```
@@ -42,9 +42,8 @@
 ```scala mdoc:nest
 import fs2.data.json.circe._
-import io.circe._
 
-val asts = input.through(ast.parse)
+val asts = input[Fallible].through(ast.parse)
 asts.map(_.spaces2).compile.toList
 ```
 
 You can use `filter.values` to select only the values matching the JSONPath and deserialize them using the builder.
 
@@ -53,8 +52,6 @@
 ```scala mdoc:nest
 import fs2.data.json.circe._
 
-import io.circe._
-
 import cats.effect._
 import cats.syntax.all._
 import cats.effect.unsafe.implicits.global
@@ -87,7 +84,6 @@ case class Wrapped(test: Int)
 ```scala mdoc:nest
 import fs2.data.json.selector._
 import fs2.data.json.circe._
-import io.circe._
 
 val values = stream.through(codec.deserialize[Fallible, Data])
 values.compile.toList
@@ -105,7 +101,6 @@ Dropping values can be done similarly.
 ```scala mdoc:nest
 import fs2.data.json.circe._
-import io.circe._
 import cats.syntax.all._
 
 val f1 = root.field("field1").compile
@@ -114,6 +109,38 @@ val transformed = stream.through(codec.transformOpt(f1, (i: Int) => (i > 0).guar
 transformed.compile.to(collector.pretty())
 ```
 
+#### Migrating from `circe-fs2`
+
+If you were using [`circe-fs2`][circe-fs2] to emit streams of `Json` values, you can easily switch to `fs2-data-json-circe`. Just replace your usages of `stringStreamParser` or `byteStreamParser` with `fs2.data.json.ast.parse`.
+
+For instance, if you had this code:
+
+```scala mdoc:nest
+import io.circe.fs2._
+
+import cats.effect._
+
+input[SyncIO]
+  .through(stringStreamParser)
+  .map(_.spaces2)
+  .compile
+  .toList
+  .unsafeRunSync()
+```
+
+You can replace it with:
+
+```scala mdoc:nest
+import fs2.data.json._
+import fs2.data.json.circe._
+
+input[Fallible]
+  .through(ast.parse)
+  .map(_.spaces2)
+  .compile
+  .toList
+```
+
 ### Play! JSON
 
 Module: [![Maven Central](https://img.shields.io/maven-central/v/org.gnieh/fs2-data-json-play_2.13.svg)](https://mvnrepository.com/artifact/org.gnieh/fs2-data-json-play_2.13)
@@ -125,3 +152,4 @@ It also provides `Deserializer` for types with a `Reads` instance and `Serialize
 [json-doc]: /documentation/json/
 [circe]: https://circe.github.io/circe/
 [play-json]: https://www.playframework.com/
+[circe-fs2]: https://github.com/circe/circe-fs2

From 814845d8c6a4619f988a6f80ff9aed79adffa8e7 Mon Sep 17 00:00:00 2001
From: Lucas Satabin
Date: Mon, 17 Jul 2023 18:42:59 +0200
Subject: [PATCH 09/10] Add link to byte stream decoding

---
 documentation/docs/json/libraries.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/documentation/docs/json/libraries.md b/documentation/docs/json/libraries.md
index 69a62cfac..609063a91 100644
--- a/documentation/docs/json/libraries.md
+++ b/documentation/docs/json/libraries.md
@@ -141,6 +141,8 @@ input[Fallible]
   .toList
 ```
 
+If you were using `byteStreamParser`, please refer to the [`fs2.data.text` package documentation][text] to learn how to decode the byte stream.
+
 ### Play!
JSON
 
 Module: [![Maven Central](https://img.shields.io/maven-central/v/org.gnieh/fs2-data-json-play_2.13.svg)](https://mvnrepository.com/artifact/org.gnieh/fs2-data-json-play_2.13)
@@ -153,3 +155,4 @@ It also provides `Deserializer` for types with a `Reads` instance and `Serialize
 [json-doc]: /documentation/json/
 [circe]: https://circe.github.io/circe/
 [play-json]: https://www.playframework.com/
 [circe-fs2]: https://github.com/circe/circe-fs2
+[text]: /documentation/#decoding-textual-inputs

From 037c1d69733e4926b8f5adeb5d2d2dbd29c14571 Mon Sep 17 00:00:00 2001
From: Lucas Satabin
Date: Mon, 17 Jul 2023 18:50:57 +0200
Subject: [PATCH 10/10] Make keyword acc function clearer in name and type

The fact that it now returns `Unit` makes it clearer that it is
side-effecting.

---
 .scalafmt.conf                               |  2 +-
 .../data/json/internal/JsonTokenParser.scala | 17 +++++++----------
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/.scalafmt.conf b/.scalafmt.conf
index 83f58bea0..5e73e3d03 100644
--- a/.scalafmt.conf
+++ b/.scalafmt.conf
@@ -14,7 +14,7 @@ rewrite.rules = [
   prefercurlyfors
 ]
 
-runner.dialect = scala213
+runner.dialect = scala213source3
 fileOverride {
   "glob:**/src/*/scala-3/**" {
     runner.dialect = scala3

diff --git a/json/src/main/scala/fs2/data/json/internal/JsonTokenParser.scala b/json/src/main/scala/fs2/data/json/internal/JsonTokenParser.scala
index f32567ca9..61d7995be 100644
--- a/json/src/main/scala/fs2/data/json/internal/JsonTokenParser.scala
+++ b/json/src/main/scala/fs2/data/json/internal/JsonTokenParser.scala
@@ -223,16 +223,13 @@ private[json] class JsonTokenParser[F[_], T, Res](
     }
   }
 
-  private final def keyword_(expected: String,
-                             eidx: Int,
-                             elen: Int,
-                             accumulate: () => ChunkAccumulator[Res]): Pull[F, Res, Unit] = {
+  private final def keyword_(expected: String, eidx: Int, elen: Int, processToken: () => Unit): Pull[F, Res, Unit] = {
     if (T.needsPull(context)) {
       emitChunk() >> T.pullNext(context).flatMap {
         case Some(context) =>
           this.context = context
           chunkAcc.flush()
-          keyword_(expected, eidx, elen, accumulate)
+          keyword_(expected, eidx, elen, processToken)
         case None => Pull.raiseError[F](new JsonException("unexpected end of input"))
       }
     } else {
@@ -240,11 +237,11 @@ private[json] class JsonTokenParser[F[_], T, Res](
       if (c == expected.charAt(eidx)) {
         if (eidx == elen - 1) {
           T.advance(context)
-          accumulate()
+          processToken()
           Pull.done
         } else {
           T.advance(context)
-          keyword_(expected, eidx + 1, elen, accumulate)
+          keyword_(expected, eidx + 1, elen, processToken)
         }
       } else {
         emitChunk() >> Pull.raiseError[F](new JsonException(s"unexpected character '$c' (expected $expected)"))
@@ -272,9 +269,9 @@ private[json] class JsonTokenParser[F[_], T, Res](
           T.advance(context)
           chunkAcc.startArray()
           Pull.suspend(go_(State.BeforeArrayValue))
-        case 't' => keyword_("true", 0, 4, chunkAcc.trueValue)
-        case 'f' => keyword_("false", 0, 5, chunkAcc.falseValue)
-        case 'n' => keyword_("null", 0, 4, chunkAcc.nullValue)
+        case 't' => keyword_("true", 0, 4, chunkAcc.trueValue _)
+        case 'f' => keyword_("false", 0, 5, chunkAcc.falseValue _)
+        case 'n' => keyword_("null", 0, 4, chunkAcc.nullValue _)
         case '"' =>
           T.advance(context)
           T.mark(context)
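A side note on the callback style adopted in this last patch: passing an accumulator method as a `() => Unit` function (via explicit eta-expansion, as in `chunkAcc.trueValue _`) discards the chainable `this.type` result, which is exactly what the new signature advertises. Below is a minimal, self-contained sketch of the pattern; the `Acc` class and all names in it are hypothetical, not the library's API.

```scala
import scala.collection.mutable.ListBuffer

// Hypothetical stand-in for the accumulator: methods return
// `this.type` so calls can be chained, but callers may also
// ignore the result and use them purely for their side effect.
class Acc {
  private val tokens = ListBuffer.empty[String]
  def trueValue(): this.type = { tokens += "true"; this }
  def result(): List[String] = tokens.toList
}

// The callback type is `() => Unit`: the keyword matcher only
// cares about the side effect, not the returned accumulator.
def onKeywordMatched(processToken: () => Unit): Unit =
  processToken() // the `this.type` result is discarded here

val acc = new Acc
// Explicit eta-expansion turns the method into a function value;
// the non-Unit result type is adapted by value discarding.
onKeywordMatched(acc.trueValue _)
assert(acc.result() == List("true"))
```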