From 9694aee819af65f0089e351c450343d5d11b41ad Mon Sep 17 00:00:00 2001
From: Calvin Rose
Date: Thu, 12 Sep 2024 17:03:03 -0500
Subject: [PATCH] Add rules for nth and only-tags.

Address #1503

These rules allow selecting from a number of sub-captures while
dropping the rest. `nth` is more succinct in many cases, but `only-tags`
is more general and corresponds to an internal mechanism already
present.
---
Review note: in RULE_NTH the capture bounds check now compares in
unsigned arithmetic. peg_unmarshal validates only rule[2] for this
opcode, so rule[1] (nth) can hold any 32-bit value in unmarshaled
bytecode; the previous `(int32_t) nth` cast turned values above
INT32_MAX negative, which passed the check and then indexed outside
s->captures->data. Behaviour for nth <= INT32_MAX is unchanged. Hunk
sizes are unchanged; the peg.c `index` blob id predates this edit.

 src/core/filewatch.c |  1 +
 src/core/peg.c       | 54 ++++++++++++++++++++++++++++++++++++++++++++
 src/include/janet.h  |  4 +++-
 test/suite-peg.janet | 16 +++++++++++++
 4 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/src/core/filewatch.c b/src/core/filewatch.c
index 4e8e85b7f..ef666af64 100644
--- a/src/core/filewatch.c
+++ b/src/core/filewatch.c
@@ -588,6 +588,7 @@ JANET_CORE_FN(cfun_filewatch_make,
               "* `:wd-path` -- the string path for watched directory of file. For files, will be the same as `:file-name`, and for directories, will be the same as `:dir-name`.\n\n"
               "* `:cookie` -- a randomized integer used to associate related events, such as :moved-from and :moved-to events.\n\n"
               "") {
+    janet_sandbox_assert(JANET_SANDBOX_FS_READ);
     janet_arity(argc, 1, -1);
     JanetChannel *channel = janet_getchannel(argv, 0);
     JanetWatcher *watcher = janet_abstract(&janet_filewatch_at, sizeof(JanetWatcher));
diff --git a/src/core/peg.c b/src/core/peg.c
index 48ba88d12..0a2b7d4f2 100644
--- a/src/core/peg.c
+++ b/src/core/peg.c
@@ -465,6 +465,16 @@ static const uint8_t *peg_rule(
             return result;
         }
 
+        case RULE_ONLY_TAGS: {
+            CapState cs = cap_save(s);
+            down1(s);
+            const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
+            up1(s);
+            if (!result) return NULL;
+            cap_load_keept(s, cs);
+            return result;
+        }
+
         case RULE_GROUP: {
             uint32_t tag = rule[2];
             int oldmode = s->mode;
@@ -486,6 +496,29 @@ static const uint8_t *peg_rule(
             return result;
         }
 
+        case RULE_NTH: {
+            uint32_t nth = rule[1]; /* raw operand; not range-checked by peg_unmarshal */
+            uint32_t tag = rule[3];
+            int oldmode = s->mode;
+            CapState cs = cap_save(s);
+            s->mode = PEG_MODE_NORMAL;
+            down1(s);
+            const uint8_t *result = peg_rule(s, s->bytecode + rule[2], text);
+            up1(s);
+            s->mode = oldmode;
+            if (!result) return NULL;
+            int32_t num_sub_captures = s->captures->count - cs.cap;
+            Janet cap;
+            if (num_sub_captures > 0 && (uint32_t) num_sub_captures > nth) { /* unsigned compare: huge nth must fail, not wrap negative */
+                cap = s->captures->data[cs.cap + nth];
+            } else {
+                return NULL;
+            }
+            cap_load_keept(s, cs);
+            pushcap(s, cap, tag);
+            return result;
+        }
+
         case RULE_SUB: {
             const uint8_t *text_start = text;
             const uint32_t *rule_window = s->bytecode + rule[1];
@@ -1061,6 +1094,9 @@ static void spec_thru(Builder *b, int32_t argc, const Janet *argv) {
 static void spec_drop(Builder *b, int32_t argc, const Janet *argv) {
     spec_onerule(b, argc, argv, RULE_DROP);
 }
+static void spec_only_tags(Builder *b, int32_t argc, const Janet *argv) {
+    spec_onerule(b, argc, argv, RULE_ONLY_TAGS);
+}
 
 /* Rule of the form [rule, tag] */
 static void spec_cap1(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
@@ -1084,6 +1120,15 @@ static void spec_unref(Builder *b, int32_t argc, const Janet *argv) {
     spec_cap1(b, argc, argv, RULE_UNREF);
 }
 
+static void spec_nth(Builder *b, int32_t argc, const Janet *argv) {
+    peg_arity(b, argc, 2, 3);
+    Reserve r = reserve(b, 4);
+    uint32_t nth = peg_getnat(b, argv[0]);
+    uint32_t rule = peg_compile1(b, argv[1]);
+    uint32_t tag = (argc == 3) ? emit_tag(b, argv[2]) : 0;
+    emit_3(r, RULE_NTH, nth, rule, tag);
+}
+
 static void spec_capture_number(Builder *b, int32_t argc, const Janet *argv) {
     peg_arity(b, argc, 1, 3);
     Reserve r = reserve(b, 4);
@@ -1262,7 +1307,9 @@ static const SpecialPair peg_specials[] = {
     {"line", spec_line},
     {"look", spec_look},
     {"not", spec_not},
+    {"nth", spec_nth},
     {"number", spec_capture_number},
+    {"only-tags", spec_only_tags},
     {"opt", spec_opt},
     {"position", spec_position},
     {"quote", spec_capture},
@@ -1619,6 +1666,7 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) {
                 break;
             case RULE_ERROR:
             case RULE_DROP:
+            case RULE_ONLY_TAGS:
             case RULE_NOT:
             case RULE_TO:
             case RULE_THRU:
@@ -1632,6 +1680,12 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) {
                 if (rule[1] > JANET_MAX_READINT_WIDTH) goto bad;
                 i += 3;
                 break;
+            case RULE_NTH:
+                /* [nth, rule, tag] */
+                if (rule[2] >= blen) goto bad;
+                op_flags[rule[2]] |= 0x01;
+                i += 4;
+                break;
             default:
                 goto bad;
         }
diff --git a/src/include/janet.h b/src/include/janet.h
index ce37b84a5..eb0b462f4 100644
--- a/src/include/janet.h
+++ b/src/include/janet.h
@@ -2180,7 +2180,9 @@ typedef enum {
     RULE_UNREF,        /* [rule, tag] */
     RULE_CAPTURE_NUM,  /* [rule, tag] */
     RULE_SUB,          /* [rule, rule] */
-    RULE_SPLIT         /* [rule, rule] */
+    RULE_SPLIT,        /* [rule, rule] */
+    RULE_NTH,          /* [nth, rule, tag] */
+    RULE_ONLY_TAGS,    /* [rule] */
 } JanetPegOpcod;
 
 typedef struct {
diff --git a/test/suite-peg.janet b/test/suite-peg.janet
index b4547db24..ac426cfc9 100644
--- a/test/suite-peg.janet
+++ b/test/suite-peg.janet
@@ -664,6 +664,8 @@
         @[]) "peg if not")
 
 (defn test [name peg input expected]
+  (assert-no-error "compile peg" (peg/compile peg))
+  (assert-no-error "marshal/unmarshal peg" (-> peg marshal unmarshal))
   (assert (deep= (peg/match peg input) expected) name))
 
 (test "sub: matches the same input twice"
@@ -756,5 +758,19 @@
   "a,b,c"
   @["a" "b" "c"])
 
+(test "nth 1"
+  ~{:prefix (number :d+ nil :n)
+    :word '(lenprefix (-> :n) :w)
+    :main (some (nth 1 (* :prefix ":" :word)))}
+  "5:apple6:banana6:cherry"
+  @["apple" "banana" "cherry"])
+
+(test "only-tags 1"
+  ~{:prefix (number :d+ nil :n)
+    :word (capture (lenprefix (-> :n) :w) :W)
+    :main (some (* (only-tags (* :prefix ":" :word)) (-> :W)))}
+  "5:apple6:banana6:cherry"
+  @["apple" "banana" "cherry"])
+
 (end-suite)
 