From 152b3cdf9ba42e5fff90a378fecff5dec185cfed Mon Sep 17 00:00:00 2001 From: EduardoLR10 Date: Sat, 30 Dec 2023 00:12:02 -0300 Subject: [PATCH 1/2] Update serialization strategy to NOT depend on new lines --- ast.rkt | 67 ++++++++++++++++++++-------------- io.rkt | 71 ++++++++++++++++++------------------ main.rkt | 2 +- ndf/data/PROGRAMMER.ndf | Bin 40 -> 42 bytes ndf/entities/PROGRAMMER.ndf | Bin 507 -> 521 bytes ndf/schemas/schema.ndf | Bin 651 -> 688 bytes util.rkt | 63 +++++++++++++++++++------------- 7 files changed, 113 insertions(+), 90 deletions(-) diff --git a/ast.rkt b/ast.rkt index 6cec84d..38004c2 100644 --- a/ast.rkt +++ b/ast.rkt @@ -38,9 +38,11 @@ #:methods gen:serializable [(define (serialize self #:size [_size #f]) (let* [(name-bytes (string->bytes/utf-8 (symbol->string (type-name self)))) - (name-length (integer->integer-bytes (bytes-length name-bytes) 1 #t)) - (byte-size-bytes (integer->integer-bytes (type-byte-size self) 4 #t))] - (bytes-append name-length name-bytes byte-size-bytes))) + (name-length (bytes-length name-bytes)) + (serialized-name-length (integer->integer-bytes name-length 1 #t)) + (byte-size-bytes (integer->integer-bytes (type-byte-size self) 4 #t)) + (payload (bytes-append serialized-name-length name-bytes byte-size-bytes))] + (values (bytes-length payload) payload))) (define (deserialize _self byte-stream) (let* [(name-length (integer-bytes->integer (make-bytes 1 (bytes-ref byte-stream 0)) #t)) (name-value (~> name-length @@ -48,8 +50,8 @@ (subbytes byte-stream 1 _) bytes->string/utf-8 string->symbol)) - (byte-size-value (integer-bytes->integer (subbytes byte-stream (+ 1 name-length) (+ 3 name-length)) #t))] - (values (type name-value byte-size-value) (+ 5 name-length))))]) + (byte-size-value (integer-bytes->integer (subbytes byte-stream (+ 1 name-length)) #t))] + (type name-value byte-size-value)))]) (define-serializable stringl [value] #:transparent #:guard @@ -63,13 +65,14 @@ (let* [(value (stringl-value self)) (size-of-string (string-length value))] (if (< size size-of-string) - (string->bytes/utf-8 (substring value 0 size)) ;; Truncate - (let [(dest-bytes (make-bytes size 0)) - (serialyzed-string (string->bytes/utf-8 value))] - (bytes-copy! dest-bytes 0 serialyzed-string) - dest-bytes)))) + (values size (string->bytes/utf-8 (substring value 0 size))) ;; Truncate + (let* [(dest-bytes (make-bytes size 0)) + (serialized-string (string->bytes/utf-8 value)) + (serialized-string-length (bytes-length serialized-string))] + (bytes-copy! dest-bytes 0 serialized-string) + (values serialized-string-length dest-bytes))))) (define (deserialize _self byte-stream) - (values (stringl (string-trim (bytes->string/utf-8 byte-stream)) (bytes-length byte-stream))))]) + (stringl (string-trim (bytes->string/utf-8 byte-stream))))]) (define-serializable integer32 [value] #:transparent #:guard @@ -78,9 +81,9 @@ value) #:methods gen:serializable [(define (serialize self #:size [_size #f]) - (integer->integer-bytes (integer32-value self) 4 #t)) + (values 4 (bytes-append (integer->integer-bytes (integer32-value self) 4 #t)))) (define (deserialize _self byte-stream) - (values (integer32 (integer-bytes->integer (subbytes byte-stream 0 4) #t)) 4))]) + (integer32 (integer-bytes->integer (subbytes byte-stream 0 4) #t)))]) (define-serializable field [position type] #:transparent #:guard @@ -93,15 +96,16 @@ (define (serialize self #:size [_size #f]) (let* [(position (field-position self)) (position-bytes (integer->integer-bytes position 1 #t)) - (type (field-type self)) - (type-bytes (super-serialize type #:size (type-byte-size type)))] - (bytes-append position-bytes type-bytes))) + (type (field-type self))] + (define-values (type-size type-bytes) (super-serialize type #:size (type-byte-size type))) + (define total-size (+ 1 type-size)) + (values total-size (bytes-append position-bytes type-bytes)))) (define/generic super-deserialize deserialize) (define (deserialize self byte-stream) (let* [(position-value (integer-bytes->integer (make-bytes 1 (bytes-ref byte-stream 0)) #t)) - (type-bytes (subbytes byte-stream 1))] - (define-values [new-type type-consumed] (super-deserialize struct:type type-bytes)) - (values (field position-value new-type) (+ 1 type-consumed))))]) + (type-bytes (subbytes byte-stream 1)) + (new-type (super-deserialize struct:type type-bytes))] + (field position-value new-type)))]) (define (fields-size fields) (let* [(fields-values (hash-values fields))] @@ -147,9 +151,12 @@ (bytes-append serialized-count serialized-constraints))) (let* [(row-id (table-row-id self)) (row-id-bytes (integer->integer-bytes row-id 4 #t)) - (fields-list (hash->list (table-fields self))) - (constraints (serialize-constraints (table-local-constraints self)))] - (bytes-append row-id-bytes constraints (serialize-hash-list fields-list #:entity? #f)))) + (fields-list (hash->list (table-fields self))) + (how-many-fields (integer->integer-bytes (length fields-list) 2 #t)) + (serialized-fields (serialize-hash-list fields-list)) + (constraints (serialize-constraints (table-local-constraints self))) + (total-size (+ 4 2 (bytes-length constraints) (bytes-length serialized-fields)))] + (values total-size (bytes-append row-id-bytes constraints how-many-fields serialized-fields)))) (define (deserialize self byte-stream) (define (utf8-character-as-integer byte-array) (call-with-input-bytes @@ -177,10 +184,12 @@ (loop (cons constraint-value acc) (- n 1) (+ consumed-bytes constraint-size) (subbytes streamb constraint-size)))))) (define row-id-value (integer-bytes->integer (subbytes byte-stream 0 4) #t)) (define-values (constraints-length constraints-value) (deserialize-constraints (subbytes byte-stream 4))) - (define fields-value (make-hash (deserialize-hash-list struct:field (subbytes byte-stream (+ 4 constraints-length)) '()))) - (values - (table "table" row-id-value fields-value constraints-value) - (bytes-length byte-stream)))]) + (define-values (_consumed-fields-bytes fields-value) + (deserialize-hash-list + struct:field + (integer-bytes->integer (subbytes byte-stream (+ 4 constraints-length) (+ 6 constraints-length)) #t) + (subbytes byte-stream (+ 6 constraints-length)))) + (table "table" row-id-value (make-immutable-hash fields-value) constraints-value))]) (define-serializable procedure [identifier] #:transparent #:guard @@ -192,7 +201,9 @@ (procedure-identifier self))] #:methods gen:serializable [(define (serialize _self #:size [_size #f]) - (bytes-append (string->bytes/utf-8 "procedures' serialization is not yet implemented"))) + (let* [(todo-string (string->bytes/utf-8 "procedures' serialization is not yet implemented")) + (todo-string-length (bytes-length todo-string))] + (values todo-string-length todo-string))) (define (deserialize _self byte-stream) (println "procedures' deserialization is not yet implemented") - (values (procedure "procedure") (bytes-length byte-stream)))]) + (procedure "procedure"))]) diff --git a/io.rkt b/io.rkt index 0200b51..77c9ceb 100644 --- a/io.rkt +++ b/io.rkt @@ -19,7 +19,8 @@ (type (field-type attribute)) (type-size (type-byte-size type)) (position (field-position attribute))] - (cons position (serialize literal #:size type-size)))) + (define-values (_literal-size serialized-literal) (serialize literal #:size type-size)) + (cons position serialized-literal))) (define (convert-row table row) (~> @@ -66,19 +67,21 @@ (raise 'tried-update-row-id-with-procedure)]))) (define (write-table-to-disk table table-name) - (let* [(serialized-table (serialize table)) - (file-name (build-ndf-filename table-name))] - (call-with-output-file file-name #:exists 'truncate - (curry write-bytes serialized-table)) - (void))) + (define-values (_table-size serialized-table) (serialize table)) + (define filename (build-ndf-filename table-name)) + (call-with-output-file filename #:exists 'truncate + (curry write-bytes serialized-table)) + (void)) (define (write-schema-to-disk schema) (define (write-entity-to-disk file-out entities-list) - (let* [(entity-name (give-identifier (cdr (car entities-list))))] + (let* [(entity-name (give-identifier (cdr (car entities-list)))) + (entity-name-length (string-length entity-name)) + (list-length (length entities-list))] + (write-bytes (integer->integer-bytes entity-name-length 1 #t) file-out) (write-string entity-name file-out) - (newline file-out) - (write-bytes (serialize-hash-list entities-list #:entity? #t) file-out) - (newline file-out))) + (write-bytes (integer->integer-bytes list-length 2 #t) file-out) + (write-bytes (serialize-hash-list entities-list) file-out))) (let* [(schema-list (hash->list schema)) (file-name (build-ndf-filename "schema" #:data? 'schema))] (call-with-output-file file-name #:exists 'truncate @@ -94,41 +97,39 @@ read-table-values-from-disk) (require - RacketowerDB/ast + RacketowerDB/ast + RacketowerDB/util racket/hash) - + (define (read-schema-from-disk schema-name) - (define (build-hash-from-line struct-instance line-in-bytes) - (make-immutable-hash - (deserialize-hash-list - struct-instance - line-in-bytes - '()))) - (define (proceed-reading struct-name current-schema line-in-bytes) - (let [(line (bytes->string/utf-8 line-in-bytes))] - (if (hash-has-key? entity-structs line) - (cons line current-schema) - (cons struct-name (hash-union current-schema (build-hash-from-line (hash-ref entity-structs struct-name) line-in-bytes)))))) + (define (read-entities-from-disk how-many-entities entity-name byte-stream) + (define-values (consumed-bytes entities) (deserialize-hash-list + (hash-ref entity-structs entity-name) + how-many-entities + byte-stream)) + (values (make-immutable-hash entities) (subbytes byte-stream consumed-bytes))) + (define (read-block-from-disk byte-stream) + (let* [(entity-name-size (integer-bytes->integer (subbytes byte-stream 0 1) #t)) + (entity-name (bytes->string/utf-8 (subbytes byte-stream 1 (+ 1 entity-name-size)))) + (how-many-entities (integer-bytes->integer (subbytes byte-stream (+ 1 entity-name-size) (+ 3 entity-name-size)) #t))] + (read-entities-from-disk how-many-entities entity-name (subbytes byte-stream (+ 3 entity-name-size))))) (let* [(file-name (build-ndf-filename schema-name #:data? 'schema)) (in (open-input-file file-name #:mode 'binary)) - (schema (make-immutable-hash (list))) - (real-lines (port->bytes-lines in #:close? #t)) - (read-lines (fix-empty-read-bytes-lines real-lines))] - (~> (foldl - (lambda [line-in-bytes acc] - (let [(builder-struct (car acc)) - (current-schema (cdr acc))] - (proceed-reading builder-struct current-schema line-in-bytes))) - (cons null schema) read-lines) - cdr + (content (port->bytes in #:close? #t))] + (~> (let loop [(schema (make-immutable-hash (list))) + (byte-array content)] + (if (bytes-empty? byte-array) + schema + (let [] + (define-values (sub-schema remaining-byte-stream) (read-block-from-disk byte-array)) + (loop (hash-union schema sub-schema) remaining-byte-stream)))) hash->list make-hash))) (define (read-table-from-disk table-name) (let* [(file-name (build-ndf-filename #:data? 'entity table-name)) (in (open-input-file file-name #:mode 'binary))] - (define-values (table table-consumed) (deserialize struct:table (port->bytes in #:close? #t))) - table)) + (deserialize struct:table (port->bytes in #:close? #t)))) (define (read-table-values-from-disk schema table-name) (let* [(file-name (build-ndf-filename #:data? 'data table-name)) diff --git a/main.rkt b/main.rkt index f74b956..7133dda 100644 --- a/main.rkt +++ b/main.rkt @@ -23,7 +23,7 @@ (module+ main (define field-name (field 0 (type 'VARCHAR 7))) - (define field-editor (field 1 (type 'VARCHAR 9))) + (define field-editor (field 1 (type 'VARCHAR 10))) (define field-year (field 1 (type 'INTEGER 4))) (define field-age (field 2 (type 'INTEGER 4))) diff --git a/ndf/data/PROGRAMMER.ndf b/ndf/data/PROGRAMMER.ndf index 701182e696c73fc44a550870e5903ee533678e60..ae6c837c59596ab4552897bf6afe5c8595670e84 100644 GIT binary patch delta 29 kcmdPU67)+f$wk-W013lV-P1$h#80--Cdd3J^eyl-CcuNfP4`4 ObM$p(fGK1L3IG5o;tfIo diff --git a/ndf/schemas/schema.ndf b/ndf/schemas/schema.ndf index 7a64751d5be3e7957fd82a162e2d51e61ecba92d..6780c0a15b4e618f9e31259ced5b7149d20cfa52 100644 GIT binary patch delta 206 zcmeBX-M}irSx}UpoSITvl*-7!!oa`~;u;)c0Hh}h87Q)rBqrsgGBI!g`2j)x?m>>e zzOF%>Oh7IWFdd#a*^qJS#?SJM8q5rAKoM6L&k+A0ULcE+J*)hlpBn|>h46H!X*Wbm}2SY8$jghX7P+L6xLR{TlgFu1+ DmuMk_ delta 185 zcmdnM+RZ9dP?VpXno?Sn%EiLKz!2gZ95PYDK!&R%F)1gNiwh_e5ajP3YP>30b9o=1-*ggG1T-{xRK<0w5 qpQEoU156=1P=E`h(%BJgE(0r&?d$L2>Vu*Lq%YFd5oiIDVO#*qW+Pev diff --git a/util.rkt b/util.rkt index 748d5a2..0959e00 100644 --- a/util.rkt +++ b/util.rkt @@ -5,7 +5,8 @@ checked-guard entity-structs define-serializable - fix-empty-read-bytes-lines) + fix-empty-read-bytes-lines + bytes-empty?) (require (for-syntax threading racket/syntax racket/list) @@ -80,6 +81,9 @@ args ...)) ... body ...))])) +(define (bytes-empty? byte-stream) + (equal? #"" byte-stream)) + (module interfaces racket (provide serializable? @@ -90,8 +94,8 @@ gen:identifiable (contract-out [give-identifier (-> identifiable? string?)] - [serialize (->* (serializable?) (#:size integer?) bytes?)] - [deserialize (-> serializable? bytes? (values serializable? natural?))] + [serialize (->* (serializable?) (#:size integer?) (values natural? bytes?))] + [deserialize (-> serializable? bytes? serializable?)] [from-bytes (-> byteable? bytes? serializable?)] [to-byte-size (-> byteable? natural?)])) @@ -111,40 +115,47 @@ (module+ hashable (provide (contract-out - [deserialize-hash-list (-> serializable? bytes? list? list?)] - [serialize-hash-list (-> (listof (cons/c string? serializable?)) #:entity? boolean? bytes?)])) + [deserialize-hash-list (-> serializable? natural? bytes? (values natural? list?))] + [serialize-hash-list (-> (listof (cons/c string? serializable?)) bytes?)])) (require (submod ".." interfaces)) - (define (deserialize-hash-list entity byte-stream accumulator) + (define (deserialize-hash-list entity how-many byte-stream) + (inner-deserialize-hash-list entity how-many byte-stream 0 '())) + + (define (inner-deserialize-hash-list entity how-many byte-stream consumed-bytes accumulator) (define (deserialize-name more-bytes) (let* [(name-size (integer-bytes->integer (subbytes more-bytes 0 4) #t)) (name (bytes->string/utf-8 (subbytes more-bytes 4 (+ 4 name-size))))] - (cons name (+ 4 name-size)))) - (if (equal? byte-stream #"") - accumulator - (let* [(name-consumption (deserialize-name byte-stream)) - (name-consumed (cdr name-consumption)) - (name (car name-consumption))] - (define-values [thing thing-consumed] (deserialize entity (subbytes byte-stream name-consumed))) - (deserialize-hash-list - entity - (subbytes byte-stream (+ name-consumed thing-consumed) (bytes-length byte-stream)) - (append accumulator (list (cons name thing))))))) - - (define (serialize-hash-list named-values-list #:entity? entity?) + (values (+ 4 name-size) name))) + (if (zero? how-many) + (values consumed-bytes accumulator) + (let [] + (define-values [name-consumed name] (deserialize-name byte-stream)) + (define entity-size (integer-bytes->integer (subbytes byte-stream name-consumed (+ name-consumed 4)) #t)) + (define thing (deserialize entity (subbytes byte-stream (+ 4 name-consumed) (+ name-consumed entity-size 4)))) + (inner-deserialize-hash-list + entity + (- how-many 1) + (subbytes byte-stream (+ 4 name-consumed entity-size)) + (+ consumed-bytes name-consumed entity-size 4) + (append accumulator (list (cons name thing))))))) + + (define (serialize-hash-list named-values-list) (define (serialize-name name) (let* [(name-bytes (string->bytes/utf-8 name)) - (name-size (integer->integer-bytes (bytes-length name-bytes) 4 #t))] - (bytes-append name-size name-bytes))) - + (name-size (bytes-length name-bytes)) + (serialized-name-size (integer->integer-bytes name-size 4 #t))] + (bytes-append serialized-name-size name-bytes))) (~> (map (lambda [named-value] - (let [(name (car named-value)) - (value (cdr named-value))] + (let* [(name (car named-value)) + (value (cdr named-value))] + (define-values [value-size serialized-value] (serialize value)) (bytes-append (serialize-name name) - (serialize value)))) + (integer->integer-bytes value-size 4 #t) + serialized-value))) named-values-list) - (bytes-join _ (if entity? #"\n" #""))))) + (bytes-join _ #"")))) From 3d33f36e6bd3bfacfbb4e51151efdbb63e8b03a4 Mon Sep 17 00:00:00 2001 From: EduardoLR10 Date: Sat, 30 Dec 2023 00:18:02 -0300 Subject: [PATCH 2/2] Remove cursed fix-empty-read-byte-lines function --- util.rkt | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/util.rkt b/util.rkt index 0959e00..78f5cf1 100644 --- a/util.rkt +++ b/util.rkt @@ -5,7 +5,6 @@ checked-guard entity-structs define-serializable - fix-empty-read-bytes-lines bytes-empty?) (require @@ -14,32 +13,6 @@ threading br/cond) -;; FIXME: With the addition of constraints, there can be multiple new lines in sequence -;; and this stupid will not work anymore. Good luck xD -(define (fix-empty-read-bytes-lines lines) - (define (fix-one-turn inner-lines) - (let [(newline-flag #f)] - (foldl (lambda [line new-lines] - (cond - [newline-flag - (set! newline-flag #f) - (append new-lines (list (bytes-append #"\n" line)))] - - [(bytes=? #"" line) - (set! newline-flag #t) - new-lines] - - [else (append new-lines (list line))])) - (list) inner-lines))) - - (define (stop-condition lines-to-check) - (empty? (filter (curry bytes=? #"") lines-to-check))) - - (while (not (stop-condition lines)) - (set! lines (fix-one-turn lines))) - - lines) - (define (build-ndf-filename #:data? [data? 'entity] name) (let [(path (case (list 'quote data?) [('entity) "ndf/entities/"]