Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update serialization strategy to NOT depend on new lines #20

Merged
merged 2 commits into from
Jan 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 39 additions & 28 deletions ast.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,20 @@
#:methods gen:serializable
[(define (serialize self #:size [_size #f])
(let* [(name-bytes (string->bytes/utf-8 (symbol->string (type-name self))))
(name-length (integer->integer-bytes (bytes-length name-bytes) 1 #t))
(byte-size-bytes (integer->integer-bytes (type-byte-size self) 4 #t))]
(bytes-append name-length name-bytes byte-size-bytes)))
(name-length (bytes-length name-bytes))
(serialized-name-length (integer->integer-bytes name-length 1 #t))
(byte-size-bytes (integer->integer-bytes (type-byte-size self) 4 #t))
(payload (bytes-append serialized-name-length name-bytes byte-size-bytes))]
(values (bytes-length payload) payload)))
(define (deserialize _self byte-stream)
(let* [(name-length (integer-bytes->integer (make-bytes 1 (bytes-ref byte-stream 0)) #t))
(name-value (~> name-length
add1
(subbytes byte-stream 1 _)
bytes->string/utf-8
string->symbol))
(byte-size-value (integer-bytes->integer (subbytes byte-stream (+ 1 name-length) (+ 3 name-length)) #t))]
(values (type name-value byte-size-value) (+ 5 name-length))))])
(byte-size-value (integer-bytes->integer (subbytes byte-stream (+ 1 name-length)) #t))]
(type name-value byte-size-value)))])

(define-serializable stringl [value] #:transparent
#:guard
Expand All @@ -63,13 +65,14 @@
(let* [(value (stringl-value self))
(size-of-string (string-length value))]
(if (< size size-of-string)
(string->bytes/utf-8 (substring value 0 size)) ;; Truncate
(let [(dest-bytes (make-bytes size 0))
(serialyzed-string (string->bytes/utf-8 value))]
(bytes-copy! dest-bytes 0 serialyzed-string)
dest-bytes))))
(values size (string->bytes/utf-8 (substring value 0 size))) ;; Truncate
(let* [(dest-bytes (make-bytes size 0))
(serialized-string (string->bytes/utf-8 value))
(serialized-string-length (bytes-length serialized-string))]
(bytes-copy! dest-bytes 0 serialized-string)
(values serialized-string-length dest-bytes)))))
(define (deserialize _self byte-stream)
(values (stringl (string-trim (bytes->string/utf-8 byte-stream)) (bytes-length byte-stream))))])
(stringl (string-trim (bytes->string/utf-8 byte-stream))))])

(define-serializable integer32 [value] #:transparent
#:guard
Expand All @@ -78,9 +81,9 @@
value)
#:methods gen:serializable
[(define (serialize self #:size [_size #f])
(integer->integer-bytes (integer32-value self) 4 #t))
(values 4 (bytes-append (integer->integer-bytes (integer32-value self) 4 #t))))
(define (deserialize _self byte-stream)
(values (integer32 (integer-bytes->integer (subbytes byte-stream 0 4) #t)) 4))])
(integer32 (integer-bytes->integer (subbytes byte-stream 0 4) #t)))])

(define-serializable field [position type] #:transparent
#:guard
Expand All @@ -93,15 +96,16 @@
(define (serialize self #:size [_size #f])
(let* [(position (field-position self))
(position-bytes (integer->integer-bytes position 1 #t))
(type (field-type self))
(type-bytes (super-serialize type #:size (type-byte-size type)))]
(bytes-append position-bytes type-bytes)))
(type (field-type self))]
(define-values (type-size type-bytes) (super-serialize type #:size (type-byte-size type)))
(define total-size (+ 1 type-size))
(values total-size (bytes-append position-bytes type-bytes))))
(define/generic super-deserialize deserialize)
(define (deserialize self byte-stream)
(let* [(position-value (integer-bytes->integer (make-bytes 1 (bytes-ref byte-stream 0)) #t))
(type-bytes (subbytes byte-stream 1))]
(define-values [new-type type-consumed] (super-deserialize struct:type type-bytes))
(values (field position-value new-type) (+ 1 type-consumed))))])
(type-bytes (subbytes byte-stream 1))
(new-type (super-deserialize struct:type type-bytes))]
(field position-value new-type)))])

(define (fields-size fields)
(let* [(fields-values (hash-values fields))]
Expand Down Expand Up @@ -147,9 +151,12 @@
(bytes-append serialized-count serialized-constraints)))
(let* [(row-id (table-row-id self))
(row-id-bytes (integer->integer-bytes row-id 4 #t))
(fields-list (hash->list (table-fields self)))
(constraints (serialize-constraints (table-local-constraints self)))]
(bytes-append row-id-bytes constraints (serialize-hash-list fields-list #:entity? #f))))
(fields-list (hash->list (table-fields self)))
(how-many-fields (integer->integer-bytes (length fields-list) 2 #t))
(serialized-fields (serialize-hash-list fields-list))
(constraints (serialize-constraints (table-local-constraints self)))
(total-size (+ 4 2 (bytes-length constraints) (bytes-length serialized-fields)))]
(values total-size (bytes-append row-id-bytes constraints how-many-fields serialized-fields))))
(define (deserialize self byte-stream)
(define (utf8-character-as-integer byte-array)
(call-with-input-bytes
Expand Down Expand Up @@ -177,10 +184,12 @@
(loop (cons constraint-value acc) (- n 1) (+ consumed-bytes constraint-size) (subbytes streamb constraint-size))))))
(define row-id-value (integer-bytes->integer (subbytes byte-stream 0 4) #t))
(define-values (constraints-length constraints-value) (deserialize-constraints (subbytes byte-stream 4)))
(define fields-value (make-hash (deserialize-hash-list struct:field (subbytes byte-stream (+ 4 constraints-length)) '())))
(values
(table "table" row-id-value fields-value constraints-value)
(bytes-length byte-stream)))])
(define-values (_consumed-fields-bytes fields-value)
(deserialize-hash-list
struct:field
(integer-bytes->integer (subbytes byte-stream (+ 4 constraints-length) (+ 6 constraints-length)) #t)
(subbytes byte-stream (+ 6 constraints-length))))
(table "table" row-id-value (make-immutable-hash fields-value) constraints-value))])

(define-serializable procedure [identifier] #:transparent
#:guard
Expand All @@ -192,7 +201,9 @@
(procedure-identifier self))]
#:methods gen:serializable
[(define (serialize _self #:size [_size #f])
(bytes-append (string->bytes/utf-8 "procedures' serialization is not yet implemented")))
(let* [(todo-string (string->bytes/utf-8 "procedures' serialization is not yet implemented"))
(todo-string-length (bytes-length todo-string))]
(values todo-string-length todo-string)))
(define (deserialize _self byte-stream)
(println "procedures' deserialization is not yet implemented")
(values (procedure "procedure") (bytes-length byte-stream)))])
(procedure "procedure"))])
71 changes: 36 additions & 35 deletions io.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
(type (field-type attribute))
(type-size (type-byte-size type))
(position (field-position attribute))]
(cons position (serialize literal #:size type-size))))
(define-values (_literal-size serialized-literal) (serialize literal #:size type-size))
(cons position serialized-literal)))

(define (convert-row table row)
(~>
Expand Down Expand Up @@ -66,19 +67,21 @@
(raise 'tried-update-row-id-with-procedure)])))

(define (write-table-to-disk table table-name)
(let* [(serialized-table (serialize table))
(file-name (build-ndf-filename table-name))]
(call-with-output-file file-name #:exists 'truncate
(curry write-bytes serialized-table))
(void)))
(define-values (_table-size serialized-table) (serialize table))
(define filename (build-ndf-filename table-name))
(call-with-output-file filename #:exists 'truncate
(curry write-bytes serialized-table))
(void))

(define (write-schema-to-disk schema)
(define (write-entity-to-disk file-out entities-list)
(let* [(entity-name (give-identifier (cdr (car entities-list))))]
(let* [(entity-name (give-identifier (cdr (car entities-list))))
(entity-name-length (string-length entity-name))
(list-length (length entities-list))]
(write-bytes (integer->integer-bytes entity-name-length 1 #t) file-out)
(write-string entity-name file-out)
(newline file-out)
(write-bytes (serialize-hash-list entities-list #:entity? #t) file-out)
(newline file-out)))
(write-bytes (integer->integer-bytes list-length 2 #t) file-out)
(write-bytes (serialize-hash-list entities-list) file-out)))
(let* [(schema-list (hash->list schema))
(file-name (build-ndf-filename "schema" #:data? 'schema))]
(call-with-output-file file-name #:exists 'truncate
Expand All @@ -94,41 +97,39 @@
read-table-values-from-disk)

(require
RacketowerDB/ast
RacketowerDB/ast
RacketowerDB/util
racket/hash)

(define (read-schema-from-disk schema-name)
(define (build-hash-from-line struct-instance line-in-bytes)
(make-immutable-hash
(deserialize-hash-list
struct-instance
line-in-bytes
'())))
(define (proceed-reading struct-name current-schema line-in-bytes)
(let [(line (bytes->string/utf-8 line-in-bytes))]
(if (hash-has-key? entity-structs line)
(cons line current-schema)
(cons struct-name (hash-union current-schema (build-hash-from-line (hash-ref entity-structs struct-name) line-in-bytes))))))
(define (read-entities-from-disk how-many-entities entity-name byte-stream)
(define-values (consumed-bytes entities) (deserialize-hash-list
(hash-ref entity-structs entity-name)
how-many-entities
byte-stream))
(values (make-immutable-hash entities) (subbytes byte-stream consumed-bytes)))
(define (read-block-from-disk byte-stream)
(let* [(entity-name-size (integer-bytes->integer (subbytes byte-stream 0 1) #t))
(entity-name (bytes->string/utf-8 (subbytes byte-stream 1 (+ 1 entity-name-size))))
(how-many-entities (integer-bytes->integer (subbytes byte-stream (+ 1 entity-name-size) (+ 3 entity-name-size)) #t))]
(read-entities-from-disk how-many-entities entity-name (subbytes byte-stream (+ 3 entity-name-size)))))
(let* [(file-name (build-ndf-filename schema-name #:data? 'schema))
(in (open-input-file file-name #:mode 'binary))
(schema (make-immutable-hash (list)))
(real-lines (port->bytes-lines in #:close? #t))
(read-lines (fix-empty-read-bytes-lines real-lines))]
(~> (foldl
(lambda [line-in-bytes acc]
(let [(builder-struct (car acc))
(current-schema (cdr acc))]
(proceed-reading builder-struct current-schema line-in-bytes)))
(cons null schema) read-lines)
cdr
(content (port->bytes in #:close? #t))]
(~> (let loop [(schema (make-immutable-hash (list)))
(byte-array content)]
(if (bytes-empty? byte-array)
schema
(let []
(define-values (sub-schema remaining-byte-stream) (read-block-from-disk byte-array))
(loop (hash-union schema sub-schema) remaining-byte-stream))))
hash->list
make-hash)))

(define (read-table-from-disk table-name)
(let* [(file-name (build-ndf-filename #:data? 'entity table-name))
(in (open-input-file file-name #:mode 'binary))]
(define-values (table table-consumed) (deserialize struct:table (port->bytes in #:close? #t)))
table))
(deserialize struct:table (port->bytes in #:close? #t))))

(define (read-table-values-from-disk schema table-name)
(let* [(file-name (build-ndf-filename #:data? 'data table-name))
Expand Down
2 changes: 1 addition & 1 deletion main.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
(module+ main

(define field-name (field 0 (type 'VARCHAR 7)))
(define field-editor (field 1 (type 'VARCHAR 9)))
(define field-editor (field 1 (type 'VARCHAR 10)))
(define field-year (field 1 (type 'INTEGER 4)))
(define field-age (field 2 (type 'INTEGER 4)))

Expand Down
Binary file modified ndf/data/PROGRAMMER.ndf
Binary file not shown.
Binary file modified ndf/entities/PROGRAMMER.ndf
Binary file not shown.
Binary file modified ndf/schemas/schema.ndf
Binary file not shown.
88 changes: 36 additions & 52 deletions util.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -5,40 +5,14 @@
checked-guard
entity-structs
define-serializable
fix-empty-read-bytes-lines)
bytes-empty?)

(require
(for-syntax threading racket/syntax racket/list)
struct-update
threading
br/cond)

;; FIXME: With the addition of constraints, there can be multiple new lines in sequence
;; and this stupid will not work anymore. Good luck xD
(define (fix-empty-read-bytes-lines lines)
(define (fix-one-turn inner-lines)
(let [(newline-flag #f)]
(foldl (lambda [line new-lines]
(cond
[newline-flag
(set! newline-flag #f)
(append new-lines (list (bytes-append #"\n" line)))]

[(bytes=? #"" line)
(set! newline-flag #t)
new-lines]

[else (append new-lines (list line))]))
(list) inner-lines)))

(define (stop-condition lines-to-check)
(empty? (filter (curry bytes=? #"") lines-to-check)))

(while (not (stop-condition lines))
(set! lines (fix-one-turn lines)))

lines)

(define (build-ndf-filename #:data? [data? 'entity] name)
(let [(path (case (list 'quote data?)
[('entity) "ndf/entities/"]
Expand Down Expand Up @@ -80,6 +54,9 @@
args ...)) ...
body ...))]))

(define (bytes-empty? byte-stream)
(equal? #"" byte-stream))

(module interfaces racket
(provide
serializable?
Expand All @@ -90,8 +67,8 @@
gen:identifiable
(contract-out
[give-identifier (-> identifiable? string?)]
[serialize (->* (serializable?) (#:size integer?) bytes?)]
[deserialize (-> serializable? bytes? (values serializable? natural?))]
[serialize (->* (serializable?) (#:size integer?) (values natural? bytes?))]
[deserialize (-> serializable? bytes? serializable?)]
[from-bytes (-> byteable? bytes? serializable?)]
[to-byte-size (-> byteable? natural?)]))

Expand All @@ -111,40 +88,47 @@
(module+ hashable
(provide
(contract-out
[deserialize-hash-list (-> serializable? bytes? list? list?)]
[serialize-hash-list (-> (listof (cons/c string? serializable?)) #:entity? boolean? bytes?)]))
[deserialize-hash-list (-> serializable? natural? bytes? (values natural? list?))]
[serialize-hash-list (-> (listof (cons/c string? serializable?)) bytes?)]))

(require (submod ".." interfaces))

(define (deserialize-hash-list entity byte-stream accumulator)
(define (deserialize-hash-list entity how-many byte-stream)
(inner-deserialize-hash-list entity how-many byte-stream 0 '()))

(define (inner-deserialize-hash-list entity how-many byte-stream consumed-bytes accumulator)
(define (deserialize-name more-bytes)
(let* [(name-size (integer-bytes->integer (subbytes more-bytes 0 4) #t))
(name (bytes->string/utf-8 (subbytes more-bytes 4 (+ 4 name-size))))]
(cons name (+ 4 name-size))))
(if (equal? byte-stream #"")
accumulator
(let* [(name-consumption (deserialize-name byte-stream))
(name-consumed (cdr name-consumption))
(name (car name-consumption))]
(define-values [thing thing-consumed] (deserialize entity (subbytes byte-stream name-consumed)))
(deserialize-hash-list
entity
(subbytes byte-stream (+ name-consumed thing-consumed) (bytes-length byte-stream))
(append accumulator (list (cons name thing)))))))

(define (serialize-hash-list named-values-list #:entity? entity?)
(values (+ 4 name-size) name)))
(if (zero? how-many)
(values consumed-bytes accumulator)
(let []
(define-values [name-consumed name] (deserialize-name byte-stream))
(define entity-size (integer-bytes->integer (subbytes byte-stream name-consumed (+ name-consumed 4)) #t))
(define thing (deserialize entity (subbytes byte-stream (+ 4 name-consumed) (+ name-consumed entity-size 4))))
(inner-deserialize-hash-list
entity
(- how-many 1)
(subbytes byte-stream (+ 4 name-consumed entity-size))
(+ consumed-bytes name-consumed entity-size 4)
(append accumulator (list (cons name thing)))))))

(define (serialize-hash-list named-values-list)
(define (serialize-name name)
(let* [(name-bytes (string->bytes/utf-8 name))
(name-size (integer->integer-bytes (bytes-length name-bytes) 4 #t))]
(bytes-append name-size name-bytes)))

(name-size (bytes-length name-bytes))
(serialized-name-size (integer->integer-bytes name-size 4 #t))]
(bytes-append serialized-name-size name-bytes)))
(~>
(map (lambda [named-value]
(let [(name (car named-value))
(value (cdr named-value))]
(let* [(name (car named-value))
(value (cdr named-value))]
(define-values [value-size serialized-value] (serialize value))
(bytes-append
(serialize-name name)
(serialize value))))
(integer->integer-bytes value-size 4 #t)
serialized-value)))
named-values-list)
(bytes-join _ (if entity? #"\n" #"")))))
(bytes-join _ #""))))