-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathoctets.lisp
210 lines (192 loc) · 9.27 KB
/
octets.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
;;;; octets.lisp -- substrate for encoding functionality
(cl:in-package :binascii)
(defun case-fold-decode-table (decode-table encode-table)
(loop with table = (copy-seq decode-table)
for c across encode-table
do (setf (aref table (char-code (char-downcase c)))
(aref table (char-code c)))
finally (return table)))
(defun canonicalize-element-type (element-type &optional (errorp t))
(cond
((eq element-type 'character) element-type)
((eq element-type 'base-char) element-type)
;; We want (UNSIGNED-BYTE 8), but there are a variety of
;; ways to express that and we don't want to go through
;; SUBTYPEP all the time. Do a quick check for the most
;; likely form, then use SUBTYPEP for people who do things
;; weirdly.
((or (equal element-type '(unsigned-byte 8))
(and (subtypep element-type '(unsigned-byte 8))
(subtypep '(unsigned-byte 8) element-type)))
'octet)
(t
(when errorp
(error "Unsupported element-type ~A" element-type)))))
(declaim (inline array-data-and-offsets))
(defun array-data-and-offsets (v start end)
"Like ARRAY-DISPLACEMENT, only more useful."
#+sbcl
(let ((end (or end (length v))))
(sb-kernel:with-array-data ((v v) (real-start start) (real-end end))
(declare (ignore real-end))
(values v start (+ real-start (- end start)))))
#+cmu
(let ((end (or end (length v))))
(lisp::with-array-data ((v v) (real-start start) (real-end end))
(declare (ignore real-end))
(values v start (+ real-start (- end start)))))
#+ccl
(multiple-value-bind (v* offset) (ccl::array-data-and-offset v)
(values v* (+ start offset) (+ (or end (length v)) offset)))
#-(or sbcl cmu ccl)
(values v start (or end (length v))))
(defun encode-to-fresh-vector (octets state start end element-type)
(declare (type encode-state state))
(multiple-value-bind (input start end)
(array-data-and-offsets octets start end)
(let* ((fd (state-descriptor state))
(length (funcall (fd-encoded-length fd) (- end start))))
(declare (type format-descriptor fd))
(declare (type index length))
(flet ((frob (etype encode-fun)
(let ((v (make-array length :element-type etype)))
(multiple-value-bind (input-index output-index)
(funcall encode-fun state v input
0 length start end t)
(declare (ignore input-index))
(if (= output-index length)
v
(subseq v 0 output-index))))))
(declare (inline frob))
(ecase (canonicalize-element-type element-type)
(character
(frob 'character (fd-octets->string fd)))
(base-char
(frob 'base-char (fd-octets->string fd)))
(octet
(frob '(unsigned-byte 8) (fd-octets->octets/encode fd))))))))
(defun encode (octets format &key (start 0) end (element-type 'base-char))
"Encode OCTETS between START and END into ASCII characters according to
FORMAT. Return a fresh vector containing the characters. The type of
the vector depends on ELEMENT-TYPE; if ELEMENT-TYPE is a subtype of
CHARACTER, then a string is returned. If ELEMENT-TYPE is type-equivalent
to (UNSIGNED-BYTE 8), then an octet vector is returned."
(encode-to-fresh-vector octets (find-encoder format) start end element-type))
(defun encode-octets (destination octets format &key (start 0) end
(output-start 0) output-end (element-type 'base-char)
finishp)
"Encode OCTETS between START and END into ASCII characters
according to FORMAT and write them to DESTINATION according to ELEMENT-TYPE.
If DESTINATION is NIL and ELEMENT-TYPE is a subtype of CHARACTER, then a
string is returned. If DESTINATION is NIL and ELEMENT-TYPE is
\(UNSIGNED-BYTE 8) or an equivalent type, then an octet vector is returned.
If ELEMENT-TYPE is a subtype of CHARACTER, then DESTINATION may also be
a string. Similarly, if ELEMENT-TYPE is (UNSIGNED-BYTE 8) or an
equivalent type, then DESTINATION may be an octet vector. In this case,
OUTPUT-START and OUTPUT-END are used to determine the portion of
DESTINATION where the encoded output may be placed.
If DESTINATION is not NIL, The index of the first input element that was
not read and the index of the first output element that was not updated
are returned as multiple values. respectively, written are returned as
multiple values. ELEMENT-TYPE is ignored.
If FINISHP is true, then in addition to any encoding of OCTETS, also output
any necessary padding required by FORMAT."
(let* ((state (find-encoder format))
(fd (state-descriptor state)))
(declare (type encode-state state))
(declare (type format-descriptor fd))
(flet ((frob (encode-fun)
(multiple-value-bind (input input-start input-end)
(array-data-and-offsets octets start end)
(multiple-value-bind (output output-start output-end)
(array-data-and-offsets destination output-start output-end)
(funcall encode-fun state
output input
output-start output-end
input-start input-end finishp)))))
(declare (inline frob))
(etypecase destination
(null
(encode-to-fresh-vector octets state start end element-type))
(string
(frob (fd-octets->string fd)))
((array (unsigned-byte 8) (*))
(frob (fd-octets->octets/encode fd)))))))
(defun decode-to-fresh-vector (string state start end decoded-length)
(declare (type decode-state state))
(multiple-value-bind (input start end)
(array-data-and-offsets string start end)
(let* ((fd (state-descriptor state))
(length (or decoded-length
(funcall (fd-decoded-length fd) (- end start)))))
(declare (type format-descriptor fd))
(declare (type index length))
(flet ((frob (v decode-fun)
(multiple-value-bind (input-index output-index)
(funcall decode-fun state v input 0 length start end t)
;; FIXME: we should check to see if we actually
;; consumed all the input. If we didn't, then we need
;; to reallocate V and continue decoding. Even though
;; we said LASTP=T. Hmmm.
(declare (ignore input-index))
(if (= output-index length)
v
(subseq v 0 output-index)))))
(let ((octets (make-array length :element-type '(unsigned-byte 8))))
(etypecase string
(simple-string
(frob octets (fd-string->octets fd)))
(simple-octet-vector
(frob octets (fd-octets->octets/decode fd)))))))))
(defun decode (string format &key (start 0) end case-fold map01 decoded-length)
"Decode the characters of STRING between START and END into octets
according to FORMAT. DECODED-LENGTH indicates the number of decoded
octets to expect. CASE-FOLD indicates whether to consider lowercase
characters as equivalent to uppercase characters; it is only considered
for certain values of FORMAT. MAP01 indicates whether to consider #\\0
equivalent to #\\O and possibly #\\1 as equivalent to #\\I or #\\L; see
the documentation for further details."
(decode-to-fresh-vector string (find-decoder format case-fold map01)
start end decoded-length))
(defun decode-octets (destination string format &key (start 0) end
(output-start 0) output-end case-fold map01 finishp
decoded-length)
"Decode the characters of STRING between START and END into octets
according to FORMAT. DECODED-LENGTH indicates the number of decoded
octets to expect. DESTINATION may be NIL."
(let ((state (find-decoder format case-fold map01)))
(declare (type decode-state state))
(flet ((frob (decode-fun)
(multiple-value-bind (input input-start input-end)
(array-data-and-offsets string start end)
(multiple-value-bind (output output-start output-end)
(array-data-and-offsets destination output-start output-end)
(funcall decode-fun state
output input
output-start output-end
input-start input-end finishp)))))
(declare (inline frob))
(etypecase string
(null
(decode-to-fresh-vector string state start end decoded-length))
(string
(frob (fd-string->octets (state-descriptor state))))
((array (unsigned-byte 8) (*))
(frob (fd-octets->octets/decode (state-descriptor state))))))))
(defconstant +dt-invalid+ -1)
(defun make-decode-table (encode-table)
(loop with table = (make-array 256 :element-type 'fixnum
:initial-element +dt-invalid+)
for char across encode-table
for i from 0
do (setf (aref table (char-code char)) i)
finally (return table)))
(deftype decode-table () '(simple-array fixnum (256)))
(declaim (inline dtref))
(defun dtref (table i)
(declare (type decode-table table))
(declare (type index i))
;; FIXME: statically handle CHAR-CODE-LIMIT <= 256
(if (>= i 256)
+dt-invalid+
(aref table i)))