|
| 1 | +(ns pixie.streams.utf8 |
| 2 | + (require pixie.streams :refer :all)) |
| 3 | + |
;; Protocol for sinks that accept one character at a time.
;; `write-char` takes the stream and the character to emit.
(defprotocol IUTF8OutputStream
  (write-char [this ch]))
| 6 | + |
;; Protocol for sources that yield one character at a time.
;; `read-char` takes the stream and returns the next character.
(defprotocol IUTF8InputStream
  (read-char
    [this]))
| 9 | + |
;; Character sink that UTF-8 encodes each character and forwards the
;; resulting bytes to `out` via `write-byte`.
;;
;; write-char  converts `ch` to its integer code point and emits 1-4 bytes
;;             depending on magnitude. Asserts if the code point needs more
;;             than 21 bits (i.e. is above 0x1FFFFF) instead of silently
;;             writing nothing.
;; -dispose!   disposes the wrapped `out` stream.
(deftype UTF8OutputStream [out]
  IUTF8OutputStream
  (write-char [this ch]
    (let [ch (int ch)]
      (cond
        ;; 1 byte: 0xxxxxxx
        (<= ch 0x7F)
        (write-byte out ch)

        ;; 2 bytes: 110xxxxx 10xxxxxx
        (<= ch 0x7FF)
        (do (write-byte out (bit-or 0xC0 (bit-shift-right ch 6)))
            (write-byte out (bit-or 0x80 (bit-and ch 0x3F))))

        ;; 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        (<= ch 0xFFFF)
        (do (write-byte out (bit-or 0xE0 (bit-shift-right ch 12)))
            (write-byte out (bit-or 0x80 (bit-and (bit-shift-right ch 6) 0x3F)))
            (write-byte out (bit-or 0x80 (bit-and ch 0x3F))))

        ;; 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        ;; The lead byte must carry the 0xF0 prefix; 0xE0 would announce a
        ;; 3-byte sequence and corrupt everything after it.
        (<= ch 0x1FFFFF)
        (do (write-byte out (bit-or 0xF0 (bit-shift-right ch 18)))
            (write-byte out (bit-or 0x80 (bit-and (bit-shift-right ch 12) 0x3F)))
            (write-byte out (bit-or 0x80 (bit-and (bit-shift-right ch 6) 0x3F)))
            (write-byte out (bit-or 0x80 (bit-and ch 0x3F))))

        ;; Anything larger cannot be represented in the 4-byte form.
        :else
        (assert false (str "Cannot encode code point " ch)))))
  IDisposable
  (-dispose! [this]
    (dispose! out)))
| 28 | + |
| 29 | + |
;; Character source that pulls bytes from `in` via `read-byte` and decodes
;; them as UTF-8.
;;
;; read-char   reads the lead byte, derives the sequence width (1-4) and the
;;             payload bits it carries, then folds in the low six bits of
;;             each following byte. Asserts with "Got bad code <n>" when the
;;             lead byte matches none of the recognised prefixes.
;;             NOTE(review): following bytes are masked but not checked for
;;             the 10xxxxxx continuation prefix.
;; -dispose!   disposes the wrapped `in` stream.
(deftype UTF8InputStream [in]
  IUTF8InputStream
  (read-char [this]
    (let [lead (int (read-byte in))
          [seed width] (cond
                         (<= lead 0x7F)               [lead 1]
                         (= (bit-and lead 0xE0) 0xC0) [(bit-and lead 0x1F) 2]
                         (= (bit-and lead 0xF0) 0xE0) [(bit-and lead 0x0F) 3]
                         (= (bit-and lead 0xF8) 0xF0) [(bit-and lead 0x07) 4]
                         :else (assert false (str "Got bad code " lead)))]
      ;; `consumed` counts bytes already folded into `acc` (the lead byte is #1).
      (loop [consumed 1
             acc seed]
        (if (< consumed width)
          (recur (inc consumed)
                 (bit-or (bit-shift-left acc 6)
                         (bit-and (read-byte in) 0x3F)))
          (char acc)))))
  IDisposable
  (-dispose! [this]
    (dispose! in)))
| 50 | + |
;; Wraps the given input stream in a UTF8InputStream so characters can be
;; read from it with `read-char`.
(defn utf8-input-stream
  [in]
  (->UTF8InputStream in))
| 53 | + |
;; Wraps the given output stream in a UTF8OutputStream so characters can be
;; written to it with `write-char`.
(defn utf8-output-stream
  [out]
  (->UTF8OutputStream out))
0 commit comments