encode.lisp - clic - Clic is an command line interactive client for gopher written in Common LISP (HTM) git clone git://bitreich.org/clic/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/clic/ (DIR) Log (DIR) Files (DIR) Refs (DIR) Tags (DIR) README (DIR) LICENSE --- encode.lisp (14245B) --- 1 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- 2 ;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.26 2008/05/26 10:55:08 edi Exp $ 3 4 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. 5 6 ;;; Redistribution and use in source and binary forms, with or without 7 ;;; modification, are permitted provided that the following conditions 8 ;;; are met: 9 10 ;;; * Redistributions of source code must retain the above copyright 11 ;;; notice, this list of conditions and the following disclaimer. 12 13 ;;; * Redistributions in binary form must reproduce the above 14 ;;; copyright notice, this list of conditions and the following 15 ;;; disclaimer in the documentation and/or other materials 16 ;;; provided with the distribution. 17 18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED 19 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 22 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 24 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 26 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 27 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 (in-package :flexi-streams) 31 32 (defgeneric char-to-octets (format char writer) 33 (declare #.*standard-optimize-settings*) 34 (:documentation "Converts the character CHAR to a sequence of octets 35 using the external format FORMAT. The conversion is performed by 36 calling the unary function \(which must be a functional object) WRITER 37 repeatedly each octet. The return value of this function is 38 unspecified.")) 39 40 (defgeneric write-sequence* (format stream sequence start end) 41 (declare #.*standard-optimize-settings*) 42 (:documentation "A generic function which dispatches on the external 43 format and does the real work for STREAM-WRITE-SEQUENCE.")) 44 45 (defgeneric string-to-octets* (format string start end) 46 (declare #.*standard-optimize-settings*) 47 (:documentation "A generic function which dispatches on the external 48 format and does the real work for STRING-TO-OCTETS.")) 49 50 (defmethod string-to-octets* :around (format (list list) start end) 51 (declare #.*standard-optimize-settings*) 52 (string-to-octets* format (coerce list 'string*) start end)) 53 54 (defmacro define-sequence-writers ((format-class) &body body) 55 "Non-hygienic utility macro which defines methods for 56 WRITE-SEQUENCE* and STRING-TO-OCTETS* for the class FORMAT-CLASS. For 57 BODY see the docstring of DEFINE-CHAR-ENCODERS." 58 (let ((body `((locally 59 (declare #.*fixnum-optimize-settings*) 60 ,@body)))) 61 `(progn 62 (defmethod string-to-octets* ((format ,format-class) string start end) 63 (declare #.*standard-optimize-settings*) 64 (declare (fixnum start end) (string string)) 65 (let ((octets (make-array (compute-number-of-octets format string start end) 66 :element-type 'octet)) 67 (j 0)) 68 (declare (fixnum j)) 69 (loop for i of-type fixnum from start below end do 70 (macrolet ((octet-writer (form) 71 `(progn 72 (setf (aref (the (array octet *) octets) j) ,form) 73 (incf j)))) 74 (symbol-macrolet ((char-getter (char string i))) 75 (progn ,@body)))) 76 octets)) 77 (defmethod write-sequence* ((format ,format-class) stream sequence start end) 78 (declare #.*standard-optimize-settings*) 79 (declare (fixnum start end)) 80 (with-accessors ((column flexi-stream-column)) 81 stream 82 (let* ((octet-seen-p nil) 83 (buffer-pos 0) 84 ;; estimate should be good enough... 85 (factor (encoding-factor format)) 86 ;; we don't want arbitrarily large buffer, do we? 87 (buffer-size (min +buffer-size+ (ceiling (* factor (- end start))))) 88 (buffer (make-octet-buffer buffer-size)) 89 (underlying-stream (flexi-stream-stream stream))) 90 (declare (fixnum buffer-pos buffer-size) 91 (boolean octet-seen-p) 92 (type (array octet *) buffer)) 93 (macrolet ((octet-writer (form) 94 `(write-octet ,form))) 95 (labels ((flush-buffer () 96 "Sends all octets in BUFFER to the underlying stream." 97 (write-sequence buffer underlying-stream :end buffer-pos) 98 (setq buffer-pos 0)) 99 (write-octet (octet) 100 "Adds one octet to the buffer and flushes it if necessary." 101 (declare (type octet octet)) 102 (when (>= buffer-pos buffer-size) 103 (flush-buffer)) 104 (setf (aref buffer buffer-pos) octet) 105 (incf buffer-pos)) 106 (write-object (object) 107 "Dispatches to WRITE-OCTET or WRITE-CHARACTER 108 depending on the type of OBJECT." 109 (etypecase object 110 (octet (setq octet-seen-p t) 111 (write-octet object)) 112 (character (symbol-macrolet ((char-getter object)) 113 ,@body))))) 114 (macrolet ((iterate (&body output-forms) 115 "An unhygienic macro to implement the actual 116 iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one 117 sequence element and put its octet representation into the buffer." 118 `(loop for index of-type fixnum from start below end 119 do (progn ,@output-forms) 120 finally (when (plusp buffer-pos) 121 (flush-buffer))))) 122 (etypecase sequence 123 (string (iterate 124 (symbol-macrolet ((char-getter (char sequence index))) 125 ,@body))) 126 (array (iterate 127 (symbol-macrolet ((char-getter (aref sequence index))) 128 ,@body))) 129 (list (iterate (write-object (nth index sequence)))))) 130 ;; update the column slot, setting it to NIL if we sent 131 ;; octets 132 (setq column 133 (cond (octet-seen-p nil) 134 (t (let ((last-newline-pos (position #\Newline sequence 135 :test #'char= 136 :start start 137 :end end 138 :from-end t))) 139 (cond (last-newline-pos (- end last-newline-pos 1)) 140 (column (+ column (- end start)))))))))))))))) 141 142 (defmacro define-char-encoders ((lf-format-class cr-format-class crlf-format-class) &body body) 143 "Non-hygienic utility macro which defines several encoding-related 144 methods for the classes LF-FORMAT-CLASS, CR-FORMAT-CLASS, and 145 CRLF-FORMAT-CLASS where it is assumed that CR-FORMAT-CLASS is the same 146 encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and 147 similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class. 148 BODY is a code template for the code to convert one character to 149 octets. BODY must contain a symbol CHAR-GETTER representing the form 150 which is used to obtain the character and a forms like \(OCTET-WRITE 151 <thing>) to write the octet <thing>. The CHAR-GETTER form might be 152 called more than once." 153 `(progn 154 (defmethod char-to-octets ((format ,lf-format-class) char writer) 155 (declare #.*fixnum-optimize-settings*) 156 (declare (character char) (function writer)) 157 (symbol-macrolet ((char-getter char)) 158 (macrolet ((octet-writer (form) 159 `(funcall writer ,form))) 160 ,@body))) 161 (define-sequence-writers (,lf-format-class) ,@body) 162 (define-sequence-writers (,cr-format-class) 163 ;; modify the body so that the getter replaces a #\Newline 164 ;; with a #\Return 165 ,@(sublis `((char-getter . ,(with-unique-names (char) 166 `(let ((,char char-getter)) 167 (declare (character ,char)) 168 (if (char= ,char #\Newline) 169 #\Return 170 ,char))))) 171 body)) 172 (define-sequence-writers (,crlf-format-class) 173 ;; modify the body so that we potentially write octets for 174 ;; two characters (#\Return and #\Linefeed) - the original 175 ;; body is wrapped with the WRITE-CHAR local function 176 ,(with-unique-names (char write-char) 177 `(flet ((,write-char (,char) 178 ,@(sublis `((char-getter . ,char)) body))) 179 (let ((,char char-getter)) 180 (declare (character ,char)) 181 (cond ((char= ,char #\Newline) 182 (,write-char #\Return) 183 (,write-char #\Linefeed)) 184 (t (,write-char ,char))))))))) 185 186 (define-char-encoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format) 187 (let ((octet (char-code char-getter))) 188 (when (> octet 255) 189 (signal-encoding-error format "~S (code ~A) is not a LATIN-1 character." char-getter octet)) 190 (octet-writer octet))) 191 192 (define-char-encoders (flexi-ascii-format flexi-cr-ascii-format flexi-crlf-ascii-format) 193 (let ((octet (char-code char-getter))) 194 (when (> octet 127) 195 (signal-encoding-error format "~S (code ~A) is not an ASCII character." char-getter octet)) 196 (octet-writer octet))) 197 198 (define-char-encoders (flexi-8-bit-format flexi-cr-8-bit-format flexi-crlf-8-bit-format) 199 (with-accessors ((encoding-hash external-format-encoding-hash)) 200 format 201 (let ((octet (gethash (char-code char-getter) encoding-hash))) 202 (unless octet 203 (signal-encoding-error format "~S (code ~A) is not in this encoding." char-getter octet)) 204 (octet-writer octet)))) 205 206 (define-char-encoders (flexi-utf-8-format flexi-cr-utf-8-format flexi-crlf-utf-8-format) 207 ;; the old version using LDB was more elegant, but some Lisps had 208 ;; trouble optimizing it 209 (let ((char-code (char-code char-getter))) 210 (tagbody 211 (cond ((< char-code #x80) 212 (octet-writer char-code) 213 (go zero)) 214 ((< char-code #x800) 215 (octet-writer (logior* #b11000000 (ash* char-code -6))) 216 (go one)) 217 ((< char-code #x10000) 218 (octet-writer (logior* #b11100000 (ash* char-code -12))) 219 (go two)) 220 (t 221 (octet-writer (logior* #b11110000 (ash* char-code -18))))) 222 (octet-writer (logior* #b10000000 (logand* #b00111111 (ash* char-code -12)))) 223 two 224 (octet-writer (logior* #b10000000 (logand* #b00111111 (ash* char-code -6)))) 225 one 226 (octet-writer (logior* #b10000000 (logand* #b00111111 char-code))) 227 zero))) 228 229 (define-char-encoders (flexi-utf-16-le-format flexi-cr-utf-16-le-format flexi-crlf-utf-16-le-format) 230 (flet ((write-word (word) 231 (octet-writer (logand* #x00ff word)) 232 (octet-writer (ash* (logand* #xff00 word) -8)))) 233 (declare (inline write-word)) 234 (let ((char-code (char-code char-getter))) 235 (declare (type char-code-integer char-code)) 236 (cond ((< char-code #x10000) 237 (write-word char-code)) 238 (t (decf char-code #x10000) 239 (write-word (logior* #xd800 (ash* char-code -10))) 240 (write-word (logior* #xdc00 (logand* #x03ff char-code)))))))) 241 242 (define-char-encoders (flexi-utf-16-be-format flexi-cr-utf-16-be-format flexi-crlf-utf-16-be-format) 243 (flet ((write-word (word) 244 (octet-writer (ash* (logand* #xff00 word) -8)) 245 (octet-writer (logand* #x00ff word)))) 246 (declare (inline write-word)) 247 (let ((char-code (char-code char-getter))) 248 (declare (type char-code-integer char-code)) 249 (cond ((< char-code #x10000) 250 (write-word char-code)) 251 (t (decf char-code #x10000) 252 (write-word (logior* #xd800 (ash* char-code -10))) 253 (write-word (logior* #xdc00 (logand* #x03ff char-code)))))))) 254 255 (define-char-encoders (flexi-utf-32-le-format flexi-cr-utf-32-le-format flexi-crlf-utf-32-le-format) 256 (let ((char-code (char-code char-getter))) 257 (octet-writer (logand* #x00ff char-code)) 258 (octet-writer (logand* #x00ff (ash* char-code -8))) 259 (octet-writer (logand* #x00ff (ash* char-code -16))) 260 (octet-writer (logand* #x00ff (ash* char-code -24))))) 261 262 (define-char-encoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format) 263 (let ((char-code (char-code char-getter))) 264 (octet-writer (logand* #x00ff (ash* char-code -24))) 265 (octet-writer (logand* #x00ff (ash* char-code -16))) 266 (octet-writer (logand* #x00ff (ash* char-code -8))) 267 (octet-writer (logand* #x00ff char-code)))) 268 269 (defmethod char-to-octets ((format flexi-cr-mixin) char writer) 270 (declare #.*fixnum-optimize-settings*) 271 (declare (character char)) 272 (if (char= char #\Newline) 273 (call-next-method format #\Return writer) 274 (call-next-method))) 275 276 (defmethod char-to-octets ((format flexi-crlf-mixin) char writer) 277 (declare #.*fixnum-optimize-settings*) 278 (declare (character char)) 279 (cond ((char= char #\Newline) 280 (call-next-method format #\Return writer) 281 (call-next-method format #\Linefeed writer)) 282 (t (call-next-method))))