encode.lisp - clic - Clic is an command line interactive client for gopher written in Common LISP
 (HTM) git clone git://bitreich.org/clic/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/clic/
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Tags
 (DIR) README
 (DIR) LICENSE
       ---
       encode.lisp (14245B)
       ---
            1 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
            2 ;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.26 2008/05/26 10:55:08 edi Exp $
            3 
            4 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
            5 
            6 ;;; Redistribution and use in source and binary forms, with or without
            7 ;;; modification, are permitted provided that the following conditions
            8 ;;; are met:
            9 
           10 ;;;   * Redistributions of source code must retain the above copyright
           11 ;;;     notice, this list of conditions and the following disclaimer.
           12 
           13 ;;;   * Redistributions in binary form must reproduce the above
           14 ;;;     copyright notice, this list of conditions and the following
           15 ;;;     disclaimer in the documentation and/or other materials
           16 ;;;     provided with the distribution.
           17 
           18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
           19 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
           20 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
           21 ;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
           22 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
           23 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
           24 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
           25 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
           26 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
           27 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
           28 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
           29 
           30 (in-package :flexi-streams)
           31 
           32 (defgeneric char-to-octets (format char writer)
           33   (declare #.*standard-optimize-settings*)
           34   (:documentation "Converts the character CHAR to a sequence of octets
           35 using the external format FORMAT.  The conversion is performed by
           36 calling the unary function \(which must be a functional object) WRITER
           37 repeatedly each octet.  The return value of this function is
           38 unspecified."))
           39 
           40 (defgeneric write-sequence* (format stream sequence start end)
           41   (declare #.*standard-optimize-settings*)
           42   (:documentation "A generic function which dispatches on the external
           43 format and does the real work for STREAM-WRITE-SEQUENCE."))
           44 
           45 (defgeneric string-to-octets* (format string start end)
           46   (declare #.*standard-optimize-settings*)
           47   (:documentation "A generic function which dispatches on the external
           48 format and does the real work for STRING-TO-OCTETS."))
           49 
           50 (defmethod string-to-octets* :around (format (list list) start end)
           51   (declare #.*standard-optimize-settings*)
           52   (string-to-octets* format (coerce list 'string*) start end))
           53 
           54 (defmacro define-sequence-writers ((format-class) &body body)
           55   "Non-hygienic utility macro which defines methods for
           56 WRITE-SEQUENCE* and STRING-TO-OCTETS* for the class FORMAT-CLASS.  For
           57 BODY see the docstring of DEFINE-CHAR-ENCODERS."
           58   (let ((body `((locally
           59                   (declare #.*fixnum-optimize-settings*)
           60                   ,@body))))
           61     `(progn
           62        (defmethod string-to-octets* ((format ,format-class) string start end)
           63          (declare #.*standard-optimize-settings*)
           64          (declare (fixnum start end) (string string))
           65          (let ((octets (make-array (compute-number-of-octets format string start end)
           66                                    :element-type 'octet))
           67                (j 0))
           68            (declare (fixnum j))
           69            (loop for i of-type fixnum from start below end do
           70                  (macrolet ((octet-writer (form)
           71                               `(progn
           72                                  (setf (aref (the (array octet *) octets) j) ,form)
           73                                  (incf j))))
           74                    (symbol-macrolet ((char-getter (char string i)))
           75                      (progn ,@body))))
           76            octets))
           77        (defmethod write-sequence* ((format ,format-class) stream sequence start end)
           78          (declare #.*standard-optimize-settings*)
           79          (declare (fixnum start end))
           80          (with-accessors ((column flexi-stream-column))
           81              stream
           82            (let* ((octet-seen-p nil)
           83                   (buffer-pos 0)
           84                   ;; estimate should be good enough...
           85                   (factor (encoding-factor format))
           86                   ;; we don't want arbitrarily large buffer, do we?
           87                   (buffer-size (min +buffer-size+ (ceiling (* factor (- end start)))))
           88                   (buffer (make-octet-buffer buffer-size))
           89                   (underlying-stream (flexi-stream-stream stream)))
           90              (declare (fixnum buffer-pos buffer-size)
           91                       (boolean octet-seen-p)
           92                       (type (array octet *) buffer))
           93              (macrolet ((octet-writer (form)
           94                           `(write-octet ,form)))
           95                (labels ((flush-buffer ()
           96                           "Sends all octets in BUFFER to the underlying stream."
           97                           (write-sequence buffer underlying-stream :end buffer-pos)
           98                           (setq buffer-pos 0))
           99                         (write-octet (octet)
          100                           "Adds one octet to the buffer and flushes it if necessary."
          101                           (declare (type octet octet))
          102                           (when (>= buffer-pos buffer-size)
          103                             (flush-buffer))
          104                           (setf (aref buffer buffer-pos) octet)
          105                           (incf buffer-pos))
          106                         (write-object (object)
          107                           "Dispatches to WRITE-OCTET or WRITE-CHARACTER
          108 depending on the type of OBJECT."
          109                           (etypecase object
          110                             (octet (setq octet-seen-p t)
          111                                    (write-octet object))
          112                             (character (symbol-macrolet ((char-getter object))
          113                                          ,@body)))))
          114                  (macrolet ((iterate (&body output-forms)
          115                               "An unhygienic macro to implement the actual
          116 iteration through SEQUENCE.  OUTPUT-FORM is the form to retrieve one
          117 sequence element and put its octet representation into the buffer."
          118                               `(loop for index of-type fixnum from start below end
          119                                      do (progn ,@output-forms)
          120                                      finally (when (plusp buffer-pos)
          121                                                (flush-buffer)))))
          122                    (etypecase sequence
          123                      (string (iterate
          124                               (symbol-macrolet ((char-getter (char sequence index)))
          125                                 ,@body)))
          126                      (array (iterate
          127                              (symbol-macrolet ((char-getter (aref sequence index)))
          128                                ,@body)))
          129                      (list  (iterate (write-object (nth index sequence))))))
          130                  ;; update the column slot, setting it to NIL if we sent
          131                  ;; octets
          132                  (setq column
          133                        (cond (octet-seen-p nil)
          134                              (t (let ((last-newline-pos (position #\Newline sequence
          135                                                                   :test #'char=
          136                                                                   :start start
          137                                                                   :end end
          138                                                                   :from-end t)))
          139                                   (cond (last-newline-pos (- end last-newline-pos 1))
          140                                         (column (+ column (- end start))))))))))))))))
          141 
          142 (defmacro define-char-encoders ((lf-format-class cr-format-class crlf-format-class) &body body)
          143   "Non-hygienic utility macro which defines several encoding-related
          144 methods for the classes LF-FORMAT-CLASS, CR-FORMAT-CLASS, and
          145 CRLF-FORMAT-CLASS where it is assumed that CR-FORMAT-CLASS is the same
          146 encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and
          147 similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class.
          148 BODY is a code template for the code to convert one character to
          149 octets.  BODY must contain a symbol CHAR-GETTER representing the form
          150 which is used to obtain the character and a forms like \(OCTET-WRITE
          151 <thing>) to write the octet <thing>.  The CHAR-GETTER form might be
          152 called more than once."
          153   `(progn
          154      (defmethod char-to-octets ((format ,lf-format-class) char writer)
          155        (declare #.*fixnum-optimize-settings*)
          156        (declare (character char) (function writer))
          157        (symbol-macrolet ((char-getter char))
          158          (macrolet ((octet-writer (form)
          159                       `(funcall writer ,form)))
          160            ,@body)))
          161      (define-sequence-writers (,lf-format-class) ,@body)
          162      (define-sequence-writers (,cr-format-class)
          163        ;; modify the body so that the getter replaces a #\Newline
          164        ;; with a #\Return
          165        ,@(sublis `((char-getter . ,(with-unique-names (char)
          166                                      `(let ((,char char-getter))
          167                                         (declare (character ,char))
          168                                         (if (char= ,char #\Newline)
          169                                           #\Return
          170                                           ,char)))))
          171                  body))
          172      (define-sequence-writers (,crlf-format-class)
          173        ;; modify the body so that we potentially write octets for
          174        ;; two characters (#\Return and #\Linefeed) - the original
          175        ;; body is wrapped with the WRITE-CHAR local function
          176        ,(with-unique-names (char write-char)
          177           `(flet ((,write-char (,char)
          178                     ,@(sublis `((char-getter . ,char)) body)))
          179              (let ((,char char-getter))
          180                (declare (character ,char))
          181                (cond ((char= ,char #\Newline)
          182                       (,write-char #\Return)
          183                       (,write-char #\Linefeed))
          184                      (t (,write-char ,char)))))))))
          185 
          186 (define-char-encoders (flexi-latin-1-format flexi-cr-latin-1-format  flexi-crlf-latin-1-format)
          187   (let ((octet (char-code char-getter)))
          188     (when (> octet 255)
          189       (signal-encoding-error format "~S (code ~A) is not a LATIN-1 character." char-getter octet))
          190     (octet-writer octet)))
          191 
          192 (define-char-encoders (flexi-ascii-format flexi-cr-ascii-format flexi-crlf-ascii-format)
          193   (let ((octet (char-code char-getter)))
          194     (when (> octet 127)
          195       (signal-encoding-error format "~S (code ~A) is not an ASCII character." char-getter octet))
          196     (octet-writer octet)))
          197 
          198 (define-char-encoders (flexi-8-bit-format flexi-cr-8-bit-format flexi-crlf-8-bit-format)
          199   (with-accessors ((encoding-hash external-format-encoding-hash))
          200       format
          201     (let ((octet (gethash (char-code char-getter) encoding-hash)))
          202       (unless octet
          203         (signal-encoding-error format "~S (code ~A) is not in this encoding." char-getter octet))
          204       (octet-writer octet))))
          205 
          206 (define-char-encoders (flexi-utf-8-format flexi-cr-utf-8-format flexi-crlf-utf-8-format)
          207   ;; the old version using LDB was more elegant, but some Lisps had
          208   ;; trouble optimizing it
          209   (let ((char-code (char-code char-getter)))
          210     (tagbody
          211      (cond ((< char-code #x80)
          212             (octet-writer char-code)
          213             (go zero))
          214            ((< char-code #x800)
          215             (octet-writer (logior* #b11000000 (ash* char-code -6)))
          216             (go one))
          217            ((< char-code #x10000)
          218             (octet-writer (logior* #b11100000 (ash* char-code -12)))
          219             (go two))
          220            (t
          221             (octet-writer (logior* #b11110000 (ash* char-code -18)))))
          222      (octet-writer (logior* #b10000000 (logand* #b00111111 (ash* char-code -12))))
          223      two
          224      (octet-writer (logior* #b10000000 (logand* #b00111111 (ash* char-code -6))))
          225      one
          226      (octet-writer (logior* #b10000000 (logand* #b00111111 char-code)))
          227      zero)))
          228 
          229 (define-char-encoders (flexi-utf-16-le-format flexi-cr-utf-16-le-format flexi-crlf-utf-16-le-format)
          230   (flet ((write-word (word)
          231            (octet-writer (logand* #x00ff word))
          232            (octet-writer (ash* (logand* #xff00 word) -8))))
          233     (declare (inline write-word))
          234     (let ((char-code (char-code char-getter)))
          235       (declare (type char-code-integer char-code))
          236       (cond ((< char-code #x10000)
          237              (write-word char-code))
          238             (t (decf char-code #x10000)
          239                (write-word (logior* #xd800 (ash* char-code -10)))
          240                (write-word (logior* #xdc00 (logand* #x03ff char-code))))))))
          241 
          242 (define-char-encoders (flexi-utf-16-be-format flexi-cr-utf-16-be-format flexi-crlf-utf-16-be-format)
          243   (flet ((write-word (word)
          244            (octet-writer (ash* (logand* #xff00 word) -8))
          245            (octet-writer (logand* #x00ff word))))
          246     (declare (inline write-word))
          247     (let ((char-code (char-code char-getter)))
          248       (declare (type char-code-integer char-code))
          249       (cond ((< char-code #x10000)
          250              (write-word char-code))
          251             (t (decf char-code #x10000)
          252                (write-word (logior* #xd800 (ash* char-code -10)))
          253                (write-word (logior* #xdc00 (logand* #x03ff char-code))))))))
          254 
          255 (define-char-encoders (flexi-utf-32-le-format flexi-cr-utf-32-le-format flexi-crlf-utf-32-le-format)
          256   (let ((char-code (char-code char-getter)))
          257     (octet-writer (logand* #x00ff char-code))
          258     (octet-writer (logand* #x00ff (ash* char-code -8)))
          259     (octet-writer (logand* #x00ff (ash* char-code -16)))
          260     (octet-writer (logand* #x00ff (ash* char-code -24)))))
          261 
          262 (define-char-encoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format)
          263   (let ((char-code (char-code char-getter)))
          264     (octet-writer (logand* #x00ff (ash* char-code -24)))
          265     (octet-writer (logand* #x00ff (ash* char-code -16)))
          266     (octet-writer (logand* #x00ff (ash* char-code -8)))
          267     (octet-writer (logand* #x00ff char-code))))
          268 
          269 (defmethod char-to-octets ((format flexi-cr-mixin) char writer)
          270   (declare #.*fixnum-optimize-settings*)
          271   (declare (character char))
          272   (if (char= char #\Newline)
          273     (call-next-method format #\Return writer)
          274     (call-next-method)))
          275 
          276 (defmethod char-to-octets ((format flexi-crlf-mixin) char writer)
          277   (declare #.*fixnum-optimize-settings*)
          278   (declare (character char))
          279   (cond ((char= char #\Newline)
          280          (call-next-method format #\Return writer)
          281          (call-next-method format #\Linefeed writer))
          282         (t (call-next-method))))