length.lisp - clic - Clic is an command line interactive client for gopher written in Common LISP
 (HTM) git clone git://bitreich.org/clic/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/clic/
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Tags
 (DIR) README
 (DIR) LICENSE
       ---
       length.lisp (17499B)
       ---
            1 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
            2 ;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.6 2008/05/29 10:25:14 edi Exp $
            3 
            4 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
            5 
            6 ;;; Redistribution and use in source and binary forms, with or without
            7 ;;; modification, are permitted provided that the following conditions
            8 ;;; are met:
            9 
           10 ;;;   * Redistributions of source code must retain the above copyright
           11 ;;;     notice, this list of conditions and the following disclaimer.
           12 
           13 ;;;   * Redistributions in binary form must reproduce the above
           14 ;;;     copyright notice, this list of conditions and the following
           15 ;;;     disclaimer in the documentation and/or other materials
           16 ;;;     provided with the distribution.
           17 
           18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
           19 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
           20 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
           21 ;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
           22 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
           23 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
           24 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
           25 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
           26 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
           27 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
           28 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
           29 
           30 (in-package :flexi-streams)
           31 
           32 (defgeneric encoding-factor (format)
           33   (:documentation "Given an external format FORMAT, returns a factor
           34 which denotes the octets to characters ratio to expect when
           35 encoding/decoding.  If the returned value is an integer, the factor is
           36 assumed to be exact.  If it is a \(double) float, the factor is
           37 supposed to be based on heuristics and usually not exact.
           38 
           39 This factor is used in string.lisp.")
           40   (declare #.*standard-optimize-settings*))
           41 
           42 (defmethod encoding-factor ((format flexi-8-bit-format))
           43   (declare #.*standard-optimize-settings*)
           44   ;; 8-bit encodings map octets to characters in an exact one-to-one
           45   ;; fashion
           46   1)
           47 
           48 (defmethod encoding-factor ((format flexi-utf-8-format))
           49   (declare #.*standard-optimize-settings*)
           50   ;; UTF-8 characters can be anything from one to six octets, but we
           51   ;; assume that the "overhead" is only about 5 percent - this
           52   ;; estimate is obviously very much dependant on the content
           53   1.05d0)
           54 
           55 (defmethod encoding-factor ((format flexi-utf-16-format))
           56   (declare #.*standard-optimize-settings*)
           57   ;; usually one character maps to two octets, but characters with
           58   ;; code points above #x10000 map to four octets - we assume that we
           59   ;; usually don't see these characters but of course have to return a
           60   ;; float
           61   2.0d0)
           62 
           63 (defmethod encoding-factor ((format flexi-utf-32-format))
           64   (declare #.*standard-optimize-settings*)
           65   ;; UTF-32 always matches every character to four octets
           66   4)
           67 
           68 (defmethod encoding-factor ((format flexi-crlf-mixin))
           69   (declare #.*standard-optimize-settings*)
           70   ;; if the sequence #\Return #\Linefeed is the line-end marker, this
           71   ;; obviously makes encodings potentially longer and definitely makes
           72   ;; the estimate unexact
           73   (* 1.02d0 (call-next-method)))
           74 
           75 (defgeneric check-end (format start end i)
           76   (declare #.*fixnum-optimize-settings*)
           77   (:documentation "Helper function used below to determine if we tried
           78 to read past the end of the sequence.")
           79   (:method (format start end i)
           80    (declare #.*fixnum-optimize-settings*)
           81    (declare (ignore start))
           82    (declare (fixnum end i))
           83    (when (> i end)
           84      (signal-encoding-error format "This sequence can't be decoded ~
           85 using ~A as it is too short.  ~A octet~:P missing at the end."
           86                             (external-format-name format)
           87                             (- i end))))
           88   (:method ((format flexi-utf-16-format) start end i)
           89    (declare #.*fixnum-optimize-settings*)
           90    (declare (fixnum start end i))
           91    (declare (ignore i))
           92    ;; don't warn twice
           93    (when (evenp (- end start))
           94      (call-next-method))))
           95 
           96 (defgeneric compute-number-of-chars (format sequence start end)
           97   (declare #.*standard-optimize-settings*)
           98   (:documentation "Computes the exact number of characters required to
           99 decode the sequence of octets in SEQUENCE from START to END using the
          100 external format FORMAT."))
          101 
          102 (defmethod compute-number-of-chars :around (format (list list) start end)
          103   (declare #.*standard-optimize-settings*)
          104   (call-next-method format (coerce list 'vector) start end))
          105 
          106 (defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end)
          107   (declare #.*fixnum-optimize-settings*)
          108   (declare (fixnum start end))
          109   (declare (ignore sequence))
          110   (- end start))
          111 
          112 (defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end)
          113   ;; this method only applies to the 8-bit formats as all other
          114   ;; formats with CRLF line endings have their own specialized methods
          115   ;; below
          116   (declare #.*fixnum-optimize-settings*)
          117   (declare (fixnum start end) (vector sequence))
          118   (let ((i start)
          119         (length (- end start)))
          120     (declare (fixnum i length))
          121     (loop
          122      (when (>= i end)
          123        (return))
          124      (let ((position (search #.(vector +cr+ +lf+) sequence :start2 i :end2 end :test #'=)))
          125        (unless position
          126          (return))
          127        (setq i (1+ position))
          128        (decf length)))
          129     length))
          130 
          131 (defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end)
          132   (declare #.*fixnum-optimize-settings*)
          133   (declare (fixnum start end) (vector sequence))
          134   (let ((sum 0)
          135         (i start))
          136     (declare (fixnum i sum))
          137     (loop
          138      (when (>= i end)
          139        (return))
          140      (let* ((octet (aref sequence i))
          141             ;; note that there are no validity checks here
          142             (length (cond ((not (logbitp 7 octet)) 1)
          143                           ((= #b11000000 (logand* octet #b11100000)) 2)
          144                           ((= #b11100000 (logand* octet #b11110000)) 3)
          145                           (t 4))))
          146        (declare (fixnum length) (type octet octet))
          147        (incf sum)
          148        (incf i length)))
          149     (check-end format start end i)
          150     sum))
          151 
          152 (defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end)
          153   (declare #.*fixnum-optimize-settings*)
          154   (declare (fixnum start end) (vector sequence))
          155   (let ((sum 0)
          156         (i start)
          157         (last-octet 0))
          158     (declare (fixnum i sum) (type octet last-octet))
          159     (loop
          160      (when (>= i end)
          161        (return))
          162      (let* ((octet (aref sequence i))
          163             ;; note that there are no validity checks here
          164             (length (cond ((not (logbitp 7 octet)) 1)
          165                           ((= #b11000000 (logand* octet #b11100000)) 2)
          166                           ((= #b11100000 (logand* octet #b11110000)) 3)
          167                           (t 4))))
          168        (declare (fixnum length) (type octet octet))
          169        (unless (and (= octet +lf+) (= last-octet +cr+))
          170          (incf sum))
          171        (incf i length)
          172        (setq last-octet octet)))
          173     (check-end format start end i)
          174     sum))
          175 
          176 (defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end)
          177   (declare #.*fixnum-optimize-settings*)
          178   (declare (fixnum start end) (vector sequence))
          179   (declare (ignore sequence))
          180   (when (oddp (- end start))
          181     (signal-encoding-error format "~A octet~:P cannot be decoded ~
          182 using UTF-16 as ~:*~A is not even."
          183                            (- end start))))
          184   
          185 (defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end)
          186   (declare #.*fixnum-optimize-settings*)
          187   (declare (fixnum start end))
          188   (let ((sum 0)
          189         (i start))
          190     (declare (fixnum i sum))
          191     (decf end 2)
          192     (loop
          193      (when (> i end)
          194        (return))
          195      (let* ((high-octet (aref sequence (1+ i)))
          196             (length (cond ((<= #xd8 high-octet #xdf) 4)
          197                           (t 2))))
          198        (declare (fixnum length) (type octet high-octet))
          199        (incf sum)
          200        (incf i length)))
          201     (check-end format start (+ end 2) i)
          202     sum))
          203 
          204 (defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end)
          205   (declare #.*fixnum-optimize-settings*)
          206   (declare (fixnum start end) (vector sequence))
          207   (let ((sum 0)
          208         (i start))
          209     (declare (fixnum i sum))
          210     (decf end 2)
          211     (loop
          212      (when (> i end)
          213        (return))
          214      (let* ((high-octet (aref sequence i))
          215             (length (cond ((<= #xd8 high-octet #xdf) 4)
          216                           (t 2))))
          217        (declare (fixnum length) (type octet high-octet))
          218        (incf sum)
          219        (incf i length)))
          220     (check-end format start (+ end 2) i)
          221     sum))
          222 
          223 (defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end)
          224   (declare #.*fixnum-optimize-settings*)
          225   (declare (fixnum start end) (vector sequence))
          226   (let ((sum 0)
          227         (i start)
          228         (last-octet 0))
          229     (declare (fixnum i sum) (type octet last-octet))
          230     (decf end 2)
          231     (loop
          232      (when (> i end)
          233        (return))
          234      (let* ((high-octet (aref sequence (1+ i)))
          235             (length (cond ((<= #xd8 high-octet #xdf) 4)
          236                           (t 2))))
          237        (declare (fixnum length) (type octet high-octet))
          238        (unless (and (zerop high-octet)
          239                     (= (the octet (aref sequence i)) +lf+)
          240                     (= last-octet +cr+))         
          241          (incf sum))
          242        (setq last-octet (if (zerop high-octet)
          243                           (aref sequence i)
          244                           0))
          245        (incf i length)))
          246     (check-end format start (+ end 2) i)
          247     sum))
          248 
          249 (defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end)
          250   (declare #.*fixnum-optimize-settings*)
          251   (declare (fixnum start end) (vector sequence))
          252   (let ((sum 0)
          253         (i start)
          254         (last-octet 0))
          255     (declare (fixnum i sum) (type octet last-octet))
          256     (decf end 2)
          257     (loop
          258      (when (> i end)
          259        (return))
          260      (let* ((high-octet (aref sequence i))
          261             (length (cond ((<= #xd8 high-octet #xdf) 4)
          262                           (t 2))))
          263        (declare (fixnum length) (type octet high-octet))
          264        (unless (and (zerop high-octet)
          265                     (= (the octet (aref sequence (1+ i))) +lf+)
          266                     (= last-octet +cr+))
          267          (incf sum))
          268        (setq last-octet (if (zerop high-octet)
          269                           (aref sequence (1+ i))
          270                           0))
          271        (incf i length)))
          272     (check-end format start (+ end 2) i)
          273     sum))
          274 
          275 (defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end)
          276   (declare #.*fixnum-optimize-settings*)
          277   (declare (fixnum start end))
          278   (declare (ignore sequence))
          279   (let ((length (- end start)))
          280     (when (plusp (mod length 4))
          281       (signal-encoding-error format "~A octet~:P cannot be decoded ~
          282 using UTF-32 as ~:*~A is not a multiple-value of four."
          283                              length))))
          284 
          285 (defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end)
          286   (declare #.*fixnum-optimize-settings*)
          287   (declare (fixnum start end))
          288   (declare (ignore sequence))
          289   (ceiling (- end start) 4))
          290 
          291 (defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end)
          292   (declare #.*fixnum-optimize-settings*)
          293   (declare (fixnum start end) (vector sequence))
          294   (let ((i start)
          295         (length (ceiling (- end start) 4)))
          296     (decf end 8)
          297     (loop
          298      (when (> i end)
          299        (return))
          300      (cond ((loop for j of-type fixnum from i
          301                   for octet across #.(vector +cr+ 0 0 0 +lf+ 0 0 0)
          302                   always (= octet (aref sequence j)))
          303             (decf length)
          304             (incf i 8))
          305            (t (incf i 4))))
          306     length))
          307 
          308 (defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end)
          309   (declare #.*fixnum-optimize-settings*)
          310   (declare (fixnum start end) (vector sequence))
          311   (let ((i start)
          312         (length (ceiling (- end start) 4)))
          313     (decf end 8)
          314     (loop
          315      (when (> i end)
          316        (return))
          317      (cond ((loop for j of-type fixnum from i
          318                   for octet across #.(vector 0 0 0 +cr+ 0 0 0 +lf+)
          319                   always (= octet (aref sequence j)))
          320             (decf length)
          321             (incf i 8))
          322            (t (incf i 4))))
          323     length))
          324 
          325 (defgeneric compute-number-of-octets (format sequence start end)
          326   (declare #.*standard-optimize-settings*)
          327   (:documentation "Computes the exact number of octets required to
          328 encode the sequence of characters in SEQUENCE from START to END using
          329 the external format FORMAT."))
          330 
          331 (defmethod compute-number-of-octets :around (format (list list) start end)
          332   (declare #.*standard-optimize-settings*)
          333   (call-next-method format (coerce list 'string*) start end))
          334 
          335 (defmethod compute-number-of-octets ((format flexi-8-bit-format) string start end)
          336   (declare #.*fixnum-optimize-settings*)
          337   (declare (fixnum start end))
          338   (declare (ignore string))  
          339   (- end start))
          340 
          341 (defmethod compute-number-of-octets ((format flexi-utf-8-format) string start end)
          342   (declare #.*fixnum-optimize-settings*)
          343   (declare (fixnum start end) (string string))
          344   (let ((sum 0)
          345         (i start))
          346     (declare (fixnum i sum))
          347     (loop
          348      (when (>= i end)
          349        (return))
          350      (let* ((char-code (char-code (char string i)))
          351             (char-length (cond ((< char-code #x80) 1)
          352                                ((< char-code #x800) 2)
          353                                ((< char-code #x10000) 3)
          354                                (t 4))))
          355        (declare (fixnum char-length) (type char-code-integer char-code))
          356        (incf sum char-length)
          357        (incf i)))
          358     sum))
          359 
          360 (defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) string start end)
          361   (declare #.*fixnum-optimize-settings*)
          362   (declare (fixnum start end) (string string))
          363   (let ((sum 0)
          364         (i start))
          365     (declare (fixnum i sum))
          366     (loop
          367      (when (>= i end)
          368        (return))
          369      (let* ((char-code (char-code (char string i)))
          370             (char-length (cond ((= char-code #.(char-code #\Newline)) 2)
          371                                ((< char-code #x80) 1)
          372                                ((< char-code #x800) 2)
          373                                ((< char-code #x10000) 3)
          374                                (t 4))))
          375        (declare (fixnum char-length) (type char-code-integer char-code))
          376        (incf sum char-length)
          377        (incf i)))
          378     sum))
          379 
          380 (defmethod compute-number-of-octets ((format flexi-utf-16-format) string start end)
          381   (declare #.*fixnum-optimize-settings*)
          382   (declare (fixnum start end) (string string))
          383   (let ((sum 0)
          384         (i start))
          385     (declare (fixnum i sum))
          386     (loop
          387      (when (>= i end)
          388        (return))
          389      (let* ((char-code (char-code (char string i)))
          390             (char-length (cond ((< char-code #x10000) 2)
          391                                (t 4))))
          392        (declare (fixnum char-length) (type char-code-integer char-code))
          393        (incf sum char-length)
          394        (incf i)))
          395     sum))
          396 
          397 (defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) string start end)
          398   (declare #.*fixnum-optimize-settings*)
          399   (declare (fixnum start end) (string string))
          400   (let ((sum 0)
          401         (i start))
          402     (declare (fixnum i sum))
          403     (loop
          404      (when (>= i end)
          405        (return))
          406      (let* ((char-code (char-code (char string i)))
          407             (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
          408                                ((< char-code #x10000) 2)
          409                                (t 4))))
          410        (declare (fixnum char-length) (type char-code-integer char-code))
          411        (incf sum char-length)
          412        (incf i)))
          413     sum))
          414 
          415 (defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) string start end)
          416   (declare #.*fixnum-optimize-settings*)
          417   (declare (fixnum start end) (string string))
          418   (let ((sum 0)
          419         (i start))
          420     (declare (fixnum i sum))
          421     (loop
          422      (when (>= i end)
          423        (return))
          424      (let* ((char-code (char-code (char string i)))
          425             (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
          426                                ((< char-code #x10000) 2)
          427                                (t 4))))
          428        (declare (fixnum char-length) (type char-code-integer char-code))
          429        (incf sum char-length)
          430        (incf i)))
          431     sum))
          432 
          433 (defmethod compute-number-of-octets ((format flexi-utf-32-format) string start end)
          434   (declare #.*fixnum-optimize-settings*)
          435   (declare (fixnum start end))
          436   (declare (ignore string))
          437   (* 4 (- end start)))
          438 
          439 (defmethod compute-number-of-octets ((format flexi-crlf-mixin) string start end)
          440   (declare #.*fixnum-optimize-settings*)
          441   (declare (fixnum start end) (string string))
          442   (+ (call-next-method)
          443      (* (case (external-format-name format)
          444           (:utf-32 4)
          445           (otherwise 1))
          446         (count #\Newline string :start start :end end :test #'char=))))
          447 
          448 (defgeneric character-length (format char)
          449   (declare #.*fixnum-optimize-settings*)
          450   (:documentation "Returns the number of octets needed to encode the
          451 single character CHAR.")
          452   (:method (format char)
          453    (compute-number-of-octets format (string char) 0 1)))
          454 
          455 (defmethod character-length :around ((format flexi-crlf-mixin) (char (eql #\Newline)))
          456   (declare #.*fixnum-optimize-settings*)
          457   (+ (call-next-method format +cr+)
          458      (call-next-method format +lf+)))
          459 
          460 (defmethod character-length ((format flexi-8-bit-format) char)
          461   (declare #.*fixnum-optimize-settings*)
          462   (declare (ignore char))
          463   1)
          464 
          465 (defmethod character-length ((format flexi-utf-32-format) char)
          466   (declare #.*fixnum-optimize-settings*)
          467   (declare (ignore char))
          468