length.lisp - clic - Clic is an command line interactive client for gopher written in Common LISP (HTM) git clone git://bitreich.org/clic/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/clic/ (DIR) Log (DIR) Files (DIR) Refs (DIR) Tags (DIR) README (DIR) LICENSE --- length.lisp (17499B) --- 1 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- 2 ;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.6 2008/05/29 10:25:14 edi Exp $ 3 4 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. 5 6 ;;; Redistribution and use in source and binary forms, with or without 7 ;;; modification, are permitted provided that the following conditions 8 ;;; are met: 9 10 ;;; * Redistributions of source code must retain the above copyright 11 ;;; notice, this list of conditions and the following disclaimer. 12 13 ;;; * Redistributions in binary form must reproduce the above 14 ;;; copyright notice, this list of conditions and the following 15 ;;; disclaimer in the documentation and/or other materials 16 ;;; provided with the distribution. 17 18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED 19 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 22 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 24 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 26 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 27 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 (in-package :flexi-streams) 31 32 (defgeneric encoding-factor (format) 33 (:documentation "Given an external format FORMAT, returns a factor 34 which denotes the octets to characters ratio to expect when 35 encoding/decoding. If the returned value is an integer, the factor is 36 assumed to be exact. If it is a \(double) float, the factor is 37 supposed to be based on heuristics and usually not exact. 38 39 This factor is used in string.lisp.") 40 (declare #.*standard-optimize-settings*)) 41 42 (defmethod encoding-factor ((format flexi-8-bit-format)) 43 (declare #.*standard-optimize-settings*) 44 ;; 8-bit encodings map octets to characters in an exact one-to-one 45 ;; fashion 46 1) 47 48 (defmethod encoding-factor ((format flexi-utf-8-format)) 49 (declare #.*standard-optimize-settings*) 50 ;; UTF-8 characters can be anything from one to six octets, but we 51 ;; assume that the "overhead" is only about 5 percent - this 52 ;; estimate is obviously very much dependant on the content 53 1.05d0) 54 55 (defmethod encoding-factor ((format flexi-utf-16-format)) 56 (declare #.*standard-optimize-settings*) 57 ;; usually one character maps to two octets, but characters with 58 ;; code points above #x10000 map to four octets - we assume that we 59 ;; usually don't see these characters but of course have to return a 60 ;; float 61 2.0d0) 62 63 (defmethod encoding-factor ((format flexi-utf-32-format)) 64 (declare #.*standard-optimize-settings*) 65 ;; UTF-32 always matches every character to four octets 66 4) 67 68 (defmethod encoding-factor ((format flexi-crlf-mixin)) 69 (declare #.*standard-optimize-settings*) 70 ;; if the sequence #\Return #\Linefeed is the line-end marker, this 71 ;; obviously makes encodings potentially longer and definitely makes 72 ;; the estimate unexact 73 (* 1.02d0 (call-next-method))) 74 75 (defgeneric check-end (format start end i) 76 (declare #.*fixnum-optimize-settings*) 77 (:documentation "Helper function used below to determine if we tried 78 to read past the end of the sequence.") 79 (:method (format start end i) 80 (declare #.*fixnum-optimize-settings*) 81 (declare (ignore start)) 82 (declare (fixnum end i)) 83 (when (> i end) 84 (signal-encoding-error format "This sequence can't be decoded ~ 85 using ~A as it is too short. ~A octet~:P missing at the end." 86 (external-format-name format) 87 (- i end)))) 88 (:method ((format flexi-utf-16-format) start end i) 89 (declare #.*fixnum-optimize-settings*) 90 (declare (fixnum start end i)) 91 (declare (ignore i)) 92 ;; don't warn twice 93 (when (evenp (- end start)) 94 (call-next-method)))) 95 96 (defgeneric compute-number-of-chars (format sequence start end) 97 (declare #.*standard-optimize-settings*) 98 (:documentation "Computes the exact number of characters required to 99 decode the sequence of octets in SEQUENCE from START to END using the 100 external format FORMAT.")) 101 102 (defmethod compute-number-of-chars :around (format (list list) start end) 103 (declare #.*standard-optimize-settings*) 104 (call-next-method format (coerce list 'vector) start end)) 105 106 (defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end) 107 (declare #.*fixnum-optimize-settings*) 108 (declare (fixnum start end)) 109 (declare (ignore sequence)) 110 (- end start)) 111 112 (defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end) 113 ;; this method only applies to the 8-bit formats as all other 114 ;; formats with CRLF line endings have their own specialized methods 115 ;; below 116 (declare #.*fixnum-optimize-settings*) 117 (declare (fixnum start end) (vector sequence)) 118 (let ((i start) 119 (length (- end start))) 120 (declare (fixnum i length)) 121 (loop 122 (when (>= i end) 123 (return)) 124 (let ((position (search #.(vector +cr+ +lf+) sequence :start2 i :end2 end :test #'=))) 125 (unless position 126 (return)) 127 (setq i (1+ position)) 128 (decf length))) 129 length)) 130 131 (defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end) 132 (declare #.*fixnum-optimize-settings*) 133 (declare (fixnum start end) (vector sequence)) 134 (let ((sum 0) 135 (i start)) 136 (declare (fixnum i sum)) 137 (loop 138 (when (>= i end) 139 (return)) 140 (let* ((octet (aref sequence i)) 141 ;; note that there are no validity checks here 142 (length (cond ((not (logbitp 7 octet)) 1) 143 ((= #b11000000 (logand* octet #b11100000)) 2) 144 ((= #b11100000 (logand* octet #b11110000)) 3) 145 (t 4)))) 146 (declare (fixnum length) (type octet octet)) 147 (incf sum) 148 (incf i length))) 149 (check-end format start end i) 150 sum)) 151 152 (defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end) 153 (declare #.*fixnum-optimize-settings*) 154 (declare (fixnum start end) (vector sequence)) 155 (let ((sum 0) 156 (i start) 157 (last-octet 0)) 158 (declare (fixnum i sum) (type octet last-octet)) 159 (loop 160 (when (>= i end) 161 (return)) 162 (let* ((octet (aref sequence i)) 163 ;; note that there are no validity checks here 164 (length (cond ((not (logbitp 7 octet)) 1) 165 ((= #b11000000 (logand* octet #b11100000)) 2) 166 ((= #b11100000 (logand* octet #b11110000)) 3) 167 (t 4)))) 168 (declare (fixnum length) (type octet octet)) 169 (unless (and (= octet +lf+) (= last-octet +cr+)) 170 (incf sum)) 171 (incf i length) 172 (setq last-octet octet))) 173 (check-end format start end i) 174 sum)) 175 176 (defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end) 177 (declare #.*fixnum-optimize-settings*) 178 (declare (fixnum start end) (vector sequence)) 179 (declare (ignore sequence)) 180 (when (oddp (- end start)) 181 (signal-encoding-error format "~A octet~:P cannot be decoded ~ 182 using UTF-16 as ~:*~A is not even." 183 (- end start)))) 184 185 (defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end) 186 (declare #.*fixnum-optimize-settings*) 187 (declare (fixnum start end)) 188 (let ((sum 0) 189 (i start)) 190 (declare (fixnum i sum)) 191 (decf end 2) 192 (loop 193 (when (> i end) 194 (return)) 195 (let* ((high-octet (aref sequence (1+ i))) 196 (length (cond ((<= #xd8 high-octet #xdf) 4) 197 (t 2)))) 198 (declare (fixnum length) (type octet high-octet)) 199 (incf sum) 200 (incf i length))) 201 (check-end format start (+ end 2) i) 202 sum)) 203 204 (defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end) 205 (declare #.*fixnum-optimize-settings*) 206 (declare (fixnum start end) (vector sequence)) 207 (let ((sum 0) 208 (i start)) 209 (declare (fixnum i sum)) 210 (decf end 2) 211 (loop 212 (when (> i end) 213 (return)) 214 (let* ((high-octet (aref sequence i)) 215 (length (cond ((<= #xd8 high-octet #xdf) 4) 216 (t 2)))) 217 (declare (fixnum length) (type octet high-octet)) 218 (incf sum) 219 (incf i length))) 220 (check-end format start (+ end 2) i) 221 sum)) 222 223 (defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end) 224 (declare #.*fixnum-optimize-settings*) 225 (declare (fixnum start end) (vector sequence)) 226 (let ((sum 0) 227 (i start) 228 (last-octet 0)) 229 (declare (fixnum i sum) (type octet last-octet)) 230 (decf end 2) 231 (loop 232 (when (> i end) 233 (return)) 234 (let* ((high-octet (aref sequence (1+ i))) 235 (length (cond ((<= #xd8 high-octet #xdf) 4) 236 (t 2)))) 237 (declare (fixnum length) (type octet high-octet)) 238 (unless (and (zerop high-octet) 239 (= (the octet (aref sequence i)) +lf+) 240 (= last-octet +cr+)) 241 (incf sum)) 242 (setq last-octet (if (zerop high-octet) 243 (aref sequence i) 244 0)) 245 (incf i length))) 246 (check-end format start (+ end 2) i) 247 sum)) 248 249 (defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end) 250 (declare #.*fixnum-optimize-settings*) 251 (declare (fixnum start end) (vector sequence)) 252 (let ((sum 0) 253 (i start) 254 (last-octet 0)) 255 (declare (fixnum i sum) (type octet last-octet)) 256 (decf end 2) 257 (loop 258 (when (> i end) 259 (return)) 260 (let* ((high-octet (aref sequence i)) 261 (length (cond ((<= #xd8 high-octet #xdf) 4) 262 (t 2)))) 263 (declare (fixnum length) (type octet high-octet)) 264 (unless (and (zerop high-octet) 265 (= (the octet (aref sequence (1+ i))) +lf+) 266 (= last-octet +cr+)) 267 (incf sum)) 268 (setq last-octet (if (zerop high-octet) 269 (aref sequence (1+ i)) 270 0)) 271 (incf i length))) 272 (check-end format start (+ end 2) i) 273 sum)) 274 275 (defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end) 276 (declare #.*fixnum-optimize-settings*) 277 (declare (fixnum start end)) 278 (declare (ignore sequence)) 279 (let ((length (- end start))) 280 (when (plusp (mod length 4)) 281 (signal-encoding-error format "~A octet~:P cannot be decoded ~ 282 using UTF-32 as ~:*~A is not a multiple-value of four." 283 length)))) 284 285 (defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end) 286 (declare #.*fixnum-optimize-settings*) 287 (declare (fixnum start end)) 288 (declare (ignore sequence)) 289 (ceiling (- end start) 4)) 290 291 (defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end) 292 (declare #.*fixnum-optimize-settings*) 293 (declare (fixnum start end) (vector sequence)) 294 (let ((i start) 295 (length (ceiling (- end start) 4))) 296 (decf end 8) 297 (loop 298 (when (> i end) 299 (return)) 300 (cond ((loop for j of-type fixnum from i 301 for octet across #.(vector +cr+ 0 0 0 +lf+ 0 0 0) 302 always (= octet (aref sequence j))) 303 (decf length) 304 (incf i 8)) 305 (t (incf i 4)))) 306 length)) 307 308 (defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end) 309 (declare #.*fixnum-optimize-settings*) 310 (declare (fixnum start end) (vector sequence)) 311 (let ((i start) 312 (length (ceiling (- end start) 4))) 313 (decf end 8) 314 (loop 315 (when (> i end) 316 (return)) 317 (cond ((loop for j of-type fixnum from i 318 for octet across #.(vector 0 0 0 +cr+ 0 0 0 +lf+) 319 always (= octet (aref sequence j))) 320 (decf length) 321 (incf i 8)) 322 (t (incf i 4)))) 323 length)) 324 325 (defgeneric compute-number-of-octets (format sequence start end) 326 (declare #.*standard-optimize-settings*) 327 (:documentation "Computes the exact number of octets required to 328 encode the sequence of characters in SEQUENCE from START to END using 329 the external format FORMAT.")) 330 331 (defmethod compute-number-of-octets :around (format (list list) start end) 332 (declare #.*standard-optimize-settings*) 333 (call-next-method format (coerce list 'string*) start end)) 334 335 (defmethod compute-number-of-octets ((format flexi-8-bit-format) string start end) 336 (declare #.*fixnum-optimize-settings*) 337 (declare (fixnum start end)) 338 (declare (ignore string)) 339 (- end start)) 340 341 (defmethod compute-number-of-octets ((format flexi-utf-8-format) string start end) 342 (declare #.*fixnum-optimize-settings*) 343 (declare (fixnum start end) (string string)) 344 (let ((sum 0) 345 (i start)) 346 (declare (fixnum i sum)) 347 (loop 348 (when (>= i end) 349 (return)) 350 (let* ((char-code (char-code (char string i))) 351 (char-length (cond ((< char-code #x80) 1) 352 ((< char-code #x800) 2) 353 ((< char-code #x10000) 3) 354 (t 4)))) 355 (declare (fixnum char-length) (type char-code-integer char-code)) 356 (incf sum char-length) 357 (incf i))) 358 sum)) 359 360 (defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) string start end) 361 (declare #.*fixnum-optimize-settings*) 362 (declare (fixnum start end) (string string)) 363 (let ((sum 0) 364 (i start)) 365 (declare (fixnum i sum)) 366 (loop 367 (when (>= i end) 368 (return)) 369 (let* ((char-code (char-code (char string i))) 370 (char-length (cond ((= char-code #.(char-code #\Newline)) 2) 371 ((< char-code #x80) 1) 372 ((< char-code #x800) 2) 373 ((< char-code #x10000) 3) 374 (t 4)))) 375 (declare (fixnum char-length) (type char-code-integer char-code)) 376 (incf sum char-length) 377 (incf i))) 378 sum)) 379 380 (defmethod compute-number-of-octets ((format flexi-utf-16-format) string start end) 381 (declare #.*fixnum-optimize-settings*) 382 (declare (fixnum start end) (string string)) 383 (let ((sum 0) 384 (i start)) 385 (declare (fixnum i sum)) 386 (loop 387 (when (>= i end) 388 (return)) 389 (let* ((char-code (char-code (char string i))) 390 (char-length (cond ((< char-code #x10000) 2) 391 (t 4)))) 392 (declare (fixnum char-length) (type char-code-integer char-code)) 393 (incf sum char-length) 394 (incf i))) 395 sum)) 396 397 (defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) string start end) 398 (declare #.*fixnum-optimize-settings*) 399 (declare (fixnum start end) (string string)) 400 (let ((sum 0) 401 (i start)) 402 (declare (fixnum i sum)) 403 (loop 404 (when (>= i end) 405 (return)) 406 (let* ((char-code (char-code (char string i))) 407 (char-length (cond ((= char-code #.(char-code #\Newline)) 4) 408 ((< char-code #x10000) 2) 409 (t 4)))) 410 (declare (fixnum char-length) (type char-code-integer char-code)) 411 (incf sum char-length) 412 (incf i))) 413 sum)) 414 415 (defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) string start end) 416 (declare #.*fixnum-optimize-settings*) 417 (declare (fixnum start end) (string string)) 418 (let ((sum 0) 419 (i start)) 420 (declare (fixnum i sum)) 421 (loop 422 (when (>= i end) 423 (return)) 424 (let* ((char-code (char-code (char string i))) 425 (char-length (cond ((= char-code #.(char-code #\Newline)) 4) 426 ((< char-code #x10000) 2) 427 (t 4)))) 428 (declare (fixnum char-length) (type char-code-integer char-code)) 429 (incf sum char-length) 430 (incf i))) 431 sum)) 432 433 (defmethod compute-number-of-octets ((format flexi-utf-32-format) string start end) 434 (declare #.*fixnum-optimize-settings*) 435 (declare (fixnum start end)) 436 (declare (ignore string)) 437 (* 4 (- end start))) 438 439 (defmethod compute-number-of-octets ((format flexi-crlf-mixin) string start end) 440 (declare #.*fixnum-optimize-settings*) 441 (declare (fixnum start end) (string string)) 442 (+ (call-next-method) 443 (* (case (external-format-name format) 444 (:utf-32 4) 445 (otherwise 1)) 446 (count #\Newline string :start start :end end :test #'char=)))) 447 448 (defgeneric character-length (format char) 449 (declare #.*fixnum-optimize-settings*) 450 (:documentation "Returns the number of octets needed to encode the 451 single character CHAR.") 452 (:method (format char) 453 (compute-number-of-octets format (string char) 0 1))) 454 455 (defmethod character-length :around ((format flexi-crlf-mixin) (char (eql #\Newline))) 456 (declare #.*fixnum-optimize-settings*) 457 (+ (call-next-method format +cr+) 458 (call-next-method format +lf+))) 459 460 (defmethod character-length ((format flexi-8-bit-format) char) 461 (declare #.*fixnum-optimize-settings*) 462 (declare (ignore char)) 463 1) 464 465 (defmethod character-length ((format flexi-utf-32-format) char) 466 (declare #.*fixnum-optimize-settings*) 467 (declare (ignore char)) 468