external-format.lisp - clic - Clic is an command line interactive client for gopher written in Common LISP (HTM) git clone git://bitreich.org/clic/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/clic/ (DIR) Log (DIR) Files (DIR) Refs (DIR) Tags (DIR) README (DIR) LICENSE --- external-format.lisp (17187B) --- 1 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- 2 ;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.24 2008/05/26 10:55:08 edi Exp $ 3 4 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. 5 6 ;;; Redistribution and use in source and binary forms, with or without 7 ;;; modification, are permitted provided that the following conditions 8 ;;; are met: 9 10 ;;; * Redistributions of source code must retain the above copyright 11 ;;; notice, this list of conditions and the following disclaimer. 12 13 ;;; * Redistributions in binary form must reproduce the above 14 ;;; copyright notice, this list of conditions and the following 15 ;;; disclaimer in the documentation and/or other materials 16 ;;; provided with the distribution. 17 18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED 19 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 22 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 24 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 26 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 27 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 (in-package :flexi-streams) 31 32 (defclass external-format () 33 ((name :initarg :name 34 :reader external-format-name 35 :documentation "The name of the external format - a 36 keyword.") 37 (id :initarg :id 38 :initform nil 39 :reader external-format-id 40 :documentation "If the external format denotes a Windows 41 code page this ID specifies which one to use. Otherwise the 42 value is ignored \(and usually NIL).") 43 (little-endian :initarg :little-endian 44 :initform *default-little-endian* 45 :reader external-format-little-endian 46 :documentation "Whether multi-octet values are 47 read and written with the least significant octet first. For 48 8-bit encodings like :ISO-8859-1 this value is ignored.") 49 (eol-style :initarg :eol-style 50 :reader external-format-eol-style 51 :documentation "The character\(s) to or from which 52 a #\Newline will be translated - one of the keywords :CR, :LF, 53 or :CRLF.")) 54 (:documentation "EXTERNAL-FORMAT objects are used to denote 55 encodings for flexi streams or for the string functions defined in 56 strings.lisp.")) 57 58 (defmethod make-load-form ((thing external-format) &optional environment) 59 "Defines a way to reconstruct external formats. Needed for OpenMCL." 60 (make-load-form-saving-slots thing :environment environment)) 61 62 (defclass flexi-cr-mixin () 63 () 64 (:documentation "A mixin for external-formats where the end-of-line 65 designator is #\Return.")) 66 67 (defclass flexi-crlf-mixin () 68 () 69 (:documentation "A mixin for external-formats where the end-of-line 70 designator is the sequence #\Return #\Linefeed.")) 71 72 (defclass flexi-8-bit-format (external-format) 73 ((encoding-hash :accessor external-format-encoding-hash) 74 (decoding-table :accessor external-format-decoding-table)) 75 (:documentation "The class for all flexi streams which use an 8-bit 76 encoding and thus need additional slots for the encoding/decoding 77 tables.")) 78 79 (defclass flexi-cr-8-bit-format (flexi-cr-mixin flexi-8-bit-format) 80 () 81 (:documentation "Special class for external formats which use an 82 8-bit encoding /and/ have #\Return as the line-end character.")) 83 84 (defclass flexi-crlf-8-bit-format (flexi-crlf-mixin flexi-8-bit-format) 85 () 86 (:documentation "Special class for external formats which use an 87 8-bit encoding /and/ have the sequence #\Return #\Linefeed as the 88 line-end character.")) 89 90 (defclass flexi-ascii-format (flexi-8-bit-format) 91 () 92 (:documentation "Special class for external formats which use the 93 US-ASCII encoding.")) 94 95 (defclass flexi-cr-ascii-format (flexi-cr-mixin flexi-ascii-format) 96 () 97 (:documentation "Special class for external formats which use the 98 US-ASCII encoding /and/ have #\Return as the line-end character.")) 99 100 (defclass flexi-crlf-ascii-format (flexi-crlf-mixin flexi-ascii-format) 101 () 102 (:documentation "Special class for external formats which use the 103 US-ASCII encoding /and/ have the sequence #\Return #\Linefeed as the 104 line-end character.")) 105 106 (defclass flexi-latin-1-format (flexi-8-bit-format) 107 () 108 (:documentation "Special class for external formats which use the 109 ISO-8859-1 encoding.")) 110 111 (defclass flexi-cr-latin-1-format (flexi-cr-mixin flexi-latin-1-format) 112 () 113 (:documentation "Special class for external formats which use the 114 ISO-8859-1 encoding /and/ have #\Return as the line-end character.")) 115 116 (defclass flexi-crlf-latin-1-format (flexi-crlf-mixin flexi-latin-1-format) 117 () 118 (:documentation "Special class for external formats which use the 119 ISO-8859-1 encoding /and/ have the sequence #\Return #\Linefeed as the 120 line-end character.")) 121 122 (defclass flexi-utf-32-format (external-format) 123 () 124 (:documentation "Abstract class for external formats which use the 125 UTF-32 encoding.")) 126 127 (defclass flexi-utf-32-le-format (flexi-utf-32-format) 128 () 129 (:documentation "Special class for external formats which use the 130 UTF-32 encoding with little-endian byte ordering.")) 131 132 (defclass flexi-cr-utf-32-le-format (flexi-cr-mixin flexi-utf-32-le-format) 133 () 134 (:documentation "Special class for external formats which use the 135 UTF-32 encoding with little-endian byte ordering /and/ have #\Return 136 as the line-end character.")) 137 138 (defclass flexi-crlf-utf-32-le-format (flexi-crlf-mixin flexi-utf-32-le-format) 139 () 140 (:documentation "Special class for external formats which use the 141 UTF-32 encoding with little-endian byte ordering /and/ have the 142 sequence #\Return #\Linefeed as the line-end character.")) 143 144 (defclass flexi-utf-32-be-format (flexi-utf-32-format) 145 () 146 (:documentation "Special class for external formats which use the 147 UTF-32 encoding with big-endian byte ordering.")) 148 149 (defclass flexi-cr-utf-32-be-format (flexi-cr-mixin flexi-utf-32-be-format) 150 () 151 (:documentation "Special class for external formats which use the 152 UTF-32 encoding with big-endian byte ordering /and/ have #\Return as 153 the line-end character.")) 154 155 (defclass flexi-crlf-utf-32-be-format (flexi-crlf-mixin flexi-utf-32-be-format) 156 () 157 (:documentation "Special class for external formats which use the 158 the UTF-32 encoding with big-endian byte ordering /and/ have the 159 sequence #\Return #\Linefeed as the line-end character.")) 160 161 (defclass flexi-utf-16-format (external-format) 162 () 163 (:documentation "Abstract class for external formats which use the 164 UTF-16 encoding.")) 165 166 (defclass flexi-utf-16-le-format (flexi-utf-16-format) 167 () 168 (:documentation "Special class for external formats which use the 169 UTF-16 encoding with little-endian byte ordering.")) 170 171 (defclass flexi-cr-utf-16-le-format (flexi-cr-mixin flexi-utf-16-le-format) 172 () 173 (:documentation "Special class for external formats which use the 174 UTF-16 encoding with little-endian byte ordering /and/ have #\Return 175 as the line-end character.")) 176 177 (defclass flexi-crlf-utf-16-le-format (flexi-crlf-mixin flexi-utf-16-le-format) 178 () 179 (:documentation "Special class for external formats which use the 180 UTF-16 encoding with little-endian byte ordering /and/ have the 181 sequence #\Return #\Linefeed as the line-end character.")) 182 183 (defclass flexi-utf-16-be-format (flexi-utf-16-format) 184 () 185 (:documentation "Special class for external formats which use the 186 UTF-16 encoding with big-endian byte ordering.")) 187 188 (defclass flexi-cr-utf-16-be-format (flexi-cr-mixin flexi-utf-16-be-format) 189 () 190 (:documentation "Special class for external formats which use the 191 UTF-16 encoding with big-endian byte ordering /and/ have #\Return as 192 the line-end character.")) 193 194 (defclass flexi-crlf-utf-16-be-format (flexi-crlf-mixin flexi-utf-16-be-format) 195 () 196 (:documentation "Special class for external formats which use the 197 UTF-16 encoding with big-endian byte ordering /and/ have the sequence 198 #\Return #\Linefeed as the line-end character.")) 199 200 (defclass flexi-utf-8-format (external-format) 201 () 202 (:documentation "Special class for external formats which use the 203 UTF-8 encoding.")) 204 205 (defclass flexi-cr-utf-8-format (flexi-cr-mixin flexi-utf-8-format) 206 () 207 (:documentation "Special class for external formats which use the 208 UTF-8 encoding /and/ have #\Return as the line-end character.")) 209 210 (defclass flexi-crlf-utf-8-format (flexi-crlf-mixin flexi-utf-8-format) 211 () 212 (:documentation "Special class for external formats which use the 213 UTF-8 encoding /and/ have the sequence #\Return #\Linefeed as the 214 line-end character.")) 215 216 (defmethod initialize-instance :after ((external-format flexi-8-bit-format) &rest initargs) 217 "Sets the fixed encoding/decoding tables for this particular 218 external format." 219 (declare #.*standard-optimize-settings*) 220 (declare (ignore initargs)) 221 (with-accessors ((encoding-hash external-format-encoding-hash) 222 (decoding-table external-format-decoding-table) 223 (name external-format-name) 224 (id external-format-id)) 225 external-format 226 (multiple-value-setq (encoding-hash decoding-table) 227 (cond ((ascii-name-p name) 228 (values +ascii-hash+ +ascii-table+)) 229 ((koi8-r-name-p name) 230 (values +koi8-r-hash+ +koi8-r-table+)) 231 ((iso-8859-name-p name) 232 (values (cdr (assoc name +iso-8859-hashes+ :test #'eq)) 233 (cdr (assoc name +iso-8859-tables+ :test #'eq)))) 234 ((code-page-name-p name) 235 (values (cdr (assoc id +code-page-hashes+)) 236 (cdr (assoc id +code-page-tables+)))))))) 237 238 (defun external-format-class-name (real-name &key eol-style little-endian id) 239 "Given the initargs for a general external format returns the name 240 \(a symbol) of the most specific subclass matching these arguments." 241 (declare #.*standard-optimize-settings*) 242 (declare (ignore id)) 243 (cond ((ascii-name-p real-name) 244 (ecase eol-style 245 (:lf 'flexi-ascii-format) 246 (:cr 'flexi-cr-ascii-format) 247 (:crlf 'flexi-crlf-ascii-format))) 248 ((eq real-name :iso-8859-1) 249 (ecase eol-style 250 (:lf 'flexi-latin-1-format) 251 (:cr 'flexi-cr-latin-1-format) 252 (:crlf 'flexi-crlf-latin-1-format))) 253 ((or (koi8-r-name-p real-name) 254 (iso-8859-name-p real-name) 255 (code-page-name-p real-name)) 256 (ecase eol-style 257 (:lf 'flexi-8-bit-format) 258 (:cr 'flexi-cr-8-bit-format) 259 (:crlf 'flexi-crlf-8-bit-format))) 260 (t (ecase real-name 261 (:utf-8 (ecase eol-style 262 (:lf 'flexi-utf-8-format) 263 (:cr 'flexi-cr-utf-8-format) 264 (:crlf 'flexi-crlf-utf-8-format))) 265 (:utf-16 (ecase eol-style 266 (:lf (if little-endian 267 'flexi-utf-16-le-format 268 'flexi-utf-16-be-format)) 269 (:cr (if little-endian 270 'flexi-cr-utf-16-le-format 271 'flexi-cr-utf-16-be-format)) 272 (:crlf (if little-endian 273 'flexi-crlf-utf-16-le-format 274 'flexi-crlf-utf-16-be-format)))) 275 (:utf-32 (ecase eol-style 276 (:lf (if little-endian 277 'flexi-utf-32-le-format 278 'flexi-utf-32-be-format)) 279 (:cr (if little-endian 280 'flexi-cr-utf-32-le-format 281 'flexi-cr-utf-32-be-format)) 282 (:crlf (if little-endian 283 'flexi-crlf-utf-32-le-format 284 'flexi-crlf-utf-32-be-format)))))))) 285 286 (defun make-external-format% (name &key (little-endian *default-little-endian*) 287 id eol-style) 288 "Used internally by MAKE-EXTERNAL-FORMAT to default some of the 289 keywords arguments and to determine the right subclass of 290 EXTERNAL-FORMAT." 291 (declare #.*standard-optimize-settings*) 292 (let* ((real-name (normalize-external-format-name name)) 293 (initargs 294 (cond ((or (iso-8859-name-p real-name) 295 (koi8-r-name-p real-name) 296 (ascii-name-p real-name)) 297 (list :eol-style (or eol-style *default-eol-style*))) 298 ((code-page-name-p real-name) 299 (list :id (or (known-code-page-id-p id) 300 (error 'external-format-error 301 :format-control "Unknown code page ID ~S" 302 :format-arguments (list id))) 303 ;; default EOL style for Windows code pages is :CRLF 304 :eol-style (or eol-style :crlf))) 305 (t (list :eol-style (or eol-style *default-eol-style*) 306 :little-endian little-endian))))) 307 (apply #'make-instance (apply #'external-format-class-name real-name initargs) 308 :name real-name 309 initargs))) 310 311 (defun make-external-format (name &rest args 312 &key (little-endian *default-little-endian*) 313 id eol-style) 314 "Creates and returns an external format object as specified. 315 NAME is a keyword like :LATIN1 or :UTF-8, LITTLE-ENDIAN specifies 316 the `endianess' of the external format and is ignored for 8-bit 317 encodings, EOL-STYLE is one of the keywords :CR, :LF, or :CRLF 318 which denote the end-of-line character \(sequence), ID is the ID 319 of a Windows code page \(and ignored for other encodings)." 320 (declare #.*standard-optimize-settings*) 321 ;; the keyword arguments are only there for arglist display in the IDE 322 (declare (ignore id little-endian)) 323 (let ((shortcut-args (cdr (assoc name +shortcut-map+ :test #'string-equal)))) 324 (cond (shortcut-args 325 (apply #'make-external-format% 326 (append shortcut-args 327 `(:eol-style ,eol-style)))) 328 (t (apply #'make-external-format% name args))))) 329 330 (defun maybe-convert-external-format (external-format) 331 "Given an external format designator \(a keyword, a list, or an 332 EXTERNAL-FORMAT object) returns the corresponding EXTERNAL-FORMAT 333 object." 334 (declare #.*standard-optimize-settings*) 335 (typecase external-format 336 (symbol (make-external-format external-format)) 337 (list (apply #'make-external-format external-format)) 338 (otherwise external-format))) 339 340 (defun external-format-equal (ef1 ef2) 341 "Checks whether two EXTERNAL-FORMAT objects denote the same encoding." 342 (declare #.*standard-optimize-settings*) 343 (let* ((name1 (external-format-name ef1)) 344 (code-page-name-p (code-page-name-p name1))) 345 ;; they must habe the same canonical name 346 (and (eq name1 347 (external-format-name ef2)) 348 ;; if both are code pages the IDs must be the same 349 (or (not code-page-name-p) 350 (eql (external-format-id ef1) 351 (external-format-id ef2))) 352 ;; for non-8-bit encodings the endianess must be the same 353 (or code-page-name-p 354 (ascii-name-p name1) 355 (koi8-r-name-p name1) 356 (iso-8859-name-p name1) 357 (eq name1 :utf-8) 358 (eq (not (external-format-little-endian ef1)) 359 (not (external-format-little-endian ef2)))) 360 ;; the EOL style must also be the same 361 (eq (external-format-eol-style ef1) 362 (external-format-eol-style ef2))))) 363 364 (defun normalize-external-format (external-format) 365 "Returns a list which is a `normalized' representation of the 366 external format EXTERNAL-FORMAT. Used internally by PRINT-OBJECT, for 367 example. Basically, the result is an argument list that can be fed 368 back to MAKE-EXTERNAL-FORMAT to create an equivalent object." 369 (declare #.*standard-optimize-settings*) 370 (let ((name (external-format-name external-format)) 371 (eol-style (external-format-eol-style external-format))) 372 (cond ((or (ascii-name-p name) 373 (koi8-r-name-p name) 374 (iso-8859-name-p name) 375 (eq name :utf-8)) 376 (list name :eol-style eol-style)) 377 ((code-page-name-p name) 378 (list name 379 :id (external-format-id external-format) 380 :eol-style eol-style)) 381 (t (list name 382 :eol-style eol-style 383 :little-endian (external-format-little-endian external-format)))))) 384 385 (defmethod print-object ((object external-format) stream) 386 "How an EXTERNAL-FORMAT object is rendered. Uses 387 NORMALIZE-EXTERNAL-FORMAT." 388 (print-unreadable-object (object stream :type t :identity t) 389 (prin1 (normalize-external-format object) stream)))