external-format.lisp - clic - Clic is an command line interactive client for gopher written in Common LISP
 (HTM) git clone git://bitreich.org/clic/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/clic/
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Tags
 (DIR) README
 (DIR) LICENSE
       ---
       external-format.lisp (17187B)
       ---
            1 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
            2 ;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.24 2008/05/26 10:55:08 edi Exp $
            3 
            4 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
            5 
            6 ;;; Redistribution and use in source and binary forms, with or without
            7 ;;; modification, are permitted provided that the following conditions
            8 ;;; are met:
            9 
           10 ;;;   * Redistributions of source code must retain the above copyright
           11 ;;;     notice, this list of conditions and the following disclaimer.
           12 
           13 ;;;   * Redistributions in binary form must reproduce the above
           14 ;;;     copyright notice, this list of conditions and the following
           15 ;;;     disclaimer in the documentation and/or other materials
           16 ;;;     provided with the distribution.
           17 
           18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
           19 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
           20 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
           21 ;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
           22 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
           23 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
           24 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
           25 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
           26 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
           27 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
           28 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
           29 
           30 (in-package :flexi-streams)
           31 
           32 (defclass external-format ()
           33   ((name :initarg :name
           34          :reader external-format-name
           35          :documentation "The name of the external format - a
           36 keyword.")
           37    (id :initarg :id
           38        :initform nil
           39        :reader external-format-id
           40        :documentation "If the external format denotes a Windows
           41 code page this ID specifies which one to use.  Otherwise the
           42 value is ignored \(and usually NIL).")
           43    (little-endian :initarg :little-endian
           44                   :initform *default-little-endian*
           45                   :reader external-format-little-endian
           46                   :documentation "Whether multi-octet values are
           47 read and written with the least significant octet first.  For
           48 8-bit encodings like :ISO-8859-1 this value is ignored.")
           49    (eol-style :initarg :eol-style
           50               :reader external-format-eol-style
           51               :documentation "The character\(s) to or from which
           52 a #\Newline will be translated - one of the keywords :CR, :LF,
           53 or :CRLF."))
           54   (:documentation "EXTERNAL-FORMAT objects are used to denote
           55 encodings for flexi streams or for the string functions defined in
           56 strings.lisp."))
           57 
           58 (defmethod make-load-form ((thing external-format) &optional environment)
           59   "Defines a way to reconstruct external formats.  Needed for OpenMCL."
           60   (make-load-form-saving-slots thing :environment environment))
           61 
           62 (defclass flexi-cr-mixin ()
           63   ()
           64   (:documentation "A mixin for external-formats where the end-of-line
           65 designator is #\Return."))
           66 
           67 (defclass flexi-crlf-mixin ()
           68   ()
           69   (:documentation "A mixin for external-formats where the end-of-line
           70 designator is the sequence #\Return #\Linefeed."))
           71 
           72 (defclass flexi-8-bit-format (external-format)
           73   ((encoding-hash :accessor external-format-encoding-hash)
           74    (decoding-table :accessor external-format-decoding-table))
           75   (:documentation "The class for all flexi streams which use an 8-bit
           76 encoding and thus need additional slots for the encoding/decoding
           77 tables."))
           78 
           79 (defclass flexi-cr-8-bit-format (flexi-cr-mixin flexi-8-bit-format)
           80   ()
           81   (:documentation "Special class for external formats which use an
           82 8-bit encoding /and/ have #\Return as the line-end character."))
           83 
           84 (defclass flexi-crlf-8-bit-format (flexi-crlf-mixin flexi-8-bit-format)
           85   ()
           86   (:documentation "Special class for external formats which use an
           87 8-bit encoding /and/ have the sequence #\Return #\Linefeed as the
           88 line-end character."))
           89 
           90 (defclass flexi-ascii-format (flexi-8-bit-format)
           91   ()
           92   (:documentation "Special class for external formats which use the
           93 US-ASCII encoding."))
           94 
           95 (defclass flexi-cr-ascii-format (flexi-cr-mixin flexi-ascii-format)
           96   ()
           97   (:documentation "Special class for external formats which use the
           98 US-ASCII encoding /and/ have #\Return as the line-end character."))
           99 
          100 (defclass flexi-crlf-ascii-format (flexi-crlf-mixin flexi-ascii-format)
          101   ()
          102   (:documentation "Special class for external formats which use the
          103 US-ASCII encoding /and/ have the sequence #\Return #\Linefeed as the
          104 line-end character."))
          105 
          106 (defclass flexi-latin-1-format (flexi-8-bit-format)
          107   ()
          108   (:documentation "Special class for external formats which use the
          109 ISO-8859-1 encoding."))
          110 
          111 (defclass flexi-cr-latin-1-format (flexi-cr-mixin flexi-latin-1-format)
          112   ()
          113   (:documentation "Special class for external formats which use the
          114 ISO-8859-1 encoding /and/ have #\Return as the line-end character."))
          115 
          116 (defclass flexi-crlf-latin-1-format (flexi-crlf-mixin flexi-latin-1-format)
          117   ()
          118   (:documentation "Special class for external formats which use the
          119 ISO-8859-1 encoding /and/ have the sequence #\Return #\Linefeed as the
          120 line-end character."))
          121 
          122 (defclass flexi-utf-32-format (external-format)
          123   ()
          124   (:documentation "Abstract class for external formats which use the
          125 UTF-32 encoding."))
          126 
          127 (defclass flexi-utf-32-le-format (flexi-utf-32-format)
          128   ()
          129   (:documentation "Special class for external formats which use the
          130 UTF-32 encoding with little-endian byte ordering."))
          131 
          132 (defclass flexi-cr-utf-32-le-format (flexi-cr-mixin flexi-utf-32-le-format)
          133   ()
          134   (:documentation "Special class for external formats which use the
          135 UTF-32 encoding with little-endian byte ordering /and/ have #\Return
          136 as the line-end character."))
          137 
          138 (defclass flexi-crlf-utf-32-le-format (flexi-crlf-mixin flexi-utf-32-le-format)
          139   ()
          140   (:documentation "Special class for external formats which use the
          141 UTF-32 encoding with little-endian byte ordering /and/ have the
          142 sequence #\Return #\Linefeed as the line-end character."))
          143 
          144 (defclass flexi-utf-32-be-format (flexi-utf-32-format)
          145   ()
          146   (:documentation "Special class for external formats which use the
          147 UTF-32 encoding with big-endian byte ordering."))
          148 
          149 (defclass flexi-cr-utf-32-be-format (flexi-cr-mixin flexi-utf-32-be-format)
          150   ()
          151   (:documentation "Special class for external formats which use the
          152 UTF-32 encoding with big-endian byte ordering /and/ have #\Return as
          153 the line-end character."))
          154 
          155 (defclass flexi-crlf-utf-32-be-format (flexi-crlf-mixin flexi-utf-32-be-format)
          156   ()
          157   (:documentation "Special class for external formats which use the
          158 the UTF-32 encoding with big-endian byte ordering /and/ have the
          159 sequence #\Return #\Linefeed as the line-end character."))
          160 
          161 (defclass flexi-utf-16-format (external-format)
          162   ()
          163   (:documentation "Abstract class for external formats which use the
          164 UTF-16 encoding."))
          165 
          166 (defclass flexi-utf-16-le-format (flexi-utf-16-format)
          167   ()
          168   (:documentation "Special class for external formats which use the
          169 UTF-16 encoding with little-endian byte ordering."))
          170 
          171 (defclass flexi-cr-utf-16-le-format (flexi-cr-mixin flexi-utf-16-le-format)
          172   ()
          173   (:documentation "Special class for external formats which use the
          174 UTF-16 encoding with little-endian byte ordering /and/ have #\Return
          175 as the line-end character."))
          176 
          177 (defclass flexi-crlf-utf-16-le-format (flexi-crlf-mixin flexi-utf-16-le-format)
          178   ()
          179   (:documentation "Special class for external formats which use the
          180 UTF-16 encoding with little-endian byte ordering /and/ have the
          181 sequence #\Return #\Linefeed as the line-end character."))
          182 
          183 (defclass flexi-utf-16-be-format (flexi-utf-16-format)
          184   ()
          185   (:documentation "Special class for external formats which use the
          186 UTF-16 encoding with big-endian byte ordering."))
          187 
          188 (defclass flexi-cr-utf-16-be-format (flexi-cr-mixin flexi-utf-16-be-format)
          189   ()
          190   (:documentation "Special class for external formats which use the
          191 UTF-16 encoding with big-endian byte ordering /and/ have #\Return as
          192 the line-end character."))
          193 
          194 (defclass flexi-crlf-utf-16-be-format (flexi-crlf-mixin flexi-utf-16-be-format)
          195   ()
          196   (:documentation "Special class for external formats which use the
          197 UTF-16 encoding with big-endian byte ordering /and/ have the sequence
          198 #\Return #\Linefeed as the line-end character."))
          199 
          200 (defclass flexi-utf-8-format (external-format)
          201   ()
          202   (:documentation "Special class for external formats which use the
          203 UTF-8 encoding."))
          204 
          205 (defclass flexi-cr-utf-8-format (flexi-cr-mixin flexi-utf-8-format)
          206   ()
          207   (:documentation "Special class for external formats which use the
          208 UTF-8 encoding /and/ have #\Return as the line-end character."))
          209 
          210 (defclass flexi-crlf-utf-8-format (flexi-crlf-mixin flexi-utf-8-format)
          211   ()
          212   (:documentation "Special class for external formats which use the
          213 UTF-8 encoding /and/ have the sequence #\Return #\Linefeed as the
          214 line-end character."))
          215 
          216 (defmethod initialize-instance :after ((external-format flexi-8-bit-format) &rest initargs)
          217   "Sets the fixed encoding/decoding tables for this particular
          218 external format."
          219   (declare #.*standard-optimize-settings*)
          220   (declare (ignore initargs))
          221   (with-accessors ((encoding-hash external-format-encoding-hash)
          222                    (decoding-table external-format-decoding-table)
          223                    (name external-format-name)
          224                    (id external-format-id))
          225       external-format
          226     (multiple-value-setq (encoding-hash decoding-table)
          227         (cond ((ascii-name-p name)
          228                (values +ascii-hash+ +ascii-table+))
          229               ((koi8-r-name-p name)
          230                (values +koi8-r-hash+ +koi8-r-table+))
          231               ((iso-8859-name-p name)
          232                (values (cdr (assoc name +iso-8859-hashes+ :test #'eq))                       
          233                        (cdr (assoc name +iso-8859-tables+ :test #'eq))))
          234               ((code-page-name-p name)
          235                (values (cdr (assoc id +code-page-hashes+))                       
          236                        (cdr (assoc id +code-page-tables+))))))))
          237 
          238 (defun external-format-class-name (real-name &key eol-style little-endian id)
          239   "Given the initargs for a general external format returns the name
          240 \(a symbol) of the most specific subclass matching these arguments."
          241   (declare #.*standard-optimize-settings*)
          242   (declare (ignore id))
          243   (cond ((ascii-name-p real-name)
          244          (ecase eol-style
          245            (:lf 'flexi-ascii-format)
          246            (:cr 'flexi-cr-ascii-format)
          247            (:crlf 'flexi-crlf-ascii-format)))
          248         ((eq real-name :iso-8859-1)
          249          (ecase eol-style
          250            (:lf 'flexi-latin-1-format)
          251            (:cr 'flexi-cr-latin-1-format)
          252            (:crlf 'flexi-crlf-latin-1-format)))
          253         ((or (koi8-r-name-p real-name)
          254              (iso-8859-name-p real-name)
          255              (code-page-name-p real-name))
          256          (ecase eol-style
          257            (:lf 'flexi-8-bit-format)
          258            (:cr 'flexi-cr-8-bit-format)
          259            (:crlf 'flexi-crlf-8-bit-format)))
          260         (t (ecase real-name
          261              (:utf-8 (ecase eol-style
          262                        (:lf 'flexi-utf-8-format)
          263                        (:cr 'flexi-cr-utf-8-format)
          264                        (:crlf 'flexi-crlf-utf-8-format)))
          265              (:utf-16 (ecase eol-style
          266                         (:lf (if little-endian
          267                                'flexi-utf-16-le-format
          268                                'flexi-utf-16-be-format))
          269                         (:cr (if little-endian
          270                                'flexi-cr-utf-16-le-format
          271                                'flexi-cr-utf-16-be-format))
          272                         (:crlf (if little-endian
          273                                  'flexi-crlf-utf-16-le-format
          274                                  'flexi-crlf-utf-16-be-format))))
          275              (:utf-32 (ecase eol-style
          276                         (:lf (if little-endian
          277                                'flexi-utf-32-le-format
          278                                'flexi-utf-32-be-format))
          279                         (:cr (if little-endian
          280                                'flexi-cr-utf-32-le-format
          281                                'flexi-cr-utf-32-be-format))
          282                         (:crlf (if little-endian
          283                                  'flexi-crlf-utf-32-le-format
          284                                  'flexi-crlf-utf-32-be-format))))))))
          285                          
          286 (defun make-external-format% (name &key (little-endian *default-little-endian*)
          287                                    id eol-style)
          288   "Used internally by MAKE-EXTERNAL-FORMAT to default some of the
          289 keywords arguments and to determine the right subclass of
          290 EXTERNAL-FORMAT."
          291   (declare #.*standard-optimize-settings*)
          292   (let* ((real-name (normalize-external-format-name name))
          293          (initargs
          294           (cond ((or (iso-8859-name-p real-name)
          295                      (koi8-r-name-p real-name)
          296                      (ascii-name-p real-name))
          297                  (list :eol-style (or eol-style *default-eol-style*)))
          298                 ((code-page-name-p real-name)
          299                  (list :id (or (known-code-page-id-p id)
          300                                (error 'external-format-error
          301                                       :format-control "Unknown code page ID ~S"
          302                                       :format-arguments (list id)))
          303                        ;; default EOL style for Windows code pages is :CRLF
          304                        :eol-style (or eol-style :crlf)))
          305                 (t (list :eol-style (or eol-style *default-eol-style*)
          306                          :little-endian little-endian)))))
          307     (apply #'make-instance (apply #'external-format-class-name real-name initargs)
          308            :name real-name
          309            initargs)))
          310 
          311 (defun make-external-format (name &rest args
          312                                   &key (little-endian *default-little-endian*)
          313                                        id eol-style)
          314   "Creates and returns an external format object as specified.
          315 NAME is a keyword like :LATIN1 or :UTF-8, LITTLE-ENDIAN specifies
          316 the `endianess' of the external format and is ignored for 8-bit
          317 encodings, EOL-STYLE is one of the keywords :CR, :LF, or :CRLF
          318 which denote the end-of-line character \(sequence), ID is the ID
          319 of a Windows code page \(and ignored for other encodings)."
          320   (declare #.*standard-optimize-settings*)
          321   ;; the keyword arguments are only there for arglist display in the IDE
          322   (declare (ignore id little-endian))
          323   (let ((shortcut-args (cdr (assoc name +shortcut-map+ :test #'string-equal))))
          324     (cond (shortcut-args
          325            (apply #'make-external-format%
          326                   (append shortcut-args
          327                           `(:eol-style ,eol-style))))
          328           (t (apply #'make-external-format% name args)))))
          329 
          330 (defun maybe-convert-external-format (external-format)
          331   "Given an external format designator \(a keyword, a list, or an
          332 EXTERNAL-FORMAT object) returns the corresponding EXTERNAL-FORMAT
          333 object."
          334   (declare #.*standard-optimize-settings*)
          335   (typecase external-format
          336     (symbol (make-external-format external-format))
          337     (list (apply #'make-external-format external-format))
          338     (otherwise external-format)))
          339   
          340 (defun external-format-equal (ef1 ef2)
          341   "Checks whether two EXTERNAL-FORMAT objects denote the same encoding."
          342   (declare #.*standard-optimize-settings*)
          343   (let* ((name1 (external-format-name ef1))
          344          (code-page-name-p (code-page-name-p name1)))
          345     ;; they must habe the same canonical name
          346     (and (eq name1
          347              (external-format-name ef2))
          348          ;; if both are code pages the IDs must be the same
          349          (or (not code-page-name-p)
          350              (eql (external-format-id ef1)
          351                   (external-format-id ef2)))
          352          ;; for non-8-bit encodings the endianess must be the same
          353          (or code-page-name-p
          354              (ascii-name-p name1)
          355              (koi8-r-name-p name1)
          356              (iso-8859-name-p name1)
          357              (eq name1 :utf-8)
          358              (eq (not (external-format-little-endian ef1))
          359                  (not (external-format-little-endian ef2))))
          360          ;; the EOL style must also be the same
          361          (eq (external-format-eol-style ef1)
          362              (external-format-eol-style ef2)))))
          363 
          364 (defun normalize-external-format (external-format)
          365   "Returns a list which is a `normalized' representation of the
          366 external format EXTERNAL-FORMAT.  Used internally by PRINT-OBJECT, for
          367 example.  Basically, the result is an argument list that can be fed
          368 back to MAKE-EXTERNAL-FORMAT to create an equivalent object."
          369   (declare #.*standard-optimize-settings*)
          370   (let ((name (external-format-name external-format))
          371         (eol-style (external-format-eol-style external-format)))
          372     (cond ((or (ascii-name-p name)
          373                (koi8-r-name-p name)
          374                (iso-8859-name-p name)
          375                (eq name :utf-8))
          376            (list name :eol-style eol-style))
          377           ((code-page-name-p name)
          378            (list name
          379                  :id (external-format-id external-format)
          380                  :eol-style eol-style))
          381           (t (list name
          382                    :eol-style eol-style
          383                    :little-endian (external-format-little-endian external-format))))))
          384 
          385 (defmethod print-object ((object external-format) stream)
          386   "How an EXTERNAL-FORMAT object is rendered.  Uses
          387 NORMALIZE-EXTERNAL-FORMAT."
          388   (print-unreadable-object (object stream :type t :identity t)
          389     (prin1 (normalize-external-format object) stream)))