enc-cp1252.lisp - clic - Clic is an command line interactive client for gopher written in Common LISP
 (HTM) git clone git://bitreich.org/clic/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/clic/
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Tags
 (DIR) README
 (DIR) LICENSE
       ---
       enc-cp1252.lisp (3129B)
       ---
            1 ;;;; -*- Mode: lisp; indent-tabs-mode: nil -*-
            2 ;;;
            3 ;;; enc-cp1252.lisp --- Implementation of the CP1252 character encoding.
            4 ;;;
            5 ;;; Copyright (C) 2011, Nicolas Martyanoff
            6 ;;;
            7 ;;; Permission is hereby granted, free of charge, to any person
            8 ;;; obtaining a copy of this software and associated documentation
            9 ;;; files (the "Software"), to deal in the Software without
           10 ;;; restriction, including without limitation the rights to use, copy,
           11 ;;; modify, merge, publish, distribute, sublicense, and/or sell copies
           12 ;;; of the Software, and to permit persons to whom the Software is
           13 ;;; furnished to do so, subject to the following conditions:
           14 ;;;
           15 ;;; The above copyright notice and this permission notice shall be
           16 ;;; included in all copies or substantial portions of the Software.
           17 ;;;
           18 ;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
           19 ;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
           20 ;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
           21 ;;; NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
           22 ;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
           23 ;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
           24 ;;; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
           25 ;;; DEALINGS IN THE SOFTWARE.
           26 
           27 (in-package #:babel-encodings)
           28 
           29 (define-character-encoding :cp1252
           30     "A 8-bit, fixed-width character encoding used by Windows for Western
           31     European languages."
           32   :aliases '(:windows-1252)
           33   :literal-char-code-limit 256)
           34 
           35 (define-constant +cp1252-to-unicode+
           36     #(#x20ac    nil #x201a #x0192 #x201e #x2026 #x2020 #x2021
           37       #x02c6 #x2030 #x0160 #x2039 #x0152    nil #x017d    nil
           38          nil #x2018 #x2019 #x201c #x201d #x2022 #x2013 #x2014
           39       #x02dc #x2122 #x0161 #x203a #x0153    nil #x017e #x0178)
           40   :test #'equalp)
           41 
           42 (define-unibyte-decoder :cp1252 (octet)
           43   (if (and (>= octet #x80) (<= octet #x9f))
           44       (svref +cp1252-to-unicode+
           45              (the ub8 (- octet #x80)))
           46       octet))
           47 
           48 (define-constant +unicode-0152-017e-cp1252+
           49     #(#x8c #x9c #x00 #x00 #x00 #x00 #x00 #x00
           50       #x00 #x00 #x00 #x00 #x00 #x00 #x8a #x9a
           51       #x00 #x00 #x00 #x00 #x00 #x00 #x00 #x00
           52       #x00 #x00 #x00 #x00 #x00 #x00 #x00 #x00
           53       #x00 #x00 #x00 #x00 #x00 #x00 #x9f #x00
           54       #x00 #x00 #x00 #x8e #x9e)
           55   :test #'equalp)
           56 
           57 (define-constant +unicode-2013-203a-cp1252+
           58     #(#x96 #x97 #x00 #x00 #x00 #x91 #x92 #x82
           59       #x00 #x93 #x94 #x84 #x00 #x86 #x87 #x95
           60       #x00 #x00 #x00 #x85 #x00 #x00 #x00 #x00
           61       #x00 #x00 #x00 #x00 #x00 #x89 #x00 #x00
           62       #x00 #x00 #x00 #x00 #x00 #x00 #x8b #x9b)
           63   :test #'equalp)
           64 
           65 (define-unibyte-encoder :cp1252 (code)
           66   (cond
           67     ((or (< code #x80)
           68          (and (> code #xa0) (<= code #xff)))
           69      code)
           70     ((and (>= code #x0152) (<= code #x017e))
           71      (svref +unicode-0152-017e-cp1252+
           72             (the ub8 (- code #x0152))))
           73     ((= code #x0192) #x83)
           74     ((= code #x02c6) #x88)
           75     ((= code #x02dc) #x89)
           76     ((and (>= code #x2013) (<= code #x203a))
           77      (svref +unicode-2013-203a-cp1252+
           78             (the ub8 (- code #x2013))))
           79     ((= code #x20ac) #x80)
           80     ((= code #x2122) #x99)
           81     (t (handle-error))))