enc-jpn.lisp - clic - Clic is an command line interactive client for gopher written in Common LISP
 (HTM) git clone git://bitreich.org/clic/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/clic/
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Tags
 (DIR) README
 (DIR) LICENSE
       ---
       enc-jpn.lisp (37967B)
       ---
            1 ;;;; -*- Mode: lisp; indent-tabs-mode: nil -*-
            2 ;;;
            3 ;;; enc-jpn.lisp --- Japanese encodings.
            4 ;;;
            5 
            6 (in-package #:babel-encodings)
            7 
            8 ;;;; helper functions
            9 (defvar *eucjp-to-ucs-hash* (make-hash-table))
           10 (defvar *ucs-to-eucjp-hash* (make-hash-table))
           11 (defvar *cp932-to-ucs-hash* (make-hash-table))
           12 (defvar *ucs-to-cp932-hash* (make-hash-table))
           13 
           14 (dolist (i `((,*cp932-only*
           15               ,*cp932-to-ucs-hash*
           16               ,*ucs-to-cp932-hash*)
           17              (,*eucjp-only*
           18               ,*eucjp-to-ucs-hash*
           19               ,*ucs-to-eucjp-hash*)
           20              (,*eucjp*
           21               ,*eucjp-to-ucs-hash*
           22               ,*ucs-to-eucjp-hash*)))
           23   (dolist (j (first i))
           24     (setf (gethash (car j) (second i)) (cadr j))
           25     (setf (gethash (cadr j) (third i)) (car j))))
           26 
           27 (flet ((euc-cp932 (x)
           28          (let ((high (ash x -16))
           29                (mid (logand (ash x -8) 255))
           30                (low (logand x 255)))
           31            (cond ((not (zerop high))
           32                   nil)
           33                  ((= mid #x8e)
           34                   (logand x 255))
           35                  ((zerop mid)
           36                   x)
           37                  ((decf mid #xa1)
           38                   (decf low #x80)
           39                   (incf low (if (zerop (logand mid 1)) #x1f #x7e))
           40                   (incf low (if (<= #x7f low #x9d) 1 0))
           41                   (setq mid (ash mid -1))
           42                   (incf mid (if (<= mid #x1e) #x81 #xc1))
           43                   (+ (ash mid 8) low))))))
           44   (dolist (i *eucjp*)
           45     (let ((cp932 (euc-cp932 (first i))))
           46       (when cp932
           47         (setf (gethash cp932 *cp932-to-ucs-hash*) (second i))
           48         (setf (gethash (second i) *ucs-to-cp932-hash*) cp932)))))
           49 
           50 ;ascii
           51 (loop for i from #x00 to #x7f do
           52       (setf (gethash i *cp932-to-ucs-hash*) i)
           53       (setf (gethash i *eucjp-to-ucs-hash*) i)
           54       (setf (gethash i *ucs-to-eucjp-hash*) i)
           55       (setf (gethash i *ucs-to-cp932-hash*) i))
           56 
           57 ;half-width katakana
           58 (loop for i from #xa1 to #xdf do
           59       (setf (gethash i *cp932-to-ucs-hash*) (+ #xff61 #x-a1 i))
           60       (setf (gethash (+ #xff61 #x-a1 i) *ucs-to-cp932-hash*) i)
           61       (setf (gethash (+ #x8e00 i) *eucjp-to-ucs-hash*) (+ #xff61 #x-a1 i))
           62       (setf (gethash (+ #xff61 #x-a1 i) *ucs-to-eucjp-hash*) (+ #x8e00 i)))
           63 
           64 ;; This is quoted from https://support.microsoft.com/en-us/kb/170559/en-us
           65 (let ((kb170559 "0x8790   -> U+2252   -> 0x81e0   Approximately Equal To Or The Image Of
           66 0x8791   -> U+2261   -> 0x81df   Identical To
           67 0x8792   -> U+222b   -> 0x81e7   Integral
           68 0x8795   -> U+221a   -> 0x81e3   Square Root
           69 0x8796   -> U+22a5   -> 0x81db   Up Tack
           70 0x8797   -> U+2220   -> 0x81da   Angle
           71 0x879a   -> U+2235   -> 0x81e6   Because
           72 0x879b   -> U+2229   -> 0x81bf   Intersection
           73 0x879c   -> U+222a   -> 0x81be   Union
           74 0xed40   -> U+7e8a   -> 0xfa5c   CJK Unified Ideograph
           75 0xed41   -> U+891c   -> 0xfa5d   CJK Unified Ideograph
           76 0xed42   -> U+9348   -> 0xfa5e   CJK Unified Ideograph
           77 0xed43   -> U+9288   -> 0xfa5f   CJK Unified Ideograph
           78 0xed44   -> U+84dc   -> 0xfa60   CJK Unified Ideograph
           79 0xed45   -> U+4fc9   -> 0xfa61   CJK Unified Ideograph
           80 0xed46   -> U+70bb   -> 0xfa62   CJK Unified Ideograph
           81 0xed47   -> U+6631   -> 0xfa63   CJK Unified Ideograph
           82 0xed48   -> U+68c8   -> 0xfa64   CJK Unified Ideograph
           83 0xed49   -> U+92f9   -> 0xfa65   CJK Unified Ideograph
           84 0xed4a   -> U+66fb   -> 0xfa66   CJK Unified Ideograph
           85 0xed4b   -> U+5f45   -> 0xfa67   CJK Unified Ideograph
           86 0xed4c   -> U+4e28   -> 0xfa68   CJK Unified Ideograph
           87 0xed4d   -> U+4ee1   -> 0xfa69   CJK Unified Ideograph
           88 0xed4e   -> U+4efc   -> 0xfa6a   CJK Unified Ideograph
           89 0xed4f   -> U+4f00   -> 0xfa6b   CJK Unified Ideograph
           90 0xed50   -> U+4f03   -> 0xfa6c   CJK Unified Ideograph
           91 0xed51   -> U+4f39   -> 0xfa6d   CJK Unified Ideograph
           92 0xed52   -> U+4f56   -> 0xfa6e   CJK Unified Ideograph
           93 0xed53   -> U+4f92   -> 0xfa6f   CJK Unified Ideograph
           94 0xed54   -> U+4f8a   -> 0xfa70   CJK Unified Ideograph
           95 0xed55   -> U+4f9a   -> 0xfa71   CJK Unified Ideograph
           96 0xed56   -> U+4f94   -> 0xfa72   CJK Unified Ideograph
           97 0xed57   -> U+4fcd   -> 0xfa73   CJK Unified Ideograph
           98 0xed58   -> U+5040   -> 0xfa74   CJK Unified Ideograph
           99 0xed59   -> U+5022   -> 0xfa75   CJK Unified Ideograph
          100 0xed5a   -> U+4fff   -> 0xfa76   CJK Unified Ideograph
          101 0xed5b   -> U+501e   -> 0xfa77   CJK Unified Ideograph
          102 0xed5c   -> U+5046   -> 0xfa78   CJK Unified Ideograph
          103 0xed5d   -> U+5070   -> 0xfa79   CJK Unified Ideograph
          104 0xed5e   -> U+5042   -> 0xfa7a   CJK Unified Ideograph
          105 0xed5f   -> U+5094   -> 0xfa7b   CJK Unified Ideograph
          106 0xed60   -> U+50f4   -> 0xfa7c   CJK Unified Ideograph
          107 0xed61   -> U+50d8   -> 0xfa7d   CJK Unified Ideograph
          108 0xed62   -> U+514a   -> 0xfa7e   CJK Unified Ideograph
          109 0xed63   -> U+5164   -> 0xfa80   CJK Unified Ideograph
          110 0xed64   -> U+519d   -> 0xfa81   CJK Unified Ideograph
          111 0xed65   -> U+51be   -> 0xfa82   CJK Unified Ideograph
          112 0xed66   -> U+51ec   -> 0xfa83   CJK Unified Ideograph
          113 0xed67   -> U+5215   -> 0xfa84   CJK Unified Ideograph
          114 0xed68   -> U+529c   -> 0xfa85   CJK Unified Ideograph
          115 0xed69   -> U+52a6   -> 0xfa86   CJK Unified Ideograph
          116 0xed6a   -> U+52c0   -> 0xfa87   CJK Unified Ideograph
          117 0xed6b   -> U+52db   -> 0xfa88   CJK Unified Ideograph
          118 0xed6c   -> U+5300   -> 0xfa89   CJK Unified Ideograph
          119 0xed6d   -> U+5307   -> 0xfa8a   CJK Unified Ideograph
          120 0xed6e   -> U+5324   -> 0xfa8b   CJK Unified Ideograph
          121 0xed6f   -> U+5372   -> 0xfa8c   CJK Unified Ideograph
          122 0xed70   -> U+5393   -> 0xfa8d   CJK Unified Ideograph
          123 0xed71   -> U+53b2   -> 0xfa8e   CJK Unified Ideograph
          124 0xed72   -> U+53dd   -> 0xfa8f   CJK Unified Ideograph
          125 0xed73   -> U+fa0e   -> 0xfa90   CJK compatibility Ideograph
          126 0xed74   -> U+549c   -> 0xfa91   CJK Unified Ideograph
          127 0xed75   -> U+548a   -> 0xfa92   CJK Unified Ideograph
          128 0xed76   -> U+54a9   -> 0xfa93   CJK Unified Ideograph
          129 0xed77   -> U+54ff   -> 0xfa94   CJK Unified Ideograph
          130 0xed78   -> U+5586   -> 0xfa95   CJK Unified Ideograph
          131 0xed79   -> U+5759   -> 0xfa96   CJK Unified Ideograph
          132 0xed7a   -> U+5765   -> 0xfa97   CJK Unified Ideograph
          133 0xed7b   -> U+57ac   -> 0xfa98   CJK Unified Ideograph
          134 0xed7c   -> U+57c8   -> 0xfa99   CJK Unified Ideograph
          135 0xed7d   -> U+57c7   -> 0xfa9a   CJK Unified Ideograph
          136 0xed7e   -> U+fa0f   -> 0xfa9b   CJK compatibility Ideograph
          137 0xed80   -> U+fa10   -> 0xfa9c   CJK compatibility Ideograph
          138 0xed81   -> U+589e   -> 0xfa9d   CJK Unified Ideograph
          139 0xed82   -> U+58b2   -> 0xfa9e   CJK Unified Ideograph
          140 0xed83   -> U+590b   -> 0xfa9f   CJK Unified Ideograph
          141 0xed84   -> U+5953   -> 0xfaa0   CJK Unified Ideograph
          142 0xed85   -> U+595b   -> 0xfaa1   CJK Unified Ideograph
          143 0xed86   -> U+595d   -> 0xfaa2   CJK Unified Ideograph
          144 0xed87   -> U+5963   -> 0xfaa3   CJK Unified Ideograph
          145 0xed88   -> U+59a4   -> 0xfaa4   CJK Unified Ideograph
          146 0xed89   -> U+59ba   -> 0xfaa5   CJK Unified Ideograph
          147 0xed8a   -> U+5b56   -> 0xfaa6   CJK Unified Ideograph
          148 0xed8b   -> U+5bc0   -> 0xfaa7   CJK Unified Ideograph
          149 0xed8c   -> U+752f   -> 0xfaa8   CJK Unified Ideograph
          150 0xed8d   -> U+5bd8   -> 0xfaa9   CJK Unified Ideograph
          151 0xed8e   -> U+5bec   -> 0xfaaa   CJK Unified Ideograph
          152 0xed8f   -> U+5c1e   -> 0xfaab   CJK Unified Ideograph
          153 0xed90   -> U+5ca6   -> 0xfaac   CJK Unified Ideograph
          154 0xed91   -> U+5cba   -> 0xfaad   CJK Unified Ideograph
          155 0xed92   -> U+5cf5   -> 0xfaae   CJK Unified Ideograph
          156 0xed93   -> U+5d27   -> 0xfaaf   CJK Unified Ideograph
          157 0xed94   -> U+5d53   -> 0xfab0   CJK Unified Ideograph
          158 0xed95   -> U+fa11   -> 0xfab1   CJK compatibility Ideograph
          159 0xed96   -> U+5d42   -> 0xfab2   CJK Unified Ideograph
          160 0xed97   -> U+5d6d   -> 0xfab3   CJK Unified Ideograph
          161 0xed98   -> U+5db8   -> 0xfab4   CJK Unified Ideograph
          162 0xed99   -> U+5db9   -> 0xfab5   CJK Unified Ideograph
          163 0xed9a   -> U+5dd0   -> 0xfab6   CJK Unified Ideograph
          164 0xed9b   -> U+5f21   -> 0xfab7   CJK Unified Ideograph
          165 0xed9c   -> U+5f34   -> 0xfab8   CJK Unified Ideograph
          166 0xed9d   -> U+5f67   -> 0xfab9   CJK Unified Ideograph
          167 0xed9e   -> U+5fb7   -> 0xfaba   CJK Unified Ideograph
          168 0xed9f   -> U+5fde   -> 0xfabb   CJK Unified Ideograph
          169 0xeda0   -> U+605d   -> 0xfabc   CJK Unified Ideograph
          170 0xeda1   -> U+6085   -> 0xfabd   CJK Unified Ideograph
          171 0xeda2   -> U+608a   -> 0xfabe   CJK Unified Ideograph
          172 0xeda3   -> U+60de   -> 0xfabf   CJK Unified Ideograph
          173 0xeda4   -> U+60d5   -> 0xfac0   CJK Unified Ideograph
          174 0xeda5   -> U+6120   -> 0xfac1   CJK Unified Ideograph
          175 0xeda6   -> U+60f2   -> 0xfac2   CJK Unified Ideograph
          176 0xeda7   -> U+6111   -> 0xfac3   CJK Unified Ideograph
          177 0xeda8   -> U+6137   -> 0xfac4   CJK Unified Ideograph
          178 0xeda9   -> U+6130   -> 0xfac5   CJK Unified Ideograph
          179 0xedaa   -> U+6198   -> 0xfac6   CJK Unified Ideograph
          180 0xedab   -> U+6213   -> 0xfac7   CJK Unified Ideograph
          181 0xedac   -> U+62a6   -> 0xfac8   CJK Unified Ideograph
          182 0xedad   -> U+63f5   -> 0xfac9   CJK Unified Ideograph
          183 0xedae   -> U+6460   -> 0xfaca   CJK Unified Ideograph
          184 0xedaf   -> U+649d   -> 0xfacb   CJK Unified Ideograph
          185 0xedb0   -> U+64ce   -> 0xfacc   CJK Unified Ideograph
          186 0xedb1   -> U+654e   -> 0xfacd   CJK Unified Ideograph
          187 0xedb2   -> U+6600   -> 0xface   CJK Unified Ideograph
          188 0xedb3   -> U+6615   -> 0xfacf   CJK Unified Ideograph
          189 0xedb4   -> U+663b   -> 0xfad0   CJK Unified Ideograph
          190 0xedb5   -> U+6609   -> 0xfad1   CJK Unified Ideograph
          191 0xedb6   -> U+662e   -> 0xfad2   CJK Unified Ideograph
          192 0xedb7   -> U+661e   -> 0xfad3   CJK Unified Ideograph
          193 0xedb8   -> U+6624   -> 0xfad4   CJK Unified Ideograph
          194 0xedb9   -> U+6665   -> 0xfad5   CJK Unified Ideograph
          195 0xedba   -> U+6657   -> 0xfad6   CJK Unified Ideograph
          196 0xedbb   -> U+6659   -> 0xfad7   CJK Unified Ideograph
          197 0xedbc   -> U+fa12   -> 0xfad8   CJK compatibility Ideograph
          198 0xedbd   -> U+6673   -> 0xfad9   CJK Unified Ideograph
          199 0xedbe   -> U+6699   -> 0xfada   CJK Unified Ideograph
          200 0xedbf   -> U+66a0   -> 0xfadb   CJK Unified Ideograph
          201 0xedc0   -> U+66b2   -> 0xfadc   CJK Unified Ideograph
          202 0xedc1   -> U+66bf   -> 0xfadd   CJK Unified Ideograph
          203 0xedc2   -> U+66fa   -> 0xfade   CJK Unified Ideograph
          204 0xedc3   -> U+670e   -> 0xfadf   CJK Unified Ideograph
          205 0xedc4   -> U+f929   -> 0xfae0   CJK compatibility Ideograph
          206 0xedc5   -> U+6766   -> 0xfae1   CJK Unified Ideograph
          207 0xedc6   -> U+67bb   -> 0xfae2   CJK Unified Ideograph
          208 0xedc7   -> U+6852   -> 0xfae3   CJK Unified Ideograph
          209 0xedc8   -> U+67c0   -> 0xfae4   CJK Unified Ideograph
          210 0xedc9   -> U+6801   -> 0xfae5   CJK Unified Ideograph
          211 0xedca   -> U+6844   -> 0xfae6   CJK Unified Ideograph
          212 0xedcb   -> U+68cf   -> 0xfae7   CJK Unified Ideograph
          213 0xedcc   -> U+fa13   -> 0xfae8   CJK compatibility Ideograph
          214 0xedcd   -> U+6968   -> 0xfae9   CJK Unified Ideograph
          215 0xedce   -> U+fa14   -> 0xfaea   CJK compatibility Ideograph
          216 0xedcf   -> U+6998   -> 0xfaeb   CJK Unified Ideograph
          217 0xedd0   -> U+69e2   -> 0xfaec   CJK Unified Ideograph
          218 0xedd1   -> U+6a30   -> 0xfaed   CJK Unified Ideograph
          219 0xedd2   -> U+6a6b   -> 0xfaee   CJK Unified Ideograph
          220 0xedd3   -> U+6a46   -> 0xfaef   CJK Unified Ideograph
          221 0xedd4   -> U+6a73   -> 0xfaf0   CJK Unified Ideograph
          222 0xedd5   -> U+6a7e   -> 0xfaf1   CJK Unified Ideograph
          223 0xedd6   -> U+6ae2   -> 0xfaf2   CJK Unified Ideograph
          224 0xedd7   -> U+6ae4   -> 0xfaf3   CJK Unified Ideograph
          225 0xedd8   -> U+6bd6   -> 0xfaf4   CJK Unified Ideograph
          226 0xedd9   -> U+6c3f   -> 0xfaf5   CJK Unified Ideograph
          227 0xedda   -> U+6c5c   -> 0xfaf6   CJK Unified Ideograph
          228 0xeddb   -> U+6c86   -> 0xfaf7   CJK Unified Ideograph
          229 0xeddc   -> U+6c6f   -> 0xfaf8   CJK Unified Ideograph
          230 0xeddd   -> U+6cda   -> 0xfaf9   CJK Unified Ideograph
          231 0xedde   -> U+6d04   -> 0xfafa   CJK Unified Ideograph
          232 0xeddf   -> U+6d87   -> 0xfafb   CJK Unified Ideograph
          233 0xede0   -> U+6d6f   -> 0xfafc   CJK Unified Ideograph
          234 0xede1   -> U+6d96   -> 0xfb40   CJK Unified Ideograph
          235 0xede2   -> U+6dac   -> 0xfb41   CJK Unified Ideograph
          236 0xede3   -> U+6dcf   -> 0xfb42   CJK Unified Ideograph
          237 0xede4   -> U+6df8   -> 0xfb43   CJK Unified Ideograph
          238 0xede5   -> U+6df2   -> 0xfb44   CJK Unified Ideograph
          239 0xede6   -> U+6dfc   -> 0xfb45   CJK Unified Ideograph
          240 0xede7   -> U+6e39   -> 0xfb46   CJK Unified Ideograph
          241 0xede8   -> U+6e5c   -> 0xfb47   CJK Unified Ideograph
          242 0xede9   -> U+6e27   -> 0xfb48   CJK Unified Ideograph
          243 0xedea   -> U+6e3c   -> 0xfb49   CJK Unified Ideograph
          244 0xedeb   -> U+6ebf   -> 0xfb4a   CJK Unified Ideograph
          245 0xedec   -> U+6f88   -> 0xfb4b   CJK Unified Ideograph
          246 0xeded   -> U+6fb5   -> 0xfb4c   CJK Unified Ideograph
          247 0xedee   -> U+6ff5   -> 0xfb4d   CJK Unified Ideograph
          248 0xedef   -> U+7005   -> 0xfb4e   CJK Unified Ideograph
          249 0xedf0   -> U+7007   -> 0xfb4f   CJK Unified Ideograph
          250 0xedf1   -> U+7028   -> 0xfb50   CJK Unified Ideograph
          251 0xedf2   -> U+7085   -> 0xfb51   CJK Unified Ideograph
          252 0xedf3   -> U+70ab   -> 0xfb52   CJK Unified Ideograph
          253 0xedf4   -> U+710f   -> 0xfb53   CJK Unified Ideograph
          254 0xedf5   -> U+7104   -> 0xfb54   CJK Unified Ideograph
          255 0xedf6   -> U+715c   -> 0xfb55   CJK Unified Ideograph
          256 0xedf7   -> U+7146   -> 0xfb56   CJK Unified Ideograph
          257 0xedf8   -> U+7147   -> 0xfb57   CJK Unified Ideograph
          258 0xedf9   -> U+fa15   -> 0xfb58   CJK compatibility Ideograph
          259 0xedfa   -> U+71c1   -> 0xfb59   CJK Unified Ideograph
          260 0xedfb   -> U+71fe   -> 0xfb5a   CJK Unified Ideograph
          261 0xedfc   -> U+72b1   -> 0xfb5b   CJK Unified Ideograph
          262 0xee40   -> U+72be   -> 0xfb5c   CJK Unified Ideograph
          263 0xee41   -> U+7324   -> 0xfb5d   CJK Unified Ideograph
          264 0xee42   -> U+fa16   -> 0xfb5e   CJK compatibility Ideograph
          265 0xee43   -> U+7377   -> 0xfb5f   CJK Unified Ideograph
          266 0xee44   -> U+73bd   -> 0xfb60   CJK Unified Ideograph
          267 0xee45   -> U+73c9   -> 0xfb61   CJK Unified Ideograph
          268 0xee46   -> U+73d6   -> 0xfb62   CJK Unified Ideograph
          269 0xee47   -> U+73e3   -> 0xfb63   CJK Unified Ideograph
          270 0xee48   -> U+73d2   -> 0xfb64   CJK Unified Ideograph
          271 0xee49   -> U+7407   -> 0xfb65   CJK Unified Ideograph
          272 0xee4a   -> U+73f5   -> 0xfb66   CJK Unified Ideograph
          273 0xee4b   -> U+7426   -> 0xfb67   CJK Unified Ideograph
          274 0xee4c   -> U+742a   -> 0xfb68   CJK Unified Ideograph
          275 0xee4d   -> U+7429   -> 0xfb69   CJK Unified Ideograph
          276 0xee4e   -> U+742e   -> 0xfb6a   CJK Unified Ideograph
          277 0xee4f   -> U+7462   -> 0xfb6b   CJK Unified Ideograph
          278 0xee50   -> U+7489   -> 0xfb6c   CJK Unified Ideograph
          279 0xee51   -> U+749f   -> 0xfb6d   CJK Unified Ideograph
          280 0xee52   -> U+7501   -> 0xfb6e   CJK Unified Ideograph
          281 0xee53   -> U+756f   -> 0xfb6f   CJK Unified Ideograph
          282 0xee54   -> U+7682   -> 0xfb70   CJK Unified Ideograph
          283 0xee55   -> U+769c   -> 0xfb71   CJK Unified Ideograph
          284 0xee56   -> U+769e   -> 0xfb72   CJK Unified Ideograph
          285 0xee57   -> U+769b   -> 0xfb73   CJK Unified Ideograph
          286 0xee58   -> U+76a6   -> 0xfb74   CJK Unified Ideograph
          287 0xee59   -> U+fa17   -> 0xfb75   CJK compatibility Ideograph
          288 0xee5a   -> U+7746   -> 0xfb76   CJK Unified Ideograph
          289 0xee5b   -> U+52af   -> 0xfb77   CJK Unified Ideograph
          290 0xee5c   -> U+7821   -> 0xfb78   CJK Unified Ideograph
          291 0xee5d   -> U+784e   -> 0xfb79   CJK Unified Ideograph
          292 0xee5e   -> U+7864   -> 0xfb7a   CJK Unified Ideograph
          293 0xee5f   -> U+787a   -> 0xfb7b   CJK Unified Ideograph
          294 0xee60   -> U+7930   -> 0xfb7c   CJK Unified Ideograph
          295 0xee61   -> U+fa18   -> 0xfb7d   CJK compatibility Ideograph
          296 0xee62   -> U+fa19   -> 0xfb7e   CJK compatibility Ideograph
          297 0xee63   -> U+fa1a   -> 0xfb80   CJK compatibility Ideograph
          298 0xee64   -> U+7994   -> 0xfb81   CJK Unified Ideograph
          299 0xee65   -> U+fa1b   -> 0xfb82   CJK compatibility Ideograph
          300 0xee66   -> U+799b   -> 0xfb83   CJK Unified Ideograph
          301 0xee67   -> U+7ad1   -> 0xfb84   CJK Unified Ideograph
          302 0xee68   -> U+7ae7   -> 0xfb85   CJK Unified Ideograph
          303 0xee69   -> U+fa1c   -> 0xfb86   CJK compatibility Ideograph
          304 0xee6a   -> U+7aeb   -> 0xfb87   CJK Unified Ideograph
          305 0xee6b   -> U+7b9e   -> 0xfb88   CJK Unified Ideograph
          306 0xee6c   -> U+fa1d   -> 0xfb89   CJK compatibility Ideograph
          307 0xee6d   -> U+7d48   -> 0xfb8a   CJK Unified Ideograph
          308 0xee6e   -> U+7d5c   -> 0xfb8b   CJK Unified Ideograph
          309 0xee6f   -> U+7db7   -> 0xfb8c   CJK Unified Ideograph
          310 0xee70   -> U+7da0   -> 0xfb8d   CJK Unified Ideograph
          311 0xee71   -> U+7dd6   -> 0xfb8e   CJK Unified Ideograph
          312 0xee72   -> U+7e52   -> 0xfb8f   CJK Unified Ideograph
          313 0xee73   -> U+7f47   -> 0xfb90   CJK Unified Ideograph
          314 0xee74   -> U+7fa1   -> 0xfb91   CJK Unified Ideograph
          315 0xee75   -> U+fa1e   -> 0xfb92   CJK compatibility Ideograph
          316 0xee76   -> U+8301   -> 0xfb93   CJK Unified Ideograph
          317 0xee77   -> U+8362   -> 0xfb94   CJK Unified Ideograph
          318 0xee78   -> U+837f   -> 0xfb95   CJK Unified Ideograph
          319 0xee79   -> U+83c7   -> 0xfb96   CJK Unified Ideograph
          320 0xee7a   -> U+83f6   -> 0xfb97   CJK Unified Ideograph
          321 0xee7b   -> U+8448   -> 0xfb98   CJK Unified Ideograph
          322 0xee7c   -> U+84b4   -> 0xfb99   CJK Unified Ideograph
          323 0xee7d   -> U+8553   -> 0xfb9a   CJK Unified Ideograph
          324 0xee7e   -> U+8559   -> 0xfb9b   CJK Unified Ideograph
          325 0xee80   -> U+856b   -> 0xfb9c   CJK Unified Ideograph
          326 0xee81   -> U+fa1f   -> 0xfb9d   CJK compatibility Ideograph
          327 0xee82   -> U+85b0   -> 0xfb9e   CJK Unified Ideograph
          328 0xee83   -> U+fa20   -> 0xfb9f   CJK compatibility Ideograph
          329 0xee84   -> U+fa21   -> 0xfba0   CJK compatibility Ideograph
          330 0xee85   -> U+8807   -> 0xfba1   CJK Unified Ideograph
          331 0xee86   -> U+88f5   -> 0xfba2   CJK Unified Ideograph
          332 0xee87   -> U+8a12   -> 0xfba3   CJK Unified Ideograph
          333 0xee88   -> U+8a37   -> 0xfba4   CJK Unified Ideograph
          334 0xee89   -> U+8a79   -> 0xfba5   CJK Unified Ideograph
          335 0xee8a   -> U+8aa7   -> 0xfba6   CJK Unified Ideograph
          336 0xee8b   -> U+8abe   -> 0xfba7   CJK Unified Ideograph
          337 0xee8c   -> U+8adf   -> 0xfba8   CJK Unified Ideograph
          338 0xee8d   -> U+fa22   -> 0xfba9   CJK compatibility Ideograph
          339 0xee8e   -> U+8af6   -> 0xfbaa   CJK Unified Ideograph
          340 0xee8f   -> U+8b53   -> 0xfbab   CJK Unified Ideograph
          341 0xee90   -> U+8b7f   -> 0xfbac   CJK Unified Ideograph
          342 0xee91   -> U+8cf0   -> 0xfbad   CJK Unified Ideograph
          343 0xee92   -> U+8cf4   -> 0xfbae   CJK Unified Ideograph
          344 0xee93   -> U+8d12   -> 0xfbaf   CJK Unified Ideograph
          345 0xee94   -> U+8d76   -> 0xfbb0   CJK Unified Ideograph
          346 0xee95   -> U+fa23   -> 0xfbb1   CJK compatibility Ideograph
          347 0xee96   -> U+8ecf   -> 0xfbb2   CJK Unified Ideograph
          348 0xee97   -> U+fa24   -> 0xfbb3   CJK compatibility Ideograph
          349 0xee98   -> U+fa25   -> 0xfbb4   CJK compatibility Ideograph
          350 0xee99   -> U+9067   -> 0xfbb5   CJK Unified Ideograph
          351 0xee9a   -> U+90de   -> 0xfbb6   CJK Unified Ideograph
          352 0xee9b   -> U+fa26   -> 0xfbb7   CJK compatibility Ideograph
          353 0xee9c   -> U+9115   -> 0xfbb8   CJK Unified Ideograph
          354 0xee9d   -> U+9127   -> 0xfbb9   CJK Unified Ideograph
          355 0xee9e   -> U+91da   -> 0xfbba   CJK Unified Ideograph
          356 0xee9f   -> U+91d7   -> 0xfbbb   CJK Unified Ideograph
          357 0xeea0   -> U+91de   -> 0xfbbc   CJK Unified Ideograph
          358 0xeea1   -> U+91ed   -> 0xfbbd   CJK Unified Ideograph
          359 0xeea2   -> U+91ee   -> 0xfbbe   CJK Unified Ideograph
          360 0xeea3   -> U+91e4   -> 0xfbbf   CJK Unified Ideograph
          361 0xeea4   -> U+91e5   -> 0xfbc0   CJK Unified Ideograph
          362 0xeea5   -> U+9206   -> 0xfbc1   CJK Unified Ideograph
          363 0xeea6   -> U+9210   -> 0xfbc2   CJK Unified Ideograph
          364 0xeea7   -> U+920a   -> 0xfbc3   CJK Unified Ideograph
          365 0xeea8   -> U+923a   -> 0xfbc4   CJK Unified Ideograph
          366 0xeea9   -> U+9240   -> 0xfbc5   CJK Unified Ideograph
          367 0xeeaa   -> U+923c   -> 0xfbc6   CJK Unified Ideograph
          368 0xeeab   -> U+924e   -> 0xfbc7   CJK Unified Ideograph
          369 0xeeac   -> U+9259   -> 0xfbc8   CJK Unified Ideograph
          370 0xeead   -> U+9251   -> 0xfbc9   CJK Unified Ideograph
          371 0xeeae   -> U+9239   -> 0xfbca   CJK Unified Ideograph
          372 0xeeaf   -> U+9267   -> 0xfbcb   CJK Unified Ideograph
          373 0xeeb0   -> U+92a7   -> 0xfbcc   CJK Unified Ideograph
          374 0xeeb1   -> U+9277   -> 0xfbcd   CJK Unified Ideograph
          375 0xeeb2   -> U+9278   -> 0xfbce   CJK Unified Ideograph
          376 0xeeb3   -> U+92e7   -> 0xfbcf   CJK Unified Ideograph
          377 0xeeb4   -> U+92d7   -> 0xfbd0   CJK Unified Ideograph
          378 0xeeb5   -> U+92d9   -> 0xfbd1   CJK Unified Ideograph
          379 0xeeb6   -> U+92d0   -> 0xfbd2   CJK Unified Ideograph
          380 0xeeb7   -> U+fa27   -> 0xfbd3   CJK compatibility Ideograph
          381 0xeeb8   -> U+92d5   -> 0xfbd4   CJK Unified Ideograph
          382 0xeeb9   -> U+92e0   -> 0xfbd5   CJK Unified Ideograph
          383 0xeeba   -> U+92d3   -> 0xfbd6   CJK Unified Ideograph
          384 0xeebb   -> U+9325   -> 0xfbd7   CJK Unified Ideograph
          385 0xeebc   -> U+9321   -> 0xfbd8   CJK Unified Ideograph
          386 0xeebd   -> U+92fb   -> 0xfbd9   CJK Unified Ideograph
          387 0xeebe   -> U+fa28   -> 0xfbda   CJK compatibility Ideograph
          388 0xeebf   -> U+931e   -> 0xfbdb   CJK Unified Ideograph
          389 0xeec0   -> U+92ff   -> 0xfbdc   CJK Unified Ideograph
          390 0xeec1   -> U+931d   -> 0xfbdd   CJK Unified Ideograph
          391 0xeec2   -> U+9302   -> 0xfbde   CJK Unified Ideograph
          392 0xeec3   -> U+9370   -> 0xfbdf   CJK Unified Ideograph
          393 0xeec4   -> U+9357   -> 0xfbe0   CJK Unified Ideograph
          394 0xeec5   -> U+93a4   -> 0xfbe1   CJK Unified Ideograph
          395 0xeec6   -> U+93c6   -> 0xfbe2   CJK Unified Ideograph
          396 0xeec7   -> U+93de   -> 0xfbe3   CJK Unified Ideograph
          397 0xeec8   -> U+93f8   -> 0xfbe4   CJK Unified Ideograph
          398 0xeec9   -> U+9431   -> 0xfbe5   CJK Unified Ideograph
          399 0xeeca   -> U+9445   -> 0xfbe6   CJK Unified Ideograph
          400 0xeecb   -> U+9448   -> 0xfbe7   CJK Unified Ideograph
          401 0xeecc   -> U+9592   -> 0xfbe8   CJK Unified Ideograph
          402 0xeecd   -> U+f9dc   -> 0xfbe9   CJK compatibility Ideograph
          403 0xeece   -> U+fa29   -> 0xfbea   CJK compatibility Ideograph
          404 0xeecf   -> U+969d   -> 0xfbeb   CJK Unified Ideograph
          405 0xeed0   -> U+96af   -> 0xfbec   CJK Unified Ideograph
          406 0xeed1   -> U+9733   -> 0xfbed   CJK Unified Ideograph
          407 0xeed2   -> U+973b   -> 0xfbee   CJK Unified Ideograph
          408 0xeed3   -> U+9743   -> 0xfbef   CJK Unified Ideograph
          409 0xeed4   -> U+974d   -> 0xfbf0   CJK Unified Ideograph
          410 0xeed5   -> U+974f   -> 0xfbf1   CJK Unified Ideograph
          411 0xeed6   -> U+9751   -> 0xfbf2   CJK Unified Ideograph
          412 0xeed7   -> U+9755   -> 0xfbf3   CJK Unified Ideograph
          413 0xeed8   -> U+9857   -> 0xfbf4   CJK Unified Ideograph
          414 0xeed9   -> U+9865   -> 0xfbf5   CJK Unified Ideograph
          415 0xeeda   -> U+fa2a   -> 0xfbf6   CJK compatibility Ideograph
          416 0xeedb   -> U+fa2b   -> 0xfbf7   CJK compatibility Ideograph
          417 0xeedc   -> U+9927   -> 0xfbf8   CJK Unified Ideograph
          418 0xeedd   -> U+fa2c   -> 0xfbf9   CJK compatibility Ideograph
          419 0xeede   -> U+999e   -> 0xfbfa   CJK Unified Ideograph
          420 0xeedf   -> U+9a4e   -> 0xfbfb   CJK Unified Ideograph
          421 0xeee0   -> U+9ad9   -> 0xfbfc   CJK Unified Ideograph
          422 0xeee1   -> U+9adc   -> 0xfc40   CJK Unified Ideograph
          423 0xeee2   -> U+9b75   -> 0xfc41   CJK Unified Ideograph
          424 0xeee3   -> U+9b72   -> 0xfc42   CJK Unified Ideograph
          425 0xeee4   -> U+9b8f   -> 0xfc43   CJK Unified Ideograph
          426 0xeee5   -> U+9bb1   -> 0xfc44   CJK Unified Ideograph
          427 0xeee6   -> U+9bbb   -> 0xfc45   CJK Unified Ideograph
          428 0xeee7   -> U+9c00   -> 0xfc46   CJK Unified Ideograph
          429 0xeee8   -> U+9d70   -> 0xfc47   CJK Unified Ideograph
          430 0xeee9   -> U+9d6b   -> 0xfc48   CJK Unified Ideograph
          431 0xeeea   -> U+fa2d   -> 0xfc49   CJK compatibility Ideograph
          432 0xeeeb   -> U+9e19   -> 0xfc4a   CJK Unified Ideograph
          433 0xeeec   -> U+9ed1   -> 0xfc4b   CJK Unified Ideograph
          434 0xeeef   -> U+2170   -> 0xfa40   Small Roman Numeral One
          435 0xeef0   -> U+2171   -> 0xfa41   Small Roman Numeral Two
          436 0xeef1   -> U+2172   -> 0xfa42   Small Roman Numeral Three
          437 0xeef2   -> U+2173   -> 0xfa43   Small Roman Numeral Four
          438 0xeef3   -> U+2174   -> 0xfa44   Small Roman Numeral Five
          439 0xeef4   -> U+2175   -> 0xfa45   Small Roman Numeral Six
          440 0xeef5   -> U+2176   -> 0xfa46   Small Roman Numeral Seven
          441 0xeef6   -> U+2177   -> 0xfa47   Small Roman Numeral Eight
          442 0xeef7   -> U+2178   -> 0xfa48   Small Roman Numeral Nine
          443 0xeef8   -> U+2179   -> 0xfa49   Small Roman Numeral Ten
          444 0xeef9   -> U+ffe2   -> 0x81ca   Fullwidth Not Sign
          445 0xeefa   -> U+ffe4   -> 0xfa55   Fullwidth Broken Bar
          446 0xeefb   -> U+ff07   -> 0xfa56   Fullwidth Apostrophe
          447 0xeefc   -> U+ff02   -> 0xfa57   Fullwidth Quotation Mark
          448 0xfa4a   -> U+2160   -> 0x8754   Roman Numeral One
          449 0xfa4b   -> U+2161   -> 0x8755   Roman Numeral Two
          450 0xfa4c   -> U+2162   -> 0x8756   Roman Numeral Three
          451 0xfa4d   -> U+2163   -> 0x8757   Roman Numeral Four
          452 0xfa4e   -> U+2164   -> 0x8758   Roman Numeral Five
          453 0xfa4f   -> U+2165   -> 0x8759   Roman Numeral Six
          454 0xfa50   -> U+2166   -> 0x875a   Roman Numeral Seven
          455 0xfa51   -> U+2167   -> 0x875b   Roman Numeral Eight
          456 0xfa52   -> U+2168   -> 0x875c   Roman Numeral Nine
          457 0xfa53   -> U+2169   -> 0x875d   Roman Numeral Ten
          458 0xfa54   -> U+ffe2   -> 0x81ca   Fullwidth Not Sign
          459 0xfa58   -> U+3231   -> 0x878a   Parenthesized Ideograph Stock
          460 0xfa59   -> U+2116   -> 0x8782   Numero Sign
          461 0xfa5a   -> U+2121   -> 0x8784   Telephone Sign
          462 0xfa5b   -> U+2235   -> 0x81e6   Because"))
          463   (with-input-from-string (s kb170559)
          464     (loop for line = (read-line s nil) until (null line)
          465           do (let ((ucs (parse-integer (subseq line 14 18) :radix 16))
          466                    (cp932 (parse-integer (subseq line 26 30) :radix 16)))
          467                (setf (gethash ucs *ucs-to-cp932-hash*) cp932)))))
          468 
          469 (defun eucjp-to-ucs (code)
          470   (values (gethash code *eucjp-to-ucs-hash*)))
          471 
          472 (defun ucs-to-eucjp (code)
          473   (values (gethash code *ucs-to-eucjp-hash*)))
          474 
          475 (defun cp932-to-ucs (code)
          476   (values (gethash code *cp932-to-ucs-hash*)))
          477 
          478 (defun ucs-to-cp932 (code)
          479   (values (gethash code *ucs-to-cp932-hash*)))
          480 
          481 ;;;; EUC-JP
          482 
          483 (define-character-encoding :eucjp
          484     "An 8-bit, variable-length character encoding in which
          485 character code points in the range #x00-#x7f can be encoded in a
          486 single octet; characters with larger code values can be encoded
          487 in 2 to 3 bytes."
          488   :max-units-per-char 3
          489   :literal-char-code-limit #x80)
          490 
          491 
          492 (define-octet-counter :eucjp (getter type)
          493   `(named-lambda eucjp-octet-counter (seq start end max)
          494      (declare (type ,type seq) (fixnum start end max))
          495      (loop with noctets fixnum = 0
          496            for i fixnum from start below end
          497            for code of-type code-point = (,getter seq i)
          498            do (let* ((c (ucs-to-eucjp code))
          499                      (new (+ (cond ((< #xffff c) 3)
          500                                    ((< #xff c) 2)
          501                                    (t 1))
          502                              noctets)))
          503                 (if (and (plusp max) (> new max))
          504                     (loop-finish)
          505                     (setq noctets new)))
          506            finally (return (values noctets i)))))
          507 
          508 (define-code-point-counter :eucjp (getter type)
          509   `(named-lambda eucjp-code-point-counter (seq start end max)
          510      (declare (type ,type seq) (fixnum start end max))
          511      (loop with nchars fixnum = 0
          512            with i fixnum = start
          513            while (< i end) do
          514              (let* ((octet (,getter seq i))
          515                     (next-i (+ i (cond ((= #x8f octet) 3)
          516                                        ((or (< #xa0 octet #xff)
          517                                             (= #x8e octet)) 2)
          518                                        (t 1)))))
          519                (declare (type ub8 octet) (fixnum next-i))
          520                (cond ((> next-i end)
          521                       ;; Should we add restarts to this error, we'll have
          522                       ;; to figure out a way to communicate with the
          523                       ;; decoder since we probably want to do something
          524                       ;; about it right here when we have a chance to
          525                       ;; change the count or something.  (Like an
          526                       ;; alternative replacement character or perhaps the
          527                       ;; existence of this error so that the decoder
          528                       ;; doesn't have to check for it on every iteration
          529                       ;; like we do.)
          530                       ;;
          531                       ;; FIXME: The data for this error is not right.
          532                       (decoding-error (vector octet) :eucjp seq i
          533                                       nil 'end-of-input-in-character)
          534                       (return (values (1+ nchars) end)))
          535                      (t
          536                       (setq nchars (1+ nchars)
          537                             i next-i)
          538                       (when (and (plusp max) (= nchars max))
          539                         (return (values nchars i))))))
          540            finally (progn (assert (= i end))
          541                      (return (values nchars i))))))
          542 
          543 (define-encoder :eucjp (getter src-type setter dest-type)
          544   `(named-lambda eucjp-encoder (src start end dest d-start)
          545      (declare (type ,src-type src)
          546               (type ,dest-type dest)
          547               (fixnum start end d-start))
          548      (loop with di fixnum = d-start
          549            for i fixnum from start below end
          550            for code of-type code-point = (,getter src i)
          551            for eucjp of-type code-point
          552              = (ucs-to-eucjp code) do
          553                (macrolet ((set-octet (offset value)
          554                             `(,',setter ,value dest (the fixnum (+ di ,offset)))))
          555                  (cond
          556                    ;; 1 octet
          557                    ((< eucjp #x100)
          558                     (set-octet 0 eucjp)
          559                     (incf di))
          560                    ;; 2 octets
          561                    ((< eucjp #x10000)
          562                     (set-octet 0 (f-logand #xff (f-ash eucjp -8)))
          563                     (set-octet 1 (logand eucjp #xff))
          564                     (incf di 2))
          565                    ;; 3 octets
          566                    (t
          567                     (set-octet 0 (f-logand #xff (f-ash eucjp -16)))
          568                     (set-octet 1 (f-logand #xff (f-ash eucjp -8)))
          569                     (set-octet 2 (logand eucjp #xff))
          570                     (incf di 3))
          571                    ))
          572            finally (return (the fixnum (- di d-start))))))
          573 
          574 
          575 (define-decoder :eucjp (getter src-type setter dest-type)
          576   `(named-lambda eucjp-decoder (src start end dest d-start)
          577      (declare (type ,src-type src)
          578               (type ,dest-type dest)
          579               (fixnum start end d-start))
          580      (let ((u2 0))
          581        (declare (type ub8 u2))
          582        (loop for di fixnum from d-start
          583              for i fixnum from start below end
          584              for u1 of-type ub8 = (,getter src i) do
          585                ;; Note: CONSUME-OCTET doesn't check if I is being
          586                ;; incremented past END.  We're assuming that END has
          587                ;; been calculated with the CODE-POINT-POINTER above that
          588                ;; checks this.
          589                (macrolet
          590                    ((consume-octet ()
          591                       `(let ((next-i (incf i)))
          592                          (if (= next-i end)
          593                              ;; FIXME: data for this error is incomplete.
          594                              ;; and signalling this error twice
          595                              (return-from setter-block
          596                                (decoding-error nil :eucjp src i +repl+
          597                                                'end-of-input-in-character))
          598                              (,',getter src next-i))))
          599                     (handle-error (n &optional (c 'character-decoding-error))
          600                       `(decoding-error
          601                         (vector ,@(subseq '(u1 u2) 0 n))
          602                         :eucjp src (1+ (- i ,n)) +repl+ ',c))
          603                     (handle-error-if-icb (var n)
          604                       `(when (not (< #x7f ,var #xc0))
          605                          (decf i)
          606                          (return-from setter-block
          607                            (handle-error ,n invalid-utf8-continuation-byte)))))
          608                  (,setter
          609                   (block setter-block
          610                     (cond
          611                       ;; 3 octets
          612                       ((= u1 #x8f)
          613                        (setq u2 (consume-octet))
          614                        (eucjp-to-ucs (logior #x8f0000
          615                                              (f-ash u2 8)
          616                                              (consume-octet))))
          617                       ;; 2 octets
          618                       ((or (= u1 #x8e)
          619                            (< #xa0 u1 #xff))
          620                        (eucjp-to-ucs (logior (f-ash u1 8)
          621                                              (consume-octet))))
          622                       ;; 1 octet
          623                       (t
          624                        (eucjp-to-ucs u1))))
          625                   dest di))
          626          finally (return (the fixnum (- di d-start)))))))
          627 
          628 ;;;; CP932
          629 
          630 (define-character-encoding :cp932
          631     "An 8-bit, variable-length character encoding in which
          632 character code points in the range #x00-#x7f can be encoded in a
          633 single octet; characters with larger code values can be encoded
          634 in 2 bytes."
          635   :max-units-per-char 2
          636   :literal-char-code-limit #x80)
          637 
          638 
          639 (define-octet-counter :cp932 (getter type)
          640   `(named-lambda cp932-octet-counter (seq start end max)
          641      (declare (type ,type seq) (fixnum start end max))
          642      (loop with noctets fixnum = 0
          643            for i fixnum from start below end
          644            for code of-type code-point = (,getter seq i)
          645            do (let* ((c (ucs-to-cp932 code))
          646                      (new (+ (cond ((< #xff c) 2)
          647                                    (t 1))
          648                              noctets)))
          649                 (if (and (plusp max) (> new max))
          650                     (loop-finish)
          651                     (setq noctets new)))
          652            finally (return (values noctets i)))))
          653 
          654 (define-code-point-counter :cp932 (getter type)
          655   `(named-lambda cp932-code-point-counter (seq start end max)
          656      (declare (type ,type seq) (fixnum start end max))
          657      (loop with nchars fixnum = 0
          658            with i fixnum = start
          659            while (< i end) do
          660              (let* ((octet (,getter seq i))
          661                     (next-i (+ i (cond ((or (<= #x81 octet #x9f)
          662                                             (<= #xe0 octet #xfc))
          663                                         2)
          664                                        (t 1)))))
          665                (declare (type ub8 octet) (fixnum next-i))
          666                (cond ((> next-i end)
          667                       ;; Should we add restarts to this error, we'll have
          668                       ;; to figure out a way to communicate with the
          669                       ;; decoder since we probably want to do something
          670                       ;; about it right here when we have a chance to
          671                       ;; change the count or something.  (Like an
          672                       ;; alternative replacement character or perhaps the
          673                       ;; existence of this error so that the decoder
          674                       ;; doesn't have to check for it on every iteration
          675                       ;; like we do.)
          676                       ;;
          677                       ;; FIXME: The data for this error is not right.
          678                       (decoding-error (vector octet) :cp932 seq i
          679                                       nil 'end-of-input-in-character)
          680                       (return (values (1+ nchars) end)))
          681                      (t
          682                       (setq nchars (1+ nchars)
          683                             i next-i)
          684                       (when (and (plusp max) (= nchars max))
          685                         (return (values nchars i))))))
          686            finally (progn (assert (= i end))
          687                      (return (values nchars i))))))
          688 
          689 (define-encoder :cp932 (getter src-type setter dest-type)
          690   `(named-lambda cp932-encoder (src start end dest d-start)
          691      (declare (type ,src-type src)
          692               (type ,dest-type dest)
          693               (fixnum start end d-start))
          694      (loop with di fixnum = d-start
          695            for i fixnum from start below end
          696            for code of-type code-point = (,getter src i)
          697            for cp932 of-type code-point
          698              = (ucs-to-cp932 code) do
          699                (macrolet ((set-octet (offset value)
          700                             `(,',setter ,value dest (the fixnum (+ di ,offset)))))
          701                  (cond
          702                    ;; 1 octet
          703                    ((< cp932 #x100)
          704                     (set-octet 0 cp932)
          705                     (incf di))
          706                    ;; 2 octets
          707                    ((< cp932 #x10000)
          708                     (set-octet 0 (f-logand #xff (f-ash cp932 -8)))
          709                     (set-octet 1 (logand cp932 #xff))
          710                     (incf di 2))
          711                    ;; 3 octets
          712                    (t
          713                     (set-octet 0 (f-logand #xff (f-ash cp932 -16)))
          714                     (set-octet 1 (f-logand #xff (f-ash cp932 -8)))
          715                     (set-octet 2 (logand cp932 #xff))
          716                     (incf di 3))
          717                    ))
          718            finally (return (the fixnum (- di d-start))))))
          719 
          720 
          721 (define-decoder :cp932 (getter src-type setter dest-type)
          722   `(named-lambda cp932-decoder (src start end dest d-start)
          723      (declare (type ,src-type src)
          724               (type ,dest-type dest)
          725               (fixnum start end d-start))
          726      (let ((u2 0))
          727        (declare (type ub8 u2))
          728        (loop for di fixnum from d-start
          729              for i fixnum from start below end
          730              for u1 of-type ub8 = (,getter src i) do
          731                ;; Note: CONSUME-OCTET doesn't check if I is being
          732                ;; incremented past END.  We're assuming that END has
          733                ;; been calculated with the CODE-POINT-POINTER above that
          734                ;; checks this.
          735                (macrolet
          736                    ((consume-octet ()
          737                       `(let ((next-i (incf i)))
          738                          (if (= next-i end)
          739                              ;; FIXME: data for this error is incomplete.
          740                              ;; and signalling this error twice
          741                              (return-from setter-block
          742                                (decoding-error nil :cp932 src i +repl+
          743                                                'end-of-input-in-character))
          744                              (,',getter src next-i))))
          745                     (handle-error (n &optional (c 'character-decoding-error))
          746                       `(decoding-error
          747                         (vector ,@(subseq '(u1 u2) 0 n))
          748                         :cp932 src (1+ (- i ,n)) +repl+ ',c))
          749                     (handle-error-if-icb (var n)
          750                       `(when (not (< #x7f ,var #xc0))
          751                          (decf i)
          752                          (return-from setter-block
          753                            (handle-error ,n invalid-utf8-continuation-byte)))))
          754                  (,setter
          755                   (block setter-block
          756                     (cond
          757                       ;; 2 octets
          758                       ((or (<= #x81 u1 #x9f)
          759                            (<= #xe0 u1 #xfc))
          760                        (setq u2 (consume-octet))
          761                        (cp932-to-ucs (logior (f-ash u1 8)
          762                                              u2)))
          763                       ;; 1 octet
          764                       (t
          765                        (cp932-to-ucs u1))))
          766                   dest di))
          767          finally (return (the fixnum (- di d-start)))))))