root/trunk/lisp/international/codepage.el

Revision 4220, 30.6 kB (checked in by miyoshi, 9 months ago)

Sync up with Emacs22.2.

  • Property svn:eol-style set to LF
Line 
1 ;;; codepage.el --- MS-DOS/MS-Windows specific coding systems
2
3 ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
4 ;;   2007, 2008  Free Software Foundation, Inc.
5 ;; Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
6 ;;   National Institute of Advanced Industrial Science and Technology (AIST)
7 ;;   Registration Number H14PRO021
8
9 ;; Author: Eli Zaretskii
10 ;; Maintainer: FSF
11 ;; Keywords: i18n ms-dos ms-windows codepage
12
13 ;; This file is part of GNU Emacs.
14
15 ;; GNU Emacs is free software; you can redistribute it and/or modify
16 ;; it under the terms of the GNU General Public License as published by
17 ;; the Free Software Foundation; either version 3, or (at your option)
18 ;; any later version.
19
20 ;; GNU Emacs is distributed in the hope that it will be useful,
21 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
22 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23 ;; GNU General Public License for more details.
24
25 ;; You should have received a copy of the GNU General Public License
26 ;; along with GNU Emacs; see the file COPYING.  If not, write to the
27 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
28 ;; Boston, MA 02110-1301, USA.
29
30 ;;; Commentary:
31
32 ;; Special coding systems for DOS/Windows codepage support.
33 ;;
34 ;; These coding systems perform conversion from the DOS/Windows
35 ;; codepage encoding to one of the ISO-8859 character sets.  Each
36 ;; codepage has its corresponding ISO-8859 charset, chosen so as to be
37 ;; able to convert all (or most) of the characters.  The idea is that
38 ;; Emacs internally works with the usual MULE charsets, and the
39 ;; conversion to and from the DOS codepage is performed on I/O only.
40 ;; See term/internal.el for the complementary setup of the DOS
41 ;; terminal display and input methods.
42 ;;
43 ;; Thanks to Ken'ichi Handa <handa@etl.go.jp> for writing the CCL
44 ;; encoders/decoders, and for help in debugging this code.
45
46 ;;; Code:
47
;; Forward declaration only: no value is assigned in this file.  The
;; variable is read below only when `system-type' is `ms-dos'.
;; NOTE(review): presumably defined by the MS-DOS terminal setup code --
;; confirm where it gets its value.
(defvar dos-unsupported-char-glyph)
49
(defun cp-coding-system-for-codepage-1 (coding mnemonic iso-name
                                               decoder encoder)
  "Define coding system CODING for a DOS codepage via translation tables.
MNEMONIC is the character displayed on the mode line for the coding system.
ISO-NAME is the symbol naming the ISO-8859 charset that corresponds to
this codepage.
DECODER is a translation table that converts characters in the DOS
codepage encoding to Emacs multibyte characters.
ENCODER is a translation table that converts Emacs multibyte characters
to external DOS codepage codes."
  (save-match-data
    (let* ((cp-name (symbol-name coding))
           ;; Byte written in place of any character that ENCODER turns
           ;; into a japanese-jisx0208 character: the DOS "unsupported"
           ;; glyph (or 127 when none is set) on MS-DOS, `?' elsewhere.
           (fallback (cond ((not (eq system-type 'ms-dos)) ??)
                           (dos-unsupported-char-glyph
                            (logand dos-unsupported-char-glyph 255))
                           (t 127)))
           (safe-table (make-char-table 'safe-chars))
           (ascii-id (charset-id 'ascii))
           (jisx0208-id (charset-id 'japanese-jisx0208))
           (decode-program
            ;; The leading 4 supplies the buf_magnification parameter
            ;; of the CCL program: a multibyte character may take at
            ;; most 4 bytes.  Bytes below 128 are copied unchanged;
            ;; the others go through DECODER.
            `(4 (loop (read r1)
                      (if (r1 >= 128)
                          ((r0 = ,ascii-id)
                           (translate-character ,decoder r0 r1)
                           (write-multibyte-character r0 r1))
                        (write r1))
                      (repeat))))
           (encode-program
            ;; The leading 2 supplies the buf_magnification parameter.
            ;; Since the -dos coding system generates \r\n for each \n,
            ;; a factor of 2 covers even the worst case of empty lines
            ;; with a single \n.
            `(2 (loop (read-multibyte-character r0 r1)
                      (if (r0 != ,ascii-id)
                          ((translate-character ,encoder r0 r1)
                           (if (r0 == ,jisx0208-id)
                               ((r1 = ,fallback)
                                (write r1)))))
                      (write-repeat r1))))
           (ccl-decoder (ccl-compile decode-program))
           (ccl-encoder (ccl-compile encode-program)))

      ;; Mark as safe every character that DECODER produces for a byte
      ;; in the range 128..255.
      (let ((decode-map (get decoder 'translation-table)))
        (dotimes (k 128)
          (let ((ch (aref decode-map (+ k 128))))
            (when ch
              (aset safe-table ch t)))))

      ;; Finally define CODING itself.
      ;; NOTE(review): the literal 4 is presumably the CCL coding-system
      ;; type of `make-coding-system' -- confirm against its docstring.
      (make-coding-system
       coding 4 mnemonic
       (concat "8-bit encoding of " (symbol-name iso-name)
               " characters using IBM codepage " cp-name)
       (cons ccl-decoder ccl-encoder)
       (list (list 'safe-charsets 'ascii 'eight-bit-control
                   'eight-bit-graphic iso-name)
             (cons 'safe-chars safe-table)
             (list 'valid-codes (cons 0 255)))))))
113
(defun cp-decoding-vector-for-codepage (table charset offset)
  "Return a 256-element vector for decoding IBM PC character codes.
TABLE is a conversion table whose Nth element is either nil or the
codepage code of the character (make-char CHARSET (+ N OFFSET)), so
that (make-char CHARSET OFFSET) is the first non-ASCII character of the
ISO-8859 character set CHARSET.

In the result every code maps to itself, except that each codepage code
listed in TABLE maps to its corresponding CHARSET character."
  (let* ((table-size (length table))
         ;; Initial fill value only: the identity loop below overwrites
         ;; every slot, so this value never survives into the result.
         (placeholder (cond ((not (eq system-type 'ms-dos)) 32)
                            (dos-unsupported-char-glyph
                             (logand dos-unsupported-char-glyph 255))
                            (t 127)))
         (result (make-vector 256 placeholder)))
    ;; Start from the identity mapping.
    (dotimes (code 256)
      (aset result code code))
    ;; Then install the CHARSET character for each codepage code that
    ;; TABLE defines.
    (dotimes (n table-size)
      (let ((glyph (aref table n)))
        (when glyph
          (aset result glyph (make-char charset (+ n offset))))))
    result))
136
137 ;;; You don't think I created all these tables below by hand, do you?
138 ;;; The following Awk script will create the table for cp850-to-Latin-1
139 ;;; conversion from the RFC 1345 file (the other tables are left as an
140 ;;; exercise):
141 ;;; BEGIN { n_pages = 11;
142 ;;;         pn["IBM437"] = 0; pn["IBM850"] = 1; pn["IBM851"] = 2;
143 ;;;         pn["IBM852"] = 3; pn["IBM855"] = 4; pn["IBM860"] = 5;
144 ;;;         pn["IBM861"] = 6; pn["IBM862"] = 7; pn["IBM863"] = 8;
145 ;;;         pn["IBM864"] = 9; pn["IBM865"] = 10;
146 ;;;       }
147 ;;; $1 == "&charset" { charset = $2; }
148 ;;; $1 == "&code"    { code = $2; }
149 ;;; /^  [^&]/  {
150 ;;;   if ((charset ~ /^IBM(437|8(5[0125]|6[0-5]))$/) || (charset ~ /^ISO_8859-1/))
151 ;;;     {
152 ;;;       for (i = 1; i <= NF; i++)
153 ;;;         chars[charset,code++] = $i;
154 ;;;     }
155 ;;;   }
156 ;;;
157 ;;; END {
158 ;;;   for (i = 160; i < 256; i++)
159 ;;;     {
160 ;;;       c =  chars["ISO_8859-1:1987",i];
161 ;;;       if (c == "??")        # skip unused positions
162 ;;;         {
163 ;;;           printf " nil";
164 ;;;           if ((i - 159)%16 == 0)
165 ;;;             printf "\n";
166 ;;;           continue;
167 ;;;         }
168 ;;;       found = 0;
169 ;;;       for (j in pn)
170 ;;;         map[j] = "nil";
171 ;;;       for (combined in chars)
172 ;;;         {
173 ;;;           candidate = chars[combined];
174 ;;;           split (combined, separate, SUBSEP);
175 ;;;           if (separate[1] == "IBM850" && candidate == c)
176 ;;;             {
177 ;;;               found = 1;
178 ;;;               map[separate[1]] = separate[2];
179 ;;;             }
180 ;;;         }
181 ;;;       printf " %s", map["IBM850"];
182 ;;;       if ((i - 159)%16 == 0)
183 ;;;         printf "\n";
184 ;;;     }
185 ;;; }
186
187 ;;; WARNING WARNING WARNING!!!
188 ;;;
189 ;;; If you want to get fancy with these tables, remember that the inverse
190 ;;; tables, created by `cp-decoding-vector-for-codepage' above, are installed
191 ;;; on MS-DOS as nonascii-translation-table (see `dos-codepage-setup' on
192 ;;; internal.el).  Therefore, you should NOT put any codes below 128 in
193 ;;; these tables!  Otherwise, various Emacs commands and functions will
194 ;;; mysteriously fail!  For example, a typical screwup is to map the Latin-N
195 ;;; acute accent character to the apostrophe, and have all regexps which
196 ;;; end with "\\'" begin to fail (e.g., the automatic setting of the major
197 ;;; mode by file name extension will stop working).
198 ;;;
199 ;;; You HAVE BEEN warned!
200
201 ;; US/English/PC-8/IBM-2.  This doesn't support Latin-1 characters very
202 ;; well, but why not use what we can salvage?
(defvar cp437-decode-table
  ;; Nth element is the code of a cp437 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp437 glyph.
  [
   255 173 155 156 nil 157 179 nil nil nil 166 174 170 196 nil nil
   248 241 253 nil nil nil nil 249 nil nil 167 175 172 171 nil 168
   nil nil nil nil 142 143 146 128 nil 144 nil nil nil nil nil nil
   nil 165 nil nil nil nil 153 nil nil nil nil nil 154 nil nil 225
   133 160 131 nil 132 134 145 135 138 130 136 137 141 161 140 139
   nil 164 149 162 147 nil 148 246 nil 151 163 150 129 nil nil 152]
  "Table for converting ISO-8859-1 characters into codepage 437 glyphs.")
;; The `charset', `language' and `offset' properties are what
;; `cp-charset-for-codepage', `cp-language-for-codepage' and
;; `cp-offset-for-codepage' return for this codepage.
(setplist 'cp437-decode-table
          '(charset latin-iso8859-1 language "Latin-1" offset 160))
217
218 ;; Multilingual (Latin-1)
(defvar cp850-decode-table
  ;; Nth element is the code of a cp850 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp850 glyph.
  [
   255 173 189 156 207 190 221 245 249 184 166 174 170 240 169 238
   248 241 253 252 239 230 244 250 247 251 167 175 172 171 243 168
   183 181 182 199 142 143 146 128 212 144 210 211 222 214 215 216
   209 165 227 224 226 229 153 158 157 235 233 234 154 237 232 225
   133 160 131 198 132 134 145 135 138 130 136 137 141 161 140 139
   208 164 149 162 147 228 148 246 155 151 163 150 129 236 231 152]
  "Table for converting ISO-8859-1 characters into codepage 850 glyphs.")
;; The `charset', `language' and `offset' properties are what
;; `cp-charset-for-codepage', `cp-language-for-codepage' and
;; `cp-offset-for-codepage' return for this codepage.
(setplist 'cp850-decode-table
          '(charset latin-iso8859-1 language "Latin-1" offset 160))
233
234 ;; Multilingual (Latin-9)
(defvar cp858-decode-table
  ;; Nth element is the code of a cp858 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-15 (+ N 160)).
  ;; The element nil means there's no corresponding cp858 glyph.
  [
   255 173 189 156 213 190 221 245 249 184 166 174 170 240 169 238
   248 241 253 252 239 230 244 250 247 251 167 175 172 171 243 168
   183 181 182 199 142 143 146 128 212 144 210 211 222 214 215 216
   209 165 227 224 226 229 153 158 157 235 233 234 154 237 232 225
   133 160 131 198 132 134 145 135 138 130 136 137 141 161 140 139
   208 164 149 162 147 228 148 246 155 151 163 150 129 236 231 152]
  "Table for converting ISO-8859-15 characters into codepage 858 glyphs.")
;; The `charset', `language' and `offset' properties are what
;; `cp-charset-for-codepage', `cp-language-for-codepage' and
;; `cp-offset-for-codepage' return for this codepage.
(setplist 'cp858-decode-table
          '(charset latin-iso8859-15 language "Latin-9" offset 160))
249
250 ;; Greek
(defvar cp851-decode-table
  ;; Nth element is the code of a cp851 glyph for the multibyte
  ;; character created by (make-char 'greek-iso8859-7 (+ N 160)).
  ;; The element nil means there's no corresponding cp851 glyph.
  [
   255 nil nil 156 nil nil nil 245 249 nil nil 174 nil 240 nil nil
   248 241 nil nil 239 nil 134 nil 141 143 144 175 146 171 149 152
   161 164 165 166 167 168 169 170 172 173 181 182 184 183 189 190
   198 199 nil 207 208 209 210 211 212 213 nil nil 155 157 158 159
   252 214 215 216 221 222 224 225 226 227 228 229 230 231 232 233
   234 235 237 236 238 242 243 244 246 250 160 251 162 163 253 nil]
  "Table for converting ISO-8859-7 characters into codepage 851 glyphs.")
(setplist 'cp851-decode-table
          '(charset greek-iso8859-7 language "Greek" offset 160))
262
263 ;; Slavic/Eastern Europe (Latin-2)
(defvar cp852-decode-table
  ;; Nth element is the code of a cp852 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-2 (+ N 160)).
  ;; The element nil means there's no corresponding cp852 glyph.
  [
   255 164 244 157 207 149 151 245 249 230 184 155 141 240 166 189
   248 165 247 136 239 150 152 243 242 231 173 156 171 241 167 190
   232 181 182 198 142 145 143 128 172 144 168 211 183 214 215 210
   209 227 213 224 226 138 153 158 252 222 233 235 154 237 221 225
   234 160 131 199 132 146 134 135 159 130 169 137 216 161 140 212
   208 228 229 162 147 139 148 246 253 133 163 251 129 236 238 250]
  "Table for converting ISO-8859-2 characters into codepage 852 glyphs.")
(setplist 'cp852-decode-table
          '(charset latin-iso8859-2 language "Latin-2" offset 160))
275
276 ;; Russian
(defvar cp855-decode-table
  ;; Nth element is the code of a cp855 glyph for the multibyte
  ;; character created by (make-char 'cyrillic-iso8859-5 (+ N 160)).
  ;; The element nil means there's no corresponding cp855 glyph.
  [
   255 133 129 131 135 137 139 141 143 145 147 149 151 240 153 155
   161 163 236 173 167 169 234 244 184 190 199 209 211 213 215 221
   226 228 230 232 171 182 165 252 246 250 159 242 238 248 157 224
   160 162 235 172 166 168 233 243 183 189 198 208 210 212 214 216
   225 227 229 231 170 181 164 251 245 249 158 241 237 247 156 222
   239 132 128 130 134 136 138 140 142 144 146 148 150 253 152 154]
  "Table for converting ISO-8859-5 characters into codepage 855 glyphs.")
(setplist 'cp855-decode-table
          '(charset cyrillic-iso8859-5 language "Cyrillic-ISO" offset 160))
288
289 ;; Turkish
(defvar cp857-decode-table
  ;; Nth element is the code of a cp857 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-3 (+ N 160)).
  ;; The element nil means there's no corresponding cp857 glyph.
  ;;
  ;; The table must hold exactly 96 elements (6 rows of 16), because
  ;; the element index is what selects the ISO-8859-3 character.  The
  ;; code points 0xA5, 0xAE, 0xBE, 0xC3, 0xD0, 0xE3 and 0xF0 are
  ;; unassigned in ISO-8859-3; they need an explicit nil placeholder so
  ;; that the entries after them stay aligned with their characters.
  [
   255 nil nil 156 207 nil nil 245 249 152 158 166 nil 240 nil nil
   248 nil 253 252 239 nil nil nil nil 141 159 167 nil 171 nil nil
   183 181 182 nil 142 nil nil 128 212 144 210 211 222 214 215 216
   nil 165 227 224 226 nil 153 232 nil 235 233 234 154 nil nil 225
   133 160 131 nil 132 nil nil 135 138 130 136 137 236 161 140 139
   nil 164 149 162 147 nil 148 246 nil 151 163 150 129 nil nil 250]
  "Table for converting ISO-8859-3 characters into codepage 857 glyphs.")
(setplist 'cp857-decode-table
          '(charset latin-iso8859-3 language "Latin-3" offset 160))
301
302 ;; Portuguese
(defvar cp860-decode-table
  ;; Nth element is the code of a cp860 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp860 glyph.
  [
   255 173 155 156 nil nil 179 nil nil nil 166 174 170 nil nil nil
   nil 241 253 nil nil nil nil 249 nil nil 167 175 172 171 nil 168
   145 134 143 142 nil nil nil 128 146 144 137 nil 152 nil 139 nil
   nil 165 159 169 140 153 nil nil nil 157 150 nil 154 nil nil nil
   133 160 131 132 nil nil nil 135 138 130 136 nil 141 161 nil nil
   nil 164 149 162 147 148 nil 246 nil 151 163 nil 129 nil nil nil]
  "Table for converting ISO-8859-1 characters into codepage 860 glyphs.")
(setplist 'cp860-decode-table
          '(charset latin-iso8859-1 language "Latin-1" offset 160))
314
315 ;; Icelandic
(defvar cp861-decode-table
  ;; Nth element is the code of a cp861 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp861 glyph.
  [
   255 173 nil 156 nil nil nil nil nil nil nil 174 170 nil nil nil
   nil 241 253 nil nil nil nil 249 nil nil nil 175 172 171 nil 168
   nil 164 nil nil 142 143 146 128 nil 144 nil nil nil 165 nil nil
   139 nil 159 166 nil nil 153 nil 157 nil 167 nil 154 151 141 nil
   133 160 131 nil 132 134 145 135 138 130 136 137 nil 161 nil nil
   140 nil nil 162 147 nil 148 246 155 nil 163 150 129 152 149 nil]
  "Table for converting ISO-8859-1 characters into codepage 861 glyphs.")
(setplist 'cp861-decode-table
          '(charset latin-iso8859-1 language "Latin-1" offset 160))
327
328 ;; Hebrew
(defvar cp862-decode-table
  ;; Nth element is the code of a cp862 glyph for the multibyte
  ;; character created by (make-char 'hebrew-iso8859-8 (+ N 160)).
  ;; The element nil means there's no corresponding cp862 glyph.
  [
   255 173 155 156 nil 157 179 nil nil nil nil 174 170 196 nil nil
   248 241 253 nil nil 230 nil 249 nil nil 246 175 172 171 nil nil
   nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
   nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil 205
   128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
   144 145 146 147 148 149 150 151 152 153 154 nil nil nil nil nil]
  "Table for converting ISO-8859-8 characters into codepage 862 glyphs.")
;; The `charset', `language' and `offset' properties are what
;; `cp-charset-for-codepage', `cp-language-for-codepage' and
;; `cp-offset-for-codepage' return for this codepage.
(setplist 'cp862-decode-table
          '(charset hebrew-iso8859-8 language "Hebrew" offset 160))
343
344 ;; French Canadian
(defvar cp863-decode-table
  ;; Nth element is the code of a cp863 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp863 glyph.
  [
   255 nil 155 156 152 nil 160 143 164 nil nil 174 170 nil nil 167
   nil 241 253 166 161 nil 134 249 165 nil nil 175 172 171 173 nil
   142 nil 132 nil nil nil nil 128 145 144 146 148 nil nil 168 149
   nil nil nil nil 153 nil nil nil nil 157 nil 158 154 nil nil nil
   133 nil 131 nil nil nil nil 135 138 130 136 137 141 nil 140 139
   nil nil nil 162 147 nil nil 246 nil 151 163 150 129 nil nil nil]
  "Table for converting ISO-8859-1 characters into codepage 863 glyphs.")
(setplist 'cp863-decode-table
          '(charset latin-iso8859-1 language "Latin-1" offset 160))
356
357 ;; Arabic
358 ;; FIXME: Emacs doesn't seem to support the "Arabic" language
359 ;; environment yet.  So this is only partially usable, for now.
(defvar cp864-decode-table
  ;; Nth element is the code of a cp864 glyph for the multibyte
  ;; character created by (make-char 'arabic-iso8859-6 (+ N 160)).
  ;; The element nil means there's no corresponding cp864 glyph.
  [
   255 nil nil nil 164 nil nil nil nil nil nil nil 172 161 nil nil
   nil nil nil nil nil nil nil nil nil nil nil 187 nil nil nil 191
   nil 193 194 195 196 nil 198 199 169 201 170 171 173 174 175 207
   208 209 210 188 189 190 235 215 216 223 238 nil nil nil nil nil
   224 247 248 252 251 239 242 243 232 233 253 nil nil nil nil nil
   nil 241 nil nil nil nil nil nil nil nil nil nil nil nil nil nil]
  "Table for converting ISO-8859-6 characters into codepage 864 glyphs.")
;; The `language' property is nil: no language environment is named
;; for this codepage.
(setplist 'cp864-decode-table
          '(charset arabic-iso8859-6 language nil offset 160))
371
372 ;; Arabic OEM codepage used by Windows
373 ;; FIXME: Emacs doesn't seem to support the "Arabic" language
374 ;; environment yet.  So this is only partially usable, for now.
(defvar cp720-decode-table
  ;; Nth element is the code of a cp720 glyph for the multibyte
  ;; character created by (make-char 'arabic-iso8859-6 (+ N 160)).
  ;; The element nil means there's no corresponding cp720 glyph.
  [
   255 nil nil nil 148 nil nil nil nil nil nil nil nil 196 nil nil
   nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
   nil 152 153 154 155 157 158 159 160 161 162 163 164 165 166 167
   168 169 170 171 172 173 224 225 226 227 228 nil nil nil nil nil
   149 229 231 232 233 234 235 236 237 238 239 241 242 243 244 245
   246 145 146 nil nil nil nil nil nil nil nil nil nil nil nil nil]
  "Table for converting ISO-8859-6 characters into codepage 720 glyphs.")
;; The `language' property is nil: no language environment is named
;; for this codepage.
(setplist 'cp720-decode-table
          '(charset arabic-iso8859-6 language nil offset 160))
386
387
388 ;; Nordic (Norwegian/Danish)
(defvar cp865-decode-table
  ;; Nth element is the code of a cp865 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp865 glyph.
  [
   255 173 nil 156 nil nil nil nil nil nil 166 174 170 nil nil nil
   nil 241 253 nil nil nil nil 249 nil nil 167 175 172 171 nil 168
   nil nil nil nil 142 143 146 128 nil 144 nil nil nil nil nil nil
   nil 165 nil nil nil nil 153 nil 157 nil nil nil 154 nil nil nil
   133 160 131 nil 132 134 145 135 138 130 136 137 141 161 140 139
   nil 164 149 162 147 nil 148 246 155 151 163 150 129 nil nil 152]
  "Table for converting ISO-8859-1 characters into codepage 865 glyphs.")
(setplist 'cp865-decode-table
          '(charset latin-iso8859-1 language "Latin-1" offset 160))
400
401 ;; Russian (Yes, another one!  This one's supposed to be used
402 ;; on Windows as the Russian OEM code page.)
(defvar cp866-decode-table
  ;; Nth element is the code of a cp866 glyph for the multibyte
  ;; character created by (make-char 'cyrillic-iso8859-5 (+ N 160)).
  ;; The element nil means there's no corresponding cp866 glyph.
  [
   255 240 nil nil 242 nil nil 244 nil nil nil nil nil nil 246 nil
   128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
   144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
   160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
   224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
   252 241 nil nil 243 nil nil 245 nil nil nil nil nil nil 247 nil]
  "Table for converting ISO-8859-5 characters into codepage 866 glyphs.")
(setplist 'cp866-decode-table
          '(charset cyrillic-iso8859-5 language "Cyrillic-ISO" offset 160))
414
415 ;; Greek (yes, another one!)
(defvar cp869-decode-table
  ;; Nth element is the code of a cp869 glyph for the multibyte
  ;; character created by (make-char 'greek-iso8859-7 (+ N 160)).
  ;; The element nil means there's no corresponding cp869 glyph.
  [
   255 139 140 156 nil nil 138 245 249 151 nil 174 137 240 nil 142
   248 241 153 154 239 247 134 136 141 143 144 175 146 171 149 152
   161 164 165 166 167 168 169 170 172 173 181 182 183 184 189 190
   198 199 nil 207 208 209 210 211 212 213 145 150 155 157 158 159
   252 214 215 216 221 222 224 225 226 227 228 229 230 231 232 233
   234 235 237 236 238 242 243 244 246 250 160 251 162 163 253 nil]
  "Table for converting ISO-8859-7 characters into codepage 869 glyphs.")
(setplist 'cp869-decode-table
          '(charset greek-iso8859-7 language "Greek" offset 160))
427
428 ;; Greek OEM codepage used by Windows
(defvar cp737-decode-table
  ;; Nth element is the code of a cp737 glyph for the multibyte
  ;; character created by (make-char 'greek-iso8859-7 (+ N 160)).
  ;; The element nil means there's no corresponding cp737 glyph.
  [
   255 nil nil nil nil nil 179 nil nil nil nil nil nil 196 nil nil
   248 241 253 nil nil nil 234 250 235 236 237 nil 238 nil 239 240
   nil 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
   143 144 nil 145 146 147 148 149 150 151 244 245 225 226 227 229
   nil 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
   167 168 170 169 171 172 173 174 175 224 228 232 230 231 233 nil]
  "Table for converting ISO-8859-7 characters into codepage 737 glyphs.")
(setplist 'cp737-decode-table
          '(charset greek-iso8859-7 language "Greek" offset 160))
440
441 ;; Conversion from codepages 770-775 to Latin-4 for Baltic countries.
442 ;; FIXME: Once we support Latin-7, these should be remapped into it.
(defvar cp770-decode-table
  ;; Nth element is the code of a cp770 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-4 (+ N 160)).
  ;; The element nil means there's no corresponding cp770 glyph.
  [
   255 143 nil nil 155 nil 156 nil 157 159 137 168 nil 196 146 nil
   248 133 nil nil nil nil 134 nil nil 158 136 152 nil nil 145 nil
   160 nil nil nil 142 nil nil 173 128 nil 139 nil 144 nil nil 161
   nil nil nil 163 nil 149 153 nil nil 167 nil nil 154 nil 166 225
   131 nil nil nil 132 nil nil 141 135 nil 138 nil 130 nil nil 140
   nil nil nil 162 nil 147 148 247 nil 151 nil nil 129 nil 150 nil]
  "Table for converting ISO-8859-4 characters into codepage 770 glyphs.")
(setplist 'cp770-decode-table
          '(charset latin-iso8859-4 language "Latin-4" offset 160))
454
(defvar cp773-decode-table
  ;; Nth element is the code of a cp773 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-4 (+ N 160)).
  ;; The element nil means there's no corresponding cp773 glyph.
  [
   255 220 nil 138 150 nil 234 190 166 246 237 149 173 196 252 nil
   208 nil nil 139 239 nil 235 nil nil 247 137 133 136 nil 253 nil
   160 nil nil nil 142 143 146 244 222 144 240 nil 242 nil nil 161
   nil 238 226 232 nil 229 153 158 157 248 nil nil 154 nil 250 225
   131 nil nil nil 132 134 145 245 223 130 241 nil 243 nil nil 140
   nil 236 147 233 nil 228 148 198 155 249 nil nil 129 nil 251 nil]
  "Table for converting ISO-8859-4 characters into codepage 773 glyphs.")
(setplist 'cp773-decode-table
          '(charset latin-iso8859-4 language "Latin-4" offset 160))
466
(defvar cp774-decode-table
  ;; Nth element is the code of a cp774 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-4 (+ N 160)).
  ;; The element nil means there's no corresponding cp774 glyph.
  ;; NOTE(review): code 248 appears twice (elements 16 and 95); when
  ;; the inverse vector is built the later element wins -- confirm
  ;; which mapping is intended.
  [
   255 181 nil nil 155 nil nil nil 245 190 nil nil nil 196 207 nil
   248 208 nil nil nil nil nil nil nil 213 nil nil nil nil 216 nil
   nil nil nil nil 142 143 146 189 182 144 183 nil 184 nil nil nil
   nil nil nil nil nil nil 153 nil nil 198 nil nil 154 nil 199 225
   nil 160 nil nil 132 134 145 212 209 130 210 137 211 161 140 nil
   nil nil nil nil 147 nil 148 246 237 214 163 150 129 nil 215 248]
  "Table for converting ISO-8859-4 characters into codepage 774 glyphs.")
(setplist 'cp774-decode-table
          '(charset latin-iso8859-4 language "Latin-4" offset 160))
478
(defvar cp775-decode-table
  ;; Nth element is the code of a cp775 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-4 (+ N 160)).
  ;; The element nil means there's no corresponding cp775 glyph.
  [
   255 181 nil 138 150 nil 234 245 166 190 237 149 173 240 207 nil
   248 208 nil 139 239 nil 235 nil nil 213 137 133 136 nil 216 nil
   160 nil nil nil 142 143 146 189 182 144 183 nil 184 nil nil 161
   nil 238 226 232 nil 229 153 158 157 198 nil nil 154 nil 199 225
   131 nil nil nil 132 134 145 212 209 130 210 nil 211 nil nil 140
   nil 236 147 233 nil 228 148 247 155 214 nil nil 129 nil 215 nil]
  "Table for converting ISO-8859-4 characters into codepage 775 glyphs.")
(setplist 'cp775-decode-table
          '(charset latin-iso8859-4 language "Latin-4" offset 160))
490
491 ;; Support for the Windows 12xx series of codepages that MS has
492 ;; butchered from the ISO-8859 specs. This does not add support for
493 ;; the extended characters that MS has added in the 128 - 159 coding
494 ;; range, only translates those characters that can be expressed in
495 ;; the corresponding iso-8859 charset.
496
497 ;; Codepage Mapping:
498 ;;
499 ;; Windows-1250: ISO-8859-2 (Central Europe) - differs in some positions
500 ;; Windows-1251: ISO-8859-5 (Cyrillic)       - differs wildly
501 ;; Windows-1252: ISO-8859-1 (West Europe)    - exact match
502 ;; Windows-1253: ISO-8859-7 (Greek)          - differs in some positions
503 ;; Windows-1254: ISO-8859-9 (Turkish)        - exact match
504 ;; Windows-1255: ISO-8859-8 (Hebrew)         - exact match
505 ;; Windows-1256: ISO-8859-6 (Arabic)         - half match
506 ;; Windows-1257: ISO-8859-4 (Baltic)         - differs, future Latin-7
507 ;; Windows-1258: VISCII (Vietnamese)         - Completely different
508
(defvar cp1250-decode-table
  ;; Nth element is the Windows-1250 code for the multibyte character
  ;; created by (make-char 'latin-iso8859-2 (+ N 160)).
  [
    160 165 162 163 164 188 140 167 168 138 170 141 143 173 142 175
    176 185 178 179 180 190 156 161 184 154 186 157 159 189 158 191
    192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
    208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
    224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
    240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 ]
  "ISO-8859-2 to Windows-1250 (Central Europe) codepage decoding table.")
(setplist 'cp1250-decode-table
          '(charset latin-iso8859-2 language "Latin-2" offset 160))
520
(defvar cp1251-decode-table
  ;; Nth element is the Windows-1251 code for the multibyte character
  ;; created by (make-char 'cyrillic-iso8859-5 (+ N 160)).
  [
    160 168 128 129 170 189 178 175 163 138 140 142 141 173 161 143
    192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
    208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
    224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
    240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
    185 184 144 131 186 190 179 191 188 154 156 158 157 167 162 159 ]
  "ISO-8859-5 to Windows-1251 (Cyrillic) codepage decoding table.")
(setplist 'cp1251-decode-table
          '(charset cyrillic-iso8859-5 language "Cyrillic-ISO" offset 160))
532
533 ;; cp1253 is missing nbsp so we cannot quite translate perfectly. It
534 ;; also has two micro/mu characters which would require more complex
535 ;; processing to accommodate.
(defvar cp1253-decode-table
  ;; Nth element is the Windows-1253 code for the multibyte character
  ;; created by (make-char 'greek-iso8859-7 (+ N 160)).
  ;; The element nil means no Windows-1253 code is given for it.
  [
    nil 145 146 163 nil nil 166 167 168 169 nil 171 172 173 nil 151
    176 177 178 179 180 161 162 183 184 185 186 187 188 189 190 191
    192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
    208 209 nil 211 212 213 214 215 216 217 218 219 220 221 222 223
    224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
    240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 nil ]
  "ISO-8859-7 to Windows-1253 (Greek) codepage decoding table.")
(setplist 'cp1253-decode-table
          '(charset greek-iso8859-7 language "Greek" offset 160))
547
548 ;; Since Latin-7 is not yet official, and Emacs does not support it,
549 ;; provide translation between Windows-1257 and Latin-4 the best we
550 ;; can.
(defvar cp1257-decode-table
  ;; Nth element is the Windows-1257 code for the multibyte character
  ;; created by (make-char 'latin-iso8859-4 (+ N 160)).
  ;; The element nil means no Windows-1257 code is given for it.
  [
    160 192 nil 170 164 nil 207 167 nil 208 199 204 nil 173 222 nil
    176 224 nil 186 nil nil 239 nil nil 240 231 236 nil nil 254 nil
    194 nil nil nil 196 197 175 193 200 201 198 nil 203 nil nil 206
    nil 210 212 205 nil 213 214 215 168 216 nil nil 220 nil 219 223
    226 nil nil nil 228 229 191 225 232 233 230 nil 235 nil nil 238
    nil 242 244 237 nil 245 246 247 184 248 nil nil 252 nil 251 nil ]
  "ISO-8859-4 to Windows-1257 (Baltic) codepage decoding table.")
(setplist 'cp1257-decode-table
          '(charset latin-iso8859-4 language "Latin-4" offset 160))
562
563 ;;;###autoload
(defun cp-make-coding-systems-for-codepage (codepage iso-name offset)
  "Create a coding system converting IBM CODEPAGE into charset ISO-NAME.
CODEPAGE is a string; the coding system is named by interning it, and
the conversion data is read from the variable `CODEPAGE-decode-table'.
OFFSET is the offset, from the beginning of the 8-bit ASCII table, of
the first character of ISO-NAME.

The created coding system has the usual 3 subsidiary systems: for Unix-,
DOS- and Mac-style EOL conversion.  However, unlike built-in coding
systems, the Mac-style EOL conversion is currently not supported by the
decoder and encoder created by this function."
  (let* ((table-var (intern (format "%s-decode-table" codepage)))
         (nonascii-var
          (intern (format "%s-nonascii-translation-table" codepage)))
         (decode-sym
          (intern (format "%s-decode-translation-table" codepage)))
         (encode-sym
          (intern (format "%s-encode-translation-table" codepage))))
    ;; Build the translation table from the decoding vector and store
    ;; it in the variable `CODEPAGE-nonascii-translation-table'.
    (set nonascii-var
         (make-translation-table-from-vector
          (cp-decoding-vector-for-codepage
           (symbol-value table-var) iso-name offset)))
    ;; Extra slot 0 of that table serves as the encoding table.
    (define-translation-table encode-sym
      (char-table-extra-slot (symbol-value nonascii-var) 0))
    ;; For charsets other than ascii, eight-bit-* and ISO-NAME, set
    ;; `?' for one-column charsets, and some Japanese character for
    ;; wide-column charsets.  The CCL encoder converts that Japanese
    ;; character to either dos-unsupported-char-glyph or "??".
    (let ((encode-map (char-table-extra-slot (symbol-value nonascii-var) 0))
          (fallback (cond ((not (eq system-type 'ms-dos)) ??)
                          (dos-unsupported-char-glyph
                           (logand dos-unsupported-char-glyph 255))
                          (t 127)))
          (wide-marker (make-char 'japanese-jisx0208 32 32))
          (kept (list 'ascii 'eight-bit-control 'eight-bit-graphic
                      iso-name)))
      (dolist (cs charset-list)
        (unless (memq cs kept)
          (aset encode-map (make-char cs)
                (if (= (charset-width cs) 1) fallback wide-marker)))))
    (define-translation-table decode-sym
      (symbol-value nonascii-var))
    (cp-coding-system-for-codepage-1
     (intern codepage) ?D iso-name decode-sym encode-sym)))
611
(defun cp-codepage-decoder (codepage)
  "Return the symbol naming the decode table of CODEPAGE, or nil.
CODEPAGE may be a string or a symbol.  The value is the already
interned symbol `CODEPAGE-decode-table'; it is nil when no such symbol
exists or when CODEPAGE is neither a string nor a symbol."
  (let ((name (cond ((symbolp codepage) (symbol-name codepage))
                    (t codepage))))
    (and (stringp name)
         (intern-soft (format "%s-decode-table" name)))))
620
621 ;;;###autoload
(defun cp-charset-for-codepage (codepage)
  "Return the charset for which there is a translation table to DOS CODEPAGE.
CODEPAGE is the codepage name as understood by `cp-codepage-decoder'.
Signal an error if no decode table is known for CODEPAGE."
  (get (or (cp-codepage-decoder codepage)
           (error "Unsupported codepage %s" codepage))
       'charset))
629
630 ;;;###autoload
(defun cp-language-for-codepage (codepage)
  "Return the name of the MULE language environment for CODEPAGE.
CODEPAGE is the codepage name as understood by `cp-codepage-decoder'.
Signal an error if no decode table is known for CODEPAGE."
  (get (or (cp-codepage-decoder codepage)
           (error "Unsupported codepage %s" codepage))
       'language))
638
639 ;;;###autoload
(defun cp-offset-for-codepage (codepage)
  "Return the offset to be used in setting up coding systems for CODEPAGE.
CODEPAGE is the codepage name as understood by `cp-codepage-decoder'.
Signal an error if no decode table is known for CODEPAGE."
  (get (or (cp-codepage-decoder codepage)
           (error "Unsupported codepage %s" codepage))
       'offset))
647
648 ;;;###autoload
(defun cp-supported-codepages ()
  "Return an alist of supported codepages.

Each association in the alist has the form (NNN . CHARSET), where NNN is
the codepage number as a string, and CHARSET is the MULE charset which is
the closest match for the character set supported by that codepage.

A codepage NNN is supported if a variable called `cpNNN-decode-table'
exists, is a vector, and has a charset property."
  (save-match-data
    (let ((pattern "\\`cp\\([1-9][0-9][0-9][0-9]?\\)-decode-table\\'")
          found)
      ;; Scan every interned symbol and keep those that look like a
      ;; bound `cpNNN-decode-table' vector carrying a `charset' property.
      (mapatoms
       (lambda (sym)
         (let ((name (symbol-name sym))
               charset)
           (when (and (boundp sym)
                      (string-match pattern name)
                      (vectorp (symbol-value sym))
                      (setq charset (get sym 'charset)))
             (setq found
                   (cons (cons (match-string 1 name) charset) found))))))
      found)))
671
672 ;;;###autoload
(defun codepage-setup (codepage)
  "Create a coding system cpCODEPAGE to support the IBM codepage CODEPAGE.

These coding systems are meant for encoding and decoding 8-bit non-ASCII
characters used by the IBM codepages, typically in conjunction with files
read/written by MS-DOS software, or for display on the MS-DOS terminal.

When called interactively, prompt for the codepage among those returned
by `cp-supported-codepages', defaulting to 437."
  (interactive
   (let ((choices (cp-supported-codepages))
         (completion-ignore-case t))
     (list (completing-read "Setup DOS Codepage (default 437): " choices
                            nil t nil nil "437"))))
  (let* ((name (format "cp%s" codepage))
         (existing (intern-soft name)))
    (cond
     ;; Already a coding system: do nothing and return the result of
     ;; the `coding-system-p' test.
     ((and existing (coding-system-p existing)))
     (t
      (cp-make-coding-systems-for-codepage
       name (cp-charset-for-codepage name) (cp-offset-for-codepage name))))))
690
691 ;; Add DOS codepages to `non-iso-charset-alist'.
;; Once "mule-diag" is loaded, register every supported codepage in
;; `non-iso-charset-alist', unless an entry for it is already there.
(eval-after-load "mule-diag"
  '(dolist (elt (cp-supported-codepages))
     ;; ELT is (CODEPAGE . CHARSET), where CODEPAGE is a string
     ;; (e.g. "850"), CHARSET is a charset that characters in CODEPAGE
     ;; are mapped to.
     (let ((cp-symbol (intern (concat "cp" (car elt)))))
       (unless (assq cp-symbol non-iso-charset-alist)
         (setq non-iso-charset-alist
               (cons (list cp-symbol
                           (list 'ascii (cdr elt))
                           ;; `string-to-number' replaces the obsolete
                           ;; `string-to-int' here.
                           `(lambda (code)
                              (decode-codepage-char
                               ,(string-to-number (car elt))
                               code))
                           (list (list 0 255)))
                     non-iso-charset-alist))))))
709
710 (provide 'codepage)
711
712 ;;; arch-tag: 80328de8-b94e-4386-be26-5876105731f0
713 ;;; codepage.el ends here
714
Note: See TracBrowser for help on using the browser.