| 1 |
|
|---|
| 2 |
|
|---|
| 3 |
|
|---|
| 4 |
|
|---|
| 5 |
|
|---|
| 6 |
|
|---|
| 7 |
|
|---|
| 8 |
|
|---|
| 9 |
|
|---|
| 10 |
|
|---|
| 11 |
|
|---|
| 12 |
|
|---|
| 13 |
|
|---|
| 14 |
|
|---|
| 15 |
|
|---|
| 16 |
|
|---|
| 17 |
|
|---|
| 18 |
|
|---|
| 19 |
|
|---|
| 20 |
|
|---|
| 21 |
|
|---|
| 22 |
|
|---|
| 23 |
|
|---|
| 24 |
|
|---|
| 25 |
|
|---|
| 26 |
|
|---|
| 27 |
|
|---|
| 28 |
|
|---|
| 29 |
|
|---|
| 30 |
|
|---|
| 31 |
|
|---|
| 32 |
|
|---|
| 33 |
|
|---|
| 34 |
|
|---|
| 35 |
|
|---|
| 36 |
|
|---|
| 37 |
|
|---|
| 38 |
|
|---|
| 39 |
|
|---|
| 40 |
(make-coding-system |
|---|
| 41 |
'iso-2022-cn 2 ?C |
|---|
| 42 |
"ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN)." |
|---|
| 43 |
'(ascii |
|---|
| 44 |
(nil chinese-gb2312 chinese-cns11643-1) |
|---|
| 45 |
(nil chinese-cns11643-2) |
|---|
| 46 |
nil |
|---|
| 47 |
nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil |
|---|
| 48 |
init-bol) |
|---|
| 49 |
'((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2) |
|---|
| 50 |
(mime-charset . iso-2022-cn))) |
|---|
| 51 |
|
|---|
| 52 |
(define-coding-system-alias 'chinese-iso-7bit 'iso-2022-cn) |
|---|
| 53 |
|
|---|
| 54 |
(make-coding-system |
|---|
| 55 |
'iso-2022-cn-ext 2 ?C |
|---|
| 56 |
"ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN-EXT)." |
|---|
| 57 |
'(ascii |
|---|
| 58 |
(nil chinese-gb2312 chinese-cns11643-1) |
|---|
| 59 |
(nil chinese-cns11643-2) |
|---|
| 60 |
(nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 |
|---|
| 61 |
chinese-cns11643-6 chinese-cns11643-7) |
|---|
| 62 |
nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil |
|---|
| 63 |
init-bol) |
|---|
| 64 |
'((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2 |
|---|
| 65 |
chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 |
|---|
| 66 |
chinese-cns11643-6 chinese-cns11643-7) |
|---|
| 67 |
(mime-charset . iso-2022-cn-ext))) |
|---|
| 68 |
|
|---|
| 69 |
|
|---|
| 70 |
|
|---|
| 71 |
|
|---|
| 72 |
|
|---|
| 73 |
|
|---|
| 74 |
(make-coding-system |
|---|
| 75 |
'chinese-iso-8bit 2 ?c |
|---|
| 76 |
"ISO 2022 based EUC encoding for Chinese GB2312 (MIME:GB2312)." |
|---|
| 77 |
'(ascii chinese-gb2312 nil nil |
|---|
| 78 |
nil ascii-eol ascii-cntl nil nil nil nil) |
|---|
| 79 |
'((safe-charsets ascii chinese-gb2312) |
|---|
| 80 |
(mime-charset . gb2312))) |
|---|
| 81 |
|
|---|
| 82 |
(define-coding-system-alias 'cn-gb-2312 'chinese-iso-8bit) |
|---|
| 83 |
(define-coding-system-alias 'euc-china 'chinese-iso-8bit) |
|---|
| 84 |
(define-coding-system-alias 'euc-cn 'chinese-iso-8bit) |
|---|
| 85 |
(define-coding-system-alias 'cn-gb 'chinese-iso-8bit) |
|---|
| 86 |
(define-coding-system-alias 'gb2312 'chinese-iso-8bit) |
|---|
| 87 |
(define-coding-system-alias 'cp936 'chinese-iso-8bit) |
|---|
| 88 |
|
|---|
| 89 |
(make-coding-system |
|---|
| 90 |
'chinese-hz 0 ?z |
|---|
| 91 |
"Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)." |
|---|
| 92 |
nil |
|---|
| 93 |
'((safe-charsets ascii chinese-gb2312) |
|---|
| 94 |
(mime-charset . hz-gb-2312) |
|---|
| 95 |
(post-read-conversion . post-read-decode-hz) |
|---|
| 96 |
(pre-write-conversion . pre-write-encode-hz))) |
|---|
| 97 |
|
|---|
| 98 |
(define-coding-system-alias 'hz-gb-2312 'chinese-hz) |
|---|
| 99 |
(define-coding-system-alias 'hz 'chinese-hz) |
|---|
| 100 |
|
|---|
| 101 |
(defun post-read-decode-hz (len) |
|---|
| 102 |
(let ((pos (point)) |
|---|
| 103 |
(buffer-modified-p (buffer-modified-p)) |
|---|
| 104 |
last-coding-system-used) |
|---|
| 105 |
(prog1 |
|---|
| 106 |
(decode-hz-region pos (+ pos len)) |
|---|
| 107 |
(set-buffer-modified-p buffer-modified-p)))) |
|---|
| 108 |
|
|---|
| 109 |
(defun pre-write-encode-hz (from to) |
|---|
| 110 |
(let ((buf (current-buffer))) |
|---|
| 111 |
(set-buffer (generate-new-buffer " *temp*")) |
|---|
| 112 |
(if (stringp from) |
|---|
| 113 |
(insert from) |
|---|
| 114 |
(insert-buffer-substring buf from to)) |
|---|
| 115 |
(let (last-coding-system-used) |
|---|
| 116 |
(encode-hz-region 1 (point-max))) |
|---|
| 117 |
nil)) |
|---|
| 118 |
|
|---|
| 119 |
(set-language-info-alist |
|---|
| 120 |
"Chinese-GB" '((charset chinese-gb2312 chinese-sisheng) |
|---|
| 121 |
(coding-system chinese-iso-8bit iso-2022-cn chinese-hz) |
|---|
| 122 |
(coding-priority chinese-iso-8bit chinese-big5 iso-2022-cn) |
|---|
| 123 |
(input-method . "chinese-py-punct") |
|---|
| 124 |
(features china-util) |
|---|
| 125 |
(sample-text . "Chinese ($AVPND(B,$AFUM(;0(B,$A::So(B) $ADc:C(B") |
|---|
| 126 |
(documentation . "Support for Chinese GB2312 character set.") |
|---|
| 127 |
(tutorial . "TUTORIAL.cn")) |
|---|
| 128 |
'("Chinese")) |
|---|
| 129 |
|
|---|
| 130 |
|
|---|
| 131 |
|
|---|
| 132 |
|
|---|
| 133 |
|
|---|
| 134 |
(make-coding-system |
|---|
| 135 |
'chinese-big5 3 ?B |
|---|
| 136 |
"BIG5 8-bit encoding for Chinese (MIME:Big5)." |
|---|
| 137 |
nil |
|---|
| 138 |
'((safe-charsets ascii chinese-big5-1 chinese-big5-2) |
|---|
| 139 |
(mime-charset . big5) |
|---|
| 140 |
(charset-origin-alist (chinese-big5-1 "BIG5" encode-big5-char) |
|---|
| 141 |
(chinese-big5-2 "BIG5" encode-big5-char)))) |
|---|
| 142 |
|
|---|
| 143 |
(define-coding-system-alias 'big5 'chinese-big5) |
|---|
| 144 |
(define-coding-system-alias 'cn-big5 'chinese-big5) |
|---|
| 145 |
(define-coding-system-alias 'cp950 'chinese-big5) |
|---|
| 146 |
|
|---|
| 147 |
|
|---|
| 148 |
(define-ccl-program ccl-encode-big5-font |
|---|
| 149 |
`(0 |
|---|
| 150 |
|
|---|
| 151 |
|
|---|
| 152 |
|
|---|
| 153 |
|
|---|
| 154 |
|
|---|
| 155 |
((r2 = ((((r1 - ?\x21) * 94) + r2) - ?\x21)) |
|---|
| 156 |
(if (r0 == ,(charset-id 'chinese-big5-2)) (r2 += 6280)) |
|---|
| 157 |
(r1 = ((r2 / 157) + ?\xA1)) |
|---|
| 158 |
(r2 %= 157) |
|---|
| 159 |
(if (r2 < ?\x3F) (r2 += ?\x40) (r2 += ?\x62)))) |
|---|
| 160 |
"CCL program to encode a Big5 code to code point of Big5 font.") |
|---|
| 161 |
|
|---|
| 162 |
(setq font-ccl-encoder-alist |
|---|
| 163 |
(cons (cons "big5" ccl-encode-big5-font) font-ccl-encoder-alist)) |
|---|
| 164 |
|
|---|
| 165 |
(set-language-info-alist |
|---|
| 166 |
"Chinese-BIG5" '((charset chinese-big5-1 chinese-big5-2) |
|---|
| 167 |
(coding-system chinese-big5 chinese-iso-7bit) |
|---|
| 168 |
(coding-priority chinese-big5 iso-2022-cn chinese-iso-8bit) |
|---|
| 169 |
(input-method . "chinese-py-punct-b5") |
|---|
| 170 |
(features china-util) |
|---|
| 171 |
(sample-text . "Cantonese ($(0GnM$(B,$(0N]0*Hd(B) $(0*/=((B, $(0+$)p(B") |
|---|
| 172 |
(documentation . "Support for Chinese Big5 character set.") |
|---|
| 173 |
(tutorial . "TUTORIAL.zh")) |
|---|
| 174 |
'("Chinese")) |
|---|
| 175 |
|
|---|
| 176 |
|
|---|
| 177 |
|
|---|
| 178 |
|
|---|
| 179 |
|
|---|
| 180 |
(defvar big5-to-cns (make-translation-table) |
|---|
| 181 |
"Translation table for encoding to `euc-tw'.") |
|---|
| 182 |
|
|---|
| 183 |
(unless (get 'big5-to-cns 'translation-table) |
|---|
| 184 |
(define-translation-table 'big5-to-cns big5-to-cns)) |
|---|
| 185 |
|
|---|
| 186 |
(define-ccl-program ccl-decode-euc-tw |
|---|
| 187 |
|
|---|
| 188 |
|
|---|
| 189 |
|
|---|
| 190 |
|
|---|
| 191 |
|
|---|
| 192 |
`(2 |
|---|
| 193 |
|
|---|
| 194 |
((r4 = ,(charset-id 'chinese-cns11643-1)) |
|---|
| 195 |
(r5 = ,(charset-id 'chinese-cns11643-2)) |
|---|
| 196 |
(r6 = ,(charset-id 'chinese-cns11643-3)) |
|---|
| 197 |
(loop |
|---|
| 198 |
(read-if (r0 < #x80) |
|---|
| 199 |
|
|---|
| 200 |
(write-repeat r0) |
|---|
| 201 |
|
|---|
| 202 |
(if (r0 == #x8E) |
|---|
| 203 |
|
|---|
| 204 |
(read-if (r1 < #xA1) |
|---|
| 205 |
|
|---|
| 206 |
((write r0) |
|---|
| 207 |
(write-repeat r1)) |
|---|
| 208 |
(if (r1 > #xA7) |
|---|
| 209 |
|
|---|
| 210 |
((write r0) |
|---|
| 211 |
(write-repeat r1)) |
|---|
| 212 |
|
|---|
| 213 |
(read-if (r2 < #xA1) |
|---|
| 214 |
|
|---|
| 215 |
((write r0 r1) |
|---|
| 216 |
(write-repeat r2)) |
|---|
| 217 |
(read-if (r3 < #xA1) |
|---|
| 218 |
|
|---|
| 219 |
((write r0 r1 r2) |
|---|
| 220 |
(write-repeat r3)) |
|---|
| 221 |
|
|---|
| 222 |
((branch (r1 - #xA1) |
|---|
| 223 |
(r1 = r4) |
|---|
| 224 |
(r1 = r5) |
|---|
| 225 |
(r1 = r6) |
|---|
| 226 |
(r1 = ,(charset-id 'chinese-cns11643-4)) |
|---|
| 227 |
(r1 = ,(charset-id 'chinese-cns11643-5)) |
|---|
| 228 |
(r1 = ,(charset-id 'chinese-cns11643-6)) |
|---|
| 229 |
(r1 = ,(charset-id 'chinese-cns11643-7))) |
|---|
| 230 |
(r2 = ((((r2 - #x80) << 7) + r3) - #x80)) |
|---|
| 231 |
(write-multibyte-character r1 r2) |
|---|
| 232 |
(repeat)))))) |
|---|
| 233 |
|
|---|
| 234 |
(if (r0 < #xA1) |
|---|
| 235 |
|
|---|
| 236 |
(write-repeat r0) |
|---|
| 237 |
(read-if (r1 < #xA1) |
|---|
| 238 |
|
|---|
| 239 |
((write r0) |
|---|
| 240 |
(write-repeat r1)) |
|---|
| 241 |
|
|---|
| 242 |
((r1 = ((((r0 - #x80) << 7) + r1) - #x80)) |
|---|
| 243 |
(write-multibyte-character r4 r1) |
|---|
| 244 |
(repeat))))))))) |
|---|
| 245 |
"CCL program to decode EUC-TW encoding." |
|---|
| 246 |
) |
|---|
| 247 |
|
|---|
| 248 |
(define-ccl-program ccl-encode-euc-tw |
|---|
| 249 |
`(2 |
|---|
| 250 |
|
|---|
| 251 |
((r2 = ,(charset-id 'ascii)) |
|---|
| 252 |
(r3 = ,(charset-id 'chinese-big5-1)) |
|---|
| 253 |
(r4 = ,(charset-id 'chinese-big5-2)) |
|---|
| 254 |
(r5 = ,(charset-id 'chinese-cns11643-1)) |
|---|
| 255 |
(r6 = ,(charset-id 'chinese-cns11643-2)) |
|---|
| 256 |
(loop |
|---|
| 257 |
(read-multibyte-character r0 r1) |
|---|
| 258 |
(if (r0 == r2) |
|---|
| 259 |
(write-repeat r1) |
|---|
| 260 |
( |
|---|
| 261 |
(if (r0 == r3) |
|---|
| 262 |
(translate-character big5-to-cns r0 r1) |
|---|
| 263 |
(if (r0 == r4) |
|---|
| 264 |
(translate-character big5-to-cns r0 r1))) |
|---|
| 265 |
(if (r0 == r5) |
|---|
| 266 |
(r0 = #xA1) |
|---|
| 267 |
(if (r0 == r6) |
|---|
| 268 |
(r0 = #xA2) |
|---|
| 269 |
(if (r0 == ,(charset-id 'chinese-cns11643-3)) |
|---|
| 270 |
(r0 = #xA3) |
|---|
| 271 |
(if (r0 == ,(charset-id 'chinese-cns11643-4)) |
|---|
| 272 |
(r0 = #xA4) |
|---|
| 273 |
(if (r0 == ,(charset-id 'chinese-cns11643-5)) |
|---|
| 274 |
(r0 = #xA5) |
|---|
| 275 |
(if (r0 == ,(charset-id 'chinese-cns11643-6)) |
|---|
| 276 |
(r0 = #xA6) |
|---|
| 277 |
(if (r0 == ,(charset-id 'chinese-cns11643-7)) |
|---|
| 278 |
(r0 = #xA7) |
|---|
| 279 |
|
|---|
| 280 |
|
|---|
| 281 |
|
|---|
| 282 |
(write-repeat #xFF)))))))))) |
|---|
| 283 |
(if (r0 != #xA1) |
|---|
| 284 |
|
|---|
| 285 |
((write #x8E) |
|---|
| 286 |
(write r0))) |
|---|
| 287 |
(write ((r1 >> 7) + #x80)) |
|---|
| 288 |
(write ((r1 % #x80) + #x80)) |
|---|
| 289 |
(repeat)))) |
|---|
| 290 |
"CCL program to encode EUC-TW encoding." |
|---|
| 291 |
) |
|---|
| 292 |
|
|---|
| 293 |
(defun euc-tw-pre-write-conversion (beg end) |
|---|
| 294 |
"Semi-dummy pre-write function effectively to autoload china-util." |
|---|
| 295 |
|
|---|
| 296 |
(require 'china-util) |
|---|
| 297 |
|
|---|
| 298 |
(coding-system-put 'euc-tw 'pre-write-conversion nil) |
|---|
| 299 |
nil) |
|---|
| 300 |
|
|---|
| 301 |
(make-coding-system |
|---|
| 302 |
'euc-tw 4 ?Z |
|---|
| 303 |
"ISO 2022 based EUC encoding for Chinese CNS11643. |
|---|
| 304 |
Big5 encoding is accepted for input also (which is then converted to CNS)." |
|---|
| 305 |
'(ccl-decode-euc-tw . ccl-encode-euc-tw) |
|---|
| 306 |
'((safe-charsets ascii |
|---|
| 307 |
chinese-big5-1 |
|---|
| 308 |
chinese-big5-2 |
|---|
| 309 |
chinese-cns11643-1 |
|---|
| 310 |
chinese-cns11643-2 |
|---|
| 311 |
chinese-cns11643-3 |
|---|
| 312 |
chinese-cns11643-4 |
|---|
| 313 |
chinese-cns11643-5 |
|---|
| 314 |
chinese-cns11643-6 |
|---|
| 315 |
chinese-cns11643-7) |
|---|
| 316 |
(valid-codes (0 . 255)) |
|---|
| 317 |
(pre-write-conversion . euc-tw-pre-write-conversion))) |
|---|
| 318 |
|
|---|
| 319 |
(define-coding-system-alias 'euc-taiwan 'euc-tw) |
|---|
| 320 |
|
|---|
| 321 |
(set-language-info-alist |
|---|
| 322 |
"Chinese-CNS" '((charset chinese-cns11643-1 chinese-cns11643-2 |
|---|
| 323 |
chinese-cns11643-3 chinese-cns11643-4 |
|---|
| 324 |
chinese-cns11643-5 chinese-cns11643-6 |
|---|
| 325 |
chinese-cns11643-7) |
|---|
| 326 |
(coding-system iso-2022-cn euc-tw) |
|---|
| 327 |
(coding-priority iso-2022-cn euc-tw chinese-big5 |
|---|
| 328 |
chinese-iso-8bit) |
|---|
| 329 |
(features china-util) |
|---|
| 330 |
(input-method . "chinese-cns-quick") |
|---|
| 331 |
(documentation . "\ |
|---|
| 332 |
Support for Chinese CNS character sets. Note that the EUC-TW coding system |
|---|
| 333 |
accepts Big5 for input also (which is then converted to CNS).")) |
|---|
| 334 |
'("Chinese")) |
|---|
| 335 |
|
|---|
| 336 |
(set-language-info-alist |
|---|
| 337 |
"Chinese-EUC-TW" '((charset chinese-cns11643-1 chinese-cns11643-2 |
|---|
| 338 |
chinese-cns11643-3 chinese-cns11643-4 |
|---|
| 339 |
chinese-cns11643-5 chinese-cns11643-6 |
|---|
| 340 |
chinese-cns11643-7 chinese-big5-1 chinese-big5-2) |
|---|
| 341 |
(coding-system euc-tw iso-2022-cn) |
|---|
| 342 |
(coding-priority euc-tw chinese-big5 iso-2022-cn |
|---|
| 343 |
chinese-iso-8bit) |
|---|
| 344 |
(features china-util) |
|---|
| 345 |
(input-method . "chinese-cns-quick") |
|---|
| 346 |
(documentation . "\ |
|---|
| 347 |
Support for Chinese, prefering the EUC-TW character set. Note that |
|---|
| 348 |
the EUC-TW coding system accepts Big5 for input also (which is then |
|---|
| 349 |
converted to CNS).")) |
|---|
| 350 |
'("Chinese")) |
|---|
| 351 |
|
|---|
| 352 |
(provide 'chinese) |
|---|
| 353 |
|
|---|
| 354 |
|
|---|
| 355 |
|
|---|
| 356 |
|
|---|