root/trunk/lisp/international/characters.el

Revision 4220, 51.1 kB (checked in by miyoshi, 9 months ago)

Sync up with Emacs22.2.

  • Property svn:eol-style set to LF
Line 
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 ;;   Free Software Foundation, Inc.
5 ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 ;;   2005, 2006, 2007, 2008
7 ;;   National Institute of Advanced Industrial Science and Technology (AIST)
8 ;;   Registration Number H14PRO021
9
10 ;; Keywords: multibyte character, character set, syntax, category
11
12 ;; This file is part of GNU Emacs.
13
14 ;; GNU Emacs is free software; you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation; either version 3, or (at your option)
17 ;; any later version.
18
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22 ;; GNU General Public License for more details.
23
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs; see the file COPYING.  If not, write to the
26 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
27 ;; Boston, MA 02110-1301, USA.
28
29 ;;; Commentary:
30
31 ;; This file contains multibyte characters.  Save this file always in
32 ;; the coding system `iso-2022-7bit'.
33
34 ;; This file does not define the syntax for Latin-N character sets;
35 ;; those are defined by the files latin-N.el.
36
37 ;;; Code:
38
39 ;; We must set utf-translate-cjk-mode to nil while loading this file
40 ;; to avoid translating CJK characters in decode-char.
41 (defvar saved-utf-translate-cjk-mode utf-translate-cjk-mode)
42 (setq utf-translate-cjk-mode nil)
43
44 ;;; Predefined categories.
45
46 ;; For each character set.
47
48 (define-category ?a "ASCII graphic characters 32-126 (ISO646 IRV:1983[4/0])")
49 (define-category ?l "Latin")
50 (define-category ?t "Thai")
51 (define-category ?g "Greek")
52 (define-category ?b "Arabic")
53 (define-category ?w "Hebrew")
54 (define-category ?y "Cyrillic")
55 (define-category ?k "Japanese katakana")
56 (define-category ?r "Japanese roman")
57 (define-category ?c "Chinese")
58 (define-category ?j "Japanese")
59 (define-category ?h "Korean")
60 (define-category ?e "Ethiopic (Ge'ez)")
61 (define-category ?v "Vietnamese")
62 (define-category ?i "Indian")
63 (define-category ?o "Lao")
64 (define-category ?q "Tibetan")
65
66 ;; For each group (row) of 2-byte character sets.
67
68 (define-category ?A "Alpha-numeric characters of 2-byte character sets")
69 (define-category ?C "Chinese (Han) characters of 2-byte character sets")
70 (define-category ?G "Greek characters of 2-byte character sets")
71 (define-category ?H "Japanese Hiragana characters of 2-byte character sets")
72 (define-category ?K "Japanese Katakana characters of 2-byte character sets")
73 (define-category ?N "Korean Hangul characters of 2-byte character sets")
74 (define-category ?Y "Cyrillic characters of 2-byte character sets")
75 (define-category ?I "Indian Glyphs")
76
77 ;; For phonetic classifications.
78
79 (define-category ?0 "consonant")
80 (define-category ?1 "base (independent) vowel")
81 (define-category ?2 "upper diacritical mark (including upper vowel)")
82 (define-category ?3 "lower diacritical mark (including lower vowel)")
83 (define-category ?4 "tone mark")
84 (define-category ?5 "symbol")
85 (define-category ?6 "digit")
86 (define-category ?7 "vowel-modifying diacritical mark")
87 (define-category ?8 "vowel-signs")
88 (define-category ?9 "semivowel lower")
89
90 ;; For filling.
91 (define-category ?| "While filling, we can break a line at this character.")
92
93 ;; For indentation calculation.
94 (define-category ?\s
95   "This character counts as a space for indentation purposes.")
96
97 ;; Keep the following for `kinsoku' processing.  See comments in
98 ;; kinsoku.el.
99 (define-category ?> "A character which can't be placed at beginning of line.")
100 (define-category ?< "A character which can't be placed at end of line.")
101
102 ;; Combining
103 (define-category ?^ "Combining diacritic or mark")
104
105 ;;; Setting syntax and category.
106
107 ;; ASCII
108
109 (let ((ch 32))
110   (while (< ch 127)                     ; All ASCII characters have
111     (modify-category-entry ch ?a)       ; the category `a' (ASCII)
112     (modify-category-entry ch ?l)       ; and `l' (Latin).
113     (setq ch (1+ ch))))
114
115 ;; Arabic character set
116
117 (let ((charsets '(arabic-iso8859-6
118                   arabic-digit
119                   arabic-1-column
120                   arabic-2-column)))
121   (while charsets
122 ;;     (modify-syntax-entry (make-char (car charsets)) "w")
123     (modify-category-entry (make-char (car charsets)) ?b)
124     (setq charsets (cdr charsets))))
125 (let ((ch #x600))
126   (while (<= ch #x6ff)
127     (modify-category-entry (decode-char 'ucs ch) ?b)
128     (setq ch (1+ ch)))
129   (setq ch #xfb50)
130   (while (<= ch #xfdff)
131     (modify-category-entry (decode-char 'ucs ch) ?b)
132     (setq ch (1+ ch)))
133   (setq ch #xfe70)
134   (while (<= ch #xfefe)
135     (modify-category-entry (decode-char 'ucs ch) ?b)
136     (setq ch (1+ ch))))
137
138 ;; Chinese character set (GB2312)
139
140 ;; (modify-syntax-entry (make-char 'chinese-gb2312) "w")
141 (modify-syntax-entry (make-char 'chinese-gb2312 33) "_")
142 (modify-syntax-entry (make-char 'chinese-gb2312 34) "_")
143 (modify-syntax-entry (make-char 'chinese-gb2312 41) "_")
144 (modify-syntax-entry ?\$A!2(B "($A!3(B")
145 (modify-syntax-entry ?\$A!4(B "($A!5(B")
146 (modify-syntax-entry ?\$A!6(B "($A!7(B")
147 (modify-syntax-entry ?\$A!8(B "($A!9(B")
148 (modify-syntax-entry ?\$A!:(B "($A!;(B")
149 (modify-syntax-entry ?\$A!<(B "($A!=(B")
150 (modify-syntax-entry ?\$A!>(B "($A!?(B")
151 (modify-syntax-entry ?\$A#((B "($A#)(B")
152 (modify-syntax-entry ?\$A#{(B "($A#}(B")
153 (modify-syntax-entry ?\$A#[(B "($A#](B")
154 (modify-syntax-entry ?\$A!3(B ")$A!2(B")
155 (modify-syntax-entry ?\$A!5(B ")$A!4(B")
156 (modify-syntax-entry ?\$A!7(B ")$A!6(B")
157 (modify-syntax-entry ?\$A!9(B ")$A!8(B")
158 (modify-syntax-entry ?\$A!;(B ")$A!:(B")
159 (modify-syntax-entry ?\$A!=(B ")$A!<(B")
160 (modify-syntax-entry ?\$A!?(B ")$A!>(B")
161 (modify-syntax-entry ?\$A#)(B ")$A#((B")
162 (modify-syntax-entry ?\$A#}(B ")$A#{(B")
163 (modify-syntax-entry ?\$A#](B ")$A#[(B")
164
165 (let ((chars "$A#,!"!##.!$#;#:#?#!!C!-!'#|#_!.!/!0!1#"!e#`!d(B"))
166   (dotimes (i (length chars))
167     (modify-syntax-entry (aref chars i) ".")))
168
169 (modify-category-entry (make-char 'chinese-gb2312) ?c)
170 (modify-category-entry (make-char 'chinese-gb2312) ?\|)
171 (modify-category-entry (make-char 'chinese-gb2312 35) ?A)
172 (modify-category-entry (make-char 'chinese-gb2312 36) ?H)
173 (modify-category-entry (make-char 'chinese-gb2312 37) ?K)
174 (modify-category-entry (make-char 'chinese-gb2312 38) ?G)
175 (modify-category-entry (make-char 'chinese-gb2312 39) ?Y)
176 (let ((row 48))
177   (while (< row 127)
178     (modify-category-entry (make-char 'chinese-gb2312 row) ?C)
179     (setq row (1+ row))))
180
181 (let ((tbl (standard-case-table)))
182   (dotimes (i 26)
183     (set-case-syntax-pair (make-char 'chinese-gb2312 #x23 (+ #x41 i))
184                           (make-char 'chinese-gb2312 #x23 (+ #x61 i)) tbl))
185   (dotimes (i 24)
186     (set-case-syntax-pair (make-char 'chinese-gb2312 #x26 (+ #x21 i))
187                           (make-char 'chinese-gb2312 #x26 (+ #x41 i)) tbl))
188   (dotimes (i 33)
189     (set-case-syntax-pair (make-char 'chinese-gb2312 #x27 (+ #x21 i))
190                           (make-char 'chinese-gb2312 #x27 (+ #x51 i)) tbl)))
191
192 ;; Chinese character set (BIG5)
193
194 (let ((from (decode-big5-char #xA141))
195       (to (decode-big5-char #xA15D)))
196   (while (< from to)
197     (modify-syntax-entry from ".")
198     (setq from (1+ from))))
199 (let ((from (decode-big5-char #xA1A5))
200       (to (decode-big5-char #xA1AD)))
201   (while (< from to)
202     (modify-syntax-entry from ".")
203     (setq from (1+ from))))
204 (let ((from (decode-big5-char #xA1AD))
205       (to (decode-big5-char #xA2AF)))
206   (while (< from to)
207     (modify-syntax-entry from "_")
208     (setq from (1+ from))))
209
210 (let ((parens "$(0!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c(B")
211       open close)
212   (dotimes (i (/ (length parens) 2))
213     (setq open (aref parens (* i 2))
214           close (aref parens (1+ (* i 2))))
215     (modify-syntax-entry open (format "(%c" close))
216     (modify-syntax-entry close (format ")%c" open))))
217
218 (let ((generic-big5-1-char (make-char 'chinese-big5-1))
219       (generic-big5-2-char (make-char 'chinese-big5-2)))
220 ;;   (modify-syntax-entry generic-big5-1-char "w")
221 ;;   (modify-syntax-entry generic-big5-2-char "w")
222
223   (modify-category-entry generic-big5-1-char ?c)
224   (modify-category-entry generic-big5-2-char ?c)
225
226   (modify-category-entry generic-big5-1-char ?C)
227   (modify-category-entry generic-big5-2-char ?C)
228
229   (modify-category-entry generic-big5-1-char ?\|)
230   (modify-category-entry generic-big5-2-char ?\|))
231
232 (let ((tbl (standard-case-table)))
233   (dotimes (i 22)
234     (set-case-syntax-pair (decode-big5-char (+ #xA2CF i))
235                           (decode-big5-char (+ #xA2CF i 26)) tbl))
236   (dotimes (i 4)
237     (set-case-syntax-pair (decode-big5-char (+ #xA2E4 i))
238                           (decode-big5-char (+ #xA340 i)) tbl))
239   (dotimes (i 24)
240     (set-case-syntax-pair (decode-big5-char (+ #xA344 i))
241                           (decode-big5-char (+ #xA344 i 24)) tbl)))
242
243
244 ;; Chinese character set (CNS11643)
245
246 (let ((cns-list '(chinese-cns11643-1
247                   chinese-cns11643-2
248                   chinese-cns11643-3
249                   chinese-cns11643-4
250                   chinese-cns11643-5
251                   chinese-cns11643-6
252                   chinese-cns11643-7))
253       generic-char)
254   (while cns-list
255     (setq generic-char (make-char (car cns-list)))
256 ;;     (modify-syntax-entry generic-char "w")
257     (modify-category-entry generic-char ?c)
258     (modify-category-entry generic-char ?C)
259     (modify-category-entry generic-char ?|)
260     (setq cns-list (cdr cns-list))))
261
262 (let ((parens "$(G!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c(B")
263       open close)
264   (dotimes (i (/ (length parens) 2))
265     (setq open (aref parens (* i 2))
266           close (aref parens (1+ (* i 2))))
267     (modify-syntax-entry open (format "(%c" close))
268     (modify-syntax-entry close (format ")%c" open))))
269
270 ;; Cyrillic character set (ISO-8859-5)
271
272 (modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)
273
274 (modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
275 (modify-syntax-entry ?,L-(B ".")
276 (modify-syntax-entry ?,Lp(B ".")
277 (modify-syntax-entry ?,L}(B ".")
278 (let ((tbl (standard-case-table)))
279   (set-case-syntax-pair ?,L!(B ?,Lq(B tbl)
280   (set-case-syntax-pair ?,L"(B ?,Lr(B tbl)
281   (set-case-syntax-pair ?,L#(B ?,Ls(B tbl)
282   (set-case-syntax-pair ?,L$(B ?,Lt(B tbl)
283   (set-case-syntax-pair ?,L%(B ?,Lu(B tbl)
284   (set-case-syntax-pair ?,L&(B ?,Lv(B tbl)
285   (set-case-syntax-pair ?,L'(B ?,Lw(B tbl)
286   (set-case-syntax-pair ?,L((B ?,Lx(B tbl)
287   (set-case-syntax-pair ?,L)(B ?,Ly(B tbl)
288   (set-case-syntax-pair ?,L*(B ?,Lz(B tbl)
289   (set-case-syntax-pair ?,L+(B ?,L{(B tbl)
290   (set-case-syntax-pair ?,L,(B ?,L|(B tbl)
291   (set-case-syntax-pair ?,L.(B ?,L~(B tbl)
292   (set-case-syntax-pair ?,L/(B ?,L(B tbl)
293   (set-case-syntax-pair ?,L0(B ?,LP(B tbl)
294   (set-case-syntax-pair ?,L1(B ?,LQ(B tbl)
295   (set-case-syntax-pair ?,L2(B ?,LR(B tbl)
296   (set-case-syntax-pair ?,L3(B ?,LS(B tbl)
297   (set-case-syntax-pair ?,L4(B ?,LT(B tbl)
298   (set-case-syntax-pair ?,L5(B ?,LU(B tbl)
299   (set-case-syntax-pair ?,L6(B ?,LV(B tbl)
300   (set-case-syntax-pair ?,L7(B ?,LW(B tbl)
301   (set-case-syntax-pair ?,L8(B ?,LX(B tbl)
302   (set-case-syntax-pair ?,L9(B ?,LY(B tbl)
303   (set-case-syntax-pair ?,L:(B ?,LZ(B tbl)
304   (set-case-syntax-pair ?,L;(B ?,L[(B tbl)
305   (set-case-syntax-pair ?,L<(B ?,L\(B tbl)
306   (set-case-syntax-pair ?,L=(B ?,L](B tbl)
307   (set-case-syntax-pair ?,L>(B ?,L^(B tbl)
308   (set-case-syntax-pair ?,L?(B ?,L_(B tbl)
309   (set-case-syntax-pair ?,L@(B ?,L`(B tbl)
310   (set-case-syntax-pair ?,LA(B ?,La(B tbl)
311   (set-case-syntax-pair ?,LB(B ?,Lb(B tbl)
312   (set-case-syntax-pair ?,LC(B ?,Lc(B tbl)
313   (set-case-syntax-pair ?,LD(B ?,Ld(B tbl)
314   (set-case-syntax-pair ?,LE(B ?,Le(B tbl)
315   (set-case-syntax-pair ?,LF(B ?,Lf(B tbl)
316   (set-case-syntax-pair ?,LG(B ?,Lg(B tbl)
317   (set-case-syntax-pair ?,LH(B ?,Lh(B tbl)
318   (set-case-syntax-pair ?,LI(B ?,Li(B tbl)
319   (set-case-syntax-pair ?,LJ(B ?,Lj(B tbl)
320   (set-case-syntax-pair ?,LK(B ?,Lk(B tbl)
321   (set-case-syntax-pair ?,LL(B ?,Ll(B tbl)
322   (set-case-syntax-pair ?,LM(B ?,Lm(B tbl)
323   (set-case-syntax-pair ?,LN(B ?,Ln(B tbl)
324   (set-case-syntax-pair ?,LO(B ?,Lo(B tbl)
325   (set-case-syntax-pair ?$,1(!(B ?$,1(q(B tbl)
326   (set-case-syntax-pair ?$,1("(B ?$,1(r(B tbl)
327   (set-case-syntax-pair ?$,1(#(B ?$,1(s(B tbl)
328   (set-case-syntax-pair ?$,1($(B ?$,1(t(B tbl)
329   (set-case-syntax-pair ?$,1(%(B ?$,1(u(B tbl)
330   (set-case-syntax-pair ?$,1(&(B ?$,1(v(B tbl)
331   (set-case-syntax-pair ?$,1('(B ?$,1(w(B tbl)
332   (set-case-syntax-pair ?$,1(((B ?$,1(x(B tbl)
333   (set-case-syntax-pair ?$,1()(B ?$,1(y(B tbl)
334   (set-case-syntax-pair ?$,1(*(B ?$,1(z(B tbl)
335   (set-case-syntax-pair ?$,1(+(B ?$,1({(B tbl)
336   (set-case-syntax-pair ?$,1(,(B ?$,1(|(B tbl)
337   (set-case-syntax-pair ?$,1(.(B ?$,1(~(B tbl)
338   (set-case-syntax-pair ?$,1(/(B ?$,1((B tbl)
339   (set-case-syntax-pair ?$,1(0(B ?$,1(P(B tbl)
340   (set-case-syntax-pair ?$,1(1(B ?$,1(Q(B tbl)
341   (set-case-syntax-pair ?$,1(2(B ?$,1(R(B tbl)
342   (set-case-syntax-pair ?$,1(3(B ?$,1(S(B tbl)
343   (set-case-syntax-pair ?$,1(4(B ?$,1(T(B tbl)
344   (set-case-syntax-pair ?$,1(5(B ?$,1(U(B tbl)
345   (set-case-syntax-pair ?$,1(6(B ?$,1(V(B tbl)
346   (set-case-syntax-pair ?$,1(7(B ?$,1(W(B tbl)
347   (set-case-syntax-pair ?$,1(8(B ?$,1(X(B tbl)
348   (set-case-syntax-pair ?$,1(9(B ?$,1(Y(B tbl)
349   (set-case-syntax-pair ?$,1(:(B ?$,1(Z(B tbl)
350   (set-case-syntax-pair ?$,1(;(B ?$,1([(B tbl)
351   (set-case-syntax-pair ?$,1(<(B ?$,1(\(B tbl)
352   (set-case-syntax-pair ?$,1(=(B ?$,1(](B tbl)
353   (set-case-syntax-pair ?$,1(>(B ?$,1(^(B tbl)
354   (set-case-syntax-pair ?$,1(?(B ?$,1(_(B tbl)
355   (set-case-syntax-pair ?$,1(@(B ?$,1(`(B tbl)
356   (set-case-syntax-pair ?$,1(A(B ?$,1(a(B tbl)
357   (set-case-syntax-pair ?$,1(B(B ?$,1(b(B tbl)
358   (set-case-syntax-pair ?$,1(C(B ?$,1(c(B tbl)
359   (set-case-syntax-pair ?$,1(D(B ?$,1(d(B tbl)
360   (set-case-syntax-pair ?$,1(E(B ?$,1(e(B tbl)
361   (set-case-syntax-pair ?$,1(F(B ?$,1(f(B tbl)
362   (set-case-syntax-pair ?$,1(G(B ?$,1(g(B tbl)
363   (set-case-syntax-pair ?$,1(H(B ?$,1(h(B tbl)
364   (set-case-syntax-pair ?$,1(I(B ?$,1(i(B tbl)
365   (set-case-syntax-pair ?$,1(J(B ?$,1(j(B tbl)
366   (set-case-syntax-pair ?$,1(K(B ?$,1(k(B tbl)
367   (set-case-syntax-pair ?$,1(L(B ?$,1(l(B tbl)
368   (set-case-syntax-pair ?$,1(M(B ?$,1(m(B tbl)
369   (set-case-syntax-pair ?$,1(N(B ?$,1(n(B tbl)
370   (set-case-syntax-pair ?$,1(O(B ?$,1(o(B tbl))
371
372 ;; Devanagari character set
373
374 ;;; Commented out since the categories appear not to be used anywhere
375 ;;; and word syntax is the default.
376 ;; (let ((deflist       '(;; chars      syntax  category
377 ;;                ("$(5!!!"!#(B"      "w"     ?7) ; vowel-modifying diacritical mark
378 ;;                                          ; chandrabindu, anuswar, visarga
379 ;;                ("$(5!$(B-$(5!2(B"        "w"     ?1) ; independent vowel
380 ;;                ("$(5!3(B-$(5!X(B"        "w"     ?0) ; consonant
381 ;;                ("$(5!Z(B-$(5!g(B"        "w"     ?8) ; matra
382 ;;                ("$(5!q(B-$(5!z(B"        "w"     ?6) ; digit
383 ;;                ;; Unicode equivalents
384 ;;                ("$,15A5B5C(B"      "w"     ?7) ; vowel-modifying diacritical mark
385 ;;                                          ; chandrabindu, anuswar, visarga
386 ;;                ("$,15E(B-$,15M(B"        "w"     ?1) ; independent vowel
387 ;;                ("$,15U(B-$,15y(B"        "w"     ?0) ; consonant
388 ;;                ("$,15~(B-$,16)(B"        "w"     ?8) ; matra
389 ;;                ("$,16F(B-$,16O(B"        "w"     ?6) ; digit
390 ;;                ))
391 ;;       elm chars len syntax category to ch i)
392 ;;   (while deflist
393 ;;     (setq elm (car deflist))
394 ;;     (setq chars (car elm)
395 ;;        len (length chars)
396 ;;        syntax (nth 1 elm)
397 ;;        category (nth 2 elm)
398 ;;        i 0)
399 ;;     (while (< i len)
400 ;;       (if (= (aref chars i) ?-)
401 ;;        (setq i (1+ i)
402 ;;              to (aref chars i))
403 ;;      (setq ch (aref chars i)
404 ;;            to ch))
405 ;;       (while (<= ch to)
406 ;;      (modify-syntax-entry ch syntax)
407 ;;      (modify-category-entry ch category)
408 ;;      (setq ch (1+ ch)))
409 ;;       (setq i (1+ i)))
410 ;;     (setq deflist (cdr deflist))))
411
412 ;; Ethiopic character set
413
414 (modify-category-entry (make-char 'ethiopic) ?e)
415 ;; (modify-syntax-entry (make-char 'ethiopic) "w")
416 (dotimes (i (1+ (- #x137c #x1200)))
417   (modify-category-entry (decode-char 'ucs (+ #x1200 i)) ?e))
418 (let ((chars '(?$(3$h(B ?$(3$i(B ?$(3$j(B ?$(3$k(B ?$(3$l(B ?$(3$m(B ?$(3$n(B ?$(3$o(B ?$(3%i(B ?$(3%t(B ?$(3%u(B ?$(3%v(B ?$(3%w(B ?$(3%x(B
419                ;; Unicode equivalents of the above:
420                ?$,1Q!(B ?$,1Q"(B ?$,1Q#(B ?$,1Q$(B ?$,1Q%(B ?$,1Q&(B ?$,1Q'(B ?$,1Q((B ?$,3op(B ?$,3o{(B ?$,3o|(B ?$,3o}(B ?$,3o~(B ?$,3o(B)))
421   (while chars
422     (modify-syntax-entry (car chars) ".")
423     (setq chars (cdr chars))))
424
425 ;; Greek character set (ISO-8859-7)
426
427 (modify-category-entry (make-char 'greek-iso8859-7) ?g)
428 (let ((c #x370))
429   (while (<= c #x3ff)
430     (modify-category-entry (decode-char 'ucs c) ?g)
431     (setq c (1+ c))))
432
433 ;; (let ((c 182))
434 ;;   (while (< c 255)
435 ;;     (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
436 ;;     (setq c (1+ c))))
437 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
438 (modify-syntax-entry ?,F7(B ".")
439 (modify-syntax-entry ?,F;(B ".")
440 (modify-syntax-entry ?,F=(B ".")
441 (let ((tbl (standard-case-table)))
442   ;; Fixme: non-letter syntax copied from latin-1, but that's dubious
443   ;; in several cases.
444   (set-case-syntax ?,F!(B "." tbl)
445   (set-case-syntax ?,F"(B "." tbl)
446   (set-case-syntax ?,F&(B "." tbl)
447   (set-case-syntax ?,F&(B "_" tbl)
448   (set-case-syntax ?,F'(B "." tbl)
449   (set-case-syntax ?,F)(B "_" tbl)
450   (set-case-syntax ?,F+(B "." tbl)
451   (set-case-syntax ?,F,(B "_" tbl)
452   (set-case-syntax ?,F-(B "_" tbl)
453   (set-case-syntax ?,F/(B "." tbl)
454   (set-case-syntax ?,F0(B "_" tbl)
455   (set-case-syntax ?,F1(B "_" tbl)
456 ;;  (set-case-syntax ?,F7(B "_" tbl)
457 ;;  (set-case-syntax ?,F=(B "_" tbl)
458   (set-case-syntax-pair ?,FA(B ?,Fa(B tbl)
459   (set-case-syntax-pair ?,FB(B ?,Fb(B tbl)
460   (set-case-syntax-pair ?,FC(B ?,Fc(B tbl)
461   (set-case-syntax-pair ?,FD(B ?,Fd(B tbl)
462   (set-case-syntax-pair ?,FE(B ?,Fe(B tbl)
463   (set-case-syntax-pair ?,FF(B ?,Ff(B tbl)
464   (set-case-syntax-pair ?,FG(B ?,Fg(B tbl)
465   (set-case-syntax-pair ?,FH(B ?,Fh(B tbl)
466   (set-case-syntax-pair ?,FI(B ?,Fi(B tbl)
467   (set-case-syntax-pair ?,FJ(B ?,Fj(B tbl)
468   (set-case-syntax-pair ?,FK(B ?,Fk(B tbl)
469   (set-case-syntax-pair ?,FL(B ?,Fl(B tbl)
470   (set-case-syntax-pair ?,FM(B ?,Fm(B tbl)
471   (set-case-syntax-pair ?,FN(B ?,Fn(B tbl)
472   (set-case-syntax-pair ?,FO(B ?,Fo(B tbl)
473   (set-case-syntax-pair ?,FP(B ?,Fp(B tbl)
474   (set-case-syntax-pair ?,FQ(B ?,Fq(B tbl)
475   (set-upcase-syntax    ?,FS(B ?,Fr(B tbl)
476   (set-case-syntax-pair ?,FS(B ?,Fs(B tbl)
477   (set-case-syntax-pair ?,FT(B ?,Ft(B tbl)
478   (set-case-syntax-pair ?,FU(B ?,Fu(B tbl)
479   (set-case-syntax-pair ?,FV(B ?,Fv(B tbl)
480   (set-case-syntax-pair ?,FW(B ?,Fw(B tbl)
481   (set-case-syntax-pair ?,FX(B ?,Fx(B tbl)
482   (set-case-syntax-pair ?,FY(B ?,Fy(B tbl)
483   (set-case-syntax-pair ?,FZ(B ?,Fz(B tbl)
484   (set-case-syntax-pair ?,F[(B ?,F{(B tbl)
485   (set-case-syntax-pair ?,F?(B ?,F~(B tbl)
486   (set-case-syntax-pair ?,F>(B ?,F}(B tbl)
487   (set-case-syntax-pair ?,F<(B ?,F|(B tbl)
488   (set-case-syntax-pair ?,F6(B ?,F\(B tbl)
489   (set-case-syntax-pair ?,F8(B ?,F](B tbl)
490   (set-case-syntax-pair ?,F9(B ?,F^(B tbl)
491   (set-case-syntax-pair ?,F:(B ?,F_(B tbl)
492   ;; Unicode equivalents
493   (set-case-syntax-pair ?$,1&q(B ?$,1'1(B tbl)
494   (set-case-syntax-pair ?$,1&r(B ?$,1'2(B tbl)
495   (set-case-syntax-pair ?$,1&s(B ?$,1'3(B tbl)
496   (set-case-syntax-pair ?$,1&t(B ?$,1'4(B tbl)
497   (set-case-syntax-pair ?$,1&u(B ?$,1'5(B tbl)
498   (set-case-syntax-pair ?$,1&v(B ?$,1'6(B tbl)
499   (set-case-syntax-pair ?$,1&w(B ?$,1'7(B tbl)
500   (set-case-syntax-pair ?$,1&x(B ?$,1'8(B tbl)
501   (set-case-syntax-pair ?$,1&y(B ?$,1'9(B tbl)
502   (set-case-syntax-pair ?$,1&z(B ?$,1':(B tbl)
503   (set-case-syntax-pair ?$,1&{(B ?$,1';(B tbl)
504   (set-case-syntax-pair ?$,1&|(B ?$,1'<(B tbl)
505   (set-case-syntax-pair ?$,1&}(B ?$,1'=(B tbl)
506   (set-case-syntax-pair ?$,1&~(B ?$,1'>(B tbl)
507   (set-case-syntax-pair ?$,1&(B ?$,1'?(B tbl)
508   (set-case-syntax-pair ?$,1' (B ?$,1'@(B tbl)
509   (set-case-syntax-pair ?$,1'!(B ?$,1'A(B tbl)
510   (set-upcase-syntax    ?$,1'#(B ?$,1'B(B tbl)
511   (set-case-syntax-pair ?$,1'#(B ?$,1'C(B tbl)
512   (set-case-syntax-pair ?$,1'$(B ?$,1'D(B tbl)
513   (set-case-syntax-pair ?$,1'%(B ?$,1'E(B tbl)
514   (set-case-syntax-pair ?$,1'&(B ?$,1'F(B tbl)
515   (set-case-syntax-pair ?$,1''(B ?$,1'G(B tbl)
516   (set-case-syntax-pair ?$,1'((B ?$,1'H(B tbl)
517   (set-case-syntax-pair ?$,1')(B ?$,1'I(B tbl)
518   (set-case-syntax-pair ?$,1'*(B ?$,1'J(B tbl)
519   (set-case-syntax-pair ?$,1'+(B ?$,1'K(B tbl)
520   (set-case-syntax-pair ?$,1&o(B ?$,1'N(B tbl)
521   (set-case-syntax-pair ?$,1&n(B ?$,1'M(B tbl)
522   (set-case-syntax-pair ?$,1&l(B ?$,1'L(B tbl)
523   (set-case-syntax-pair ?$,1&f(B ?$,1',(B tbl)
524   (set-case-syntax-pair ?$,1&h(B ?$,1'-(B tbl)
525   (set-case-syntax-pair ?$,1&i(B ?$,1'.(B tbl)
526   (set-case-syntax-pair ?$,1&j(B ?$,1'/(B tbl))
527
528 ;; Hebrew character set (ISO-8859-8)
529
530 (modify-category-entry (make-char 'hebrew-iso8859-8) ?w)
531 (let ((c #x591))
532   (while (<= c #x5f4)
533     (modify-category-entry (decode-char 'ucs c) ?w)
534     (setq c (1+ c))))
535
536 (modify-syntax-entry (make-char 'hebrew-iso8859-8 208) ".") ; PASEQ
537 (modify-syntax-entry (make-char 'hebrew-iso8859-8 211) ".") ; SOF PASUQ
538 (modify-syntax-entry (decode-char 'ucs #x5be) ".") ; MAQAF
539 (modify-syntax-entry (decode-char 'ucs #x5c0) ".") ; PASEQ
540 (modify-syntax-entry (decode-char 'ucs #x5c3) ".") ; SOF PASUQ
541 (modify-syntax-entry (decode-char 'ucs #x5f3) ".") ; GERESH
542 (modify-syntax-entry (decode-char 'ucs #x5f4) ".") ; GERSHAYIM
543
544 ;; (let ((c 224))
545 ;;   (while (< c 251)
546 ;;     (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
547 ;;     (setq c (1+ c))))
548 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
549
550 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
551
552 (modify-category-entry (make-char 'indian-is13194) ?i)
553 (modify-category-entry (make-char 'indian-2-column) ?I)
554 (modify-category-entry (make-char 'indian-glyph) ?I)
555 ;; Unicode Devanagari block
556 (let ((c #x901))
557   (while (<= c #x970)
558     (modify-category-entry (decode-char 'ucs c) ?i)
559     (setq c (1+ c))))
560
561 (let ((l '(;; RANGE   CATEGORY          MEANINGS
562            (#x01 #x03 ?7)               ; vowel modifier
563            (#x05 #x14 ?1)               ; base vowel
564            (#x15 #x39 ?0)               ; consonants
565            (#x3e #x4d ?8)               ; vowel modifier
566            (#x51 #x54 ?4)               ; stress/tone mark
567            (#x58 #x5f ?0)               ; consonants
568            (#x60 #x61 ?1)               ; base vowel
569            (#x62 #x63 ?8)               ; vowel modifier
570            (#x66 #x6f ?6)               ; digits
571            )))
572   (dolist (elt1 '(#x900 #x980 #xa00 #xa80 #xb00 #xb80 #xc00 #xc80 #xd00))
573     (dolist (elt2 l)
574       (let* ((from (car elt2))
575              (counts (1+ (- (nth 1 elt2) from)))
576              (category (nth 2 elt2)))
577         (dotimes (i counts)
578           (modify-category-entry (decode-char 'ucs (+ elt1 from i))
579                                  category))))))
580
581 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
582
583 (modify-category-entry (make-char 'katakana-jisx0201) ?k)
584 (modify-category-entry (make-char 'katakana-jisx0201) ?j)
585 (modify-category-entry (make-char 'latin-jisx0201) ?r)
586 (modify-category-entry (make-char 'japanese-jisx0208) ?j)
587 (modify-category-entry (make-char 'japanese-jisx0212) ?j)
588 (modify-category-entry (make-char 'katakana-jisx0201) ?\|)
589 (modify-category-entry (make-char 'japanese-jisx0208) ?\|)
590 (modify-category-entry (make-char 'japanese-jisx0212) ?\|)
591
592 ;; Unicode equivalents of JISX0201-kana
593 (let ((c #xff61))
594   (while (<= c #xff9f)
595     (modify-category-entry (decode-char 'ucs c) ?k)
596     (modify-category-entry (decode-char 'ucs c) ?j)
597     (modify-category-entry (decode-char 'ucs c) ?\|)
598     (setq c (1+ c))))
599
600 ;; Katakana block
601 (let ((c #x30a0))
602   (while (<= c #x30ff)
603     ;; ?K is double width, ?k isn't specified
604     (modify-category-entry (decode-char 'ucs c) ?k)
605     (modify-category-entry (decode-char 'ucs c) ?j)
606     (modify-category-entry (decode-char 'ucs c) ?\|)
607     (setq c (1+ c))))
608
609 ;; Hiragana block
610 (let ((c #x3040))
611   (while (<= c #x309f)
612     ;; ?H is actually defined to be double width
613     (modify-category-entry (decode-char 'ucs c) ?H)
614     ;;(modify-category-entry (decode-char 'ucs c) ?j)
615     (modify-category-entry (decode-char 'ucs c) ?\|)
616     (setq c (1+ c))))
617
618 ;; JISX0208
619 ;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
620 (modify-syntax-entry (make-char 'japanese-jisx0208 33) "_")
621 (modify-syntax-entry (make-char 'japanese-jisx0208 34) "_")
622 (modify-syntax-entry (make-char 'japanese-jisx0208 40) "_")
623 (let ((chars '(?$B!<(B ?$B!+(B ?$B!,(B ?$B!3(B ?$B!4(B ?$B!5(B ?$B!6(B ?$B!7(B ?$B!8(B ?$B!9(B ?$B!:(B ?$B!;(B)))
624   (while chars
625     (modify-syntax-entry (car chars) "w")
626     (setq chars (cdr chars))))
627 (let ((parens "$B!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![(B" )
628       open close)
629   (dotimes (i (/ (length parens) 2))
630     (setq open (aref parens (* i 2))
631           close (aref parens (1+ (* i 2))))
632     (modify-syntax-entry open (format "(%c" close))
633     (modify-syntax-entry close (format ")%c" open))))
634
635 (modify-category-entry (make-char 'japanese-jisx0208 35) ?A)
636 (modify-category-entry (make-char 'japanese-jisx0208 36) ?H)
637 (modify-category-entry (make-char 'japanese-jisx0208 37) ?K)
638 (modify-category-entry (make-char 'japanese-jisx0208 38) ?G)
639 (modify-category-entry (make-char 'japanese-jisx0208 39) ?Y)
640 (let ((row 48))
641   (while (< row 127)
642     (modify-category-entry (make-char 'japanese-jisx0208 row) ?C)
643     (setq row (1+ row))))
644 (modify-category-entry ?$B!<(B ?K)
645 (let ((chars '(?$B!+(B ?$B!,(B)))
646   (while chars
647     (modify-category-entry (car chars) ?K)
648     (modify-category-entry (car chars) ?H)
649     (setq chars (cdr chars))))
650 (let ((chars '(?$B!3(B ?$B!4(B ?$B!5(B ?$B!6(B ?$B!7(B ?$B!8(B ?$B!9(B ?$B!:(B ?$B!;(B)))
651   (while chars
652     (modify-category-entry (car chars) ?C)
653     (setq chars (cdr chars))))
654
655 (let ((tbl (standard-case-table)))
656   (dotimes (i 26)
657     (set-case-syntax-pair (make-char 'japanese-jisx0208 #x23 (+ #x41 i))
658                           (make-char 'japanese-jisx0208 #x23 (+ #x61 i)) tbl))
659   (dotimes (i 24)
660     (set-case-syntax-pair (make-char 'japanese-jisx0208 #x26 (+ #x21 i))
661                           (make-char 'japanese-jisx0208 #x26 (+ #x41 i)) tbl))
662   (dotimes (i 33)
663     (set-case-syntax-pair (make-char 'japanese-jisx0208 #x27 (+ #x21 i))
664                           (make-char 'japanese-jisx0208 #x27 (+ #x51 i)) tbl)))
665
666 ;; JISX0212
667 ;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
668 (modify-syntax-entry (make-char 'japanese-jisx0212 33) "_")
669 (modify-syntax-entry (make-char 'japanese-jisx0212 34) "_")
670 (modify-syntax-entry (make-char 'japanese-jisx0212 35) "_")
671
672 (modify-category-entry (make-char 'japanese-jisx0212 ) ?C)
673
674 ;; JISX0201-Kana
675 ;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
676 (let ((chars '(?(I!(B ?(I$(B ?(I%(B
677                ;; Unicode:
678                ?$,3sa(B ?$,3sd(B ?$,3se(B)))
679   (while chars
680     (modify-syntax-entry (car chars) ".")
681     (setq chars (cdr chars))))
682
683 (modify-syntax-entry ?\(I"(B "((I#(B")
684 (modify-syntax-entry ?\(I#(B "((I"(B")
685
686 ;; Korean character set (KSC5601)
687
688 ;; (modify-syntax-entry (make-char 'korean-ksc5601) "w")
689 (modify-syntax-entry (make-char 'korean-ksc5601 33) "_")
690 (modify-syntax-entry (make-char 'korean-ksc5601 34) "_")
691 (modify-syntax-entry (make-char 'korean-ksc5601 38) "_")
692 (modify-syntax-entry (make-char 'korean-ksc5601 39) "_")
693 (modify-syntax-entry (make-char 'korean-ksc5601 40) "_")
694 (modify-syntax-entry (make-char 'korean-ksc5601 41) "_")
695
696 (modify-category-entry (make-char 'korean-ksc5601) ?h)
697 (modify-category-entry (make-char 'korean-ksc5601 35) ?A)
698 (modify-category-entry (make-char 'korean-ksc5601 37) ?G)
699 (modify-category-entry (make-char 'korean-ksc5601 42) ?H)
700 (modify-category-entry (make-char 'korean-ksc5601 43) ?K)
701 (modify-category-entry (make-char 'korean-ksc5601 44) ?Y)
702
703 (let ((parens "$(C!2!3!4!5!6!7!8!9!:!;!<!=#(#)#[#]#{#}(B" )
704       open close)
705   (dotimes (i (/ (length parens) 2))
706     (setq open (aref parens (* i 2))
707           close (aref parens (1+ (* i 2))))
708     (modify-syntax-entry open (format "(%c" close))
709     (modify-syntax-entry close (format ")%c" open))))
710
711 (let ((tbl (standard-case-table)))
712   (dotimes (i 26)
713     (set-case-syntax-pair (make-char 'korean-ksc5601 #x23 (+ #x41 i))
714                           (make-char 'korean-ksc5601 #x23 (+ #x61 i)) tbl))
715   (dotimes (i 10)
716     (set-case-syntax-pair (make-char 'korean-ksc5601 #x25 (+ #x21 i))
717                           (make-char 'korean-ksc5601 #x25 (+ #x30 i)) tbl))
718   (dotimes (i 24)
719     (set-case-syntax-pair (make-char 'korean-ksc5601 #x25 (+ #x41 i))
720                           (make-char 'korean-ksc5601 #x25 (+ #x61 i)) tbl))
721   (dotimes (i 33)
722     (set-case-syntax-pair (make-char 'korean-ksc5601 #x2C (+ #x21 i))
723                           (make-char 'korean-ksc5601 #x2C (+ #x51 i)) tbl)))
724
725 ;; Latin character set (latin-1,2,3,4,5,8,9)
726
727 (modify-category-entry (make-char 'latin-iso8859-1) ?l)
728 (modify-category-entry (make-char 'latin-iso8859-2) ?l)
729 (modify-category-entry (make-char 'latin-iso8859-3) ?l)
730 (modify-category-entry (make-char 'latin-iso8859-4) ?l)
731 (modify-category-entry (make-char 'latin-iso8859-9) ?l)
732 (modify-category-entry (make-char 'latin-iso8859-14) ?l)
733 (modify-category-entry (make-char 'latin-iso8859-15) ?l)
734
735 (modify-category-entry (make-char 'latin-iso8859-1 160) ?\ )
736 (modify-category-entry (make-char 'latin-iso8859-2 160) ?\ )
737 (modify-category-entry (make-char 'latin-iso8859-3 160) ?\ )
738 (modify-category-entry (make-char 'latin-iso8859-4 160) ?\ )
739 (modify-category-entry (make-char 'latin-iso8859-9 160) ?\ )
740 (modify-category-entry (make-char 'latin-iso8859-14 160) ?\ )
741 (modify-category-entry (make-char 'latin-iso8859-15 160) ?\ )
742
743 ;; Lao character set
744
745 (modify-category-entry (make-char 'lao) ?o)
746 (dotimes (i (1+ (- #xeff #xe80)))
747   (modify-category-entry (decode-char 'ucs (+ i #xe80)) ?o))
748
749 (let ((deflist  '(;; chars      syntax  category
750                   ("(1!(B-(1N(B"    "w"     ?0) ; consonant
751                   ("(1PRS]`(B-(1d(B"        "w"     ?1) ; vowel base
752                   ("(1QT(B-(1W[m(B" "w"     ?2) ; vowel upper
753                   ("(1XY(B"           "w"     ?3) ; vowel lower
754                   ("(1h(B-(1l(B"    "w"     ?4) ; tone mark
755                   ("(1\(B"            "w"     ?9) ; semivowel lower
756                   ("(1p(B-(1y(B"    "w"     ?6) ; digit
757                   ("(1Of(B"           "_"     ?5) ; symbol
758                   ;; Unicode equivalents
759                   ("$,1D!(B-$,1DN(B"        "w"     ?0) ; consonant
760                   ("$,1DPDRDSD]D`(B-$,1Dd(B"        "w"     ?1) ; vowel base
761                   ("$,1DQDT(B-$,1DWD[Dm(B"  "w"     ?2) ; vowel upper
762                   ("$,1DXDY(B"        "w"     ?3) ; vowel lower
763                   ("$,1Dh(B-$,1Dk(B"        "w"     ?4) ; tone mark
764                   ("$,1D\D](B"        "w"     ?9) ; semivowel lower
765                   ("$,1Dp(B-$,1Dy(B"        "w"     ?6) ; digit
766                   ("$,1DODf(B"        "_"     ?5) ; symbol
767                   ))
768       elm chars len syntax category to ch i)
769   (while deflist
770     (setq elm (car deflist))
771     (setq chars (car elm)
772           len (length chars)
773           syntax (nth 1 elm)
774           category (nth 2 elm)
775           i 0)
776     (while (< i len)
777       (if (= (aref chars i) ?-)
778           (setq i (1+ i)
779                 to (aref chars i))
780         (setq ch (aref chars i)
781               to ch))
782       (while (<= ch to)
783         (unless (string-equal syntax "w")
784           (modify-syntax-entry ch syntax))
785         (modify-category-entry ch category)
786         (setq ch (1+ ch)))
787       (setq i (1+ i)))
788     (setq deflist (cdr deflist))))
789
790 ;; Thai character set (TIS620)
791
792 (modify-category-entry (make-char 'thai-tis620) ?t)
793 (dotimes (i (1+ (- #xe7f #xe00)))
794   (modify-category-entry (decode-char 'ucs (+ i #xe00)) ?t))
795
796 (let ((deflist  '(;; chars      syntax  category
797                   (",T!(B-,TCEG(B-,TN(B"  "w"     ?0) ; consonant
798                   (",TDFPRS`(B-,Te(B"       "w"     ?1) ; vowel base
799                   (",TQT(B-,TWgn(B" "w"     ?2) ; vowel upper
800                   (",TX(B-,TZ(B"    "w"     ?3) ; vowel lower
801                   (",Th(B-,Tm(B"    "w"     ?4) ; tone mark
802                   (",Tp(B-,Ty(B"    "w"     ?6) ; digit
803                   (",TOf_oz{(B"       "_"     ?5) ; symbol
804                   ;; Unicode equivalents
805                   ("$,1Ba(B-$,1C#C%C'(B-$,1C.(B"  "w"     ?0) ; consonant
806                   ("$,1C$C&C0C2C3C@(B-$,1CE(B"      "w"     ?1) ; vowel base
807                   ("$,1C1C4(B-$,1C7CGCN(B"  "w"     ?2) ; vowel upper
808                   ("$,1C8(B-$,1C:(B"        "w"     ?3) ; vowel lower
809                   ("$,1CH(B-$,1CM(B"        "w"     ?4) ; tone mark
810                   ("$,1CP(B-$,1CY(B"        "w"     ?6) ; digit
811                   ("$,1C/CFC?COCZC[(B"        "_"     ?5) ; symbol
812                   ))
813       elm chars len syntax category to ch i)
814   (while deflist
815     (setq elm (car deflist))
816     (setq chars (car elm)
817           len (length chars)
818           syntax (nth 1 elm)
819           category (nth 2 elm)
820           i 0)
821     (while (< i len)
822       (if (= (aref chars i) ?-)
823           (setq i (1+ i)
824                 to (aref chars i))
825         (setq ch (