root/trunk/lisp/international/mule-cmds.el

Revision 4220, 108.7 kB (checked in by miyoshi, 9 months ago)

Sync up with Emacs22.2.

  • Property svn:eol-style set to LF
Line 
1 ;;; mule-cmds.el --- commands for multilingual environment -*-coding: iso-2022-7bit -*-
2
3 ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
4 ;;   2006, 2007, 2008  Free Software Foundation, Inc.
5 ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 ;;   2005, 2006, 2007, 2008
7 ;;   National Institute of Advanced Industrial Science and Technology (AIST)
8 ;;   Registration Number H14PRO021
9
10 ;; Keywords: mule, multilingual
11
12 ;; This file is part of GNU Emacs.
13
14 ;; GNU Emacs is free software; you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation; either version 3, or (at your option)
17 ;; any later version.
18
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22 ;; GNU General Public License for more details.
23
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs; see the file COPYING.  If not, write to the
26 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
27 ;; Boston, MA 02110-1301, USA.
28
29 ;;; Commentary:
30
31 ;;; Code:
32
33 (eval-when-compile
34   (defvar dos-codepage)
35   (autoload 'widget-value "wid-edit"))
36
37 (defvar mac-system-coding-system)
38 (defvar mac-system-locale)
39
40 ;;; MULE related key bindings and menus.
41
42 (defvar mule-keymap (make-sparse-keymap)
43   "Keymap for Mule (Multilingual environment) specific commands.")
44
45 ;; Keep "C-x C-m ..." for mule specific commands.
46 (define-key ctl-x-map "\C-m" mule-keymap)
47
48 (define-key mule-keymap "f" 'set-buffer-file-coding-system)
49 (define-key mule-keymap "r" 'revert-buffer-with-coding-system)
50 (define-key mule-keymap "F" 'set-file-name-coding-system)
51 (define-key mule-keymap "t" 'set-terminal-coding-system)
52 (define-key mule-keymap "k" 'set-keyboard-coding-system)
53 (define-key mule-keymap "p" 'set-buffer-process-coding-system)
54 (define-key mule-keymap "x" 'set-selection-coding-system)
55 (define-key mule-keymap "X" 'set-next-selection-coding-system)
56 (define-key mule-keymap "\C-\\" 'set-input-method)
57 (define-key mule-keymap "c" 'universal-coding-system-argument)
58 (define-key mule-keymap "l" 'set-language-environment)
59
60 (defvar mule-menu-keymap
61   (make-sparse-keymap "Mule (Multilingual Environment)")
62   "Keymap for Mule (Multilingual environment) menu specific commands.")
63
64 (defvar describe-language-environment-map
65   (make-sparse-keymap "Describe Language Environment"))
66
67 (defvar setup-language-environment-map
68   (make-sparse-keymap "Set Language Environment"))
69
70 (defvar set-coding-system-map
71   (make-sparse-keymap "Set Coding System"))
72
73 (define-key-after mule-menu-keymap [set-language-environment]
74   (list 'menu-item  "Set Language Environment" setup-language-environment-map))
75 (define-key-after mule-menu-keymap [separator-mule]
76   '("--")
77   t)
78 (define-key-after mule-menu-keymap [toggle-input-method]
79   '(menu-item "Toggle Input Method" toggle-input-method)
80   t)
81 (define-key-after mule-menu-keymap [set-input-method]
82   '(menu-item "Select Input Method..." set-input-method)
83   t)
84 (define-key-after mule-menu-keymap [describe-input-method]
85   '(menu-item "Describe Input Method"  describe-input-method))
86 (define-key-after mule-menu-keymap [separator-input-method]
87   '("--")
88   t)
89 (define-key-after mule-menu-keymap [set-various-coding-system]
90   (list 'menu-item "Set Coding Systems" set-coding-system-map
91         :enable 'default-enable-multibyte-characters))
92 (define-key-after mule-menu-keymap [view-hello-file]
93   '(menu-item "Show Multi-lingual Text" view-hello-file
94               :enable (file-readable-p
95                        (expand-file-name "HELLO" data-directory))
96               :help "Display file which says HELLO in many languages")
97   t)
98 (define-key-after mule-menu-keymap [separator-coding-system]
99   '("--")
100   t)
101 (define-key-after mule-menu-keymap [describe-language-environment]
102   (list 'menu-item "Describe Language Environment"
103         describe-language-environment-map
104         :help "Show multilingual settings for a specific language")
105   t)
106 (define-key-after mule-menu-keymap [describe-input-method]
107   '(menu-item "Describe Input Method..." describe-input-method
108               :help "Keyboard layout for a specific input method")
109   t)
110 (define-key-after mule-menu-keymap [describe-coding-system]
111   '(menu-item "Describe Coding System..." describe-coding-system)
112   t)
113 (define-key-after mule-menu-keymap [list-character-sets]
114   '(menu-item "List Character Sets" list-character-sets
115               :help "Show table of available character sets"))
116 (define-key-after mule-menu-keymap [mule-diag]
117   '(menu-item "Show All of Mule Status" mule-diag
118               :help "Display multilingual environment settings")
119   t)
120
121 (define-key-after set-coding-system-map [universal-coding-system-argument]
122   '(menu-item "For Next Command" universal-coding-system-argument
123               :help "Coding system to be used by next command")
124   t)
125 (define-key-after set-coding-system-map [separator-1]
126   '("--")
127   t)
128 (define-key-after set-coding-system-map [set-buffer-file-coding-system]
129   '(menu-item "For Saving This Buffer" set-buffer-file-coding-system
130               :help "How to encode this buffer when saved")
131   t)
132 (define-key-after set-coding-system-map [revert-buffer-with-coding-system]
133   '(menu-item "For Reverting This File Now" revert-buffer-with-coding-system
134               :enable buffer-file-name
135               :help "Revisit this file immediately using specified coding system")
136   t)
137 (define-key-after set-coding-system-map [set-file-name-coding-system]
138   '(menu-item "For File Name" set-file-name-coding-system
139               :help "How to decode/encode file names")
140   t)
141 (define-key-after set-coding-system-map [separator-2]
142   '("--")
143   t)
144
145 (define-key-after set-coding-system-map [set-keyboard-coding-system]
146   '(menu-item "For Keyboard" set-keyboard-coding-system
147               :help "How to decode keyboard input")
148   t)
149 (define-key-after set-coding-system-map [set-terminal-coding-system]
150   '(menu-item "For Terminal" set-terminal-coding-system
151               :enable (null (memq window-system '(x w32 mac)))
152               :help "How to encode terminal output")
153   t)
154 (define-key-after set-coding-system-map [separator-3]
155   '("--")
156   t)
157 (define-key-after set-coding-system-map [set-selection-coding-system]
158   '(menu-item "For X Selections/Clipboard" set-selection-coding-system
159               :visible (display-selections-p)
160               :help "How to en/decode data to/from selection/clipboard")
161   t)
162 (define-key-after set-coding-system-map [set-next-selection-coding-system]
163   '(menu-item "For Next X Selection" set-next-selection-coding-system
164               :visible (display-selections-p)
165               :help "How to en/decode next selection/clipboard operation")
166   t)
167 (define-key-after set-coding-system-map [set-buffer-process-coding-system]
168   '(menu-item "For I/O with Subprocess" set-buffer-process-coding-system
169               :visible (fboundp 'start-process)
170               :enable (get-buffer-process (current-buffer))
171               :help "How to en/decode I/O from/to subprocess connected to this buffer")
172   t)
173
174
175 (define-key setup-language-environment-map
176   [Default] '(menu-item "Default" setup-specified-language-environment))
177
178 (define-key describe-language-environment-map
179   [Default] '(menu-item "Default" describe-specified-language-support))
180
181 ;; This should be a single character key binding because users use it
182 ;; very frequently while editing multilingual text.  Now we can use
183 ;; only two such keys: "\C-\\" and "\C-^", but the latter is not
184 ;; convenient because it requires shifting on most keyboards.  An
185 ;; alternative is "\C-\]" which is now bound to `abort-recursive-edit'
186 ;; but it won't be used that frequently.
187 (define-key global-map "\C-\\" 'toggle-input-method)
188
189 ;; This is no good because people often type Shift-SPC
190 ;; meaning to type SPC.  -- rms.
191 ;; ;; Here's an alternative key binding for X users (Shift-SPACE).
192 ;; (define-key global-map [?\S- ] 'toggle-input-method)
193
194 ;;; Mule related hyperlinks.
195 (defconst help-xref-mule-regexp-template
196   (purecopy (concat "\\(\\<\\("
197                     "\\(coding system\\)\\|"
198                     "\\(input method\\)\\|"
199                     "\\(character set\\)\\|"
200                     "\\(charset\\)"
201                     "\\)\\s-+\\)?"
202                     ;; Note starting with word-syntax character:
203                     "`\\(\\sw\\(\\sw\\|\\s_\\)+\\)'")))
204
205 (defun coding-system-change-eol-conversion (coding-system eol-type)
206   "Return a coding system which differs from CODING-SYSTEM in EOL conversion.
207 The returned coding system converts end-of-line by EOL-TYPE
208 but text as the same way as CODING-SYSTEM.
209 EOL-TYPE should be `unix', `dos', `mac', or nil.
210 If EOL-TYPE is nil, the returned coding system detects
211 how end-of-line is formatted automatically while decoding.
212
213 EOL-TYPE can be specified by an integer 0, 1, or 2.
214 They means `unix', `dos', and `mac' respectively."
215   (if (symbolp eol-type)
216       (setq eol-type (cond ((eq eol-type 'unix) 0)
217                            ((eq eol-type 'dos) 1)
218                            ((eq eol-type 'mac) 2)
219                            (t eol-type))))
220   ;; We call `coding-system-base' before `coding-system-eol-type',
221   ;; because the coding-system may not be initialized until then.
222   (let* ((base (coding-system-base coding-system))
223          (orig-eol-type (coding-system-eol-type coding-system)))
224     (cond ((vectorp orig-eol-type)
225            (if (not eol-type)
226                coding-system
227              (aref orig-eol-type eol-type)))
228           ((not eol-type)
229            base)
230           ((= eol-type orig-eol-type)
231            coding-system)
232           ((progn (setq orig-eol-type (coding-system-eol-type base))
233                   (vectorp orig-eol-type))
234            (aref orig-eol-type eol-type)))))
235
236 (defun coding-system-change-text-conversion (coding-system coding)
237   "Return a coding system which differs from CODING-SYSTEM in text conversion.
238 The returned coding system converts text by CODING
239 but end-of-line as the same way as CODING-SYSTEM.
240 If CODING is nil, the returned coding system detects
241 how text is formatted automatically while decoding."
242   (let ((eol-type (coding-system-eol-type coding-system)))
243     (coding-system-change-eol-conversion
244      (if coding coding 'undecided)
245      (if (numberp eol-type) (aref [unix dos mac] eol-type)))))
246
247 (defun toggle-enable-multibyte-characters (&optional arg)
248   "Change whether this buffer uses multibyte characters.
249 With arg, use multibyte characters if the arg is positive.
250
251 Note that this command does not convert the byte contents of
252 the buffer; it only changes the way those bytes are interpreted.
253 In general, therefore, this command *changes* the sequence of
254 characters that the current buffer contains.
255
256 We suggest you avoid using this command unless you know what you are
257 doing.  If you use it by mistake, and the buffer is now displayed
258 wrong, use this command again to toggle back to the right mode."
259   (interactive "P")
260   (let ((new-flag
261          (if (null arg) (null enable-multibyte-characters)
262            (> (prefix-numeric-value arg) 0))))
263     (set-buffer-multibyte new-flag))
264   (force-mode-line-update))
265
266 (defun view-hello-file ()
267   "Display the HELLO file which list up many languages and characters."
268   (interactive)
269   ;; We have to decode the file in any environment.
270   (let ((default-enable-multibyte-characters t)
271         (coding-system-for-read 'iso-2022-7bit))
272     (view-file (expand-file-name "HELLO" data-directory))))
273
274 (defun universal-coding-system-argument (coding-system)
275   "Execute an I/O command using the specified coding system."
276   (interactive
277    (let ((default (and buffer-file-coding-system
278                        (not (eq (coding-system-type buffer-file-coding-system)
279                                 t))
280                        buffer-file-coding-system)))
281      (list (read-coding-system
282             (if default
283                 (format "Coding system for following command (default %s): " default)
284               "Coding system for following command: ")
285             default))))
286   (let* ((keyseq (read-key-sequence
287                   (format "Command to execute with %s:" coding-system)))
288          (cmd (key-binding keyseq))
289          prefix)
290
291     (when (eq cmd 'universal-argument)
292       (call-interactively cmd)
293
294       ;; Process keys bound in `universal-argument-map'.
295       (while (progn
296                (setq keyseq (read-key-sequence nil t)
297                      cmd (key-binding keyseq t))
298                (not (eq cmd 'universal-argument-other-key)))
299         (let ((current-prefix-arg prefix-arg)
300               ;; Have to bind `last-command-char' here so that
301               ;; `digit-argument', for instance, can compute the
302               ;; prefix arg.
303               (last-command-char (aref keyseq 0)))
304           (call-interactively cmd)))
305
306       ;; This is the final call to `universal-argument-other-key', which
307       ;; set's the final `prefix-arg.
308       (let ((current-prefix-arg prefix-arg))
309         (call-interactively cmd))
310
311       ;; Read the command to execute with the given prefix arg.
312       (setq prefix prefix-arg
313             keyseq (read-key-sequence nil t)
314             cmd (key-binding keyseq)))
315
316     (let ((coding-system-for-read coding-system)
317           (coding-system-for-write coding-system)
318           (coding-system-require-warning t)
319           (current-prefix-arg prefix))
320       (message "")
321       (call-interactively cmd))))
322
323 (defun set-default-coding-systems (coding-system)
324   "Set default value of various coding systems to CODING-SYSTEM.
325 This sets the following coding systems:
326   o coding system of a newly created buffer
327   o default coding system for subprocess I/O
328 This also sets the following values:
329   o default value used as `file-name-coding-system' for converting file names
330       if CODING-SYSTEM is ASCII-compatible
331   o default value for the command `set-terminal-coding-system' (not on MSDOS)
332   o default value for the command `set-keyboard-coding-system'
333       if CODING-SYSTEM is ASCII-compatible"
334   (check-coding-system coding-system)
335   (setq-default buffer-file-coding-system coding-system)
336   (if (fboundp 'ucs-set-table-for-input)
337       (dolist (buffer (buffer-list))
338         (or (local-variable-p 'buffer-file-coding-system buffer)
339             (ucs-set-table-for-input buffer))))
340
341   (if (eq system-type 'darwin)
342       ;; The file-name coding system on Darwin systems is always utf-8.
343       (setq default-file-name-coding-system 'utf-8)
344     (if (and default-enable-multibyte-characters
345              (or (not coding-system)
346                  (not (coding-system-get coding-system 'ascii-incompatible))))
347         (setq default-file-name-coding-system coding-system)))
348   ;; If coding-system is nil, honor that on MS-DOS as well, so
349   ;; that they could reset the terminal coding system.
350   (unless (and (eq window-system 'pc) coding-system)
351     (setq default-terminal-coding-system coding-system))
352   (if (or (not coding-system)
353           (not (coding-system-get coding-system 'ascii-incompatible)))
354       (setq default-keyboard-coding-system coding-system))
355   ;; Preserve eol-type from existing default-process-coding-systems.
356   ;; On non-unix-like systems in particular, these may have been set
357   ;; carefully by the user, or by the startup code, to deal with the
358   ;; users shell appropriately, so should not be altered by changing
359   ;; language environment.
360   (let ((output-coding
361          (coding-system-change-text-conversion
362           (car default-process-coding-system) coding-system))
363         (input-coding
364          (coding-system-change-text-conversion
365           (cdr default-process-coding-system) coding-system)))
366     (setq default-process-coding-system
367           (cons output-coding input-coding))))
368
369 (defun prefer-coding-system (coding-system)
370   "Add CODING-SYSTEM at the front of the priority list for automatic detection.
371 This also sets the following coding systems:
372   o coding system of a newly created buffer
373   o default coding system for subprocess I/O
374 This also sets the following values:
375   o default value used as `file-name-coding-system' for converting file names
376   o default value for the command `set-terminal-coding-system' (not on MSDOS)
377   o default value for the command `set-keyboard-coding-system'
378
379 If CODING-SYSTEM specifies a certain type of EOL conversion, the coding
380 systems set by this function will use that type of EOL conversion.
381
382 This command does not change the default value of terminal coding system
383 for MS-DOS terminal, because DOS terminals only support a single coding
384 system, and Emacs automatically sets the default to that coding system at
385 startup.
386
387 A coding system that requires automatic detection of text
388 encoding (e.g. undecided, unix) can't be preferred.
389
390 See also `coding-category-list' and `coding-system-category'."
391   (interactive "zPrefer coding system: ")
392   (if (not (and coding-system (coding-system-p coding-system)))
393       (error "Invalid coding system `%s'" coding-system))
394   (let ((coding-category (coding-system-category coding-system))
395         (base (coding-system-base coding-system))
396         (eol-type (coding-system-eol-type coding-system)))
397     (if (not coding-category)
398         ;; CODING-SYSTEM is no-conversion or undecided.
399         (error "Can't prefer the coding system `%s'" coding-system))
400     (set coding-category (or base coding-system))
401     ;; Changing the binding of a coding category requires this call.
402     (update-coding-systems-internal)
403     (or (eq coding-category (car coding-category-list))
404         ;; We must change the order.
405         (set-coding-priority (list coding-category)))
406     (if (and base (interactive-p))
407         (message "Highest priority is set to %s (base of %s)"
408                  base coding-system))
409     ;; If they asked for specific EOL conversion, honor that.
410     (if (memq eol-type '(0 1 2))
411         (setq coding-system
412               (coding-system-change-eol-conversion base eol-type))
413       (setq coding-system base))
414     (set-default-coding-systems coding-system)))
415
416 (defvar sort-coding-systems-predicate nil
417   "If non-nil, a predicate function to sort coding systems.
418
419 It is called with two coding systems, and should return t if the first
420 one is \"less\" than the second.
421
422 The function `sort-coding-systems' use it.")
423
424 (defun sort-coding-systems (codings)
425   "Sort coding system list CODINGS by a priority of each coding system.
426 Return the sorted list.  CODINGS is modified by side effects.
427
428 If a coding system is most preferred, it has the highest priority.
429 Otherwise, coding systems that correspond to MIME charsets have
430 higher priorities.  Among them, a coding system included in the
431 `coding-system' key of the current language environment has higher
432 priority.  See also the documentation of `language-info-alist'.
433
434 If the variable `sort-coding-systems-predicate' (which see) is
435 non-nil, it is used to sort CODINGS instead."
436   (if sort-coding-systems-predicate
437       (sort codings sort-coding-systems-predicate)
438     (let* ((from-categories (mapcar #'(lambda (x) (symbol-value x))
439                                     coding-category-list))
440            (most-preferred (car from-categories))
441            (lang-preferred (get-language-info current-language-environment
442                                               'coding-system))
443            (func (function
444                   (lambda (x)
445                     (let ((base (coding-system-base x)))
446                       ;; We calculate the priority number 0..255 by
447                       ;; using the 8 bits PMMLCEII as this:
448                       ;; P: 1 if most preferred.
449                       ;; MM: greater than 0 if mime-charset.
450                       ;; L: 1 if one of the current lang. env.'s codings.
451                       ;; C: 1 if one of codings listed in the category list.
452                       ;; E: 1 if not XXX-with-esc
453                       ;; II: if iso-2022 based, 0..3, else 1.
454                       (logior
455                        (lsh (if (eq base most-preferred) 1 0) 7)
456                        (lsh
457                         (let ((mime (coding-system-get base 'mime-charset)))
458                            ;; Prefer coding systems corresponding to a
459                            ;; MIME charset.
460                            (if mime
461                                ;; Lower utf-16 priority so that we
462                                ;; normally prefer utf-8 to it, and put
463                                ;; x-ctext below that.
464                                (cond ((string-match "utf-16"
465                                                     (symbol-name mime))
466                                       2)
467                                      ((string-match "^x-" (symbol-name mime))
468                                       1)
469                                      (t 3))
470                              0))
471                         5)
472                        (lsh (if (memq base lang-preferred) 1 0) 4)
473                        (lsh (if (memq base from-categories) 1 0) 3)
474                        (lsh (if (string-match "-with-esc\\'"
475                                               (symbol-name base))
476                                 0 1) 2)
477                        (if (eq (coding-system-type base) 2)
478                            ;; For ISO based coding systems, prefer
479                            ;; one that doesn't use escape sequences.
480                            (let ((flags (coding-system-flags base)))
481                              (if (or (consp (aref flags 0))
482                                      (consp (aref flags 1))
483                                      (consp (aref flags 2))
484                                      (consp (aref flags 3)))
485                                  (if (or (aref flags 8) (aref flags 9))
486                                      0
487                                    1)
488                                2))
489                          1)))))))
490       (sort codings (function (lambda (x y)
491                                 (> (funcall func x) (funcall func y))))))))
492
493 (defun find-coding-systems-region (from to)
494   "Return a list of proper coding systems to encode a text between FROM and TO.
495 If FROM is a string, find coding systems in that instead of the buffer.
496 All coding systems in the list can safely encode any multibyte characters
497 in the text.
498
499 If the text contains no multibyte characters, return a list of a single
500 element `undecided'."
501   (let ((codings (find-coding-systems-region-internal from to)))
502     (if (eq codings t)
503         ;; The text contains only ASCII characters.  Any coding
504         ;; systems are safe.
505         '(undecided)
506       ;; We need copy-sequence because sorting will alter the argument.
507       (sort-coding-systems (copy-sequence codings)))))
508
509 (defun find-coding-systems-string (string)
510   "Return a list of proper coding systems to encode STRING.
511 All coding systems in the list can safely encode any multibyte characters
512 in STRING.
513
514 If STRING contains no multibyte characters, return a list of a single
515 element `undecided'."
516   (find-coding-systems-region string nil))
517
518 (defun find-coding-systems-for-charsets (charsets)
519   "Return a list of proper coding systems to encode characters of CHARSETS.
520 CHARSETS is a list of character sets.
521 It actually checks at most the first 96 characters of each charset.
522 So, if a charset of dimension two is included in CHARSETS, the value may
523 contain a coding system that can't encode all characters of the charset."
524   (cond ((or (null charsets)
525              (and (= (length charsets) 1)
526                   (eq 'ascii (car charsets))))
527          '(undecided))
528         ((or (memq 'eight-bit-control charsets)
529              (memq 'eight-bit-graphic charsets))
530          '(raw-text emacs-mule))
531         (t
532          (let ((codings t)
533                charset l str)
534            (while (and codings charsets)
535              (setq charset (car charsets) charsets (cdr charsets))
536              (unless (eq charset 'ascii)
537                (setq str (make-string 96 32))
538                (if (= (charset-dimension charset) 1)
539                    (if (= (charset-chars charset) 96)
540                        (dotimes (i 96)
541                          (aset str i (make-char charset (+ i 32))))
542                      (dotimes (i 94)
543                        (aset str i (make-char charset (+ i 33)))))
544                  (if (= (charset-chars charset) 96)
545                      (dotimes (i 96)
546                        (aset str i (make-char charset 32 (+ i 32))))
547                    (dotimes (i 94)
548                      (aset str i (make-char charset 33 (+ i 33))))))
549                (setq l (find-coding-systems-string str))
550                (if (eq codings t)
551                    (setq codings l)
552                  (let ((ll nil))
553                    (dolist (elt codings)
554                      (if (memq elt l)
555                          (setq ll (cons elt ll))))
556                    (setq codings ll)))))
557            codings))))
558
559 (defun find-multibyte-characters (from to &optional maxcount excludes)
560   "Find multibyte characters in the region specified by FROM and TO.
561 If FROM is a string, find multibyte characters in the string.
562 The return value is an alist of the following format:
563   ((CHARSET COUNT CHAR ...) ...)
564 where
565   CHARSET is a character set,
566   COUNT is a number of characters,
567   CHARs are the characters found from the character set.
568 Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list.
569 Optional 4th arg EXCLUDES is a list of character sets to be ignored.
570
571 For invalid characters, CHARs are actually strings."
572   (let ((chars nil)
573         charset char)
574     (if (stringp from)
575         (let ((idx 0))
576           (while (setq idx (string-match "[^\000-\177]" from idx))
577             (setq char (aref from idx)
578                   charset (char-charset char))
579             (if (eq charset 'unknown)
580                 (setq char (match-string 0)))
581             (if (or (memq charset '(unknown
582                                     eight-bit-control eight-bit-graphic))
583                     (not (or (eq excludes t) (memq charset excludes))))
584                 (let ((slot (assq charset chars)))
585                   (if slot
586                       (if (not (memq char (nthcdr 2 slot)))
587                           (let ((count (nth 1 slot)))
588                             (setcar (cdr slot) (1+ count))
589                             (if (or (not maxcount) (< count maxcount))
590                                 (nconc slot (list char)))))
591                     (setq chars (cons (list charset 1 char) chars)))))
592             (setq idx (1+ idx))))
593       (save-excursion
594         (goto-char from)
595         (while (re-search-forward "[^\000-\177]" to t)
596           (setq char (preceding-char)
597                 charset (char-charset char))
598           (if (eq charset 'unknown)
599               (setq char (match-string 0)))
600           (if (or (memq charset '(unknown eight-bit-control eight-bit-graphic))
601                   (not (or (eq excludes t) (memq charset excludes))))
602               (let ((slot (assq charset chars)))
603                 (if slot
604                     (if (not (member char (nthcdr 2 slot)))
605                         (let ((count (nth 1 slot)))
606                           (setcar (cdr slot) (1+ count))
607                           (if (or (not maxcount) (< count maxcount))
608                               (nconc slot (list char)))))
609                   (setq chars (cons (list charset 1 char) chars))))))))
610     (nreverse chars)))
611
612
613 (defun search-unencodable-char (coding-system)
614   "Search forward from point for a character that is not encodable.
615 It asks which coding system to check.
616 If such a character is found, set point after that character.
617 Otherwise, don't move point.
618
619 When called from a program, the value is the position of the unencodable
620 character found, or nil if all characters are encodable."
621   (interactive
622    (list (let ((default (or buffer-file-coding-system 'us-ascii)))
623            (read-coding-system
624             (format "Coding-system (default %s): " default)
625             default))))
626   (let ((pos (unencodable-char-position (point) (point-max) coding-system)))
627     (if pos
628         (goto-char (1+ pos))
629       (message "All following characters are encodable by %s" coding-system))
630     pos))
631
632
633 (defvar last-coding-system-specified nil
634   "Most recent coding system explicitly specified by the user when asked.
635 This variable is set whenever Emacs asks the user which coding system
636 to use in order to write a file.  If you set it to nil explicitly,
637 then call `write-region', then afterward this variable will be non-nil
638 only if the user was explicitly asked and specified a coding system.")
639
640 (defvar select-safe-coding-system-accept-default-p nil
641   "If non-nil, a function to control the behavior of coding system selection.
642 The meaning is the same as the argument ACCEPT-DEFAULT-P of the
643 function `select-safe-coding-system' (which see).  This variable
644 overrides that argument.")
645
646 (defun select-safe-coding-system-interactively (from to codings unsafe
647                                                 &optional rejected default)
648   "Select interactively a coding system for the region FROM ... TO.
649 FROM can be a string, as in `write-region'.
650 CODINGS is the list of base coding systems known to be safe for this region,
651   typically obtained with `find-coding-systems-region'.
652 UNSAFE is a list of coding systems known to be unsafe for this region.
653 REJECTED is a list of coding systems which were safe but for some reason
654   were not recommended in the particular context.
655 DEFAULT is the coding system to use by default in the query."
656   ;; At first, if some defaults are unsafe, record at most 11
657   ;; problematic characters and their positions for them by turning
658   ;;    (CODING ...)
659   ;; into
660   ;;    ((CODING (POS . CHAR) (POS . CHAR) ...) ...)
661   (if unsafe
662       (setq unsafe
663             (mapcar #'(lambda (coding)
664                         (cons coding
665                               (if (stringp from)
666                                   (mapcar #'(lambda (pos)
667                                               (cons pos (aref from pos)))
668                                           (unencodable-char-position
669                                            0 (length from) coding
670                                            11 from))
671                                 (mapcar #'(lambda (pos)
672                                             (cons pos (char-after pos)))
673                                         (unencodable-char-position
674                                          from to coding 11)))))
675                     unsafe)))
676
677   ;; Change each safe coding system to the corresponding
678   ;; mime-charset name if it is also a coding system.  Such a name
679   ;; is more friendly to users.
680   (let ((l codings)
681         mime-charset)
682     (while l
683       (setq mime-charset (coding-system-get (car l) 'mime-charset))
684       (if (and mime-charset (coding-system-p mime-charset))
685           (setcar l mime-charset))
686       (setq l (cdr l))))
687
688   ;; Don't offer variations with locking shift, which you
689   ;; basically never want.
690   (let (l)
691     (dolist (elt codings (setq codings (nreverse l)))
692       (unless (or (eq 'coding-category-iso-7-else
693                       (coding-system-category elt))
694                   (eq 'coding-category-iso-8-else
695                       (coding-system-category elt)))
696         (push elt l))))
697
698   ;; Remove raw-text, emacs-mule and no-conversion unless nothing
699   ;; else is available.
700   (setq codings
701         (or (delq 'raw-text
702                   (delq 'emacs-mule
703                         (delq 'no-conversion codings)))
704             '(raw-text emacs-mule no-conversion)))
705
706   (let ((window-configuration (current-window-configuration))
707         (bufname (buffer-name))
708         coding-system)
709     (save-excursion
710       ;; If some defaults are unsafe, make sure the offending
711       ;; buffer is displayed.
712       (when (and unsafe (not (stringp from)))
713         (pop-to-buffer bufname)
714         (goto-char (apply 'min (mapcar #'(lambda (x) (car (cadr x)))
715                                        unsafe))))
716       ;; Then ask users to select one from CODINGS while showing
717       ;; the reason why none of the defaults are not used.
718       (with-output-to-temp-buffer "*Warning*"
719         (with-current-buffer standard-output
720           (if (and (null rejected) (null unsafe))
721               (insert "No default coding systems to try for "
722                       (if (stringp from)
723                           (format "string \"%s\"." from)
724                         (format "buffer `%s'." bufname)))
725             (insert
726              "These default coding systems were tried to encode"
727              (if (stringp from)
728                  (concat " \"" (if (> (length from) 10)
729                                    (concat (substring from 0 10) "...\"")
730                                  (concat from "\"")))
731                (format " text\nin the buffer `%s'" bufname))
732              ":\n")
733             (let ((pos (point))
734                   (fill-prefix "  "))
735               (dolist (x (append rejected unsafe))
736                 (princ "  ") (princ (car x)))
737               (insert "\n")
738               (fill-region-as-paragraph pos (point)))
739             (when rejected
740               (insert "These safely encode the text in the buffer,
741 but are not recommended for encoding text in this context,
742 e.g., for sending an email message.\n ")
743               (dolist (x rejected)
744                 (princ " ") (princ x))
745               (insert "\n"))
746             (when unsafe
747               (insert (if rejected "The other coding systems"
748                         "However, each of them")
749                       " encountered characters it couldn't encode:\n")
750               (dolist (coding unsafe)
751                 (insert (format "  %s cannot encode these:" (car coding)))
752                 (let ((i 0)
753                       (func1
754                        #'(lambda (bufname pos)
755                            (when (buffer-live-p (get-buffer bufname))
756                              (pop-to-buffer bufname)
757                              (goto-char pos))))
758                       (func2
759                        #'(lambda (bufname pos coding)
760                            (when (buffer-live-p (get-buffer bufname))
761                              (pop-to-buffer bufname)
762                              (if (< (point) pos)
763                                  (goto-char pos)
764                                (forward-char 1)
765                                (search-unencodable-char coding)
766                                (forward-char -1))))))
767                   (dolist (elt (cdr coding))
768                     (insert " ")
769                     (if (stringp from)
770                         (insert (if (< i 10) (cdr elt) "..."))
771                       (if (< i 10)
772                           (insert-text-button