Changeset 1490 for branches/GNU/src/coding.c
- Timestamp:
- 02/17/98 01:45:05 (11 years ago)
- Files:
-
- branches/GNU/src/coding.c (modified) (128 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
branches/GNU/src/coding.c
r1488 r1490 1 1 /* Coding system handler (conversion, detection, and etc). 2 Copyright (C) 1995, 1997 , 1998Electrotechnical Laboratory, JAPAN.2 Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. 3 3 Licensed to the Free Software Foundation. 4 4 … … 26 26 3. ISO2022 handlers 27 27 4. Shift-JIS and BIG5 handlers 28 5. CCL handlers 29 6. End-of-line handlers 30 7. C library functions 31 8. Emacs Lisp library functions 32 9. Post-amble 28 5. End-of-line handlers 29 6. C library functions 30 7. Emacs Lisp library functions 31 8. Post-amble 33 32 34 33 */ … … 71 70 4. Raw text 72 71 73 A coding system for a text containing random 8-bit code. Emacs does 74 no code conversion on such a text except for end-of-line format. 72 A coding system to for a text containing random 8-bit code. Emacs 73 does no code conversion on such a text except for end-of-line 74 format. 75 75 76 76 5. Other … … 81 81 while reading/writing. 82 82 83 Emacs represents a coding system by a Lisp symbol that has a property84 `coding-system'. But, before actually using the coding system, the83 Emacs represents a coding-system by a Lisp symbol that has a property 84 `coding-system'. But, before actually using the coding-system, the 85 85 information about it is set in a structure of type `struct 86 86 coding_system' for rapid processing. See section 6 for more details. … … 93 93 instance, Unix's format is just one byte of `line-feed' code, 94 94 whereas DOS's format is two-byte sequence of `carriage-return' and 95 `line-feed' codes. MacOS's format is usually one byte of 96 `carriage-return'. 95 `line-feed' codes. MacOS's format is one byte of `carriage-return'. 97 96 98 97 Since text characters encoding and end-of-line encoding are … … 123 122 These functions decode SRC_BYTES length text at SOURCE encoded in 124 123 CODING to Emacs' internal format (emacs-mule). The resulting text 125 goes to a place pointed to by DESTINATION, the length of which 126 should not exceed DST_BYTES. These functions set the information of 127 original and decoded texts in the members produced, produced_char, 128 consumed, and consumed_char of the structure *CODING. 129 130 The return value is an integer (CODING_FINISH_XXX) indicating how 131 the decoding finished. 132 133 DST_BYTES zero means that source area and destination area are 134 overlapped, which means that we can produce a decoded text until it 135 reaches at the head of not-yet-decoded source text. 136 137 Below is a template of these functions. */ 124 goes to a place pointed to by DESTINATION, the length of which should 125 not exceed DST_BYTES. The number of bytes actually processed is 126 returned as *CONSUMED. The return value is the length of the decoded 127 text. Below is a template of these functions. */ 138 128 #if 0 139 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes )129 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed) 140 130 struct coding_system *coding; 141 131 unsigned char *source, *destination; 142 132 int src_bytes, dst_bytes; 133 int *consumed; 143 134 { 144 135 ... … … 151 142 internal format (emacs-mule) to CODING. The resulting text goes to 152 143 a place pointed to by DESTINATION, the length of which should not 153 exceed DST_BYTES. These functions set the information of 154 original and encoded texts in the members produced, produced_char, 155 consumed, and consumed_char of the structure *CODING. 156 157 The return value is an integer (CODING_FINISH_XXX) indicating how 158 the encoding finished. 159 160 DST_BYTES zero means that source area and destination area are 161 overlapped, which means that we can produce a decoded text until it 162 reaches at the head of not-yet-decoded source text. 163 164 Below is a template of these functions. */ 144 exceed DST_BYTES. The number of bytes actually processed is 145 returned as *CONSUMED. The return value is the length of the 146 encoded text. Below is a template of these functions. */ 165 147 #if 0 166 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes )148 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed) 167 149 struct coding_system *coding; 168 150 unsigned char *source, *destination; 169 151 int src_bytes, dst_bytes; 152 int *consumed; 170 153 { 171 154 ... … … 219 202 *dst++ = 0xA0, *dst++ = (c) | 0x80; \ 220 203 else \ 221 { \ 222 *dst++ = (c); \ 223 coding->produced_char++; \ 224 } \ 204 *dst++ = (c); \ 225 205 } while (0) 226 206 … … 234 214 *dst++ = leading_code + 0x20; \ 235 215 else \ 236 { \ 237 *dst++ = leading_code; \ 238 coding->produced_char++; \ 239 } \ 216 *dst++ = leading_code; \ 240 217 if (leading_code = CHARSET_LEADING_CODE_EXT (charset)) \ 241 218 *dst++ = leading_code; \ … … 278 255 Lisp_Object Qno_conversion, Qundecided; 279 256 Lisp_Object Qcoding_system_history; 280 Lisp_Object Qsafe_charsets;281 Lisp_Object Qvalid_codes;282 257 283 258 extern Lisp_Object Qinsert_file_contents, Qwrite_region; … … 285 260 Lisp_Object Qstart_process, Qopen_network_stream; 286 261 Lisp_Object Qtarget_idx; 287 288 Lisp_Object Vselect_safe_coding_system_function;289 262 290 263 /* Mnemonic character of each format of end-of-line. */ … … 300 273 #ifdef emacs 301 274 302 Lisp_Object Vcoding_system_list, Vcoding_system_alist; 303 304 Lisp_Object Qcoding_system_p, Qcoding_system_error; 305 306 /* Coding system emacs-mule and raw-text are for converting only 307 end-of-line format. */ 308 Lisp_Object Qemacs_mule, Qraw_text; 275 Lisp_Object Qcoding_system_spec, Qcoding_system_p, Qcoding_system_error; 276 277 /* Coding system emacs-mule is for converting only end-of-line format. */ 278 Lisp_Object Qemacs_mule; 309 279 310 280 /* Coding-systems are handed between Emacs Lisp programs and C internal … … 318 288 319 289 /* A vector of length 256 which contains information about special 320 Latin codes (espe cially for dealing with Microsoft codes). */290 Latin codes (espepcially for dealing with Microsoft code). */ 321 291 Lisp_Object Vlatin_extra_code_table; 322 292 323 293 /* Flag to inhibit code conversion of end-of-line format. */ 324 294 int inhibit_eol_conversion; 325 326 /* Flag to make buffer-file-coding-system inherit from process-coding. */327 int inherit_process_coding_system;328 295 329 296 /* Coding system to be used to encode text for terminal display. */ … … 337 304 struct coding_system keyboard_coding; 338 305 339 /* Default coding system to be used to write a file. */340 struct coding_system default_buffer_file_coding;341 342 306 Lisp_Object Vfile_coding_system_alist; 343 307 Lisp_Object Vprocess_coding_system_alist; … … 346 310 #endif /* emacs */ 347 311 348 Lisp_Object Qcoding_category , Qcoding_category_index;312 Lisp_Object Qcoding_category_index; 349 313 350 314 /* List of symbols `coding-category-xxx' ordered by priority. */ 351 315 Lisp_Object Vcoding_category_list; 352 316 353 /* Table of coding categories (Lisp symbols). */354 Lisp_Object Vcoding_category_table;317 /* Table of coding-systems currently assigned to each coding-category. */ 318 Lisp_Object coding_category_table[CODING_CATEGORY_IDX_MAX]; 355 319 356 320 /* Table of names of symbol for each coding-category. */ … … 359 323 "coding-category-sjis", 360 324 "coding-category-iso-7", 361 "coding-category-iso-7-tight",362 325 "coding-category-iso-8-1", 363 326 "coding-category-iso-8-2", 364 327 "coding-category-iso-7-else", 365 328 "coding-category-iso-8-else", 366 "coding-category-ccl",367 329 "coding-category-big5", 368 330 "coding-category-raw-text", … … 370 332 }; 371 333 372 /* Table of pointers to coding systems corresponding to each coding 373 categories. */ 374 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX]; 375 376 /* Table of coding category masks. Nth element is a mask for a coding 377 cateogry of which priority is Nth. */ 378 static 379 int coding_priorities[CODING_CATEGORY_IDX_MAX]; 380 381 /* Flag to tell if we look up translation table on character code 334 /* Flag to tell if we look up unification table on character code 382 335 conversion. */ 383 Lisp_Object Venable_character_translation; 384 /* Standard translation table to look up on decoding (reading). */ 385 Lisp_Object Vstandard_translation_table_for_decode; 386 /* Standard translation table to look up on encoding (writing). */ 387 Lisp_Object Vstandard_translation_table_for_encode; 388 389 Lisp_Object Qtranslation_table; 390 Lisp_Object Qtranslation_table_id; 391 Lisp_Object Qtranslation_table_for_decode; 392 Lisp_Object Qtranslation_table_for_encode; 336 Lisp_Object Venable_character_unification; 337 /* Standard unification table to look up on decoding (reading). */ 338 Lisp_Object Vstandard_character_unification_table_for_decode; 339 /* Standard unification table to look up on encoding (writing). */ 340 Lisp_Object Vstandard_character_unification_table_for_encode; 341 342 Lisp_Object Qcharacter_unification_table; 343 Lisp_Object Qcharacter_unification_table_for_decode; 344 Lisp_Object Qcharacter_unification_table_for_encode; 393 345 394 346 /* Alist of charsets vs revision number. */ … … 446 398 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 447 399 Check if a text is encoded in Emacs' internal format. If it is, 448 return CODING_CATEGORY_MASK_EMA CS_MULE, else return 0. */400 return CODING_CATEGORY_MASK_EMASC_MULE, else return 0. */ 449 401 450 402 int … … 656 608 enum iso_code_class_type iso_code_class[256]; 657 609 658 #define CHARSET_OK(idx, charset) \659 (coding_system_table[idx] \660 && (coding_system_table[idx]->safe_charsets[charset] \661 || (CODING_SPEC_ISO_REQUESTED_DESIGNATION \662 (coding_system_table[idx], charset) \663 != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)))664 665 #define SHIFT_OUT_OK(idx) \666 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)667 668 610 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 669 611 Check if a text is encoded in ISO2022. If it is, returns an 670 612 integer in which appropriate flag bits any of: 671 613 CODING_CATEGORY_MASK_ISO_7 672 CODING_CATEGORY_MASK_ISO_7_TIGHT673 614 CODING_CATEGORY_MASK_ISO_8_1 674 615 CODING_CATEGORY_MASK_ISO_8_2 … … 682 623 unsigned char *src, *src_end; 683 624 { 684 int mask = CODING_CATEGORY_MASK_ISO; 685 int mask_found = 0; 686 int reg[4], shift_out = 0; 687 int c, c1, i, charset; 688 689 reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1; 625 int mask = (CODING_CATEGORY_MASK_ISO_7 626 | CODING_CATEGORY_MASK_ISO_8_1 627 | CODING_CATEGORY_MASK_ISO_8_2 628 | CODING_CATEGORY_MASK_ISO_7_ELSE 629 | CODING_CATEGORY_MASK_ISO_8_ELSE 630 ); 631 int g1 = 0; /* 1 iff designating to G1. */ 632 int c, i; 633 struct coding_system coding_iso_8_1, coding_iso_8_2; 634 635 /* Coding systems of these categories may accept latin extra codes. */ 636 setup_coding_system 637 (XSYMBOL (coding_category_table[CODING_CATEGORY_IDX_ISO_8_1])->value, 638 &coding_iso_8_1); 639 setup_coding_system 640 (XSYMBOL (coding_category_table[CODING_CATEGORY_IDX_ISO_8_2])->value, 641 &coding_iso_8_2); 642 690 643 while (mask && src < src_end) 691 644 { … … 697 650 break; 698 651 c = *src++; 699 if ( c >= '(' && c <= '/')652 if ((c >= '(' && c <= '/')) 700 653 { 701 654 /* Designation sequence for a charset of dimension 1. */ 702 655 if (src >= src_end) 703 656 break; 704 c1 = *src++; 705 if (c1 < ' ' || c1 >= 0x80 706 || (charset = iso_charset_table[0][c >= ','][c1]) < 0) 707 /* Invalid designation sequence. Just ignore. */ 708 break; 709 reg[(c - '(') % 4] = charset; 657 c = *src++; 658 if (c < ' ' || c >= 0x80) 659 /* Invalid designation sequence. */ 660 return 0; 710 661 } 711 662 else if (c == '$') … … 717 668 if (c >= '@' && c <= 'B') 718 669 /* Designation for JISX0208.1978, GB2312, or JISX0208. */ 719 reg[0] = charset = iso_charset_table[1][0][c];670 ; 720 671 else if (c >= '(' && c <= '/') 721 672 { 722 673 if (src >= src_end) 723 674 break; 724 c1 = *src++; 725 if (c1 < ' ' || c1 >= 0x80 726 || (charset = iso_charset_table[1][c >= ','][c1]) < 0) 727 /* Invalid designation sequence. Just ignore. */ 728 break; 729 reg[(c - '(') % 4] = charset; 675 c = *src++; 676 if (c < ' ' || c >= 0x80) 677 /* Invalid designation sequence. */ 678 return 0; 730 679 } 731 680 else 732 /* Invalid designation sequence. Just ignore.*/733 break;681 /* Invalid designation sequence. */ 682 return 0; 734 683 } 735 else if (c == 'N' || c == 'n') 736 { 737 if (shift_out == 0 738 && (reg[1] >= 0 739 || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE) 740 || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE))) 741 { 742 /* Locking shift out. */ 743 mask &= ~CODING_CATEGORY_MASK_ISO_7BIT; 744 mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT; 745 shift_out = 1; 746 } 747 break; 748 } 749 else if (c == 'O' || c == 'o') 750 { 751 if (shift_out == 1) 752 { 753 /* Locking shift in. */ 754 mask &= ~CODING_CATEGORY_MASK_ISO_7BIT; 755 mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT; 756 shift_out = 0; 757 } 758 break; 759 } 684 else if (c == 'N' || c == 'O' || c == 'n' || c == 'o') 685 /* Locking shift. */ 686 mask &= (CODING_CATEGORY_MASK_ISO_7_ELSE 687 | CODING_CATEGORY_MASK_ISO_8_ELSE); 760 688 else if (c == '0' || c == '1' || c == '2') 761 /* Start/end composition. Just ignore.*/762 break;689 /* Start/end composition. */ 690 ; 763 691 else 764 /* Invalid escape sequence. Just ignore. */ 765 break; 766 767 /* We found a valid designation sequence for CHARSET. */ 768 mask &= ~CODING_CATEGORY_MASK_ISO_8BIT; 769 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset)) 770 mask_found |= CODING_CATEGORY_MASK_ISO_7; 771 else 772 mask &= ~CODING_CATEGORY_MASK_ISO_7; 773 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset)) 774 mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT; 775 else 776 mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT; 777 if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset)) 778 mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE; 779 if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset)) 780 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE; 692 /* Invalid escape sequence. */ 693 return 0; 781 694 break; 782 695 783 696 case ISO_CODE_SO: 784 if (shift_out == 0 785 && (reg[1] >= 0 786 || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE) 787 || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE))) 788 { 789 /* Locking shift out. */ 790 mask &= ~CODING_CATEGORY_MASK_ISO_7BIT; 791 mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT; 792 } 697 mask &= (CODING_CATEGORY_MASK_ISO_7_ELSE 698 | CODING_CATEGORY_MASK_ISO_8_ELSE); 793 699 break; 794 700 795 case ISO_CODE_SI:796 if (shift_out == 1)797 {798 /* Locking shift in. */799 mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;800 mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;801 }802 break;803 804 701 case ISO_CODE_CSI: 805 702 case ISO_CODE_SS2: … … 808 705 int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE; 809 706 810 if (c != ISO_CODE_CSI)811 {812 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags813 & CODING_FLAG_ISO_SINGLE_SHIFT)814 newmask |= CODING_CATEGORY_MASK_ISO_8_1;815 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags816 & CODING_FLAG_ISO_SINGLE_SHIFT)817 newmask |= CODING_CATEGORY_MASK_ISO_8_2;818 }819 707 if (VECTORP (Vlatin_extra_code_table) 820 708 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) 821 709 { 822 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags 823 & CODING_FLAG_ISO_LATIN_EXTRA) 710 if (coding_iso_8_1.flags & CODING_FLAG_ISO_LATIN_EXTRA) 824 711 newmask |= CODING_CATEGORY_MASK_ISO_8_1; 825 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags 826 & CODING_FLAG_ISO_LATIN_EXTRA) 712 if (coding_iso_8_2.flags & CODING_FLAG_ISO_LATIN_EXTRA) 827 713 newmask |= CODING_CATEGORY_MASK_ISO_8_2; 828 714 } 829 715 mask &= newmask; 830 mask_found |= newmask;831 716 } 832 717 break; … … 842 727 int newmask = 0; 843 728 844 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags 845 & CODING_FLAG_ISO_LATIN_EXTRA) 729 if (coding_iso_8_1.flags & CODING_FLAG_ISO_LATIN_EXTRA) 846 730 newmask |= CODING_CATEGORY_MASK_ISO_8_1; 847 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags 848 & CODING_FLAG_ISO_LATIN_EXTRA) 731 if (coding_iso_8_2.flags & CODING_FLAG_ISO_LATIN_EXTRA) 849 732 newmask |= CODING_CATEGORY_MASK_ISO_8_2; 850 733 mask &= newmask; 851 mask_found |= newmask;852 734 } 853 735 else … … 858 740 unsigned char *src_begin = src; 859 741 860 mask &= ~(CODING_CATEGORY_MASK_ISO_7 BIT742 mask &= ~(CODING_CATEGORY_MASK_ISO_7 861 743 | CODING_CATEGORY_MASK_ISO_7_ELSE); 862 mask_found |= CODING_CATEGORY_MASK_ISO_8_1;863 744 while (src < src_end && *src >= 0xA0) 864 745 src++; 865 746 if ((src - src_begin - 1) & 1 && src < src_end) 866 747 mask &= ~CODING_CATEGORY_MASK_ISO_8_2; 867 else868 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;869 748 } 870 749 break; … … 872 751 } 873 752 874 return (mask & mask_found);753 return mask; 875 754 } 876 755 … … 892 771 coding->composing += 2; \ 893 772 } \ 894 if ( charset_alt >= 0)\773 if ((charset) >= 0) \ 895 774 { \ 896 if (CHARSET_DIMENSION (charset_alt) == 2) \ 897 { \ 898 ONE_MORE_BYTE (c2); \ 899 if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F \ 900 && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0) \ 901 { \ 902 src--; \ 903 charset_alt = CHARSET_ASCII; \ 904 } \ 905 } \ 906 if (!NILP (translation_table) \ 907 && ((c_alt = translate_char (translation_table, \ 908 -1, charset_alt, c1, c2)) >= 0)) \ 775 if (CHARSET_DIMENSION (charset) == 2) \ 776 ONE_MORE_BYTE (c2); \ 777 if (!NILP (unification_table) \ 778 && ((c_alt = unify_char (unification_table, \ 779 -1, (charset), c1, c2)) >= 0)) \ 909 780 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \ 910 781 } \ … … 921 792 922 793 /* Set designation state into CODING. */ 923 #define DECODE_DESIGNATION(reg, dimension, chars, final_char) \ 924 do { \ 925 int charset = ISO_CHARSET_TABLE (make_number (dimension), \ 926 make_number (chars), \ 927 make_number (final_char)); \ 928 if (charset >= 0 \ 929 && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \ 930 || coding->safe_charsets[charset])) \ 931 { \ 932 if (coding->spec.iso2022.last_invalid_designation_register == 0 \ 933 && reg == 0 \ 934 && charset == CHARSET_ASCII) \ 935 { \ 936 /* We should insert this designation sequence as is so \ 937 that it is surely written back to a file. */ \ 938 coding->spec.iso2022.last_invalid_designation_register = -1; \ 939 goto label_invalid_code; \ 940 } \ 941 coding->spec.iso2022.last_invalid_designation_register = -1; \ 942 if ((coding->mode & CODING_MODE_DIRECTION) \ 943 && CHARSET_REVERSE_CHARSET (charset) >= 0) \ 944 charset = CHARSET_REVERSE_CHARSET (charset); \ 945 CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; \ 946 } \ 947 else \ 948 { \ 949 coding->spec.iso2022.last_invalid_designation_register = reg; \ 950 goto label_invalid_code; \ 951 } \ 794 #define DECODE_DESIGNATION(reg, dimension, chars, final_char) \ 795 do { \ 796 int charset = ISO_CHARSET_TABLE (make_number (dimension), \ 797 make_number (chars), \ 798 make_number (final_char)); \ 799 if (charset >= 0) \ 800 { \ 801 if (coding->direction == 1 \ 802 && CHARSET_REVERSE_CHARSET (charset) >= 0) \ 803 charset = CHARSET_REVERSE_CHARSET (charset); \ 804 CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; \ 805 } \ 952 806 } while (0) 953 807 954 /* Check if the current composing sequence contains only valid codes. 955 If the composing sequence doesn't end before SRC_END, return -1. 956 Else, if it contains only valid codes, return 0. 957 Else return the length of the composing sequence. */ 808 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ 958 809 959 810 int 960 check_composing_code (coding, src, src_end) 961 struct coding_system *coding; 962 unsigned char *src, *src_end; 963 { 964 unsigned char *src_start = src; 965 int invalid_code_found = 0; 966 int charset, c, c1, dim; 967 968 while (src < src_end) 969 { 970 if (*src++ != ISO_CODE_ESC) continue; 971 if (src >= src_end) break; 972 if ((c = *src++) == '1') /* end of compsition */ 973 return (invalid_code_found ? src - src_start : 0); 974 if (src + 2 >= src_end) break; 975 if (!coding->flags & CODING_FLAG_ISO_DESIGNATION) 976 invalid_code_found = 1; 977 else 978 { 979 dim = 0; 980 if (c == '$') 981 { 982 dim = 1; 983 c = (*src >= '@' && *src <= 'B') ? '(' : *src++; 984 } 985 if (c >= '(' && c <= '/') 986 { 987 c1 = *src++; 988 if ((c1 < ' ' || c1 >= 0x80) 989 || (charset = iso_charset_table[dim][c >= ','][c1]) < 0 990 || ! coding->safe_charsets[charset] 991 || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) 992 == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)) 993 invalid_code_found = 1; 994 } 995 else 996 invalid_code_found = 1; 997 } 998 } 999 return (invalid_code_found 1000 ? src - src_start 1001 : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1)); 1002 } 1003 1004 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ 1005 1006 int 1007 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) 811 decode_coding_iso2022 (coding, source, destination, 812 src_bytes, dst_bytes, consumed) 1008 813 struct coding_system *coding; 1009 814 unsigned char *source, *destination; 1010 815 int src_bytes, dst_bytes; 816 int *consumed; 1011 817 { 1012 818 unsigned char *src = source; … … 1022 828 int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); 1023 829 int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1); 1024 Lisp_Object translation_table 1025 = coding->translation_table_for_decode; 1026 int result = CODING_FINISH_NORMAL; 1027 1028 if (!NILP (Venable_character_translation) && NILP (translation_table)) 1029 translation_table = Vstandard_translation_table_for_decode; 1030 1031 coding->produced_char = 0; 1032 coding->fake_multibyte = 0; 1033 while (src < src_end && (dst_bytes 1034 ? (dst < adjusted_dst_end) 1035 : (dst < src - 6))) 830 Lisp_Object unification_table 831 = coding->character_unification_table_for_decode; 832 833 if (!NILP (Venable_character_unification) && NILP (unification_table)) 834 unification_table = Vstandard_character_unification_table_for_decode; 835 836 while (src < src_end && dst < adjusted_dst_end) 1036 837 { 1037 838 /* SRC_BASE remembers the start position in source in each loop. … … 1051 852 /* This is SPACE or DEL. */ 1052 853 *dst++ = c1; 1053 coding->produced_char++;1054 854 break; 1055 855 } … … 1068 868 1069 869 case ISO_0xA0_or_0xFF: 1070 if (charset1 < 0 || CHARSET_CHARS (charset1) == 94 1071 || coding->flags & CODING_FLAG_ISO_SEVEN_BITS) 1072 goto label_invalid_code; 870 if (charset1 < 0 || CHARSET_CHARS (charset1) == 94) 871 { 872 /* Invalid code. */ 873 *dst++ = c1; 874 break; 875 } 1073 876 /* This is a graphic character, we fall down ... */ 1074 877 1075 878 case ISO_graphic_plane_1: 1076 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) 1077 goto label_invalid_code; 1078 else 1079 DECODE_ISO_CHARACTER (charset1, c1); 879 DECODE_ISO_CHARACTER (charset1, c1); 1080 880 break; 1081 881 … … 1083 883 /* All ISO2022 control characters in this class have the 1084 884 same representation in Emacs internal format. */ 1085 if (c1 == '\n'1086 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)1087 && (coding->eol_type == CODING_EOL_CR1088 || coding->eol_type == CODING_EOL_CRLF))1089 {1090 result = CODING_FINISH_INCONSISTENT_EOL;1091 goto label_end_of_loop_2;1092 }1093 885 *dst++ = c1; 1094 coding->produced_char++;1095 886 break; 1096 887 1097 888 case ISO_carriage_return: 1098 889 if (coding->eol_type == CODING_EOL_CR) 1099 *dst++ = '\n'; 890 { 891 *dst++ = '\n'; 892 } 1100 893 else if (coding->eol_type == CODING_EOL_CRLF) 1101 894 { … … 1105 898 else 1106 899 { 1107 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)1108 {1109 result = CODING_FINISH_INCONSISTENT_EOL;1110 goto label_end_of_loop_2;1111 }1112 900 src--; 1113 *dst++ = '\r';901 *dst++ = c1; 1114 902 } 1115 903 } 1116 904 else 1117 *dst++ = c1; 1118 coding->produced_char++; 905 { 906 *dst++ = c1; 907 } 1119 908 break; 1120 909 1121 910 case ISO_shift_out: 1122 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT) 1123 || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0) 1124 goto label_invalid_code; 911 if (CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0) 912 goto label_invalid_escape_sequence; 1125 913 CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; 1126 914 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); … … 1128 916 1129 917 case ISO_shift_in: 1130 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))1131 goto label_invalid_code;1132 918 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; 1133 919 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); … … 1136 922 case ISO_single_shift_2_7: 1137 923 case ISO_single_shift_2: 1138 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))1139 goto label_invalid_code;1140 924 /* SS2 is handled as an escape sequence of ESC 'N' */ 1141 925 c1 = 'N'; … … 1143 927 1144 928 case ISO_single_shift_3: 1145 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))1146 goto label_invalid_code;1147 929 /* SS2 is handled as an escape sequence of ESC 'O' */ 1148 930 c1 = 'O'; … … 1165 947 ONE_MORE_BYTE (c1); 1166 948 if (!(c1 >= '@' && c1 <= '~')) 1167 goto label_invalid_ code;949 goto label_invalid_escape_sequence; 1168 950 ONE_MORE_BYTE (c1); 1169 951 if (c1 != ISO_CODE_ESC) 1170 goto label_invalid_ code;952 goto label_invalid_escape_sequence; 1171 953 ONE_MORE_BYTE (c1); 1172 954 goto label_escape_sequence; 1173 955 1174 956 case '$': /* designation of 2-byte character set */ 1175 if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))1176 goto label_invalid_code;1177 957 ONE_MORE_BYTE (c1); 1178 958 if (c1 >= '@' && c1 <= 'B') … … 1192 972 } 1193 973 else 1194 goto label_invalid_ code;974 goto label_invalid_escape_sequence; 1195 975 break; 1196 976 1197 977 case 'n': /* invocation of locking-shift-2 */ 1198 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT) 1199 || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0) 1200 goto label_invalid_code; 978 if (CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0) 979 goto label_invalid_escape_sequence; 1201 980 CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; 1202 981 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); … … 1204 983 1205 984 case 'o': /* invocation of locking-shift-3 */ 1206 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT) 1207 || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0) 1208 goto label_invalid_code; 985 if (CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0) 986 goto label_invalid_escape_sequence; 1209 987 CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; 1210 988 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); … … 1212 990 1213 991 case 'N': /* invocation of single-shift-2 */ 1214 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT) 1215 || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0) 1216 goto label_invalid_code; 992 if (CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0) 993 goto label_invalid_escape_sequence; 1217 994 ONE_MORE_BYTE (c1); 1218 995 charset = CODING_SPEC_ISO_DESIGNATION (coding, 2); … … 1221 998 1222 999 case 'O': /* invocation of single-shift-3 */ 1223 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT) 1224 || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0) 1225 goto label_invalid_code; 1000 if (CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0) 1001 goto label_invalid_escape_sequence; 1226 1002 ONE_MORE_BYTE (c1); 1227 1003 charset = CODING_SPEC_ISO_DESIGNATION (coding, 3); … … 1229 1005 break; 1230 1006 1231 case '0': case '2': /* start composing */ 1232 /* Before processing composing, we must be sure that all 1233 characters being composed are supported by CODING. 1234 If not, we must give up composing and insert the 1235 bunch of codes for composing as is without decoding. */ 1236 { 1237 int result1; 1238 1239 result1 = check_composing_code (coding, src, src_end); 1240 if (result1 == 0) 1241 { 1242 coding->composing = (c1 == '0' 1243 ? COMPOSING_NO_RULE_HEAD 1244 : COMPOSING_WITH_RULE_HEAD); 1245 coding->produced_char++; 1246 } 1247 else if (result1 > 0) 1248 { 1249 if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst) 1250 { 1251 bcopy (src_base, dst, result1 + 2); 1252 src += result1; 1253 dst += result1 + 2; 1254 coding->produced_char += result1 + 2; 1255 } 1256 else 1257 { 1258
