root/trunk/lib-src/ebrowse.c

Revision 4220, 94.4 kB (checked in by miyoshi, 6 months ago)

Sync up with Emacs22.2.

  • Property svn:eol-style set to native
Line 
1 /* ebrowse.c --- parsing files for the ebrowse C++ browser
2
3    Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
4                  2002, 2003, 2004, 2005, 2006, 2007, 2008
5                  Free Software Foundation, Inc.
6
7    This file is part of GNU Emacs.
8
9    GNU Emacs is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 3, or (at your option)
12    any later version.
13
14    GNU Emacs is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18
19    You should have received a copy of the GNU General Public License
20    along with GNU Emacs; see the file COPYING.  If not, write to the
21    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
22    Boston, MA 02110-1301, USA.  */
23
24 #ifdef HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27
28 #include <stdio.h>
29
30 #ifdef HAVE_STDLIB_H
31 #include <stdlib.h>
32 #endif
33
34 #ifdef HAVE_STRING_H
35 #include <string.h>
36 #endif
37
38 #include <ctype.h>
39 #include <assert.h>
40 #include "getopt.h"
41
/* Some compilers (SunOS) ship a <stdio.h> without SEEK_END; supply
   the value 2 in that case.  */
#if !defined SEEK_END
#define SEEK_END 2
#endif

/* P_ wraps the parameter list of a function prototype.  The list is
   kept when PROTOTYPES (from config.h) is defined, and replaced by an
   empty `()' otherwise.  */

#if !defined PROTOTYPES
#define P_(x) ()
#else
#define P_(x) x
#endif
54
/* Value is non-zero if strings X and Y compare equal.  The first
   characters are compared inline as a cheap early exit; the full
   comparison starts at the beginning of both strings so that two
   empty strings are never read past their terminating null.
   X and Y are evaluated more than once.  */

#define streq(X, Y) (*(X) == *(Y) && strcmp ((X), (Y)) == 0)
58
/* The ubiquitous `max' and `min' macros.  Each one is guarded
   separately so that an environment which already provides only one
   of them still gets the other.  Arguments may be evaluated twice.  */

#ifndef max
#define max(X, Y)       ((X) > (Y) ? (X) : (Y))
#endif

#ifndef min
#define min(X, Y)       ((X) < (Y) ? (X) : (Y))
#endif
65
/* Number of bytes by which input files are read.  */

#define READ_CHUNK_SIZE (100 * 1024)

/* PATH_LIST_SEPARATOR is the character separating the entries of a
   path list (like $PATH).  FILENAME_EQ (X, Y) is non-zero if the
   file names X and Y are considered equal; on MS-DOS and Windows NT
   a case-insensitive comparison function is used.  */

#if defined (__MSDOS__)
#define PATH_LIST_SEPARATOR ';'
#define FILENAME_EQ(X,Y)    (strcasecmp(X,Y) == 0)
#elif defined (WINDOWSNT)
#define PATH_LIST_SEPARATOR ';'
#define FILENAME_EQ(X,Y)    (stricmp(X,Y) == 0)
#else
#define PATH_LIST_SEPARATOR ':'
#define FILENAME_EQ(X,Y)    (streq(X,Y))
#endif
/* Name of the output file used by default.  */

#define DEFAULT_OUTFILE "BROWSE"

/* Version string that is written to the output file.  It must be
   changed whenever the structure of the output file changes.  */

#define EBROWSE_FILE_VERSION "ebrowse 5.0"

/* The output file is a tree of Lisp objects whose major nodes are
   Lisp structures.  Each string below is the head of one such
   structure, including the symbol that identifies its type.  */

#define TREE_HEADER_STRUCT      "[ebrowse-hs "
#define TREE_STRUCT             "[ebrowse-ts "
#define MEMBER_STRUCT           "[ebrowse-ms "
#define BROWSE_STRUCT           "[ebrowse-bs "
#define CLASS_STRUCT            "[ebrowse-cs "

/* Name of the symbol table entry that holds global functions,
   variables, defines etc.  The same name also appears in the
   browser display.  */

#define GLOBALS_NAME "*Globals*"
107
/* Token definitions.  Single-character tokens are not listed here;
   the named tokens start at 256.  */

/* BOOL, TRUE and FALSE are very likely to be defined as macros
   somewhere, so get rid of such definitions before using the names
   as enumerators.  */
#undef BOOL
#undef TRUE
#undef FALSE

enum token
{
  /* End of input.  */
  YYEOF = 0,

  /* Constants.  */
  CSTRING = 256,                /* string constant */
  CCHAR,                        /* character constant */
  CINT,                         /* integral constant */
  CFLOAT,                       /* real constant */

  /* Multi-character operators, and identifiers.  */
  ELLIPSIS,                     /* ... */
  LSHIFTASGN,                   /* <<= */
  RSHIFTASGN,                   /* >>= */
  ARROWSTAR,                    /* ->* */
  IDENT,                        /* identifier */
  DIVASGN,                      /* /= */
  INC,                          /* ++ */
  ADDASGN,                      /* += */
  DEC,                          /* -- */
  ARROW,                        /* -> */
  SUBASGN,                      /* -= */
  MULASGN,                      /* *= */
  MODASGN,                      /* %= */
  LOR,                          /* || */
  ORASGN,                       /* |= */
  LAND,                         /* && */
  ANDASGN,                      /* &= */
  XORASGN,                      /* ^= */
  POINTSTAR,                    /* .* */
  DCOLON,                       /* :: */
  EQ,                           /* == */
  NE,                           /* != */
  LE,                           /* <= */
  LSHIFT,                       /* << */
  GE,                           /* >= */
  RSHIFT,                       /* >> */

  /* Keywords.  */
  ASM,                          /* asm */
  AUTO,                         /* auto */
  BREAK,                        /* break */
  CASE,                         /* case */
  CATCH,                        /* catch */
  CHAR,                         /* char */
  CLASS,                        /* class */
  CONST,                        /* const */
  CONTINUE,                     /* continue */
  DEFAULT,                      /* default */
  DELETE,                       /* delete */
  DO,                           /* do */
  DOUBLE,                       /* double */
  ELSE,                         /* else */
  ENUM,                         /* enum */
  EXTERN,                       /* extern */
  FLOAT,                        /* float */
  FOR,                          /* for */
  FRIEND,                       /* friend */
  GOTO,                         /* goto */
  IF,                           /* if */
  T_INLINE,                     /* inline */
  INT,                          /* int */
  LONG,                         /* long */
  NEW,                          /* new */
  OPERATOR,                     /* operator */
  PRIVATE,                      /* private */
  PROTECTED,                    /* protected */
  PUBLIC,                       /* public */
  REGISTER,                     /* register */
  RETURN,                       /* return */
  SHORT,                        /* short */
  SIGNED,                       /* signed */
  SIZEOF,                       /* sizeof */
  STATIC,                       /* static */
  STRUCT,                       /* struct */
  SWITCH,                       /* switch */
  TEMPLATE,                     /* template */
  THIS,                         /* this */
  THROW,                        /* throw */
  TRY,                          /* try */
  TYPEDEF,                      /* typedef */
  UNION,                        /* union */
  UNSIGNED,                     /* unsigned */
  VIRTUAL,                      /* virtual */
  VOID,                         /* void */
  VOLATILE,                     /* volatile */
  WHILE,                        /* while */
  MUTABLE,                      /* mutable */
  BOOL,                         /* bool */
  TRUE,                         /* true */
  FALSE,                        /* false */
  SIGNATURE,                    /* signature (GNU extension) */
  NAMESPACE,                    /* namespace */
  EXPLICIT,                     /* explicit */
  TYPENAME,                     /* typename */
  CONST_CAST,                   /* const_cast */
  DYNAMIC_CAST,                 /* dynamic_cast */
  REINTERPRET_CAST,             /* reinterpret_cast */
  STATIC_CAST,                  /* static_cast */
  TYPEID,                       /* typeid */
  USING,                        /* using */
  WCHAR                         /* wchar_t */
};
215
/* Storage classes, in a wider sense: what kind of entity a member
   is.  */

enum sc
{
  SC_UNKNOWN = 0,
  SC_MEMBER  = 1,               /* An instance member.  */
  SC_STATIC  = 2,               /* A static member.  */
  SC_FRIEND  = 3,               /* A friend function.  */
  SC_TYPE    = 4                /* A type definition.  */
};
226
/* Visibility of a member.  */

enum visibility
{
  V_PUBLIC    = 0,
  V_PROTECTED = 1,
  V_PRIVATE   = 2
};
235
/* Member flags.  Each flag occupies one bit so that several of them
   can be combined in a single int.  */

#define F_VIRTUAL       (1 << 0)        /* Virtual function.  */
#define F_INLINE        (1 << 1)        /* Inline function.  */
#define F_CONST         (1 << 2)        /* Const.  */
#define F_PURE          (1 << 3)        /* Pure virtual function.  */
#define F_MUTABLE       (1 << 4)        /* Mutable.  */
#define F_TEMPLATE      (1 << 5)        /* Template.  */
#define F_EXPLICIT      (1 << 6)        /* Explicit constructor.  */
#define F_THROW         (1 << 7)        /* Has a throw specification.  */
#define F_EXTERNC       (1 << 8)        /* Declared extern "C".  */
#define F_DEFINE        (1 << 9)        /* A #define.  */
248
/* SET_FLAG turns on the bits FLAG in the lvalue F.  HAS_FLAG is
   non-zero if at least one of the bits FLAG is set in F.  */

#define SET_FLAG(F, FLAG)       ((F) |= (FLAG))
#define HAS_FLAG(F, FLAG)       (0 != ((F) & (FLAG)))
253
/* Description of a single class member.  The member's name is stored
   in the trailing NAME array.  */

struct member
{
  /* Linkage.  */
  struct member *next;          /* Next member in the same list.  */
  struct member *anext;         /* Next in member_table collision chain.  */
  struct member **list;         /* The list in the class this is on.  */

  /* Properties.  */
  unsigned param_hash;          /* Hash value of the parameter types.  */
  int vis;                      /* Visibility (public, ...).  */
  int flags;                    /* Combination of F_* bits.  */

  /* Where the member occurs.  */
  char *regexp;                 /* Regular expression matching it.  */
  char *filename;               /* Shared string; must not be freed.  */
  int pos;                      /* Buffer position of the occurrence.  */

  /* Where the member is defined.  */
  char *def_regexp;             /* Regular expression matching it.  */
  char *def_filename;           /* File name of the definition.  */
  int def_pos;                  /* Buffer position of the definition.  */

  char name[1];                 /* Name of the member.  */
};
272
/* A list element connecting a class structure with one of its
   superclasses or subclasses.  */

struct link
{
  struct sym *sym;              /* The superclass or subclass.  */
  struct link *next;            /* Next list element, or NULL.  */
};
281
/* Record of a namespace alias.  The alias name is stored in the
   trailing NAME array.  */

struct alias
{
  struct alias *next;           /* Next alias in the list.  */
  struct sym *namesp;           /* Namespace in which it is defined.  */
  struct link *aliasee;         /* Aliased namespaces (A::B::C...).  */
  char name[1];                 /* Name of the alias.  */
};
291
/* Description of a class in the symbol table, or of a namespace in
   all_namespaces.  The name is stored in the trailing NAME array.  */

struct sym
{
  int flags;                    /* Flags, e.g. whether it is a template.  */
  unsigned char visited;        /* Mark used to find circles.  */
  struct sym *next;             /* Next in hash collision list.  */

  /* Inheritance.  */
  struct link *subs;            /* Subclasses.  */
  struct link *supers;          /* Superclasses.  */

  /* Member lists.  */
  struct member *vars;          /* Instance variables.  */
  struct member *fns;           /* Instance functions.  */
  struct member *static_vars;   /* Static variables.  */
  struct member *static_fns;    /* Static functions.  */
  struct member *friends;       /* Friend functions.  */
  struct member *types;         /* Local types.  */

  /* Location.  */
  char *regexp;                 /* Regular expression matching it.  */
  int pos;                      /* Buffer position.  */
  char *filename;               /* File in which it can be found.  */
  char *sfilename;              /* File in which members can be found.  */

  struct sym *namesp;           /* Namespace in which it is defined.  */
  char name[1];                 /* Name of the class.  */
};
315
/* Experimental support for `--position-info': what gets printed is
   '(CLASS-NAME SCOPE MEMBER-NAME).  P_DEFN and P_DECL are the two
   values defined for info_where.  */

#define P_DEFN  1
#define P_DECL  2

int info_where;
struct sym *info_cls = NULL;
struct member *info_member = NULL;

/* Experimental.  The buffer position option `--position-info' is
   interested in.  Once this position is reached, what is known about
   that point is printed.  */

int info_position = -1;
331
/* Table of long command line options for getopt_long.  Most options
   map to a character code; `--help' and `--version' use the negative
   codes -2 and -3 instead.  The table ends with an all-zero entry.  */

struct option options[] =
{
  {"append",               no_argument,       NULL, 'a'},
  {"files",                required_argument, NULL, 'f'},
  {"help",                 no_argument,       NULL, -2},
  {"min-regexp-length",    required_argument, NULL, 'm'},
  {"max-regexp-length",    required_argument, NULL, 'M'},
  {"no-nested-classes",    no_argument,       NULL, 'n'},
  {"no-regexps",           no_argument,       NULL, 'x'},
  {"no-structs-or-unions", no_argument,       NULL, 's'},
  {"output-file",          required_argument, NULL, 'o'},
  {"position-info",        required_argument, NULL, 'p'},
  {"search-path",          required_argument, NULL, 'I'},
  {"verbose",              no_argument,       NULL, 'v'},
  {"version",              no_argument,       NULL, -3},
  {"very-verbose",         no_argument,       NULL, 'V'},
  {NULL,                   0,                 NULL, 0}
};
352
/* Semantic values of tokens, set by yylex.  */

unsigned yyival;                /* Value of a CINT token.  */
char *yytext;                   /* Text of an IDENT token.  */
char *yytext_end;

/* The file that output is written to.  */

FILE *yyout;

/* Number of the current line.  */

int yyline;

/* Name of the input file currently being processed.  */

char *filename;
370
/* Three character class vectors, and macros to test membership
   of characters.  The macros index the vectors with the character
   converted to unsigned char, i.e. with a value in 0..255, so each
   vector needs 256 elements (8-bit characters are assumed).  */

char is_ident[256];
char is_digit[256];
char is_white[256];

#define IDENTP(C)       is_ident[(unsigned char) (C)]
#define DIGITP(C)       is_digit[(unsigned char) (C)]
#define WHITEP(C)       is_white[(unsigned char) (C)]
381
/* Flags set from the command line.  The first three start out as 0,
   the last three as 1.  */

int f_append;
int f_verbose;
int f_very_verbose;
int f_structs = 1;
int f_regexps = 1;
int f_nested_classes = 1;

/* Minimum and maximum length of the regular expressions matching a
   member, class etc. that are written to the output file.  Both can
   be overridden from the command line.  */

int min_regexp = 5;
int max_regexp = 50;
397
/* The input buffer: its start, a pointer into it, and its size.  */

char *inbuffer;
char *in;
int inbuffer_size;

/* Value is the current buffer position in the input file, i.e. the
   distance of IN from the start of the input buffer.  */

#define BUFFER_POS() (in - inbuffer)

/* When the current lookahead is CSTRING, this points to the first
   character of the string constant.  It is used to recognize
   extern "C".  */

char *string_start;
413
/* The size of the hash tables for classes and members.  This should
   be a prime number.  NOTE: the current value is not prime
   (1001 = 7 * 11 * 13).  */

#define TABLE_SIZE 1001

/* Hash table of class symbols.  */

struct sym *class_table[TABLE_SIZE];

/* Hash table of all member structures.  Looking up a member here is
   generally faster than traversing the member lists of a
   `struct sym'.  */

struct member *member_table[TABLE_SIZE];

/* Hash table of namespace aliases.  */

struct alias *namespace_alias_table[TABLE_SIZE];
432
/* The special class symbol under which global functions, variables
   etc. are recorded.  */

struct sym *global_symbols;

/* The namespace we are currently in.  */

struct sym *current_namespace;

/* List of all namespaces known so far.  */

struct sym *all_namespaces;

/* Stack of the namespaces we are nested in during the parse.  */

struct sym **namespace_stack;
int namespace_stack_size;
int namespace_sp;

/* The current lookahead token; starts out as -1.  */

int tk = -1;
455
/* Description of a keyword.  */

struct kw
{
  char *name;                   /* How the keyword is spelled.  */
  int tk;                       /* Its token value.  */
  struct kw *next;              /* Next keyword in collision chain.  */
};

/* Keywords are looked up in a hash table of their own.  */

#define KEYWORD_TABLE_SIZE 1001
struct kw *keyword_table[KEYWORD_TABLE_SIZE];
469
/* The search path, kept as a singly-linked list of path strings.
   Pointers to its first and last element are maintained.  */

struct search_path
{
  char *path;
  struct search_path *next;
};

struct search_path *search_path;
struct search_path *search_path_tail;
480
481 /* Function prototypes.  */
482
483 int yylex P_ ((void));
484 void yyparse P_ ((void));
485 void re_init_parser P_ ((void));
486 char *token_string P_ ((int));
487 char *matching_regexp P_ ((void));
488 void init_sym P_ ((void));
489 struct sym *add_sym P_ ((char *, struct sym *));
490 void add_link P_ ((struct sym *, struct sym *));
491 void add_member_defn P_ ((struct sym *, char *, char *,
492                           int, unsigned, int, int, int));
493 void add_member_decl P_ ((struct sym *, char *, char *, int,
494                           unsigned, int, int, int, int));
495 void dump_roots P_ ((FILE *));
496 void *xmalloc P_ ((int));
497 void xfree P_ ((void *));
498 void add_global_defn P_ ((char *, char *, int, unsigned, int, int, int));
499 void add_global_decl P_ ((char *, char *, int, unsigned, int, int, int));
500 void add_define P_ ((char *, char *, int));
501 void mark_inherited_virtual P_ ((void));
502 void leave_namespace P_ ((void));
503 void enter_namespace P_ ((char *));
504 void register_namespace_alias P_ ((char *, struct link *));
505 void insert_keyword P_ ((char *, int));
506 void re_init_scanner P_ ((void));
507 void init_scanner P_ ((void));
508 void usage P_ ((int));
509 void version P_ ((void));
510 void process_file P_ ((char *));
511 void add_search_path P_ ((char *));
512 FILE *open_file P_ ((char *));
513 int process_pp_line P_ ((void));
514 int dump_members P_ ((FILE *, struct member *));
515 void dump_sym P_ ((FILE *, struct sym *));
516 int dump_tree P_ ((FILE *, struct sym *));
517 struct member *find_member P_ ((struct sym *, char *, int, int, unsigned));
518 struct member *add_member P_ ((struct sym *, char *, int, int, unsigned));
519 void mark_virtual P_ ((struct sym *));
520 void mark_virtual P_ ((struct sym *));
521 struct sym *make_namespace P_ ((char *, struct sym *));
522 char *sym_scope P_ ((struct sym *));
523 char *sym_scope_1 P_ ((struct sym *));
524 int skip_to P_ ((int));
525 void skip_matching P_ ((void));
526 void member P_ ((struct sym *, int));
527 void class_body P_ ((struct sym *, int));
528 void class_definition P_ ((struct sym *, int, int, int));
529 void declaration P_ ((int));
530 unsigned parm_list P_ ((int *));
531 char *operator_name P_ ((int *));
532 struct sym *parse_classname P_ ((void));
533 struct sym *parse_qualified_ident_or_type P_ ((char **));
534 void parse_qualified_param_ident_or_type P_ ((char **));
535 int globals P_ ((int));
536 void yyerror P_ ((char *, char *));
537 void usage P_ ((int)) NO_RETURN;
538 void version P_ (()) NO_RETURN;
539
540
541
542 /***********************************************************************
543                               Utilities
544  ***********************************************************************/
545
546 /* Print an error in a printf-like style with the current input file
547    name and line number.  */
548
549 void
550 yyerror (format, s)
551      char *format, *s;
552 {
553   fprintf (stderr, "%s:%d: ", filename, yyline);
554   fprintf (stderr, format, s);
555   putc ('\n', stderr);
556 }
557
558
/* Like malloc, but print an error and exit if not enough memory is
   available.  Value is a pointer to at least NBYTES bytes and is
   never null.  */

void *
xmalloc (nbytes)
     int nbytes;
{
  /* malloc (0) is allowed to return a null pointer even though
     nothing went wrong.  Ask for one byte in that case so that a
     null result always means that memory is exhausted.  */
  void *p = malloc (nbytes == 0 ? 1 : nbytes);
  if (p == NULL)
    {
      yyerror ("out of memory", NULL);
      exit (EXIT_FAILURE);
    }
  return p;
}
574
575
/* Like realloc, but print an error and exit if out of memory.  P is
   the block to resize and SZ the new size in bytes.  Value is the
   resized block and is never null.  */

void *
xrealloc (p, sz)
     void *p;
     int sz;
{
  /* realloc with a size of zero may free P and return a null pointer
     without being out of memory.  Ask for one byte in that case so
     that a null result always means that memory is exhausted.  */
  p = realloc (p, sz == 0 ? 1 : sz);
  if (p == NULL)
    {
      yyerror ("out of memory", NULL);
      exit (EXIT_FAILURE);
    }
  return p;
}
591
592
/* Like free, except that a null P is checked for and never passed
   on to free.  */

void
xfree (p)
     void *p;
{
  if (p == NULL)
    return;

  free (p);
}
602
603
/* Return a copy of the string S in memory obtained from xmalloc, so
   running out of memory prints an error and exits.  Value is null if
   S is null.  */

char *
xstrdup (s)
     char *s;
{
  char *copy;

  if (s == NULL)
    return NULL;

  copy = xmalloc (strlen (s) + 1);
  strcpy (copy, s);
  return copy;
}
615
616
617
618 /***********************************************************************
619                                Symbols
620  ***********************************************************************/
621
622 /* Initialize the symbol table.  This currently only sets up the
623    special symbol for globals (`*Globals*').  */
624
625 void
626 init_sym ()
627 {
628   global_symbols = add_sym (GLOBALS_NAME,