blob: 38866717263216250b5c3f620b84af6d59858eb7 [file] [log] [blame]
shiqiane35fdd92008-12-10 05:08:54 +00001#!/usr/bin/env python
2#
3# Copyright 2007 Neal Norwitz
4# Portions Copyright 2007 Google Inc.
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17
18"""Generate an Abstract Syntax Tree (AST) for C++."""
19
20__author__ = 'nnorwitz@google.com (Neal Norwitz)'
21
22
23# TODO:
24# * Tokens should never be exported, need to convert to Nodes
25# (return types, parameters, etc.)
26# * Handle static class data for templatized classes
27# * Handle casts (both C++ and C-style)
28# * Handle conditions and loops (if/else, switch, for, while/do)
29#
30# TODO much, much later:
31# * Handle #define
32# * exceptions
33
34
35try:
36 # Python 3.x
37 import builtins
38except ImportError:
39 # Python 2.x
40 import __builtin__ as builtins
41
42import sys
43import traceback
44
45from cpp import keywords
46from cpp import tokenize
47from cpp import utils
48
49
if not hasattr(builtins, 'reversed'):
    # Python 2.3 and earlier have no reversed(); provide a generator version.
    def reversed(seq):
        """Yield the items of the indexable sequence seq from last to first."""
        index = len(seq)
        while index > 0:
            index -= 1
            yield seq[index]

if not hasattr(builtins, 'next'):
    # Python 2.5 and earlier have no next(); call the iterator method instead.
    def next(obj):
        """Return the next item from the Python 2 style iterator obj."""
        return obj.next()
60
61
# Visibility constants (distinct small integers).
VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = 0, 1, 2

# Function modifier bit flags; they are OR-ed together into a single
# integer `modifiers` value, so each flag occupies its own bit.
FUNCTION_NONE = 0
FUNCTION_CONST = 1 << 0
FUNCTION_VIRTUAL = 1 << 1
FUNCTION_PURE_VIRTUAL = 1 << 2
FUNCTION_CTOR = 1 << 3
FUNCTION_DTOR = 1 << 4
FUNCTION_ATTRIBUTE = 1 << 5
FUNCTION_UNKNOWN_ANNOTATION = 1 << 6
FUNCTION_THROW = 1 << 7
FUNCTION_OVERRIDE = 1 << 8
shiqiane35fdd92008-12-10 05:08:54 +000074
75"""
76These are currently unused. Should really handle these properly at some point.
77
78TYPE_MODIFIER_INLINE = 0x010000
79TYPE_MODIFIER_EXTERN = 0x020000
80TYPE_MODIFIER_STATIC = 0x040000
81TYPE_MODIFIER_CONST = 0x080000
82TYPE_MODIFIER_REGISTER = 0x100000
83TYPE_MODIFIER_VOLATILE = 0x200000
84TYPE_MODIFIER_MUTABLE = 0x400000
85
86TYPE_MODIFIER_MAP = {
87 'inline': TYPE_MODIFIER_INLINE,
88 'extern': TYPE_MODIFIER_EXTERN,
89 'static': TYPE_MODIFIER_STATIC,
90 'const': TYPE_MODIFIER_CONST,
91 'register': TYPE_MODIFIER_REGISTER,
92 'volatile': TYPE_MODIFIER_VOLATILE,
93 'mutable': TYPE_MODIFIER_MUTABLE,
94 }
95"""
96
# Token type of synthetic tokens that the builder itself places in the token
# stream; AstBuilder.Generate() consumes them without emitting a node.
_INTERNAL_TOKEN = 'internal'
# Name of the internal token that makes Generate() pop the namespace stack.
_NAMESPACE_POP = 'ns-pop'
99
100
101# TODO(nnorwitz): use this as a singleton for templated_types, etc
102# where we don't want to create a new empty dict each time. It is also const.
103class _NullDict(object):
104 __contains__ = lambda self: False
105 keys = values = items = iterkeys = itervalues = iteritems = lambda self: ()
106
107
108# TODO(nnorwitz): move AST nodes into a separate module.
class Node(object):
    """Base AST node.

    Attributes:
      start: start position of the node as passed by the creator.
      end: end position of the node as passed by the creator.
    """

    def __init__(self, start, end):
        self.start = start
        self.end = end

    def IsDeclaration(self):
        """Returns bool if this node is a declaration."""
        return False

    def IsDefinition(self):
        """Returns bool if this node is a definition."""
        return False

    def IsExportable(self):
        """Returns bool if this node exportable from a header file."""
        return False

    def Requires(self, node):
        """Does this AST node require the definition of the node passed in?"""
        return False

    def __str__(self):
        """Return 'ClassName(...)' with an empty description."""
        return self._StringHelper(self.__class__.__name__, '')

    # __repr__ delegates to str(); without a real __str__ the default
    # object.__str__ would call __repr__ again and recurse forever.  The old
    # spelling is kept as an alias for any caller that used it explicitly.
    XXX__str__ = __str__

    def _StringHelper(self, name, suffix):
        """Format 'name(suffix)', adding start/end when utils.DEBUG is set."""
        if not utils.DEBUG:
            return '%s(%s)' % (name, suffix)
        return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix)

    def __repr__(self):
        return str(self)
142
143
class Define(Node):
    """Node holding the name and replacement text of a #define directive."""

    def __init__(self, start, end, name, definition):
        super(Define, self).__init__(start, end)
        self.name, self.definition = name, definition

    def __str__(self):
        description = '%s %s' % (self.name, self.definition)
        return self._StringHelper(type(self).__name__, description)
153
154
class Include(Node):
    """Node for an #include directive.

    Attributes:
      filename: the included path without its surrounding delimiters.
      system: true when the include was written with <...> rather than "...".
    """

    def __init__(self, start, end, filename, system):
        super(Include, self).__init__(start, end)
        self.filename, self.system = filename, system

    def __str__(self):
        # Re-wrap the filename in the delimiters matching the include style.
        fmt = '<%s>' if self.system else '"%s"'
        return self._StringHelper(type(self).__name__, fmt % self.filename)
166
167
class Goto(Node):
    """Node that stores a label (presumably the target of a goto)."""

    def __init__(self, start, end, label):
        super(Goto, self).__init__(start, end)
        self.label = label

    def __str__(self):
        label_text = str(self.label)
        return self._StringHelper(type(self).__name__, label_text)
175
176
class Expr(Node):
    """Node wrapping an expression value stored in `expr`."""

    def __init__(self, start, end, expr):
        super(Expr, self).__init__(start, end)
        self.expr = expr

    def Requires(self, node):
        # TODO(nnorwitz): impl.
        return False

    def __str__(self):
        expr_text = str(self.expr)
        return self._StringHelper(type(self).__name__, expr_text)
188
189
class Return(Expr):
    """Expr subclass distinguished only by its type (presumably `return`)."""
    pass
192
193
class Delete(Expr):
    """Expr subclass distinguished only by its type (presumably `delete`)."""
    pass
196
197
class Friend(Expr):
    """Expr that additionally records a copy of the namespace stack."""

    def __init__(self, start, end, expr, namespace):
        super(Friend, self).__init__(start, end, expr)
        # Slice-copy so later changes to the caller's stack are not seen here.
        namespace_snapshot = namespace[:]
        self.namespace = namespace_snapshot
202
203
class Using(Node):
    """Node that stores the names given to it (presumably from `using`)."""

    def __init__(self, start, end, names):
        super(Using, self).__init__(start, end)
        self.names = names

    def __str__(self):
        names_text = str(self.names)
        return self._StringHelper(type(self).__name__, names_text)
211
212
class Parameter(Node):
    """A single function parameter.

    Attributes:
      name: the parameter name.
      type: the parameter's type object (its `name` is used by Requires).
      default: sequence of tokens forming the default value; may be empty.
    """

    def __init__(self, start, end, name, parameter_type, default):
        super(Parameter, self).__init__(start, end)
        self.name = name
        self.type = parameter_type
        self.default = default

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        return node.name == self.type.name

    def __str__(self):
        description = '%s %s' % (str(self.type), self.name)
        if self.default:
            default_text = ''.join([d.name for d in self.default])
            description = description + ' = ' + default_text
        return self._StringHelper(type(self).__name__, description)
230
231
class _GenericDeclaration(Node):
    """Base for named nodes that remember the namespace they were seen in."""

    def __init__(self, start, end, name, namespace):
        super(_GenericDeclaration, self).__init__(start, end)
        self.name = name
        # Copy: the caller's namespace stack keeps changing while parsing.
        self.namespace = namespace[:]

    def FullName(self):
        """Return the name prefixed with its '::'-joined namespace, if any."""
        qualified = bool(self.namespace) and bool(self.namespace[-1])
        prefix = '::'.join(self.namespace) + '::' if qualified else ''
        return prefix + self.name

    def _TypeStringHelper(self, suffix):
        """Format like _StringHelper, appending ' in <namespace>' if set."""
        if self.namespace:
            # Falsy namespace entries are shown as '<anonymous>'.
            shown = [part or '<anonymous>' for part in self.namespace]
            suffix = suffix + ' in ' + '::'.join(shown)
        return self._StringHelper(type(self).__name__, suffix)
249
250
251# TODO(nnorwitz): merge with Parameter in some way?
class VariableDeclaration(_GenericDeclaration):
    """Declaration of a variable with a type and optional initial value."""

    def __init__(self, start, end, name, var_type, initial_value, namespace):
        super(VariableDeclaration, self).__init__(start, end, name, namespace)
        self.type, self.initial_value = var_type, initial_value

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        return node.name == self.type.name

    def ToString(self):
        """Return a string that tries to reconstitute the variable decl."""
        declaration = '%s %s' % (self.type, self.name)
        if not self.initial_value:
            return declaration
        return declaration + ' = ' + self.initial_value

    def __str__(self):
        return self._StringHelper(type(self).__name__, self.ToString())
271
272
class Typedef(_GenericDeclaration):
    """A typedef: `name` plus the `alias` token sequence it stands for."""

    def __init__(self, start, end, name, alias, namespace):
        super(Typedef, self).__init__(start, end, name, namespace)
        self.alias = alias

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        wanted = node.name
        # The alias sequence may contain None entries; skip those.
        return any(token is not None and wanted == token.name
                   for token in self.alias)

    def __str__(self):
        description = '%s, %s' % (self.name, self.alias)
        return self._TypeStringHelper(description)
295
296
class _NestedType(_GenericDeclaration):
    """Named type definition that carries a `fields` value."""

    def __init__(self, start, end, name, fields, namespace):
        super(_NestedType, self).__init__(start, end, name, namespace)
        self.fields = fields

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def __str__(self):
        description = '%s, {%s}' % (self.name, self.fields)
        return self._TypeStringHelper(description)
311
312
class Union(_NestedType):
    """_NestedType subclass distinguished only by type (presumably a union)."""
    pass
315
316
class Enum(_NestedType):
    """_NestedType subclass distinguished only by type (presumably an enum)."""
    pass
319
320
class Class(_GenericDeclaration):
    """A class, either forward-declared or fully defined.

    Attributes:
      bases: list of token lists, one per base class, or None.
      body: the class body, or None.
      templated_types: template type information, falsy when not templated.
    """

    def __init__(self, start, end, name, bases, templated_types, body, namespace):
        super(Class, self).__init__(start, end, name, namespace)
        self.bases = bases
        self.templated_types = templated_types
        self.body = body

    def IsDeclaration(self):
        # A forward declaration has neither bases nor a body.
        return self.body is None and self.bases is None

    def IsDefinition(self):
        return not self.IsDeclaration()

    def IsExportable(self):
        return not self.IsDeclaration()

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        if not self.bases:
            # TODO(nnorwitz): search in body too.
            return False
        # TODO(nnorwitz): bases are tokens, do name comparison.
        return any(token.name == node.name
                   for token_list in self.bases
                   for token in token_list)

    def __str__(self):
        shown_name = self.name
        if self.templated_types:
            shown_name += '<%s>' % self.templated_types
        description = '%s, %s, %s' % (shown_name, self.bases, self.body)
        return self._TypeStringHelper(description)
354
355
class Struct(Class):
    """Class subclass distinguished only by type (presumably a struct)."""
    pass
358
359
class Function(_GenericDeclaration):
    """A function declaration or definition.

    The raw return-type and parameter token sequences are converted with a
    TypeConverter; `modifiers` is stored as given (formatted as an integer
    by __str__), and `body` is None for a pure declaration.
    """

    def __init__(self, start, end, name, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        super(Function, self).__init__(start, end, name, namespace)
        converter = TypeConverter(namespace)
        self.return_type = converter.CreateReturnType(return_type)
        self.parameters = converter.ToParameters(parameters)
        self.modifiers = modifiers
        self.templated_types = templated_types
        self.body = body

    def IsDeclaration(self):
        return self.body is None

    def IsDefinition(self):
        return self.body is not None

    def IsExportable(self):
        # Functions whose return type carries 'static' are not exportable,
        # nor is anything whose namespace stack contains None.
        return_type = self.return_type
        if return_type and 'static' in return_type.modifiers:
            return False
        return None not in self.namespace

    def Requires(self, node):
        if not self.parameters:
            # TODO(nnorwitz): search in body too.
            return False
        # TODO(nnorwitz): parameters are tokens, do name comparison.
        return any(p.name == node.name for p in self.parameters)

    def __str__(self):
        # TODO(nnorwitz): add templated_types.
        fields = (self.return_type, self.name, self.parameters,
                  self.modifiers, self.body)
        return self._TypeStringHelper('%s %s(%s), 0x%02x, %s' % fields)
397
398
class Method(Function):
    """A Function that also records the class it was found in."""

    def __init__(self, start, end, name, in_class, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        super(Method, self).__init__(start, end, name, return_type,
                                     parameters, modifiers, templated_types,
                                     body, namespace)
        # TODO(nnorwitz): in_class could also be a namespace which can
        # mess up finding functions properly.
        self.in_class = in_class
407
408
class Type(_GenericDeclaration):
    """Type used for any variable (eg class, primitive, struct, etc)."""

    def __init__(self, start, end, name, templated_types, modifiers,
                 reference, pointer, array):
        """
        Args:
          name: str name of main type
          templated_types: [Class (Type?)] template type info between <>
          modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
          reference, pointer, array: bools
        """
        super(Type, self).__init__(start, end, name, [])
        self.templated_types = templated_types
        # With no explicit name, the last modifier doubles as the type name.
        # Note that this pops from the list passed in by the caller.
        if modifiers and not name:
            self.name = modifiers.pop()
        self.modifiers = modifiers
        self.reference, self.pointer, self.array = reference, pointer, array

    def __str__(self):
        pieces = []
        if self.modifiers:
            pieces.append(' '.join(self.modifiers) + ' ')
        pieces.append(str(self.name))
        if self.templated_types:
            pieces.append('<%s>' % self.templated_types)
        if self.reference:
            pieces.append('&')
        if self.pointer:
            pieces.append('*')
        if self.array:
            pieces.append('[]')
        return self._TypeStringHelper(''.join(pieces))

    # By definition, Is* are always False.  A Type can only exist in
    # some sort of variable declaration, parameter, or return value.
    def IsDeclaration(self):
        return False

    def IsDefinition(self):
        return False

    def IsExportable(self):
        return False
456
457
458class TypeConverter(object):
459
460 def __init__(self, namespace_stack):
461 self.namespace_stack = namespace_stack
462
463 def _GetTemplateEnd(self, tokens, start):
464 count = 1
465 end = start
466 while 1:
467 token = tokens[end]
468 end += 1
469 if token.name == '<':
470 count += 1
471 elif token.name == '>':
472 count -= 1
473 if count == 0:
474 break
475 return tokens[start:end-1], end
476
    def ToType(self, tokens):
        """Convert [Token,...] to [Type(...), ...], useful for base classes.

        For example, code like class Foo : public Bar<x, y> { ... };
        the "Bar<x, y>" portion gets converted to an AST.

        Args:
          tokens: sequence of tokens; each needs name, start and end.

        Returns:
          [Type(...), ...] with one entry per comma/template separated type.
        """
        result = []
        name_tokens = []
        reference = pointer = array = False

        def AddType(templated_types):
            # Flush the tokens gathered in name_tokens into one Type.
            # Partition tokens into name and modifier tokens.
            names = []
            modifiers = []
            for t in name_tokens:
                if keywords.IsKeyword(t.name):
                    modifiers.append(t.name)
                else:
                    names.append(t.name)
            name = ''.join(names)
            # Indexes name_tokens[0], so at least one token must be pending.
            result.append(Type(name_tokens[0].start, name_tokens[-1].end,
                               name, templated_types, modifiers,
                               reference, pointer, array))
            del name_tokens[:]

        i = 0
        end = len(tokens)
        while i < end:
            token = tokens[i]
            if token.name == '<':
                # Recursively convert everything up to the matching '>'.
                new_tokens, new_end = self._GetTemplateEnd(tokens, i+1)
                AddType(self.ToType(new_tokens))
                # If there is a comma after the template, we need to consume
                # that here otherwise it becomes part of the name.
                # (Together with the i += 1 below, this skips the token that
                # directly follows the closing '>'.)
                i = new_end
                reference = pointer = array = False
            elif token.name == ',':
                AddType([])
                reference = pointer = array = False
            elif token.name == '*':
                pointer = True
            elif token.name == '&':
                reference = True
            elif token.name == '[':
                # NOTE(review): this sets pointer, not array, whereas
                # ToParameters sets array for '[' -- confirm the intent.
                pointer = True
            elif token.name == ']':
                pass
            else:
                name_tokens.append(token)
            i += 1

        if name_tokens:
            # No '<' in the tokens, just a simple name and no template.
            AddType([])
        return result
534
535 def DeclarationToParts(self, parts, needs_name_removed):
536 name = None
537 default = []
538 if needs_name_removed:
539 # Handle default (initial) values properly.
540 for i, t in enumerate(parts):
541 if t.name == '=':
542 default = parts[i+1:]
543 name = parts[i-1].name
544 if name == ']' and parts[i-2].name == '[':
545 name = parts[i-3].name
546 i -= 1
547 parts = parts[:i-1]
548 break
549 else:
550 if parts[-1].token_type == tokenize.NAME:
551 name = parts.pop().name
552 else:
553 # TODO(nnorwitz): this is a hack that happens for code like
554 # Register(Foo<T>); where it thinks this is a function call
555 # but it's actually a declaration.
556 name = '???'
557 modifiers = []
558 type_name = []
559 other_tokens = []
560 templated_types = []
561 i = 0
562 end = len(parts)
563 while i < end:
564 p = parts[i]
565 if keywords.IsKeyword(p.name):
566 modifiers.append(p.name)
567 elif p.name == '<':
568 templated_tokens, new_end = self._GetTemplateEnd(parts, i+1)
569 templated_types = self.ToType(templated_tokens)
570 i = new_end - 1
571 # Don't add a spurious :: to data members being initialized.
572 next_index = i + 1
573 if next_index < end and parts[next_index].name == '::':
574 i += 1
575 elif p.name in ('[', ']', '='):
576 # These are handled elsewhere.
577 other_tokens.append(p)
578 elif p.name not in ('*', '&', '>'):
579 # Ensure that names have a space between them.
580 if (type_name and type_name[-1].token_type == tokenize.NAME and
581 p.token_type == tokenize.NAME):
582 type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0))
583 type_name.append(p)
584 else:
585 other_tokens.append(p)
586 i += 1
587 type_name = ''.join([t.name for t in type_name])
588 return name, type_name, templated_types, modifiers, default, other_tokens
589
    def ToParameters(self, tokens):
        """Convert the tokens of a parameter list into Parameter nodes.

        Args:
          tokens: tokens between a function's parentheses; may be empty.

        Returns:
          [Parameter(...), ...]; an empty list when tokens is empty.
        """
        if not tokens:
            return []

        result = []
        name = type_name = ''
        # Tokens of the parameter currently being collected.
        type_modifiers = []
        pointer = reference = array = False
        first_token = None
        default = []

        def AddParameter():
            # Build a Parameter from the state collected so far.  The
            # enclosing variables are read at call time, so this sees the
            # values for the parameter that just ended.
            if default:
                del default[0]  # Remove flag.
            end = type_modifiers[-1].end
            parts = self.DeclarationToParts(type_modifiers, True)
            (name, type_name, templated_types, modifiers,
             unused_default, unused_other_tokens) = parts
            parameter_type = Type(first_token.start, first_token.end,
                                  type_name, templated_types, modifiers,
                                  reference, pointer, array)
            p = Parameter(first_token.start, end, name,
                          parameter_type, default)
            result.append(p)

        template_count = 0
        for s in tokens:
            if not first_token:
                first_token = s
            if s.name == '<':
                template_count += 1
            elif s.name == '>':
                template_count -= 1
            if template_count > 0:
                # Inside <...>: keep every token, including commas, so that
                # template arguments do not split the parameter.
                type_modifiers.append(s)
                continue

            if s.name == ',':
                AddParameter()
                # Reset the per-parameter state for the next parameter.
                name = type_name = ''
                type_modifiers = []
                pointer = reference = array = False
                first_token = None
                default = []
            elif s.name == '*':
                pointer = True
            elif s.name == '&':
                reference = True
            elif s.name == '[':
                array = True
            elif s.name == ']':
                pass  # Just don't add to type_modifiers.
            elif s.name == '=':
                # Got a default value.  Add any value (None) as a flag.
                default.append(None)
            elif default:
                # Everything after '=' belongs to the default value.
                default.append(s)
            else:
                type_modifiers.append(s)
        # Flush the final parameter, which has no trailing comma.
        AddParameter()
        return result
651
652 def CreateReturnType(self, return_type_seq):
653 if not return_type_seq:
654 return None
655 start = return_type_seq[0].start
656 end = return_type_seq[-1].end
657 _, name, templated_types, modifiers, default, other_tokens = \
658 self.DeclarationToParts(return_type_seq, False)
659 names = [n.name for n in other_tokens]
660 reference = '&' in names
661 pointer = '*' in names
662 array = '[' in names
663 return Type(start, end, name, templated_types, modifiers,
664 reference, pointer, array)
665
666 def GetTemplateIndices(self, names):
667 # names is a list of strings.
668 start = names.index('<')
669 end = len(names) - 1
670 while end > 0:
671 if names[end] == '>':
672 break
673 end -= 1
674 return start, end+1
675
676class AstBuilder(object):
677 def __init__(self, token_stream, filename, in_class='', visibility=None,
678 namespace_stack=[]):
679 self.tokens = token_stream
680 self.filename = filename
681 # TODO(nnorwitz): use a better data structure (deque) for the queue.
682 # Switching directions of the "queue" improved perf by about 25%.
683 # Using a deque should be even better since we access from both sides.
684 self.token_queue = []
685 self.namespace_stack = namespace_stack[:]
686 self.in_class = in_class
687 if in_class is None:
688 self.in_class_name_only = None
689 else:
690 self.in_class_name_only = in_class.split('::')[-1]
691 self.visibility = visibility
692 self.in_function = False
693 self.current_token = None
694 # Keep the state whether we are currently handling a typedef or not.
695 self._handling_typedef = False
696
697 self.converter = TypeConverter(self.namespace_stack)
698
699 def HandleError(self, msg, token):
700 printable_queue = list(reversed(self.token_queue[-20:]))
701 sys.stderr.write('Got %s in %s @ %s %s\n' %
702 (msg, self.filename, token, printable_queue))
703
704 def Generate(self):
705 while 1:
706 token = self._GetNextToken()
707 if not token:
708 break
709
710 # Get the next token.
711 self.current_token = token
712
713 # Dispatch on the next token type.
714 if token.token_type == _INTERNAL_TOKEN:
715 if token.name == _NAMESPACE_POP:
716 self.namespace_stack.pop()
717 continue
718
719 try:
720 result = self._GenerateOne(token)
721 if result is not None:
722 yield result
723 except:
724 self.HandleError('exception', token)
725 raise
726
727 def _CreateVariable(self, pos_token, name, type_name, type_modifiers,
728 ref_pointer_name_seq, templated_types, value=None):
729 reference = '&' in ref_pointer_name_seq
730 pointer = '*' in ref_pointer_name_seq
731 array = '[' in ref_pointer_name_seq
732 var_type = Type(pos_token.start, pos_token.end, type_name,
733 templated_types, type_modifiers,
734 reference, pointer, array)
735 return VariableDeclaration(pos_token.start, pos_token.end,
736 name, var_type, value, self.namespace_stack)
737
    def _GenerateOne(self, token):
        """Build at most one AST node starting at token.

        Dispatches on token.token_type: NAME tokens go to keyword handlers
        or are parsed as data/function declarations, a '~' SYNTAX token
        inside a class may start a destructor, and PREPROCESSOR tokens
        handle #include, #define and `#if 0`.

        Returns:
          The node that was built, or None when nothing was produced.
        """
        if token.token_type == tokenize.NAME:
            if (keywords.IsKeyword(token.name) and
                not keywords.IsBuiltinType(token.name)):
                # Non-type keywords have a dedicated handle_<keyword> method.
                method = getattr(self, 'handle_' + token.name)
                return method()
            elif token.name == self.in_class_name_only:
                # The token name is the same as the class, must be a ctor if
                # there is a paren.  Otherwise, it's the return type.
                # Peek ahead to get the next token to figure out which.
                next = self._GetNextToken()
                self._AddBackToken(next)
                if next.token_type == tokenize.SYNTAX and next.name == '(':
                    return self._GetMethod([token], FUNCTION_CTOR, None, True)
                # Fall through--handle like any other method.

            # Handle data or function declaration/definition.
            syntax = tokenize.SYNTAX
            temp_tokens, last_token = \
                self._GetVarTokensUpTo(syntax, '(', ';', '{', '[')
            temp_tokens.insert(0, token)
            if last_token.name == '(':
                # If there is an assignment before the paren,
                # this is an expression, not a method.
                expr = bool([e for e in temp_tokens if e.name == '='])
                if expr:
                    new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.append(last_token)
                    temp_tokens.extend(new_temp)
                    # Pretend the statement ended normally so the ';'
                    # branch below treats it as data.
                    last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0)

            if last_token.name == '[':
                # Handle array, this isn't a method, unless it's an operator.
                # TODO(nnorwitz): keep the size somewhere.
                # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
                temp_tokens.append(last_token)
                if temp_tokens[-2].name == 'operator':
                    temp_tokens.append(self._GetNextToken())
                else:
                    temp_tokens2, last_token = \
                        self._GetVarTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.extend(temp_tokens2)

            if last_token.name == ';':
                # Handle data, this isn't a method.
                parts = self.converter.DeclarationToParts(temp_tokens, True)
                (name, type_name, templated_types, modifiers, default,
                 unused_other_tokens) = parts

                t0 = temp_tokens[0]
                names = [t.name for t in temp_tokens]
                if templated_types:
                    # Drop the <...> portion so '&'/'*' inside template
                    # arguments do not count for the variable itself.
                    start, end = self.converter.GetTemplateIndices(names)
                    names = names[:start] + names[end:]
                default = ''.join([t.name for t in default])
                return self._CreateVariable(t0, name, type_name, modifiers,
                                            names, templated_types, default)
            if last_token.name == '{':
                # Push everything but the first token back and let the
                # handler named after the first token re-read it.
                self._AddBackTokens(temp_tokens[1:])
                self._AddBackToken(last_token)
                method_name = temp_tokens[0].name
                method = getattr(self, 'handle_' + method_name, None)
                if not method:
                    # Must be declaring a variable.
                    # TODO(nnorwitz): handle the declaration.
                    return None
                return method()
            return self._GetMethod(temp_tokens, 0, None, False)
        elif token.token_type == tokenize.SYNTAX:
            if token.name == '~' and self.in_class:
                # Must be a dtor (probably not in method body).
                token = self._GetNextToken()
                # self.in_class can contain A::Name, but the dtor will only
                # be Name.  Make sure to compare against the right value.
                if (token.token_type == tokenize.NAME and
                    token.name == self.in_class_name_only):
                    return self._GetMethod([token], FUNCTION_DTOR, None, True)
            # TODO(nnorwitz): handle a lot more syntax.
        elif token.token_type == tokenize.PREPROCESSOR:
            # TODO(nnorwitz): handle more preprocessor directives.
            # token starts with a #, so remove it and strip whitespace.
            name = token.name[1:].lstrip()
            if name.startswith('include'):
                # Remove "include".
                name = name[7:].strip()
                assert name
                # Handle #include \<newline> "header-on-second-line.h".
                if name.startswith('\\'):
                    name = name[1:].strip()
                assert name[0] in '<"', token
                assert name[-1] in '>"', token
                system = name[0] == '<'
                filename = name[1:-1]
                return Include(token.start, token.end, filename, system)
            if name.startswith('define'):
                # Remove "define".
                name = name[6:].strip()
                assert name
                value = ''
                # Split at the first whitespace: macro name, then its value.
                for i, c in enumerate(name):
                    if c.isspace():
                        value = name[i:].lstrip()
                        name = name[:i]
                        break
                return Define(token.start, token.end, name, value)
            if name.startswith('if') and name[2:3].isspace():
                condition = name[3:].strip()
                # Skip the contents of `#if 0` / `#if (0)` blocks.
                if condition.startswith('0') or condition.startswith('(0)'):
                    self._SkipIf0Blocks()
        return None
848
849 def _GetTokensUpTo(self, expected_token_type, expected_token):
850 return self._GetVarTokensUpTo(expected_token_type, expected_token)[0]
851
852 def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens):
853 last_token = self._GetNextToken()
854 tokens = []
855 while (last_token.token_type != expected_token_type or
856 last_token.name not in expected_tokens):
857 tokens.append(last_token)
858 last_token = self._GetNextToken()
859 return tokens, last_token
860
861 # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necesary.
862 def _IgnoreUpTo(self, token_type, token):
863 unused_tokens = self._GetTokensUpTo(token_type, token)
864
865 def _SkipIf0Blocks(self):
866 count = 1
867 while 1:
868 token = self._GetNextToken()
869 if token.token_type != tokenize.PREPROCESSOR:
870 continue
871
872 name = token.name[1:].lstrip()
873 if name.startswith('endif'):
874 count -= 1
875 if count == 0:
876 break
877 elif name.startswith('if'):
878 count += 1
879
880 def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None):
881 if GetNextToken is None:
882 GetNextToken = self._GetNextToken
883 # Assumes the current token is open_paren and we will consume
884 # and return up to the close_paren.
885 count = 1
886 token = GetNextToken()
887 while 1:
888 if token.token_type == tokenize.SYNTAX:
889 if token.name == open_paren:
890 count += 1
891 elif token.name == close_paren:
892 count -= 1
893 if count == 0:
894 break
895 yield token
896 token = GetNextToken()
897 yield token
898
899 def _GetParameters(self):
900 return self._GetMatchingChar('(', ')')
901
902 def GetScope(self):
903 return self._GetMatchingChar('{', '}')
904
905 def _GetNextToken(self):
906 if self.token_queue:
907 return self.token_queue.pop()
908 return next(self.tokens)
909
910 def _AddBackToken(self, token):
911 if token.whence == tokenize.WHENCE_STREAM:
912 token.whence = tokenize.WHENCE_QUEUE
913 self.token_queue.insert(0, token)
914 else:
915 assert token.whence == tokenize.WHENCE_QUEUE, token
916 self.token_queue.append(token)
917
918 def _AddBackTokens(self, tokens):
919 if tokens:
920 if tokens[-1].whence == tokenize.WHENCE_STREAM:
921 for token in tokens:
922 token.whence = tokenize.WHENCE_QUEUE
923 self.token_queue[:0] = reversed(tokens)
924 else:
925 assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens
926 self.token_queue.extend(reversed(tokens))
927
    def GetName(self, seq=None):
        """Returns ([tokens], next_token_info).

        Collects the tokens of a possibly qualified and templated name
        (NAME, '::' and '<...>' tokens).

        Args:
          seq: optional iterable of tokens to read from instead of the
            builder's own token source.

        Returns:
          (tokens forming the name, the first token after the name)
        """
        GetNextToken = self._GetNextToken
        if seq is not None:
            it = iter(seq)
            GetNextToken = lambda: next(it)
        next_token = GetNextToken()
        tokens = []
        last_token_was_name = False
        while (next_token.token_type == tokenize.NAME or
               (next_token.token_type == tokenize.SYNTAX and
                next_token.name in ('::', '<'))):
            # Two NAMEs in a row means the identifier should terminate.
            # It's probably some sort of variable declaration.
            if last_token_was_name and next_token.token_type == tokenize.NAME:
                break
            last_token_was_name = next_token.token_type == tokenize.NAME
            tokens.append(next_token)
            # Handle templated names.
            if next_token.name == '<':
                # Swallow everything through the matching '>'.
                tokens.extend(self._GetMatchingChar('<', '>', GetNextToken))
                # A closed template counts as a name for the two-NAMEs rule.
                last_token_was_name = True
            next_token = GetNextToken()
        return tokens, next_token
952
953 def GetMethod(self, modifiers, templated_types):
954 return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
955 assert len(return_type_and_name) >= 1
956 return self._GetMethod(return_type_and_name, modifiers, templated_types,
957 False)
958
959 def _GetMethod(self, return_type_and_name, modifiers, templated_types,
960 get_paren):
961 template_portion = None
962 if get_paren:
963 token = self._GetNextToken()
964 assert token.token_type == tokenize.SYNTAX, token
965 if token.name == '<':
966 # Handle templatized dtors.
967 template_portion = [token]
968 template_portion.extend(self._GetMatchingChar('<', '>'))
969 token = self._GetNextToken()
970 assert token.token_type == tokenize.SYNTAX, token
971 assert token.name == '(', token
972
973 name = return_type_and_name.pop()
974 # Handle templatized ctors.
975 if name.name == '>':
976 index = 1
977 while return_type_and_name[index].name != '<':
978 index += 1
979 template_portion = return_type_and_name[index:] + [name]
980 del return_type_and_name[index:]
981 name = return_type_and_name.pop()
982 elif name.name == ']':
983 rt = return_type_and_name
984 assert rt[-1].name == '[', return_type_and_name
985 assert rt[-2].name == 'operator', return_type_and_name
986 name_seq = return_type_and_name[-2:]
987 del return_type_and_name[-2:]
988 name = tokenize.Token(tokenize.NAME, 'operator[]',
989 name_seq[0].start, name.end)
990 # Get the open paren so _GetParameters() below works.
991 unused_open_paren = self._GetNextToken()
992
993 # TODO(nnorwitz): store template_portion.
994 return_type = return_type_and_name
995 indices = name
996 if return_type:
997 indices = return_type[0]
998
999 # Force ctor for templatized ctors.
1000 if name.name == self.in_class and not modifiers:
1001 modifiers |= FUNCTION_CTOR
1002 parameters = list(self._GetParameters())
1003 del parameters[-1] # Remove trailing ')'.
1004
1005 # Handling operator() is especially weird.
1006 if name.name == 'operator' and not parameters:
1007 token = self._GetNextToken()
1008 assert token.name == '(', token
1009 parameters = list(self._GetParameters())
1010 del parameters[-1] # Remove trailing ')'.
1011
1012 token = self._GetNextToken()
1013 while token.token_type == tokenize.NAME:
1014 modifier_token = token
1015 token = self._GetNextToken()
1016 if modifier_token.name == 'const':
1017 modifiers |= FUNCTION_CONST
1018 elif modifier_token.name == '__attribute__':
1019 # TODO(nnorwitz): handle more __attribute__ details.
1020 modifiers |= FUNCTION_ATTRIBUTE
1021 assert token.name == '(', token
1022 # Consume everything between the (parens).
1023 unused_tokens = list(self._GetMatchingChar('(', ')'))
1024 token = self._GetNextToken()
1025 elif modifier_token.name == 'throw':
1026 modifiers |= FUNCTION_THROW
1027 assert token.name == '(', token
1028 # Consume everything between the (parens).
1029 unused_tokens = list(self._GetMatchingChar('(', ')'))
1030 token = self._GetNextToken()
kosakc26f9692014-03-12 23:27:35 +00001031 elif modifier_token.name == 'override':
1032 modifiers |= FUNCTION_OVERRIDE
shiqiane35fdd92008-12-10 05:08:54 +00001033 elif modifier_token.name == modifier_token.name.upper():
1034 # HACK(nnorwitz): assume that all upper-case names
1035 # are some macro we aren't expanding.
1036 modifiers |= FUNCTION_UNKNOWN_ANNOTATION
1037 else:
1038 self.HandleError('unexpected token', modifier_token)
1039
1040 assert token.token_type == tokenize.SYNTAX, token
1041 # Handle ctor initializers.
1042 if token.name == ':':
1043 # TODO(nnorwitz): anything else to handle for initializer list?
1044 while token.name != ';' and token.name != '{':
1045 token = self._GetNextToken()
1046
1047 # Handle pointer to functions that are really data but look
1048 # like method declarations.
1049 if token.name == '(':
1050 if parameters[0].name == '*':
1051 # name contains the return type.
1052 name = parameters.pop()
1053 # parameters contains the name of the data.
1054 modifiers = [p.name for p in parameters]
1055 # Already at the ( to open the parameter list.
1056 function_parameters = list(self._GetMatchingChar('(', ')'))
1057 del function_parameters[-1] # Remove trailing ')'.
1058 # TODO(nnorwitz): store the function_parameters.
1059 token = self._GetNextToken()
1060 assert token.token_type == tokenize.SYNTAX, token
1061 assert token.name == ';', token
1062 return self._CreateVariable(indices, name.name, indices.name,
1063 modifiers, '', None)
1064 # At this point, we got something like:
1065 # return_type (type::*name_)(params);
1066 # This is a data member called name_ that is a function pointer.
1067 # With this code: void (sq_type::*field_)(string&);
1068 # We get: name=void return_type=[] parameters=sq_type ... field_
1069 # TODO(nnorwitz): is return_type always empty?
1070 # TODO(nnorwitz): this isn't even close to being correct.
1071 # Just put in something so we don't crash and can move on.
1072 real_name = parameters[-1]
1073 modifiers = [p.name for p in self._GetParameters()]
1074 del modifiers[-1] # Remove trailing ')'.
1075 return self._CreateVariable(indices, real_name.name, indices.name,
1076 modifiers, '', None)
1077
1078 if token.name == '{':
1079 body = list(self.GetScope())
1080 del body[-1] # Remove trailing '}'.
1081 else:
1082 body = None
1083 if token.name == '=':
1084 token = self._GetNextToken()
1085 assert token.token_type == tokenize.CONSTANT, token
1086 assert token.name == '0', token
1087 modifiers |= FUNCTION_PURE_VIRTUAL
1088 token = self._GetNextToken()
1089
1090 if token.name == '[':
1091 # TODO(nnorwitz): store tokens and improve parsing.
1092 # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
1093 tokens = list(self._GetMatchingChar('[', ']'))
1094 token = self._GetNextToken()
1095
1096 assert token.name == ';', (token, return_type_and_name, parameters)
1097
1098 # Looks like we got a method, not a function.
1099 if len(return_type) > 2 and return_type[-1].name == '::':
1100 return_type, in_class = \
1101 self._GetReturnTypeAndClassName(return_type)
1102 return Method(indices.start, indices.end, name.name, in_class,
1103 return_type, parameters, modifiers, templated_types,
1104 body, self.namespace_stack)
1105 return Function(indices.start, indices.end, name.name, return_type,
1106 parameters, modifiers, templated_types, body,
1107 self.namespace_stack)
1108
def _GetReturnTypeAndClassName(self, token_seq):
    """Splits the tokens before a method name into return type and class.

    Splitting the return type from the class name in a method can be
    tricky.  For example, in Return::Type::Is::Hard::To::Find() it is not
    obvious where the return type ends and the class name begins.  The
    heuristic used is to take the last parsed name (including all of its
    templated type info) as the class name.

    Args:
      token_seq: [Token, ...] preceding the method name.

    Returns:
      (return_type, class_name) where each is a list of tokens.
    """
    # TODO(nnorwitz): if there is only One name like in the
    # example above, punt and assume the last bit is the class name.

    # Skip a leading '::' so parsing starts at the first real name.
    pos = 0
    if token_seq[0].name == '::':
        pos = 1
    # Skip a trailing '::', if one exists.
    stop = len(token_seq) - 1
    if token_seq[stop - 1].name == '::':
        stop -= 1

    # Work on a copy terminated by a sentinel token: GetName needs some
    # terminating condition beyond the last name.
    padded = token_seq[pos:stop]
    padded.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0))

    names = []
    while pos < stop:
        # Parse out one name per iteration.
        name_tokens, following = self.GetName(padded[pos:])
        assert name_tokens, 'Got empty new_name, next=%s' % following
        # We got a pointer or ref.  Keep it with the name.
        if following and following.token_type == tokenize.SYNTAX:
            name_tokens.append(following)
        names.append(name_tokens)
        pos += len(name_tokens)

    # Undo the setup above: remove the sentinel value.
    names[-1].pop()
    # Everything before the last name, flattened, is the return type.
    return_type = []
    for name_tokens in names[:-1]:
        return_type.extend(name_tokens)
    # The class name is the last name.
    return return_type, names[-1]
1152
def handle_bool(self):
    """Handler for the `bool` keyword: nothing to do."""
1155
def handle_char(self):
    """Handler for the `char` keyword: nothing to do."""
1158
def handle_int(self):
    """Handler for the `int` keyword: nothing to do."""
1161
def handle_long(self):
    """Handler for the `long` keyword: nothing to do."""
1164
def handle_short(self):
    """Handler for the `short` keyword: nothing to do."""
1167
def handle_double(self):
    """Handler for the `double` keyword: nothing to do."""
1170
def handle_float(self):
    """Handler for the `float` keyword: nothing to do."""
1173
def handle_void(self):
    """Handler for the `void` keyword: nothing to do."""
1176
def handle_wchar_t(self):
    """Handler for the `wchar_t` keyword: nothing to do."""
1179
def handle_unsigned(self):
    """Handler for the `unsigned` keyword: nothing to do."""
1182
def handle_signed(self):
    """Handler for the `signed` keyword: nothing to do."""
1185
def _GetNestedType(self, ctor):
    """Parses what follows a type keyword (used for `union` and `enum`).

    Args:
      ctor: callable invoked as
        ctor(start, end, name, fields, namespace_stack) to build the node.

    Returns:
      The node built by ctor for a forward declaration, a name seen while
      handling a typedef, or a full declaration; otherwise the result of
      _CreateVariable for a variable declared with the type.
    """
    name_tokens, token = self.GetName()
    name = None
    if name_tokens:
        name = ''.join([part.name for part in name_tokens])

    # Handle forward declarations.
    if token.token_type == tokenize.SYNTAX and token.name == ';':
        return ctor(token.start, token.end, name, None,
                    self.namespace_stack)

    # While handling a typedef, a following name is pushed back for the
    # caller and the type is reported without fields.
    if token.token_type == tokenize.NAME and self._handling_typedef:
        self._AddBackToken(token)
        return ctor(token.start, token.end, name, None,
                    self.namespace_stack)

    # Must be the type declaration.
    fields = list(self._GetMatchingChar('{', '}'))
    fields.pop()  # Remove trailing '}'.
    if token.token_type == tokenize.SYNTAX and token.name == '{':
        following = self._GetNextToken()
        new_type = ctor(token.start, token.end, name, fields,
                        self.namespace_stack)
        # A name means this is an anonymous type and the name
        # is the variable declaration.
        if following.token_type != tokenize.NAME:
            return new_type
        name = new_type
        token = following

    # Must be variable declaration using the type prefixed with keyword.
    assert token.token_type == tokenize.NAME, token
    return self._CreateVariable(token, token.name, name, [], '', None)
1219
def handle_struct(self):
    """Handles the `struct` keyword.

    Uses of `struct Name` as a type (variables, pointer/reference
    declarations and methods declared to return a struct) are special-cased
    here because they would be a pain to handle in the class code.
    Anything else is pushed back and parsed by _GetClass.
    """
    name_tokens, var_token = self.GetName()
    if not name_tokens:
        # No name follows the keyword: let _GetClass see the token.
        self._AddBackToken(var_token)
        return self._GetClass(Struct, VISIBILITY_PUBLIC, None)

    next_token = self._GetNextToken()
    is_syntax = (var_token.token_type == tokenize.SYNTAX and
                 var_token.name[0] in '*&')
    is_variable = (var_token.token_type == tokenize.NAME and
                   next_token.name == ';')
    variable = var_token
    if is_syntax and not is_variable:
        # Pointer/reference: the declared name follows the '*' or '&'.
        variable = next_token
        after = self._GetNextToken()
        if after.token_type == tokenize.SYNTAX and after.name == '(':
            # Handle methods declared to return a struct.
            first = name_tokens[0]
            struct = tokenize.Token(tokenize.NAME, 'struct',
                                    first.start-7, first.start-2)
            type_and_name = ([struct] + list(name_tokens) +
                             [var_token, next_token])
            return self._GetMethod(type_and_name, 0, None, False)
        assert after.name == ';', (after, name_tokens, var_token)

    if is_syntax or (is_variable and not self._handling_typedef):
        type_name = ''.join([part.name for part in name_tokens])
        return self._CreateVariable(name_tokens[0], variable.name,
                                    type_name, ['struct'],
                                    var_token.name, None)

    # Not a variable or method: restore the tokens and parse as a class.
    name_tokens.extend((var_token, next_token))
    self._AddBackTokens(name_tokens)
    return self._GetClass(Struct, VISIBILITY_PUBLIC, None)
1255
def handle_union(self):
    """Handles the `union` keyword via _GetNestedType with Union."""
    node = self._GetNestedType(Union)
    return node
1258
def handle_enum(self):
    """Handles the `enum` keyword via _GetNestedType with Enum."""
    node = self._GetNestedType(Enum)
    return node
1261
def handle_auto(self):
    """Handler for the `auto` keyword: nothing to do."""
    # TODO(nnorwitz): warn about using auto?  Probably not since it
    # will be reclaimed and useful for C++0x.
1266
def handle_register(self):
    """Handler for the `register` keyword: nothing to do."""
1269
def handle_const(self):
    """Handler for the `const` keyword: nothing to do."""
1272
def handle_inline(self):
    """Handler for the `inline` keyword: nothing to do."""
1275
def handle_extern(self):
    """Handler for the `extern` keyword: nothing to do."""
1278
def handle_static(self):
    """Handler for the `static` keyword: nothing to do."""
1281
def handle_virtual(self):
    """Handles the `virtual` keyword; what follows must be a method.

    Returns:
      The result of GetMethod for a virtual destructor, otherwise the
      result of _GetMethod for the tokens up to the opening '('.
    """
    first = self._GetNextToken()
    second = first
    if first.name == 'inline':
        # HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
        second = self._GetNextToken()
    if second.token_type == tokenize.SYNTAX and second.name == '~':
        return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None)
    assert first.token_type == tokenize.NAME or first.name == '::', first

    return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
    # Put the token(s) already consumed back at the front, in order.
    if second is first:
        consumed = [first]
    else:
        consumed = [first, second]
    return_type_and_name[0:0] = consumed
    return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL,
                           None, False)
1297
def handle_volatile(self):
    """Handler for the `volatile` keyword: nothing to do."""
1300
def handle_mutable(self):
    """Handler for the `mutable` keyword: nothing to do."""
1303
def handle_public(self):
    """Sets the current visibility to public; asserts we are in a class."""
    assert self.in_class
    self.visibility = VISIBILITY_PUBLIC
1307
def handle_protected(self):
    """Sets the current visibility to protected; asserts we are in a class."""
    assert self.in_class
    self.visibility = VISIBILITY_PROTECTED
1311
def handle_private(self):
    """Sets the current visibility to private; asserts we are in a class."""
    assert self.in_class
    self.visibility = VISIBILITY_PRIVATE
1315
def handle_friend(self):
    """Builds a Friend node from the tokens up to the next ';'.

    The node's position is that of the first token.
    """
    friend_tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    assert friend_tokens
    first = friend_tokens[0]
    return Friend(first.start, first.end, friend_tokens,
                  self.namespace_stack)
1321
def handle_static_cast(self):
    """Handler for the `static_cast` keyword: nothing to do."""
1324
def handle_const_cast(self):
    """Handler for the `const_cast` keyword: nothing to do."""
1327
def handle_dynamic_cast(self):
    """Handler for the `dynamic_cast` keyword: nothing to do."""
1330
def handle_reinterpret_cast(self):
    """Handler for the `reinterpret_cast` keyword: nothing to do."""
1333
def handle_new(self):
    """Handler for the `new` keyword: nothing to do."""
1336
def handle_delete(self):
    """Builds a Delete node from the tokens up to the next ';'.

    The node's position is that of the first token.
    """
    deleted = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    assert deleted
    first = deleted[0]
    return Delete(first.start, first.end, deleted)
1341
def handle_typedef(self):
    """Parses a typedef declaration up to its ';'.

    If the token after `typedef` is a keyword, the matching handle_<keyword>
    method parses it with self._handling_typedef set, and its result becomes
    the first element of the aliased type.

    Returns:
      A Typedef node named after the alias.
    """
    token = self._GetNextToken()
    if (token.token_type == tokenize.NAME and
            keywords.IsKeyword(token.name)):
        # Token must be struct/enum/union/class.
        method = getattr(self, 'handle_' + token.name)
        self._handling_typedef = True
        try:
            tokens = [method()]
        finally:
            # Always clear the flag, even when the handler raises, so a
            # failed typedef cannot leave the builder in typedef mode.
            self._handling_typedef = False
    else:
        tokens = [token]

    # Get the remainder of the typedef up to the semi-colon.
    tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';'))

    # TODO(nnorwitz): clean all this up.
    assert tokens
    # By default the alias name is the last token before the ';'.
    name = tokens.pop()
    # The node's position comes from the first element when there is one
    # (a handler may have returned None), else from the first token read.
    indices = name
    if tokens:
        indices = tokens[0]
    if not indices:
        indices = token
    if name.name == ')':
        # HACK(nnorwitz): Handle pointers to functions "properly".
        # For `ret (*alias)(...)` the alias is the token after '(' '*'.
        if (len(tokens) >= 4 and
                tokens[1].name == '(' and tokens[2].name == '*'):
            tokens.append(name)
            name = tokens[3]
    elif name.name == ']':
        # HACK(nnorwitz): Handle arrays properly.
        # For `type alias[...]` the alias is the second token.
        if len(tokens) >= 2:
            tokens.append(name)
            name = tokens[1]
    new_type = tokens
    # Raw tokens are converted to a type; handler results are kept as-is.
    if tokens and isinstance(tokens[0], tokenize.Token):
        new_type = self.converter.ToType(tokens)[0]
    return Typedef(indices.start, indices.end, name.name,
                   new_type, self.namespace_stack)
1381
def handle_typeid(self):
    """Handler for the `typeid` keyword: not needed yet."""
1384
def handle_typename(self):
    """Handler for the `typename` keyword: not needed yet."""
1387
def _GetTemplatedTypes(self):
    """Parses template parameters up to the matching '>'.

    Returns:
      A dict mapping each parameter name to (type_name, default), where
      type_name is the token preceding the name for `Type variable`
      parameters (else None) and default is the list of name tokens after
      '=' (else None).
    """
    tokens = list(self._GetMatchingChar('<', '>'))
    limit = len(tokens) - 1  # Ignore trailing '>'.
    templated = {}
    pos = 0
    while pos < limit:
        key = tokens[pos].name
        pos += 1
        # Keywords (e.g. the one introducing a parameter) and separators
        # are not parameter names.
        if keywords.IsKeyword(key) or key == ',':
            continue
        type_name = None
        default = None
        if pos < limit:
            follower = tokens[pos]
            pos += 1
            if follower.name == '=':
                assert pos < limit, '%s %s' % (pos, tokens)
                default, unused_next_token = self.GetName(tokens[pos:])
                pos += len(default)
            elif follower.name != ',':
                # We got something like: Type variable.
                # Re-adjust the key (variable) and type_name (Type).
                key = follower.name
                type_name = tokens[pos - 2]

        templated[key] = (type_name, default)
    return templated
1414
def handle_template(self):
    """Handles the `template` keyword and the declaration that follows.

    Returns:
      The class/struct node, friend node or method that was templated, or
      None when the declaration is a variable definition.
    """
    opener = self._GetNextToken()
    assert opener.token_type == tokenize.SYNTAX, opener
    assert opener.name == '<', opener
    templated_types = self._GetTemplatedTypes()
    # TODO(nnorwitz): for now, just ignore the template params.
    token = self._GetNextToken()
    if token.token_type == tokenize.NAME:
        keyword = token.name
        if keyword == 'class':
            return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
        if keyword == 'struct':
            return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
        if keyword == 'friend':
            return self.handle_friend()

    # Look ahead to the first '(' or ';' to decide, then restore
    # everything that was read.
    self._AddBackToken(token)
    tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';')
    tokens.append(last)
    self._AddBackTokens(tokens)
    if last.name != '(':
        # Must be a variable definition.
        return None
    return self.GetMethod(FUNCTION_NONE, templated_types)
1437
def handle_true(self):
    """Handler for the `true` keyword: nothing to do."""
1440
def handle_false(self):
    """Handler for the `false` keyword: nothing to do."""
1443
def handle_asm(self):
    """Handler for the `asm` keyword: not needed yet."""
1446
def handle_class(self):
    """Handles the `class` keyword; members start out private."""
    node = self._GetClass(Class, VISIBILITY_PRIVATE, None)
    return node
1449
def _GetBases(self):
    """Parses the base-class list of a class definition.

    Reads comma-separated base specifiers until the '{' that opens the
    class body.  Each base may be preceded by an access specifier and/or
    `virtual`, in either order (`public virtual B` and `virtual public B`
    are both valid C++).

    Returns:
      (bases, token): the list of converted base types and the '{' token
      that terminated the list.
    """
    inheritance_keywords = ('public', 'protected', 'private', 'virtual')
    bases = []
    while 1:
        token = self._GetNextToken()
        assert token.token_type == tokenize.NAME, token
        # Skip the access specifier and 'virtual' in whatever order they
        # appear; previously a leading 'virtual' was left in the stream
        # and ended up being parsed as the base name.
        # TODO(nnorwitz): store kind of inheritance...maybe.
        # TODO(nnorwitz): store that we got virtual for this base.
        # TODO(nnorwitz): it would be good to warn when the inheritance
        # type is not specified.
        while token.name in inheritance_keywords:
            token = self._GetNextToken()
        # This token starts the base name; put it back so GetName can
        # form the full name.
        self._AddBackToken(token)
        base, next_token = self.GetName()
        bases_ast = self.converter.ToType(base)
        assert len(bases_ast) == 1, bases_ast
        bases.append(bases_ast[0])
        assert next_token.token_type == tokenize.SYNTAX, next_token
        if next_token.name == '{':
            token = next_token
            break
        # Support multiple inheritance.
        assert next_token.name == ',', next_token
    return bases, token
1481
def _GetClass(self, class_type, visibility, templated_types):
    """Parses what follows a `class`/`struct` keyword.

    Args:
      class_type: node constructor, invoked as class_type(start, end, name,
        bases, templated_types, body, namespace_stack).
      visibility: initial member visibility passed to the AstBuilder that
        parses the class body.
      templated_types: template parameters for the class, or None.

    Returns:
      A class_type node (forward declaration or definition), the result of
      _CreateVariable for a variable declared with the class, or the result
      of GetMethod for a method whose return type names the class.
    """
    class_name = None
    class_token = self._GetNextToken()
    if class_token.token_type == tokenize.NAME:
        # Skip any macro (e.g. storage class specifiers) after the
        # 'class' keyword: when two names appear in a row only the
        # second is pushed back, otherwise both tokens are restored.
        peeked = self._GetNextToken()
        if peeked.token_type == tokenize.NAME:
            self._AddBackToken(peeked)
        else:
            self._AddBackTokens([class_token, peeked])
        name_tokens, token = self.GetName()
        class_name = ''.join([part.name for part in name_tokens])
    else:
        assert class_token.token_type == tokenize.SYNTAX, class_token
        token = class_token

    bases = None
    if token.token_type == tokenize.SYNTAX:
        if token.name == ';':
            # Forward declaration.
            return class_type(class_token.start, class_token.end,
                              class_name, None, templated_types, None,
                              self.namespace_stack)
        if token.name in '*&':
            # Inline forward declaration.  Could be method or data.
            name_token = self._GetNextToken()
            next_token = self._GetNextToken()
            if next_token.name == ';':
                # Handle data.
                return self._CreateVariable(class_token, name_token.name,
                                            class_name,
                                            ['class'], token.name, None)
            # Assume this is a method.
            self._AddBackTokens(
                (class_token, token, name_token, next_token))
            return self.GetMethod(FUNCTION_NONE, None)
        if token.name == ':':
            bases, token = self._GetBases()

    body = None
    if token.token_type == tokenize.SYNTAX and token.name == '{':
        # Parse the class body with a nested builder.
        ast = AstBuilder(self.GetScope(), self.filename, class_name,
                         visibility, self.namespace_stack)
        body = list(ast.Generate())

        if not self._handling_typedef:
            token = self._GetNextToken()
            if token.token_type == tokenize.NAME:
                # A name after the closing brace declares a variable of
                # the class just defined.
                new_class = class_type(class_token.start, class_token.end,
                                       class_name, bases, None,
                                       body, self.namespace_stack)
                return self._CreateVariable(class_token,
                                            token.name, new_class,
                                            [], token.name, None)
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == ';', token
    else:
        if not self._handling_typedef:
            self.HandleError('non-typedef token', token)
        self._AddBackToken(token)

    return class_type(class_token.start, class_token.end, class_name,
                      bases, templated_types, body, self.namespace_stack)
shiqiane35fdd92008-12-10 05:08:54 +00001553
def handle_namespace(self):
    """Handles the `namespace` keyword.

    Pushes the namespace name (None for an anonymous namespace) onto
    self.namespace_stack and pushes back an internal token that denotes
    when the namespace is complete.

    Returns:
      None
    """
    token = self._GetNextToken()
    # Support anonymous namespaces.
    name = None
    if token.token_type == tokenize.NAME:
        name = token.name
        token = self._GetNextToken()
    self.namespace_stack.append(name)
    assert token.token_type == tokenize.SYNTAX, token
    # Create an internal token that denotes when the namespace is complete.
    pop_marker = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
                                None, None)
    pop_marker.whence = token.whence
    if token.name != '=':
        assert token.name == '{', token
        scope_tokens = list(self.GetScope())
        # Replace the trailing } with the internal namespace pop token.
        # This also handles a namespace with nothing in it.
        scope_tokens[-1] = pop_marker
        self._AddBackTokens(scope_tokens)
        return None

    # TODO(nnorwitz): handle aliasing namespaces.
    unused_alias, next_token = self.GetName()
    assert next_token.name == ';', next_token
    self._AddBackToken(pop_marker)
    return None
1580
def handle_using(self):
    """Builds a Using node from the tokens up to the next ';'.

    The node's position is that of the first token.
    """
    used = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    assert used
    first = used[0]
    return Using(first.start, first.end, used)
1585
def handle_explicit(self):
    """Handles the `explicit` keyword; what follows must be a ctor."""
    assert self.in_class
    # Nothing much to do beyond parsing the method as a constructor.
    # TODO(nnorwitz): maybe verify the method name == class name.
    ctor = self.GetMethod(FUNCTION_CTOR, None)
    return ctor
1592
def handle_this(self):
    """Handler for the `this` keyword: nothing to do."""
1595
def handle_operator(self):
    """Handler for the `operator` keyword: currently does nothing."""
    # Pull off the next token(s?) and make that part of the method name.
1599
def handle_sizeof(self):
    """Handler for the `sizeof` keyword: nothing to do."""
1602
def handle_case(self):
    """Handler for the `case` keyword: nothing to do."""
1605
def handle_switch(self):
    """Handler for the `switch` keyword: nothing to do."""
1608
def handle_default(self):
    """Handles `default` by consuming the ':' that must follow it."""
    colon = self._GetNextToken()
    assert colon.token_type == tokenize.SYNTAX
    assert colon.name == ':'
1613
def handle_if(self):
    """Handler for the `if` keyword: nothing to do."""
1616
def handle_else(self):
    """Handler for the `else` keyword: nothing to do."""
1619
def handle_return(self):
    """Builds a Return node from the tokens up to the next ';'.

    Returns:
      Return positioned at the first token of the expression, or at
      self.current_token with no tokens for a bare `return;`.
    """
    expression = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    if expression:
        first = expression[0]
        return Return(first.start, first.end, expression)
    return Return(self.current_token.start, self.current_token.end, None)
1625
def handle_goto(self):
    """Builds a Goto node; exactly one token must precede the ';'."""
    tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    assert len(tokens) == 1, str(tokens)
    label = tokens[0]
    return Goto(label.start, label.end, label.name)
1630
def handle_try(self):
    """Handler for the `try` keyword: not needed yet."""
1633
def handle_catch(self):
    """Handler for the `catch` keyword: not needed yet."""
1636
def handle_throw(self):
    """Handler for the `throw` keyword: not needed yet."""
1639
def handle_while(self):
    """Handler for the `while` keyword: nothing to do."""
1642
def handle_do(self):
    """Handler for the `do` keyword: nothing to do."""
1645
def handle_for(self):
    """Handler for the `for` keyword: nothing to do."""
1648
def handle_break(self):
    """Handles `break` by ignoring everything up to the next ';'."""
    terminator = ';'
    self._IgnoreUpTo(tokenize.SYNTAX, terminator)
1651
def handle_continue(self):
    """Handles `continue` by ignoring everything up to the next ';'."""
    terminator = ';'
    self._IgnoreUpTo(tokenize.SYNTAX, terminator)
1654
1655
def BuilderFromSource(source, filename):
    """Utility method that returns an AstBuilder from source code.

    The source is tokenized with tokenize.GetTokens and the resulting
    tokens are handed to the builder.

    Args:
      source: 'C++ source code'
      filename: 'file1'

    Returns:
      AstBuilder
    """
    token_stream = tokenize.GetTokens(source)
    return AstBuilder(token_stream, filename)
1667
1668
def PrintIndentifiers(filename, should_print):
    """Prints all identifiers for a C++ source file.

    Processing is best effort: if the file cannot be read or parsing fails,
    a message is written to stderr and the function returns normally.  A
    KeyboardInterrupt stops processing silently.

    Args:
      filename: 'file1'
      should_print: predicate with signature: bool Function(token)
    """
    source = utils.ReadFile(filename, False)
    if source is None:
        sys.stderr.write('Unable to find: %s\n' % filename)
        return

    builder = BuilderFromSource(source, filename)
    try:
        for node in builder.Generate():
            if should_print(node):
                print(node.name)
    except KeyboardInterrupt:
        return
    except Exception:
        # Keep going with the next file, but say so rather than hiding
        # the failure.  Only Exception is caught so that SystemExit and
        # similar are no longer swallowed.
        sys.stderr.write('Unable to process: %s\n' % filename)
1691
1692
def PrintAllIndentifiers(filenames, should_print):
    """Prints all identifiers for each C++ source file in filenames.

    Each file is handed, in order, to PrintIndentifiers.

    Args:
      filenames: ['file1', 'file2', ...]
      should_print: predicate with signature: bool Function(token)
    """
    for filename in filenames:
        PrintIndentifiers(filename, should_print)
1702
1703
def main(argv):
    """Builds the AST for every file named on the command line.

    Files that cannot be read are skipped.  When utils.DEBUG is set, every
    node of a successfully parsed file is printed.

    Args:
      argv: command-line arguments; argv[0] is ignored and the remaining
        entries are treated as filenames.
    """
    for filename in argv[1:]:
        source = utils.ReadFile(filename)
        if source is None:
            continue

        print('Processing %s' % filename)
        builder = BuilderFromSource(source, filename)
        try:
            # Materialize the list here: Generate() is consumed lazily and
            # on Python 3 filter() is lazy too, so without this no parsing
            # would happen inside the try block and errors would escape.
            entire_ast = [node for node in builder.Generate() if node]
        except KeyboardInterrupt:
            return
        except Exception:
            # Already printed a warning, print the traceback and continue.
            traceback.print_exc()
        else:
            if utils.DEBUG:
                for ast in entire_ast:
                    print(ast)
1723
1724
# Run as a script: hand the command-line arguments to main(), which treats
# everything after argv[0] as a filename to process.
if __name__ == '__main__':
    main(sys.argv)