blob: 47dc9a07daba611e059156d3aa586eaef4a757e1 [file] [log] [blame]
shiqiane35fdd92008-12-10 05:08:54 +00001#!/usr/bin/env python
2#
3# Copyright 2007 Neal Norwitz
4# Portions Copyright 2007 Google Inc.
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17
18"""Generate an Abstract Syntax Tree (AST) for C++."""
19
20__author__ = 'nnorwitz@google.com (Neal Norwitz)'
21
22
23# TODO:
24# * Tokens should never be exported, need to convert to Nodes
25# (return types, parameters, etc.)
26# * Handle static class data for templatized classes
27# * Handle casts (both C++ and C-style)
28# * Handle conditions and loops (if/else, switch, for, while/do)
29#
30# TODO much, much later:
31# * Handle #define
32# * exceptions
33
34
35try:
36 # Python 3.x
37 import builtins
38except ImportError:
39 # Python 2.x
40 import __builtin__ as builtins
41
42import sys
43import traceback
44
45from cpp import keywords
46from cpp import tokenize
47from cpp import utils
48
49
if not hasattr(builtins, 'reversed'):
    # Support Python 2.3 and earlier.
    # Minimal stand-in for the builtin: yields seq's elements last-to-first.
    def reversed(seq):
        for i in range(len(seq)-1, -1, -1):
            yield seq[i]

if not hasattr(builtins, 'next'):
    # Support Python 2.5 and earlier.
    # Minimal stand-in for the builtin: calls the pre-2.6 .next() method.
    def next(obj):
        return obj.next()
60
61
# C++ member access levels, in source-declaration order.
VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3)

# Bit flags OR'd together into a Function/Method `modifiers` mask.
FUNCTION_NONE = 0x00
FUNCTION_CONST = 0x01
FUNCTION_VIRTUAL = 0x02
FUNCTION_PURE_VIRTUAL = 0x04
FUNCTION_CTOR = 0x08
FUNCTION_DTOR = 0x10
FUNCTION_ATTRIBUTE = 0x20
FUNCTION_UNKNOWN_ANNOTATION = 0x40
FUNCTION_THROW = 0x80

"""
These are currently unused. Should really handle these properly at some point.

TYPE_MODIFIER_INLINE   = 0x010000
TYPE_MODIFIER_EXTERN   = 0x020000
TYPE_MODIFIER_STATIC   = 0x040000
TYPE_MODIFIER_CONST    = 0x080000
TYPE_MODIFIER_REGISTER = 0x100000
TYPE_MODIFIER_VOLATILE = 0x200000
TYPE_MODIFIER_MUTABLE  = 0x400000

TYPE_MODIFIER_MAP = {
    'inline': TYPE_MODIFIER_INLINE,
    'extern': TYPE_MODIFIER_EXTERN,
    'static': TYPE_MODIFIER_STATIC,
    'const': TYPE_MODIFIER_CONST,
    'register': TYPE_MODIFIER_REGISTER,
    'volatile': TYPE_MODIFIER_VOLATILE,
    'mutable': TYPE_MODIFIER_MUTABLE,
    }
"""

# Sentinel token type/name pair used by the builder to know when to pop
# its namespace stack (see AstBuilder.Generate).
_INTERNAL_TOKEN = 'internal'
_NAMESPACE_POP = 'ns-pop'
98
99
100# TODO(nnorwitz): use this as a singleton for templated_types, etc
101# where we don't want to create a new empty dict each time. It is also const.
102class _NullDict(object):
103 __contains__ = lambda self: False
104 keys = values = items = iterkeys = itervalues = iteritems = lambda self: ()
105
106
107# TODO(nnorwitz): move AST nodes into a separate module.
# TODO(nnorwitz): move AST nodes into a separate module.
class Node(object):
    """Base AST node.

    Tracks the [start, end) byte offsets of the construct in the source
    text and provides default (all-False) classification predicates that
    subclasses override.
    """

    def __init__(self, start, end):
        self.start = start
        self.end = end

    def IsDeclaration(self):
        """Returns bool if this node is a declaration."""
        return False

    def IsDefinition(self):
        """Returns bool if this node is a definition."""
        return False

    def IsExportable(self):
        """Returns bool if this node exportable from a header file."""
        return False

    def Requires(self, node):
        """Does this AST node require the definition of the node passed in?"""
        return False

    def XXX__str__(self):
        # Intentionally not named __str__; subclasses provide real ones.
        return self._StringHelper(self.__class__.__name__, '')

    def _StringHelper(self, name, suffix):
        # In debug mode include the source offsets for easier tracing.
        if utils.DEBUG:
            return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix)
        return '%s(%s)' % (name, suffix)

    def __repr__(self):
        return str(self)
141
142
class Define(Node):
    """A `#define NAME definition` preprocessor directive."""

    def __init__(self, start, end, name, definition):
        Node.__init__(self, start, end)
        self.name = name
        self.definition = definition

    def __str__(self):
        return self._StringHelper(self.__class__.__name__,
                                  '%s %s' % (self.name, self.definition))
152
153
class Include(Node):
    """An `#include` directive; system is True for <...>, False for "..."."""

    def __init__(self, start, end, filename, system):
        Node.__init__(self, start, end)
        self.filename = filename
        self.system = system

    def __str__(self):
        if self.system:
            wrapped = '<%s>' % self.filename
        else:
            wrapped = '"%s"' % self.filename
        return self._StringHelper(self.__class__.__name__, wrapped)
165
166
class Goto(Node):
    """A `goto label;` statement; label is the jump target's name."""

    def __init__(self, start, end, label):
        Node.__init__(self, start, end)
        self.label = label

    def __str__(self):
        suffix = str(self.label)
        return self._StringHelper(self.__class__.__name__, suffix)
174
175
class Expr(Node):
    """A generic expression statement; stores the raw expression tokens."""

    def __init__(self, start, end, expr):
        Node.__init__(self, start, end)
        self.expr = expr

    def Requires(self, node):
        # TODO(nnorwitz): impl.
        return False

    def __str__(self):
        suffix = str(self.expr)
        return self._StringHelper(self.__class__.__name__, suffix)
187
188
class Return(Expr):
    """A return statement; the returned expression is kept in self.expr."""
    pass
191
192
class Delete(Expr):
    """A delete statement; the deleted expression is kept in self.expr."""
    pass
195
196
class Friend(Expr):
    """A `friend` declaration, recording the namespace it appeared in."""

    def __init__(self, start, end, expr, namespace):
        Expr.__init__(self, start, end, expr)
        # Copy so later mutations of the builder's namespace stack
        # don't retroactively change this node.
        self.namespace = namespace[:]
201
202
class Using(Node):
    """A `using` declaration/directive; names holds its token sequence."""

    def __init__(self, start, end, names):
        Node.__init__(self, start, end)
        self.names = names

    def __str__(self):
        suffix = str(self.names)
        return self._StringHelper(self.__class__.__name__, suffix)
210
211
class Parameter(Node):
    """One function parameter: a name, a Type, and default-value tokens."""

    def __init__(self, start, end, name, parameter_type, default):
        Node.__init__(self, start, end)
        self.name = name
        self.type = parameter_type
        self.default = default

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        return self.type.name == node.name

    def __str__(self):
        suffix = '%s %s' % (self.type, self.name)
        if self.default:
            default_names = [d.name for d in self.default]
            suffix += ' = ' + ''.join(default_names)
        return self._StringHelper(self.__class__.__name__, suffix)
229
230
class _GenericDeclaration(Node):
    """Base for named declarations that know their enclosing namespace."""

    def __init__(self, start, end, name, namespace):
        Node.__init__(self, start, end)
        self.name = name
        # Copy: the builder mutates its namespace stack while parsing.
        self.namespace = namespace[:]

    def FullName(self):
        """Return the namespace-qualified name, e.g. `outer::inner::Name`."""
        # An anonymous namespace is recorded as a falsy entry; in that
        # case (or with no namespace) the bare name is returned.
        if self.namespace and self.namespace[-1]:
            return '::'.join(self.namespace) + '::' + self.name
        return self.name

    def _TypeStringHelper(self, suffix):
        if self.namespace:
            names = [n or '<anonymous>' for n in self.namespace]
            suffix += ' in ' + '::'.join(names)
        return self._StringHelper(self.__class__.__name__, suffix)
248
249
250# TODO(nnorwitz): merge with Parameter in some way?
# TODO(nnorwitz): merge with Parameter in some way?
class VariableDeclaration(_GenericDeclaration):
    """A variable declaration: name, Type, and optional initial value."""

    def __init__(self, start, end, name, var_type, initial_value, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.type = var_type
        self.initial_value = initial_value

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        return self.type.name == node.name

    def ToString(self):
        """Return a string that tries to reconstitute the variable decl."""
        decl = '%s %s' % (self.type, self.name)
        if not self.initial_value:
            return decl
        return decl + ' = ' + self.initial_value

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, self.ToString())
270
271
class Typedef(_GenericDeclaration):
    """A typedef; alias holds the aliased type's token sequence."""

    def __init__(self, start, end, name, alias, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.alias = alias

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        wanted = node.name
        for token in self.alias:
            if token is not None and wanted == token.name:
                return True
        return False

    def __str__(self):
        return self._TypeStringHelper('%s, %s' % (self.name, self.alias))
294
295
class _NestedType(_GenericDeclaration):
    """Base for types declared with a field list (unions and enums)."""

    def __init__(self, start, end, name, fields, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.fields = fields

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def __str__(self):
        return self._TypeStringHelper('%s, {%s}' % (self.name, self.fields))
310
311
class Union(_NestedType):
    """A union declaration; its members are in self.fields."""
    pass
314
315
class Enum(_NestedType):
    """An enum declaration; its enumerators are in self.fields."""
    pass
318
319
class Class(_GenericDeclaration):
    """A class declaration or definition with bases, template info, body."""

    def __init__(self, start, end, name, bases, templated_types, body,
                 namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.bases = bases
        self.body = body
        self.templated_types = templated_types

    def IsDeclaration(self):
        # A forward declaration carries neither a base list nor a body.
        return self.bases is None and self.body is None

    def IsDefinition(self):
        return not self.IsDeclaration()

    def IsExportable(self):
        return not self.IsDeclaration()

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        if self.bases:
            # TODO(nnorwitz): bases are tokens, do name comparison.
            for token_list in self.bases:
                for token in token_list:
                    if token.name == node.name:
                        return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        name = self.name
        if self.templated_types:
            name += '<%s>' % self.templated_types
        return self._TypeStringHelper(
            '%s, %s, %s' % (name, self.bases, self.body))
353
354
class Struct(Class):
    """A struct declaration; identical to Class except for the keyword."""
    pass
357
358
class Function(_GenericDeclaration):
    """A free function: return Type, Parameter list, modifier flags, body."""

    def __init__(self, start, end, name, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        # Convert raw token sequences into AST nodes up front.
        converter = TypeConverter(namespace)
        self.return_type = converter.CreateReturnType(return_type)
        self.parameters = converter.ToParameters(parameters)
        self.modifiers = modifiers
        self.body = body
        self.templated_types = templated_types

    def IsDeclaration(self):
        return self.body is None

    def IsDefinition(self):
        return self.body is not None

    def IsExportable(self):
        # Static functions and anything inside an anonymous namespace
        # (recorded as a None entry) are not visible outside the file.
        if self.return_type and 'static' in self.return_type.modifiers:
            return False
        return None not in self.namespace

    def Requires(self, node):
        if self.parameters:
            # TODO(nnorwitz): parameters are tokens, do name comparison.
            for parameter in self.parameters:
                if parameter.name == node.name:
                    return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        # TODO(nnorwitz): add templated_types.
        suffix = ('%s %s(%s), 0x%02x, %s' %
                  (self.return_type, self.name, self.parameters,
                   self.modifiers, self.body))
        return self._TypeStringHelper(suffix)
396
397
class Method(Function):
    """A member function; in_class names the class it was declared in."""

    def __init__(self, start, end, name, in_class, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        Function.__init__(self, start, end, name, return_type, parameters,
                          modifiers, templated_types, body, namespace)
        # TODO(nnorwitz): in_class could also be a namespace which can
        # mess up finding functions properly.
        self.in_class = in_class
406
407
class Type(_GenericDeclaration):
    """Type used for any variable (eg class, primitive, struct, etc)."""

    def __init__(self, start, end, name, templated_types, modifiers,
                 reference, pointer, array):
        """
        Args:
          name: str name of main type
          templated_types: [Class (Type?)] template type info between <>
          modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
          reference, pointer, array: bools
        """
        _GenericDeclaration.__init__(self, start, end, name, [])
        self.templated_types = templated_types
        # A modifier-only declaration uses its last modifier as the name.
        if not name and modifiers:
            self.name = modifiers.pop()
        self.modifiers = modifiers
        self.reference = reference
        self.pointer = pointer
        self.array = array

    def __str__(self):
        pieces = []
        if self.modifiers:
            pieces.append(' '.join(self.modifiers))
            pieces.append(' ')
        pieces.append(str(self.name))
        if self.templated_types:
            pieces.append('<%s>' % self.templated_types)
        if self.reference:
            pieces.append('&')
        if self.pointer:
            pieces.append('*')
        if self.array:
            pieces.append('[]')
        return self._TypeStringHelper(''.join(pieces))

    # By definition, Is* are always False. A Type can only exist in
    # some sort of variable declaration, parameter, or return value.
    def IsDeclaration(self):
        return False

    def IsDefinition(self):
        return False

    def IsExportable(self):
        return False
455
456
class TypeConverter(object):
    """Converts raw token sequences into Type/Parameter AST nodes.

    Stateless except for the namespace stack it was created with; all
    methods operate on lists of tokenize.Token objects.
    """

    def __init__(self, namespace_stack):
        self.namespace_stack = namespace_stack

    def _GetTemplateEnd(self, tokens, start):
        """Return (tokens inside <...>, index just past the closing '>').

        `start` must index the first token AFTER the opening '<'; nesting
        is tracked so inner templates are kept intact.
        """
        count = 1
        end = start
        while 1:
            token = tokens[end]
            end += 1
            if token.name == '<':
                count += 1
            elif token.name == '>':
                count -= 1
                if count == 0:
                    break
        return tokens[start:end-1], end

    def ToType(self, tokens):
        """Convert [Token,...] to [Class(...), ] useful for base classes.
        For example, code like class Foo : public Bar<x, y> { ... };
        the "Bar<x, y>" portion gets converted to an AST.

        Returns:
            [Class(...), ...]
        """
        result = []
        name_tokens = []
        reference = pointer = array = False

        def AddType(templated_types):
            # Partition tokens into name and modifier tokens.
            names = []
            modifiers = []
            for t in name_tokens:
                if keywords.IsKeyword(t.name):
                    modifiers.append(t.name)
                else:
                    names.append(t.name)
            name = ''.join(names)
            result.append(Type(name_tokens[0].start, name_tokens[-1].end,
                               name, templated_types, modifiers,
                               reference, pointer, array))
            del name_tokens[:]

        i = 0
        end = len(tokens)
        while i < end:
            token = tokens[i]
            if token.name == '<':
                # Recurse for the template arguments, then emit the
                # accumulated name tokens as one templated Type.
                new_tokens, new_end = self._GetTemplateEnd(tokens, i+1)
                AddType(self.ToType(new_tokens))
                # If there is a comma after the template, we need to consume
                # that here otherwise it becomes part of the name.
                i = new_end
                reference = pointer = array = False
            elif token.name == ',':
                AddType([])
                reference = pointer = array = False
            elif token.name == '*':
                pointer = True
            elif token.name == '&':
                reference = True
            elif token.name == '[':
                # NOTE(review): '[' sets pointer (not array) here,
                # unlike ToParameters — presumably intentional for
                # decayed array types; confirm before changing.
                pointer = True
            elif token.name == ']':
                pass
            else:
                name_tokens.append(token)
            i += 1

        if name_tokens:
            # No '<' in the tokens, just a simple name and no template.
            AddType([])
        return result

    def DeclarationToParts(self, parts, needs_name_removed):
        """Split declaration tokens into their semantic pieces.

        Args:
          parts: [Token] for one declaration.
          needs_name_removed: bool; True when the declared name is still
              embedded in `parts` (e.g. parameters) and must be extracted.

        Returns:
          (name, type_name, templated_types, modifiers, default,
           other_tokens)
        """
        name = None
        default = []
        if needs_name_removed:
            # Handle default (initial) values properly.
            for i, t in enumerate(parts):
                if t.name == '=':
                    default = parts[i+1:]
                    name = parts[i-1].name
                    # For `x[] = ...` the name sits before the brackets.
                    if name == ']' and parts[i-2].name == '[':
                        name = parts[i-3].name
                        i -= 1
                    parts = parts[:i-1]
                    break
            else:
                # No '=' found: the trailing NAME token is the name.
                if parts[-1].token_type == tokenize.NAME:
                    name = parts.pop().name
                else:
                    # TODO(nnorwitz): this is a hack that happens for code like
                    # Register(Foo<T>); where it thinks this is a function call
                    # but it's actually a declaration.
                    name = '???'
        modifiers = []
        type_name = []
        other_tokens = []
        templated_types = []
        i = 0
        end = len(parts)
        while i < end:
            p = parts[i]
            if keywords.IsKeyword(p.name):
                modifiers.append(p.name)
            elif p.name == '<':
                templated_tokens, new_end = self._GetTemplateEnd(parts, i+1)
                templated_types = self.ToType(templated_tokens)
                i = new_end - 1
                # Don't add a spurious :: to data members being initialized.
                next_index = i + 1
                if next_index < end and parts[next_index].name == '::':
                    i += 1
            elif p.name in ('[', ']', '='):
                # These are handled elsewhere.
                other_tokens.append(p)
            elif p.name not in ('*', '&', '>'):
                # Ensure that names have a space between them.
                if (type_name and type_name[-1].token_type == tokenize.NAME and
                    p.token_type == tokenize.NAME):
                    type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0))
                type_name.append(p)
            else:
                other_tokens.append(p)
            i += 1
        type_name = ''.join([t.name for t in type_name])
        return name, type_name, templated_types, modifiers, default, other_tokens

    def ToParameters(self, tokens):
        """Convert a parameter-list token sequence into [Parameter, ...]."""
        if not tokens:
            return []

        result = []
        name = type_name = ''
        type_modifiers = []
        pointer = reference = array = False
        first_token = None
        default = []

        def AddParameter():
            # Flush the accumulated state into one Parameter node.
            if default:
                del default[0]  # Remove flag.
            end = type_modifiers[-1].end
            parts = self.DeclarationToParts(type_modifiers, True)
            (name, type_name, templated_types, modifiers,
             unused_default, unused_other_tokens) = parts
            parameter_type = Type(first_token.start, first_token.end,
                                  type_name, templated_types, modifiers,
                                  reference, pointer, array)
            p = Parameter(first_token.start, end, name,
                          parameter_type, default)
            result.append(p)

        template_count = 0
        for s in tokens:
            if not first_token:
                first_token = s
            if s.name == '<':
                template_count += 1
            elif s.name == '>':
                template_count -= 1
            if template_count > 0:
                # Commas inside template args don't separate parameters.
                type_modifiers.append(s)
                continue

            if s.name == ',':
                AddParameter()
                name = type_name = ''
                type_modifiers = []
                pointer = reference = array = False
                first_token = None
                default = []
            elif s.name == '*':
                pointer = True
            elif s.name == '&':
                reference = True
            elif s.name == '[':
                array = True
            elif s.name == ']':
                pass  # Just don't add to type_modifiers.
            elif s.name == '=':
                # Got a default value. Add any value (None) as a flag.
                default.append(None)
            elif default:
                default.append(s)
            else:
                type_modifiers.append(s)
        AddParameter()
        return result

    def CreateReturnType(self, return_type_seq):
        """Build a Type node from return-type tokens, or None if empty."""
        if not return_type_seq:
            return None
        start = return_type_seq[0].start
        end = return_type_seq[-1].end
        _, name, templated_types, modifiers, default, other_tokens = \
            self.DeclarationToParts(return_type_seq, False)
        names = [n.name for n in other_tokens]
        reference = '&' in names
        pointer = '*' in names
        array = '[' in names
        return Type(start, end, name, templated_types, modifiers,
                    reference, pointer, array)

    def GetTemplateIndices(self, names):
        """Return (start, end) slice bounds covering '<'...'>' in names."""
        # names is a list of strings.
        start = names.index('<')
        end = len(names) - 1
        while end > 0:
            if names[end] == '>':
                break
            end -= 1
        return start, end+1
674
675class AstBuilder(object):
676 def __init__(self, token_stream, filename, in_class='', visibility=None,
677 namespace_stack=[]):
678 self.tokens = token_stream
679 self.filename = filename
680 # TODO(nnorwitz): use a better data structure (deque) for the queue.
681 # Switching directions of the "queue" improved perf by about 25%.
682 # Using a deque should be even better since we access from both sides.
683 self.token_queue = []
684 self.namespace_stack = namespace_stack[:]
685 self.in_class = in_class
686 if in_class is None:
687 self.in_class_name_only = None
688 else:
689 self.in_class_name_only = in_class.split('::')[-1]
690 self.visibility = visibility
691 self.in_function = False
692 self.current_token = None
693 # Keep the state whether we are currently handling a typedef or not.
694 self._handling_typedef = False
695
696 self.converter = TypeConverter(self.namespace_stack)
697
698 def HandleError(self, msg, token):
699 printable_queue = list(reversed(self.token_queue[-20:]))
700 sys.stderr.write('Got %s in %s @ %s %s\n' %
701 (msg, self.filename, token, printable_queue))
702
    def Generate(self):
        """Yield top-level AST nodes until the token stream is exhausted.

        Internal namespace-pop markers are consumed here (popping the
        namespace stack) and never yielded.  Any exception raised while
        handling a token is reported via HandleError and re-raised.
        """
        while 1:
            token = self._GetNextToken()
            if not token:
                break

            # Get the next token.
            self.current_token = token

            # Dispatch on the next token type.
            if token.token_type == _INTERNAL_TOKEN:
                if token.name == _NAMESPACE_POP:
                    self.namespace_stack.pop()
                continue

            try:
                result = self._GenerateOne(token)
                if result is not None:
                    yield result
            except:
                # Bare except is deliberate: report context, then re-raise.
                self.HandleError('exception', token)
                raise
725
726 def _CreateVariable(self, pos_token, name, type_name, type_modifiers,
727 ref_pointer_name_seq, templated_types, value=None):
728 reference = '&' in ref_pointer_name_seq
729 pointer = '*' in ref_pointer_name_seq
730 array = '[' in ref_pointer_name_seq
731 var_type = Type(pos_token.start, pos_token.end, type_name,
732 templated_types, type_modifiers,
733 reference, pointer, array)
734 return VariableDeclaration(pos_token.start, pos_token.end,
735 name, var_type, value, self.namespace_stack)
736
    def _GenerateOne(self, token):
        """Parse one construct starting at `token` and return its AST node.

        Returns None for constructs that are consumed but produce no node
        (e.g. skipped #if 0 blocks, unhandled syntax).
        """
        if token.token_type == tokenize.NAME:
            if (keywords.IsKeyword(token.name) and
                not keywords.IsBuiltinType(token.name)):
                # Dispatch keywords to the matching handle_<keyword> method.
                method = getattr(self, 'handle_' + token.name)
                return method()
            elif token.name == self.in_class_name_only:
                # The token name is the same as the class, must be a ctor if
                # there is a paren.  Otherwise, it's the return type.
                # Peek ahead to get the next token to figure out which.
                next = self._GetNextToken()  # NOTE: shadows builtin next().
                self._AddBackToken(next)
                if next.token_type == tokenize.SYNTAX and next.name == '(':
                    return self._GetMethod([token], FUNCTION_CTOR, None, True)
                # Fall through--handle like any other method.

            # Handle data or function declaration/definition.
            syntax = tokenize.SYNTAX
            temp_tokens, last_token = \
                self._GetVarTokensUpTo(syntax, '(', ';', '{', '[')
            temp_tokens.insert(0, token)
            if last_token.name == '(':
                # If there is an assignment before the paren,
                # this is an expression, not a method.
                expr = bool([e for e in temp_tokens if e.name == '='])
                if expr:
                    new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.append(last_token)
                    temp_tokens.extend(new_temp)
                    last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0)

            if last_token.name == '[':
                # Handle array, this isn't a method, unless it's an operator.
                # TODO(nnorwitz): keep the size somewhere.
                # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
                temp_tokens.append(last_token)
                if temp_tokens[-2].name == 'operator':
                    temp_tokens.append(self._GetNextToken())
                else:
                    temp_tokens2, last_token = \
                        self._GetVarTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.extend(temp_tokens2)

            if last_token.name == ';':
                # Handle data, this isn't a method.
                parts = self.converter.DeclarationToParts(temp_tokens, True)
                (name, type_name, templated_types, modifiers, default,
                 unused_other_tokens) = parts

                t0 = temp_tokens[0]
                names = [t.name for t in temp_tokens]
                if templated_types:
                    # Strip the <...> tokens from the name sequence.
                    start, end = self.converter.GetTemplateIndices(names)
                    names = names[:start] + names[end:]
                default = ''.join([t.name for t in default])
                return self._CreateVariable(t0, name, type_name, modifiers,
                                            names, templated_types, default)
            if last_token.name == '{':
                # Push everything after the first token back and re-dispatch
                # on it (e.g. `struct`, `enum`, ...).
                self._AddBackTokens(temp_tokens[1:])
                self._AddBackToken(last_token)
                method_name = temp_tokens[0].name
                method = getattr(self, 'handle_' + method_name, None)
                if not method:
                    # Must be declaring a variable.
                    # TODO(nnorwitz): handle the declaration.
                    return None
                return method()
            return self._GetMethod(temp_tokens, 0, None, False)
        elif token.token_type == tokenize.SYNTAX:
            if token.name == '~' and self.in_class:
                # Must be a dtor (probably not in method body).
                token = self._GetNextToken()
                # self.in_class can contain A::Name, but the dtor will only
                # be Name.  Make sure to compare against the right value.
                if (token.token_type == tokenize.NAME and
                    token.name == self.in_class_name_only):
                    return self._GetMethod([token], FUNCTION_DTOR, None, True)
            # TODO(nnorwitz): handle a lot more syntax.
        elif token.token_type == tokenize.PREPROCESSOR:
            # TODO(nnorwitz): handle more preprocessor directives.
            # token starts with a #, so remove it and strip whitespace.
            name = token.name[1:].lstrip()
            if name.startswith('include'):
                # Remove "include".
                name = name[7:].strip()
                assert name
                # Handle #include \<newline> "header-on-second-line.h".
                if name.startswith('\\'):
                    name = name[1:].strip()
                assert name[0] in '<"', token
                assert name[-1] in '>"', token
                system = name[0] == '<'
                filename = name[1:-1]
                return Include(token.start, token.end, filename, system)
            if name.startswith('define'):
                # Remove "define".
                name = name[6:].strip()
                assert name
                value = ''
                # Split the macro name from its definition at the first space.
                for i, c in enumerate(name):
                    if c.isspace():
                        value = name[i:].lstrip()
                        name = name[:i]
                        break
                return Define(token.start, token.end, name, value)
            if name.startswith('if') and name[2:3].isspace():
                condition = name[3:].strip()
                if condition.startswith('0') or condition.startswith('(0)'):
                    self._SkipIf0Blocks()
        return None
847
848 def _GetTokensUpTo(self, expected_token_type, expected_token):
849 return self._GetVarTokensUpTo(expected_token_type, expected_token)[0]
850
851 def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens):
852 last_token = self._GetNextToken()
853 tokens = []
854 while (last_token.token_type != expected_token_type or
855 last_token.name not in expected_tokens):
856 tokens.append(last_token)
857 last_token = self._GetNextToken()
858 return tokens, last_token
859
860 # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necesary.
861 def _IgnoreUpTo(self, token_type, token):
862 unused_tokens = self._GetTokensUpTo(token_type, token)
863
864 def _SkipIf0Blocks(self):
865 count = 1
866 while 1:
867 token = self._GetNextToken()
868 if token.token_type != tokenize.PREPROCESSOR:
869 continue
870
871 name = token.name[1:].lstrip()
872 if name.startswith('endif'):
873 count -= 1
874 if count == 0:
875 break
876 elif name.startswith('if'):
877 count += 1
878
    def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None):
        """Yield tokens up to and including the balanced close_paren.

        Assumes the open_paren was already consumed; nested pairs are
        tracked, and the final close_paren IS yielded as the last token.
        """
        if GetNextToken is None:
            GetNextToken = self._GetNextToken
        # Assumes the current token is open_paren and we will consume
        # and return up to the close_paren.
        count = 1
        token = GetNextToken()
        while 1:
            if token.token_type == tokenize.SYNTAX:
                if token.name == open_paren:
                    count += 1
                elif token.name == close_paren:
                    count -= 1
                    if count == 0:
                        break
            yield token
            token = GetNextToken()
        yield token
897
    def _GetParameters(self):
        """Yield a parameter list's tokens, including the closing ')'."""
        return self._GetMatchingChar('(', ')')
900
    def GetScope(self):
        """Yield a brace-delimited scope's tokens, including the '}'."""
        return self._GetMatchingChar('{', '}')
903
904 def _GetNextToken(self):
905 if self.token_queue:
906 return self.token_queue.pop()
907 return next(self.tokens)
908
909 def _AddBackToken(self, token):
910 if token.whence == tokenize.WHENCE_STREAM:
911 token.whence = tokenize.WHENCE_QUEUE
912 self.token_queue.insert(0, token)
913 else:
914 assert token.whence == tokenize.WHENCE_QUEUE, token
915 self.token_queue.append(token)
916
917 def _AddBackTokens(self, tokens):
918 if tokens:
919 if tokens[-1].whence == tokenize.WHENCE_STREAM:
920 for token in tokens:
921 token.whence = tokenize.WHENCE_QUEUE
922 self.token_queue[:0] = reversed(tokens)
923 else:
924 assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens
925 self.token_queue.extend(reversed(tokens))
926
    def GetName(self, seq=None):
        """Returns ([tokens], next_token_info).

        Collects a (possibly qualified, possibly templated) identifier's
        tokens.  Reads from `seq` if given, otherwise from the token
        stream; the first non-name token is returned separately.
        """
        GetNextToken = self._GetNextToken
        if seq is not None:
            it = iter(seq)
            GetNextToken = lambda: next(it)
        next_token = GetNextToken()
        tokens = []
        last_token_was_name = False
        while (next_token.token_type == tokenize.NAME or
               (next_token.token_type == tokenize.SYNTAX and
                next_token.name in ('::', '<'))):
            # Two NAMEs in a row means the identifier should terminate.
            # It's probably some sort of variable declaration.
            if last_token_was_name and next_token.token_type == tokenize.NAME:
                break
            last_token_was_name = next_token.token_type == tokenize.NAME
            tokens.append(next_token)
            # Handle templated names.
            if next_token.name == '<':
                tokens.extend(self._GetMatchingChar('<', '>', GetNextToken))
                last_token_was_name = True
            next_token = GetNextToken()
        return tokens, next_token
951
    def GetMethod(self, modifiers, templated_types):
        """Parse a method whose tokens (through the opening '(') are pending.

        `modifiers` is a FUNCTION_* bitmask seeded by the caller.
        """
        return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
        assert len(return_type_and_name) >= 1
        return self._GetMethod(return_type_and_name, modifiers, templated_types,
                               False)
957
958 def _GetMethod(self, return_type_and_name, modifiers, templated_types,
959 get_paren):
960 template_portion = None
961 if get_paren:
962 token = self._GetNextToken()
963 assert token.token_type == tokenize.SYNTAX, token
964 if token.name == '<':
965 # Handle templatized dtors.
966 template_portion = [token]
967 template_portion.extend(self._GetMatchingChar('<', '>'))
968 token = self._GetNextToken()
969 assert token.token_type == tokenize.SYNTAX, token
970 assert token.name == '(', token
971
972 name = return_type_and_name.pop()
973 # Handle templatized ctors.
974 if name.name == '>':
975 index = 1
976 while return_type_and_name[index].name != '<':
977 index += 1
978 template_portion = return_type_and_name[index:] + [name]
979 del return_type_and_name[index:]
980 name = return_type_and_name.pop()
981 elif name.name == ']':
982 rt = return_type_and_name
983 assert rt[-1].name == '[', return_type_and_name
984 assert rt[-2].name == 'operator', return_type_and_name
985 name_seq = return_type_and_name[-2:]
986 del return_type_and_name[-2:]
987 name = tokenize.Token(tokenize.NAME, 'operator[]',
988 name_seq[0].start, name.end)
989 # Get the open paren so _GetParameters() below works.
990 unused_open_paren = self._GetNextToken()
991
992 # TODO(nnorwitz): store template_portion.
993 return_type = return_type_and_name
994 indices = name
995 if return_type:
996 indices = return_type[0]
997
998 # Force ctor for templatized ctors.
999 if name.name == self.in_class and not modifiers:
1000 modifiers |= FUNCTION_CTOR
1001 parameters = list(self._GetParameters())
1002 del parameters[-1] # Remove trailing ')'.
1003
1004 # Handling operator() is especially weird.
1005 if name.name == 'operator' and not parameters:
1006 token = self._GetNextToken()
1007 assert token.name == '(', token
1008 parameters = list(self._GetParameters())
1009 del parameters[-1] # Remove trailing ')'.
1010
1011 token = self._GetNextToken()
1012 while token.token_type == tokenize.NAME:
1013 modifier_token = token
1014 token = self._GetNextToken()
1015 if modifier_token.name == 'const':
1016 modifiers |= FUNCTION_CONST
1017 elif modifier_token.name == '__attribute__':
1018 # TODO(nnorwitz): handle more __attribute__ details.
1019 modifiers |= FUNCTION_ATTRIBUTE
1020 assert token.name == '(', token
1021 # Consume everything between the (parens).
1022 unused_tokens = list(self._GetMatchingChar('(', ')'))
1023 token = self._GetNextToken()
1024 elif modifier_token.name == 'throw':
1025 modifiers |= FUNCTION_THROW
1026 assert token.name == '(', token
1027 # Consume everything between the (parens).
1028 unused_tokens = list(self._GetMatchingChar('(', ')'))
1029 token = self._GetNextToken()
1030 elif modifier_token.name == modifier_token.name.upper():
1031 # HACK(nnorwitz): assume that all upper-case names
1032 # are some macro we aren't expanding.
1033 modifiers |= FUNCTION_UNKNOWN_ANNOTATION
1034 else:
1035 self.HandleError('unexpected token', modifier_token)
1036
1037 assert token.token_type == tokenize.SYNTAX, token
1038 # Handle ctor initializers.
1039 if token.name == ':':
1040 # TODO(nnorwitz): anything else to handle for initializer list?
1041 while token.name != ';' and token.name != '{':
1042 token = self._GetNextToken()
1043
1044 # Handle pointer to functions that are really data but look
1045 # like method declarations.
1046 if token.name == '(':
1047 if parameters[0].name == '*':
1048 # name contains the return type.
1049 name = parameters.pop()
1050 # parameters contains the name of the data.
1051 modifiers = [p.name for p in parameters]
1052 # Already at the ( to open the parameter list.
1053 function_parameters = list(self._GetMatchingChar('(', ')'))
1054 del function_parameters[-1] # Remove trailing ')'.
1055 # TODO(nnorwitz): store the function_parameters.
1056 token = self._GetNextToken()
1057 assert token.token_type == tokenize.SYNTAX, token
1058 assert token.name == ';', token
1059 return self._CreateVariable(indices, name.name, indices.name,
1060 modifiers, '', None)
1061 # At this point, we got something like:
1062 # return_type (type::*name_)(params);
1063 # This is a data member called name_ that is a function pointer.
1064 # With this code: void (sq_type::*field_)(string&);
1065 # We get: name=void return_type=[] parameters=sq_type ... field_
1066 # TODO(nnorwitz): is return_type always empty?
1067 # TODO(nnorwitz): this isn't even close to being correct.
1068 # Just put in something so we don't crash and can move on.
1069 real_name = parameters[-1]
1070 modifiers = [p.name for p in self._GetParameters()]
1071 del modifiers[-1] # Remove trailing ')'.
1072 return self._CreateVariable(indices, real_name.name, indices.name,
1073 modifiers, '', None)
1074
1075 if token.name == '{':
1076 body = list(self.GetScope())
1077 del body[-1] # Remove trailing '}'.
1078 else:
1079 body = None
1080 if token.name == '=':
1081 token = self._GetNextToken()
1082 assert token.token_type == tokenize.CONSTANT, token
1083 assert token.name == '0', token
1084 modifiers |= FUNCTION_PURE_VIRTUAL
1085 token = self._GetNextToken()
1086
1087 if token.name == '[':
1088 # TODO(nnorwitz): store tokens and improve parsing.
1089 # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
1090 tokens = list(self._GetMatchingChar('[', ']'))
1091 token = self._GetNextToken()
1092
1093 assert token.name == ';', (token, return_type_and_name, parameters)
1094
1095 # Looks like we got a method, not a function.
1096 if len(return_type) > 2 and return_type[-1].name == '::':
1097 return_type, in_class = \
1098 self._GetReturnTypeAndClassName(return_type)
1099 return Method(indices.start, indices.end, name.name, in_class,
1100 return_type, parameters, modifiers, templated_types,
1101 body, self.namespace_stack)
1102 return Function(indices.start, indices.end, name.name, return_type,
1103 parameters, modifiers, templated_types, body,
1104 self.namespace_stack)
1105
    def _GetReturnTypeAndClassName(self, token_seq):
        """Split the tokens for 'ReturnType Class::Method' into two parts.

        Args:
          token_seq: [Token, ...] covering the return type plus the
              qualified name (everything before the method's open paren).

        Returns:
          (return_type, class_name) where return_type is a flat [Token, ...]
          for the return type and class_name is the [Token, ...] of the last
          name in the sequence.
        """
        # Splitting the return type from the class name in a method
        # can be tricky.  For example, Return::Type::Is::Hard::To::Find().
        # Where is the return type and where is the class name?
        # The heuristic used is to pull the last name as the class name.
        # This includes all the templated type info.
        # TODO(nnorwitz): if there is only One name like in the
        # example above, punt and assume the last bit is the class name.

        # Ignore a :: prefix, if exists so we can find the first real name.
        i = 0
        if token_seq[0].name == '::':
            i = 1
        # Ignore a :: suffix, if exists.
        end = len(token_seq) - 1
        if token_seq[end-1].name == '::':
            end -= 1

        # Make a copy of the sequence so we can append a sentinel
        # value.  This is required because GetName has to have some
        # terminating condition beyond the last name.
        seq_copy = token_seq[i:end]
        seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0))
        names = []
        while i < end:
            # Iterate through the sequence parsing out each name.
            # NOTE: 'next' shadows the builtin of the same name here.
            new_name, next = self.GetName(seq_copy[i:])
            assert new_name, 'Got empty new_name, next=%s' % next
            # We got a pointer or ref.  Add it to the name.
            if next and next.token_type == tokenize.SYNTAX:
                new_name.append(next)
            names.append(new_name)
            i += len(new_name)

        # Now that we have the names, it's time to undo what we did.

        # Remove the sentinel value.
        names[-1].pop()
        # Flatten the token sequence for the return type.
        return_type = [e for seq in names[:-1] for e in seq]
        # The class name is the last name.
        class_name = names[-1]
        return return_type, class_name
1149
    def handle_bool(self):
        """No-op: 'bool' is a type keyword needing no special handling."""
        pass
1152
    def handle_char(self):
        """No-op: 'char' is a type keyword needing no special handling."""
        pass
1155
    def handle_int(self):
        """No-op: 'int' is a type keyword needing no special handling."""
        pass
1158
    def handle_long(self):
        """No-op: 'long' is a type keyword needing no special handling."""
        pass
1161
    def handle_short(self):
        """No-op: 'short' is a type keyword needing no special handling."""
        pass
1164
    def handle_double(self):
        """No-op: 'double' is a type keyword needing no special handling."""
        pass
1167
    def handle_float(self):
        """No-op: 'float' is a type keyword needing no special handling."""
        pass
1170
    def handle_void(self):
        """No-op: 'void' is a type keyword needing no special handling."""
        pass
1173
    def handle_wchar_t(self):
        """No-op: 'wchar_t' is a type keyword needing no special handling."""
        pass
1176
    def handle_unsigned(self):
        """No-op: 'unsigned' is a type modifier needing no special handling."""
        pass
1179
    def handle_signed(self):
        """No-op: 'signed' is a type modifier needing no special handling."""
        pass
1182
    def _GetNestedType(self, ctor):
        """Parse a nested type (e.g. union/enum) after its keyword token.

        Args:
          ctor: node constructor invoked as
              ctor(start, end, name, fields, namespace_stack).

        Returns:
          A node built by ctor, or the result of _CreateVariable when the
          declaration also declares a variable of the (possibly anonymous)
          type.
        """
        name = None
        name_tokens, token = self.GetName()
        if name_tokens:
            name = ''.join([t.name for t in name_tokens])

        # Handle forward declarations.
        if token.token_type == tokenize.SYNTAX and token.name == ';':
            return ctor(token.start, token.end, name, None,
                        self.namespace_stack)

        # Inside a typedef, a NAME here belongs to the typedef, so push it
        # back and emit a body-less type node.
        if token.token_type == tokenize.NAME and self._handling_typedef:
            self._AddBackToken(token)
            return ctor(token.start, token.end, name, None,
                        self.namespace_stack)

        # Must be the type declaration.
        fields = list(self._GetMatchingChar('{', '}'))
        del fields[-1]                      # Remove trailing '}'.
        if token.token_type == tokenize.SYNTAX and token.name == '{':
            # NOTE: 'next' shadows the builtin of the same name.
            next = self._GetNextToken()
            new_type = ctor(token.start, token.end, name, fields,
                            self.namespace_stack)
            # A name means this is an anonymous type and the name
            # is the variable declaration.
            if next.token_type != tokenize.NAME:
                return new_type
            name = new_type
            token = next

        # Must be variable declaration using the type prefixed with keyword.
        assert token.token_type == tokenize.NAME, token
        return self._CreateVariable(token, token.name, name, [], '', None)
1216
    def handle_struct(self):
        """Handle a 'struct' keyword.

        Distinguishes between struct definitions, variables declared with an
        elaborated 'struct X' type, and methods returning a struct; falls
        through to _GetClass for a full struct definition.
        """
        # Special case the handling typedef/aliasing of structs here.
        # It would be a pain to handle in the class code.
        name_tokens, var_token = self.GetName()
        if name_tokens:
            next_token = self._GetNextToken()
            # '*' or '&' after the name: pointer/reference declarator.
            is_syntax = (var_token.token_type == tokenize.SYNTAX and
                         var_token.name[0] in '*&')
            # NAME then ';': a plain 'struct X var;' declaration.
            is_variable = (var_token.token_type == tokenize.NAME and
                           next_token.name == ';')
            variable = var_token
            if is_syntax and not is_variable:
                variable = next_token
                temp = self._GetNextToken()
                if temp.token_type == tokenize.SYNTAX and temp.name == '(':
                    # Handle methods declared to return a struct.
                    # Synthesize a 'struct' token positioned just before the
                    # type name so _GetMethod sees the full return type.
                    t0 = name_tokens[0]
                    struct = tokenize.Token(tokenize.NAME, 'struct',
                                            t0.start-7, t0.start-2)
                    type_and_name = [struct]
                    type_and_name.extend(name_tokens)
                    type_and_name.extend((var_token, next_token))
                    return self._GetMethod(type_and_name, 0, None, False)
                assert temp.name == ';', (temp, name_tokens, var_token)
            if is_syntax or (is_variable and not self._handling_typedef):
                modifiers = ['struct']
                type_name = ''.join([t.name for t in name_tokens])
                position = name_tokens[0]
                return self._CreateVariable(position, variable.name, type_name,
                                            modifiers, var_token.name, None)
            # Not a variable: push everything back and parse as a class body.
            name_tokens.extend((var_token, next_token))
            self._AddBackTokens(name_tokens)
        else:
            self._AddBackToken(var_token)
        # Struct members default to public visibility.
        return self._GetClass(Struct, VISIBILITY_PUBLIC, None)
1251 return self._GetClass(Struct, VISIBILITY_PUBLIC, None)
1252
    def handle_union(self):
        """Parse a union declaration/definition via _GetNestedType."""
        return self._GetNestedType(Union)
1255
    def handle_enum(self):
        """Parse an enum declaration/definition via _GetNestedType."""
        return self._GetNestedType(Enum)
1258
    def handle_auto(self):
        """No-op handler for the 'auto' keyword."""
        # TODO(nnorwitz): warn about using auto?  Probably not since it
        # will be reclaimed and useful for C++0x.
        pass
1263
    def handle_register(self):
        """No-op handler for the 'register' storage-class keyword."""
        pass
1266
    def handle_const(self):
        """No-op handler for the 'const' qualifier keyword."""
        pass
1269
    def handle_inline(self):
        """No-op handler for the 'inline' keyword."""
        pass
1272
    def handle_extern(self):
        """No-op handler for the 'extern' storage-class keyword."""
        pass
1275
    def handle_static(self):
        """No-op handler for the 'static' storage-class keyword."""
        pass
1278
    def handle_virtual(self):
        """Parse the method that must follow a 'virtual' keyword.

        Returns:
          A method node with FUNCTION_VIRTUAL set (plus FUNCTION_DTOR for
          a virtual destructor).
        """
        # What follows must be a method.
        token = token2 = self._GetNextToken()
        if token.name == 'inline':
            # HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
            token2 = self._GetNextToken()
        if token2.token_type == tokenize.SYNTAX and token2.name == '~':
            return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None)
        assert token.token_type == tokenize.NAME or token.name == '::', token
        # Collect the return type and name up to the opening paren, then
        # re-insert the token(s) we already consumed at the front.
        return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
        return_type_and_name.insert(0, token)
        if token2 is not token:
            return_type_and_name.insert(1, token2)
        return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL,
                               None, False)
1294
    def handle_volatile(self):
        """No-op handler for the 'volatile' qualifier keyword."""
        pass
1297
    def handle_mutable(self):
        """No-op handler for the 'mutable' keyword."""
        pass
1300
    def handle_public(self):
        """Switch the current member visibility to public (class scope only)."""
        assert self.in_class
        self.visibility = VISIBILITY_PUBLIC
1304
    def handle_protected(self):
        """Switch the current member visibility to protected (class scope only)."""
        assert self.in_class
        self.visibility = VISIBILITY_PROTECTED
1308
    def handle_private(self):
        """Switch the current member visibility to private (class scope only)."""
        assert self.in_class
        self.visibility = VISIBILITY_PRIVATE
1312
1313 def handle_friend(self):
1314 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1315 assert tokens
1316 t0 = tokens[0]
1317 return Friend(t0.start, t0.end, tokens, self.namespace_stack)
1318
    def handle_static_cast(self):
        """No-op: static_cast expressions are not modeled."""
        pass
1321
    def handle_const_cast(self):
        """No-op: const_cast expressions are not modeled."""
        pass
1324
    def handle_dynamic_cast(self):
        """No-op: dynamic_cast expressions are not modeled."""
        pass
1327
    def handle_reinterpret_cast(self):
        """No-op: reinterpret_cast expressions are not modeled."""
        pass
1330
    def handle_new(self):
        """No-op: new expressions are not modeled."""
        pass
1333
1334 def handle_delete(self):
1335 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1336 assert tokens
1337 return Delete(tokens[0].start, tokens[0].end, tokens)
1338
    def handle_typedef(self):
        """Parse a typedef declaration and return a Typedef node.

        Handles struct/enum/union/class typedefs by dispatching to the
        matching handle_* method with _handling_typedef set, plus hacks for
        function-pointer and array typedefs.
        """
        token = self._GetNextToken()
        if (token.token_type == tokenize.NAME and
            keywords.IsKeyword(token.name)):
            # Token must be struct/enum/union/class.
            method = getattr(self, 'handle_' + token.name)
            self._handling_typedef = True
            tokens = [method()]
            self._handling_typedef = False
        else:
            tokens = [token]

        # Get the remainder of the typedef up to the semi-colon.
        tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';'))

        # TODO(nnorwitz): clean all this up.
        assert tokens
        # The last token is assumed to be the new name being declared;
        # 'indices' is only used for source position information.
        name = tokens.pop()
        indices = name
        if tokens:
            indices = tokens[0]
        if not indices:
            indices = token
        if name.name == ')':
            # HACK(nnorwitz): Handle pointers to functions "properly".
            if (len(tokens) >= 4 and
                tokens[1].name == '(' and tokens[2].name == '*'):
                tokens.append(name)
                name = tokens[3]
        elif name.name == ']':
            # HACK(nnorwitz): Handle arrays properly.
            if len(tokens) >= 2:
                tokens.append(name)
                name = tokens[1]
        new_type = tokens
        if tokens and isinstance(tokens[0], tokenize.Token):
            new_type = self.converter.ToType(tokens)[0]
        return Typedef(indices.start, indices.end, name.name,
                       new_type, self.namespace_stack)
1378
    def handle_typeid(self):
        """No-op: typeid expressions are not modeled."""
        pass  # Not needed yet.
1381
    def handle_typename(self):
        """No-op: 'typename' needs no special handling yet."""
        pass  # Not needed yet.
1384
    def _GetTemplatedTypes(self):
        """Parse the '<...>' template parameter list (after the '<').

        Returns:
          Dict mapping each template parameter name to a
          (type_name, default) tuple, where type_name is a Token (or None)
          and default is the token list of the default value (or None).
        """
        result = {}
        tokens = list(self._GetMatchingChar('<', '>'))
        len_tokens = len(tokens) - 1   # Ignore trailing '>'.
        i = 0
        while i < len_tokens:
            key = tokens[i].name
            i += 1
            # Skip keywords (typename/class/etc.) and separators.
            if keywords.IsKeyword(key) or key == ',':
                continue
            type_name = default = None
            if i < len_tokens:
                i += 1
                if tokens[i-1].name == '=':
                    # 'name = default': capture the default value tokens.
                    assert i < len_tokens, '%s %s' % (i, tokens)
                    default, unused_next_token = self.GetName(tokens[i:])
                    i += len(default)
                else:
                    if tokens[i-1].name != ',':
                        # We got something like: Type variable.
                        # Re-adjust the key (variable) and type_name (Type).
                        key = tokens[i-1].name
                        type_name = tokens[i-2]

            result[key] = (type_name, default)
        return result
1411
    def handle_template(self):
        """Parse a template declaration (class, struct, friend, or method).

        Returns:
          The node for the templated declaration, or None for what is
          assumed to be a templated variable definition.
        """
        token = self._GetNextToken()
        assert token.token_type == tokenize.SYNTAX, token
        assert token.name == '<', token
        templated_types = self._GetTemplatedTypes()
        # TODO(nnorwitz): for now, just ignore the template params.
        token = self._GetNextToken()
        if token.token_type == tokenize.NAME:
            if token.name == 'class':
                return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
            elif token.name == 'struct':
                return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
            elif token.name == 'friend':
                return self.handle_friend()
        # Not a type: peek ahead to decide between a method and a variable,
        # then push all tokens back so the real parse starts clean.
        self._AddBackToken(token)
        tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';')
        tokens.append(last)
        self._AddBackTokens(tokens)
        if last.name == '(':
            return self.GetMethod(FUNCTION_NONE, templated_types)
        # Must be a variable definition.
        return None
1434
    def handle_true(self):
        """No-op handler for the 'true' literal."""
        pass  # Nothing to do.
1437
    def handle_false(self):
        """No-op handler for the 'false' literal."""
        pass  # Nothing to do.
1440
    def handle_asm(self):
        """No-op: asm blocks are not modeled."""
        pass  # Not needed yet.
1443
    def handle_class(self):
        """Parse a class; members default to private visibility."""
        return self._GetClass(Class, VISIBILITY_PRIVATE, None)
1446
    def _GetBases(self):
        """Parse the base-class list after the ':' of a class declaration.

        Returns:
          (bases, token) where bases is a list of base-class type nodes and
          token is the '{' that opens the class body.
        """
        # Get base classes.
        bases = []
        while 1:
            token = self._GetNextToken()
            assert token.token_type == tokenize.NAME, token
            # TODO(nnorwitz): store kind of inheritance...maybe.
            if token.name not in ('public', 'protected', 'private'):
                # If inheritance type is not specified, it is private.
                # Just put the token back so we can form a name.
                # TODO(nnorwitz): it would be good to warn about this.
                self._AddBackToken(token)
            else:
                # Check for virtual inheritance.
                token = self._GetNextToken()
                if token.name != 'virtual':
                    self._AddBackToken(token)
                else:
                    # TODO(nnorwitz): store that we got virtual for this base.
                    pass
            base, next_token = self.GetName()
            bases_ast = self.converter.ToType(base)
            assert len(bases_ast) == 1, bases_ast
            bases.append(bases_ast[0])
            assert next_token.token_type == tokenize.SYNTAX, next_token
            if next_token.name == '{':
                token = next_token
                break
            # Support multiple inheritance.
            assert next_token.name == ',', next_token
        return bases, token
1478
    def _GetClass(self, class_type, visibility, templated_types):
        """Parse a class/struct declaration or definition.

        Args:
          class_type: node constructor (Class or Struct).
          visibility: default member visibility (VISIBILITY_*) for the body.
          templated_types: dict from _GetTemplatedTypes, or None.

        Returns:
          A class_type node (forward declaration or full definition), or a
          variable/method node when the tokens turn out to declare one.
        """
        class_name = None
        class_token = self._GetNextToken()
        if class_token.token_type != tokenize.NAME:
            # Anonymous class/struct: no name to read.
            assert class_token.token_type == tokenize.SYNTAX, class_token
            token = class_token
        else:
            self._AddBackToken(class_token)
            name_tokens, token = self.GetName()
            class_name = ''.join([t.name for t in name_tokens])
        bases = None
        if token.token_type == tokenize.SYNTAX:
            if token.name == ';':
                # Forward declaration.
                return class_type(class_token.start, class_token.end,
                                  class_name, None, templated_types, None,
                                  self.namespace_stack)
            if token.name in '*&':
                # Inline forward declaration.  Could be method or data.
                name_token = self._GetNextToken()
                next_token = self._GetNextToken()
                if next_token.name == ';':
                    # Handle data
                    modifiers = ['class']
                    return self._CreateVariable(class_token, name_token.name,
                                                class_name,
                                                modifiers, token.name, None)
                else:
                    # Assume this is a method.
                    tokens = (class_token, token, name_token, next_token)
                    self._AddBackTokens(tokens)
                    return self.GetMethod(FUNCTION_NONE, None)
            if token.name == ':':
                bases, token = self._GetBases()

        body = None
        if token.token_type == tokenize.SYNTAX and token.name == '{':
            # NOTE: the two asserts restate the enclosing condition.
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '{', token

            # Parse the class body with a nested builder so member handlers
            # see the right class name and visibility.
            ast = AstBuilder(self.GetScope(), self.filename, class_name,
                             visibility, self.namespace_stack)
            body = list(ast.Generate())

            if not self._handling_typedef:
                token = self._GetNextToken()
                if token.token_type != tokenize.NAME:
                    assert token.token_type == tokenize.SYNTAX, token
                    assert token.name == ';', token
                else:
                    # 'class X { ... } var;' - the definition also declares
                    # a variable of the new class type.
                    new_class = class_type(class_token.start, class_token.end,
                                           class_name, bases, None,
                                           body, self.namespace_stack)

                    modifiers = []
                    return self._CreateVariable(class_token,
                                                token.name, new_class,
                                                modifiers, token.name, None)
        else:
            if not self._handling_typedef:
                self.HandleError('non-typedef token', token)
            self._AddBackToken(token)

        return class_type(class_token.start, class_token.end, class_name,
                          bases, None, body, self.namespace_stack)
1544
    def handle_namespace(self):
        """Handle a namespace (named, anonymous, or alias).

        Pushes the namespace name onto namespace_stack and injects an
        internal pop token into the stream so the stack is unwound when the
        namespace ends.  Returns None; the body tokens are re-queued for
        normal parsing.
        """
        token = self._GetNextToken()
        # Support anonymous namespaces.
        name = None
        if token.token_type == tokenize.NAME:
            name = token.name
            token = self._GetNextToken()
        self.namespace_stack.append(name)
        assert token.token_type == tokenize.SYNTAX, token
        # Create an internal token that denotes when the namespace is complete.
        internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
                                        None, None)
        internal_token.whence = token.whence
        if token.name == '=':
            # TODO(nnorwitz): handle aliasing namespaces.
            name, next_token = self.GetName()
            assert next_token.name == ';', next_token
            self._AddBackToken(internal_token)
        else:
            assert token.name == '{', token
            tokens = list(self.GetScope())
            # Replace the trailing } with the internal namespace pop token.
            tokens[-1] = internal_token
            # Handle namespace with nothing in it.
            self._AddBackTokens(tokens)
        return None
1571
1572 def handle_using(self):
1573 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1574 assert tokens
1575 return Using(tokens[0].start, tokens[0].end, tokens)
1576
    def handle_explicit(self):
        """Parse the constructor that must follow an 'explicit' keyword."""
        assert self.in_class
        # Nothing much to do.
        # TODO(nnorwitz): maybe verify the method name == class name.
        # This must be a ctor.
        return self.GetMethod(FUNCTION_CTOR, None)
1583
    def handle_this(self):
        """No-op handler for the 'this' keyword."""
        pass  # Nothing to do.
1586
    def handle_operator(self):
        """Placeholder for 'operator' handling."""
        # Pull off the next token(s?) and make that part of the method name.
        pass
1590
    def handle_sizeof(self):
        """No-op: sizeof expressions are not modeled."""
        pass
1593
    def handle_case(self):
        """No-op: case labels are not modeled."""
        pass
1596
    def handle_switch(self):
        """No-op: switch statements are not modeled."""
        pass
1599
    def handle_default(self):
        """Handle 'default' in a switch; consumes the trailing ':'."""
        token = self._GetNextToken()
        assert token.token_type == tokenize.SYNTAX
        assert token.name == ':'
1604
    def handle_if(self):
        """No-op: if statements are not modeled."""
        pass
1607
    def handle_else(self):
        """No-op: else clauses are not modeled."""
        pass
1610
1611 def handle_return(self):
1612 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1613 if not tokens:
1614 return Return(self.current_token.start, self.current_token.end, None)
1615 return Return(tokens[0].start, tokens[0].end, tokens)
1616
1617 def handle_goto(self):
1618 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1619 assert len(tokens) == 1, str(tokens)
1620 return Goto(tokens[0].start, tokens[0].end, tokens[0].name)
1621
    def handle_try(self):
        """No-op: try blocks are not modeled."""
        pass  # Not needed yet.
1624
    def handle_catch(self):
        """No-op: catch blocks are not modeled."""
        pass  # Not needed yet.
1627
    def handle_throw(self):
        """No-op: throw expressions are not modeled."""
        pass  # Not needed yet.
1630
    def handle_while(self):
        """No-op: while loops are not modeled."""
        pass
1633
    def handle_do(self):
        """No-op: do/while loops are not modeled."""
        pass
1636
    def handle_for(self):
        """No-op: for loops are not modeled."""
        pass
1639
    def handle_break(self):
        """Skip a break statement through its terminating ';'."""
        self._IgnoreUpTo(tokenize.SYNTAX, ';')
1642
    def handle_continue(self):
        """Skip a continue statement through its terminating ';'."""
        self._IgnoreUpTo(tokenize.SYNTAX, ';')
1645
1646
def BuilderFromSource(source, filename):
    """Builds an AstBuilder for the given C++ source text.

    Args:
      source: 'C++ source code'
      filename: 'file1'

    Returns:
      AstBuilder
    """
    token_stream = tokenize.GetTokens(source)
    return AstBuilder(token_stream, filename)
1658
1659
def PrintIndentifiers(filename, should_print):
    """Prints all identifiers for a C++ source file.

    Args:
      filename: 'file1'
      should_print: predicate with signature: bool Function(token)
    """
    source = utils.ReadFile(filename, False)
    if source is None:
        sys.stderr.write('Unable to find: %s\n' % filename)
        return

    builder = BuilderFromSource(source, filename)
    try:
        for node in builder.Generate():
            if should_print(node):
                print(node.name)
    except KeyboardInterrupt:
        return
    except Exception:
        # Best-effort parsing: ignore files that cannot be fully parsed.
        # Unlike a bare 'except:', this does not swallow SystemExit or
        # GeneratorExit.
        pass
1682
1683
def PrintAllIndentifiers(filenames, should_print):
    """Prints all identifiers for each C++ source file in filenames.

    Args:
      filenames: ['file1', 'file2', ...]
      should_print: predicate with signature: bool Function(token)
    """
    for path in filenames:
        PrintIndentifiers(path, should_print)
1693
1694
def main(argv):
    """Parses each C++ file named in argv and dumps its AST when DEBUG is on.

    Args:
      argv: [program_name, filename, ...] command-line arguments.
    """
    for filename in argv[1:]:
        source = utils.ReadFile(filename)
        if source is None:
            continue

        print('Processing %s' % filename)
        builder = BuilderFromSource(source, filename)
        try:
            # Materialize the AST inside the try block: on Python 3,
            # filter() is lazy, so without list() a parse error would be
            # raised while iterating below, outside this handler.
            entire_ast = list(filter(None, builder.Generate()))
        except KeyboardInterrupt:
            return
        except Exception:
            # Already printed a warning, print the traceback and continue.
            traceback.print_exc()
        else:
            if utils.DEBUG:
                for ast in entire_ast:
                    print(ast)
1714
1715
# Script entry point: parse each file named on the command line.
if __name__ == '__main__':
    main(sys.argv)