To do this you need to get the tokens, not the cursors. If I run this script on the file above:
import sys
import clang.cindex
def srcrangestr(x):
    """Format a source range as 'file:line:col - file:line:col'.

    x must expose ``start`` and ``end`` attributes, each of which has
    ``file``, ``line`` and ``column`` attributes (main() passes a token's
    ``extent``). The file values are rendered with ``%s``, i.e. via ``str()``.
    """
    return '%s:%d:%d - %s:%d:%d' % (
        x.start.file, x.start.line, x.start.column,
        x.end.file, x.end.line, x.end.column,
    )
def main():
    """Parse the file named by sys.argv[1] as C++ and dump every token.

    For each token of the translation unit this prints three lines: the
    token kind, the token's source extent (formatted by srcrangestr), and
    the token spelling in single quotes.
    """
    if len(sys.argv) < 2:
        # Without this guard a missing argument surfaces as a bare IndexError.
        sys.exit("usage: %s <source-file>" % sys.argv[0])

    index = clang.cindex.Index.create()
    # '-x c++' forces the input to be treated as C++ regardless of extension
    # (the example input is a .h header).
    tu = index.parse(sys.argv[1], args=['-x', 'c++'])

    # Walk the tokens, not the cursors: comments and preprocessor pieces only
    # show up in the token stream.
    for token in tu.cursor.get_tokens():
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print(token.kind)
        print(" " + srcrangestr(token.extent))
        print(" '" + str(token.spelling) + "'")
# Run the token dump only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
I get the following:
TokenKind.PUNCTUATION
test2.h:1:1 - test2.h:1:2
'#'
TokenKind.IDENTIFIER
test2.h:1:2 - test2.h:1:8
'ifndef'
TokenKind.IDENTIFIER
test2.h:1:9 - test2.h:1:21
'__HEADER_FOO'
TokenKind.PUNCTUATION
test2.h:2:1 - test2.h:2:2
'#'
TokenKind.IDENTIFIER
test2.h:2:2 - test2.h:2:8
'define'
TokenKind.IDENTIFIER
test2.h:2:9 - test2.h:2:21
'__HEADER_FOO'
TokenKind.COMMENT
test2.h:4:1 - test2.h:4:11
'//+reflect'
TokenKind.KEYWORD
test2.h:5:1 - test2.h:5:6
'class'
TokenKind.IDENTIFIER
test2.h:5:7 - test2.h:5:10
'Foo'
TokenKind.PUNCTUATION
test2.h:6:1 - test2.h:6:2
'{'
TokenKind.KEYWORD
test2.h:7:5 - test2.h:7:11
'public'
TokenKind.PUNCTUATION
test2.h:7:11 - test2.h:7:12
':'
TokenKind.KEYWORD
test2.h:8:5 - test2.h:8:12
'private'
TokenKind.PUNCTUATION
test2.h:8:12 - test2.h:8:13
':'
TokenKind.KEYWORD
test2.h:9:9 - test2.h:9:12
'int'
TokenKind.IDENTIFIER
test2.h:9:13 - test2.h:9:18
'm_int'
TokenKind.PUNCTUATION
test2.h:9:18 - test2.h:9:19
';'
TokenKind.COMMENT
test2.h:9:20 - test2.h:9:30
'//+reflect'
TokenKind.PUNCTUATION
test2.h:10:1 - test2.h:10:2
'}'
TokenKind.PUNCTUATION
test2.h:10:2 - test2.h:10:3
';'
TokenKind.PUNCTUATION
test2.h:12:1 - test2.h:12:2
'#'
TokenKind.IDENTIFIER
test2.h:12:2 - test2.h:12:7
'endif'
This should be enough for me to work with.