Here's an example of building a tree and then recursively building a list of the leaves. The sample text is take from the online standford parser.
# class for tree nodes
class Node:
def __init__(self,start):
self.start = start
self.children = []
self.text = ''
# make a tree
def make_tree(s):
stack = []
nodes = []
cur = None
root = None
for i, c in enumerate(s):
if c == '(':
cur = Node(i)
if stack:
stack[-1].children.append(cur)
stack.append(cur)
if root is None:
root = cur
elif c == ')' and stack:
topnode = stack.pop()
text = s[topnode.start + 1: i]
topnode.text = text
return root
# list of leaves
def list_of_leaves(node):
result = []
for child in node.children:
result.extend(list_of_leaves(child))
if not result:
return [node]
return result
s = """(ROOT
(SQ (VBD Did)
(NP (NNP Matt))
(VP (VB win)
(NP (DT the) (NNS men) (NN slalom)))
(. ?)))"""
root = make_tree(s)
for node in list_of_leaves(root):
print node.text