Grouping values for grammar generation in Python

Question 1

Here is another solution; please check it:

#!/usr/bin/python

# Merge consecutive grammar rules that share the same left-hand side.
#
# Each input line looks like ``LHS--->RHS``.  Adjacent lines with an identical
# LHS are collapsed into one rule whose right-hand sides are joined by spaces.
# Non-adjacent repeats of an LHS stay separate, so the original order is kept.

marker = '--->'


def group_rules(lines, sep=marker):
    """Group adjacent ``LHS<sep>RHS`` lines by their left-hand side.

    Args:
        lines: iterable of text lines (an open file works).
        sep: separator between the left- and right-hand side.

    Returns:
        A list of ``(lhs, [rhs, ...])`` tuples, one per run of adjacent lines
        with the same ``lhs``, in input order.  Empty input gives ``[]``.

    Raises:
        ValueError: if a non-blank line does not contain ``sep``.
    """
    groups = []
    for line in lines:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        # maxsplit=1 keeps any later separator as part of the right-hand side.
        key, value = [part.strip() for part in line.split(sep, 1)]
        # Compare against the last stored key rather than a truthiness check,
        # so that an empty left-hand side is still a key of its own.
        if groups and groups[-1][0] == key:
            groups[-1][1].append(value)
        else:
            groups.append((key, [value]))
    return groups


def format_rules(groups, sep=marker):
    """Render ``(lhs, [rhs, ...])`` groups as ``'lhs<sep> rhs rhs ...'`` strings."""
    return ['%s%s %s' % (key, sep, ' '.join(values)) for key, values in groups]


if __name__ == '__main__':
    with open('txt', 'r') as fh:
        grouped = group_rules(fh)

    for rule in format_rules(grouped):
        # Parenthesised single-argument print works on Python 2 and Python 3.
        print(rule)

Output:

NP---> N_NNP N_NN_S_NU N_NNP N_NNP
VGF---> V_VM_VF
NP---> N_NN

Question 2

from itertools import groupby

# Separator written between the left-hand side and the merged right-hand sides.
sep = "--->  "

# NOTE: this name shadows the built-in ``input``; it is kept unchanged so that
# anything referring to the sample text by this name keeps working.
input = """
NP--->  N_NNP
NP--->  N_NN_S_NU
NP--->  N_NNP
NP--->  N_NNP
VGF--->  V_VM_VF
NP--->  N_NN
"""

# Split on the bare arrow and strip both halves, so the amount of whitespace
# after the arrow in the input does not matter.
arrow = sep.strip()

# One [lhs, rhs] pair per non-blank input line.
formatted = [
    [part.strip() for part in line.split(arrow, 1)]
    for line in input.splitlines()
    if line.strip()
]

# groupby() merges only *adjacent* items with equal keys, which is the wanted
# behaviour: a later run of the same left-hand side stays a separate rule.
# A list keeps the groups in input order, and storing the key itself (instead
# of encoding it into a "key_index" string) keeps underscores in it intact.
output = [
    (key, [pair[1] for pair in run])
    for key, run in groupby(formatted, key=lambda pair: pair[0])
]

for key, result in output:
    print("{0}{1}{2}".format(key, sep, " ".join(result)))

Output:

NP--->  N_NN
VGF--->  V_VM_VF
NP--->  N_NNP N_NN_S_NU N_NNP N_NNP

Demo: http://ideone.com/NadEmX

Question 3

Use the code below:

import os
def parser(inFile, outFile="/home/akshay/myfile_new.txt"):
    """Merge consecutive ``token--->data`` lines of a file and write the result.

    Adjacent lines with the same token (the text before ``--->``) are
    collapsed into a single ``token--->data1 data2 ...`` line.  A token that
    reappears later, after a different token, starts a new output line.
    Blank lines are ignored.

    Args:
        inFile: path of the file to read.
        outFile: path of the file to write; defaults to the location that
            was previously hard-coded.

    Raises:
        Exception: if ``inFile`` does not exist.
        ValueError: if a non-blank line does not contain ``--->``.
    """
    if not os.path.exists(inFile):
        raise Exception("File does not exist, inFile: %s" %inFile)

    # (token, [data, ...]) pairs, one per run of equal tokens, in file order.
    groups = []
    with open(inFile) as src:
        for rawLine in src:
            line = rawLine.strip()
            if not line:
                continue
            # maxsplit=1 keeps a later '--->' as part of the data.
            curToken, data = line.split('--->', 1)
            # Compare with the last stored token instead of testing its
            # truthiness, so a run with an empty token is not dropped.
            if groups and groups[-1][0] == curToken:
                groups[-1][1].append(data.strip())
            else:
                groups.append((curToken, [data.strip()]))

    outData = '\n'.join(
        "%s--->%s" % (token, ' '.join(items)) for token, items in groups
    )

    with open(outFile, 'w') as dst:
        dst.write(outData)

# Script entry point: convert the sample file when this module is run directly.
if __name__ == "__main__":
    parser("/home/akshay/myfile.txt")