Question

I've got a text file with the following information:

NP--->  N_NNP
NP--->  N_NN_S_NU
NP--->  N_NNP
NP--->  N_NNP
VGF---> V_VM_VF
NP--->  N_NN

I want to group the pairs together as follows:

NP--->N_NNP N_NN_S_NU N_NNP N_NNP
VGF--->V_VM_VF
NP--->N_NN

This is for sentence generation, so the groups must stay in the same order as in the input. Also, the input comes from a file.

That is, adjacent values are grouped.

How can I do this?

Was it helpful?

Solution 2

Here is another solution:

#!/usr/bin/python
# Merge runs of adjacent "KEY--->VALUE" lines that share the same key,
# keeping the runs in the order in which they appear in the input.

marker = '--->'


def group_adjacent(lines, marker=marker):
    """Collapse consecutive lines with the same key into one group.

    Args:
        lines: iterable of strings shaped like ``KEY--->VALUE``. Surrounding
            whitespace is ignored and blank lines are skipped.
        marker: the separator between key and value.

    Returns:
        A list of ``(key, [value, ...])`` tuples, one per run of adjacent
        lines sharing a key, in input order. A key that reappears after a
        different key starts a new group.

    Raises:
        ValueError: if a non-blank line does not contain ``marker``.
    """
    groups = []
    for raw in lines:
        text = raw.strip()
        if not text:
            # A trailing or stray empty line must not abort the whole run.
            continue
        key, found, value = text.partition(marker)
        if not found:
            raise ValueError('missing %r in line: %r' % (marker, text))
        key, value = key.strip(), value.strip()
        # Compare against the last group only: grouping is by adjacency,
        # not by overall key equality.
        if groups and groups[-1][0] == key:
            groups[-1][1].append(value)
        else:
            groups.append((key, [value]))
    return groups


if __name__ == '__main__':
    with open('txt', 'r') as fh:
        lol = group_adjacent(fh)

    for k, values in lol:
        # Parenthesised single argument prints the same on Python 2 and 3.
        print('%s%s %s' % (k, marker, ' '.join(values)))

Output:

NP---> N_NNP N_NN_S_NU N_NNP N_NNP
VGF---> V_VM_VF
NP---> N_NN

OTHER TIPS

from itertools import groupby
from operator import itemgetter

# Separator used when printing; parsing only relies on the arrow itself so
# that the amount of whitespace after it does not matter.
sep = "--->  "

text = """
NP--->  N_NNP
NP--->  N_NN_S_NU
NP--->  N_NNP
NP--->  N_NNP
VGF--->  V_VM_VF
NP--->  N_NN
"""

arrow = sep.strip()

# (key, value) pairs in input order; blank lines are ignored.
formatted = []
for line in text.splitlines():
    line = line.strip()
    if line:
        key, _, value = line.partition(arrow)
        formatted.append((key.strip(), value.strip()))

# groupby only merges *adjacent* items with equal keys, which is exactly the
# required grouping, and a list keeps the groups in their original order
# (a plain dict would not on older Python versions).
output = [
    (key, [value for _, value in run])
    for key, run in groupby(formatted, key=itemgetter(0))
]

for key, result in output:
    print("{0}{1}{2}".format(key, sep, " ".join(result)))

Output:

NP--->  N_NN
VGF--->  V_VM_VF
NP--->  N_NNP N_NN_S_NU N_NNP N_NNP

demo : http://ideone.com/NadEmX

Use the code below:

import os
def parser(inFile, outFile="/home/akshay/myfile_new.txt"):
    """Group adjacent ``TOKEN--->DATA`` lines of a file and write the result.

    Consecutive lines that share the same token are merged into a single
    ``TOKEN--->DATA1 DATA2 ...`` line. Groups are written in the order in
    which they occur in the input; a token that reappears after a different
    token starts a new group. Blank lines are skipped.

    Args:
        inFile: path of the text file to read.
        outFile: path of the file to write the grouped lines to.

    Raises:
        Exception: if ``inFile`` does not exist.
        ValueError: if a non-blank line does not contain ``--->``.
    """
    if not os.path.exists(inFile):
        raise Exception("File does not exist, inFile: %s" %inFile)

    # Each entry is (token, [data, ...]) for one run of adjacent lines.
    groups = []
    # The context manager closes the input handle even if parsing fails.
    with open(inFile) as source:
        for raw in source:
            line = raw.strip()
            if not line:
                continue
            token, found, data = line.partition('--->')
            if not found:
                raise ValueError("Missing '--->' in line: %r" % line)
            token, data = token.strip(), data.strip()
            # Only the most recent group can be extended: grouping is by
            # adjacency, not by overall token equality.
            if groups and groups[-1][0] == token:
                groups[-1][1].append(data)
            else:
                groups.append((token, [data]))

    outData = '\n'.join(
        "%s--->%s" % (token, ' '.join(values)) for token, values in groups
    )

    with open(outFile, 'w') as target:
        target.write(outData)

if __name__ == "__main__":
    # Script entry point: group the lines of the fixed input file.
    parser("/home/akshay/myfile.txt")
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top