Python process text file with these criteria

Question 1

You can loop through the lines in a file like this:

with open('filename.txt') as f:
    # Iterate the file object directly: it yields one line at a time,
    # whereas f.readlines() would first load every line into a list.
    for line in f:
        pass  # do stuff

To take the information from a line that you want, you can do this:

# Split the current line into whitespace-separated tokens.
items = line.split()
# First token wrapped in parentheses, with the parentheses removed
# (None when the line has no such token).
wrapped = next(
    (tok.strip('()') for tok in items
     if tok.startswith('(') and tok.endswith(')')),
    None,
)
cleaned = [] if wrapped is None else [wrapped]
# The last token of the line is always kept.
cleaned.append(items[-1])
cleaned = ' '.join(cleaned)

Full program:

# Path of the text file that is read line by line.
in_file = 'reach.txt'
# Path of the file the cleaned lines are written to.
out_file = 'outreach.txt'

def clean(string):
    """Reduce a line to "<first parenthesised token> <last token>".

    The line is split on whitespace. The first token that both starts with
    "(" and ends with ")" is kept with its parentheses stripped, followed by
    the last token of the line. When no token is parenthesised, only the
    last token is returned.

    Args:
        string: One line of text; a trailing newline is fine.

    Returns:
        The reduced line. A falsy input is returned unchanged, and a line
        containing only whitespace yields ''.
    """
    if not string:
        return string

    items = string.split()
    if not items:
        # Whitespace-only line (e.g. a bare "\n"): it is truthy, so it gets
        # past the guard above, but there is no last token to index.
        return ''

    code = next(
        (tok.strip('()') for tok in items
         if tok.startswith('(') and tok.endswith(')')),
        None,
    )
    cleaned = [] if code is None else [code]
    cleaned.append(items[-1])
    return ' '.join(cleaned)

with open(in_file) as src, open(out_file, 'w') as dst:
    # Clean every input line, then emit the results newline-separated
    # in a single write.
    cleaned_lines = map(clean, src)
    dst.write('\n'.join(cleaned_lines))

Question 2

# Input file, opened for reading below.
fileName='reach.txt'
# Output file, opened for writing below.
fileName2='outreach.txt'

def isfloat(s):
    """Report whether ``float(s)`` succeeds.

    Returns True when the conversion works and False when it raises
    ValueError; any other exception propagates to the caller.
    """
    try:
        float(s)
    except ValueError:
        return False
    else:
        return True

# For every row of the input, write "<code> <value>" to the output, where
# <code> is the first token containing both "(" and ")" (parentheses
# stripped) and <value> is the right-most token that parses as a float.
# Both files are managed by the with statement so they are closed even if
# an exception interrupts the loop.
with open(fileName2, 'w') as g, open(fileName, 'r') as fh:
    for row in fh:
        x = row.split()
        # Recomputed for every row so a value from a previous row can never
        # leak into the current one.
        first = next(
            (item.strip('()') for item in x if '(' in item and ')' in item),
            None,
        )
        # Scan from the end so the right-most float wins; every token is
        # examined, including the first one.
        second = next((item for item in reversed(x) if isfloat(item)), None)
        if first is None or second is None:
            # Blank row, or a row missing either piece: nothing to emit.
            continue
        print(first, second)
        g.write(' '.join((first, second)) + '\n')

Which gives:

ZS.MC.BGE.0424SPVCOS 14.08
ZS.MC.BLK.0424SPVCOS 14.08
ZS.MC.GRY.0424SPVCOS 14.08
ZS.MC.BLK.0525SPVCOS3 14.08
ZS.MC.GRY.0525SPVCOS2 14.08
ZS.MC.BGE.0424SPVCOS 14.08

There we go: this code tolerates several kinds of irregularity in the data. For instance, it still works if the floating-point value isn't at the very end of the line, or if the (...) token sits in, let's say, the first position rather than the second.

Question 3

# Copy reach.txt to outreach.txt, keeping from each line only the text after
# the first "(": parentheses are stripped from every token, purely numeric
# tokens are dropped, and the last token is always kept.

# Characters that make up a purely numeric token such as "21.12".
blacklist = set('1234567890.')
with open('reach.txt') as infile, open('outreach.txt', 'w') as outfile:
    for line in infile:
        line = line.strip()
        if not line:
            continue
        # partition() never raises, so a line without any "(" is skipped
        # instead of aborting the whole run with a ValueError on unpacking.
        _left, paren, line = line.partition("(")
        if not paren:
            continue
        parts = [p.rstrip(")").lstrip("(") for p in line.split()]
        # Index of the final token, which is kept even when it is numeric.
        last = len(parts) - 1
        parts = [
            p for i, p in enumerate(parts)
            if i == last or not all(char in blacklist for char in p)
        ]
        outfile.write("%s\n" %(' '.join(parts)))

With your example reach.txt, I get

ZS.MC.BGE.0424SPVCOS 14.08
ZS.MC.BLK.0424SPVCOS 14.08
ZS.MC.GRY.0424SPVCOS 14.08
ZS.MC.BLK.0525SPVCOS3 14.08
ZS.MC.GRY.0525SPVCOS2 14.08
ZS.MC.BGE.0424SPVCOS 14.08

Question 4

You could try using regular expressions if every line has something like (code you want) (thing you don't want).

import re
# Path of the file read by the loop below.
infile = 'reach.txt'
# Path of the file the extracted pairs are written to.
outfile = 'outreach.txt'

# Compiled once, outside the loop. Written as a raw string so that "\(" and
# "\." reach the regex engine untouched instead of being treated as
# (invalid) string escape sequences.
# NOTE: the A-z range also covers the characters [ \ ] ^ _ ` that sit
# between "Z" and "a" in ASCII.
code_pattern = re.compile(r"(\([A-z0-9\.]*\))")

with open(infile, 'r') as inf, open(outfile, 'w') as outf:
    for line in inf:
        # each line has "* (what you want) (trash) *"
        matches = code_pattern.findall(line)
        if not matches:
            # Blank line or no parenthesised token: nothing to extract.
            continue
        # always take first one; the match includes the surrounding
        # parentheses, so slice them off
        first = matches[0][1:-1]

        items = line.split()
        # Last whitespace-separated token of the line (not the last
        # character, which would be the trailing newline).
        second = items[-1]
        to_write = " ".join((first, second))
        outf.write(to_write + "\n")

The regex "(\([A-z0-9\.]*\))" matches any combination (denoted by [ ]*) of:

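letters (A-z; note that this ASCII range also covers the six characters between Z and a, namely [ \ ] ^ _ and `, so write A-Za-z to match letters only),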
numbers (0-9), and
period (\.)

that is inside parentheses (\( \)).

From your example, there will always be two matches, something like (ZS.MC.BLK.0424SPVCOS) and (21.12); the parentheses are part of each match because the capture group encloses them. re.findall will find both of these in the order given. Since the one you want is always first, grab that with re.findall(regex, line)[0].