Question

I'm trying to extract values from numerous text files in Python. The numbers I require are in scientific notation. My result text files are as follows:

    ADDITIONAL DATA

Tip Rotation (degrees)
Node    , UR[x] , UR[y] , UR[z]
21  , 1.0744    , 1.2389    , -4.3271
22  , -1.0744   , -1.2389   , -4.3271
53  , 0.9670    , 1.0307    , -3.8990
54  , -0.0000   , -0.0000   , -3.5232
55  , -0.9670   , -1.0307   , -3.8990

Mean rotation variation along blade
Region  , Rotation (degrees)
Partition line 0,   7.499739E-36
Partition line 1,   -3.430092E-01
Partition line 2,   -1.019287E+00
Partition line 3,   -1.499808E+00
Partition line 4,   -1.817651E+00
Partition line 5,   -2.136372E+00
Partition line 6,   -2.448321E+00
Partition line 7,   -2.674414E+00
Partition line 8,   -2.956737E+00
Partition line 9,   -3.457806E+00
Partition line 10,  -3.995106E+00

I've used regular expressions successfully in the past, but this one doesn't seem to want to pick up the numbers. The number of nodes changes between my results files, so I can't search by line number. My Python script is as follows.

import re
from pylab import *
from scipy import *
import matplotlib
from numpy import *
import numpy as np
from matplotlib import pyplot as plt
import csv

########################################

minTheta        = -90
maxTheta        = 0
thetaIncrements = 10

numberOfPartitions = 10

########################################

# Matches one "Partition line <index>, <value>" row.  Whitespace around the
# comma is matched with \s* so that tab-separated files parse as well as
# space-separated ones, and the value may be plain decimal or scientific
# notation with an optional sign on both mantissa and exponent.
partitionLinePattern = re.compile(
    r'\s*Partition line\s+(\d+)\s*,\s*'
    r'([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)'
)


def readPartitionRotations(fileName):
    """Parse the "Partition line" rows of one results file.

    The file is read once, line by line.  Rows that do not start with
    "Partition line <index>," are ignored, so the length of any other
    section in the file does not matter.

    Parameters:
        fileName: path of the results file to read.

    Returns:
        dict mapping the integer partition index to its value as a float.
        If an index occurs more than once, the last occurrence wins.

    Raises:
        IOError/OSError: if the file cannot be opened.
    """
    rotations = {}
    with open(fileName) as f:
        for line in f:
            match = partitionLinePattern.match(line)
            if match:
                rotations[int(match.group(1))] = float(match.group(2))
    return rotations


if __name__ == '__main__':
    # Floor division keeps the count an integer on both Python 2 and 3;
    # linspace and zeros both need an integer size.
    numberOfThetas = ((maxTheta - minTheta) // thetaIncrements) + 1
    print('Number of thetas = '+str(numberOfThetas))

    thetas = np.linspace(minTheta, maxTheta, numberOfThetas)
    print('Thetas = '+str(thetas))
    part = np.linspace(1, numberOfPartitions, numberOfPartitions)
    print('Parts = '+str(part))

    # One row per partition line (0..numberOfPartitions), one column per theta.
    meanRotations = np.zeros((numberOfPartitions+1, numberOfThetas))

    for m, theta in enumerate(range(minTheta, maxTheta + 1, thetaIncrements)):
        fileName = str(theta)+'.0.txt'
        rotations = readPartitionRotations(fileName)

        # Walk the indices in order so values are reported and stored row by
        # row; indices missing from the file leave the zero initial value.
        for partition in range(numberOfPartitions + 1):
            if partition in rotations:
                print(rotations[partition])
                meanRotations[partition, m] = rotations[partition]

    print('Mean rotations on partition lines = ')
    print(meanRotations)

Any help would be much appreciated!!

Was it helpful?

Solution 2

I don't get the need for regex, to be honest. Something like this should do what you need:

# Scan the results file once; for every row that starts with
# 'Partition line', convert the second comma-separated field to a float.
with open(fileName) as resultsFile:
    for row in resultsFile:
        if not row.startswith('Partition line'):
            # read other file contents with different if clauses
            continue
        fields = row.split(',')
        number = float(fields[1])
        print(number)  # or do whatever you want with it

OTHER TIPS

Is this file format a standard one? If so, you can get all your float values with another technique. Here is the code:

# Sample results text.  It is bound to 'text' rather than 'str' so the
# builtin str() stays usable in the rest of the script.
text = """    ADDITIONAL DATA

Tip Rotation (degrees)
Node    , UR[x] , UR[y] , UR[z]
21  , 1.0744    , 1.2389    , -4.3271
22  , -1.0744   , -1.2389   , -4.3271
53  , 0.9670    , 1.0307    , -3.8990
54  , -0.0000   , -0.0000   , -3.5232
55  , -0.9670   , -1.0307   , -3.8990

Mean rotation variation along blade
Region  , Rotation (degrees)
Partition line 0,   7.499739E-36
Partition line 1,   -3.430092E-01
Partition line 2,   -1.019287E+00
Partition line 3,   -1.499808E+00
Partition line 4,   -1.817651E+00
Partition line 5,   -2.136372E+00
Partition line 6,   -2.448321E+00
Partition line 7,   -2.674414E+00
Partition line 8,   -2.956737E+00
Partition line 9,   -3.457806E+00
Partition line 10,  -3.995106E+00
"""

# Split on any whitespace; each partition row becomes four tokens:
# 'Partition', 'line', '<index>,', '<value>'.
arr = text.split()
for entry in enumerate(arr):
    print(entry)  # just to see the list

# The first value sits three tokens after the first 'Partition' token.
# Locating it by search instead of a fixed offset keeps this working when
# the number of node rows above the partition table changes.
start = arr.index('Partition') + 3
step = 4    # every fourth token from there on is a value
ar = arr[start::step]

floatAr = [float(x) for x in ar]

print(floatAr)

At the end you will receive a list called floatAr with all your float values. You can add a try-except block for better usability.

Or, alternatively, if you want to use regex, here is the code:

<!--language:python -->

import re

# Sample results text.  It is bound to 'text' rather than 'str' so the
# builtin str() stays usable in the rest of the script.
text = """   ADDITIONAL DATA

Tip Rotation (degrees)
Node    , UR[x] , UR[y] , UR[z]
21  , 1.0744    , 1.2389    , -4.3271
22  , -1.0744   , -1.2389   , -4.3271
53  , 0.9670    , 1.0307    , -3.8990
54  , -0.0000   , -0.0000   , -3.5232
55  , -0.9670   , -1.0307   , -3.8990

Mean rotation variation along blade
Region  , Rotation (degrees)
Partition line 0,   7.499739E-36
Partition line 1,   -3.430092E-01
Partition line 2,   -1.019287E+00
Partition line 3,   -1.499808E+00
Partition line 4,   -1.817651E+00
Partition line 5,   -2.136372E+00
Partition line 6,   -2.448321E+00
Partition line 7,   -2.674414E+00
Partition line 8,   -2.956737E+00
Partition line 9,   -3.457806E+00
Partition line 10,  -3.995106E+00"""

# A whitespace-delimited number in scientific notation:
#   (?<!\S) / (?!\S)  the token must be bounded by whitespace or the ends
#                     of the text, so fragments inside other tokens are
#                     not picked up
#   [-+]?\d+(?:\.\d*)?  signed mantissa; a leading 0 is allowed so that
#                       0.000000E+00 is found, and the dot is escaped so it
#                       only matches a decimal point
#   [eE][-+]?\d+        exponent with at most one sign
# Written as a raw string so the backslashes reach the regex engine as-is.
regex = r'(?<!\S)[-+]?\d+(?:\.\d*)?[eE][-+]?\d+(?!\S)'

values = re.findall(regex, text)
floatAr = [float(x) for x in values]
print(floatAr)

By the way, pythex is a good online regex checker for Python.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top