Formatting a txt file of equations into the same format and then manipulating them for linear algebra calculations in Python

Question 1

An alternative approach that is possibly more robust to unstructured input is to combine the Python symbolic math package (sympy) with a few parsing tricks. This approach also works when the variables in the equations are written in an arbitrary order.

Although sympy has some tools for parsing (and your input is very close in appearance to Mathematica syntax), it appears that the sympy.parsing.mathematica module can't deal with some of the input, particularly leading minus signs.

import sympy
from sympy.parsing.sympy_parser import parse_expr
import re

def text_to_equations(text):
    """Parse a block of text into a list of ``(lhs, rhs)`` sympy expressions.

    Each non-blank line of ``text`` must have the form ``<lhs> = <rhs>``.
    All spaces are removed from the left-hand side, and a digit immediately
    followed by a lowercase letter is *assumed* to be an implicit
    multiplication (``35y2`` becomes ``35*y2``). The right-hand side is
    handed to ``parse_expr`` unchanged.

    Blank lines, including the empty string left behind by a trailing
    newline, are skipped instead of aborting the whole parse.

    Raises:
        ValueError: if a non-blank line does not contain exactly one ``=``.
    """
    eqns = []
    # splitlines() also copes with '\r\n' line endings.
    for line in text.splitlines():
        if not line.strip():
            continue
        parts = line.split('=')
        if len(parts) != 2:
            raise ValueError(
                "expected exactly one '=' in equation line: %r" % (line,))
        lhs, rhs = parts
        # clobber all the spaces
        lhs = lhs.replace(' ', '')
        # *assume* that a number followed by a letter is an
        # implicit multiplication
        lhs = re.sub(r'(\d)([a-z])', r'\g<1>*\g<2>', lhs)
        eqns.append((parse_expr(lhs), parse_expr(rhs)))
    return eqns

def get_all_symbols(eqns):
    """Return the set of sympy symbols found on the left-hand sides.

    ``eqns`` is an iterable of ``(lhs, rhs)`` pairs; only ``lhs`` is
    inspected, the right-hand side is ignored.
    """
    return {
        symbol
        for left, _right in eqns
        for symbol in left.atoms(sympy.Symbol)
    }

def text_to_eqn_matrix(text):
    """Convert a text block of linear equations into the form ``A * x = b``.

    The text is parsed with ``text_to_equations`` and the unknowns are
    collected with ``get_all_symbols``.

    Returns:
        A tuple ``(A, x, b)`` of sympy matrices, where for ``n`` equations
        in ``m`` unknowns ``A`` is the ``n x m`` coefficient matrix, ``x``
        is the ``m x 1`` column of symbols and ``b`` is the ``n x 1``
        column of right-hand sides.
    """
    # Imported here because numpy is not among the imports that precede
    # this function.
    import numpy

    eqns = text_to_equations(text)
    # A set has no defined order; sort by name so that the columns of A
    # and the rows of x are reproducible from run to run.
    symbs = sorted(get_all_symbols(eqns), key=str)
    n = len(eqns)
    m = len(symbs)
    # One row per equation, one column per unknown.
    A = numpy.zeros((n, m))
    b = numpy.zeros((n, 1))
    for i, (lhs, rhs) in enumerate(eqns):
        d = lhs.as_coefficients_dict()
        # float() rather than int() so a fractional right-hand side is
        # not silently truncated.
        b[i] = float(rhs)
        for j, s in enumerate(symbs):
            # NOTE(review): relies on the coefficient dict yielding 0 for
            # symbols absent from this equation -- confirm for the sympy
            # version in use.
            A[i, j] = d[s]
    x = sympy.Matrix([symbs]).T
    return sympy.Matrix(A), x, sympy.Matrix(b)

# Example: build A, x and b from three equations in y1, y2, y3.
s = '''-38 y1  +  35 y2  +  31 y3  = -3047
11 y1  + -13 y2  + -34 y3  = 784
34 y1  + -21 y2  +  19 y3  = 2949'''
A, x, b = text_to_eqn_matrix(s)
# Single-argument print(...) runs on both Python 2 and Python 3.
print(A)
print(x)
print(b)

Question 2

For your example format, the file is easy to process with numpy.loadtxt():

import numpy as np
# Load every whitespace-separated token as a string, keep every third
# column (the numbers; the variable names and the '+' / '=' tokens fall in
# between), and convert to floats. The builtin float is used because the
# np.float alias has been removed from recent NumPy releases.
data = np.loadtxt("equations.txt", dtype=str)[:, ::3].astype(float)
a = data[:, :-1]  # coefficient matrix: all kept columns but the last
b = data[:, -1]   # right-hand side: the last kept column
x = np.linalg.solve(a, b)

The steps are: