The following script works properly if:
- csv aren't too big (i.e. can be loaded in memory)
- the first row of the CSV contains the column names
You only have to fill files
and final_headers
import csv
files = ['c1.csv', 'c2.csv', 'c3.csv']
final_headers = ['col1', 'col2', 'col3']
merged_rows = set()
for f in files:
with open(f, 'rb') as csv_in:
csvreader = csv.reader(csv_in, delimiter=',')
headers = dict((h, i) for i, h in enumerate(csvreader.next()))
for row in csvreader:
merged_rows.add(tuple(row[headers[x]] for x in final_headers))
with open('output.csv', 'wb') as csv_out:
csvwriter = csv.writer(csv_out, delimiter=',')
csvwriter.writerows(merged_rows)