I have a CSV file which is formatted somewhat like this:
name subname value1 value2
a a 1 21
a a 2 22
a a 3 23
a a 4 24
b a 5 25
b a 6 26
b a 7 27
b a 8 28
c c 9 29
c c 10 30
c c 11 31
c c 12 32
....
etc
Using a simple CSV to json script I have managed to output each row as a valid json entry, however this is very redundant since there are so many repeated values.
I am trying to read this file and output it to a form that looks just like this:
[
{
"name":"a",
"subname":"a",
"data": {
"attr1":{"name":"value1", "values":[1,2,3,4]},
"attr2":{"name":"value2", "values":[21,22,23,24]}
}
},
{
"name":"b",
"subname":"a",
"data": {
"attr1":{"name":"value1", "values":[5,6,7,8]},
"attr2":{"name":"value2", "values":[25,26,27,28]}
}
},
{
"name":"c",
"subname":"c",
"data": {
"attr1":{"name":"value1", "values":[9,10,11,12]},
"attr2":{"name":"value2", "values":[29,30,31,32]}
}
},
....
etc
]
I know that the script should work something like this:
loop until no more rows:
skip row 1
for the next 4 rows
{
"name":row 1, column 1 ,
"subname":row 1, column 2 ,
"data": {
"attr1":{"name":"value1", "values":[row 1 to 4, column 3]}
"attr2":{"name":"value2", "values":[row 1 to 4, column 4]}
}
}
With this particular dataset there will always be this pattern (however, the actual data has many more entries and columns). I know what I would like for output, but I am not exactly sure how to implement it.
How would I do this with python?
Any suggestions and solutions are greatly appreciated.
edit: Here is the solution in straight javascript using underscore.js
// Groups the rows of a column-oriented grid by (name, subname) and collects
// every remaining column into data.attrN = {name: <header>, values: [...]}.
//
// Inputs read from the surrounding object:
//   headers - column titles; headers[n + 2] labels the n-th value column.
//   grid    - array of columns: grid[0] holds the names, and the columns from
//             index 2 onward hold the values.
// NOTE(review): grid.transpose() is not a built-in Array method; it is
// assumed to be provided elsewhere and to return the grid as an array of rows.
var headers = this.get('headers');
var grid = this.get('grid');
var transposed = grid.transpose();
var tables = [];
var rows = [];
var keys = ["name", "subname"];

// Every column after "name" and "subname" is a value column.
var numberOfEntries = grid.length - 2;
_(numberOfEntries).times(function(n) {
    keys.push("attr" + (n + 1));
});

// Turn each row array into an object keyed by name / subname / attr1..attrN.
_.each(transposed, function(row) {
    rows.push(_.object(keys, row));
});

var names = _.uniq(grid[0]);
_.each(names, function(name) {
    var entries = _.where(rows, {name: name});
    _.each(entries, function(entry) {
        // Reuse the table already created for this (name, subname) pair, if any.
        var matches = _.where(tables, {name: entry.name, subname: entry.subname});
        var obj = matches[0];
        if (!obj) {
            obj = {name: entry.name, subname: entry.subname, data: {}};
            tables.push(obj);
        }
        _(numberOfEntries).times(function(n) {
            var attr = "attr" + (n + 1);
            if (!obj.data[attr]) {
                obj.data[attr] = {"name": headers[n + 2], "values": []};
            }
            // Push unconditionally so the first row of each group is recorded
            // as well, not only the rows seen after the attribute was created.
            obj.data[attr].values.push(entry[attr]);
        });
    });
});