Here's a way to do it without pandas (which means you have to keep track of the column labels yourself):
from collections import defaultdict

try:
    from itertools import zip_longest
except ImportError:  # Python 2 only ships the function under its old name
    from itertools import izip_longest as zip_longest

import numpy as np

# Sample data: 50 random values in `a`, each tagged with a group key in `b`
# (integers drawn from 10..14).
a = np.random.rand(50)
b = np.random.randint(10, 15, 50)

# Collect the values of `a` under their key, preserving their original order.
d = defaultdict(list)
for value, key_val in zip(a, b):
    d[key_val].append(value)

# Column labels have to be tracked by hand; sorting the keys makes the column
# order deterministic instead of depending on dict iteration order.
labels = sorted(d)

# One column per key. The groups differ in length, so zip_longest pads the
# shorter ones with NaN to produce a rectangular array.
output = np.asarray(list(zip_longest(*(d[label] for label in labels),
                                     fillvalue=np.nan)))
print(a)
print(b)
print(output)
This gives:
a:
array([ 0.98688273, 0.95584584, 0.91011945, 0.56402919, 0.86185936,
0.09380343, 0.69290659, 0.97238284, 0.81297425, 0.73446398,
0.25927151, 0.44622982, 0.20537961, 0.61665218, 0.90168399,
0.58556404, 0.47017152, 0.32278718, 0.15044929, 0.07859976,
0.26715756, 0.38281878, 0.30169241, 0.47785937, 0.15377038,
0.93395325, 0.79099068, 0.92471442, 0.03154578, 0.0437627 ,
0.31711433, 0.78550517, 0.77062104, 0.76002167, 0.1842867 ,
0.52935392, 0.16038216, 0.46510856, 0.4311615 , 0.73923847,
0.45499238, 0.2630405 , 0.67722848, 0.1391463 , 0.50800704,
0.50618842, 0.19540159, 0.38150066, 0.82831838, 0.3383787 ])
b:
array([14, 10, 13, 12, 12, 13, 13, 12, 11, 10, 10, 13, 14, 12, 11, 12, 14,
12, 12, 14, 11, 10, 13, 13, 13, 10, 14, 11, 13, 11, 11, 11, 12, 10,
11, 11, 14, 12, 12, 14, 13, 10, 11, 14, 13, 11, 10, 11, 12, 12])
output:
array([[ 0.95584584, 0.81297425, 0.56402919, 0.91011945, 0.98688273],
[ 0.73446398, 0.90168399, 0.86185936, 0.09380343, 0.20537961],
[ 0.25927151, 0.26715756, 0.97238284, 0.69290659, 0.47017152],
[ 0.38281878, 0.92471442, 0.61665218, 0.44622982, 0.07859976],
[ 0.93395325, 0.0437627 , 0.58556404, 0.30169241, 0.79099068],
[ 0.76002167, 0.31711433, 0.32278718, 0.47785937, 0.16038216],
[ 0.2630405 , 0.78550517, 0.15044929, 0.15377038, 0.73923847],
[ 0.19540159, 0.1842867 , 0.77062104, 0.03154578, 0.1391463 ],
[ nan, 0.52935392, 0.46510856, 0.45499238, nan],
[ nan, 0.67722848, 0.4311615 , 0.50800704, nan],
[ nan, 0.50618842, 0.82831838, nan, nan],
[ nan, 0.38150066, 0.3383787 , nan, nan]])