Вопрос

I'm trying to multiply 2 big matrices under a memory limit using HDF5 (PyTables), but the numpy.dot function gives me an error:

ValueError: array is too big

Do I need to implement the matrix multiplication myself, perhaps blockwise, or is there another Python function similar to numpy.dot?

import numpy as np
import time
import tables
import cProfile
import numexpr as ne

# Build two large matrices on disk (HDF5 CArrays) batch-by-batch,
# then attempt an in-memory np.dot on the disk-backed arrays.
n_row=10000
n_col=100
n_batch=10

rows = n_row
cols = n_col
batches = n_batch

atom = tables.UInt8Atom()  # NOTE(review): UInt8 truncates the random floats written below — Float32Atom/Float64Atom is probably intended
filters = tables.Filters(complevel=9, complib='blosc') # tune parameters

# Raw string so backslashes in the Windows path can never be read as
# escape sequences.
fileName_a = r'C:\carray_a.h5'
shape_a = (rows*batches, cols)  # predefined size

h5f_a = tables.open_file(fileName_a, 'w')
ca_a = h5f_a.create_carray(h5f_a.root, 'carray', atom, shape_a, filters=filters)

# Fill A one row-batch at a time so only one batch is in RAM at once.
for i in range(batches):
    data = np.random.rand(rows,cols)
    ca_a[i*rows:(i+1)*rows]= data[:]
#h5f_0.close()


rows = n_col
cols = n_row
batches = n_batch

fileName_b = r'C:\carray_b.h5'
shape_b = (rows, cols*batches)  # predefined size

h5f_b = tables.open_file(fileName_b, 'w')
ca_b = h5f_b.create_carray(h5f_b.root, 'carray', atom, shape_b, filters=filters)

# Batch over the row dimension of B. Integer division (//) keeps the
# slice bounds ints under Python 3 — plain / yields a float and breaks
# the slicing below.
sz = rows // batches
for i in range(batches):
    data = np.random.rand(sz, cols*batches)
    ca_b[i*sz:(i+1)*sz]= data[:]
#h5f_1.close()

rows = n_batch*n_row
cols = n_batch*n_row

fileName_c = r'C:\carray_c.h5'
shape_c = (rows, cols)  # predefined size

h5f_c = tables.open_file(fileName_c, 'w')
ca_c = h5f_c.create_carray(h5f_c.root, 'carray', atom, shape_c, filters=filters)


a= h5f_a.root.carray#[:]
b= h5f_b.root.carray#[:]
c= h5f_c.root.carray

t0= time.time()
# np.dot pulls both operands fully into memory, hence the
# "array is too big" error for large shapes. Note this also rebinds
# the name `c` instead of writing into the on-disk CArray.
c= np.dot(a,b) #error if array is big
print (time.time()-t0)

Update: so here is the code. Interestingly, the HDF5 version even runs faster.

import numpy as np
import tables
import time

# Blocked (tiled) matrix multiply writing the result into an HDF5
# CArray, compared against an in-memory np.dot for timing/correctness.
sz= 100 #chunk size (overwritten by block_size below)
n_row=10000 #m
n_col=1000 #n

#for arbitrary size
A=np.random.rand(n_row,n_col)
B=np.random.rand(n_col,n_row)
# A=np.random.randint(5, size=(n_row,n_col))
# B=np.random.randint(5, size=(n_col,n_row))

#using numpy array
#C= np.zeros((n_row,n_row))

#using hdf5
fileName_C = 'CArray_C.h5'
atom = tables.Float32Atom()
shape = (A.shape[0], B.shape[1])
Nchunk = 128  # on-disk chunk edge; tune for the access pattern
chunkshape = (Nchunk, Nchunk)
chunk_multiple = 1
block_size = chunk_multiple * Nchunk  # keep an integer multiple of Nchunk
h5f_C = tables.open_file(fileName_C, 'w')
C = h5f_C.create_carray(h5f_C.root, 'CArray', atom, shape, chunkshape=chunkshape)

sz= block_size

# Each (i, j) output block accumulates partial products over k.
# Out-of-range slice ends are clipped by numpy/pytables, so dimensions
# that are not multiples of sz are handled automatically.
t0= time.time()
for i in range(0, A.shape[0], sz):
    for j in range(0, B.shape[1], sz):
        for k in range(0, A.shape[1], sz):
            C[i:i+sz,j:j+sz] += np.dot(A[i:i+sz,k:k+sz],B[k:k+sz,j:j+sz])
print (time.time()-t0)

t0= time.time()
res= np.dot(A,B)
print (time.time()-t0)

# C is stored as float32 while res is float64, so exact elementwise
# equality (C == res) is essentially always False; compare with a
# tolerance instead.
print (np.allclose(C[:], res, rtol=1e-4, atol=1e-2))

h5f_C.close()
Это было полезно?

Решение

I don't know of an np.dot variant that works without loading into memory. I think blocking would work pretty well. Create an output array (called "c" below) as a PyTables CArray and fill it in blocks. You should choose the chunkshape when you create it to match your blocking scheme. Something like

atom = tables.Float32Atom() # you have UInt8Atom() above.  do you mean that?
shape = (a.shape[0], b.shape[1])

# you can vary block_size and chunkshape independently, but I would
# aim to have block_size an integer multiple of chunkshape
# your mileage may vary and depends on the array size and how you'll
# access it in the future.

Nchunk = 128  # ?
chunkshape = (Nchunk, Nchunk)
chunk_multiple = 1
block_size = chunk_multiple * Nchunk
# One consistent open-file handle (the original mixed `h5f` and `h5`).
c = h5f.create_carray(h5f.root, 'c', atom, shape, chunkshape=chunkshape)

# Blocked multiply: each (i, j) output block accumulates partial
# products over k. (The original had a broken line continuation —
# backslash followed by a space — and unclosed parentheses.)
for i_start in range(0, a.shape[0], block_size):
    for j_start in range(0, b.shape[1], block_size):
        for k_start in range(0, a.shape[1], block_size):
            c[i_start:i_start+block_size, j_start:j_start + block_size] += \
                    np.dot(a[i_start:i_start + block_size, k_start:k_start + block_size],
                           b[k_start:k_start + block_size, j_start:j_start + block_size])
Лицензировано под: CC-BY-SA с атрибуция
Не связан с StackOverflow
scroll top