OS: Windows 8.1 64 Bit - fully updated
IDE: Visual Studio Professional 2013 - Version 12.0.30110.00 Update 1 - fully updated
I have a situation where I get the following exception, not at compile time but at run time:
The number of writable data containers referenced in the entry function of the parallel_for_each call (17) exceeds the selected accelerator's limit (8).
The function where this happens looks like the following:
// Runs one training epoch over all predictor rows with a C++ AMP kernel.
//
// The kernel is launched over a 1-D extent of
// AmpUtils::get_no_of_rows(avPredictors) items; each index is used as a row
// into avPredictors / avTargets and into the per-row scratch views allocated
// below. For its row, every work-item:
//   1. calls compute_layer for layers 1, 2 and 3,
//   2. computes the layer-3 error terms and the row's summed squared error,
//      then calls calculate_error_layer for layers 2 and 1,
//   3. calls calculate_updates_layer for layers 1, 2 and 3,
//   4. calls update_layer for layers 1, 2 and 3.
// After the kernel, the per-row squared errors are summed on the host and the
// total is written to avErrors[epoch].
//
// Parameters:
//   mainAccelView  - accepted for interface compatibility; not used here
//                    (see NOTE below).
//   activatorState - forwarded to compute_layer, calculate_error_layer and
//                    AmpActivator::derivative2.
//   trainingState  - forwarded to calculate_updates_layer.
//   avLayer1..3    - per-layer network data handed to compute_layer,
//                    calculate_error_layer and update_layer.
//   avPredictors   - read-only input rows.
//   avTargets      - read-only target rows, compared against layer-3 output.
//   avErrors       - receives this epoch's total squared error at [epoch].
//   epoch          - index into avErrors.
//
// Exceptions derived from std::exception are caught, logged to std::wcout and
// not rethrown.
void run_epoch(
    accelerator_view mainAccelView,
    ActivatorState activatorState,
    TrainingState trainingState,
    array_view<double, 2> avLayer1,
    array_view<double, 2> avLayer2,
    array_view<double, 2> avLayer3,
    array_view<const double, 2> avPredictors,
    array_view<const double, 2> avTargets,
    array_view<double> avErrors,
    int epoch
){
    // NOTE(review): the original body re-declared `mainAccelView` as a local,
    // which redefines the formal parameter (a compile error) and was never
    // used. The re-declaration is removed. The caller's view is still not
    // passed to parallel_for_each, so accelerator selection is unchanged --
    // confirm whether the kernel should be launched on mainAccelView.

    int noOfColumnsPredictors = AmpUtils::get_no_of_columns(avPredictors);
    int noOfRowsPredictors = AmpUtils::get_no_of_rows(avPredictors, noOfColumnsPredictors);

    int noOfColumnsLayer1 = AmpUtils::get_no_of_columns(avLayer1);
    int noOfColumnsLayer2 = AmpUtils::get_no_of_columns(avLayer2);
    int noOfColumnsLayer3 = AmpUtils::get_no_of_columns(avLayer3);

    int noOfRowsLayer1 = AmpUtils::get_no_of_rows(avLayer1, noOfColumnsLayer1);
    int noOfRowsLayer2 = AmpUtils::get_no_of_rows(avLayer2, noOfColumnsLayer2);
    int noOfRowsLayer3 = AmpUtils::get_no_of_rows(avLayer3, noOfColumnsLayer3);

    // Per-row scratch storage: one slice per predictor row so that work-items
    // do not share these buffers.
    array_view<double, 2> avOutputLayer1(noOfRowsPredictors, noOfRowsLayer1);
    array_view<double, 2> avOutputLayer2(noOfRowsPredictors, noOfRowsLayer2);
    array_view<double, 2> avOutputLayer3(noOfRowsPredictors, noOfRowsLayer3);

    array_view<double, 2> avErrorsLayer1(noOfRowsPredictors, noOfRowsLayer1);
    array_view<double, 2> avErrorsLayer2(noOfRowsPredictors, noOfRowsLayer2);
    array_view<double, 2> avErrorsLayer3(noOfRowsPredictors, noOfRowsLayer3);

    array_view<double, 2> avThresholdLayer1(noOfRowsPredictors, noOfRowsLayer1);
    array_view<double, 2> avThresholdLayer2(noOfRowsPredictors, noOfRowsLayer2);
    array_view<double, 2> avThresholdLayer3(noOfRowsPredictors, noOfRowsLayer3);

    array_view<double, 3> avWeightsLayer1(noOfRowsPredictors, noOfRowsLayer1, (noOfColumnsLayer1 - 1));
    array_view<double, 3> avWeightsLayer2(noOfRowsPredictors, noOfRowsLayer2, (noOfColumnsLayer2 - 1));
    array_view<double, 3> avWeightsLayer3(noOfRowsPredictors, noOfRowsLayer3, (noOfColumnsLayer3 - 1));

    // One summed squared error per predictor row, reduced on the host below.
    array_view<double> avEpochErrors(noOfRowsPredictors);

    try{
        parallel_for_each(extent<1>(AmpUtils::get_no_of_rows(avPredictors)), [=](index<1> idx) restrict(cpu, amp){
            int predictorRow = idx[0];

            // step 1: compute
            // NOTE(review): layers 2 and 3 are given avPredictors[predictorRow]
            // as input rather than the previous layer's output -- confirm this
            // matches compute_layer's contract.
            // step 11: compute layer 1
            compute_layer(activatorState, avPredictors[predictorRow], avLayer1, avOutputLayer1, noOfColumnsLayer1, predictorRow);
            // step 12: compute layer 2
            compute_layer(activatorState, avPredictors[predictorRow], avLayer2, avOutputLayer2, noOfColumnsLayer2, predictorRow);
            // step 13: compute layer 3
            compute_layer(activatorState, avPredictors[predictorRow], avLayer3, avOutputLayer3, noOfColumnsLayer3, predictorRow);

            // step 2: calculate_error
            // step 21: calculate_error layer 3
            // The squared error is accumulated directly while the layer-3
            // error terms are produced. The original stored the squares in a
            // separate 2-D buffer and then summed `extent.size()` (rows *
            // columns) entries of a single row, which indexed past the row.
            // Folding the sum into this loop fixes the bound and removes one
            // writable array_view from the kernel's captures.
            double errorSum = 0.0;
            for (int column = 0; column < noOfRowsLayer3; column++){
                double neuronError = avTargets[predictorRow][column] - avOutputLayer3[predictorRow][column];
                errorSum += neuronError * neuronError;
                avErrorsLayer3[predictorRow][column] = neuronError * AmpActivator::derivative2(activatorState, avOutputLayer3[predictorRow][column]);
            }
            avEpochErrors[predictorRow] = errorSum;

            // NOTE(review): both trailing size arguments are the same value in
            // each call below -- verify against calculate_error_layer's
            // signature.
            // step 22: calculate_error layer 2
            calculate_error_layer(activatorState, avErrorsLayer2[predictorRow], avErrorsLayer3, avLayer3, avOutputLayer2[predictorRow], noOfRowsLayer3, noOfRowsLayer3);
            // step 23: calculate_error layer 1
            calculate_error_layer(activatorState, avErrorsLayer1[predictorRow], avErrorsLayer2, avLayer2, avOutputLayer1[predictorRow], noOfRowsLayer2, noOfRowsLayer2);

            // step 3: calculate_updates
            // step 31: calculate_updates layer 1
            calculate_updates_layer(trainingState, avErrorsLayer1[predictorRow], avPredictors[predictorRow], avThresholdLayer1[predictorRow], avWeightsLayer1[predictorRow], (noOfColumnsLayer1 - 1), noOfRowsLayer1);
            // step 32: calculate_updates layer 2
            calculate_updates_layer(trainingState, avErrorsLayer2[predictorRow], avPredictors[predictorRow], avThresholdLayer2[predictorRow], avWeightsLayer2[predictorRow], (noOfColumnsLayer2 - 1), noOfRowsLayer2);
            // step 33: calculate_updates layer 3
            calculate_updates_layer(trainingState, avErrorsLayer3[predictorRow], avPredictors[predictorRow], avThresholdLayer3[predictorRow], avWeightsLayer3[predictorRow], (noOfColumnsLayer3 - 1), noOfRowsLayer3);

            // step 4: update_network
            // NOTE(review): every work-item passes the same avLayerN view to
            // update_layer; if update_layer writes to it, confirm that
            // concurrent updates from different rows are intended.
            // step 41: update_network layer 1
            update_layer(avLayer1, avWeightsLayer1[predictorRow], avThresholdLayer1[predictorRow], noOfColumnsLayer1, noOfRowsLayer1);
            // step 42: update_network layer 2
            update_layer(avLayer2, avWeightsLayer2[predictorRow], avThresholdLayer2[predictorRow], noOfColumnsLayer2, noOfRowsLayer2);
            // step 43: update_network layer 3
            update_layer(avLayer3, avWeightsLayer3[predictorRow], avThresholdLayer3[predictorRow], noOfColumnsLayer3, noOfRowsLayer3);
        });

        // Bring the per-row errors back to the host before reading them.
        avEpochErrors.synchronize();

        double epochErrorsSum = 0.0;
        for (int i = 0; i < static_cast<int>(avEpochErrors.extent.size()); i++){
            epochErrorsSum += avEpochErrors[i];
        }
        avErrors[epoch] = epochErrorsSum;
    }
    // Catch by const reference so derived exception types are not sliced.
    catch (const std::exception& e){
        std::wcout << "Exception Project::run_epoch: " << e.what() << std::endl;
    }
}
According to this MSDN post here and also here, the maximum number of writable containers should have been increased to 64 since Windows 8.
My question now is: are there different types of writable containers, such that I can still only use a maximum of 8 of a certain type?