Below is the iterative serial version of MergeSort which is indeed faster than the recursive version and also doesnot involve calculation of middle so avoids the overflow error for it. However overflow errors can occur for other integers as well. You can try for parallelizing it if you are interested.
protected static int[] ASC(int input_array[]) // Sorts in ascending order
{
int num = input_array.length;
int[] temp_array = new int[num];
int temp_indx;
int left;
int mid,j;
int right;
int[] swap;
int LIMIT = 1;
while (LIMIT < num)
{
left = 0;
mid = LIMIT ; // The mid point
right = LIMIT << 1;
while (mid < num)
{
if (right > num){ right = num; }
temp_indx = left;
j = mid;
while ((left < mid) && (j < right))
{
if (input_array[left] < input_array[j]){ temp_array[temp_indx++] = input_array[left++]; }
else{ temp_array[temp_indx++] = input_array[j++]; }
}
while (left < mid){ temp_array[temp_indx++] = input_array[left++]; }
while (j < right){ temp_array[temp_indx++] = input_array[j++]; }
// Do not copy back the elements to input_array
left = right;
mid = left + LIMIT;
right = mid + LIMIT;
}
// Instead of copying back in previous loop, copy remaining elements to temp_array, then swap the array pointers
while (left < num){ temp_array[left] = input_array[left++]; }
swap = input_array;
input_array = temp_array;
temp_array = swap;
LIMIT <<= 1;
}
return input_array ;
}
Use the java executor service, thats a lot faster, even with threads exceeding the number of cores ( you can build scalable multithreaded applications with it ), I have a code that uses only threads but its very very slow, and Im new to executors so cant help much, but its an interesting area to explore.
Also there is a cost for parallelism, because thread management is a big deal, so go for parallelism at high N, if you are looking for a serial alternative to merge sort, I suggest the Dual-Pivot-QuickSort or 3-Partition-Quick-Sort as they are known to beat merge sort often. Reason is that they have low constant factors than MergeSort and the worst case time complexity has the probability of occuring only 1/(n!). If N is large, the worst case probability becomes very small paving way for increased probability of average case. You could multithread both and see which one among the 4 programs ( 1 serial and 1 multithreaded for each : DPQ and 3PQ ) runs the fastest.
But Dual-Pivot-QuickSort works best when there are no, or almost no duplicate keys and 3-Partition-Quick-Sort works best when there are many duplicate keys. I have never seen 3-Partition-Quick-Sort beat the Dual-Pivot-QuickSort when there are none or very few duplicate keys, but I have seen Dual-Pivot-QuickSort beat 3-Partition-Quick-Sort a very small number of times in case of many duplicate keys. In case you are interested, DPQ serial code is below( both ascending and descending)
protected static void ASC(int[]a, int left, int right, int div)
{
int len = 1 + right - left;
if (len < 27)
{
// insertion sort for small array
int P1 = left + 1;
int P2 = left;
while ( P1 <= right )
{
div = a[P1];
while(( P2 >= left )&&( a[P2] > div ))
{
a[P2 + 1] = a[P2];
P2--;
}
a[P2 + 1] = div;
P2 = P1;
P1++;
}
return;
}
int third = len / div;
// "medians"
int P1 = left + third;
int P2 = right - third;
if (P1 <= left)
{
P1 = left + 1;
}
if (P2 >= right)
{
P2 = right - 1;
}
int temp;
if (a[P1] < a[P2])
{
temp = a[P1]; a[P1] = a[left]; a[left] = temp;
temp = a[P2]; a[P2] = a[right]; a[right] = temp;
}
else
{
temp = a[P1]; a[P1] = a[right]; a[right] = temp;
temp = a[P2]; a[P2] = a[left]; a[left] = temp;
}
// pivots
int pivot1 = a[left];
int pivot2 = a[right];
// pointers
int less = left + 1;
int great = right - 1;
// sorting
for (int k = less; k <= great; k++)
{
if (a[k] < pivot1)
{
temp = a[k]; a[k] = a[less]; a[less] = temp;
less++;
}
else if (a[k] > pivot2)
{
while (k < great && a[great] > pivot2)
{
great--;
}
temp = a[k]; a[k] = a[great]; a[great] = temp;
great--;
if (a[k] < pivot1)
{
temp = a[k]; a[k] = a[less]; a[less] = temp;
less++;
}
}
}
int dist = great - less;
if (dist < 13)
{
div++;
}
temp = a[less-1]; a[less-1] = a[left]; a[left] = temp;
temp = a[great+1]; a[great+1] = a[right]; a[right] = temp;
// subarrays
ASC(a, left, less - 2, div);
ASC(a, great + 2, right, div);
// equal elements
if (dist > len - 13 && pivot1 != pivot2)
{
for (int k = less; k <= great; k++)
{
if (a[k] == pivot1)
{
temp = a[k]; a[k] = a[less]; a[less] = temp;
less++;
}
else if (a[k] == pivot2)
{
temp = a[k]; a[k] = a[great]; a[great] = temp;
great--;
if (a[k] == pivot1)
{
temp = a[k]; a[k] = a[less]; a[less] = temp;
less++;
}
}
}
}
// subarray
if (pivot1 < pivot2)
{
ASC(a, less, great, div);
}
}
protected static void DSC(int[]a, int left, int right, int div)
{
int len = 1 + right - left;
if (len < 27)
{
// insertion sort for large array
int P1 = left + 1;
int P2 = left;
while ( P1 <= right )
{
div = a[P1];
while(( P2 >= left )&&( a[P2] < div ))
{
a[P2 + 1] = a[P2];
P2--;
}
a[P2 + 1] = div;
P2 = P1;
P1++;
}
return;
}
int third = len / div;
// "medians"
int P1 = left + third;
int P2 = right - third;
if (P1 >= left)
{
P1 = left + 1;
}
if (P2 <= right)
{
P2 = right - 1;
}
int temp;
if (a[P1] > a[P2])
{
temp = a[P1]; a[P1] = a[left]; a[left] = temp;
temp = a[P2]; a[P2] = a[right]; a[right] = temp;
}
else
{
temp = a[P1]; a[P1] = a[right]; a[right] = temp;
temp = a[P2]; a[P2] = a[left]; a[left] = temp;
}
// pivots
int pivot1 = a[left];
int pivot2 = a[right];
// pointers
int less = left + 1;
int great = right - 1;
// sorting
for (int k = less; k <= great; k++)
{
if (a[k] > pivot1)
{
temp = a[k]; a[k] = a[less]; a[less] = temp;
less++;
}
else if (a[k] < pivot2)
{
while (k < great && a[great] < pivot2)
{
great--;
}
temp = a[k]; a[k] = a[great]; a[great] = temp;
great--;
if (a[k] > pivot1)
{
temp = a[k]; a[k] = a[less]; a[less] = temp;
less++;
}
}
}
int dist = great - less;
if (dist < 13)
{
div++;
}
temp = a[less-1]; a[less-1] = a[left]; a[left] = temp;
temp = a[great+1]; a[great+1] = a[right]; a[right] = temp;
// subarrays
DSC(a, left, less - 2, div);
DSC(a, great + 2, right, div);
// equal elements
if (dist > len - 13 && pivot1 != pivot2)
{
for (int k = less; k <= great; k++)
{
if (a[k] == pivot1)
{
temp = a[k]; a[k] = a[less]; a[less] = temp;
less++;
}
else if (a[k] == pivot2)
{
temp = a[k]; a[k] = a[great]; a[great] = temp;
great--;
if (a[k] == pivot1)
{
temp = a[k]; a[k] = a[less]; a[less] = temp;
less++;
}
}
}
}
// subarray
if (pivot1 > pivot2)
{
DSC(a, less, great, div);
}
}