You are looking at each character which gives you O(n)
, since you compare on each character the next (maximum) five characters this gives you a constant c
:
var data = get_input();
var compare = { `A`, `T`, `G`, `A`, `T` } // or whatever
var MAX_LOOKAHEAD = compare.length
var n
var c
for(n = data_array.length; n < size; i++) { // Has runtime O(n)
for(c = 0; c < MAX_LOOKAHEAD; c++) { // Maximum O(c)
if( compare[c] != data[i+c] ) {
break;
} else {
report( "found match at position " + i )
}
}
}
It is easy to see that this runs O(n*c)
times. Since c
is very small it can be ignored - and I think one can not get rid of that constant - which results in a total runtime of O(n)
.
The good news:
You can speed this up with parallelization. E.g. you could split this up in k
intervals and let multiple threads do the job for you by giving them appropriate start and end indices. This could give you a linear speedup.
If you do that make sure that you treat the intersections as special cases since you could miss a match if your intervals split a match in two.
E.g. n = 50000
:
Partition for 4 threads: (n/10000) - 1 = 4
. The 5th thread won't have a lot to do since it just handles the intersections which is why we don't need to consider its (in our case tiny) overhead.
1 10000 20000 40000 50000
|-------------------|-------------------|-------------------|-------------------|
| <- thread 1 -> | <- thread 2 -> | <- thread 3 -> | <- thread 4 -> |
|---| |---| |---|
|___________________|___________________|
|
thread 5
And this is how it could look like:
var data;
var compare = { `A`, `T`, `G`, `A`, `T` };
var MAX_LOOKAHEAD = compare.length;
thread_function(args[]) {
var from = args[0];
var to = args[1];
for(n = from ; n < to ; i++) {
for(c = 0; c < MAX_LOOKAHEAD; c++) {
if( compare[c] != data[i+c] ) {
break;
} else {
report( "found match at position " + i )
}
}
}
}
main() {
var data_size = 50000;
var thread_count = 4;
var interval_size = data_size / ( thread_count + 1) ;
var tid[]
// This loop starts the threads for us:
for( var i = 0; i < thread_count; i++ ) {
var args = { interval_size * i, (interval_size * i) + interval_size };
tid.add( create_thread( thread_function, args ) );
}
// And this handles the intersections:
for( var i = 1; i < thread_count - 1; i++ ) {
var args = { interval_size * i, (interval_size * i) + interval_size };
from = (interval_size * i) - compare.length + 1;
to = (interval_size * i) + compare.length - 1;
for(j = from; j < to ; j++) {
for(k = 0; k < MAX_LOOKAHEAD; k++) {
if( compare[k] != data[j+k] ) {
break;
} else {
report( "found match at position " + j )
}
}
}
}
wait_for_multiple_threads( tid );
}