質問

So, my assignment is to write a spell check program and then parallelize it using openMPI. My take was to load the words from a text file into my array called dict[] and this is used as my dictionary. Next, I get input from the user and then it's supposed go through the dictionary array and check whether the current word is within the threshold percentage, if it is, print it out. But I'm only supposed to print out a certain amount of words. My problem is, is that, my suggestions[] array, doesn't seem to fill up the way I need it to, and it gets a lot of blank spots in it, whereas, I thought at least, is that the way I wrote it, is to just fill it when a word is within threshold. So it shouldn't get any blanks in it until there are no more words being added. I think it's close to being finished but I can't seem to figure this part out. Any help is appreciated.

#include <stdio.h>
#include <mpi.h>
#include <string.h>
#include <stdlib.h>
#define SIZE 30
#define max(x,y) (((x) > (y)) ? (x) : (y))
char *dict[50000];
char *suggestions[50000];
char enterWord[50];
char *myWord;
int wordsToPrint = 20;
int threshold = 40;
int i;
int words_added = 0;


   int levenshtein(const char *word1, int len1, const char *word2, int len2){
      int matrix[len1 + 1][len2 + 1];
      int a;
      for(a=0; a<= len1; a++){
         matrix[a][0] = a;
      }
      for(a=0;a<=len2;a++){
         matrix[0][a] = a;
      }

      for(a = 1; a <= len1; a++){
         int j;
         char c1;

         c1 = word1[a-1];
         for(j = 1; j <= len2; j++){
            char c2;

            c2 = word2[j-1];
            if(c1 == c2){
               matrix[a][j] = matrix[a-1][j-1];
            }
            else{
               int delete, insert, substitute, minimum;

               delete = matrix[a-1][j] +1;
               insert = matrix[a][j-1] +1;
               substitute = matrix[a-1][j-1] +1;
               minimum = delete;

               if(insert < minimum){
                  minimum = insert;
               }
               if(substitute < minimum){
                  minimum = substitute;
               }
               matrix[a][j] = minimum;
            }//else
         }//for
      }//for
      return matrix[len1][len2];
   }//levenshtein

   void prompt(){
      printf("Enter word to search for: \n");
      scanf("%s", &enterWord);
   }


   int p0_compute_output(int num_processes, char *word1){
      int totalNumber = 0;
      int k = 0;
      int chunk = 50000 / num_processes;
      for(i = 0; i < chunk; i++){
         int minedits = levenshtein(word1, strlen(word1), dict[i], strlen(dict[i]));
         int thresholdPercentage = (100 * minedits) / max(strlen(word1), strlen(dict[i]));
         if(thresholdPercentage < threshold){
            suggestions[totalNumber] = dict[i];
            totalNumber = totalNumber + 1;
         }
      }//for
      return totalNumber;
   }//p0_compute_output

   void p0_receive_output(int next_addition){
      int num_to_add;
      MPI_Comm comm;
      MPI_Status status;
         MPI_Recv(&num_to_add,1,MPI_INT,MPI_ANY_SOURCE, MPI_ANY_TAG,MPI_COMM_WORLD, MPI_STATUS_IGNORE);
         printf("--%d\n", num_to_add);
         suggestions[next_addition] = dict[num_to_add];
         next_addition = next_addition + 1;
   }

   void compute_output(int num_processes, int me, char *word1){
      int chunk = 0;
      int last_chunk = 0;
      MPI_Comm comm;
      if(50000 % num_processes == 0){
         chunk = 50000 / num_processes;
         last_chunk = chunk;
         int start = me * chunk;
         int end = me * chunk + chunk;
         for(i = start; i < end;i++){
            int minedits = levenshtein(word1, strlen(word1), dict[i], strlen(dict[i]));
            int thresholdPercentage = (100 * minedits) / max(strlen(word1), strlen(dict[i]));
            if(thresholdPercentage < threshold){
               int number_to_send = i;
               MPI_Send(&number_to_send, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
            }
         }
      }
      else{
         chunk = 50000 / num_processes;
         last_chunk = 50000 - ((num_processes - 1) * chunk);
         if(me != num_processes){
            int start = me * chunk;
            int end = me * chunk + chunk;
            for(i = start; i < end; i++){
               int minedits = levenshtein(word1, strlen(word1), dict[i], strlen(dict[i]));
               int thresholdPercentage = (100 * minedits) / max(strlen(word1), strlen(dict[i]));
               if(thresholdPercentage < threshold){
                  int number_to_send = i;
                  MPI_Send(&number_to_send, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
               }//if
            }//for
         }//if me != num_processes
         else{
            int start = me * chunk;
            int end = 50000 - start;
            for(i = start; i < end; i++){
               int minedits = levenshtein(word1, strlen(word1), dict[i], strlen(dict[i]));
               int thresholdPercentage = (100 * minedits) / max(strlen(word1), strlen(dict[i]));
               if(thresholdPercentage < threshold){
                  int number_to_send = i;
                  MPI_Send(&number_to_send, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
               }
            }
         }//me == num_processes
      }//BIG else
      return;
   }//COMPUTE OUTPUT

   void set_data(){
      prompt();
      MPI_Bcast(&enterWord,20 ,MPI_CHAR, 0, MPI_COMM_WORLD);
   }//p0_send_inpui


//--------------------------MAIN-----------------------------//
main(int argc, char **argv){
   int ierr, num_procs, my_id, loop;
   FILE *myFile;
   loop = 0;

   for(i=0;i<50000;i++){
      suggestions[i] = calloc(SIZE, sizeof(char));
   }

   ierr = MPI_Init(NULL, NULL);
   ierr = MPI_Comm_rank(MPI_COMM_WORLD, &my_id);
   ierr = MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
   printf("Check in from %d of %d processors\n", my_id, num_procs);

   set_data();
   myWord = enterWord;

   myFile = fopen("words", "r");
   if(myFile != NULL){
      for(i=0;i<50000;i++){
         dict[i] = calloc(SIZE, sizeof(char));
         fscanf(myFile, "%s", dict[i]);
      }//for
      fclose(myFile);
   }//read word list into dictionary
   else printf("File not found");

   if(my_id == 0){
      words_added = p0_compute_output(num_procs, enterWord);
      printf("words added so far: %d\n", words_added);
      p0_receive_output(words_added);
      printf("Threshold: %d\nWords To print: %d\n%s\n", threshold, wordsToPrint, myWord);
      ierr = MPI_Finalize();
   }
   else{
      printf("my word %s*\n", enterWord);
      compute_output(num_procs, my_id, enterWord);
     // printf("Process %d terminating...\n", my_id);
      ierr = MPI_Finalize();
   }

   for(i=0;i<wordsToPrint;i++){
      printf("*%s\n", suggestions[i]);
   }//print suggestions

   return (0);
}//END MAIN
役に立ちましたか?

解決

Here are a few problems I see with what you're doing:

  • prompt() should only be called by rank 0.
  • The dictionary file should be read only by rank 0, then broadcast the array out to the other ranks
    • Alternatively, have rank 1 read the file while rank 0 is waiting for input, broadcast input and dictionary afterwards.
  • You're making the compute_output step overly complex. You can merge p0_compute_output and compute_output into one routine.
    • Store an array of indices into dict in each rank
    • This array will not be the same size in every rank, so the simplest way to do this would be to send from each rank a single integer indicating the size of the array, then send the array with this size. (The receiving rank must know how much data to expect). You could also use the sizes for MPI_Gatherv, but I expect this is more than you're wanting to do right now.
    • Once you have a single array of indices in rank 0, then use this to fill suggestions.
  • Save the MPI_Finalize call until immediately before the return call
  • For the final printf call, only rank 0 should be printing that. I suspect this is causing a large part of the "incorrect" result. As you have it, all ranks are printing suggestions, but it is only filled in rank 0. So the others will all be printing blank entries.

Try some of these changes, especially the last one, and see if that helps.

ライセンス: CC-BY-SA帰属
所属していません StackOverflow
scroll top