/****************************************************************************** * MPI Sieve of Eratosthenes * * Matthew Bohnsack & Erik Erhardt * April 3rd 2006 * * CS 442 - Introduction to Parallel Processing - Project #1 qsub -I -l nodes=16:ppn=2,walltime=2:00:00 mpicc MPI_primes.c -o MPI_primes mpirun -np 8 -machinefile $PBS_NODEFILE ./MPI_primes -v -n 1000 ******************************************************************************/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include /********** Globals ***********************************************************/ struct program_options { long n; int block_size; int debug_level; } opts; struct program_options opts = {0, 1, 0}; /*********** Function Prototypes **********************************************/ void iota(int rank, long *num_array, long *nums); void recv_from(int from, long *num_array, int *loop, int *nums_size, long *nums); void send_to(int to, long *num_array, int *loop, int *nums_size, long *nums, int increment_loop); void display_primes(long int* array, int n_primes, int buff_size, int numprocs); void parse_command_line_args(int argc, char *argv[]); int compare_longs(const void *a, const void *b); void disp_nums(char *string, int rank, long *nums, int nums_size, int index); /*********** Main ************************************************************/ int main (int argc, char *argv[]) { double elapsed_time; /* Parallel execution time */ int i = 0; long int n_primes = 0; int k = 0; long int *primes = NULL; long int *all_primes = NULL; int kill_sw = 0; int added_prime_sw = 0; int checking_complete_sw = 0; int rank0_done_sw = 0; int last_prime_index = 0; int indy_gather_buffer_size = 0; int numprocs = 0; int rank = 0; long *num_array = NULL; long *nums = NULL; int loop=0; int nums_size=1; int i_live = -1; /* communications */ MPI_Status status_sreq, status_rreq; MPI_Request sreq, rreq; int ierr, tag=0; /* set STDOUT to unbuffered */ setvbuf(stdout, NULL, _IONBF, 0); MPI_Init(&argc, &argv); parse_command_line_args(argc, argv); /* Start the timer */ MPI_Barrier(MPI_COMM_WORLD); elapsed_time = -MPI_Wtime(); MPI_Comm_size(MPI_COMM_WORLD, &numprocs); MPI_Comm_rank(MPI_COMM_WORLD, &rank); primes = (long int *)malloc(opts.n * sizeof(long int)); if(primes == NULL) { fprintf(stderr, "Out of memory!\n");exit( EXIT_FAILURE ); } num_array = (long int *)malloc((opts.block_size+2) * sizeof(long int)); if(num_array == NULL) { fprintf(stderr, "Out of memory!\n"); exit( EXIT_FAILURE ); } nums = &num_array[2]; /* setup initial list of primes */ if (rank==1) { primes[0] = 2; /* rank 1 starts with primes array with 2 */ last_prime_index = 0; } else { last_prime_index = -1; /* ranks > 1 start with empty primes array */ } if(opts.debug_level) { printf("There are %d processes and I am rank #%d\n", numprocs, rank); } if (rank==0) /* rank 0 generates integers 2..n */ { iota(rank, num_array, nums); } else /* positive ranks identify primes from numbers from rank 0 */ { int flag=0; /* used in MPI_Iprobe */ do{ kill_sw = 0; added_prime_sw = 0; checking_complete_sw = 0; if(rank==1) /* can receive from rank 0 and from rank (numprocs-1) */ { /* rank 1 probes last rank to see if it has sent something */ ierr = MPI_Iprobe(numprocs-1, tag, MPI_COMM_WORLD, &flag, &status_rreq); if (ierr != MPI_SUCCESS) { printf("MPI_Iprobe ERROR status %d.\n", ierr); ierr=MPI_SUCCESS; } /* message ready to be received from rank (numprocs-1) */ if(flag || rank0_done_sw) { recv_from(numprocs-1, num_array, &loop, &nums_size, nums); } else /* receive from rank 0 */ { recv_from(0, num_array, &loop, &nums_size, nums); if (nums[0] == -1) { rank0_done_sw = 1; } } } else /* receive from rank-1 */ { recv_from(rank-1, num_array, &loop, &nums_size, nums); } if(loop > last_prime_index) /* add seived number to list of primes */ { if(nums[0] != -1) { last_prime_index++; primes[last_prime_index] = nums[0]; /* shift the remaining values in nums up by one index */ for(i = 0; i < nums_size - 1; i++) { nums[i] = nums[i+1]; } nums_size--; } else /* nums[0] == -1, so done checking for primes */ { checking_complete_sw = 1; } } if(!checking_complete_sw){ /* LOOP THROUGH THE NUMS ARRAY TO KILL THEM */ /* CRUNCH THE REMAINING LIST BY nums[i_live]=nums[ind] */ /* UPDATE SIZE */ if(opts.debug_level>1) { disp_nums("Before Loop", rank, nums, nums_size, 0); } if(opts.debug_level>1) { printf("Rank %d nums_size = %d, prime=%d\n", rank, nums_size, primes[loop]); } i_live = -1; /* loop through nums array, keeping relatively prime values */ for(i=0; i< nums_size; i++) { /* relatively prime - so we retain the number */ if(nums[i] % primes[loop]) { i_live++; /* retain number by moving up to the i_live index */ nums[i_live] = nums[i]; if(opts.debug_level>1) { disp_nums("After Shuffle", rank, nums, nums_size, i); }; } } nums_size = i_live+1; if(nums_size == 0) { /* all nums killed, nothing to send to next process */ kill_sw = 1; } if(opts.debug_level>1) { disp_nums("After Loop", rank, nums, nums_size, 0); } if(opts.debug_level>1) { printf("Rank %d nums_size = %d\n", rank, nums_size); } } /* the number has not been killed, send on to next rank */ if (!kill_sw || checking_complete_sw) { if (rank < numprocs-1) /* non last rank sends to next rank */ { send_to(rank+1, num_array, &loop, &nums_size, nums, 0); } else /* last rank sends to rank 1 */ { send_to(1, num_array, &loop, &nums_size, nums, 1); } } } while (!checking_complete_sw); } if(opts.debug_level){printf("FINISHED rank %d\n", rank);}; /* Each rank prints the primes it found */ if(opts.debug_level && rank !=0) { printf("I found these primes (and I am very proud!):\n"); for(k=0; k <= last_prime_index; k++) { printf("rank: %d prime: %d\n", rank, primes[k]); } } /* Tell rank 0 how many primes total */ last_prime_index++; MPI_Reduce(&last_prime_index, &n_primes, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); if (rank==0) { if(opts.debug_level) { printf("rank: %d n_prime = %d\n", rank, n_primes); } } /* rank 1 has the most primes so tells rank 0 indy_gather_buffer_size, to * use as MPI_Gather buffer size */ if (rank==1) { ierr = MPI_Isend(&last_prime_index, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &sreq); if (ierr != MPI_SUCCESS) { printf("MPI_Isend ERROR status %d.\n", ierr); ierr=MPI_SUCCESS; } ierr = MPI_Wait(&sreq, MPI_STATUSES_IGNORE); if (ierr != MPI_SUCCESS) { printf("MPI_Wait ERROR status %d.\n", ierr); ierr=MPI_SUCCESS; } } /* rank 1 tells rank 0 indy_gather_buffer_size, the MPI_Gather buffer size */ if (rank==0) { ierr = MPI_Irecv(&indy_gather_buffer_size, 1, MPI_INT, 1, tag, MPI_COMM_WORLD, &rreq); if (ierr != MPI_SUCCESS) { printf("MPI_Irecv ERROR status %d.\n", ierr); ierr=MPI_SUCCESS; } ierr = MPI_Wait(&rreq, MPI_STATUSES_IGNORE); if (ierr != MPI_SUCCESS) { printf("MPI_Wait ERROR status %d.\n", ierr); ierr=MPI_SUCCESS; } } /* rank 1 tells all other ranks the indy_gather_buffer_size */ if (rank==1) { indy_gather_buffer_size = last_prime_index; } ierr = MPI_Bcast(&indy_gather_buffer_size, 1, MPI_INT, 1, MPI_COMM_WORLD); if (ierr != MPI_SUCCESS) { printf("MPI_Isend ERROR status %d.\n", ierr); ierr=MPI_SUCCESS; } /* rank 0 allocate array of primes */ if ( rank==0 ) { all_primes = (long int *) malloc(indy_gather_buffer_size * numprocs * sizeof(long int)); } /* rank 0 gathers array of primes from other ranks */ MPI_Gather(primes, indy_gather_buffer_size, MPI_LONG, all_primes, indy_gather_buffer_size, MPI_LONG, 0, MPI_COMM_WORLD); if(opts.debug_level && rank == 0) { display_primes(all_primes, n_primes, indy_gather_buffer_size, numprocs); } MPI_Barrier(MPI_COMM_WORLD); if(rank == 0) { /* Stop the timer */ elapsed_time += MPI_Wtime(); printf("Total elapsed time: %10.6f s\n", elapsed_time); } MPI_Finalize(); return 0; } /* END of main() */ /***************** Functions **************************************************/ void iota(int rank, long *num_array, long *nums) { long num=2; int loop=0, nums_size=0; MPI_Status status_sreq, status_rreq; MPI_Request sreq, rreq; int ierr, tag=0; int iblock=0, block=0, nblocks=0, last_block_size=0; if(opts.debug_level){printf("block size= %d ", opts.block_size);}; /* first array element has loop which will always be 0 from iota (rank=0) */ num_array[0] = loop; nums_size = opts.block_size; num_array[1] = nums_size; nblocks = (int)(opts.n/opts.block_size); /* number of blocks */ last_block_size = (opts.n%opts.block_size); /* last may have diff size */ for(block = 0; block < nblocks ; block++) /* create candidate numbers */ { /* create blocks */ for(iblock = 0; iblock < opts.block_size ; iblock++, num++) { num_array[iblock+2] = num; /* block*opts.block_size + iblock; */ } send_to(1, num_array, &loop, &nums_size, nums, 0); } /* If there is a last block having a different size */ if(last_block_size) { for(iblock = 0; iblock < last_block_size; iblock++, num++) { num_array[iblock+2] = num; /* block*opts.block_size + iblock; */ } nums_size = last_block_size; send_to(1, num_array, &loop, &nums_size, nums, 0); } /* end of numbers, send -1 as an indicator */ num = -1; nums_size = 1; num_array[1] = nums_size; num_array[2] = num; send_to(1, num_array, &loop, &nums_size, nums, 0); } void recv_from( int from, /* rank receiving message from */ long *num_array, /* block of loop/nums_size/nums being received */ int *loop, /* loop number from num_array[0] */ int *nums_size, /* the number of remaining candiates in nums */ long *nums /* the list of remaining candidates: num_array[2-end] */ ) { MPI_Status status_sreq, status_rreq; MPI_Request sreq, rreq; int ierr, tag=0; ierr = MPI_Irecv(num_array, opts.block_size+2, MPI_LONG, from, tag, MPI_COMM_WORLD, &rreq); if (ierr != MPI_SUCCESS) { printf("MPI_Irecv ERROR status %d.\n", ierr); ierr=MPI_SUCCESS; } ierr = MPI_Wait(&rreq, MPI_STATUSES_IGNORE); if (ierr != MPI_SUCCESS) { printf("MPI_Wait ERROR status %d.\n", ierr); ierr=MPI_SUCCESS; } *loop=num_array[0]; *nums_size=num_array[1]; nums=&num_array[2]; } void send_to( int to, /* rank sending message to */ long *num_array, /* block of loop/nums_size/nums being sent */ int *loop, /* loop number from num_array[0] */ int *nums_size, /* the number of remaining candiates from num_array[1] */ long *nums, /* [NOT NEEDED] - remaining candidates: num_array[2-end] */ int increment_loop /* 0=don't increment loop, 1=do (when sending to rank 1)*/ ) { MPI_Status status_sreq, status_rreq; MPI_Request sreq, rreq; int ierr, tag=0; num_array[0] = (*loop)+increment_loop; num_array[1] = *nums_size; /* Don't need to assign nums to num_array[2] since shared address space */ ierr = MPI_Isend(num_array, opts.block_size+2, MPI_LONG, to, tag, MPI_COMM_WORLD, &sreq); if (ierr != MPI_SUCCESS) { printf("MPI_Isend ERROR status %d.\n", ierr); ierr=MPI_SUCCESS; } ierr = MPI_Wait(&sreq, MPI_STATUSES_IGNORE); if (ierr != MPI_SUCCESS) { printf("MPI_Wait ERROR status %d.\n", ierr); ierr=MPI_SUCCESS; } } void display_primes(long int* array, int n_primes, int buff_size, int numprocs) { int i = 0; int array_size = buff_size * numprocs; qsort(array, array_size, sizeof(long int), compare_longs); for( i = 0; i < n_primes; i++) { /* to start with first non-zero prime, +(array_size-n_primes)*/ printf("Prime #%d is: %d\n", i+1, array[i+(array_size-n_primes)]); } } int compare_longs(const void *a, const void *b) { const long int *da = (const long int *) a; const long int *db = (const long int *) b; return (*da > *db) - (*da < *db); } void parse_command_line_args(int argc, char *argv[]) { int c = 0; while( (c = getopt( argc, argv, "n:vb:" )) != EOF ) { switch( c ) { case 'n': opts.n = strtol( optarg, (char **)NULL, 10 ); break; case 'b': opts.block_size = strtol( optarg, (char **)NULL, 10 ); break; case 'v': opts.debug_level++; break; } } } void disp_nums(char * string, int rank, long *nums, int nums_size, int index) { int i = 0; printf("Rank %d nums %s on i=%d\n", rank, string, index); for( i = 0 ; i < nums_size; i++) { printf("Rank %d nums[%d]: %d\n", rank, i, nums[i]); } }