Actual source code: mpinit.c


 3:  #include "petsc.h"
 4:  #include "petscsys.h"

  6: static MPI_Comm saved_PETSC_COMM_WORLD = 0;
  7: MPI_Comm PETSC_COMM_LOCAL_WORLD        = 0;           /* comm for a single node (local set of processes) */
  8: PetscTruth PetscOpenMPWorker           = PETSC_FALSE;  /* PETSC_TRUE on worker processes, PETSC_FALSE on regular (application) processes */


 12: #if defined(PETSC_HAVE_MPI_COMM_SPAWN)
 15: /*@C
 16:    PetscOpenMPSpawn - Initialize additional processes to be used as "worker" processes. This is not generally 
 17:      called by users. One should use -openmp_spawn_size <n> to indicate that you wish to have n-1 new MPI 
 18:      processes spawned for each current process.

 20:    Not Collective (could make collective on MPI_COMM_WORLD, generate one huge comm and then split it up)

 22:    Input Parameter:
 23: .  nodesize - size of each compute node that will share processors

 25:    Options Database:
 26: .   -openmp_spawn_size nodesize

 28:    Notes: This is only supported on systems with an MPI 2 implementation that includes the MPI_Comm_spawn() routine.

 30: $    Comparison of two approaches for OpenMP usage (MPI started with N processes)
 31: $
 32: $    -openmp_spawn_size <n> requires MPI 2, results in n*N total processes with N directly used by application code
 33: $                                           and n-1 worker processes (used by PETSc) for each application node.
 34: $                           You MUST launch MPI so that only ONE MPI process is created for each hardware node.
 35: $
 36: $    -openmp_merge_size <n> results in N total processes, N/n used by the application code and the rest worker processes
 37: $                            (used by PETSc)
 38: $                           You MUST launch MPI so that n MPI processes are created for each hardware node.
 39: $
 40: $    petscmpiexec -n 2 ./ex1 -openmp_spawn_size 3 gives 2 application nodes (and 4 PETSc worker nodes)
 41: $    petscmpiexec -n 6 ./ex1 -openmp_merge_size 3 gives the SAME 2 application nodes and 4 PETSc worker nodes
 42: $       This is what you would use if each of the computer's hardware nodes had 3 CPUs.
 43: $
 44: $      These are intended to be used in conjunction with USER OpenMP code. The user will have 1 process per
 45: $   computer (hardware) node (where the computer node has p CPUs); the user's code will use threads to fully
 46: $   utilize all the CPUs on the node. The PETSc code will have p processes to fully use the compute node for 
 47: $   PETSc calculations. The user THREADS and PETSc PROCESSES will NEVER run at the same time, so the p CPUs 
 48: $   are always working on p tasks, never more than p.
 49: $
 50: $    See PCOPENMP for a PETSc preconditioner that can use this functionality
 51: $

 53:    For both PetscOpenMPSpawn() and PetscOpenMPMerge(), PETSC_COMM_WORLD consists of one process per "node"; PETSC_COMM_LOCAL_WORLD
 54:    consists of all the processes in a "node".

 56:    In both cases the user's code is running ONLY on PETSC_COMM_WORLD (that was newly generated by running this command).

 58:    Level: developer

 60:    Concepts: OpenMP
 61:    
 62: .seealso: PetscFinalize(), PetscInitializeFortran(), PetscGetArgs(), PetscOpenMPFinalize(), PetscInitialize(), PetscOpenMPMerge(), PCOpenMPRun()

 64: @*/
 65: PetscErrorCode  PetscOpenMPSpawn(PetscMPIInt nodesize)
 66: {
 68:   PetscMPIInt    size;
 69:   MPI_Comm       parent,children;
 70: 
 72:   MPI_Comm_get_parent(&parent);
 73:   if (parent == MPI_COMM_NULL) {  /* the original processes started by user */
 74:     char programname[PETSC_MAX_PATH_LEN];
 75:     char **argv;

 77:     PetscGetProgramName(programname,PETSC_MAX_PATH_LEN);
 78:     PetscGetArguments(&argv);
 79:     MPI_Comm_spawn(programname,argv,nodesize-1,MPI_INFO_NULL,0,PETSC_COMM_SELF,&children,MPI_ERRCODES_IGNORE);
 80:     PetscFreeArguments(argv);
 81:     MPI_Intercomm_merge(children,0,&PETSC_COMM_LOCAL_WORLD);

 83:     MPI_Comm_size(PETSC_COMM_WORLD,&size);
 84:     PetscInfo2(0,"PETSc OpenMP successfully spawned: number of nodes = %d node size = %d\n",size,nodesize);
 85:     saved_PETSC_COMM_WORLD = PETSC_COMM_WORLD;
 86:   } else { /* worker nodes that get spawned */
 87:     MPI_Intercomm_merge(parent,1,&PETSC_COMM_LOCAL_WORLD);
 88:     PetscOpenMPHandle(PETSC_COMM_LOCAL_WORLD);
 89:     PetscOpenMPWorker = PETSC_TRUE; /* so that PetscOpenMPFinalize() will not attempt a broadcast from this process */
 90:     PetscEnd();  /* cannot continue into user code */
 91:   }
 92:   return(0);
 93: }
 94: #endif
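As an orientation for the spawn path above, here is a minimal driver sketch. It is not part of mpinit.c; the file name ex1.c is hypothetical, and it assumes (per the notes above) that PetscInitialize() acts on -openmp_spawn_size and routes spawned processes into PetscOpenMPSpawn(), where they enter PetscOpenMPHandle() and never return to user code.

   /* hypothetical ex1.c -- run as: petscmpiexec -n 2 ./ex1 -openmp_spawn_size 3 */
   #include "petsc.h"

   int main(int argc,char **argv)
   {
     PetscErrorCode ierr;

     /* On the original processes this spawns nodesize-1 workers per process;
        spawned workers block inside PetscOpenMPHandle() and end with PetscEnd(). */
     ierr = PetscInitialize(&argc,&argv,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);

     /* application code runs only here, on the (new) PETSC_COMM_WORLD */

     ierr = PetscFinalize();CHKERRQ(ierr);   /* PetscFinalize() calls PetscOpenMPFinalize(), releasing the workers */
     return 0;
   }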

 98: /*@C
 99:    PetscOpenMPMerge - Initializes PETSc and MPI to work with OpenMP. This is not usually called
100:       by the user. One should use -openmp_merge_size <n> to indicate the desired node size of the
101:       merged communicator.

103:    Collective on MPI_COMM_WORLD or PETSC_COMM_WORLD if it has been set

105:    Input Parameter:
106: .  nodesize - size of each compute node that will share processors

108:    Options Database:
109: .   -openmp_merge_size <n>

111:    Level: developer

113: $    Comparison of two approaches for OpenMP usage (MPI started with N processes)
114: $
115: $    -openmp_spawn_size <n> requires MPI 2, results in n*N total processes with N directly used by application code
116: $                                           and n-1 worker processes (used by PETSc) for each application node.
117: $                           You MUST launch MPI so that only ONE MPI process is created for each hardware node.
118: $
119: $    -openmp_merge_size <n> results in N total processes, N/n used by the application code and the rest worker processes
120: $                            (used by PETSc)
121: $                           You MUST launch MPI so that n MPI processes are created for each hardware node.
122: $
123: $    petscmpiexec -n 2 ./ex1 -openmp_spawn_size 3 gives 2 application nodes (and 4 PETSc worker nodes)
124: $    petscmpiexec -n 6 ./ex1 -openmp_merge_size 3 gives the SAME 2 application nodes and 4 PETSc worker nodes
125: $       This is what you would use if each of the computer's hardware nodes had 3 CPUs.
126: $
127: $      These are intended to be used in conjunction with USER OpenMP code. The user will have 1 process per
128: $   computer (hardware) node (where the computer node has p CPUs); the user's code will use threads to fully
129: $   utilize all the CPUs on the node. The PETSc code will have p processes to fully use the compute node for 
130: $   PETSc calculations. The user THREADS and PETSc PROCESSES will NEVER run at the same time, so the p CPUs 
131: $   are always working on p tasks, never more than p.
132: $
133: $    See PCOPENMP for a PETSc preconditioner that can use this functionality
134: $

136:    For both PetscOpenMPSpawn() and PetscOpenMPMerge(), PETSC_COMM_WORLD consists of one process per "node"; PETSC_COMM_LOCAL_WORLD
137:    consists of all the processes in a "node".

139:    In both cases the user's code is running ONLY on PETSC_COMM_WORLD (that was newly generated by running this command).

141:    Concepts: OpenMP
142:    
143: .seealso: PetscFinalize(), PetscInitializeFortran(), PetscGetArgs(), PetscOpenMPFinalize(), PetscInitialize(), PetscOpenMPSpawn(), PCOpenMPRun()

145: @*/
146: PetscErrorCode  PetscOpenMPMerge(PetscMPIInt nodesize)
147: {
149:   PetscMPIInt    size,rank,*ranks,i;
150:   MPI_Group      group,newgroup;

153:   saved_PETSC_COMM_WORLD = PETSC_COMM_WORLD;

155:   MPI_Comm_size(saved_PETSC_COMM_WORLD,&size);
156:   if (size % nodesize) SETERRQ2(PETSC_ERR_ARG_SIZ,"Total number of processes %d is not divisible by number of processes per node %d",size,nodesize);
157:   MPI_Comm_rank(saved_PETSC_COMM_WORLD,&rank);


160:   /* create two communicators 
161:       *) one that contains the first process from each node: 0,nodesize,2*nodesize,...
162:       *) one that contains all processes in a node:  (0,1,2,...,nodesize-1), (nodesize,nodesize+1,...,2*nodesize-1), ...
163:   */
164:   MPI_Comm_group(saved_PETSC_COMM_WORLD,&group);
165:   PetscMalloc((size/nodesize)*sizeof(PetscMPIInt),&ranks);
166:   for (i=0; i<(size/nodesize); i++) ranks[i] = i*nodesize;
167:   MPI_Group_incl(group,size/nodesize,ranks,&newgroup);
168:   PetscFree(ranks);
169:   MPI_Comm_create(saved_PETSC_COMM_WORLD,newgroup,&PETSC_COMM_WORLD);
170:   if (rank % nodesize) PETSC_COMM_WORLD = 0; /* mark invalid processes for easy debugging */
171:   MPI_Group_free(&group);
172:   MPI_Group_free(&newgroup);

174:   MPI_Comm_split(saved_PETSC_COMM_WORLD,rank/nodesize,rank % nodesize,&PETSC_COMM_LOCAL_WORLD);

176:   PetscInfo2(0,"PETSc OpenMP successfully started: number of nodes = %d node size = %d\n",size/nodesize,nodesize);
177:   PetscInfo1(0,"PETSc OpenMP process %sactive\n",(rank % nodesize) ? "in" : "");

179:   /* 
180:      All processes not involved in the user application code wait here
181:   */
182:   if (!PETSC_COMM_WORLD) {
183:     PetscOpenMPHandle(PETSC_COMM_LOCAL_WORLD);
184:     PETSC_COMM_WORLD  = saved_PETSC_COMM_WORLD;
185:     PetscOpenMPWorker = PETSC_TRUE; /* so that PetscOpenMPFinalize() will not attempt a broadcast from this process */
186:     PetscEnd();  /* cannot continue into user code */
187:   }
188:   return(0);
189: }
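To make the communicator construction above concrete, here is the layout produced by the documented example petscmpiexec -n 6 ./ex1 -openmp_merge_size 3 (size = 6, nodesize = 3):

   ranks kept in the new PETSC_COMM_WORLD   : {0, 3}            (ranks[i] = i*nodesize)
   PETSC_COMM_LOCAL_WORLD, color rank/3 = 0 : {0, 1, 2}
   PETSC_COMM_LOCAL_WORLD, color rank/3 = 1 : {3, 4, 5}
   ranks 1, 2, 4, 5                         : PETSC_COMM_WORLD is 0 on these, so they block in
                                              PetscOpenMPHandle() as worker processes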

193: /*@C
194:    PetscOpenMPFinalize - Finalizes PETSc and MPI for use with OpenMP. Called by PetscFinalize(); it cannot
195:        be called by the user.

197:    Collective on the entire system

199:    Level: developer
200:            
201: .seealso: PetscFinalize(), PetscGetArgs(), PetscOpenMPMerge(), PCOpenMPRun()

203: @*/
204: PetscErrorCode  PetscOpenMPFinalize(void)
205: {
206:   PetscErrorCode ierr = 0;
207:   PetscInt       command = 3;

210:   if (!PetscOpenMPWorker && PETSC_COMM_LOCAL_WORLD) {
211:     MPI_Bcast(&command,1,MPIU_INT,0,PETSC_COMM_LOCAL_WORLD); /* broadcast to my worker group to end program */
212:     PETSC_COMM_WORLD = saved_PETSC_COMM_WORLD;
213:   }
214:   PetscFunctionReturn(ierr);
215: }

217: static PetscInt numberobjects = 0;
218: static void     *objects[100];

222: /*@C
223:    PetscOpenMPHandle - Receives commands from the master node and processes them

225:    Collective on MPI_Comm

227:    Level: developer

229:    Notes: This is usually handled automatically; you likely do not need to call it directly.
230:            
231: .seealso: PetscOpenMPMerge(), PCOpenMPRun(), PCOpenMPNew()

233: @*/
234: PetscErrorCode  PetscOpenMPHandle(MPI_Comm comm)
235: {
237:   PetscInt       command;
238:   PetscTruth     exitwhileloop = PETSC_FALSE;

241:   while (!exitwhileloop) {
242:     MPI_Bcast(&command,1,MPIU_INT,0,comm);
243:     switch (command) {
244:     case 0: { /* allocate some memory on this worker process */
245:       PetscInt n;
246:       void     *ptr;
247:       MPI_Bcast(&n,1,MPIU_INT,0,comm);
248:       /* cannot use PetscNew() because it requires a struct argument */
249:       PetscMalloc(n,&ptr);
250:       PetscMemzero(ptr,n);
251:       objects[numberobjects++] = ptr;
252:       break;
253:     }
254:     case 1: {  /* free some memory on this worker process */
255:       PetscInt i;
256:       MPI_Bcast(&i,1,MPIU_INT,0,comm);
257:       PetscFree(objects[i]);
258:       objects[i] = 0;
259:       break;
260:     }
261:     case 2: {  /* run a function on this worker process */
262:       PetscInt       i;
263:       PetscErrorCode (*f)(MPI_Comm,void*);
264:       MPI_Bcast(&i,1,MPIU_INT,0,comm);
265:       MPI_Bcast(&f,1,MPIU_INT,0,comm); /* the function pointer itself is broadcast as a single MPIU_INT; assumes a function pointer fits in a PetscInt */
266:       (*f)(comm,objects[i]);
267:       break;
268:     }
269:     case 3: {
270:       exitwhileloop = PETSC_TRUE;
271:       break;
272:     }
273:     default:
274:       SETERRQ1(PETSC_ERR_PLIB,"Unknown OpenMP command %D",command);
275:     }
276:   }
277:   return(0);
278: }
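For reference, the commands received by the loop above are broadcast by rank 0 of the communicator passed to PetscOpenMPHandle() and are produced by the master-side routines defined below (and by PetscOpenMPFinalize()):

   command 0  --  PetscOpenMPNew()      : broadcast the size n, then PetscMalloc() and zero n bytes
   command 1  --  PetscOpenMPFree()     : broadcast the object index i, then free objects[i]
   command 2  --  PetscOpenMPRun()      : broadcast the object index i and the function pointer f,
                                          then call (*f)(comm,objects[i])
   command 3  --  PetscOpenMPFinalize() : leave the loop; the caller (PetscOpenMPSpawn() or
                                          PetscOpenMPMerge()) then calls PetscEnd()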

282: /*@C
283:    PetscOpenMPNew - Creates a "C struct" on all nodes of an OpenMP communicator

285:    Collective on MPI_Comm

287:    Level: developer
288:            
289:    Note: n is a PetscInt when it "really" should be a size_t

291: .seealso: PetscOpenMPMerge(), PCOpenMPRun(), PCOpenMPFree()

293: @*/
294: PetscErrorCode  PetscOpenMPNew(MPI_Comm comm,PetscInt n,void **ptr)
295: {
297:   PetscInt       command = 0;

300:   if (PetscOpenMPWorker) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not using OpenMP feature of PETSc");

302:   MPI_Bcast(&command,1,MPIU_INT,0,comm);
303:   MPI_Bcast(&n,1,MPIU_INT,0,comm);
304:   /* cannot use PetscNew() because it requires a struct argument */
305:   PetscMalloc(n,ptr);
306:   PetscMemzero(*ptr,n);
307:   objects[numberobjects++] = *ptr;
308:   return(0);
309: }

313: /*@C
314:    PetscOpenMPFree - Frees a "C struct" on all nodes of an OpenMP communicator

316:    Collective on MPI_Comm

318:    Level: developer
319:            
320: .seealso: PetscOpenMPMerge(), PetscOpenMPNew()

322: @*/
323: PetscErrorCode  PetscOpenMPFree(MPI_Comm comm,void *ptr)
324: {
326:   PetscInt       command = 1,i;

329:   if (PetscOpenMPWorker) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not using OpenMP feature of PETSc");

331:   MPI_Bcast(&command,1,MPIU_INT,0,comm);
332:   for (i=0; i<numberobjects; i++) {
333:     if (objects[i] == ptr) {
334:       MPI_Bcast(&i,1,MPIU_INT,0,comm);
335:       PetscFree(ptr);
336:       objects[i] = 0;
337:       return(0);
338:     }
339:   }
340:   SETERRQ(PETSC_ERR_ARG_WRONG,"Pointer does not appear to have been created with PetscOpenMPNew()");
341:   PetscFunctionReturn(0);
342: }

346: /*@C
347:    PetscOpenMPRun - runs a function on all the processes of a node

349:    Collective on MPI_Comm

351:    Level: developer
352:            
353: .seealso: PetscOpenMPMerge(), PetscOpenMPNew(), PetscOpenMPFree()

355: @*/
356: PetscErrorCode  PetscOpenMPRun(MPI_Comm comm,PetscErrorCode (*f)(MPI_Comm,void *),void *ptr)
357: {
359:   PetscInt       command = 2,i;

362:   if (PetscOpenMPWorker) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not using OpenMP feature of PETSc");

364:   MPI_Bcast(&command,1,MPIU_INT,0,comm);
365:   for (i=0; i<numberobjects; i++) {
366:     if (objects[i] == ptr) {
367:       MPI_Bcast(&i,1,MPIU_INT,0,comm);
368:       MPI_Bcast(&f,1,MPIU_INT,0,comm); /* broadcast the function pointer as a single MPIU_INT (see PetscOpenMPHandle()) */
369:       (*f)(comm,ptr);
370:       return(0);
371:     }
372:   }
373:   SETERRQ(PETSC_ERR_ARG_WRONG,"Pointer does not appear to have been created with PetscOpenMPNew()");
374:   PetscFunctionReturn(0);
375: }
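Finally, a sketch of how the three master-side routines are meant to be combined. It is not part of mpinit.c; MyCtx, MyKernel, and MyDriver are hypothetical names, and it assumes the worker processes of the node are already waiting in PetscOpenMPHandle() on PETSC_COMM_LOCAL_WORLD (i.e. PetscOpenMPMerge() or PetscOpenMPSpawn() has run).

   #include "petsc.h"

   typedef struct { PetscInt n; PetscScalar alpha; } MyCtx;   /* hypothetical context struct */

   PetscErrorCode MyKernel(MPI_Comm comm,void *ctx)
   {
     MyCtx *c = (MyCtx*)ctx;
     /* runs on every process of the node; on workers ctx is that worker's own (zeroed) copy */
     PetscInfo1(0,"MyKernel called, n = %D\n",c->n);
     return 0;
   }

   PetscErrorCode MyDriver(void)
   {
     MyCtx *ctx;

     PetscOpenMPNew(PETSC_COMM_LOCAL_WORLD,(PetscInt)sizeof(MyCtx),(void**)&ctx); /* allocate + zero on all node processes */
     ctx->n = 10; ctx->alpha = 2.0;                                               /* only the master's copy is filled here */
     PetscOpenMPRun(PETSC_COMM_LOCAL_WORLD,MyKernel,ctx);                         /* run MyKernel on every process of the node */
     PetscOpenMPFree(PETSC_COMM_LOCAL_WORLD,ctx);                                 /* free on all node processes */
     return 0;
   }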