Actual source code: comb.c

  1: #define PETSCVEC_DLL
  2: /*
  3:       Split phase global vector reductions with support for combining the
  4:    communication portion of several operations. Using MPI-1.1 support only

  6:       The idea for this and much of the initial code is contributed by 
  7:    Victor Eijkhout.

  9:        Usage:
 10:              VecDotBegin(Vec,Vec,PetscScalar *);
 11:              VecNormBegin(Vec,NormType,PetscReal *);
 12:              ....
 13:              VecDotEnd(Vec,Vec,PetscScalar *);
 14:              VecNormEnd(Vec,NormType,PetscReal *);

 16:        Limitations: 
 17:          - The xxxEnd() functions MUST be called in the same order
 18:            as the corresponding xxxBegin() functions. There is extensive error
 19:            checking to try to ensure that the user calls the routines in the correct order
 20: */

 22:  #include private/vecimpl.h

 24: #define STATE_BEGIN 0
 25: #define STATE_END   1

 27: #define REDUCE_SUM  0
 28: #define REDUCE_MAX  1
 29: #define REDUCE_MIN  2

 31: typedef struct {
 32:   MPI_Comm     comm;
 33:   PetscScalar  *lvalues;    /* this are the reduced values before call to MPI_Allreduce() */
 34:   PetscScalar  *gvalues;    /* values after call to MPI_Allreduce() */
 35:   void         **invecs;    /* for debugging only, vector/memory used with each op */
 36:   PetscInt     *reducetype; /* is particular value to be summed or maxed? */
 37:   PetscInt     state;       /* are we calling xxxBegin() or xxxEnd()? */
 38:   PetscInt     maxops;      /* total amount of space we have for requests */
 39:   PetscInt     numopsbegin; /* number of requests that have been queued in */
 40:   PetscInt     numopsend;   /* number of requests that have been gotten by user */
 41: } PetscSplitReduction;
 42: /*
 43:    Note: the lvalues and gvalues are twice as long as maxops, this is to allow the second half of
 44: the entries to have a flag indicating if they are REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN these are used by 
 45: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a reduction involves
 46: some of each.
 47: */

 51: /*
 52:    PetscSplitReductionCreate - Creates a data structure to contain the queued information.
 53: */
 54: PetscErrorCode  PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
 55: {

 59:   PetscNew(PetscSplitReduction,sr);
 60:   (*sr)->numopsbegin = 0;
 61:   (*sr)->numopsend   = 0;
 62:   (*sr)->state       = STATE_BEGIN;
 63:   (*sr)->maxops      = 32;
 64:   PetscMalloc(2*32*sizeof(PetscScalar),&(*sr)->lvalues);
 65:   PetscMalloc(2*32*sizeof(PetscScalar),&(*sr)->gvalues);
 66:   PetscMalloc(32*sizeof(void*),&(*sr)->invecs);
 67:   (*sr)->comm        = comm;
 68:   PetscMalloc(32*sizeof(PetscInt),&(*sr)->reducetype);
 69:   return(0);
 70: }

 72: /*
 73:        This function is the MPI reduction operation used when there is 
 74:    a combination of sums and max in the reduction. The call below to 
 75:    MPI_Op_create() converts the function PetscSplitReduction_Local() to the 
 76:    MPI operator PetscSplitReduction_Op.
 77: */
 78: MPI_Op PetscSplitReduction_Op = 0;

 83: void  MPIAPI PetscSplitReduction_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
 84: {
 85:   PetscScalar *xin = (PetscScalar *)in,*xout = (PetscScalar*)out;
 86:   PetscInt    i,count = (PetscInt)*cnt;

 89:   if (*datatype != MPIU_REAL) {
 90:     (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
 91:     MPI_Abort(MPI_COMM_WORLD,1);
 92:   }
 93: #if defined(PETSC_USE_COMPLEX)
 94:   count = count/2;
 95: #endif
 96:   count = count/2;
 97:   for (i=0; i<count; i++) {
 98:     if (((int)PetscRealPart(xin[count+i])) == REDUCE_SUM) { /* second half of xin[] is flags for reduction type */
 99:       xout[i] += xin[i];
100:     } else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MAX) {
101:       xout[i] = PetscMax(*(PetscReal *)(xout+i),*(PetscReal *)(xin+i));
102:     } else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MIN) {
103:       xout[i] = PetscMin(*(PetscReal *)(xout+i),*(PetscReal *)(xin+i));
104:     } else {
105:       (*PetscErrorPrintf)("Reduction type input is not REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN");
106:       MPI_Abort(MPI_COMM_WORLD,1);
107:     }
108:   }
109:   PetscStackPop; /* since function returns void cannot use PetscFunctionReturn(); */
110:   return;
111: }

116: /*
117:    PetscSplitReductionApply - Actually do the communication required for a split phase reduction
118: */
119: PetscErrorCode  PetscSplitReductionApply(PetscSplitReduction *sr)
120: {
122:   PetscInt       i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
123:   PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
124:   PetscInt       sum_flg = 0,max_flg = 0, min_flg = 0;
125:   MPI_Comm       comm = sr->comm;
126:   PetscMPIInt    size;

129:   if (sr->numopsend > 0) {
130:     SETERRQ(PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
131:   }

133:   PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,comm);
134:   MPI_Comm_size(sr->comm,&size);
135:   if (size == 1) {
136:     PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
137:   } else {
138:     /* determine if all reductions are sum, max, or min */
139:     for (i=0; i<numops; i++) {
140:       if (reducetype[i] == REDUCE_MAX) {
141:         max_flg = 1;
142:       } else if (reducetype[i] == REDUCE_SUM) {
143:         sum_flg = 1;
144:       } else if (reducetype[i] == REDUCE_MIN) {
145:         min_flg = 1;
146:       } else {
147:         SETERRQ(PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
148:       }
149:     }
150:     if (sum_flg + max_flg + min_flg > 1) {
151:       /* 
152:          after all the entires in lvalues we store the reducetype flags to indicate
153:          to the reduction operations what are sums and what are max
154:       */
155:       for (i=0; i<numops; i++) {
156:         lvalues[numops+i] = reducetype[i];
157:       }
158: #if defined(PETSC_USE_COMPLEX)
159:       MPI_Allreduce(lvalues,gvalues,2*2*numops,MPIU_REAL,PetscSplitReduction_Op,comm);
160: #else
161:       MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,PetscSplitReduction_Op,comm);
162: #endif
163:     } else if (max_flg) {
164: #if defined(PETSC_USE_COMPLEX)
165:       /* 
166:         complex case we max both the real and imaginary parts, the imaginary part
167:         is just ignored later
168:       */
169:       MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPI_MAX,comm);
170: #else
171:       MPI_Allreduce(lvalues,gvalues,numops,MPIU_REAL,MPI_MAX,comm);
172: #endif
173:     } else if (min_flg) {
174: #if defined(PETSC_USE_COMPLEX)
175:       /* 
176:         complex case we min both the real and imaginary parts, the imaginary part
177:         is just ignored later
178:       */
179:       MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPI_MIN,comm);
180: #else
181:       MPI_Allreduce(lvalues,gvalues,numops,MPIU_REAL,MPI_MIN,comm);
182: #endif
183:     } else {
184:       MPI_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,PetscSum_Op,comm);
185:     }
186:   }
187:   sr->state     = STATE_END;
188:   sr->numopsend = 0;
189:   PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,comm);
190:   return(0);
191: }


196: /*
197:    PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
198: */
199: PetscErrorCode  PetscSplitReductionExtend(PetscSplitReduction *sr)
200: {
202:   PetscInt         maxops = sr->maxops,*reducetype = sr->reducetype;
203:   PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
204:   void        *invecs = sr->invecs;

207:   sr->maxops     = 2*maxops;
208:   PetscMalloc(2*2*maxops*sizeof(PetscScalar),&sr->lvalues);
209:   PetscMalloc(2*2*maxops*sizeof(PetscScalar),&sr->gvalues);
210:   PetscMalloc(2*maxops*sizeof(PetscInt),&sr->reducetype);
211:   PetscMalloc(2*maxops*sizeof(void*),&sr->invecs);
212:   PetscMemcpy(sr->lvalues,lvalues,maxops*sizeof(PetscScalar));
213:   PetscMemcpy(sr->gvalues,gvalues,maxops*sizeof(PetscScalar));
214:   PetscMemcpy(sr->reducetype,reducetype,maxops*sizeof(PetscInt));
215:   PetscMemcpy(sr->invecs,invecs,maxops*sizeof(void*));
216:   PetscFree(lvalues);
217:   PetscFree(gvalues);
218:   PetscFree(reducetype);
219:   PetscFree(invecs);
220:   return(0);
221: }

225: PetscErrorCode  PetscSplitReductionDestroy(PetscSplitReduction *sr)
226: {

230:   PetscFree(sr->lvalues);
231:   PetscFree(sr->gvalues);
232:   PetscFree(sr->reducetype);
233:   PetscFree(sr->invecs);
234:   PetscFree(sr);
235:   return(0);
236: }

238: static PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;

243: /*
244:    Private routine to delete internal storage when a communicator is freed.
245:   This is called by MPI, not by users.

247:   The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
248:   it was MPI_Comm *comm.  
249: */
250: int  MPIAPI Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
251: {

255:   PetscInfo1(0,"Deleting reduction data in an MPI_Comm %ld\n",(long)comm);
256:   PetscSplitReductionDestroy((PetscSplitReduction *)attr_val);
257:   return(0);
258: }

261: /*
262:      PetscSplitReductionGet - Gets the split reduction object from a 
263:         PETSc vector, creates if it does not exit.

265: */
268: PetscErrorCode  PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
269: {
271:   PetscMPIInt    flag;

274:   if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
275:     /* 
276:        The calling sequence of the 2nd argument to this function changed
277:        between MPI Standard 1.0 and the revisions 1.1 Here we match the 
278:        new standard, if you are using an MPI implementation that uses 
279:        the older version you will get a warning message about the next line;
280:        it is only a warning message and should do no harm.
281:     */
282:     MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,0);
283:   }
284:   MPI_Attr_get(comm,Petsc_Reduction_keyval,(void **)sr,&flag);
285:   if (!flag) {  /* doesn't exist yet so create it and put it in */
286:     PetscSplitReductionCreate(comm,sr);
287:     MPI_Attr_put(comm,Petsc_Reduction_keyval,*sr);
288:     PetscInfo1(0,"Putting reduction data in an MPI_Comm %ld\n",(long)comm);
289:   }

291:   return(0);
292: }

294: /* ----------------------------------------------------------------------------------------------------*/

298: /*@
299:    VecDotBegin - Starts a split phase dot product computation.

301:    Input Parameters:
302: +   x - the first vector
303: .   y - the second vector
304: -   result - where the result will go (can be PETSC_NULL)

306:    Level: advanced

308:    Notes:
309:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

311: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
312:          VecTDotBegin(), VecTDotEnd()
313: @*/
314: PetscErrorCode  VecDotBegin(Vec x,Vec y,PetscScalar *result)
315: {
316:   PetscErrorCode      ierr;
317:   PetscSplitReduction *sr;
318:   MPI_Comm            comm;

321:   PetscObjectGetComm((PetscObject)x,&comm);
322:   PetscSplitReductionGet(comm,&sr);
323:   if (sr->state == STATE_END) {
324:     SETERRQ(PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
325:   }
326:   if (sr->numopsbegin >= sr->maxops) {
327:     PetscSplitReductionExtend(sr);
328:   }
329:   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
330:   sr->invecs[sr->numopsbegin]     = (void*)x;
331:   if (!x->ops->dot_local) SETERRQ(PETSC_ERR_SUP,"Vector does not suppport local dots");
332:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
333:   (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
334:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
335:   return(0);
336: }

340: /*@
341:    VecDotEnd - Ends a split phase dot product computation.

343:    Input Parameters:
344: +  x - the first vector (can be PETSC_NULL)
345: .  y - the second vector (can be PETSC_NULL)
346: -  result - where the result will go

348:    Level: advanced

350:    Notes:
351:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

353: seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
354:          VecTDotBegin(),VecTDotEnd()

356: @*/
357: PetscErrorCode  VecDotEnd(Vec x,Vec y,PetscScalar *result)
358: {
359:   PetscErrorCode      ierr;
360:   PetscSplitReduction *sr;
361:   MPI_Comm            comm;

364:   PetscObjectGetComm((PetscObject)x,&comm);
365:   PetscSplitReductionGet(comm,&sr);
366: 
367:   if (sr->state != STATE_END) {
368:     /* this is the first call to VecxxxEnd() so do the communication */
369:     PetscSplitReductionApply(sr);
370:   }

372:   if (sr->numopsend >= sr->numopsbegin) {
373:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
374:   }
375:   if (x && (void*) x != sr->invecs[sr->numopsend]) {
376:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
377:   }
378:   if (sr->reducetype[sr->numopsend] != REDUCE_SUM) {
379:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
380:   }
381:   *result = sr->gvalues[sr->numopsend++];

383:   /*
384:      We are finished getting all the results so reset to no outstanding requests
385:   */
386:   if (sr->numopsend == sr->numopsbegin) {
387:     sr->state        = STATE_BEGIN;
388:     sr->numopsend    = 0;
389:     sr->numopsbegin  = 0;
390:   }
391:   return(0);
392: }

396: /*@
397:    VecTDotBegin - Starts a split phase transpose dot product computation.

399:    Input Parameters:
400: +  x - the first vector
401: .  y - the second vector
402: -  result - where the result will go (can be PETSC_NULL)

404:    Level: advanced

406:    Notes:
407:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

409: seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
410:          VecDotBegin(), VecDotEnd()

412: @*/
413: PetscErrorCode  VecTDotBegin(Vec x,Vec y,PetscScalar *result)
414: {
415:   PetscErrorCode      ierr;
416:   PetscSplitReduction *sr;
417:   MPI_Comm            comm;

420:   PetscObjectGetComm((PetscObject)x,&comm);
421:   PetscSplitReductionGet(comm,&sr);
422:   if (sr->state == STATE_END) {
423:     SETERRQ(PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
424:   }
425:   if (sr->numopsbegin >= sr->maxops) {
426:     PetscSplitReductionExtend(sr);
427:   }
428:   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
429:   sr->invecs[sr->numopsbegin]     = (void*)x;
430:   if (!x->ops->tdot_local) SETERRQ(PETSC_ERR_SUP,"Vector does not suppport local dots");
431:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
432:   (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
433:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
434:   return(0);
435: }

439: /*@
440:    VecTDotEnd - Ends a split phase transpose dot product computation.

442:    Input Parameters:
443: +  x - the first vector (can be PETSC_NULL)
444: .  y - the second vector (can be PETSC_NULL)
445: -  result - where the result will go

447:    Level: advanced

449:    Notes:
450:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

452: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
453:          VecDotBegin(), VecDotEnd()
454: @*/
455: PetscErrorCode  VecTDotEnd(Vec x,Vec y,PetscScalar *result)
456: {

460:   /*
461:       TDotEnd() is the same as DotEnd() so reuse the code
462:   */
463:   VecDotEnd(x,y,result);
464:   return(0);
465: }

467: /* -------------------------------------------------------------------------*/

471: /*@
472:    VecNormBegin - Starts a split phase norm computation.

474:    Input Parameters:
475: +  x - the first vector
476: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
477: -  result - where the result will go (can be PETSC_NULL)

479:    Level: advanced

481:    Notes:
482:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

484: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd()

486: @*/
487: PetscErrorCode  VecNormBegin(Vec x,NormType ntype,PetscReal *result)
488: {
489:   PetscErrorCode      ierr;
490:   PetscSplitReduction *sr;
491:   PetscReal           lresult[2];
492:   MPI_Comm            comm;

495:   PetscObjectGetComm((PetscObject)x,&comm);
496:   PetscSplitReductionGet(comm,&sr);
497:   if (sr->state == STATE_END) {
498:     SETERRQ(PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
499:   }
500:   if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
501:     PetscSplitReductionExtend(sr);
502:   }
503: 
504:   sr->invecs[sr->numopsbegin]     = (void*)x;
505:   if (!x->ops->norm_local) SETERRQ(PETSC_ERR_SUP,"Vector does not support local norms");
506:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
507:   (*x->ops->norm_local)(x,ntype,lresult);
508:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
509:   if (ntype == NORM_2)         lresult[0]                = lresult[0]*lresult[0];
510:   if (ntype == NORM_1_AND_2)   lresult[1]                = lresult[1]*lresult[1];
511:   if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = REDUCE_MAX;
512:   else                   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
513:   sr->lvalues[sr->numopsbegin++] = lresult[0];
514:   if (ntype == NORM_1_AND_2) {
515:     sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
516:     sr->lvalues[sr->numopsbegin++]  = lresult[1];
517:   }
518:   return(0);
519: }

523: /*@
524:    VecNormEnd - Ends a split phase norm computation.

526:    Input Parameters:
527: +  x - the first vector (can be PETSC_NULL)
528: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
529: -  result - where the result will go

531:    Level: advanced

533:    Notes:
534:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

536: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd()

538: @*/
539: PetscErrorCode  VecNormEnd(Vec x,NormType ntype,PetscReal *result)
540: {
541:   PetscErrorCode      ierr;
542:   PetscSplitReduction *sr;
543:   MPI_Comm            comm;

546:   PetscObjectGetComm((PetscObject)x,&comm);
547:   PetscSplitReductionGet(comm,&sr);
548: 
549:   if (sr->state != STATE_END) {
550:     /* this is the first call to VecxxxEnd() so do the communication */
551:     PetscSplitReductionApply(sr);
552:   }

554:   if (sr->numopsend >= sr->numopsbegin) {
555:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
556:   }
557:   if (x && (void*)x != sr->invecs[sr->numopsend]) {
558:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
559:   }
560:   if (sr->reducetype[sr->numopsend] != REDUCE_MAX && ntype == NORM_MAX) {
561:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
562:   }
563:   result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);

565:   if (ntype == NORM_2) {
566:     result[0] = sqrt(result[0]);
567:   } else if (ntype == NORM_1_AND_2) {
568:     result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
569:     result[1] = sqrt(result[1]);
570:   }
571:   if (ntype!=NORM_1_AND_2) {
572:     PetscObjectComposedDataSetReal((PetscObject)x,NormIds[ntype],result[0]);
573:   }

575:   if (sr->numopsend == sr->numopsbegin) {
576:     sr->state        = STATE_BEGIN;
577:     sr->numopsend    = 0;
578:     sr->numopsbegin  = 0;
579:   }
580:   return(0);
581: }

583: /*
584:    Possibly add

586:      PetscReductionSumBegin/End()
587:      PetscReductionMaxBegin/End()
588:      PetscReductionMinBegin/End()
589:    or have more like MPI with a single function with flag for Op? Like first better
590: */

594: /*@
595:    VecMDotBegin - Starts a split phase multiple dot product computation.

597:    Input Parameters:
598: +   x - the first vector
599: .   nv - number of vectors
600: .   y - array of vectors
601: -   result - where the result will go (can be PETSC_NULL)

603:    Level: advanced

605:    Notes:
606:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

608: seealso: VecMDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
609:          VecTDotBegin(), VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd()
610: @*/
611: PetscErrorCode  VecMDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
612: {
613:   PetscErrorCode      ierr;
614:   PetscSplitReduction *sr;
615:   MPI_Comm            comm;
616:   int                 i;

619:   PetscObjectGetComm((PetscObject)x,&comm);
620:   PetscSplitReductionGet(comm,&sr);
621:   if (sr->state == STATE_END) {
622:     SETERRQ(PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
623:   }
624:   for (i=0;i<nv;i++) {
625:     if (sr->numopsbegin+i >= sr->maxops) {
626:       PetscSplitReductionExtend(sr);
627:     }
628:     sr->reducetype[sr->numopsbegin+i] = REDUCE_SUM;
629:     sr->invecs[sr->numopsbegin+i]     = (void*)x;
630:   }
631:   if (!x->ops->mdot_local) SETERRQ(PETSC_ERR_SUP,"Vector does not suppport local mdots");
632:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
633:   (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
634:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
635:   sr->numopsbegin += nv;
636:   return(0);
637: }

641: /*@
642:    VecMDotEnd - Ends a split phase multiple dot product computation.

644:    Input Parameters:
645: +   x - the first vector (can be PETSC_NULL)
646: .   nv - number of vectors
647: -   y - array of vectors (can be PETSC_NULL)

649:    Output Parameters:
650: .   result - where the result will go

652:    Level: advanced

654:    Notes:
655:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

657: seealso: VecMDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
658:          VecTDotBegin(),VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd()

660: @*/
661: PetscErrorCode  VecMDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
662: {
663:   PetscErrorCode      ierr;
664:   PetscSplitReduction *sr;
665:   MPI_Comm            comm;
666:   int                 i;

669:   PetscObjectGetComm((PetscObject)x,&comm);
670:   PetscSplitReductionGet(comm,&sr);
671: 
672:   if (sr->state != STATE_END) {
673:     /* this is the first call to VecxxxEnd() so do the communication */
674:     PetscSplitReductionApply(sr);
675:   }

677:   if (sr->numopsend >= sr->numopsbegin) {
678:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
679:   }
680:   if (x && (void*) x != sr->invecs[sr->numopsend]) {
681:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
682:   }
683:   if (sr->reducetype[sr->numopsend] != REDUCE_SUM) {
684:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
685:   }
686:   for (i=0;i<nv;i++) {
687:     result[i] = sr->gvalues[sr->numopsend++];
688:   }
689: 
690:   /*
691:      We are finished getting all the results so reset to no outstanding requests
692:   */
693:   if (sr->numopsend == sr->numopsbegin) {
694:     sr->state        = STATE_BEGIN;
695:     sr->numopsend    = 0;
696:     sr->numopsbegin  = 0;
697:   }
698:   return(0);
699: }

703: /*@
704:    VecMTDotBegin - Starts a split phase transpose multiple dot product computation.

706:    Input Parameters:
707: +  x - the first vector
708: .  nv - number of vectors
709: .  y - array of  vectors
710: -  result - where the result will go (can be PETSC_NULL)

712:    Level: advanced

714:    Notes:
715:    Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().

717: seealso: VecMTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
718:          VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd()

720: @*/
721: PetscErrorCode  VecMTDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
722: {
723:   PetscErrorCode      ierr;
724:   PetscSplitReduction *sr;
725:   MPI_Comm            comm;
726:   int                 i;

729:   PetscObjectGetComm((PetscObject)x,&comm);
730:   PetscSplitReductionGet(comm,&sr);
731:   if (sr->state == STATE_END) {
732:     SETERRQ(PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
733:   }
734:   for (i=0;i<nv;i++) {
735:     if (sr->numopsbegin+i >= sr->maxops) {
736:       PetscSplitReductionExtend(sr);
737:     }
738:     sr->reducetype[sr->numopsbegin+i] = REDUCE_SUM;
739:     sr->invecs[sr->numopsbegin+i]     = (void*)x;
740:   }
741:   if (!x->ops->mtdot_local) SETERRQ(PETSC_ERR_SUP,"Vector does not suppport local mdots");
742:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
743:   (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
744:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
745:   sr->numopsbegin += nv;
746:   return(0);
747: }

751: /*@
752:    VecMTDotEnd - Ends a split phase transpose multiple dot product computation.

754:    Input Parameters:
755: +  x - the first vector (can be PETSC_NULL)
756: .  nv - number of vectors
757: -  y - array of  vectors (can be PETSC_NULL)

759:    Output Parameters
760: .  result - where the result will go

762:    Level: advanced

764:    Notes:
765:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

767: seealso: VecMTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
768:          VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMdotEnd()
769: @*/
770: PetscErrorCode  VecMTDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
771: {

775:   /*
776:       MTDotEnd() is the same as MDotEnd() so reuse the code
777:   */
778:   VecMDotEnd(x,nv,y,result);
779:   return(0);
780: }