Actual source code: vpscat.c

#define PETSCVEC_DLL

/*
    Defines parallel vector scatters.
*/

#include "private/isimpl.h"
#include "private/vecimpl.h"
#include "../src/vec/vec/impls/dvecimpl.h"
#include "../src/vec/vec/impls/mpi/pvecimpl.h"
#include "petscsys.h"

PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
{
  VecScatter_MPI_General *to=(VecScatter_MPI_General*)ctx->todata;
  VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
  PetscErrorCode         ierr;
  PetscInt               i;
  PetscMPIInt            rank;
  PetscViewerFormat      format;
  PetscTruth             iascii;

  PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
  if (iascii) {
    MPI_Comm_rank(((PetscObject)ctx)->comm,&rank);
    PetscViewerGetFormat(viewer,&format);
    if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;

      MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
      MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
      itmp = to->starts[to->n];
      MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
      itmp = from->starts[from->n];
      MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
      MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,((PetscObject)ctx)->comm);

      PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
      PetscViewerASCIIPrintf(viewer,"  Maximum number of sends %D\n",nsend_max);
      PetscViewerASCIIPrintf(viewer,"  Maximum number of receives %D\n",nrecv_max);
      PetscViewerASCIIPrintf(viewer,"  Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
      PetscViewerASCIIPrintf(viewer,"  Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
      PetscViewerASCIIPrintf(viewer,"  Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));

    } else {
      PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
      if (to->n) {
        for (i=0; i<to->n; i++){
          PetscViewerASCIISynchronizedPrintf(viewer,"[%d]   %D length = %D to whom %D\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
        }
        PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
        for (i=0; i<to->starts[to->n]; i++){
          PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D\n",rank,to->indices[i]);
        }
      }

      PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
      if (from->n) {
        for (i=0; i<from->n; i++){
          PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %D\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
        }

        PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
        for (i=0; i<from->starts[from->n]; i++){
          PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D\n",rank,from->indices[i]);
        }
      }
      if (to->local.n) {
        PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);
        for (i=0; i<to->local.n; i++){
          PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D\n",rank,from->local.vslots[i],to->local.vslots[i]);
        }
      }

      PetscViewerFlush(viewer);
    }
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for this scatter",((PetscObject)viewer)->type_name);
  }
  return(0);
}

/* -----------------------------------------------------------------------------------*/
/*
      The next routine determines how much of the local part of the scatter is an
   exact copy of values into their current location. We check this here and then
   know that we need not perform that portion of the scatter when the vector is
   scattering to itself with INSERT_VALUES.

      This is currently not used, but it would speed up, for example, DALocalToLocalBegin/End().
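      For example, if to->vslots = {0,1,7} and from->vslots = {0,1,4}, only the
   final entry differs, so only that single value actually needs to be moved.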

*/
PetscErrorCode VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
{
  PetscInt       n = to->n,n_nonmatching = 0,i,*to_slots = to->vslots,*from_slots = from->vslots;
  PetscInt       *nto_slots,*nfrom_slots,j = 0;

  for (i=0; i<n; i++) {
    if (to_slots[i] != from_slots[i]) n_nonmatching++;
  }

  if (!n_nonmatching) {
    to->nonmatching_computed = PETSC_TRUE;
    to->n_nonmatching        = from->n_nonmatching = 0;
    PetscInfo1(scatter,"Reduced %D to 0\n", n);
  } else if (n_nonmatching == n) {
    to->nonmatching_computed = PETSC_FALSE;
    PetscInfo(scatter,"All values non-matching\n");
  } else {
    to->nonmatching_computed = PETSC_TRUE;
    to->n_nonmatching        = from->n_nonmatching = n_nonmatching;
    PetscMalloc(n_nonmatching*sizeof(PetscInt),&nto_slots);
    PetscMalloc(n_nonmatching*sizeof(PetscInt),&nfrom_slots);
    to->slots_nonmatching   = nto_slots;
    from->slots_nonmatching = nfrom_slots;
    for (i=0; i<n; i++) {
      if (to_slots[i] != from_slots[i]) {
        nto_slots[j]   = to_slots[i];
        nfrom_slots[j] = from_slots[i];
        j++;
      }
    }
    PetscInfo2(scatter,"Reduced %D to %D\n",n,n_nonmatching);
  }
  return(0);
}

/* --------------------------------------------------------------------------------------*/
PetscErrorCode VecScatterDestroy_PtoP(VecScatter ctx)
{
  VecScatter_MPI_General *to   = (VecScatter_MPI_General*)ctx->todata;
  VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
  PetscErrorCode         ierr;
  PetscInt               i;

  if (to->use_readyreceiver) {
    /*
       Since we have already started the receives we must cancel them before
       freeing the requests
    */
    for (i=0; i<from->n; i++) {
      MPI_Cancel(from->requests+i);
    }
    for (i=0; i<to->n; i++) {
      MPI_Cancel(to->rev_requests+i);
    }
  }

#if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
  if (to->use_alltoallw) {
    PetscFree3(to->wcounts,to->wdispls,to->types);
    PetscFree3(from->wcounts,from->wdispls,from->types);
  }
#endif

#if defined(PETSC_HAVE_MPI_WINDOW)
  if (to->use_window) {
    MPI_Win_free(&from->window);
    MPI_Win_free(&to->window);
  }
#endif

  if (to->use_alltoallv) {
    PetscFree2(to->counts,to->displs);
    PetscFree2(from->counts,from->displs);
  }

  /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
  /*
     IBM's PE version of MPI has a bug where freeing these guys will screw up later
     message passing.
  */
#if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
  if (!to->use_alltoallv) {   /* currently the to->requests etc. are ALWAYS allocated even if not used */
    if (to->requests) {
      for (i=0; i<to->n; i++) {
        MPI_Request_free(to->requests + i);
      }
    }
    if (to->rev_requests) {
      for (i=0; i<to->n; i++) {
        MPI_Request_free(to->rev_requests + i);
      }
    }
  }
  /*
      MPICH could not properly cancel requests, so in ready-receiver mode we
    cannot free the requests. That may be fixed by now; if not, put the following
    code inside an if (!to->use_readyreceiver) { } block.
  */
  if (!to->use_alltoallv) {    /* currently the from->requests etc. are ALWAYS allocated even if not used */
    if (from->requests) {
      for (i=0; i<from->n; i++) {
        MPI_Request_free(from->requests + i);
      }
    }

    if (from->rev_requests) {
      for (i=0; i<from->n; i++) {
        MPI_Request_free(from->rev_requests + i);
      }
    }
  }
#endif

  PetscFree(to->local.vslots);
  PetscFree(from->local.vslots);
  PetscFree2(to->counts,to->displs);
  PetscFree2(from->counts,from->displs);
  PetscFree(to->local.slots_nonmatching);
  PetscFree(from->local.slots_nonmatching);
  PetscFree(to->rev_requests);
  PetscFree(from->rev_requests);
  PetscFree(to->requests);
  PetscFree(from->requests);
  PetscFree4(to->values,to->indices,to->starts,to->procs);
  PetscFree2(to->sstatus,to->rstatus);
  PetscFree4(from->values,from->indices,from->starts,from->procs);
  PetscFree(from);
  PetscFree(to);
  PetscHeaderDestroy(ctx);
  return(0);
}



/* --------------------------------------------------------------------------------------*/
/*
    Special optimization to see if the local part of the scatter is actually
    a copy; if so, the scatter routines call PetscMemcpy() instead.
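    For example, with bs = 2, to->vslots = {10,12,14} and from->vslots = {0,2,4}
    describe a single contiguous copy of 6 scalars from offset 0 to offset 10.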

*/
PetscErrorCode VecScatterLocalOptimizeCopy_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from,PetscInt bs)
{
  PetscInt       n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
  PetscInt       to_start,from_start;

  to_start   = to_slots[0];
  from_start = from_slots[0];

  for (i=1; i<n; i++) {
    to_start   += bs;
    from_start += bs;
    if (to_slots[i]   != to_start)   return(0);
    if (from_slots[i] != from_start) return(0);
  }
  to->is_copy       = PETSC_TRUE;
  to->copy_start    = to_slots[0];
  to->copy_length   = bs*sizeof(PetscScalar)*n;
  from->is_copy     = PETSC_TRUE;
  from->copy_start  = from_slots[0];
  from->copy_length = bs*sizeof(PetscScalar)*n;
  PetscInfo(scatter,"Local scatter is a copy, optimizing for it\n");
  return(0);
}

/* --------------------------------------------------------------------------------------*/

PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
{
  VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
  VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
  PetscErrorCode         ierr;
  PetscInt               ny,bs = in_from->bs;

  out->begin     = in->begin;
  out->end       = in->end;
  out->copy      = in->copy;
  out->destroy   = in->destroy;
  out->view      = in->view;

  /* allocate entire send scatter context */
  PetscNewLog(out,VecScatter_MPI_General,&out_to);
  PetscNewLog(out,VecScatter_MPI_General,&out_from);

  ny                = in_to->starts[in_to->n];
  out_to->n         = in_to->n;
  out_to->type      = in_to->type;
  out_to->sendfirst = in_to->sendfirst;

  PetscMalloc(out_to->n*sizeof(MPI_Request),&out_to->requests);
  PetscMalloc4(bs*ny,PetscScalar,&out_to->values,ny,PetscInt,&out_to->indices,out_to->n+1,PetscInt,&out_to->starts,out_to->n,PetscMPIInt,&out_to->procs);
  PetscMalloc2(PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->sstatus,PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->rstatus);
  PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
  PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
  PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));

  out->todata       = (void*)out_to;
  out_to->local.n   = in_to->local.n;
  out_to->local.nonmatching_computed = PETSC_FALSE;
  out_to->local.n_nonmatching        = 0;
  out_to->local.slots_nonmatching    = 0;
  if (in_to->local.n) {
    PetscMalloc(in_to->local.n*sizeof(PetscInt),&out_to->local.vslots);
    PetscMalloc(in_from->local.n*sizeof(PetscInt),&out_from->local.vslots);
    PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
    PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
  } else {
    out_to->local.vslots   = 0;
    out_from->local.vslots = 0;
  }

  /* allocate entire receive context */
  out_from->type      = in_from->type;
  ny                  = in_from->starts[in_from->n];
  out_from->n         = in_from->n;
  out_from->sendfirst = in_from->sendfirst;

  PetscMalloc(out_from->n*sizeof(MPI_Request),&out_from->requests);
  PetscMalloc4(ny*bs,PetscScalar,&out_from->values,ny,PetscInt,&out_from->indices,out_from->n+1,PetscInt,&out_from->starts,out_from->n,PetscMPIInt,&out_from->procs);
  PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
  PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
  PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
  out->fromdata       = (void*)out_from;
  out_from->local.n   = in_from->local.n;
  out_from->local.nonmatching_computed = PETSC_FALSE;
  out_from->local.n_nonmatching        = 0;
  out_from->local.slots_nonmatching    = 0;

  /*
      set up the request arrays for use with MPI_Send_init() and MPI_Recv_init()
  */
  {
    PetscMPIInt tag;
    MPI_Comm    comm;
    PetscInt    *sstarts = out_to->starts,  *rstarts = out_from->starts;
    PetscMPIInt *sprocs  = out_to->procs,   *rprocs  = out_from->procs;
    PetscInt    i;
    PetscTruth  flg;
    MPI_Request *swaits  = out_to->requests,*rwaits  = out_from->requests;
    MPI_Request *rev_swaits,*rev_rwaits;
    PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;

    PetscMalloc(in_to->n*sizeof(MPI_Request),&out_to->rev_requests);
    PetscMalloc(in_from->n*sizeof(MPI_Request),&out_from->rev_requests);

    rev_rwaits = out_to->rev_requests;
    rev_swaits = out_from->rev_requests;

    out_from->bs = out_to->bs = bs;
    tag     = ((PetscObject)out)->tag;
    comm    = ((PetscObject)out)->comm;

    /* Register the receives that you will use later (sends for scatter reverse) */
    for (i=0; i<out_from->n; i++) {
      MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
      MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
    }

    PetscOptionsHasName(PETSC_NULL,"-vecscatter_rsend",&flg);
    if (flg) {
      out_to->use_readyreceiver    = PETSC_TRUE;
      out_from->use_readyreceiver  = PETSC_TRUE;
      for (i=0; i<out_to->n; i++) {
        MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
      }
      if (out_from->n) {MPI_Startall_irecv(out_from->starts[out_from->n]*out_from->bs,out_from->n,out_from->requests);}
      MPI_Barrier(comm);
      PetscInfo(in,"Using VecScatter ready receiver mode\n");
    } else {
      out_to->use_readyreceiver    = PETSC_FALSE;
      out_from->use_readyreceiver  = PETSC_FALSE;
      PetscOptionsHasName(PETSC_NULL,"-vecscatter_ssend",&flg);
      if (flg) {
        PetscInfo(in,"Using VecScatter Ssend mode\n");
      }
      for (i=0; i<out_to->n; i++) {
        if (!flg) {
          MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
        } else {
          MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
        }
      }
    }
    /* Register receives for scatter reverse */
    for (i=0; i<out_to->n; i++) {
      MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
    }
  }

  return(0);
}
/* --------------------------------------------------------------------------------------------------
    Packs the message data into send buffers and unpacks it from receive buffers.

    These could be generated automatically.

    Fortran kernels etc. could be used.
*/
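/*
    For orientation, a sketch of how these kernels are driven; the real driver
    loops live in vpscat.h, so the variable names below are illustrative only:

      Pack_2(sstarts[i+1]-sstarts[i],indices+sstarts[i],xarray,svalues+bs*sstarts[i]);
      MPI_Startall(nsends,swaits);                      start the persistent sends
      ...
      MPI_Waitany(nrecvs,rwaits,&imdex,&xrstatus);      wait for any incoming message
      UnPack_2(rstarts[imdex+1]-rstarts[imdex],rvalues+bs*rstarts[imdex],
               indices+rstarts[imdex],yarray,addv);     scatter it into the target vector
*/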
PETSC_STATIC_INLINE void Pack_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
{
  PetscInt i;
  for (i=0; i<n; i++) {
    y[i]  = x[indicesx[i]];
  }
}
PETSC_STATIC_INLINE void UnPack_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
{
  PetscInt i;
  switch (addv) {
  case INSERT_VALUES:
    for (i=0; i<n; i++) {
      y[indicesy[i]] = x[i];
    }
    break;
  case ADD_VALUES:
    for (i=0; i<n; i++) {
      y[indicesy[i]] += x[i];
    }
    break;
#if !defined(PETSC_USE_COMPLEX)
  case MAX_VALUES:
    for (i=0; i<n; i++) {
      y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
    }
#else
  case MAX_VALUES:
#endif
  case NOT_SET_VALUES:
    break;
  }
}

PETSC_STATIC_INLINE void Scatter_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
{
  PetscInt i;
  switch (addv) {
  case INSERT_VALUES:
    for (i=0; i<n; i++) {
      y[indicesy[i]] = x[indicesx[i]];
    }
    break;
  case ADD_VALUES:
    for (i=0; i<n; i++) {
      y[indicesy[i]] += x[indicesx[i]];
    }
    break;
#if !defined(PETSC_USE_COMPLEX)
  case MAX_VALUES:
    for (i=0; i<n; i++) {
      y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
    }
#else
  case MAX_VALUES:
#endif
  case NOT_SET_VALUES:
    break;
  }
}

/* ----------------------------------------------------------------------------------------------- */
PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
{
  PetscInt i,idx;

  for (i=0; i<n; i++) {
    idx   = *indicesx++;
    y[0]  = x[idx];
    y[1]  = x[idx+1];
    y    += 2;
  }
}
PETSC_STATIC_INLINE void UnPack_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
{
  PetscInt i,idy;

  switch (addv) {
  case INSERT_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    = x[0];
      y[idy+1]  = x[1];
      x    += 2;
    }
    break;
  case ADD_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    += x[0];
      y[idy+1]  += x[1];
      x    += 2;
    }
    break;
#if !defined(PETSC_USE_COMPLEX)
  case MAX_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    = PetscMax(y[idy],x[0]);
      y[idy+1]  = PetscMax(y[idy+1],x[1]);
      x    += 2;
    }
#else
  case MAX_VALUES:
#endif
  case NOT_SET_VALUES:
    break;
  }
}

PETSC_STATIC_INLINE void Scatter_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
{
  PetscInt i,idx,idy;

  switch (addv) {
  case INSERT_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    = x[idx];
      y[idy+1]  = x[idx+1];
    }
    break;
  case ADD_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    += x[idx];
      y[idy+1]  += x[idx+1];
    }
    break;
#if !defined(PETSC_USE_COMPLEX)
  case MAX_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    = PetscMax(y[idy],x[idx]);
      y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
    }
#else
  case MAX_VALUES:
#endif
  case NOT_SET_VALUES:
    break;
  }
}
/* ----------------------------------------------------------------------------------------------- */
PETSC_STATIC_INLINE void Pack_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
{
  PetscInt i,idx;

  for (i=0; i<n; i++) {
    idx   = *indicesx++;
    y[0]  = x[idx];
    y[1]  = x[idx+1];
    y[2]  = x[idx+2];
    y    += 3;
  }
}
PETSC_STATIC_INLINE void UnPack_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
{
  PetscInt i,idy;

  switch (addv) {
  case INSERT_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    = x[0];
      y[idy+1]  = x[1];
      y[idy+2]  = x[2];
      x    += 3;
    }
    break;
  case ADD_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    += x[0];
      y[idy+1]  += x[1];
      y[idy+2]  += x[2];
      x    += 3;
    }
    break;
#if !defined(PETSC_USE_COMPLEX)
  case MAX_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    = PetscMax(y[idy],x[0]);
      y[idy+1]  = PetscMax(y[idy+1],x[1]);
      y[idy+2]  = PetscMax(y[idy+2],x[2]);
      x    += 3;
    }
#else
  case MAX_VALUES:
#endif
  case NOT_SET_VALUES:
    break;
  }
}

PETSC_STATIC_INLINE void Scatter_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
{
  PetscInt i,idx,idy;

  switch (addv) {
  case INSERT_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    = x[idx];
      y[idy+1]  = x[idx+1];
      y[idy+2]  = x[idx+2];
    }
    break;
  case ADD_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    += x[idx];
      y[idy+1]  += x[idx+1];
      y[idy+2]  += x[idx+2];
    }
    break;
#if !defined(PETSC_USE_COMPLEX)
  case MAX_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    = PetscMax(y[idy],x[idx]);
      y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
      y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
    }
#else
  case MAX_VALUES:
#endif
  case NOT_SET_VALUES:
    break;
  }
}
/* ----------------------------------------------------------------------------------------------- */
PETSC_STATIC_INLINE void Pack_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
{
  PetscInt i,idx;

  for (i=0; i<n; i++) {
    idx   = *indicesx++;
    y[0]  = x[idx];
    y[1]  = x[idx+1];
    y[2]  = x[idx+2];
    y[3]  = x[idx+3];
    y    += 4;
  }
}
PETSC_STATIC_INLINE void UnPack_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
{
  PetscInt i,idy;

  switch (addv) {
  case INSERT_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    = x[0];
      y[idy+1]  = x[1];
      y[idy+2]  = x[2];
      y[idy+3]  = x[3];
      x    += 4;
    }
    break;
  case ADD_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    += x[0];
      y[idy+1]  += x[1];
      y[idy+2]  += x[2];
      y[idy+3]  += x[3];
      x    += 4;
    }
    break;
#if !defined(PETSC_USE_COMPLEX)
  case MAX_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    = PetscMax(y[idy],x[0]);
      y[idy+1]  = PetscMax(y[idy+1],x[1]);
      y[idy+2]  = PetscMax(y[idy+2],x[2]);
      y[idy+3]  = PetscMax(y[idy+3],x[3]);
      x    += 4;
    }
#else
  case MAX_VALUES:
#endif
  case NOT_SET_VALUES:
    break;
  }
}

PETSC_STATIC_INLINE void Scatter_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
{
  PetscInt i,idx,idy;

  switch (addv) {
  case INSERT_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    = x[idx];
      y[idy+1]  = x[idx+1];
      y[idy+2]  = x[idx+2];
      y[idy+3]  = x[idx+3];
    }
    break;
  case ADD_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    += x[idx];
      y[idy+1]  += x[idx+1];
      y[idy+2]  += x[idx+2];
      y[idy+3]  += x[idx+3];
    }
    break;
#if !defined(PETSC_USE_COMPLEX)
  case MAX_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    = PetscMax(y[idy],x[idx]);
      y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
      y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
      y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
    }
#else
  case MAX_VALUES:
#endif
  case NOT_SET_VALUES:
    break;
  }
}
/* ----------------------------------------------------------------------------------------------- */
PETSC_STATIC_INLINE void Pack_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
{
  PetscInt i,idx;

  for (i=0; i<n; i++) {
    idx   = *indicesx++;
    y[0]  = x[idx];
    y[1]  = x[idx+1];
    y[2]  = x[idx+2];
    y[3]  = x[idx+3];
    y[4]  = x[idx+4];
    y    += 5;
  }
}
PETSC_STATIC_INLINE void UnPack_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
{
  PetscInt i,idy;

  switch (addv) {
  case INSERT_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    = x[0];
      y[idy+1]  = x[1];
      y[idy+2]  = x[2];
      y[idy+3]  = x[3];
      y[idy+4]  = x[4];
      x    += 5;
    }
    break;
  case ADD_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    += x[0];
      y[idy+1]  += x[1];
      y[idy+2]  += x[2];
      y[idy+3]  += x[3];
      y[idy+4]  += x[4];
      x    += 5;
    }
    break;
#if !defined(PETSC_USE_COMPLEX)
  case MAX_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    = PetscMax(y[idy],x[0]);
      y[idy+1]  = PetscMax(y[idy+1],x[1]);
      y[idy+2]  = PetscMax(y[idy+2],x[2]);
      y[idy+3]  = PetscMax(y[idy+3],x[3]);
      y[idy+4]  = PetscMax(y[idy+4],x[4]);
      x    += 5;
    }
#else
  case MAX_VALUES:
#endif
  case NOT_SET_VALUES:
    break;
  }
}

PETSC_STATIC_INLINE void Scatter_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
{
  PetscInt i,idx,idy;

  switch (addv) {
  case INSERT_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    = x[idx];
      y[idy+1]  = x[idx+1];
      y[idy+2]  = x[idx+2];
      y[idy+3]  = x[idx+3];
      y[idy+4]  = x[idx+4];
    }
    break;
  case ADD_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    += x[idx];
      y[idy+1]  += x[idx+1];
      y[idy+2]  += x[idx+2];
      y[idy+3]  += x[idx+3];
      y[idy+4]  += x[idx+4];
    }
    break;
#if !defined(PETSC_USE_COMPLEX)
  case MAX_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    = PetscMax(y[idy],x[idx]);
      y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
      y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
      y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
      y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
    }
#else
  case MAX_VALUES:
#endif
  case NOT_SET_VALUES:
    break;
  }
}
/* ----------------------------------------------------------------------------------------------- */
PETSC_STATIC_INLINE void Pack_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
{
  PetscInt i,idx;

  for (i=0; i<n; i++) {
    idx   = *indicesx++;
    y[0]  = x[idx];
    y[1]  = x[idx+1];
    y[2]  = x[idx+2];
    y[3]  = x[idx+3];
    y[4]  = x[idx+4];
    y[5]  = x[idx+5];
    y    += 6;
  }
}
PETSC_STATIC_INLINE void UnPack_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
{
  PetscInt i,idy;

  switch (addv) {
  case INSERT_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    = x[0];
      y[idy+1]  = x[1];
      y[idy+2]  = x[2];
      y[idy+3]  = x[3];
      y[idy+4]  = x[4];
      y[idy+5]  = x[5];
      x    += 6;
    }
    break;
  case ADD_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    += x[0];
      y[idy+1]  += x[1];
      y[idy+2]  += x[2];
      y[idy+3]  += x[3];
      y[idy+4]  += x[4];
      y[idy+5]  += x[5];
      x    += 6;
    }
    break;
#if !defined(PETSC_USE_COMPLEX)
  case MAX_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    = PetscMax(y[idy],x[0]);
      y[idy+1]  = PetscMax(y[idy+1],x[1]);
      y[idy+2]  = PetscMax(y[idy+2],x[2]);
      y[idy+3]  = PetscMax(y[idy+3],x[3]);
      y[idy+4]  = PetscMax(y[idy+4],x[4]);
      y[idy+5]  = PetscMax(y[idy+5],x[5]);
      x    += 6;
    }
#else
  case MAX_VALUES:
#endif
  case NOT_SET_VALUES:
    break;
  }
}

PETSC_STATIC_INLINE void Scatter_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
{
  PetscInt i,idx,idy;

  switch (addv) {
  case INSERT_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    = x[idx];
      y[idy+1]  = x[idx+1];
      y[idy+2]  = x[idx+2];
      y[idy+3]  = x[idx+3];
      y[idy+4]  = x[idx+4];
      y[idy+5]  = x[idx+5];
    }
    break;
  case ADD_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    += x[idx];
      y[idy+1]  += x[idx+1];
      y[idy+2]  += x[idx+2];
      y[idy+3]  += x[idx+3];
      y[idy+4]  += x[idx+4];
      y[idy+5]  += x[idx+5];
    }
    break;
#if !defined(PETSC_USE_COMPLEX)
  case MAX_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    = PetscMax(y[idy],x[idx]);
      y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
      y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
      y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
      y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
      y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
    }
#else
  case MAX_VALUES:
#endif
  case NOT_SET_VALUES:
    break;
  }
}
/* ----------------------------------------------------------------------------------------------- */
PETSC_STATIC_INLINE void Pack_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
{
  PetscInt i,idx;

  for (i=0; i<n; i++) {
    idx   = *indicesx++;
    y[0]  = x[idx];
    y[1]  = x[idx+1];
    y[2]  = x[idx+2];
    y[3]  = x[idx+3];
    y[4]  = x[idx+4];
    y[5]  = x[idx+5];
    y[6]  = x[idx+6];
    y    += 7;
  }
}
PETSC_STATIC_INLINE void UnPack_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
{
  PetscInt i,idy;

  switch (addv) {
  case INSERT_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    = x[0];
      y[idy+1]  = x[1];
      y[idy+2]  = x[2];
      y[idy+3]  = x[3];
      y[idy+4]  = x[4];
      y[idy+5]  = x[5];
      y[idy+6]  = x[6];
      x    += 7;
    }
    break;
  case ADD_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    += x[0];
      y[idy+1]  += x[1];
      y[idy+2]  += x[2];
      y[idy+3]  += x[3];
      y[idy+4]  += x[4];
      y[idy+5]  += x[5];
      y[idy+6]  += x[6];
      x    += 7;
    }
    break;
#if !defined(PETSC_USE_COMPLEX)
  case MAX_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    = PetscMax(y[idy],x[0]);
      y[idy+1]  = PetscMax(y[idy+1],x[1]);
      y[idy+2]  = PetscMax(y[idy+2],x[2]);
      y[idy+3]  = PetscMax(y[idy+3],x[3]);
      y[idy+4]  = PetscMax(y[idy+4],x[4]);
      y[idy+5]  = PetscMax(y[idy+5],x[5]);
      y[idy+6]  = PetscMax(y[idy+6],x[6]);
      x    += 7;
    }
#else
  case MAX_VALUES:
#endif
  case NOT_SET_VALUES:
    break;
  }
}

PETSC_STATIC_INLINE void Scatter_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
{
  PetscInt i,idx,idy;

  switch (addv) {
  case INSERT_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    = x[idx];
      y[idy+1]  = x[idx+1];
      y[idy+2]  = x[idx+2];
      y[idy+3]  = x[idx+3];
      y[idy+4]  = x[idx+4];
      y[idy+5]  = x[idx+5];
      y[idy+6]  = x[idx+6];
    }
    break;
  case ADD_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    += x[idx];
      y[idy+1]  += x[idx+1];
      y[idy+2]  += x[idx+2];
      y[idy+3]  += x[idx+3];
      y[idy+4]  += x[idx+4];
      y[idy+5]  += x[idx+5];
      y[idy+6]  += x[idx+6];
    }
    break;
#if !defined(PETSC_USE_COMPLEX)
  case MAX_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    = PetscMax(y[idy],x[idx]);
      y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
      y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
      y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
      y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
      y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
      y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
    }
#else
  case MAX_VALUES:
#endif
  case NOT_SET_VALUES:
    break;
  }
}
/* ----------------------------------------------------------------------------------------------- */
PETSC_STATIC_INLINE void Pack_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
{
  PetscInt i,idx;

  for (i=0; i<n; i++) {
    idx   = *indicesx++;
    y[0]  = x[idx];
    y[1]  = x[idx+1];
    y[2]  = x[idx+2];
    y[3]  = x[idx+3];
    y[4]  = x[idx+4];
    y[5]  = x[idx+5];
    y[6]  = x[idx+6];
    y[7]  = x[idx+7];
    y    += 8;
  }
}
PETSC_STATIC_INLINE void UnPack_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
{
  PetscInt i,idy;

  switch (addv) {
  case INSERT_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    = x[0];
      y[idy+1]  = x[1];
      y[idy+2]  = x[2];
      y[idy+3]  = x[3];
      y[idy+4]  = x[4];
      y[idy+5]  = x[5];
      y[idy+6]  = x[6];
      y[idy+7]  = x[7];
      x    += 8;
    }
    break;
  case ADD_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    += x[0];
      y[idy+1]  += x[1];
      y[idy+2]  += x[2];
      y[idy+3]  += x[3];
      y[idy+4]  += x[4];
      y[idy+5]  += x[5];
      y[idy+6]  += x[6];
      y[idy+7]  += x[7];
      x    += 8;
    }
    break;
#if !defined(PETSC_USE_COMPLEX)
  case MAX_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    = PetscMax(y[idy],x[0]);
      y[idy+1]  = PetscMax(y[idy+1],x[1]);
      y[idy+2]  = PetscMax(y[idy+2],x[2]);
      y[idy+3]  = PetscMax(y[idy+3],x[3]);
      y[idy+4]  = PetscMax(y[idy+4],x[4]);
      y[idy+5]  = PetscMax(y[idy+5],x[5]);
      y[idy+6]  = PetscMax(y[idy+6],x[6]);
      y[idy+7]  = PetscMax(y[idy+7],x[7]);
      x    += 8;
    }
#else
  case MAX_VALUES:
#endif
  case NOT_SET_VALUES:
    break;
  }
}

PETSC_STATIC_INLINE void Scatter_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
{
  PetscInt i,idx,idy;

  switch (addv) {
  case INSERT_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    = x[idx];
      y[idy+1]  = x[idx+1];
      y[idy+2]  = x[idx+2];
      y[idy+3]  = x[idx+3];
      y[idy+4]  = x[idx+4];
      y[idy+5]  = x[idx+5];
      y[idy+6]  = x[idx+6];
      y[idy+7]  = x[idx+7];
    }
    break;
  case ADD_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    += x[idx];
      y[idy+1]  += x[idx+1];
      y[idy+2]  += x[idx+2];
      y[idy+3]  += x[idx+3];
      y[idy+4]  += x[idx+4];
      y[idy+5]  += x[idx+5];
      y[idy+6]  += x[idx+6];
      y[idy+7]  += x[idx+7];
    }
    break;
#if !defined(PETSC_USE_COMPLEX)
  case MAX_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    = PetscMax(y[idy],x[idx]);
      y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
      y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
      y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
      y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
      y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
      y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
      y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
    }
#else
  case MAX_VALUES:
#endif
  case NOT_SET_VALUES:
    break;
  }
}

/* ----------------------------------------------------------------------------------------------- */
PETSC_STATIC_INLINE void Pack_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
{
  PetscInt i,idx;

  for (i=0; i<n; i++) {
    idx   = *indicesx++;
    y[0]  = x[idx];
    y[1]  = x[idx+1];
    y[2]  = x[idx+2];
    y[3]  = x[idx+3];
    y[4]  = x[idx+4];
    y[5]  = x[idx+5];
    y[6]  = x[idx+6];
    y[7]  = x[idx+7];
    y[8]  = x[idx+8];
    y[9]  = x[idx+9];
    y[10] = x[idx+10];
    y[11] = x[idx+11];
    y    += 12;
  }
}
PETSC_STATIC_INLINE void UnPack_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
{
  PetscInt i,idy;

  switch (addv) {
  case INSERT_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    = x[0];
      y[idy+1]  = x[1];
      y[idy+2]  = x[2];
      y[idy+3]  = x[3];
      y[idy+4]  = x[4];
      y[idy+5]  = x[5];
      y[idy+6]  = x[6];
      y[idy+7]  = x[7];
      y[idy+8]  = x[8];
      y[idy+9]  = x[9];
      y[idy+10] = x[10];
      y[idy+11] = x[11];
      x    += 12;
    }
    break;
  case ADD_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    += x[0];
      y[idy+1]  += x[1];
      y[idy+2]  += x[2];
      y[idy+3]  += x[3];
      y[idy+4]  += x[4];
      y[idy+5]  += x[5];
      y[idy+6]  += x[6];
      y[idy+7]  += x[7];
      y[idy+8]  += x[8];
      y[idy+9]  += x[9];
      y[idy+10] += x[10];
      y[idy+11] += x[11];
      x    += 12;
    }
    break;
#if !defined(PETSC_USE_COMPLEX)
  case MAX_VALUES:
    for (i=0; i<n; i++) {
      idy       = *indicesy++;
      y[idy]    = PetscMax(y[idy],x[0]);
      y[idy+1]  = PetscMax(y[idy+1],x[1]);
      y[idy+2]  = PetscMax(y[idy+2],x[2]);
      y[idy+3]  = PetscMax(y[idy+3],x[3]);
      y[idy+4]  = PetscMax(y[idy+4],x[4]);
      y[idy+5]  = PetscMax(y[idy+5],x[5]);
      y[idy+6]  = PetscMax(y[idy+6],x[6]);
      y[idy+7]  = PetscMax(y[idy+7],x[7]);
      y[idy+8]  = PetscMax(y[idy+8],x[8]);
      y[idy+9]  = PetscMax(y[idy+9],x[9]);
      y[idy+10] = PetscMax(y[idy+10],x[10]);
      y[idy+11] = PetscMax(y[idy+11],x[11]);
      x    += 12;
    }
#else
  case MAX_VALUES:
#endif
  case NOT_SET_VALUES:
    break;
  }
}

PETSC_STATIC_INLINE void Scatter_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
{
  PetscInt i,idx,idy;

  switch (addv) {
  case INSERT_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    = x[idx];
      y[idy+1]  = x[idx+1];
      y[idy+2]  = x[idx+2];
      y[idy+3]  = x[idx+3];
      y[idy+4]  = x[idx+4];
      y[idy+5]  = x[idx+5];
      y[idy+6]  = x[idx+6];
      y[idy+7]  = x[idx+7];
      y[idy+8]  = x[idx+8];
      y[idy+9]  = x[idx+9];
      y[idy+10] = x[idx+10];
      y[idy+11] = x[idx+11];
    }
    break;
  case ADD_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    += x[idx];
      y[idy+1]  += x[idx+1];
      y[idy+2]  += x[idx+2];
      y[idy+3]  += x[idx+3];
      y[idy+4]  += x[idx+4];
      y[idy+5]  += x[idx+5];
      y[idy+6]  += x[idx+6];
      y[idy+7]  += x[idx+7];
      y[idy+8]  += x[idx+8];
      y[idy+9]  += x[idx+9];
      y[idy+10] += x[idx+10];
      y[idy+11] += x[idx+11];
    }
    break;
#if !defined(PETSC_USE_COMPLEX)
  case MAX_VALUES:
    for (i=0; i<n; i++) {
      idx       = *indicesx++;
      idy       = *indicesy++;
      y[idy]    = PetscMax(y[idy],x[idx]);
      y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
      y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
      y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
      y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
      y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
      y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
      y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
      y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
      y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
      y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
      y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
    }
#else
  case MAX_VALUES:
#endif
  case NOT_SET_VALUES:
    break;
  }
}

/* Create the VecScatterBegin/End_P for our chosen block sizes */
#define BS 1
#include "../src/vec/vec/utils/vpscat.h"
#define BS 2
#include "../src/vec/vec/utils/vpscat.h"
#define BS 3
#include "../src/vec/vec/utils/vpscat.h"
#define BS 4
#include "../src/vec/vec/utils/vpscat.h"
#define BS 5
#include "../src/vec/vec/utils/vpscat.h"
#define BS 6
#include "../src/vec/vec/utils/vpscat.h"
#define BS 7
#include "../src/vec/vec/utils/vpscat.h"
#define BS 8
#include "../src/vec/vec/utils/vpscat.h"
#define BS 12
#include "../src/vec/vec/utils/vpscat.h"
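/*
   Each inclusion of vpscat.h with a different BS acts as a poor man's C template,
   instantiating a VecScatterBegin/End pair specialized for that block size which
   calls the matching Pack_BS/UnPack_BS/Scatter_BS kernels above. A sketch of the
   token-pasting idiom such a header would rely on (the actual contents of
   vpscat.h may differ):

     #define PETSCMAP1_a(a,b)  a ## _ ## b
     #define PETSCMAP1_b(a,b)  PETSCMAP1_a(a,b)
     #define PETSCMAP1(a)      PETSCMAP1_b(a,BS)

     PetscErrorCode PETSCMAP1(VecScatterBegin)(VecScatter ctx,...)
     {
       ... calls PETSCMAP1(Pack)(...), PETSCMAP1(UnPack)(...), etc. ...
     }
     #undef BS
*/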

/* ==========================================================================================*/

/*              create parallel to sequential scatter context                           */

PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *,VecScatter_MPI_General *,VecScatter);

/*@
     VecScatterCreateLocal - Creates a VecScatter from a list of messages it must send and receive.

     Collective on VecScatter

   Input Parameters:
+     ctx - the scatter context, obtained with VecScatterCreateEmpty()
.     nsends - number of sends to perform
.     sendSizes - number of entries in each send
.     sendProcs - processes the sends go to
.     sendIdx - indices where the sent entries are obtained from (in local, on-process numbering); this is one long array of size \sum_{i=0,i<nsends} sendSizes[i]
.     nrecvs - number of receives to expect
.     recvSizes - number of entries in each receive
.     recvProcs - processes who are sending to me
.     recvIdx - indices of where received entries are to be put (in local, on-process numbering); this is one long array of size \sum_{i=0,i<nrecvs} recvSizes[i]
-     bs - size of block

     Notes:  sendSizes[] and recvSizes[] cannot have any 0 entries. If you want to support 0 entries you need
      to change the code below to "compress out" the sendProcs[] and recvProcs[] entries that have 0 entries.

       Probably does not handle sends to self properly; it should remove those from the counts that are used
      in allocating space inside the from struct.

  Level: intermediate

@*/
PetscErrorCode VecScatterCreateLocal(VecScatter ctx,PetscInt nsends,const PetscInt sendSizes[],const PetscInt sendProcs[],const PetscInt sendIdx[],PetscInt nrecvs,const PetscInt recvSizes[],const PetscInt recvProcs[],const PetscInt recvIdx[],PetscInt bs)
{
  VecScatter_MPI_General *from, *to;
  PetscInt               sendSize, recvSize;
  PetscInt               n, i;
  PetscErrorCode         ierr;

  /* allocate entire send scatter context */
  PetscNewLog(ctx,VecScatter_MPI_General,&to);
  to->n = nsends;
  for(n = 0, sendSize = 0; n < to->n; n++) {sendSize += sendSizes[n];}
  PetscMalloc(to->n*sizeof(MPI_Request),&to->requests);
  PetscMalloc4(bs*sendSize,PetscScalar,&to->values,sendSize,PetscInt,&to->indices,to->n+1,PetscInt,&to->starts,to->n,PetscMPIInt,&to->procs);
  PetscMalloc2(PetscMax(to->n,nrecvs),MPI_Status,&to->sstatus,PetscMax(to->n,nrecvs),MPI_Status,&to->rstatus);
  to->starts[0] = 0;
  for(n = 0; n < to->n; n++) {
    if (sendSizes[n] <= 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"sendSizes[n=%D] = %D cannot be less than 1",n,sendSizes[n]);
    to->starts[n+1] = to->starts[n] + sendSizes[n];
    to->procs[n] = sendProcs[n];
    for(i = to->starts[n]; i < to->starts[n]+sendSizes[n]; i++) {
      to->indices[i] = sendIdx[i];
    }
  }
  ctx->todata = (void *) to;

  /* allocate entire receive scatter context */
  PetscNewLog(ctx,VecScatter_MPI_General,&from);
  from->n = nrecvs;
  for(n = 0, recvSize = 0; n < from->n; n++) {recvSize += recvSizes[n];}
  PetscMalloc(from->n*sizeof(MPI_Request),&from->requests);
  PetscMalloc4(bs*recvSize,PetscScalar,&from->values,recvSize,PetscInt,&from->indices,from->n+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);
  from->starts[0] = 0;
  for(n = 0; n < from->n; n++) {
    if (recvSizes[n] <= 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"recvSizes[n=%D] = %D cannot be less than 1",n,recvSizes[n]);
    from->starts[n+1] = from->starts[n] + recvSizes[n];
    from->procs[n] = recvProcs[n];
    for(i = from->starts[n]; i < from->starts[n]+recvSizes[n]; i++) {
      from->indices[i] = recvIdx[i];
    }
  }
  ctx->fromdata = (void *)from;

  /* No local scatter optimization */
  from->local.n      = 0;
  from->local.vslots = 0;
  to->local.n        = 0;
  to->local.vslots   = 0;
  from->local.nonmatching_computed = PETSC_FALSE;
  from->local.n_nonmatching        = 0;
  from->local.slots_nonmatching    = 0;
  to->local.nonmatching_computed   = PETSC_FALSE;
  to->local.n_nonmatching          = 0;
  to->local.slots_nonmatching      = 0;

  from->type = VEC_SCATTER_MPI_GENERAL;
  to->type   = VEC_SCATTER_MPI_GENERAL;
  from->bs = bs;
  to->bs   = bs;
  VecScatterCreateCommon_PtoS(from, to, ctx);
  return(0);
}
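/*
   Example usage (a sketch, not taken from the PETSc sources): on each of two
   processes, send two entries to the other process and receive two back, with
   block size 1; ctx is assumed to have been obtained with VecScatterCreateEmpty().

     PetscInt sendSizes[1] = {2}, sendProcs[1], sendIdx[2] = {0,1};
     PetscInt recvSizes[1] = {2}, recvProcs[1], recvIdx[2] = {2,3};
     sendProcs[0] = recvProcs[0] = 1 - rank;   send to and receive from the other rank
     VecScatterCreateLocal(ctx,1,sendSizes,sendProcs,sendIdx,1,recvSizes,recvProcs,recvIdx,1);
*/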

/*
   bs indicates how many elements there are in each block. Normally this would be 1.

   Contains a check that PetscMPIInt can handle the sizes needed.
*/
PetscErrorCode VecScatterCreate_PtoS(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
{
  VecScatter_MPI_General *from,*to;
  PetscMPIInt            size,rank,imdex,tag,n;
  PetscInt               *source = PETSC_NULL,*owners = PETSC_NULL;
  PetscInt               *lowner = PETSC_NULL,*start = PETSC_NULL,lengthy,lengthx;
  PetscMPIInt            *nprocs = PETSC_NULL,nrecvs;
  PetscInt               i,j,idx,nsends;
  PetscInt               *owner = PETSC_NULL,*starts = PETSC_NULL,count,slen;
  PetscInt               *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
  PetscMPIInt            *onodes1,*olengths1;
  MPI_Comm               comm;
  MPI_Request            *send_waits = PETSC_NULL,*recv_waits = PETSC_NULL;
  MPI_Status             recv_status,*send_status;
  PetscErrorCode         ierr;

  PetscObjectGetNewTag((PetscObject)ctx,&tag);
  PetscObjectGetComm((PetscObject)xin,&comm);
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);
  owners = xin->map->range;
  VecGetSize(yin,&lengthy);
  VecGetSize(xin,&lengthx);

  /*  first count number of contributors to each processor */
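  /* owners[] holds the parallel layout: process j owns the global indices
     owners[j] through owners[j+1]-1, so the running pointer j below only has
     to restart when the input indices are not ascending */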
  PetscMalloc2(size,PetscMPIInt,&nprocs,nx,PetscInt,&owner);
  PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
  j      = 0;
  nsends = 0;
  for (i=0; i<nx; i++) {
    idx = inidx[i];
    if (idx < owners[j]) j = 0;
    for (; j<size; j++) {
      if (idx < owners[j+1]) {
        if (!nprocs[j]++) nsends++;
        owner[i] = j;
        break;
      }
    }
  }
  nprocslocal  = nprocs[rank];
  nprocs[rank] = 0;
  if (nprocslocal) nsends--;
  /* inform other processors of number of messages and max length */
  PetscGatherNumberOfMessages(comm,PETSC_NULL,nprocs,&nrecvs);
  PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
  PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
  recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

  /* post receives:   */
  PetscMalloc3(recvtotal,PetscInt,&rvalues,nrecvs,PetscInt,&source,nrecvs,MPI_Request,&recv_waits);
  count  = 0;
  for (i=0; i<nrecvs; i++) {
    MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
    count += olengths1[i];
  }

  /* do sends:
     starts[i] gives the starting index in svalues for entries going to
     the ith processor
  */
  PetscMalloc3(nx,PetscInt,&svalues,nsends,MPI_Request,&send_waits,size+1,PetscInt,&starts);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<nx; i++) {
    if (owner[i] != rank) {
      svalues[starts[owner[i]]++] = inidx[i];
    }
  }

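  /* starts[] was advanced while filling svalues above, so rebuild it (now with
     the size+1 entry) before posting the sends */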
1563:   starts[0] = 0;
1564:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1565:   count = 0;
1566:   for (i=0; i<size; i++) {
1567:     if (nprocs[i]) {
1568:       MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
1569:     }
1570:   }

1572:   /*  wait on receives */
1573:   count  = nrecvs;
1574:   slen   = 0;
1575:   while (count) {
1576:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1577:     /* unpack receives into our local space */
1578:     MPI_Get_count(&recv_status,MPIU_INT,&n);
1579:     slen += n;
1580:     count--;
1581:   }

1583:   if (slen != recvtotal) SETERRQ2(PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);
1584: 
1585:   /* allocate entire send scatter context */
1586:   PetscNewLog(ctx,VecScatter_MPI_General,&to);
1587:   to->n = nrecvs;
1588:   PetscMalloc(nrecvs*sizeof(MPI_Request),&to->requests);
1589:   PetscMalloc4(bs*slen,PetscScalar,&to->values,slen,PetscInt,&to->indices,nrecvs+1,PetscInt,&to->starts,nrecvs,PetscMPIInt,&to->procs);
1590:   PetscMalloc2(PetscMax(to->n,nsends),MPI_Status,&to->sstatus,PetscMax(to->n,nsends),MPI_Status,&to->rstatus);
1591:   ctx->todata   = (void*)to;
1592:   to->starts[0] = 0;

1594:   if (nrecvs) {

1596:     /* move the data into the send scatter */
1597:     base     = owners[rank];
1598:     rsvalues = rvalues;
1599:     for (i=0; i<nrecvs; i++) {
1600:       to->starts[i+1] = to->starts[i] + olengths1[i];
1601:       to->procs[i]    = onodes1[i];
1602:       values = rsvalues;
1603:       rsvalues += olengths1[i];
1604:       for (j=0; j<olengths1[i]; j++) {
1605:         to->indices[to->starts[i] + j] = values[j] - base;
1606:       }
1607:     }
1608:   }
1609:   PetscFree(olengths1);
1610:   PetscFree(onodes1);
1611:   PetscFree3(rvalues,source,recv_waits);

1613:   /* allocate entire receive scatter context */
1614:   PetscNewLog(ctx,VecScatter_MPI_General,&from);
1615:   from->n = nsends;

1617:   PetscMalloc(nsends*sizeof(MPI_Request),&from->requests);
1618:   PetscMalloc4((ny-nprocslocal)*bs,PetscScalar,&from->values,ny-nprocslocal,PetscInt,&from->indices,nsends+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);
1619:   ctx->fromdata  = (void*)from;

1621:   /* move data into receive scatter */
1622:   PetscMalloc2(size,PetscInt,&lowner,nsends+1,PetscInt,&start);
1623:   count = 0; from->starts[0] = start[0] = 0;
1624:   for (i=0; i<size; i++) {
1625:     if (nprocs[i]) {
1626:       lowner[i]            = count;
1627:       from->procs[count++] = i;
1628:       from->starts[count]  = start[count] = start[count-1] + nprocs[i];
1629:     }
1630:   }
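  /*
     Illustrative trace: with size = 4 and nprocs = {0,2,0,1} the loop above
     produces from->procs = {1,3}, lowner[1] = 0, lowner[3] = 1 and
     from->starts = start = {0,2,3}; lowner[] maps a rank to its compacted
     message slot so the loop below can drop inidy[i] into the segment that
     belongs to owner[i].
  */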

1632:   for (i=0; i<nx; i++) {
1633:     if (owner[i] != rank) {
1634:       from->indices[start[lowner[owner[i]]]++] = inidy[i];
1635:       if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
1636:     }
1637:   }
1638:   PetscFree2(lowner,start);
1639:   PetscFree2(nprocs,owner);
1640: 
1641:   /* wait on sends */
1642:   if (nsends) {
1643:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1644:     MPI_Waitall(nsends,send_waits,send_status);
1645:     PetscFree(send_status);
1646:   }
1647:   PetscFree3(svalues,send_waits,starts);

1649:   if (nprocslocal) {
1650:     PetscInt nt = from->local.n = to->local.n = nprocslocal;
1651:     /* we have a scatter to ourselves */
1652:     PetscMalloc(nt*sizeof(PetscInt),&to->local.vslots);
1653:     PetscMalloc(nt*sizeof(PetscInt),&from->local.vslots);
1654:     nt   = 0;
1655:     for (i=0; i<nx; i++) {
1656:       idx = inidx[i];
1657:       if (idx >= owners[rank] && idx < owners[rank+1]) {
1658:         to->local.vslots[nt]     = idx - owners[rank];
1659:         from->local.vslots[nt++] = inidy[i];
1660:         if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
1661:       }
1662:     }
1663:   } else {
1664:     from->local.n      = 0;
1665:     from->local.vslots = 0;
1666:     to->local.n        = 0;
1667:     to->local.vslots   = 0;
1668:   }
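  /*
     Example of the self-scatter above (hypothetical numbers): if this rank
     owns [100,150) and the pair (inidx[i],inidy[i]) = (103,7) occurs, no
     message is needed; to->local.vslots records 3, from->local.vslots
     records 7, and the entry is later copied straight from local slot 3 of
     the source vector to slot 7 of the destination.
  */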

1670:   from->local.nonmatching_computed = PETSC_FALSE;
1671:   from->local.n_nonmatching        = 0;
1672:   from->local.slots_nonmatching    = 0;
1673:   to->local.nonmatching_computed   = PETSC_FALSE;
1674:   to->local.n_nonmatching          = 0;
1675:   to->local.slots_nonmatching      = 0;

1677:   from->type = VEC_SCATTER_MPI_GENERAL;
1678:   to->type   = VEC_SCATTER_MPI_GENERAL;
1679:   from->bs = bs;
1680:   to->bs   = bs;
1681:   VecScatterCreateCommon_PtoS(from,to,ctx);
1682:   return(0);
1683: }

1685: /*
1686:    bs indicates how many elements there are in each block. Normally this would be 1.
1687: */
1690: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
1691: {
1692:   MPI_Comm       comm = ((PetscObject)ctx)->comm;
1693:   PetscMPIInt    tag  = ((PetscObject)ctx)->tag, tagr;
1694:   PetscInt       bs   = to->bs;
1695:   PetscMPIInt    size;
1696:   PetscInt       i, n;
1698: 
1700:   PetscObjectGetNewTag((PetscObject)ctx,&tagr);
1701:   ctx->destroy = VecScatterDestroy_PtoP;

1703:   PetscOptionsHasName(PETSC_NULL,"-vecscatter_reproduce",&ctx->reproduce);
1704:   PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&to->sendfirst);
1705:   from->sendfirst = to->sendfirst;

1707:   MPI_Comm_size(comm,&size);
1708:   /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
1709:   to->contiq = PETSC_FALSE;
1710:   n = from->starts[from->n];
1711:   from->contiq = PETSC_TRUE;
1712:   for (i=1; i<n; i++) {
1713:     if (from->indices[i] != from->indices[i-1] + bs) {
1714:       from->contiq = PETSC_FALSE;
1715:       break;
1716:     }
1717:   }
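  /*
     Example of the contiguity test (illustrative): with bs = 1 and
     from->indices = {5,6,7,8} every index equals its predecessor plus bs,
     so from->contiq remains PETSC_TRUE and the unpack can be a straight
     copy; the alltoallw branch below takes advantage of this.
  */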

1719:   PetscOptionsHasName(PETSC_NULL,"-vecscatter_alltoall",&to->use_alltoallv);
1720:   from->use_alltoallv = to->use_alltoallv;
1721:   if (from->use_alltoallv) PetscInfo(ctx,"Using MPI_Alltoallv() for scatter\n");
1722: #if defined(PETSC_HAVE_MPI_ALLTOALLW)  && !defined(PETSC_USE_64BIT_INDICES)
1723:   if (to->use_alltoallv) {
1724:     PetscOptionsHasName(PETSC_NULL,"-vecscatter_nopack",&to->use_alltoallw);
1725:   }
1726:   from->use_alltoallw = to->use_alltoallw;
1727:   if (from->use_alltoallw) PetscInfo(ctx,"Using MPI_Alltoallw() for scatter\n");
1728: #endif

1730: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
1731:   PetscOptionsHasName(PETSC_NULL,"-vecscatter_window",&to->use_window);
1732:   from->use_window = to->use_window;
1733: #endif

1735:   if (to->use_alltoallv) {

1737:     PetscMalloc2(size,PetscMPIInt,&to->counts,size,PetscMPIInt,&to->displs);
1738:     PetscMemzero(to->counts,size*sizeof(PetscMPIInt));
1739:     for (i=0; i<to->n; i++) {
1740:       to->counts[to->procs[i]] = bs*(to->starts[i+1] - to->starts[i]);
1741:     }
1742:     to->displs[0] = 0;
1743:     for (i=1; i<size; i++) {
1744:       to->displs[i] = to->displs[i-1] + to->counts[i-1];
1745:     }

1747:     PetscMalloc2(size,PetscMPIInt,&from->counts,size,PetscMPIInt,&from->displs);
1748:     PetscMemzero(from->counts,size*sizeof(PetscMPIInt));
1749:     for (i=0; i<from->n; i++) {
1750:       from->counts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
1751:     }
1752:     from->displs[0] = 0;
1753:     for (i=1; i<size; i++) {
1754:       from->displs[i] = from->displs[i-1] + from->counts[i-1];
1755:     }
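    /*
       Worked example of the tables above (hypothetical numbers): with
       size = 3, bs = 1, to->n = 2, to->procs = {0,2} and
       to->starts = {0,4,9}, we get to->counts = {4,0,5} and
       to->displs = {0,4,4}, which is exactly the per-rank count/offset
       layout MPI_Alltoallv() expects for the packed buffer to->values.
    */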
1756: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
1757:     if (to->use_alltoallw) {
1758:       PetscMPIInt mpibs = PetscMPIIntCast(bs), mpilen;
1759:       ctx->packtogether = PETSC_FALSE;
1760:       PetscMalloc3(size,PetscMPIInt,&to->wcounts,size,PetscMPIInt,&to->wdispls,size,MPI_Datatype,&to->types);
1761:       PetscMemzero(to->wcounts,size*sizeof(PetscMPIInt));
1762:       PetscMemzero(to->wdispls,size*sizeof(PetscMPIInt));
1763:       for (i=0; i<size; i++) {
1764:         to->types[i] = MPIU_SCALAR;
1765:       }

1767:       for (i=0; i<to->n; i++) {
1768:         to->wcounts[to->procs[i]] = 1;
1769:         mpilen = PetscMPIIntCast(to->starts[i+1]-to->starts[i]);
1770:         MPI_Type_create_indexed_block(mpilen,mpibs,to->indices+to->starts[i],MPIU_SCALAR,to->types+to->procs[i]);
1771:         MPI_Type_commit(to->types+to->procs[i]);
1772:       }
1773:       PetscMalloc3(size,PetscMPIInt,&from->wcounts,size,PetscMPIInt,&from->wdispls,size,MPI_Datatype,&from->types);
1774:       PetscMemzero(from->wcounts,size*sizeof(PetscMPIInt));
1775:       PetscMemzero(from->wdispls,size*sizeof(PetscMPIInt));
1776:       for (i=0; i<size; i++) {
1777:         from->types[i] = MPIU_SCALAR;
1778:       }
1779:       if (from->contiq) {
1780:         PetscInfo(ctx,"Scattered vector entries are stored contiquously, taking advantage of this with -vecscatter_alltoall\n");
1781:         for (i=0; i<from->n; i++) {
1782:           from->wcounts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
1783:         }
1784:         if (from->n) from->wdispls[from->procs[0]] = sizeof(PetscScalar)*from->indices[0];
1785:         for (i=1; i<from->n; i++) {
1786:           from->wdispls[from->procs[i]] = from->wdispls[from->procs[i-1]] + sizeof(PetscScalar)*from->wcounts[from->procs[i-1]];
1787:         }
1788:       } else {
1789:         for (i=0; i<from->n; i++) {
1790:           from->wcounts[from->procs[i]] = 1;
1791:           mpilen = PetscMPIIntCast(from->starts[i+1]-from->starts[i]);
1792:           MPI_Type_create_indexed_block(mpilen,mpibs,from->indices+from->starts[i],MPIU_SCALAR,from->types+from->procs[i]);
1793:           MPI_Type_commit(from->types+from->procs[i]);
1794:         }
1795:       }
1796:     }
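    /*
       The indexed-block datatypes built above let MPI_Alltoallw() pull the
       entries straight out of the vector, skipping the pack step (hence the
       -vecscatter_nopack name).  Illustrative case with bs = 1: the indices
       {3,7,9} yield a datatype that selects scalars 3, 7 and 9 of the
       source array as one count-1 item.
    */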
1797: #else 
1798:     to->use_alltoallw   = PETSC_FALSE;
1799:     from->use_alltoallw = PETSC_FALSE;
1800: #endif
1801: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
1802:   } else if (to->use_window) {
1803:     PetscMPIInt temptag,winsize;
1804:     MPI_Request *request;
1805:     MPI_Status  *status;
1806: 
1807:     PetscObjectGetNewTag((PetscObject)ctx,&temptag);
1808:     winsize = (to->n ? to->starts[to->n] : 0)*bs*sizeof(PetscScalar);
1809:     MPI_Win_create(to->values ? to->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&to->window);
1810:     PetscMalloc(to->n*sizeof(PetscInt),&to->winstarts);
1811:     PetscMalloc2(to->n,MPI_Request,&request,to->n,MPI_Status,&status);
1812:     for (i=0; i<to->n; i++) {
1813:       MPI_Irecv(to->winstarts+i,1,MPIU_INT,to->procs[i],temptag,comm,request+i);
1814:     }
1815:     for (i=0; i<from->n; i++) {
1816:       MPI_Send(from->starts+i,1,MPIU_INT,from->procs[i],temptag,comm);
1817:     }
1818:     MPI_Waitall(to->n,request,status);
1819:     PetscFree2(request,status);

1821:     winsize = (from->n ? from->starts[from->n] : 0)*bs*sizeof(PetscScalar);
1822:     MPI_Win_create(from->values ? from->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&from->window);
1823:     PetscMalloc(from->n*sizeof(PetscInt),&from->winstarts);
1824:     PetscMalloc2(from->n,MPI_Request,&request,from->n,MPI_Status,&status);
1825:     for (i=0; i<from->n; i++) {
1826:       MPI_Irecv(from->winstarts+i,1,MPIU_INT,from->procs[i],temptag,comm,request+i);
1827:     }
1828:     for (i=0; i<to->n; i++) {
1829:       MPI_Send(to->starts+i,1,MPIU_INT,to->procs[i],temptag,comm);
1830:     }
1831:     MPI_Waitall(from->n,request,status);
1832:     PetscFree2(request,status);
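    /*
       A hedged reading of the exchange above: every process that will
       receive data exposes its buffer as an MPI window and tells each of
       its senders the offset at which that sender's chunk begins; the
       replies are collected in winstarts[], which the one-sided transfers
       performed during the scatter use as target displacements.
    */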
1833: #endif
1834:   } else {
1835:     PetscTruth  use_rsend = PETSC_FALSE, use_ssend = PETSC_FALSE;
1836:     PetscInt    *sstarts = to->starts,  *rstarts = from->starts;
1837:     PetscMPIInt *sprocs  = to->procs,   *rprocs  = from->procs;
1838:     MPI_Request *swaits  = to->requests,*rwaits  = from->requests;
1839:     MPI_Request *rev_swaits,*rev_rwaits;
1840:     PetscScalar *Ssvalues = to->values, *Srvalues = from->values;

1842:     /* allocate additional wait variables for the "reverse" scatter */
1843:     PetscMalloc(to->n*sizeof(MPI_Request),&rev_rwaits);
1844:     PetscMalloc(from->n*sizeof(MPI_Request),&rev_swaits);
1845:     to->rev_requests   = rev_rwaits;
1846:     from->rev_requests = rev_swaits;

1848:     /* Register the receives that you will use later (sends for scatter reverse) */
1849:     PetscOptionsHasName(PETSC_NULL,"-vecscatter_rsend",&use_rsend);
1850:     PetscOptionsHasName(PETSC_NULL,"-vecscatter_ssend",&use_ssend);
1851:     if (use_rsend) {
1852:       PetscInfo(ctx,"Using VecScatter ready receiver mode\n");
1853:       to->use_readyreceiver    = PETSC_TRUE;
1854:       from->use_readyreceiver  = PETSC_TRUE;
1855:     } else {
1856:       to->use_readyreceiver    = PETSC_FALSE;
1857:       from->use_readyreceiver  = PETSC_FALSE;
1858:     }
1859:     if (use_ssend) {
1860:       PetscInfo(ctx,"Using VecScatter Ssend mode\n");
1861:     }

1863:     for (i=0; i<from->n; i++) {
1864:       if (use_rsend) {
1865:         MPI_Rsend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
1866:       } else if (use_ssend) {
1867:         MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
1868:       } else {
1869:         MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
1870:       }
1871:     }

1873:     for (i=0; i<to->n; i++) {
1874:       if (use_rsend) {
1875:         MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
1876:       } else if (use_ssend) {
1877:         MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
1878:       } else {
1879:         MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
1880:       }
1881:     }
1882:     /* Register receives for scatter and reverse */
1883:     for (i=0; i<from->n; i++) {
1884:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
1885:     }
1886:     for (i=0; i<to->n; i++) {
1887:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
1888:     }
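    /*
       All four request sets above are persistent: MPI_Send_init(),
       MPI_Ssend_init(), MPI_Rsend_init() and MPI_Recv_init() only describe
       the transfers; each scatter later fires them with MPI_Startall() and
       completes them with MPI_Waitall(), so the setup cost is paid once per
       context rather than once per scatter.
    */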
1889:     if (use_rsend) {
1890:       if (to->n)   {MPI_Startall_irecv(to->starts[to->n]*to->bs,to->n,to->rev_requests);}
1891:       if (from->n) {MPI_Startall_irecv(from->starts[from->n]*from->bs,from->n,from->requests);}
1892:       MPI_Barrier(comm);
1893:     }

1895:     ctx->copy      = VecScatterCopy_PtoP_X;
1896:   }
1897:   PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);
1898:   switch (bs) {
1899:   case 12:
1900:     ctx->begin     = VecScatterBegin_12;
1901:     ctx->end       = VecScatterEnd_12;
1902:     break;
1903:   case 8:
1904:     ctx->begin     = VecScatterBegin_8;
1905:     ctx->end       = VecScatterEnd_8;
1906:     break;
1907:   case 7:
1908:     ctx->begin     = VecScatterBegin_7;
1909:     ctx->end       = VecScatterEnd_7;
1910:     break;
1911:   case 6:
1912:     ctx->begin     = VecScatterBegin_6;
1913:     ctx->end       = VecScatterEnd_6;
1914:     break;
1915:   case 5:
1916:     ctx->begin     = VecScatterBegin_5;
1917:     ctx->end       = VecScatterEnd_5;
1918:     break;
1919:   case 4:
1920:     ctx->begin     = VecScatterBegin_4;
1921:     ctx->end       = VecScatterEnd_4;
1922:     break;
1923:   case 3:
1924:     ctx->begin     = VecScatterBegin_3;
1925:     ctx->end       = VecScatterEnd_3;
1926:     break;
1927:   case 2:
1928:     ctx->begin     = VecScatterBegin_2;
1929:     ctx->end       = VecScatterEnd_2;
1930:     break;
1931:   case 1:
1932:     ctx->begin     = VecScatterBegin_1;
1933:     ctx->end       = VecScatterEnd_1;
1934:     break;
1935:   default:
1936:     SETERRQ(PETSC_ERR_SUP,"Blocksize not supported");
1937:   }
1938:   ctx->view      = VecScatterView_MPI;
1939:   /* Check if the local scatter is actually a copy; important special case */
1940:   if (to->local.n) {
1941:     VecScatterLocalOptimizeCopy_Private(ctx,&to->local,&from->local,bs);
1942:   }
1943:   return(0);
1944: }
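/*
   A minimal, self-contained sketch (not part of the library; all names are
   hypothetical) of the persistent-request pattern used above: describe the
   transfers once, then start and complete them on every "scatter".
*/
#if 0
#include <mpi.h>
int main(int argc,char **argv)
{
  int         rank,size,iter;
  double      sendbuf[4] = {0,1,2,3},recvbuf[4];
  MPI_Request req[2];

  MPI_Init(&argc,&argv);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  MPI_Comm_size(MPI_COMM_WORLD,&size);
  /* describe the transfers once: a ring exchange with the neighbor ranks */
  MPI_Send_init(sendbuf,4,MPI_DOUBLE,(rank+1)%size,0,MPI_COMM_WORLD,&req[0]);
  MPI_Recv_init(recvbuf,4,MPI_DOUBLE,(rank+size-1)%size,0,MPI_COMM_WORLD,&req[1]);
  for (iter=0; iter<10; iter++) {            /* one "scatter" per iteration */
    MPI_Startall(2,req);                     /* fire the persistent requests */
    MPI_Waitall(2,req,MPI_STATUSES_IGNORE);  /* complete; requests stay reusable */
  }
  MPI_Request_free(&req[0]);
  MPI_Request_free(&req[1]);
  MPI_Finalize();
  return 0;
}
#endif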



1948: /* ------------------------------------------------------------------------------------*/
1949: /*
1950:          Scatter from local Seq vectors to a parallel vector.
1951:          Reverses the order of the arguments, calls VecScatterCreate_PtoS(),
1952:          then reverses the result.
1953: */
1956: PetscErrorCode VecScatterCreate_StoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
1957: {
1958:   PetscErrorCode         ierr;
1959:   MPI_Request            *waits;
1960:   VecScatter_MPI_General *to,*from;

1963:   VecScatterCreate_PtoS(ny,inidy,nx,inidx,yin,xin,bs,ctx);
1964:   to            = (VecScatter_MPI_General*)ctx->fromdata;
1965:   from          = (VecScatter_MPI_General*)ctx->todata;
1966:   ctx->todata   = (void*)to;
1967:   ctx->fromdata = (void*)from;
1968:   /* these two are special; they are ALWAYS stored in the to struct */
1969:   to->sstatus   = from->sstatus;
1970:   to->rstatus   = from->rstatus;

1972:   from->sstatus = 0;
1973:   from->rstatus = 0;

1975:   waits              = from->rev_requests;
1976:   from->rev_requests = from->requests;
1977:   from->requests     = waits;
1978:   waits              = to->rev_requests;
1979:   to->rev_requests   = to->requests;
1980:   to->requests       = waits;
1981:   return(0);
1982: }

1984: /* ---------------------------------------------------------------------------------*/
1987: PetscErrorCode VecScatterCreate_PtoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,VecScatter ctx)
1988: {
1990:   PetscMPIInt    size,rank,tag,imdex,n;
1991:   PetscInt       *owners = xin->map->range;
1992:   PetscMPIInt    *nprocs = PETSC_NULL;
1993:   PetscInt       i,j,idx,nsends,*local_inidx = PETSC_NULL,*local_inidy = PETSC_NULL;
1994:   PetscInt       *owner = PETSC_NULL,*starts = PETSC_NULL,count,slen;
1995:   PetscInt       *rvalues = PETSC_NULL,*svalues = PETSC_NULL,base,*values = PETSC_NULL,*rsvalues,recvtotal,lastidx;
1996:   PetscMPIInt    *onodes1,*olengths1,nrecvs;
1997:   MPI_Comm       comm;
1998:   MPI_Request    *send_waits = PETSC_NULL,*recv_waits = PETSC_NULL;
1999:   MPI_Status     recv_status,*send_status = PETSC_NULL;
2000:   PetscTruth     duplicate = PETSC_FALSE;
2001: #if defined(PETSC_USE_DEBUG)
2002:   PetscTruth     found = PETSC_FALSE;
2003: #endif
#if defined(do_it_slow)
  PetscInt       start,last;    /* used only by the optional duplicate-removal pass below */
#endif

2006:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2007:   PetscObjectGetComm((PetscObject)xin,&comm);
2008:   MPI_Comm_size(comm,&size);
2009:   MPI_Comm_rank(comm,&rank);
2010:   if (size == 1) {
2011:     VecScatterCreate_StoP(nx,inidx,ny,inidy,xin,yin,1,ctx);
2012:     return(0);
2013:   }

2015:   /*
2016:      Each processor ships off its inidx[j] and inidy[j] to the appropriate processor;
2017:      the redistributed indices are then handed to VecScatterCreate_StoP()
2018:   */
2019:   /*  first count number of contributors to each processor */
2020:   PetscMalloc3(size,PetscMPIInt,&nprocs,nx,PetscInt,&owner,(size+1),PetscInt,&starts);
2021:   PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2022:   lastidx = -1;
2023:   j       = 0;
2024:   for (i=0; i<nx; i++) {
2025:     /* if indices are NOT locally sorted, need to start search at the beginning */
2026:     if (lastidx > (idx = inidx[i])) j = 0;
2027:     lastidx = idx;
2028:     for (; j<size; j++) {
2029:       if (idx >= owners[j] && idx < owners[j+1]) {
2030:         nprocs[j]++;
2031:         owner[i] = j;
2032: #if defined(PETSC_USE_DEBUG)
2033:         found = PETSC_TRUE;
2034: #endif
2035:         break;
2036:       }
2037:     }
2038: #if defined(PETSC_USE_DEBUG)
2039:     if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2040:     found = PETSC_FALSE;
2041: #endif
2042:   }
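  /*
     The j-rewind above is a small optimization worth tracing (hypothetical
     numbers): with owners = {0,100,200,300} and inidx already sorted, j
     never restarts at 0, so the whole ownership search costs
     O(nx + size) comparisons instead of O(nx * size).
  */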
2043:   nsends = 0;  for (i=0; i<size; i++) { nsends += (nprocs[i] > 0);}

2045:   /* inform other processors of number of messages and max length*/
2046:   PetscGatherNumberOfMessages(comm,PETSC_NULL,nprocs,&nrecvs);
2047:   PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2048:   PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2049:   recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

2051:   /* post receives:   */
2052:   PetscMalloc5(2*recvtotal,PetscInt,&rvalues,2*nx,PetscInt,&svalues,nrecvs,MPI_Request,&recv_waits,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);

2054:   count = 0;
2055:   for (i=0; i<nrecvs; i++) {
2056:     MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2057:     count += olengths1[i];
2058:   }
2059:   PetscFree(onodes1);

2061:   /* do sends:
2062:       1) starts[i] gives the starting index in svalues for stuff going to 
2063:          the ith processor
2064:   */
2065:   starts[0] = 0;
2066:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
2067:   for (i=0; i<nx; i++) {
2068:     svalues[2*starts[owner[i]]]       = inidx[i];
2069:     svalues[1 + 2*starts[owner[i]]++] = inidy[i];
2070:   }
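  /*
     The pairs are interleaved (illustrative): the k-th entry destined for
     rank p sits at svalues[2*(starts[p]+k)] = inidx and
     svalues[2*(starts[p]+k)+1] = inidy, which is why the sends and receives
     here move 2*nprocs[i] and 2*olengths1[i] MPIU_INT items respectively.
  */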

2072:   starts[0] = 0;
2073:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
2074:   count = 0;
2075:   for (i=0; i<size; i++) {
2076:     if (nprocs[i]) {
2077:       MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2078:       count++;
2079:     }
2080:   }
2081:   PetscFree3(nprocs,owner,starts);

2083:   /*  wait on receives */
2084:   count = nrecvs;
2085:   slen  = 0;
2086:   while (count) {
2087:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2088:     /* unpack receives into our local space */
2089:     MPI_Get_count(&recv_status,MPIU_INT,&n);
2090:     slen += n/2;
2091:     count--;
2092:   }
2093:   if (slen != recvtotal) SETERRQ2(PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
2094: 
2095:   PetscMalloc2(slen,PetscInt,&local_inidx,slen,PetscInt,&local_inidy);
2096:   base  = owners[rank];
2097:   count = 0;
2098:   rsvalues = rvalues;
2099:   for (i=0; i<nrecvs; i++) {
2100:     values = rsvalues;
2101:     rsvalues += 2*olengths1[i];
2102:     for (j=0; j<olengths1[i]; j++) {
2103:       local_inidx[count]   = values[2*j] - base;
2104:       local_inidy[count++] = values[2*j+1];
2105:     }
2106:   }
2107:   PetscFree(olengths1);

2109:   /* wait on sends */
2110:   if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2111:   PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);

2113:   /*
2114:      should sort and remove duplicates from local_inidx[],local_inidy[]; the pass below is compiled out unless do_it_slow is defined
2115:   */

2117: #if defined(do_it_slow)
2118:   /* sort on the from index */
2119:   PetscSortIntWithArray(slen,local_inidx,local_inidy);
2120:   start = 0;
2121:   while (start < slen) {
2122:     count = start+1;
2123:     last  = local_inidx[start];
2124:     while (count < slen && last == local_inidx[count]) count++;
2125:     if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2126:       /* sort on to index */
2127:       PetscSortInt(count-start,local_inidy+start);
2128:     }
2129:     /* remove duplicates; not most efficient way, but probably good enough */
2130:     i = start;
2131:     while (i < count-1) {
2132:       if (local_inidy[i] != local_inidy[i+1]) {
2133:         i++;
2134:       } else { /* found a duplicate */
2135:         duplicate = PETSC_TRUE;
2136:         for (j=i; j<slen-1; j++) {
2137:           local_inidx[j] = local_inidx[j+1];
2138:           local_inidy[j] = local_inidy[j+1];
2139:         }
2140:         slen--;
2141:         count--;
2142:       }
2143:     }
2144:     start = count;
2145:   }
2146: #endif
2147:   if (duplicate) {
2148:     PetscInfo(ctx,"Duplicate from to indices passed in VecScatterCreate(), they are ignored\n");
2149:   }
2150:   VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,xin,yin,1,ctx);
2151:   PetscFree2(local_inidx,local_inidy);
2152:   return(0);
2153: }