Actual source code: mpidense.c

  1: #define PETSCMAT_DLL

  3: /*
  4:    Basic functions for parallel dense matrices.
  5: */

  7: 
  8: #include "../src/mat/impls/dense/mpi/mpidense.h"
  9: #if defined(PETSC_HAVE_PLAPACK)
 10: static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
 11: static MPI_Comm Plapack_comm_2d;
 12: #endif

 16: /*@

 18:       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
 19:               matrix that stores the locally owned part of the operator. For a sequential matrix it returns the matrix itself.

 21:     Input Parameter:
 22: .      A - the Seq or MPI dense matrix

 24:     Output Parameter:
 25: .      B - the inner matrix

 27:     Level: intermediate

 29: @*/
 30: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
 31: {
 32:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 34:   PetscTruth     flg;

 37:   PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
 38:   if (flg) {
 39:     *B = mat->A;
 40:   } else {
 41:     *B = A;
 42:   }
 43:   return(0);
 44: }
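/*
   Editor's sketch (illustrative, not part of the library source): a minimal use of
   MatDenseGetLocalMatrix(), assuming A is an already assembled MATMPIDENSE (or
   MATSEQDENSE) matrix; the names A and Aloc are placeholders.

      Mat Aloc;
      MatDenseGetLocalMatrix(A,&Aloc);

   Aloc is the inner MATSEQDENSE matrix holding this process's rows; it is owned by A,
   so the caller should not destroy it.
*/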

 48: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 49: {
 50:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 52:   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

 55:   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
 56:   lrow = row - rstart;
 57:   MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);
 58:   return(0);
 59: }

 63: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 64: {

 68:   if (idx) {PetscFree(*idx);}
 69:   if (v) {PetscFree(*v);}
 70:   return(0);
 71: }
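/*
   Editor's sketch (illustrative, not part of the library source): the calling pattern
   these routines support through the public MatGetRow()/MatRestoreRow() interface.
   Only locally owned rows may be requested, as the error check in MatGetRow_MPIDense()
   enforces; the variable names are placeholders.

      PetscInt          rstart,rend,row,ncols;
      const PetscInt    *cols;
      const PetscScalar *vals;

      MatGetOwnershipRange(A,&rstart,&rend);
      for (row=rstart; row<rend; row++) {
        MatGetRow(A,row,&ncols,&cols,&vals);
        ...  use the ncols column indices and values of this global row ...
        MatRestoreRow(A,row,&ncols,&cols,&vals);
      }
*/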

 76: PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
 77: {
 78:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
 80:   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
 81:   PetscScalar    *array;
 82:   MPI_Comm       comm;

 85:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

 87:   /* The reuse aspect is not implemented efficiently */
 88:   if (reuse) { MatDestroy(*B);}

 90:   PetscObjectGetComm((PetscObject)(mdn->A),&comm);
 91:   MatGetArray(mdn->A,&array);
 92:   MatCreate(comm,B);
 93:   MatSetSizes(*B,m,m,m,m);
 94:   MatSetType(*B,((PetscObject)mdn->A)->type_name);
 95:   MatSeqDenseSetPreallocation(*B,array+m*rstart);
 96:   MatRestoreArray(mdn->A,&array);
 97:   MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
 98:   MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
 99: 
100:   *iscopy = PETSC_TRUE;
101:   return(0);
102: }

107: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
108: {
109:   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
111:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
112:   PetscTruth     roworiented = A->roworiented;

115:   for (i=0; i<m; i++) {
116:     if (idxm[i] < 0) continue;
117:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
118:     if (idxm[i] >= rstart && idxm[i] < rend) {
119:       row = idxm[i] - rstart;
120:       if (roworiented) {
121:         MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
122:       } else {
123:         for (j=0; j<n; j++) {
124:           if (idxn[j] < 0) continue;
125:           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
126:           MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
127:         }
128:       }
129:     } else {
130:       if (!A->donotstash) {
131:         if (roworiented) {
132:           MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
133:         } else {
134:           MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
135:         }
136:       }
137:     }
138:   }
139:   return(0);
140: }

144: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
145: {
146:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
148:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

151:   for (i=0; i<m; i++) {
152:     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
153:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
154:     if (idxm[i] >= rstart && idxm[i] < rend) {
155:       row = idxm[i] - rstart;
156:       for (j=0; j<n; j++) {
157:         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
158:         if (idxn[j] >= mat->cmap->N) {
159:           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
160:         }
161:         MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
162:       }
163:     } else {
164:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
165:     }
166:   }
167:   return(0);
168: }

172: PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
173: {
174:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

178:   MatGetArray(a->A,array);
179:   return(0);
180: }

184: static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
185: {
186:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
187:   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
189:   PetscInt       i,j,rstart,rend,nrows,ncols,nlrows,nlcols;
190:   const PetscInt *irow,*icol;
191:   PetscScalar    *av,*bv,*v = lmat->v;
192:   Mat            newmat;

195:   ISGetIndices(isrow,&irow);
196:   ISGetIndices(iscol,&icol);
197:   ISGetLocalSize(isrow,&nrows);
198:   ISGetLocalSize(iscol,&ncols);

200:   /* No parallel redistribution currently supported! Should really check each index set
201:      to confirm that it is OK.  ... Currently supports only a submatrix with the same
202:      partitioning as the original matrix! */

204:   MatGetLocalSize(A,&nlrows,&nlcols);
205:   MatGetOwnershipRange(A,&rstart,&rend);
206: 
207:   /* Check submatrix call */
208:   if (scall == MAT_REUSE_MATRIX) {
209:     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
210:     /* Really need to test rows and column sizes! */
211:     newmat = *B;
212:   } else {
213:     /* Create and fill new matrix */
214:     MatCreate(((PetscObject)A)->comm,&newmat);
215:     MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);
216:     MatSetType(newmat,((PetscObject)A)->type_name);
217:     MatMPIDenseSetPreallocation(newmat,PETSC_NULL);
218:   }

220:   /* Now extract the data pointers and do the copy, column at a time */
221:   newmatd = (Mat_MPIDense*)newmat->data;
222:   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
223: 
224:   for (i=0; i<ncols; i++) {
225:     av = v + nlrows*icol[i];
226:     for (j=0; j<nrows; j++) {
227:       *bv++ = av[irow[j] - rstart];
228:     }
229:   }

231:   /* Assemble the matrices so that the correct flags are set */
232:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
233:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);

235:   /* Free work space */
236:   ISRestoreIndices(isrow,&irow);
237:   ISRestoreIndices(iscol,&icol);
238:   *B = newmat;
239:   return(0);
240: }

244: PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
245: {
247:   return(0);
248: }

252: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
253: {
254:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
255:   MPI_Comm       comm = ((PetscObject)mat)->comm;
257:   PetscInt       nstash,reallocs;
258:   InsertMode     addv;

261:   /* make sure all processors are either in INSERTMODE or ADDMODE */
262:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
263:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
264:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
265:   }
266:   mat->insertmode = addv; /* in case this processor had no cache */

268:   MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
269:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
270:   PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
271:   return(0);
272: }

276: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
277: {
278:   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
279:   PetscErrorCode  ierr;
280:   PetscInt        i,*row,*col,flg,j,rstart,ncols;
281:   PetscMPIInt     n;
282:   PetscScalar     *val;
283:   InsertMode      addv=mat->insertmode;

286:   /*  wait on receives */
287:   while (1) {
288:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
289:     if (!flg) break;
290: 
291:     for (i=0; i<n;) {
292:       /* Now identify the consecutive vals belonging to the same row */
293:       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
294:       if (j < n) ncols = j-i;
295:       else       ncols = n-i;
296:       /* Now assemble all these values with a single function call */
297:       MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
298:       i = j;
299:     }
300:   }
301:   MatStashScatterEnd_Private(&mat->stash);
302: 
303:   MatAssemblyBegin(mdn->A,mode);
304:   MatAssemblyEnd(mdn->A,mode);

306:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
307:     MatSetUpMultiply_MPIDense(mat);
308:   }
309:   return(0);
310: }
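/*
   Editor's sketch (illustrative, not part of the library source): the usage pattern
   served by MatSetValues_MPIDense() and the assembly routines above.  Entries may be
   set for any global row; off-process entries are stashed and communicated during
   assembly.  Variable names are placeholders.

      PetscInt    i = ..., j = ...;        global row and column indices
      PetscScalar v = ...;

      MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);
      ...
      MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
*/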

314: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
315: {
317:   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

320:   MatZeroEntries(l->A);
321:   return(0);
322: }

324: /* the code does not do the diagonal entries correctly unless the 
325:    matrix is square and the column and row ownerships are identical.
326:    This is a BUG. The only way to fix it seems to be to access 
327:    mdn->A and mdn->B directly and not through the MatZeroRows() 
328:    routine. 
329: */
332: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
333: {
334:   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
336:   PetscInt       i,*owners = A->rmap->range;
337:   PetscInt       *nprocs,j,idx,nsends;
338:   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
339:   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
340:   PetscInt       *lens,*lrows,*values;
341:   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
342:   MPI_Comm       comm = ((PetscObject)A)->comm;
343:   MPI_Request    *send_waits,*recv_waits;
344:   MPI_Status     recv_status,*send_status;
345:   PetscTruth     found;

348:   /*  first count number of contributors to each processor */
349:   PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
350:   PetscMemzero(nprocs,2*size*sizeof(PetscInt));
351:   PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
352:   for (i=0; i<N; i++) {
353:     idx = rows[i];
354:     found = PETSC_FALSE;
355:     for (j=0; j<size; j++) {
356:       if (idx >= owners[j] && idx < owners[j+1]) {
357:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
358:       }
359:     }
360:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
361:   }
362:   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

364:   /* inform other processors of number of messages and max length*/
365:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

367:   /* post receives:   */
368:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
369:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
370:   for (i=0; i<nrecvs; i++) {
371:     MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
372:   }

374:   /* do sends:
375:       1) starts[i] gives the starting index in svalues for stuff going to 
376:          the ith processor
377:   */
378:   PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
379:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
380:   PetscMalloc((size+1)*sizeof(PetscInt),&starts);
381:   starts[0]  = 0;
382:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
383:   for (i=0; i<N; i++) {
384:     svalues[starts[owner[i]]++] = rows[i];
385:   }

387:   starts[0] = 0;
388:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
389:   count = 0;
390:   for (i=0; i<size; i++) {
391:     if (nprocs[2*i+1]) {
392:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
393:     }
394:   }
395:   PetscFree(starts);

397:   base = owners[rank];

399:   /*  wait on receives */
400:   PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
401:   source = lens + nrecvs;
402:   count  = nrecvs; slen = 0;
403:   while (count) {
404:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
405:     /* unpack receives into our local space */
406:     MPI_Get_count(&recv_status,MPIU_INT,&n);
407:     source[imdex]  = recv_status.MPI_SOURCE;
408:     lens[imdex]    = n;
409:     slen += n;
410:     count--;
411:   }
412:   PetscFree(recv_waits);
413: 
414:   /* move the data into the send scatter */
415:   PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
416:   count = 0;
417:   for (i=0; i<nrecvs; i++) {
418:     values = rvalues + i*nmax;
419:     for (j=0; j<lens[i]; j++) {
420:       lrows[count++] = values[j] - base;
421:     }
422:   }
423:   PetscFree(rvalues);
424:   PetscFree(lens);
425:   PetscFree(owner);
426:   PetscFree(nprocs);
427: 
428:   /* actually zap the local rows */
429:   MatZeroRows(l->A,slen,lrows,diag);
430:   PetscFree(lrows);

432:   /* wait on sends */
433:   if (nsends) {
434:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
435:     MPI_Waitall(nsends,send_waits,send_status);
436:     PetscFree(send_status);
437:   }
438:   PetscFree(send_waits);
439:   PetscFree(svalues);

441:   return(0);
442: }
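/*
   Editor's sketch (illustrative, not part of the library source): from the caller's
   side the rows[] argument of MatZeroRows() holds global row numbers; the
   communication above routes each index to its owning process, the local rows are
   zeroed, and diag is placed on the diagonal (subject to the caveat in the comment
   preceding this routine).

      PetscInt rows[2] = {0,5};            global row numbers, possibly off-process
      MatZeroRows(A,2,rows,1.0);           zero rows 0 and 5, put 1.0 on their diagonal
*/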

446: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
447: {
448:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

452:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
453:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
454:   MatMult_SeqDense(mdn->A,mdn->lvec,yy);
455:   return(0);
456: }

460: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
461: {
462:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

466:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
467:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
468:   MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
469:   return(0);
470: }

474: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
475: {
476:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
478:   PetscScalar    zero = 0.0;

481:   VecSet(yy,zero);
482:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
483:   VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
484:   VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
485:   return(0);
486: }

490: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
491: {
492:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

496:   VecCopy(yy,zz);
497:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
498:   VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
499:   VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
500:   return(0);
501: }

505: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
506: {
507:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
508:   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
510:   PetscInt       len,i,n,m = A->rmap->n,radd;
511:   PetscScalar    *x,zero = 0.0;
512: 
514:   VecSet(v,zero);
515:   VecGetArray(v,&x);
516:   VecGetSize(v,&n);
517:   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
518:   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
519:   radd = A->rmap->rstart*m;
520:   for (i=0; i<len; i++) {
521:     x[i] = aloc->v[radd + i*m + i];
522:   }
523:   VecRestoreArray(v,&x);
524:   return(0);
525: }

529: PetscErrorCode MatDestroy_MPIDense(Mat mat)
530: {
531:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
533: #if defined(PETSC_HAVE_PLAPACK)
534:   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
535: #endif


539: #if defined(PETSC_USE_LOG)
540:   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
541: #endif
542:   MatStashDestroy_Private(&mat->stash);
543:   MatDestroy(mdn->A);
544:   if (mdn->lvec)   {VecDestroy(mdn->lvec);}
545:   if (mdn->Mvctx)  {VecScatterDestroy(mdn->Mvctx);}
546: #if defined(PETSC_HAVE_PLAPACK)
547:   if (lu) {
548:     PLA_Obj_free(&lu->A);
549:     PLA_Obj_free (&lu->pivots);
550:     PLA_Temp_free(&lu->templ);

552:     if (lu->is_pla) {
553:       ISDestroy(lu->is_pla);
554:       ISDestroy(lu->is_petsc);
555:       VecScatterDestroy(lu->ctx);
556:     }
557:   }
558: #endif

560:   PetscFree(mdn);
561:   PetscObjectChangeTypeName((PetscObject)mat,0);
562:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
563:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);
564:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);
565:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);
566:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);
567:   return(0);
568: }

572: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
573: {
574:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
575:   PetscErrorCode    ierr;
576:   PetscViewerFormat format;
577:   int               fd;
578:   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
579:   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
580:   PetscScalar       *work,*v,*vv;
581:   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
582:   MPI_Status        status;

585:   if (mdn->size == 1) {
586:     MatView(mdn->A,viewer);
587:   } else {
588:     PetscViewerBinaryGetDescriptor(viewer,&fd);
589:     MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
590:     MPI_Comm_size(((PetscObject)mat)->comm,&size);

592:     PetscViewerGetFormat(viewer,&format);
593:     if (format == PETSC_VIEWER_NATIVE) {

595:       if (!rank) {
596:         /* store the matrix as a dense matrix */
597:         header[0] = MAT_FILE_COOKIE;
598:         header[1] = mat->rmap->N;
599:         header[2] = N;
600:         header[3] = MATRIX_BINARY_FORMAT_DENSE;
601:         PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);

603:         /* get largest work array needed for transposing array */
604:         mmax = mat->rmap->n;
605:         for (i=1; i<size; i++) {
606:           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
607:         }
608:         PetscMalloc(mmax*N*sizeof(PetscScalar),&work);

610:         /* write out local array, by rows */
611:         m    = mat->rmap->n;
612:         v    = a->v;
613:         for (j=0; j<N; j++) {
614:           for (i=0; i<m; i++) {
615:             work[j + i*N] = *v++;
616:           }
617:         }
618:         PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
619:         /* get largest work array to receive messages from other processes, excludes process zero */
620:         mmax = 0;
621:         for (i=1; i<size; i++) {
622:           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
623:         }
624:         PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);
625:         for(k = 1; k < size; k++) {
626:           v    = vv;
627:           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
628:           MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);

630:           for(j = 0; j < N; j++) {
631:             for(i = 0; i < m; i++) {
632:               work[j + i*N] = *v++;
633:             }
634:           }
635:           PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
636:         }
637:         PetscFree(work);
638:         PetscFree(vv);
639:       } else {
640:         MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);
641:       }
642:     } else {
643:       SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
644:     }
645:   }
646:   return(0);
647: }

651: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
652: {
653:   Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
654:   PetscErrorCode        ierr;
655:   PetscMPIInt           size = mdn->size,rank = mdn->rank;
656:   const PetscViewerType vtype;
657:   PetscTruth            iascii,isdraw;
658:   PetscViewer           sviewer;
659:   PetscViewerFormat     format;
660: #if defined(PETSC_HAVE_PLAPACK)
661:   Mat_Plapack           *lu;
662: #endif

665:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
666:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
667:   if (iascii) {
668:     PetscViewerGetType(viewer,&vtype);
669:     PetscViewerGetFormat(viewer,&format);
670:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
671:       MatInfo info;
672:       MatGetInfo(mat,MAT_LOCAL,&info);
673:       PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
674:                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
675:       PetscViewerFlush(viewer);
676: #if defined(PETSC_HAVE_PLAPACK)
677:       PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");
678:       PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);
679:       PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);
680:       PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);
681:       if (mat->factor){
682:         lu=(Mat_Plapack*)(mat->spptr);
683:         PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);
684:       }
685: #else
686:       VecScatterView(mdn->Mvctx,viewer);
687: #endif
688:       return(0);
689:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
690:       return(0);
691:     }
692:   } else if (isdraw) {
693:     PetscDraw  draw;
694:     PetscTruth isnull;

696:     PetscViewerDrawGetDraw(viewer,0,&draw);
697:     PetscDrawIsNull(draw,&isnull);
698:     if (isnull) return(0);
699:   }

701:   if (size == 1) {
702:     MatView(mdn->A,viewer);
703:   } else {
704:     /* assemble the entire matrix onto first processor. */
705:     Mat         A;
706:     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
707:     PetscInt    *cols;
708:     PetscScalar *vals;

710:     MatCreate(((PetscObject)mat)->comm,&A);
711:     if (!rank) {
712:       MatSetSizes(A,M,N,M,N);
713:     } else {
714:       MatSetSizes(A,0,0,M,N);
715:     }
716:     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
717:     MatSetType(A,MATMPIDENSE);
718:     MatMPIDenseSetPreallocation(A,PETSC_NULL);
719:     PetscLogObjectParent(mat,A);

721:     /* Copy the matrix ... This isn't the most efficient means,
722:        but it's quick for now */
723:     A->insertmode = INSERT_VALUES;
724:     row = mat->rmap->rstart; m = mdn->A->rmap->n;
725:     for (i=0; i<m; i++) {
726:       MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
727:       MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
728:       MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
729:       row++;
730:     }

732:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
733:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
734:     PetscViewerGetSingleton(viewer,&sviewer);
735:     if (!rank) {
736:       MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
737:     }
738:     PetscViewerRestoreSingleton(viewer,&sviewer);
739:     PetscViewerFlush(viewer);
740:     MatDestroy(A);
741:   }
742:   return(0);
743: }

747: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
748: {
750:   PetscTruth     iascii,isbinary,isdraw,issocket;
751: 
753: 
754:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
755:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
756:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
757:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);

759:   if (iascii || issocket || isdraw) {
760:     MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
761:   } else if (isbinary) {
762:     MatView_MPIDense_Binary(mat,viewer);
763:   } else {
764:     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
765:   }
766:   return(0);
767: }

771: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
772: {
773:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
774:   Mat            mdn = mat->A;
776:   PetscReal      isend[5],irecv[5];

779:   info->block_size     = 1.0;
780:   MatGetInfo(mdn,MAT_LOCAL,info);
781:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
782:   isend[3] = info->memory;  isend[4] = info->mallocs;
783:   if (flag == MAT_LOCAL) {
784:     info->nz_used      = isend[0];
785:     info->nz_allocated = isend[1];
786:     info->nz_unneeded  = isend[2];
787:     info->memory       = isend[3];
788:     info->mallocs      = isend[4];
789:   } else if (flag == MAT_GLOBAL_MAX) {
790:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);
791:     info->nz_used      = irecv[0];
792:     info->nz_allocated = irecv[1];
793:     info->nz_unneeded  = irecv[2];
794:     info->memory       = irecv[3];
795:     info->mallocs      = irecv[4];
796:   } else if (flag == MAT_GLOBAL_SUM) {
797:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);
798:     info->nz_used      = irecv[0];
799:     info->nz_allocated = irecv[1];
800:     info->nz_unneeded  = irecv[2];
801:     info->memory       = irecv[3];
802:     info->mallocs      = irecv[4];
803:   }
804:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
805:   info->fill_ratio_needed = 0;
806:   info->factor_mallocs    = 0;
807:   return(0);
808: }

812: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
813: {
814:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

818:   switch (op) {
819:   case MAT_NEW_NONZERO_LOCATIONS:
820:   case MAT_NEW_NONZERO_LOCATION_ERR:
821:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
822:     MatSetOption(a->A,op,flg);
823:     break;
824:   case MAT_ROW_ORIENTED:
825:     a->roworiented = flg;
826:     MatSetOption(a->A,op,flg);
827:     break;
828:   case MAT_NEW_DIAGONALS:
829:   case MAT_USE_HASH_TABLE:
830:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
831:     break;
832:   case MAT_IGNORE_OFF_PROC_ENTRIES:
833:     a->donotstash = flg;
834:     break;
835:   case MAT_SYMMETRIC:
836:   case MAT_STRUCTURALLY_SYMMETRIC:
837:   case MAT_HERMITIAN:
838:   case MAT_SYMMETRY_ETERNAL:
839:   case MAT_IGNORE_LOWER_TRIANGULAR:
840:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
841:     break;
842:   default:
843:     SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
844:   }
845:   return(0);
846: }


851: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
852: {
853:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
854:   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
855:   PetscScalar    *l,*r,x,*v;
857:   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

860:   MatGetLocalSize(A,&s2,&s3);
861:   if (ll) {
862:     VecGetLocalSize(ll,&s2a);
863:     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
864:     VecGetArray(ll,&l);
865:     for (i=0; i<m; i++) {
866:       x = l[i];
867:       v = mat->v + i;
868:       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
869:     }
870:     VecRestoreArray(ll,&l);
871:     PetscLogFlops(n*m);
872:   }
873:   if (rr) {
874:     VecGetLocalSize(rr,&s3a);
875:     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
876:     VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
877:     VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
878:     VecGetArray(mdn->lvec,&r);
879:     for (i=0; i<n; i++) {
880:       x = r[i];
881:       v = mat->v + i*m;
882:       for (j=0; j<m; j++) { (*v++) *= x;}
883:     }
884:     VecRestoreArray(mdn->lvec,&r);
885:     PetscLogFlops(n*m);
886:   }
887:   return(0);
888: }

892: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
893: {
894:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
895:   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
897:   PetscInt       i,j;
898:   PetscReal      sum = 0.0;
899:   PetscScalar    *v = mat->v;

902:   if (mdn->size == 1) {
903:      MatNorm(mdn->A,type,nrm);
904:   } else {
905:     if (type == NORM_FROBENIUS) {
906:       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
907: #if defined(PETSC_USE_COMPLEX)
908:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
909: #else
910:         sum += (*v)*(*v); v++;
911: #endif
912:       }
913:       MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);
914:       *nrm = sqrt(*nrm);
915:       PetscLogFlops(2*mdn->A->cmap->n*mdn->A->rmap->n);
916:     } else if (type == NORM_1) {
917:       PetscReal *tmp,*tmp2;
918:       PetscMalloc(2*A->cmap->N*sizeof(PetscReal),&tmp);
919:       tmp2 = tmp + A->cmap->N;
920:       PetscMemzero(tmp,2*A->cmap->N*sizeof(PetscReal));
921:       *nrm = 0.0;
922:       v = mat->v;
923:       for (j=0; j<mdn->A->cmap->n; j++) {
924:         for (i=0; i<mdn->A->rmap->n; i++) {
925:           tmp[j] += PetscAbsScalar(*v);  v++;
926:         }
927:       }
928:       MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);
929:       for (j=0; j<A->cmap->N; j++) {
930:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
931:       }
932:       PetscFree(tmp);
933:       PetscLogFlops(A->cmap->n*A->rmap->n);
934:     } else if (type == NORM_INFINITY) { /* max row norm */
935:       PetscReal ntemp;
936:       MatNorm(mdn->A,type,&ntemp);
937:       MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);
938:     } else {
939:       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
940:     }
941:   }
942:   return(0);
943: }

947: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
948: {
949:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
950:   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
951:   Mat            B;
952:   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
954:   PetscInt       j,i;
955:   PetscScalar    *v;

958:   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports in-place transpose only for square matrices");
959:   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
960:     MatCreate(((PetscObject)A)->comm,&B);
961:     MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
962:     MatSetType(B,((PetscObject)A)->type_name);
963:     MatMPIDenseSetPreallocation(B,PETSC_NULL);
964:   } else {
965:     B = *matout;
966:   }

968:   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
969:   PetscMalloc(m*sizeof(PetscInt),&rwork);
970:   for (i=0; i<m; i++) rwork[i] = rstart + i;
971:   for (j=0; j<n; j++) {
972:     MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
973:     v   += m;
974:   }
975:   PetscFree(rwork);
976:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
977:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
978:   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
979:     *matout = B;
980:   } else {
981:     MatHeaderCopy(A,B);
982:   }
983:   return(0);
984: }

 986: #include "petscblaslapack.h"
989: PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
990: {
991:   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
992:   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
993:   PetscScalar    oalpha = alpha;
995:   PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap->n*inA->cmap->N);

998:   BLASscal_(&nz,&oalpha,a->v,&one);
999:   PetscLogFlops(nz);
1000:   return(0);
1001: }

1003: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

1007: PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1008: {

1012:    MatMPIDenseSetPreallocation(A,0);
1013:   return(0);
1014: }

1016: #if defined(PETSC_HAVE_PLAPACK)

1020: PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1021: {
1022:   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1024:   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1025:   PetscScalar    *array;
1026:   PetscReal      one = 1.0;

1029:   /* Copy A into lu->A (the PLAPACK matrix held by F) */
1030:   PLA_Obj_set_to_zero(lu->A);
1031:   PLA_API_begin();
1032:   PLA_Obj_API_open(lu->A);
1033:   MatGetOwnershipRange(A,&rstart,PETSC_NULL);
1034:   MatGetArray(A,&array);
1035:   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1036:   MatRestoreArray(A,&array);
1037:   PLA_Obj_API_close(lu->A);
1038:   PLA_API_end();
1039:   lu->rstart = rstart;
1040:   return(0);
1041: }

1045: PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1046: {
1047:   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1049:   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1050:   PetscScalar    *array;
1051:   PetscReal      one = 1.0;

1054:   /* Copy the factor's lu->A back into A */
1055:   MatZeroEntries(A);
1056:   PLA_API_begin();
1057:   PLA_Obj_API_open(lu->A);
1058:   MatGetOwnershipRange(A,&rstart,PETSC_NULL);
1059:   MatGetArray(A,&array);
1060:   PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);
1061:   MatRestoreArray(A,&array);
1062:   PLA_Obj_API_close(lu->A);
1063:   PLA_API_end();
1064:   lu->rstart = rstart;
1065:   return(0);
1066: }

1070: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1071: {
1073:   Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
1074:   Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
1075:   Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
1076:   PLA_Obj        alpha = NULL,beta = NULL;

1079:   MatMPIDenseCopyToPlapack(A,A);
1080:   MatMPIDenseCopyToPlapack(B,B);

1082:   /* 
1083:   PLA_Global_show("A = ",luA->A,"%g ","");
1084:   PLA_Global_show("B = ",luB->A,"%g ","");
1085:   */

1087:   /* do the multiply in PLA  */
1088:   PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);
1089:   PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);
1090:   CHKMEMQ;

1092:   PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /*  */
1093:   CHKMEMQ;
1094:   PLA_Obj_free(&alpha);
1095:   PLA_Obj_free(&beta);

1097:   /*
1098:   PLA_Global_show("C = ",luC->A,"%g ","");
1099:   */
1100:   MatMPIDenseCopyFromPlapack(C,C);
1101:   return(0);
1102: }

1106: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1107: {
1109:   PetscInt       m=A->rmap->n,n=B->cmap->n;
1110:   Mat            Cmat;

1113:   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
1114:   SETERRQ(PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
1115:   MatCreate(((PetscObject)B)->comm,&Cmat);
1116:   MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);
1117:   MatSetType(Cmat,MATMPIDENSE);
1118:   MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);
1119:   MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);

1121:   *C = Cmat;
1122:   return(0);
1123: }

1127: PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1128: {

1132:   if (scall == MAT_INITIAL_MATRIX){
1133:     MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
1134:   }
1135:   MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);
1136:   return(0);
1137: }

1141: PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1142: {
1143:   MPI_Comm       comm = ((PetscObject)A)->comm;
1144:   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1146:   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1147:   PetscScalar    *array;
1148:   PetscReal      one = 1.0;
1149:   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
1150:   PLA_Obj        v_pla = NULL;
1151:   PetscScalar    *loc_buf;
1152:   Vec            loc_x;
1153: 
1155:   MPI_Comm_size(comm,&size);
1156:   MPI_Comm_rank(comm,&rank);

1158:   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1159:   PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1160:   PLA_Obj_set_to_zero(v_pla);

1162:   /* Copy b into v_pla */
1163:   PLA_API_begin();
1164:   PLA_Obj_API_open(v_pla);
1165:   VecGetArray(b,&array);
1166:   PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1167:   VecRestoreArray(b,&array);
1168:   PLA_Obj_API_close(v_pla);
1169:   PLA_API_end();

1171:   if (A->factor == MAT_FACTOR_LU){
1172:     /* Apply the permutations to the right hand sides */
1173:     PLA_Apply_pivots_to_rows (v_pla,lu->pivots);

1175:     /* Solve L y = b, overwriting b with y */
1176:     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );

1178:     /* Solve U x = y (=b), overwriting b with x */
1179:     PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1180:   } else { /* MAT_FACTOR_CHOLESKY */
1181:     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1182:     PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1183:                                     PLA_NONUNIT_DIAG,lu->A,v_pla);
1184:   }

1186:   /* Copy PLAPACK x into Petsc vector x  */
1187:   PLA_Obj_local_length(v_pla, &loc_m);
1188:   PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1189:   PLA_Obj_local_stride(v_pla, &loc_stride);
1190:   /*
1191:     PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 
1192:   */
1193:   VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);
1194:   if (!lu->pla_solved){
1195: 
1196:     PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
1197:     PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);

1199:     /* Create index sets and scatter context for the VecScatter */
1200:     PLA_Obj_local_length(v_pla, &loc_m);
1201:     PLA_Obj_local_stride(v_pla, &loc_stride);
1202:     PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);
1203:     idx_petsc = idx_pla + loc_m;

1205:     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1206:     for (i=0; i<loc_m; i+=lu->nb){
1207:       j = 0;
1208:       while (j < lu->nb && i+j < loc_m){
1209:         idx_petsc[i+j] = rstart + j; j++;
1210:       }
1211:       rstart += size*lu->nb;
1212:     }

1214:     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;

1216:     ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);
1217:     ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);
1218:     PetscFree(idx_pla);
1219:     VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);
1220:   }
1221:   VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);
1222:   VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);
1223: 
1224:   /* Free data */
1225:   VecDestroy(loc_x);
1226:   PLA_Obj_free(&v_pla);

1228:   lu->pla_solved = PETSC_TRUE;
1229:   return(0);
1230: }

1234: PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1235: {
1236:   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1238:   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1239:   PetscInt       info_pla=0;
1240:   PetscScalar    *array,one = 1.0;

1243:   if (lu->mstruct == SAME_NONZERO_PATTERN){
1244:     PLA_Obj_free(&lu->A);
1245:     PLA_Obj_free (&lu->pivots);
1246:   }
1247:   /* Create PLAPACK matrix object */
1248:   lu->A = NULL; lu->pivots = NULL;
1249:   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1250:   PLA_Obj_set_to_zero(lu->A);
1251:   PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);

1253:   /* Copy A into lu->A */
1254:   PLA_API_begin();
1255:   PLA_Obj_API_open(lu->A);
1256:   MatGetOwnershipRange(A,&rstart,&rend);
1257:   MatGetArray(A,&array);
1258:   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1259:   MatRestoreArray(A,&array);
1260:   PLA_Obj_API_close(lu->A);
1261:   PLA_API_end();

1263:   /* Factor P A -> L U, overwriting the lower triangular portion of A with L and the upper with U */
1264:   info_pla = PLA_LU(lu->A,lu->pivots);
1265:   if (info_pla != 0)
1266:     SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);

1268:   lu->rstart         = rstart;
1269:   lu->mstruct        = SAME_NONZERO_PATTERN;
1270:   F->ops->solve      = MatSolve_MPIDense;
1271:   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1272:   return(0);
1273: }

1277: PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1278: {
1279:   Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
1281:   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1282:   PetscInt       info_pla=0;
1283:   PetscScalar    *array,one = 1.0;

1286:   if (lu->mstruct == SAME_NONZERO_PATTERN){
1287:     PLA_Obj_free(&lu->A);
1288:   }
1289:   /* Create PLAPACK matrix object */
1290:   lu->A      = NULL;
1291:   lu->pivots = NULL;
1292:   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);

1294:   /* Copy A into lu->A */
1295:   PLA_API_begin();
1296:   PLA_Obj_API_open(lu->A);
1297:   MatGetOwnershipRange(A,&rstart,&rend);
1298:   MatGetArray(A,&array);
1299:   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1300:   MatRestoreArray(A,&array);
1301:   PLA_Obj_API_close(lu->A);
1302:   PLA_API_end();

1304:   /* Factor P A -> Chol */
1305:   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1306:   if (info_pla != 0)
1307:     SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);

1309:   lu->rstart         = rstart;
1310:   lu->mstruct        = SAME_NONZERO_PATTERN;
1311:   F->ops->solve      = MatSolve_MPIDense;
1312:   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1313:   return(0);
1314: }

1316: /* Note the Petsc perm permutation is ignored */
1319: PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
1320: {
1322:   PetscTruth     issymmetric,set;

1325:   MatIsSymmetricKnown(A,&set,&issymmetric);
1326:   if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1327:   F->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_MPIDense;
1328:   return(0);
1329: }

1331: /* Note the Petsc r and c permutations are ignored */
1334: PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1335: {
1337:   PetscInt       M = A->rmap->N;
1338:   Mat_Plapack    *lu;

1341:   lu = (Mat_Plapack*)F->spptr;
1342:   PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1343:   F->ops->lufactornumeric  = MatLUFactorNumeric_MPIDense;
1344:   return(0);
1345: }

1350: PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
1351: {
1353:   *type = MAT_SOLVER_PLAPACK;
1354:   return(0);
1355: }

1360: PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
1361: {
1363:   Mat_Plapack    *lu;
1364:   PetscMPIInt    size;
1365:   PetscInt       M=A->rmap->N;

1368:   /* Create the factorization matrix */
1369:   MatCreate(((PetscObject)A)->comm,F);
1370:   MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1371:   MatSetType(*F,((PetscObject)A)->type_name);
1372:   PetscNewLog(*F,Mat_Plapack,&lu);
1373:   (*F)->spptr = (void*)lu;

1375:   /* Set default Plapack parameters */
1376:   MPI_Comm_size(((PetscObject)A)->comm,&size);
1377:   lu->nb = M/size;
1378:   if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */
1379: 
1380:   /* Set runtime options */
1381:   PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");
1382:     PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);
1383:   PetscOptionsEnd();

1385:   /* Create object distribution template */
1386:   lu->templ = NULL;
1387:   PLA_Temp_create(lu->nb, 0, &lu->templ);

1389:   /* Set the datatype */
1390: #if defined(PETSC_USE_COMPLEX)
1391:   lu->datatype = MPI_DOUBLE_COMPLEX;
1392: #else
1393:   lu->datatype = MPI_DOUBLE;
1394: #endif

1396:   PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);


1399:   lu->pla_solved     = PETSC_FALSE; /* no solve has been performed yet */
1400:   lu->mstruct        = DIFFERENT_NONZERO_PATTERN;

1402:   if (ftype == MAT_FACTOR_LU) {
1403:     (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1404:   } else if (ftype == MAT_FACTOR_CHOLESKY) {
1405:     (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1406:   } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1407:   (*F)->factor = ftype;
1408:   PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);
1409:   return(0);
1410: }
1411: #endif

1413: /* -------------------------------------------------------------------*/
1414: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1415:        MatGetRow_MPIDense,
1416:        MatRestoreRow_MPIDense,
1417:        MatMult_MPIDense,
1418: /* 4*/ MatMultAdd_MPIDense,
1419:        MatMultTranspose_MPIDense,
1420:        MatMultTransposeAdd_MPIDense,
1421:        0,
1422:        0,
1423:        0,
1424: /*10*/ 0,
1425:        0,
1426:        0,
1427:        0,
1428:        MatTranspose_MPIDense,
1429: /*15*/ MatGetInfo_MPIDense,
1430:        MatEqual_MPIDense,
1431:        MatGetDiagonal_MPIDense,
1432:        MatDiagonalScale_MPIDense,
1433:        MatNorm_MPIDense,
1434: /*20*/ MatAssemblyBegin_MPIDense,
1435:        MatAssemblyEnd_MPIDense,
1436:        0,
1437:        MatSetOption_MPIDense,
1438:        MatZeroEntries_MPIDense,
1439: /*25*/ MatZeroRows_MPIDense,
1440:        0,
1441:        0,
1442:        0,
1443:        0,
1444: /*30*/ MatSetUpPreallocation_MPIDense,
1445:        0,
1446:        0,
1447:        MatGetArray_MPIDense,
1448:        MatRestoreArray_MPIDense,
1449: /*35*/ MatDuplicate_MPIDense,
1450:        0,
1451:        0,
1452:        0,
1453:        0,
1454: /*40*/ 0,
1455:        MatGetSubMatrices_MPIDense,
1456:        0,
1457:        MatGetValues_MPIDense,
1458:        0,
1459: /*45*/ 0,
1460:        MatScale_MPIDense,
1461:        0,
1462:        0,
1463:        0,
1464: /*50*/ 0,
1465:        0,
1466:        0,
1467:        0,
1468:        0,
1469: /*55*/ 0,
1470:        0,
1471:        0,
1472:        0,
1473:        0,
1474: /*60*/ MatGetSubMatrix_MPIDense,
1475:        MatDestroy_MPIDense,
1476:        MatView_MPIDense,
1477:        0,
1478:        0,
1479: /*65*/ 0,
1480:        0,
1481:        0,
1482:        0,
1483:        0,
1484: /*70*/ 0,
1485:        0,
1486:        0,
1487:        0,
1488:        0,
1489: /*75*/ 0,
1490:        0,
1491:        0,
1492:        0,
1493:        0,
1494: /*80*/ 0,
1495:        0,
1496:        0,
1497:        0,
1498: /*84*/ MatLoad_MPIDense,
1499:        0,
1500:        0,
1501:        0,
1502:        0,
1503:        0,
1504: /*90*/
1505: #if defined(PETSC_HAVE_PLAPACK)
1506:        MatMatMult_MPIDense_MPIDense,
1507:        MatMatMultSymbolic_MPIDense_MPIDense,
1508:        MatMatMultNumeric_MPIDense_MPIDense,
1509: #else
1510:        0,
1511:        0,
1512:        0,
1513: #endif
1514:        0,
1515: /*95*/ 0,
1516:        0,
1517:        0,
1518:        0};

1523: PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1524: {
1525:   Mat_MPIDense   *a;

1529:   mat->preallocated = PETSC_TRUE;
1530:   /* Note:  For now, when data is specified above, this assumes the user correctly
1531:    allocates the local dense storage space.  We should add error checking. */

1533:   a    = (Mat_MPIDense*)mat->data;
1534:   MatCreate(PETSC_COMM_SELF,&a->A);
1535:   MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1536:   MatSetType(a->A,MATSEQDENSE);
1537:   MatSeqDenseSetPreallocation(a->A,data);
1538:   PetscLogObjectParent(mat,a->A);
1539:   return(0);
1540: }

1543: /*MC
1544:    MAT_SOLVER_PLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices

1546:   Run config/configure.py with the option --download-plapack to build PETSc with PLAPACK support.


1549:   Options Database Keys:
1550: . -mat_plapack_nprows <n> - number of rows in processor partition
1551: . -mat_plapack_npcols <n> - number of columns in processor partition
1552: . -mat_plapack_nb <n> - block size of template vector
1553: . -mat_plapack_nb_alg <n> - algorithmic block size
1554: - -mat_plapack_ckerror <n> - error checking flag

1556: .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage

1558: M*/
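/*
   Editor's sketch (illustrative, not part of the library source): one way to request
   this solver explicitly from application code, assuming A is an assembled MATMPIDENSE
   matrix; the factored matrix F is then used with the symbolic/numeric factorization
   and MatSolve() routines defined above.

      Mat F;
      MatGetFactor(A,MAT_SOLVER_PLAPACK,MAT_FACTOR_LU,&F);

   Alternatively the solver can be selected through PCFactorSetSolverPackage(), as
   noted in the .seealso list above.
*/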

1563: PetscErrorCode  MatCreate_MPIDense(Mat mat)
1564: {
1565:   Mat_MPIDense   *a;

1569:   PetscNewLog(mat,Mat_MPIDense,&a);
1570:   mat->data         = (void*)a;
1571:   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1572:   mat->mapping      = 0;

1574:   mat->insertmode = NOT_SET_VALUES;
1575:   MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);
1576:   MPI_Comm_size(((PetscObject)mat)->comm,&a->size);

1578:   PetscMapSetBlockSize(mat->rmap,1);
1579:   PetscMapSetBlockSize(mat->cmap,1);
1580:   PetscMapSetUp(mat->rmap);
1581:   PetscMapSetUp(mat->cmap);
1582:   a->nvec = mat->cmap->n;

1584:   /* build cache for off array entries formed */
1585:   a->donotstash = PETSC_FALSE;
1586:   MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);

1588:   /* stuff used for matrix vector multiply */
1589:   a->lvec        = 0;
1590:   a->Mvctx       = 0;
1591:   a->roworiented = PETSC_TRUE;

1593:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1594:                                      "MatGetDiagonalBlock_MPIDense",
1595:                                      MatGetDiagonalBlock_MPIDense);
1596:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1597:                                      "MatMPIDenseSetPreallocation_MPIDense",
1598:                                      MatMPIDenseSetPreallocation_MPIDense);
1599:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1600:                                      "MatMatMult_MPIAIJ_MPIDense",
1601:                                       MatMatMult_MPIAIJ_MPIDense);
1602:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1603:                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1604:                                       MatMatMultSymbolic_MPIAIJ_MPIDense);
1605:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1606:                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1607:                                       MatMatMultNumeric_MPIAIJ_MPIDense);
1608: #if defined(PETSC_HAVE_PLAPACK)
1609:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_mpidense_plapack_C",
1610:                                      "MatGetFactor_mpidense_plapack",
1611:                                       MatGetFactor_mpidense_plapack);
1612:   PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);
1613: #endif
1614:   PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);

1616:   return(0);
1617: }

1620: /*MC
1621:    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

1623:    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1624:    and MATMPIDENSE otherwise.

1626:    Options Database Keys:
1627: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

1629:   Level: beginner


1632: .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1633: M*/

1638: PetscErrorCode  MatCreate_Dense(Mat A)
1639: {
1641:   PetscMPIInt    size;

1644:   MPI_Comm_size(((PetscObject)A)->comm,&size);
1645:   if (size == 1) {
1646:     MatSetType(A,MATSEQDENSE);
1647:   } else {
1648:     MatSetType(A,MATMPIDENSE);
1649:   }
1650:   return(0);
1651: }
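/*
   Editor's sketch (illustrative, not part of the library source): selecting the dense
   type from the options database, which is what the constructor above is registered
   for; the sizes and names below are placeholders.

      MatCreate(comm,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
      MatSetFromOptions(A);                        with -mat_type dense on the command line
      MatMPIDenseSetPreallocation(A,PETSC_NULL);   or MatSeqDenseSetPreallocation() on one process
*/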

1656: /*@C
1657:    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

1659:    Not collective

1661:    Input Parameters:
1662: .  A - the matrix
1663: -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1664:    to control all matrix memory allocation.

1666:    Notes:
1667:    The dense format is fully compatible with standard Fortran 77
1668:    storage by columns.

1670:    The data input variable is intended primarily for Fortran programmers
1671:    who wish to allocate their own matrix memory space.  Most users should
1672:    set data=PETSC_NULL.

1674:    Level: intermediate

1676: .keywords: matrix,dense, parallel

1678: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1679: @*/
1680: PetscErrorCode  MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1681: {
1682:   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

1685:   PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);
1686:   if (f) {
1687:     (*f)(mat,data);
1688:   }
1689:   return(0);
1690: }
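/*
   Editor's sketch (illustrative, not part of the library source): typical use of
   MatMPIDenseSetPreallocation() when PETSc manages the storage itself; this mirrors
   the sequence used by MatCreateMPIDense() below.

      MatCreate(comm,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetType(A,MATMPIDENSE);
      MatMPIDenseSetPreallocation(A,PETSC_NULL);
*/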

1694: /*@C
1695:    MatCreateMPIDense - Creates a parallel matrix in dense format.

1697:    Collective on MPI_Comm

1699:    Input Parameters:
1700: +  comm - MPI communicator
1701: .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1702: .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1703: .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1704: .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1705: -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1706:    to control all matrix memory allocation.

1708:    Output Parameter:
1709: .  A - the matrix

1711:    Notes:
1712:    The dense format is fully compatible with standard Fortran 77
1713:    storage by columns.

1715:    The data input variable is intended primarily for Fortran programmers
1716:    who wish to allocate their own matrix memory space.  Most users should
1717:    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

1719:    The user MUST specify either the local or global matrix dimensions
1720:    (possibly both).

1722:    Level: intermediate

1724: .keywords: matrix,dense, parallel

1726: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1727: @*/
1728: PetscErrorCode  MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1729: {
1731:   PetscMPIInt    size;

1734:   MatCreate(comm,A);
1735:   MatSetSizes(*A,m,n,M,N);
1736:   MPI_Comm_size(comm,&size);
1737:   if (size > 1) {
1738:     MatSetType(*A,MATMPIDENSE);
1739:     MatMPIDenseSetPreallocation(*A,data);
1740:   } else {
1741:     MatSetType(*A,MATSEQDENSE);
1742:     MatSeqDenseSetPreallocation(*A,data);
1743:   }
1744:   return(0);
1745: }
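/*
   Editor's sketch (illustrative sizes, not part of the library source): create a
   100 x 100 dense matrix distributed by rows, letting PETSc pick the local sizes
   and allocate the storage.

      Mat A;
      MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);
*/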

1749: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1750: {
1751:   Mat            mat;
1752:   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;

1756:   *newmat       = 0;
1757:   MatCreate(((PetscObject)A)->comm,&mat);
1758:   MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1759:   MatSetType(mat,((PetscObject)A)->type_name);
1760:   a                 = (Mat_MPIDense*)mat->data;
1761:   PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));
1762:   mat->factor       = A->factor;
1763:   mat->assembled    = PETSC_TRUE;
1764:   mat->preallocated = PETSC_TRUE;

1766:   mat->rmap->rstart     = A->rmap->rstart;
1767:   mat->rmap->rend       = A->rmap->rend;
1768:   a->size              = oldmat->size;
1769:   a->rank              = oldmat->rank;
1770:   mat->insertmode      = NOT_SET_VALUES;
1771:   a->nvec              = oldmat->nvec;
1772:   a->donotstash        = oldmat->donotstash;
1773: 
1774:   PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));
1775:   PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));
1776:   MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);

1778:   MatSetUpMultiply_MPIDense(mat);
1779:   MatDuplicate(oldmat->A,cpvalues,&a->A);
1780:   PetscLogObjectParent(mat,a->A);

1782:   *newmat = mat;
1783:   return(0);
1784: }
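
/*
   Usage sketch (editorial addition, not part of the original source): duplicating an
   assembled MPI dense matrix.  MAT_COPY_VALUES also copies the numerical values;
   MAT_DO_NOT_COPY_VALUES would duplicate only the layout.  Error checking is omitted.
*/
static PetscErrorCode ExampleDuplicateMPIDense(Mat A)
{
  Mat B;

  MatDuplicate(A,MAT_COPY_VALUES,&B);
  /* ... use B independently of A ... */
  MatDestroy(B);
  return(0);
}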

1786:  #include petscsys.h

1790: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat)
1791: {
1793:   PetscMPIInt    rank,size;
1794:   PetscInt       *rowners,i,m,nz,j;
1795:   PetscScalar    *array,*vals,*vals_ptr;
1796:   MPI_Status     status;

1799:   MPI_Comm_rank(comm,&rank);
1800:   MPI_Comm_size(comm,&size);

1802:   /* determine ownership of all rows */
1803:   m          = M/size + ((M % size) > rank);
1804:   PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1805:   MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
1806:   rowners[0] = 0;
1807:   for (i=2; i<=size; i++) {
1808:     rowners[i] += rowners[i-1];
1809:   }
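          /* Editorial note: for example, with M = 10 rows on size = 3 processes the local sizes
             are m = 4, 3, 3, and after the prefix sum rowners = {0, 4, 7, 10}; process i owns
             global rows rowners[i] up to (but not including) rowners[i+1]. */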

1811:   MatCreate(comm,newmat);
1812:   MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);
1813:   MatSetType(*newmat,type);
1814:   MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
1815:   MatGetArray(*newmat,&array);

1817:   if (!rank) {
1818:     PetscMalloc(m*N*sizeof(PetscScalar),&vals);

1820:     /* read in my part of the matrix numerical values  */
1821:     PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1822: 
1823:     /* insert into the matrix row by row (the file stores the values by rows, while the dense array is stored by columns, so we cannot read directly into array) */
1824:     vals_ptr = vals;
1825:     for (i=0; i<m; i++) {
1826:       for (j=0; j<N; j++) {
1827:         array[i + j*m] = *vals_ptr++;
1828:       }
1829:     }

1831:     /* read in the other processes' values and ship them out */
1832:     for (i=1; i<size; i++) {
1833:       nz   = (rowners[i+1] - rowners[i])*N;
1834:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1835:       MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);
1836:     }
1837:   } else {
1838:     /* receive numeric values */
1839:     PetscMalloc(m*N*sizeof(PetscScalar),&vals);

1841:     /* receive message of values */
1842:     MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);

1844:     /* insert into the matrix row by row (the file stores the values by rows, while the dense array is stored by columns, so we cannot read directly into array) */
1845:     vals_ptr = vals;
1846:     for (i=0; i<m; i++) {
1847:       for (j=0; j<N; j++) {
1848:         array[i + j*m] = *vals_ptr++;
1849:       }
1850:     }
1851:   }
1852:   PetscFree(rowners);
1853:   PetscFree(vals);
1854:   MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);
1855:   MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);
1856:   return(0);
1857: }

1861: PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1862: {
1863:   Mat            A;
1864:   PetscScalar    *vals,*svals;
1865:   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1866:   MPI_Status     status;
1867:   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1868:   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1869:   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1870:   PetscInt       i,nz,j,rstart,rend;
1871:   int            fd;

1875:   MPI_Comm_size(comm,&size);
1876:   MPI_Comm_rank(comm,&rank);
1877:   if (!rank) {
1878:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1879:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1880:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1881:   }

1883:   MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1884:   M = header[1]; N = header[2]; nz = header[3];

1886:   /*
1887:        Handle case where matrix is stored on disk as a dense matrix 
1888:   */
1889:   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1890:     MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);
1891:     return(0);
1892:   }

1894:   /* determine ownership of all rows */
1895:   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1896:   PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);
1897:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1898:   rowners[0] = 0;
1899:   for (i=2; i<=size; i++) {
1900:     rowners[i] += rowners[i-1];
1901:   }
1902:   rstart = rowners[rank];
1903:   rend   = rowners[rank+1];

1905:   /* distribute row lengths to all processors */
1906:   PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);
1907:   offlens = ourlens + (rend-rstart);
1908:   if (!rank) {
1909:     PetscMalloc(M*sizeof(PetscInt),&rowlengths);
1910:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1911:     PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);
1912:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1913:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1914:     PetscFree(sndcounts);
1915:   } else {
1916:     MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1917:   }

1919:   if (!rank) {
1920:     /* calculate the number of nonzeros on each processor */
1921:     PetscMalloc(size*sizeof(PetscInt),&procsnz);
1922:     PetscMemzero(procsnz,size*sizeof(PetscInt));
1923:     for (i=0; i<size; i++) {
1924:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1925:         procsnz[i] += rowlengths[j];
1926:       }
1927:     }
1928:     PetscFree(rowlengths);

1930:     /* determine max buffer needed and allocate it */
1931:     maxnz = 0;
1932:     for (i=0; i<size; i++) {
1933:       maxnz = PetscMax(maxnz,procsnz[i]);
1934:     }
1935:     PetscMalloc(maxnz*sizeof(PetscInt),&cols);

1937:     /* read in my part of the matrix column indices  */
1938:     nz = procsnz[0];
1939:     PetscMalloc(nz*sizeof(PetscInt),&mycols);
1940:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);

1942:     /* read in everyone else's column indices and ship them off */
1943:     for (i=1; i<size; i++) {
1944:       nz   = procsnz[i];
1945:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1946:       MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1947:     }
1948:     PetscFree(cols);
1949:   } else {
1950:     /* determine buffer space needed for message */
1951:     nz = 0;
1952:     for (i=0; i<m; i++) {
1953:       nz += ourlens[i];
1954:     }
1955:     PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);

1957:     /* receive message of column indices */
1958:     MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1959:     MPI_Get_count(&status,MPIU_INT,&maxnz);
1960:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1961:   }

1963:   /* loop over local rows, determining number of off-diagonal entries */
1964:   PetscMemzero(offlens,m*sizeof(PetscInt));
1965:   jj = 0;
1966:   for (i=0; i<m; i++) {
1967:     for (j=0; j<ourlens[i]; j++) {
1968:       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1969:       jj++;
1970:     }
1971:   }

1973:   /* create our matrix */
1974:   for (i=0; i<m; i++) {
1975:     ourlens[i] -= offlens[i];
1976:   }
1977:   MatCreate(comm,newmat);
1978:   MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);
1979:   MatSetType(*newmat,type);
1980:   MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
1981:   A = *newmat;
1982:   for (i=0; i<m; i++) {
1983:     ourlens[i] += offlens[i];
1984:   }

1986:   if (!rank) {
1987:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

1989:     /* read in my part of the matrix numerical values  */
1990:     nz = procsnz[0];
1991:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1992: 
1993:     /* insert into matrix */
1994:     jj      = rstart;
1995:     smycols = mycols;
1996:     svals   = vals;
1997:     for (i=0; i<m; i++) {
1998:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1999:       smycols += ourlens[i];
2000:       svals   += ourlens[i];
2001:       jj++;
2002:     }

2004:     /* read in the other processes' values and ship them out */
2005:     for (i=1; i<size; i++) {
2006:       nz   = procsnz[i];
2007:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2008:       MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);
2009:     }
2010:     PetscFree(procsnz);
2011:   } else {
2012:     /* receive numeric values */
2013:     PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);

2015:     /* receive message of values */
2016:     MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);
2017:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2018:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

2020:     /* insert into matrix */
2021:     jj      = rstart;
2022:     smycols = mycols;
2023:     svals   = vals;
2024:     for (i=0; i<m; i++) {
2025:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2026:       smycols += ourlens[i];
2027:       svals   += ourlens[i];
2028:       jj++;
2029:     }
2030:   }
2031:   PetscFree(ourlens);
2032:   PetscFree(vals);
2033:   PetscFree(mycols);
2034:   PetscFree(rowners);

2036:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2037:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2038:   return(0);
2039: }
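
/*
   Usage sketch (editorial addition, not part of the original source): loading an MPI dense
   matrix from a PETSc binary file.  The file name is supplied by the caller; error checking
   is omitted to match the style of this listing.
*/
static PetscErrorCode ExampleLoadMPIDense(MPI_Comm comm,const char filename[],Mat *A)
{
  PetscViewer viewer;

  PetscViewerBinaryOpen(comm,filename,FILE_MODE_READ,&viewer);
  MatLoad(viewer,MATMPIDENSE,A);                /* dispatches to MatLoad_MPIDense() above */
  PetscViewerDestroy(viewer);
  return(0);
}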

2043: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
2044: {
2045:   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2046:   Mat            a,b;
2047:   PetscTruth     flg;

2051:   a = matA->A;
2052:   b = matB->A;
2053:   MatEqual(a,b,&flg);
2054:   MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);
2055:   return(0);
2056: }
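
/*
   Usage sketch (editorial addition, not part of the original source): MatEqual() is
   collective, and because the local comparisons above are combined with a logical AND,
   every process receives the same answer.  Error checking is omitted.
*/
static PetscErrorCode ExampleCompareMPIDense(Mat A,Mat B)
{
  PetscTruth equal;

  MatEqual(A,B,&equal);
  if (equal) {
    PetscPrintf(((PetscObject)A)->comm,"the two matrices are equal\n");
  }
  return(0);
}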

2058: #if defined(PETSC_HAVE_PLAPACK)

2062: /*@C
2063:   PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.
2064:   Level: developer

2066: .keywords: Petsc, destroy, package, PLAPACK
2067: .seealso: PetscFinalize()
2068: @*/
2069: PetscErrorCode  PetscPLAPACKFinalizePackage(void)
2070: {

2074:   PLA_Finalize();
2075:   return(0);
2076: }

2080: /*@C
2081:   PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
2082:   called from MatCreate_MPIDense() the first time an MPI dense matrix is created.

2084:   Input Parameter:
2085: .   comm - the communicator the matrix lives on

2087:   Level: developer

2089:    Notes: PLAPACK does not fit well with MPI communicators; all (parallel) PLAPACK objects have to live in the
2090:   same communicator, because PLAPACK keeps global state that is initialized once and used for all matrices. In addition,
2091:   if the matrices that trigger PLAPACK initialization are created on subcommunicators of MPI_COMM_WORLD, those
2092:   subcommunicators cannot overlap.

2094: .keywords: Petsc, initialize, package, PLAPACK
2095: .seealso: PetscInitializePackage(), PetscInitialize()
2096: @*/
2097: PetscErrorCode  PetscPLAPACKInitializePackage(MPI_Comm comm)
2098: {
2099:   PetscMPIInt    size;

2103:   if (!PLA_Initialized(PETSC_NULL)) {

2105:     MPI_Comm_size(comm,&size);
2106:     Plapack_nprows = 1;
2107:     Plapack_npcols = size;
2108: 
2109:     PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");
2110:       PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);
2111:       PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);
2112: #if defined(PETSC_USE_DEBUG)
2113:       Plapack_ierror = 3;
2114: #else
2115:       Plapack_ierror = 0;
2116: #endif
2117:       PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);
2118:       if (Plapack_ierror){
2119:         PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );
2120:       } else {
2121:         PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );
2122:       }
2123: 
2124:       Plapack_nb_alg = 0;
2125:       PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);
2126:       if (Plapack_nb_alg) {
2127:         pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);
2128:       }
2129:     PetscOptionsEnd();

2131:     PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);
2132:     PLA_Init(Plapack_comm_2d);
2133:     PetscRegisterFinalize(PetscPLAPACKFinalizePackage);
2134:   }
2135:   return(0);
2136: }
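
/*
   Usage sketch (editorial addition, not part of the original source): the options registered
   above are read from the options database, so they can be given on the command line, e.g.
   for a 4-process run with a 2 x 2 PLAPACK process mesh and an algorithmic block size of 128
   (the program name and values are illustrative):

       mpiexec -n 4 ./myprog -mat_plapack_nprows 2 -mat_plapack_npcols 2 -mat_plapack_nb_alg 128
*/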

2138: #endif