Actual source code: cholbs.c

#define PETSCMAT_DLL

#include "petsc.h"

/* We must define MLOG for BlockSolve logging */
#if defined(PETSC_USE_LOG)
#define MLOG
#endif

#include "../src/mat/impls/rowbs/mpi/mpirowbs.h"

PetscErrorCode MatCholeskyFactorNumeric_MPIRowbs(Mat factp,Mat mat,const MatFactorInfo *info)
{
  Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif


  if (!mbs->blocksolveassembly) {
    MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
  }

  /* Do prep work if same nonzero structure as previously factored matrix */
  if (mbs->factor == MAT_FACTOR_CHOLESKY) {
    /* Copy the nonzeros */
    BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
  }
  /* Form incomplete Cholesky factor */
  mbs->ierr = 0; mbs->failures = 0; mbs->alpha = 1.0;
  while ((mbs->ierr = BSfactor(mbs->fpA,mbs->comm_fpA,mbs->procinfo))) {
    CHKERRBS(0); mbs->failures++;
    /* Copy only the nonzeros */
    BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
    /* Increment the diagonal shift */
    mbs->alpha += 0.1;
    BSset_diag(mbs->fpA,mbs->alpha,mbs->procinfo);CHKERRBS(0);
    PetscInfo3(mat,"BlockSolve95: %d failed factor(s), err=%d, alpha=%g\n",mbs->failures,mbs->ierr,mbs->alpha);
  }
#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif

  factp->ops->solve         = MatSolve_MPIRowbs;
  factp->ops->forwardsolve  = MatForwardSolve_MPIRowbs;
  factp->ops->backwardsolve = MatBackwardSolve_MPIRowbs;
  factp->assembled = PETSC_TRUE;
  return(0);
}
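/*
   Illustrative sketch (not part of the original cholbs.c): the while loop
   above is a diagonal-shift retry strategy.  Each time BSfactor() reports a
   failed incomplete factorization, the original nonzeros are restored, the
   shift alpha grows by 0.1, and the factorization is retried on the more
   diagonally dominant matrix.  The self-contained toy below mirrors that
   control flow; toy_factor(), toy_shift_and_retry(), TOY_MAX_RETRIES, and
   the 1.5 threshold are all hypothetical stand-ins, not BlockSolve behavior.
*/
#include <stdio.h>

#define TOY_MAX_RETRIES 20

/* Stand-in for BSfactor(): reports failure (nonzero) until the shift is big enough. */
static int toy_factor(double alpha)
{
  return (alpha < 1.5) ? 1 : 0;
}

static int toy_shift_and_retry(void)
{
  double alpha    = 1.0;
  int    failures = 0;

  while (toy_factor(alpha)) {
    failures++;
    if (failures > TOY_MAX_RETRIES) return -1;  /* unlike the code above, give up eventually */
    alpha += 0.1;                               /* same increment the code above uses */
    printf("toy: %d failed factor(s), alpha=%g\n",failures,alpha);
  }
  return 0;
}

int main(void)
{
  return toy_shift_and_retry();
}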

PetscErrorCode MatLUFactorNumeric_MPIRowbs(Mat factp,Mat mat,const MatFactorInfo *info)
{
  Mat_MPIRowbs   *mbs = (Mat_MPIRowbs*)mat->data;

#if defined(PETSC_USE_LOG)
  PetscReal      flop1 = BSlocal_flops();
#endif

  if (!mbs->blocksolveassembly) {
    MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
  }

  /* Do prep work if same nonzero structure as previously factored matrix */
  if (mbs->factor == MAT_FACTOR_LU) {
    /* Copy the nonzeros */
    BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
  }
  /* Form incomplete LU factor */
  mbs->ierr = 0; mbs->failures = 0; mbs->alpha = 1.0;
  while ((mbs->ierr = BSfactor(mbs->fpA,mbs->comm_fpA,mbs->procinfo))) {
    CHKERRBS(0); mbs->failures++;
    /* Copy only the nonzeros */
    BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
    /* Increment the diagonal shift */
    mbs->alpha += 0.1;
    BSset_diag(mbs->fpA,mbs->alpha,mbs->procinfo);CHKERRBS(0);
    PetscInfo3(mat,"BlockSolve95: %d failed factor(s), err=%d, alpha=%g\n",mbs->failures,mbs->ierr,mbs->alpha);
  }
  factp->assembled          = PETSC_TRUE;
  factp->ops->solve         = MatSolve_MPIRowbs;
  factp->ops->forwardsolve  = MatForwardSolve_MPIRowbs;
  factp->ops->backwardsolve = MatBackwardSolve_MPIRowbs;

#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif
  return(0);
}
/* ------------------------------------------------------------------- */
PetscErrorCode MatSolve_MPIRowbs(Mat mat,Vec x,Vec y)
{
  Mat          submat = (Mat) mat->data;
  Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)submat->data;
  PetscScalar  *ya,*xa,*xworka;

#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif

  /* Permute and apply diagonal scaling to vector, where D^{-1/2} is stored */
  if (!mbs->vecs_permscale) {
    VecGetArray(x,&xa);
    VecGetArray(mbs->xwork,&xworka);
    BSperm_dvec(xa,xworka,mbs->pA->perm);CHKERRBS(0);
    VecRestoreArray(x,&xa);
    VecRestoreArray(mbs->xwork,&xworka);
    VecPointwiseMult(y,mbs->diag,mbs->xwork);
  } else {
    VecCopy(x,y);
  }

  VecGetArray(y,&ya);
  if (mbs->procinfo->single) {
    /* Use BlockSolve routine for no cliques/inodes */
    BSfor_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
    BSback_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  } else {
    BSfor_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
    BSback_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  }
  VecRestoreArray(y,&ya);

  /* Apply diagonal scaling and unpermute, where D^{-1/2} is stored */
  if (!mbs->vecs_permscale) {
    VecPointwiseMult(mbs->xwork,y,mbs->diag);
    VecGetArray(y,&ya);
    VecGetArray(mbs->xwork,&xworka);
    BSiperm_dvec(xworka,ya,mbs->pA->perm);CHKERRBS(0);
    VecRestoreArray(y,&ya);
    VecRestoreArray(mbs->xwork,&xworka);
  }
#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif
  return(0);
}
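/*
   Illustrative sketch (not part of the original cholbs.c): MatSolve above
   actually solves with the factored, scaled, and permuted operator
   C = P D^{-1/2} A D^{-1/2} P^T, so the solution of A x = b is recovered as
       x = D^{-1/2} P^T C^{-1} P D^{-1/2} b,
   which is the BSperm_dvec/VecPointwiseMult sandwich around the forward and
   backward solves.  The helper below shows the permute-then-scale step on
   plain arrays; the convention that perm[i] is the new position of entry i
   is an assumption for illustration, not a statement about BSperm_dvec's
   internals.
*/
static void toy_perm_scale(const double *x,const double *dhalfinv,
                           const int *perm,int n,double *y)
{
  int i;
  for (i = 0; i < n; i++) y[perm[i]]  = x[i];         /* y = P x        */
  for (i = 0; i < n; i++) y[i]       *= dhalfinv[i];  /* y = D^{-1/2} y */
}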

/* ------------------------------------------------------------------- */
PetscErrorCode MatForwardSolve_MPIRowbs(Mat mat,Vec x,Vec y)
{
  Mat          submat = (Mat) mat->data;
  Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)submat->data;
  PetscScalar  *ya,*xa,*xworka;

#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif

  /* Permute and apply diagonal scaling to vector, where D^{-1/2} is stored */
  if (!mbs->vecs_permscale) {
    VecGetArray(x,&xa);
    VecGetArray(mbs->xwork,&xworka);
    BSperm_dvec(xa,xworka,mbs->pA->perm);CHKERRBS(0);
    VecRestoreArray(x,&xa);
    VecRestoreArray(mbs->xwork,&xworka);
    VecPointwiseMult(y,mbs->diag,mbs->xwork);
  } else {
    VecCopy(x,y);
  }

  VecGetArray(y,&ya);
  if (mbs->procinfo->single) {
    /* Use BlockSolve routine for no cliques/inodes */
    BSfor_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  } else {
    BSfor_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  }
  VecRestoreArray(y,&ya);

#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif

  return(0);
}

/* ------------------------------------------------------------------- */
PetscErrorCode MatBackwardSolve_MPIRowbs(Mat mat,Vec x,Vec y)
{
  Mat          submat = (Mat) mat->data;
  Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)submat->data;
  PetscScalar  *ya,*xworka;

#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif

  VecCopy(x,y);

  VecGetArray(y,&ya);
  if (mbs->procinfo->single) {
    /* Use BlockSolve routine for no cliques/inodes */
    BSback_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  } else {
    BSback_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  }
  VecRestoreArray(y,&ya);

  /* Apply diagonal scaling and unpermute, where D^{-1/2} is stored */
  if (!mbs->vecs_permscale) {
    VecPointwiseMult(mbs->xwork,y,mbs->diag);
    VecGetArray(y,&ya);
    VecGetArray(mbs->xwork,&xworka);
    BSiperm_dvec(xworka,ya,mbs->pA->perm);CHKERRBS(0);
    VecRestoreArray(y,&ya);
    VecRestoreArray(mbs->xwork,&xworka);
  }
#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif
  return(0);
}
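/*
   Illustrative sketch (not part of the original cholbs.c): the forward and
   backward routines above are the two triangular halves of MatSolve_MPIRowbs;
   the forward half applies the pre-scaling/permutation before BSfor_solve,
   and the backward half undoes it after BSback_solve, so composing them
   through the public PETSc interface reproduces a full MatSolve.  The
   function toy_split_solve() and the Vec names F, b, w, x are placeholders.
*/
static PetscErrorCode toy_split_solve(Mat F,Vec b,Vec w,Vec x)
{
  MatForwardSolve(F,b,w);   /* lower-triangular half: solve L w = (scaled, permuted) b */
  MatBackwardSolve(F,w,x);  /* upper-triangular half: solve L^T x = w, then unscale    */
  return(0);
}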


/*
    The logging variables required by BlockSolve.

    This is an ugly hack that allows PETSc to run properly with BlockSolve
  regardless of whether PETSc or BlockSolve is compiled with logging turned on.

    It is bad because it relies on BlockSolve's logging internals not changing,
  but we have no choice; besides, it is unlikely that BlockSolve will be
  developed further in the near future anyway.
*/
double MLOG_flops;
double MLOG_event_flops;
double MLOG_time_stamp;
PetscErrorCode MLOG_sequence_num;
#if defined(MLOG_MAX_EVNTS)
MLOG_log_type MLOG_event_log[MLOG_MAX_EVNTS];
MLOG_log_type MLOG_accum_log[MLOG_MAX_ACCUM];
#else
typedef struct __MLOG_log_type {
  double time_stamp;
  double total_time;
  double flops;
  int    event_num;
} MLOG_log_type;
#define MLOG_MAX_EVNTS 1300
#define MLOG_MAX_ACCUM 75
MLOG_log_type MLOG_event_log[MLOG_MAX_EVNTS];
MLOG_log_type MLOG_accum_log[MLOG_MAX_ACCUM];
#endif