Actual source code: baijfact3.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Factorization code for BAIJ format. 
  5: */
 6:  #include ../src/mat/impls/baij/seq/baij.h
 7:  #include ../src/inline/ilu.h

 11: /*
 12:    This is used to set the numeric factorization for both LU and ILU symbolic factorization
 13: */
 14: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat inA,PetscTruth natural)
 15: {
 17:   if (natural) {
 18:     switch (inA->rmap->bs) {
 19:     case 1:
 20:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
 21:       break;
 22:     case 2:
 23:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
 24:       break;
 25:     case 3:
 26:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
 27:       break;
 28:     case 4:
 29: #if defined(PETSC_USE_MAT_SINGLE)
 30:       {
 31:         PetscTruth  sse_enabled_local;
 33:         PetscSSEIsEnabled(inA->comm,&sse_enabled_local,PETSC_NULL);
 34:         if (sse_enabled_local) {
 35: #  if defined(PETSC_HAVE_SSE)
 36:           int i,*AJ=a->j,nz=a->nz,n=a->mbs;
 37:           if (n==(unsigned short)n) {
 38:             unsigned short *aj=(unsigned short *)AJ;
 39:             for (i=0;i<nz;i++) {
 40:               aj[i] = (unsigned short)AJ[i];
 41:             }
 42:             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
 43:             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
 44:             PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");
 45:           } else {
 46:         /* Scale the column indices for easier indexing in MatSolve. */
 47: /*            for (i=0;i<nz;i++) { */
 48: /*              AJ[i] = AJ[i]*4; */
 49: /*            } */
 50:             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
 51:             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
 52:             PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");
 53:           }
 54: #  else
 55:         /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
 56:           SETERRQ(PETSC_ERR_SUP,"SSE Hardware unavailable");
 57: #  endif
 58:         } else {
 59:           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
 60:         }
 61:       }
 62: #else
 63:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
 64: #endif
 65:       break;
 66:     case 5:
 67:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
 68:       break;
 69:     case 6:
 70:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
 71:       break;
 72:     case 7:
 73:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
 74:       break;
 75:     default:
 76:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 77:       break;
 78:     }
 79:   } else {
 80:     switch (inA->rmap->bs) {
 81:     case 1:
 82:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
 83:       break;
 84:     case 2:
 85:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
 86:       break;
 87:     case 3:
 88:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
 89:       break;
 90:     case 4:
 91:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
 92:       break;
 93:     case 5:
 94:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
 95:       break;
 96:     case 6:
 97:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
 98:       break;
 99:     case 7:
100:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
101:       break;
102:     default:
103:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
104:       break;
105:     }
106:   }
107:   return(0);
108: }

110: /*
111:     The symbolic factorization code is identical to that for AIJ format,
112:   except for very small changes since this is now a SeqBAIJ datastructure.
113:   NOT good code reuse.
114: */
117: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
118: {
119:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b;
120:   IS             isicol;
122:   const PetscInt *r,*ic,*ai = a->i,*aj = a->j,*ajtmp;
123:   PetscInt       n = a->mbs,*ainew,*ajnew,jmax,*fill,nz,bs = A->rmap->bs,bs2=a->bs2,i,*ajtmp2;
124:   PetscInt       *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im;
125:   PetscReal      f = 1.0;
126:   PetscTruth     row_identity,col_identity,both_identity;

129:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
130:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
131:   ISGetIndices(isrow,&r);
132:   ISGetIndices(isicol,&ic);

134:   f = info->fill;
135:   /* get new row pointers */
136:   PetscMalloc((n+1)*sizeof(PetscInt),&ainew);
137:   ainew[0] = 0;
138:   /* don't know how many column pointers are needed so estimate */
139:   jmax     = (PetscInt)(f*ai[n] + 1);
140:   PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);
141:   /* fill is a linked list of nonzeros in active row */
142:   PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);
143:   im       = fill + n + 1;
144:   /* idnew is location of diagonal in factor */
145:   PetscMalloc((n+1)*sizeof(PetscInt),&idnew);
146:   idnew[0] = 0;

148:   for (i=0; i<n; i++) {
149:     /* first copy previous fill into linked list */
150:     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
151:     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
152:     ajtmp   = aj + ai[r[i]];
153:     fill[n] = n;
154:     while (nz--) {
155:       fm  = n;
156:       idx = ic[*ajtmp++];
157:       do {
158:         m  = fm;
159:         fm = fill[m];
160:       } while (fm < idx);
161:       fill[m]   = idx;
162:       fill[idx] = fm;
163:     }
164:     row = fill[n];
165:     while (row < i) {
166:       ajtmp = ajnew + idnew[row] + 1;
167:       nzbd  = 1 + idnew[row] - ainew[row];
168:       nz    = im[row] - nzbd;
169:       fm    = row;
170:       while (nz-- > 0) {
171:         idx = *ajtmp++;
172:         nzbd++;
173:         if (idx == i) im[row] = nzbd;
174:         do {
175:           m  = fm;
176:           fm = fill[m];
177:         } while (fm < idx);
178:         if (fm != idx) {
179:           fill[m]   = idx;
180:           fill[idx] = fm;
181:           fm        = idx;
182:           nnz++;
183:         }
184:       }
185:       row = fill[row];
186:     }
187:     /* copy new filled row into permanent storage */
188:     ainew[i+1] = ainew[i] + nnz;
189:     if (ainew[i+1] > jmax) {

191:       /* estimate how much additional space we will need */
192:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
193:       /* just double the memory each time */
194:       PetscInt maxadd = jmax;
195:       /* maxadd = (int)((f*(ai[n]+1)*(n-i+5))/n); */
196:       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
197:       jmax += maxadd;

199:       /* allocate a longer ajnew */
200:       PetscMalloc(jmax*sizeof(PetscInt),&ajtmp2);
201:       PetscMemcpy(ajtmp2,ajnew,ainew[i]*sizeof(PetscInt));
202:       PetscFree(ajnew);
203:       ajnew = ajtmp2;
204:       reallocs++; /* count how many times we realloc */
205:     }
206:     ajtmp2 = ajnew + ainew[i];
207:     fm     = fill[n];
208:     nzi    = 0;
209:     im[i]  = nnz;
210:     while (nnz--) {
211:       if (fm < i) nzi++;
212:       *ajtmp2++ = fm;
213:       fm        = fill[fm];
214:     }
215:     idnew[i] = ainew[i] + nzi;
216:   }

218: #if defined(PETSC_USE_INFO)
219:   if (ai[n] != 0) {
220:     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
221:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
222:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
223:     PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
224:     PetscInfo(A,"for best performance.\n");
225:   } else {
226:     PetscInfo(A,"Empty matrix.\n");
227:   }
228: #endif

230:   ISRestoreIndices(isrow,&r);
231:   ISRestoreIndices(isicol,&ic);

233:   PetscFree(fill);

235:   /* put together the new matrix */
236:   MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);
237:   PetscLogObjectParent(B,isicol);
238:   b = (Mat_SeqBAIJ*)(B)->data;
239:   b->singlemalloc = PETSC_FALSE;
240:   b->free_a     = PETSC_TRUE;
241:   b->free_ij    = PETSC_TRUE;
242:   PetscMalloc((ainew[n]+1)*sizeof(MatScalar)*bs2,&b->a);
243:   b->j          = ajnew;
244:   b->i          = ainew;
245:   b->diag       = idnew;
246:   b->ilen       = 0;
247:   b->imax       = 0;
248:   b->row        = isrow;
249:   b->col        = iscol;
250:   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
251:   PetscObjectReference((PetscObject)isrow);
252:   PetscObjectReference((PetscObject)iscol);
253:   b->icol       = isicol;
254:   PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);
255:   /* In b structure:  Free imax, ilen, old a, old j.  
256:      Allocate idnew, solve_work, new a, new j */
257:   PetscLogObjectMemory(B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(MatScalar)));
258:   b->maxnz = b->nz = ainew[n];
259: 
260:   (B)->info.factor_mallocs    = reallocs;
261:   (B)->info.fill_ratio_given  = f;
262:   if (ai[n] != 0) {
263:     (B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
264:   } else {
265:     (B)->info.fill_ratio_needed = 0.0;
266:   }
267:   ISIdentity(isrow,&row_identity);
268:   ISIdentity(iscol,&col_identity);
269:   both_identity = (PetscTruth) (row_identity && col_identity);
270:   MatSeqBAIJSetNumericFactorization(B,both_identity);
271:   return(0);
272:  }