Actual source code: baijfact3.c
1: #define PETSCMAT_DLL
3: /*
4: Factorization code for BAIJ format.
5: */
6: #include ../src/mat/impls/baij/seq/baij.h
7: #include ../src/inline/ilu.h
11: /*
12: This is used to set the numeric factorization for both LU and ILU symbolic factorization
13: */
14: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat inA,PetscTruth natural)
15: {
17: if (natural) {
18: switch (inA->rmap->bs) {
19: case 1:
20: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
21: break;
22: case 2:
23: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
24: break;
25: case 3:
26: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
27: break;
28: case 4:
29: #if defined(PETSC_USE_MAT_SINGLE)
30: {
31: PetscTruth sse_enabled_local;
33: PetscSSEIsEnabled(inA->comm,&sse_enabled_local,PETSC_NULL);
34: if (sse_enabled_local) {
35: # if defined(PETSC_HAVE_SSE)
36: int i,*AJ=a->j,nz=a->nz,n=a->mbs;
37: if (n==(unsigned short)n) {
38: unsigned short *aj=(unsigned short *)AJ;
39: for (i=0;i<nz;i++) {
40: aj[i] = (unsigned short)AJ[i];
41: }
42: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
43: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
44: PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");
45: } else {
46: /* Scale the column indices for easier indexing in MatSolve. */
47: /* for (i=0;i<nz;i++) { */
48: /* AJ[i] = AJ[i]*4; */
49: /* } */
50: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
51: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
52: PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");
53: }
54: # else
55: /* This should never be reached. If so, problem in PetscSSEIsEnabled. */
56: SETERRQ(PETSC_ERR_SUP,"SSE Hardware unavailable");
57: # endif
58: } else {
59: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
60: }
61: }
62: #else
63: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
64: #endif
65: break;
66: case 5:
67: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
68: break;
69: case 6:
70: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
71: break;
72: case 7:
73: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
74: break;
75: default:
76: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
77: break;
78: }
79: } else {
80: switch (inA->rmap->bs) {
81: case 1:
82: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
83: break;
84: case 2:
85: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
86: break;
87: case 3:
88: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
89: break;
90: case 4:
91: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
92: break;
93: case 5:
94: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
95: break;
96: case 6:
97: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
98: break;
99: case 7:
100: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
101: break;
102: default:
103: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
104: break;
105: }
106: }
107: return(0);
108: }
110: /*
111: The symbolic factorization code is identical to that for AIJ format,
112: except for very small changes since this is now a SeqBAIJ datastructure.
113: NOT good code reuse.
114: */
117: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
118: {
119: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b;
120: IS isicol;
122: const PetscInt *r,*ic,*ai = a->i,*aj = a->j,*ajtmp;
123: PetscInt n = a->mbs,*ainew,*ajnew,jmax,*fill,nz,bs = A->rmap->bs,bs2=a->bs2,i,*ajtmp2;
124: PetscInt *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im;
125: PetscReal f = 1.0;
126: PetscTruth row_identity,col_identity,both_identity;
129: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
130: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
131: ISGetIndices(isrow,&r);
132: ISGetIndices(isicol,&ic);
134: f = info->fill;
135: /* get new row pointers */
136: PetscMalloc((n+1)*sizeof(PetscInt),&ainew);
137: ainew[0] = 0;
138: /* don't know how many column pointers are needed so estimate */
139: jmax = (PetscInt)(f*ai[n] + 1);
140: PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);
141: /* fill is a linked list of nonzeros in active row */
142: PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);
143: im = fill + n + 1;
144: /* idnew is location of diagonal in factor */
145: PetscMalloc((n+1)*sizeof(PetscInt),&idnew);
146: idnew[0] = 0;
148: for (i=0; i<n; i++) {
149: /* first copy previous fill into linked list */
150: nnz = nz = ai[r[i]+1] - ai[r[i]];
151: if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
152: ajtmp = aj + ai[r[i]];
153: fill[n] = n;
154: while (nz--) {
155: fm = n;
156: idx = ic[*ajtmp++];
157: do {
158: m = fm;
159: fm = fill[m];
160: } while (fm < idx);
161: fill[m] = idx;
162: fill[idx] = fm;
163: }
164: row = fill[n];
165: while (row < i) {
166: ajtmp = ajnew + idnew[row] + 1;
167: nzbd = 1 + idnew[row] - ainew[row];
168: nz = im[row] - nzbd;
169: fm = row;
170: while (nz-- > 0) {
171: idx = *ajtmp++;
172: nzbd++;
173: if (idx == i) im[row] = nzbd;
174: do {
175: m = fm;
176: fm = fill[m];
177: } while (fm < idx);
178: if (fm != idx) {
179: fill[m] = idx;
180: fill[idx] = fm;
181: fm = idx;
182: nnz++;
183: }
184: }
185: row = fill[row];
186: }
187: /* copy new filled row into permanent storage */
188: ainew[i+1] = ainew[i] + nnz;
189: if (ainew[i+1] > jmax) {
191: /* estimate how much additional space we will need */
192: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
193: /* just double the memory each time */
194: PetscInt maxadd = jmax;
195: /* maxadd = (int)((f*(ai[n]+1)*(n-i+5))/n); */
196: if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
197: jmax += maxadd;
199: /* allocate a longer ajnew */
200: PetscMalloc(jmax*sizeof(PetscInt),&ajtmp2);
201: PetscMemcpy(ajtmp2,ajnew,ainew[i]*sizeof(PetscInt));
202: PetscFree(ajnew);
203: ajnew = ajtmp2;
204: reallocs++; /* count how many times we realloc */
205: }
206: ajtmp2 = ajnew + ainew[i];
207: fm = fill[n];
208: nzi = 0;
209: im[i] = nnz;
210: while (nnz--) {
211: if (fm < i) nzi++;
212: *ajtmp2++ = fm;
213: fm = fill[fm];
214: }
215: idnew[i] = ainew[i] + nzi;
216: }
218: #if defined(PETSC_USE_INFO)
219: if (ai[n] != 0) {
220: PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
221: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
222: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
223: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
224: PetscInfo(A,"for best performance.\n");
225: } else {
226: PetscInfo(A,"Empty matrix.\n");
227: }
228: #endif
230: ISRestoreIndices(isrow,&r);
231: ISRestoreIndices(isicol,&ic);
233: PetscFree(fill);
235: /* put together the new matrix */
236: MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);
237: PetscLogObjectParent(B,isicol);
238: b = (Mat_SeqBAIJ*)(B)->data;
239: b->singlemalloc = PETSC_FALSE;
240: b->free_a = PETSC_TRUE;
241: b->free_ij = PETSC_TRUE;
242: PetscMalloc((ainew[n]+1)*sizeof(MatScalar)*bs2,&b->a);
243: b->j = ajnew;
244: b->i = ainew;
245: b->diag = idnew;
246: b->ilen = 0;
247: b->imax = 0;
248: b->row = isrow;
249: b->col = iscol;
250: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
251: PetscObjectReference((PetscObject)isrow);
252: PetscObjectReference((PetscObject)iscol);
253: b->icol = isicol;
254: PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);
255: /* In b structure: Free imax, ilen, old a, old j.
256: Allocate idnew, solve_work, new a, new j */
257: PetscLogObjectMemory(B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(MatScalar)));
258: b->maxnz = b->nz = ainew[n];
259:
260: (B)->info.factor_mallocs = reallocs;
261: (B)->info.fill_ratio_given = f;
262: if (ai[n] != 0) {
263: (B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
264: } else {
265: (B)->info.fill_ratio_needed = 0.0;
266: }
267: ISIdentity(isrow,&row_identity);
268: ISIdentity(iscol,&col_identity);
269: both_identity = (PetscTruth) (row_identity && col_identity);
270: MatSeqBAIJSetNumericFactorization(B,both_identity);
271: return(0);
272: }