Actual source code: matpapt.c
1: #define PETSCMAT_DLL
3: /*
4: Defines matrix-matrix product routines for pairs of SeqAIJ matrices
5: C = P * A * P^T
6: */
8: #include ../src/mat/impls/aij/seq/aij.h
9: #include ../src/mat/utils/freespace.h
12: /*
13: MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices
14: C = P * A * P^T;
16: Note: C is assumed to be uncreated.
17: If this is not the case, Destroy C before calling this routine.
18: */
21: PetscErrorCode MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C)
22: {
23: /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */
24: /* and MatMatMult_SeqAIJ_SeqAIJ_Symbolic. Perhaps they could be merged nicely. */
25: PetscErrorCode ierr;
26: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
27: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
28: PetscInt *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj;
29: PetscInt *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow;
30: PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N;
31: PetscInt i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi;
32: MatScalar *ca;
35: /* some error checking which could be moved into interface layer */
36: if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pn,am);
37: if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",am, an);
39: /* Set up timers */
40: PetscLogEventBegin(MAT_Applypapt_symbolic,A,P,0,0);
42: /* Create ij structure of P^T */
43: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
45: /* Allocate ci array, arrays for fill computation and */
46: /* free space for accumulating nonzero column info */
47: PetscMalloc(((pm+1)*1)*sizeof(PetscInt),&ci);
48: ci[0] = 0;
50: PetscMalloc((2*an+2*pm+1)*sizeof(PetscInt),&padenserow);
51: PetscMemzero(padenserow,(2*an+2*pm+1)*sizeof(PetscInt));
52: pasparserow = padenserow + an;
53: denserow = pasparserow + an;
54: sparserow = denserow + pm;
56: /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */
57: /* This should be reasonable if sparsity of PAPt is similar to that of A. */
58: PetscFreeSpaceGet((ai[am]/pn)*pm,&free_space);
59: current_space = free_space;
61: /* Determine fill for each row of C: */
62: for (i=0;i<pm;i++) {
63: pnzi = pi[i+1] - pi[i];
64: panzi = 0;
65: /* Get symbolic sparse row of PA: */
66: for (j=0;j<pnzi;j++) {
67: arow = *pj++;
68: anzj = ai[arow+1] - ai[arow];
69: ajj = aj + ai[arow];
70: for (k=0;k<anzj;k++) {
71: if (!padenserow[ajj[k]]) {
72: padenserow[ajj[k]] = -1;
73: pasparserow[panzi++] = ajj[k];
74: }
75: }
76: }
77: /* Using symbolic row of PA, determine symbolic row of C: */
78: paj = pasparserow;
79: cnzi = 0;
80: for (j=0;j<panzi;j++) {
81: ptrow = *paj++;
82: ptnzj = pti[ptrow+1] - pti[ptrow];
83: ptjj = ptj + pti[ptrow];
84: for (k=0;k<ptnzj;k++) {
85: if (!denserow[ptjj[k]]) {
86: denserow[ptjj[k]] = -1;
87: sparserow[cnzi++] = ptjj[k];
88: }
89: }
90: }
92: /* sort sparse representation */
93: PetscSortInt(cnzi,sparserow);
95: /* If free space is not available, make more free space */
96: /* Double the amount of total space in the list */
97: if (current_space->local_remaining<cnzi) {
98: PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);
99: }
101: /* Copy data into free space, and zero out dense row */
102: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
103: current_space->array += cnzi;
104: current_space->local_used += cnzi;
105: current_space->local_remaining -= cnzi;
107: for (j=0;j<panzi;j++) {
108: padenserow[pasparserow[j]] = 0;
109: }
110: for (j=0;j<cnzi;j++) {
111: denserow[sparserow[j]] = 0;
112: }
113: ci[i+1] = ci[i] + cnzi;
114: }
115: /* column indices are in the list of free space */
116: /* Allocate space for cj, initialize cj, and */
117: /* destroy list of free space and other temporary array(s) */
118: PetscMalloc((ci[pm]+1)*sizeof(PetscInt),&cj);
119: PetscFreeSpaceContiguous(&free_space,cj);
120: PetscFree(padenserow);
121:
122: /* Allocate space for ca */
123: PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);
124: PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));
125:
126: /* put together the new matrix */
127: MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,pm,pm,ci,cj,ca,C);
129: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
130: /* Since these are PETSc arrays, change flags to free them as necessary. */
131: c = (Mat_SeqAIJ *)((*C)->data);
132: c->free_a = PETSC_TRUE;
133: c->free_ij = PETSC_TRUE;
134: c->nonew = 0;
136: /* Clean up. */
137: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
139: PetscLogEventEnd(MAT_Applypapt_symbolic,A,P,0,0);
140: return(0);
141: }
143: /*
144: MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices
145: C = P * A * P^T;
146: Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ.
147: */
150: PetscErrorCode MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
151: {
153: PetscInt flops=0;
154: Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
155: Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data;
156: Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data;
157: PetscInt *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj;
158: PetscInt *ci=c->i,*cj=c->j;
159: PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,cn=C->cmap->N,cm=C->rmap->N;
160: PetscInt i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi;
161: MatScalar *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum;
165: /* This error checking should be unnecessary if the symbolic was performed */
166: if (pm!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pm,cm);
167: if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pn,am);
168: if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",am, an);
169: if (pm!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pm, cn);
171: /* Set up timers */
172: PetscLogEventBegin(MAT_Applypapt_numeric,A,P,C,0);
174: PetscMalloc(an*(sizeof(MatScalar)+2*sizeof(PetscInt)),&paa);
175: PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(PetscInt)));
176: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
178: paj = (PetscInt*)(paa + an);
179: pajdense = paj + an;
181: for (i=0;i<pm;i++) {
182: /* Form sparse row of P*A */
183: pnzi = pi[i+1] - pi[i];
184: panzj = 0;
185: for (j=0;j<pnzi;j++) {
186: arow = *pj++;
187: anzj = ai[arow+1] - ai[arow];
188: ajj = aj + ai[arow];
189: aaj = aa + ai[arow];
190: for (k=0;k<anzj;k++) {
191: if (!pajdense[ajj[k]]) {
192: pajdense[ajj[k]] = -1;
193: paj[panzj++] = ajj[k];
194: }
195: paa[ajj[k]] += (*pa)*aaj[k];
196: }
197: flops += 2*anzj;
198: pa++;
199: }
201: /* Sort the j index array for quick sparse axpy. */
202: PetscSortInt(panzj,paj);
204: /* Compute P*A*P^T using sparse inner products. */
205: /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */
206: cnzi = ci[i+1] - ci[i];
207: for (j=0;j<cnzi;j++) {
208: /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */
209: ptcol = *cj++;
210: ptnzj = pi[ptcol+1] - pi[ptcol];
211: ptj = pjj + pi[ptcol];
212: ptaj = pta + pi[ptcol];
213: sum = 0.;
214: k1 = 0;
215: k2 = 0;
216: while ((k1<panzj) && (k2<ptnzj)) {
217: if (paj[k1]==ptj[k2]) {
218: sum += paa[paj[k1++]]*ptaj[k2++];
219: } else if (paj[k1] < ptj[k2]) {
220: k1++;
221: } else /* if (paj[k1] > ptj[k2]) */ {
222: k2++;
223: }
224: }
225: *ca++ = sum;
226: }
228: /* Zero the current row info for P*A */
229: for (j=0;j<panzj;j++) {
230: paa[paj[j]] = 0.;
231: pajdense[paj[j]] = 0;
232: }
233: }
235: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
236: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
237: PetscLogFlops(flops);
238: PetscLogEventEnd(MAT_Applypapt_numeric,A,P,C,0);
239: return(0);
240: }
241:
244: PetscErrorCode MatApplyPAPt_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C)
245: {
249: PetscLogEventBegin(MAT_Applypapt,A,P,0,0);
250: MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(A,P,C);
251: MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(A,P,*C);
252: PetscLogEventEnd(MAT_Applypapt,A,P,0,0);
253: return(0);
254: }