Actual source code: matstash.c
1: #define PETSCMAT_DLL
3: #include private/matimpl.h
5: #define DEFAULT_STASH_SIZE 10000
7: /*
8: MatStashCreate_Private - Creates a stash,currently used for all the parallel
9: matrix implementations. The stash is where elements of a matrix destined
10: to be stored on other processors are kept until matrix assembly is done.
12: This is a simple minded stash. Simply adds entries to end of stash.
14: Input Parameters:
15: comm - communicator, required for scatters.
16: bs - stash block size. used when stashing blocks of values
18: Output Parameters:
19: stash - the newly created stash
20: */
23: PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
24: {
26: PetscInt max,*opt,nopt;
27: PetscTruth flg;
30: /* Require 2 tags,get the second using PetscCommGetNewTag() */
31: stash->comm = comm;
32: PetscCommGetNewTag(stash->comm,&stash->tag1);
33: PetscCommGetNewTag(stash->comm,&stash->tag2);
34: MPI_Comm_size(stash->comm,&stash->size);
35: MPI_Comm_rank(stash->comm,&stash->rank);
37: nopt = stash->size;
38: PetscMalloc(nopt*sizeof(PetscInt),&opt);
39: PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);
40: if (flg) {
41: if (nopt == 1) max = opt[0];
42: else if (nopt == stash->size) max = opt[stash->rank];
43: else if (stash->rank < nopt) max = opt[stash->rank];
44: else max = 0; /* Use default */
45: stash->umax = max;
46: } else {
47: stash->umax = 0;
48: }
49: PetscFree(opt);
50: if (bs <= 0) bs = 1;
52: stash->bs = bs;
53: stash->nmax = 0;
54: stash->oldnmax = 0;
55: stash->n = 0;
56: stash->reallocs = -1;
57: stash->space_head = 0;
58: stash->space = 0;
60: stash->send_waits = 0;
61: stash->recv_waits = 0;
62: stash->send_status = 0;
63: stash->nsends = 0;
64: stash->nrecvs = 0;
65: stash->svalues = 0;
66: stash->rvalues = 0;
67: stash->rindices = 0;
68: stash->nprocs = 0;
69: stash->nprocessed = 0;
70: return(0);
71: }
73: /*
74: MatStashDestroy_Private - Destroy the stash
75: */
78: PetscErrorCode MatStashDestroy_Private(MatStash *stash)
79: {
83: if (stash->space_head){
84: PetscMatStashSpaceDestroy(stash->space_head);
85: stash->space_head = 0;
86: stash->space = 0;
87: }
88: return(0);
89: }
91: /*
92: MatStashScatterEnd_Private - This is called as the fial stage of
93: scatter. The final stages of messagepassing is done here, and
94: all the memory used for messagepassing is cleanedu up. This
95: routine also resets the stash, and deallocates the memory used
96: for the stash. It also keeps track of the current memory usage
97: so that the same value can be used the next time through.
98: */
101: PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
102: {
104: PetscInt nsends=stash->nsends,bs2,oldnmax;
105: MPI_Status *send_status;
108: /* wait on sends */
109: if (nsends) {
110: PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);
111: MPI_Waitall(2*nsends,stash->send_waits,send_status);
112: PetscFree(send_status);
113: }
115: /* Now update nmaxold to be app 10% more than max n used, this way the
116: wastage of space is reduced the next time this stash is used.
117: Also update the oldmax, only if it increases */
118: if (stash->n) {
119: bs2 = stash->bs*stash->bs;
120: oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
121: if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
122: }
124: stash->nmax = 0;
125: stash->n = 0;
126: stash->reallocs = -1;
127: stash->nprocessed = 0;
128: if (stash->space_head){
129: PetscMatStashSpaceDestroy(stash->space_head);
130: stash->space_head = 0;
131: stash->space = 0;
132: }
133: PetscFree(stash->send_waits);
134: stash->send_waits = 0;
135: PetscFree(stash->recv_waits);
136: stash->recv_waits = 0;
137: PetscFree(stash->svalues);
138: stash->svalues = 0;
139: PetscFree(stash->rvalues);
140: stash->rvalues = 0;
141: PetscFree(stash->rindices);
142: stash->rindices = 0;
143: PetscFree(stash->nprocs);
144: stash->nprocs = 0;
145: return(0);
146: }
148: /*
149: MatStashGetInfo_Private - Gets the relavant statistics of the stash
151: Input Parameters:
152: stash - the stash
153: nstash - the size of the stash. Indicates the number of values stored.
154: reallocs - the number of additional mallocs incurred.
155:
156: */
159: PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
160: {
161: PetscInt bs2 = stash->bs*stash->bs;
164: if (nstash) *nstash = stash->n*bs2;
165: if (reallocs) {
166: if (stash->reallocs < 0) *reallocs = 0;
167: else *reallocs = stash->reallocs;
168: }
169: return(0);
170: }
172: /*
173: MatStashSetInitialSize_Private - Sets the initial size of the stash
175: Input Parameters:
176: stash - the stash
177: max - the value that is used as the max size of the stash.
178: this value is used while allocating memory.
179: */
182: PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
183: {
185: stash->umax = max;
186: return(0);
187: }
189: /* MatStashExpand_Private - Expand the stash. This function is called
190: when the space in the stash is not sufficient to add the new values
191: being inserted into the stash.
192:
193: Input Parameters:
194: stash - the stash
195: incr - the minimum increase requested
196:
197: Notes:
198: This routine doubles the currently used memory.
199: */
202: static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
203: {
205: PetscInt newnmax,bs2= stash->bs*stash->bs;
208: /* allocate a larger stash */
209: if (!stash->oldnmax && !stash->nmax) { /* new stash */
210: if (stash->umax) newnmax = stash->umax/bs2;
211: else newnmax = DEFAULT_STASH_SIZE/bs2;
212: } else if (!stash->nmax) { /* resuing stash */
213: if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
214: else newnmax = stash->oldnmax/bs2;
215: } else newnmax = stash->nmax*2;
216: if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;
218: /* Get a MatStashSpace and attach it to stash */
219: PetscMatStashSpaceGet(bs2,newnmax,&stash->space);
220: if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
221: stash->space_head = stash->space;
222: }
224: stash->reallocs++;
225: stash->nmax = newnmax;
226: return(0);
227: }
228: /*
229: MatStashValuesRow_Private - inserts values into the stash. This function
230: expects the values to be roworiented. Multiple columns belong to the same row
231: can be inserted with a single call to this function.
233: Input Parameters:
234: stash - the stash
235: row - the global row correspoiding to the values
236: n - the number of elements inserted. All elements belong to the above row.
237: idxn - the global column indices corresponding to each of the values.
238: values - the values inserted
239: */
242: PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[])
243: {
244: PetscErrorCode ierr;
245: PetscInt i,k;
246: PetscMatStashSpace space=stash->space;
249: /* Check and see if we have sufficient memory */
250: if (!space || space->local_remaining < n){
251: MatStashExpand_Private(stash,n);
252: }
253: space = stash->space;
254: k = space->local_used;
255: for (i=0; i<n; i++) {
256: space->idx[k] = row;
257: space->idy[k] = idxn[i];
258: space->val[k] = values[i];
259: k++;
260: }
261: stash->n += n;
262: space->local_used += n;
263: space->local_remaining -= n;
264: return(0);
265: }
267: /*
268: MatStashValuesCol_Private - inserts values into the stash. This function
269: expects the values to be columnoriented. Multiple columns belong to the same row
270: can be inserted with a single call to this function.
272: Input Parameters:
273: stash - the stash
274: row - the global row correspoiding to the values
275: n - the number of elements inserted. All elements belong to the above row.
276: idxn - the global column indices corresponding to each of the values.
277: values - the values inserted
278: stepval - the consecutive values are sepated by a distance of stepval.
279: this happens because the input is columnoriented.
280: */
283: PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval)
284: {
285: PetscErrorCode ierr;
286: PetscInt i,k;
287: PetscMatStashSpace space=stash->space;
290: /* Check and see if we have sufficient memory */
291: if (!space || space->local_remaining < n){
292: MatStashExpand_Private(stash,n);
293: }
294: space = stash->space;
295: k = space->local_used;
296: for (i=0; i<n; i++) {
297: space->idx[k] = row;
298: space->idy[k] = idxn[i];
299: space->val[k] = values[i*stepval];
300: k++;
301: }
302: stash->n += n;
303: space->local_used += n;
304: space->local_remaining -= n;
305: return(0);
306: }
308: /*
309: MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
310: This function expects the values to be roworiented. Multiple columns belong
311: to the same block-row can be inserted with a single call to this function.
312: This function extracts the sub-block of values based on the dimensions of
313: the original input block, and the row,col values corresponding to the blocks.
315: Input Parameters:
316: stash - the stash
317: row - the global block-row correspoiding to the values
318: n - the number of elements inserted. All elements belong to the above row.
319: idxn - the global block-column indices corresponding to each of the blocks of
320: values. Each block is of size bs*bs.
321: values - the values inserted
322: rmax - the number of block-rows in the original block.
323: cmax - the number of block-columsn on the original block.
324: idx - the index of the current block-row in the original block.
325: */
328: PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
329: {
330: PetscErrorCode ierr;
331: PetscInt i,j,k,bs2,bs=stash->bs,l;
332: const PetscScalar *vals;
333: PetscScalar *array;
334: PetscMatStashSpace space=stash->space;
337: if (!space || space->local_remaining < n){
338: MatStashExpand_Private(stash,n);
339: }
340: space = stash->space;
341: l = space->local_used;
342: bs2 = bs*bs;
343: for (i=0; i<n; i++) {
344: space->idx[l] = row;
345: space->idy[l] = idxn[i];
346: /* Now copy over the block of values. Store the values column oriented.
347: This enables inserting multiple blocks belonging to a row with a single
348: funtion call */
349: array = space->val + bs2*l;
350: vals = values + idx*bs2*n + bs*i;
351: for (j=0; j<bs; j++) {
352: for (k=0; k<bs; k++) array[k*bs] = vals[k];
353: array++;
354: vals += cmax*bs;
355: }
356: l++;
357: }
358: stash->n += n;
359: space->local_used += n;
360: space->local_remaining -= n;
361: return(0);
362: }
364: /*
365: MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
366: This function expects the values to be roworiented. Multiple columns belong
367: to the same block-row can be inserted with a single call to this function.
368: This function extracts the sub-block of values based on the dimensions of
369: the original input block, and the row,col values corresponding to the blocks.
371: Input Parameters:
372: stash - the stash
373: row - the global block-row correspoiding to the values
374: n - the number of elements inserted. All elements belong to the above row.
375: idxn - the global block-column indices corresponding to each of the blocks of
376: values. Each block is of size bs*bs.
377: values - the values inserted
378: rmax - the number of block-rows in the original block.
379: cmax - the number of block-columsn on the original block.
380: idx - the index of the current block-row in the original block.
381: */
384: PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
385: {
386: PetscErrorCode ierr;
387: PetscInt i,j,k,bs2,bs=stash->bs,l;
388: const PetscScalar *vals;
389: PetscScalar *array;
390: PetscMatStashSpace space=stash->space;
393: if (!space || space->local_remaining < n){
394: MatStashExpand_Private(stash,n);
395: }
396: space = stash->space;
397: l = space->local_used;
398: bs2 = bs*bs;
399: for (i=0; i<n; i++) {
400: space->idx[l] = row;
401: space->idy[l] = idxn[i];
402: /* Now copy over the block of values. Store the values column oriented.
403: This enables inserting multiple blocks belonging to a row with a single
404: funtion call */
405: array = space->val + bs2*l;
406: vals = values + idx*bs2*n + bs*i;
407: for (j=0; j<bs; j++) {
408: for (k=0; k<bs; k++) {array[k] = vals[k];}
409: array += bs;
410: vals += rmax*bs;
411: }
412: l++;
413: }
414: stash->n += n;
415: space->local_used += n;
416: space->local_remaining -= n;
417: return(0);
418: }
419: /*
420: MatStashScatterBegin_Private - Initiates the transfer of values to the
421: correct owners. This function goes through the stash, and check the
422: owners of each stashed value, and sends the values off to the owner
423: processors.
425: Input Parameters:
426: stash - the stash
427: owners - an array of size 'no-of-procs' which gives the ownership range
428: for each node.
430: Notes: The 'owners' array in the cased of the blocked-stash has the
431: ranges specified blocked global indices, and for the regular stash in
432: the proper global indices.
433: */
436: PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
437: {
438: PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
439: PetscInt size=stash->size,nsends;
440: PetscErrorCode ierr;
441: PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l;
442: PetscScalar **rvalues,*svalues;
443: MPI_Comm comm = stash->comm;
444: MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
445: PetscMPIInt *nprocs,*nlengths,nreceives;
446: PetscInt *sp_idx,*sp_idy;
447: PetscScalar *sp_val;
448: PetscMatStashSpace space,space_next;
451: bs2 = stash->bs*stash->bs;
452:
453: /* first count number of contributors to each processor */
454: PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);
455: PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));
456: PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);
458: nlengths = nprocs+size;
459: i = j = 0;
460: lastidx = -1;
461: space = stash->space_head;
462: while (space != PETSC_NULL){
463: space_next = space->next;
464: sp_idx = space->idx;
465: for (l=0; l<space->local_used; l++){
466: /* if indices are NOT locally sorted, need to start search at the beginning */
467: if (lastidx > (idx = sp_idx[l])) j = 0;
468: lastidx = idx;
469: for (; j<size; j++) {
470: if (idx >= owners[j] && idx < owners[j+1]) {
471: nlengths[j]++; owner[i] = j; break;
472: }
473: }
474: i++;
475: }
476: space = space_next;
477: }
478: /* Now check what procs get messages - and compute nsends. */
479: for (i=0, nsends=0 ; i<size; i++) {
480: if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
481: }
483: {PetscMPIInt *onodes,*olengths;
484: /* Determine the number of messages to expect, their lengths, from from-ids */
485: PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);
486: PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
487: /* since clubbing row,col - lengths are multiplied by 2 */
488: for (i=0; i<nreceives; i++) olengths[i] *=2;
489: PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
490: /* values are size 'bs2' lengths (and remove earlier factor 2 */
491: for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
492: PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
493: PetscFree(onodes);
494: PetscFree(olengths);
495: }
497: /* do sends:
498: 1) starts[i] gives the starting index in svalues for stuff going to
499: the ith processor
500: */
501: PetscMalloc((stash->n+1)*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)),&svalues);
502: sindices = (PetscInt*)(svalues + bs2*stash->n);
503: PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);
504: PetscMalloc(2*size*sizeof(PetscInt),&startv);
505: starti = startv + size;
506: /* use 2 sends the first with all_a, the next with all_i and all_j */
507: startv[0] = 0; starti[0] = 0;
508: for (i=1; i<size; i++) {
509: startv[i] = startv[i-1] + nlengths[i-1];
510: starti[i] = starti[i-1] + nlengths[i-1]*2;
511: }
512:
513: i = 0;
514: space = stash->space_head;
515: while (space != PETSC_NULL){
516: space_next = space->next;
517: sp_idx = space->idx;
518: sp_idy = space->idy;
519: sp_val = space->val;
520: for (l=0; l<space->local_used; l++){
521: j = owner[i];
522: if (bs2 == 1) {
523: svalues[startv[j]] = sp_val[l];
524: } else {
525: PetscInt k;
526: PetscScalar *buf1,*buf2;
527: buf1 = svalues+bs2*startv[j];
528: buf2 = space->val + bs2*l;
529: for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
530: }
531: sindices[starti[j]] = sp_idx[l];
532: sindices[starti[j]+nlengths[j]] = sp_idy[l];
533: startv[j]++;
534: starti[j]++;
535: i++;
536: }
537: space = space_next;
538: }
539: startv[0] = 0;
540: for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
542: for (i=0,count=0; i<size; i++) {
543: if (nprocs[i]) {
544: MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
545: MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);
546: }
547: }
548: #if defined(PETSC_USE_INFO)
549: PetscInfo1(mat,"No of messages: %d \n",nsends);
550: for (i=0; i<size; i++) {
551: if (nprocs[i]) {
552: PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));
553: }
554: }
555: #endif
556: PetscFree(owner);
557: PetscFree(startv);
558: /* This memory is reused in scatter end for a different purpose*/
559: for (i=0; i<2*size; i++) nprocs[i] = -1;
560: stash->nprocs = nprocs;
561:
562: /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
563: PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);
565: for (i=0; i<nreceives; i++) {
566: recv_waits[2*i] = recv_waits1[i];
567: recv_waits[2*i+1] = recv_waits2[i];
568: }
569: stash->recv_waits = recv_waits;
570: PetscFree(recv_waits1);
571: PetscFree(recv_waits2);
573: stash->svalues = svalues; stash->rvalues = rvalues;
574: stash->rindices = rindices; stash->send_waits = send_waits;
575: stash->nsends = nsends; stash->nrecvs = nreceives;
576: return(0);
577: }
579: /*
580: MatStashScatterGetMesg_Private - This function waits on the receives posted
581: in the function MatStashScatterBegin_Private() and returns one message at
582: a time to the calling function. If no messages are left, it indicates this
583: by setting flg = 0, else it sets flg = 1.
585: Input Parameters:
586: stash - the stash
588: Output Parameters:
589: nvals - the number of entries in the current message.
590: rows - an array of row indices (or blocked indices) corresponding to the values
591: cols - an array of columnindices (or blocked indices) corresponding to the values
592: vals - the values
593: flg - 0 indicates no more message left, and the current call has no values associated.
594: 1 indicates that the current call successfully received a message, and the
595: other output parameters nvals,rows,cols,vals are set appropriately.
596: */
599: PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg)
600: {
602: PetscMPIInt i,*flg_v,i1,i2;
603: PetscInt bs2;
604: MPI_Status recv_status;
605: PetscTruth match_found = PETSC_FALSE;
609: *flg = 0; /* When a message is discovered this is reset to 1 */
610: /* Return if no more messages to process */
611: if (stash->nprocessed == stash->nrecvs) { return(0); }
613: flg_v = stash->nprocs;
614: bs2 = stash->bs*stash->bs;
615: /* If a matching pair of receieves are found, process them, and return the data to
616: the calling function. Until then keep receiving messages */
617: while (!match_found) {
618: MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
619: /* Now pack the received message into a structure which is useable by others */
620: if (i % 2) {
621: MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);
622: flg_v[2*recv_status.MPI_SOURCE] = i/2;
623: *nvals = *nvals/bs2;
624: } else {
625: MPI_Get_count(&recv_status,MPIU_INT,nvals);
626: flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
627: *nvals = *nvals/2; /* This message has both row indices and col indices */
628: }
629:
630: /* Check if we have both messages from this proc */
631: i1 = flg_v[2*recv_status.MPI_SOURCE];
632: i2 = flg_v[2*recv_status.MPI_SOURCE+1];
633: if (i1 != -1 && i2 != -1) {
634: *rows = stash->rindices[i2];
635: *cols = *rows + *nvals;
636: *vals = stash->rvalues[i1];
637: *flg = 1;
638: stash->nprocessed ++;
639: match_found = PETSC_TRUE;
640: }
641: }
642: return(0);
643: }