Actual source code: vecstash.c
#define PETSCVEC_DLL

#include "private/vecimpl.h"

#define DEFAULT_STASH_SIZE 100
/*
  VecStashCreate_Private - Creates a stash, currently used by all the parallel
  vector implementations. The stash is where elements of a vector destined
  to be stored on other processors are kept until vector assembly is done.

  This is a simple-minded stash that simply adds entries to the end of the stash.

  Input Parameters:
  comm - communicator, required for scatters.
  bs   - stash block size, used when stashing blocks of values

  Output Parameter:
  stash - the newly created stash
*/
PetscErrorCode VecStashCreate_Private(MPI_Comm comm,PetscInt bs,VecStash *stash)
{
  PetscInt   max,*opt,nopt;
  PetscTruth flg;

  /* Require 2 tags, get the second using PetscCommGetNewTag() */
  stash->comm = comm;
  PetscCommGetNewTag(stash->comm,&stash->tag1);
  PetscCommGetNewTag(stash->comm,&stash->tag2);
  MPI_Comm_size(stash->comm,&stash->size);
  MPI_Comm_rank(stash->comm,&stash->rank);
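
  /* -vecstash_initial_size may be given either as a single value (applied on
     every process) or as an array with one entry per process; a rank beyond
     the end of a short array falls back to the default size. */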
  nopt = stash->size;
  PetscMalloc(nopt*sizeof(PetscInt),&opt);
  PetscOptionsGetIntArray(PETSC_NULL,"-vecstash_initial_size",opt,&nopt,&flg);
  if (flg) {
    if (nopt == 1)                max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt)  max = opt[stash->rank];
    else                          max = 0; /* use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  PetscFree(opt);

  if (bs <= 0) bs = 1;

  stash->bs           = bs;
  stash->nmax         = 0;
  stash->oldnmax      = 0;
  stash->n            = 0;
  stash->reallocs     = -1;
  stash->idx          = 0;
  stash->array        = 0;

  stash->send_waits   = 0;
  stash->recv_waits   = 0;
  stash->send_status  = 0;
  stash->nsends       = 0;
  stash->nrecvs       = 0;
  stash->svalues      = 0;
  stash->rvalues      = 0;
  stash->rmax         = 0;
  stash->nprocs       = 0;
  stash->nprocessed   = 0;
  stash->donotstash   = PETSC_FALSE;
  stash->ignorenegidx = PETSC_FALSE;
  return(0);
}
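
/*
  A sketch of the lifecycle this file implements, pieced together from the
  comments below; the stashing step itself is done by inline helpers defined
  elsewhere in the vector implementation headers, so the middle lines are
  schematic rather than exact calls:

    VecStashCreate_Private(comm,1,&stash);           // at vector creation
    // ... VecSetValues() adds any off-process (index,value) pairs
    // ... to the stash, growing it via VecStashExpand_Private()
    VecStashScatterBegin_Private(&stash,owners);     // VecAssemblyBegin()
    VecStashScatterGetMesg_Private(&stash,&n,&rows,&vals,&flg);
    while (flg) {                                    // VecAssemblyEnd()
      // insert the n received values at local indices rows[0..n-1]
      VecStashScatterGetMesg_Private(&stash,&n,&rows,&vals,&flg);
    }
    VecStashScatterEnd_Private(&stash);
    VecStashDestroy_Private(&stash);                 // at vector destruction
*/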

/*
  VecStashDestroy_Private - Destroy the stash
*/
PetscErrorCode VecStashDestroy_Private(VecStash *stash)
{
  PetscFree(stash->array);
  stash->array = 0;
  PetscFree(stash->bowners);
  return(0);
}

/*
  VecStashScatterEnd_Private - This is called as the final stage of the
  scatter. The final stages of message passing are done here, and
  all the memory used for message passing is cleaned up. This
  routine also resets the stash, and deallocates the memory used
  for the stash. It also keeps track of the current memory usage
  so that the same value can be used the next time through.
*/
PetscErrorCode VecStashScatterEnd_Private(VecStash *stash)
{
  PetscInt   nsends=stash->nsends,oldnmax;
  MPI_Status *send_status;

  /* wait on sends */
  if (nsends) {
    PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);
    MPI_Waitall(2*nsends,stash->send_waits,send_status);
    PetscFree(send_status);
  }

  /* Now update oldnmax to be approximately 10% more than the current n; this
     reduces the wasted space the next time this stash is used. Update oldnmax
     only if it increases. */
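  /* e.g. 100 stashed entries with bs = 2 give oldnmax = ((PetscInt)(100*1.1) + 5)*2 = 230 slots */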
  if (stash->n) {
    oldnmax = ((PetscInt)(stash->n * 1.1) + 5)*stash->bs;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->rmax       = 0;
  stash->nprocessed = 0;

  PetscFree(stash->array);
  stash->array      = 0;
  stash->idx        = 0;
  PetscFree(stash->send_waits);
  stash->send_waits = 0;
  PetscFree(stash->recv_waits);
  stash->recv_waits = 0;
  PetscFree(stash->svalues);
  stash->svalues    = 0;
  PetscFree(stash->rvalues);
  stash->rvalues    = 0;
  PetscFree(stash->nprocs);
  stash->nprocs     = 0;
  return(0);
}

/*
  VecStashGetInfo_Private - Gets the relevant statistics of the stash

  Input Parameter:
  stash    - the stash

  Output Parameters:
  nstash   - the size of the stash
  reallocs - the number of additional mallocs incurred.
*/
PetscErrorCode VecStashGetInfo_Private(VecStash *stash,PetscInt *nstash,PetscInt *reallocs)
{
  if (nstash) *nstash = stash->n*stash->bs;
  if (reallocs) {
    if (stash->reallocs < 0) *reallocs = 0;
    else                     *reallocs = stash->reallocs;
  }
  return(0);
}

/*
  VecStashSetInitialSize_Private - Sets the initial size of the stash

  Input Parameters:
  stash - the stash
  max   - the value that is used as the max size of the stash.
          This value is used while allocating memory. It specifies
          the number of values stored, even for the block-stash.
*/
PetscErrorCode VecStashSetInitialSize_Private(VecStash *stash,PetscInt max)
{
  stash->umax = max;
  return(0);
}

/*
  VecStashExpand_Private - Expand the stash. This function is called
  when the space in the stash is not sufficient to add the new values
  being inserted into the stash.

  Input Parameters:
  stash - the stash
  incr  - the minimum increase requested

  Notes:
  This routine doubles the currently used memory.
*/
PetscErrorCode VecStashExpand_Private(VecStash *stash,PetscInt incr)
{
  PetscInt    *n_idx,newnmax,bs=stash->bs;
  PetscScalar *n_array;

  /* allocate a larger stash */
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax) newnmax = stash->umax/bs;
    else             newnmax = DEFAULT_STASH_SIZE/bs;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs;
    else                              newnmax = stash->oldnmax/bs;
  } else newnmax = stash->nmax*2;

  if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;
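
  /* One allocation holds both the values and the indices: bs*newnmax
     PetscScalars first, then newnmax PetscInts carved out of the tail.
     This is why only stash->array is ever freed, never stash->idx. */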
  PetscMalloc((newnmax)*(sizeof(PetscInt)+bs*sizeof(PetscScalar)),&n_array);
  n_idx = (PetscInt*)(n_array + bs*newnmax);
  PetscMemcpy(n_array,stash->array,bs*stash->nmax*sizeof(PetscScalar));
  PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));
  PetscFree(stash->array);
  stash->array = n_array;
  stash->idx   = n_idx;
  stash->nmax  = newnmax;
  stash->reallocs++;
  return(0);
}

/*
  VecStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the
  owner of each stashed value, and sends the values off to the owner
  processors.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs' that gives the ownership range
           for each node.

  Notes: For the blocked-stash the 'owners' array contains the ranges
  specified in blocked global indices, and for the regular stash in
  the proper global indices.
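
  For example, on 3 processes owning 10, 10, and 12 entries, owners[] =
  {0,10,20,32}; a stashed index of 15 then belongs to process 1.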
*/
PetscErrorCode VecStashScatterBegin_Private(VecStash *stash,PetscInt *owners)
{
  PetscMPIInt size = stash->size,tag1=stash->tag1,tag2=stash->tag2;
  PetscInt    *owner,*start,*nprocs,nsends,nreceives;
  PetscInt    nmax,count,*sindices,*rindices,i,j,idx,bs=stash->bs,lastidx;
  PetscScalar *rvalues,*svalues;
  MPI_Comm    comm = stash->comm;
  MPI_Request *send_waits,*recv_waits;

  /* first count number of contributors to each processor */
  PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
  PetscMemzero(nprocs,2*size*sizeof(PetscInt));
  PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);
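
  /* nprocs[] interleaves two arrays: nprocs[2*j] counts the entries headed
     for process j, and nprocs[2*j+1] is a 0/1 flag recording whether any
     message is sent to process j at all. */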
  j       = 0;
  lastidx = -1;
  for (i=0; i<stash->n; i++) {
    /* if indices are NOT locally sorted, need to start search at the beginning */
    if (lastidx > (idx = stash->idx[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  PetscMaxSum(comm,nprocs,&nmax,&nreceives);
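  /* PetscMaxSum() reduces the interleaved nprocs[] arrays across the
     communicator: each process learns the maximum length of any message
     destined to it (nmax) and, by summing the senders' flags, how many
     messages it should expect (nreceives). */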

  /* post receives:
     since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.
  */
  PetscMalloc((nreceives+1)*(nmax+1)*(bs*sizeof(PetscScalar)+sizeof(PetscInt)),&rvalues);
  rindices = (PetscInt*)(rvalues + bs*nreceives*nmax);
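  /* Receive buffer layout: nreceives slots of bs*nmax PetscScalars, followed
     by nreceives slots of nmax PetscInts. VecStashScatterGetMesg_Private()
     relies on this layout to recover the index portion from rvalues. */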
  PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);
  for (i=0,count=0; i<nreceives; i++) {
    MPI_Irecv(rvalues+bs*nmax*i,bs*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);
    MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  PetscMalloc((stash->n+1)*(bs*sizeof(PetscScalar)+sizeof(PetscInt)),&svalues);
  sindices = (PetscInt*)(svalues + bs*stash->n);
  PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);
  PetscMalloc(size*sizeof(PetscInt),&start);
  /* use 2 sends per processor: the first with all the values, the second with all the indices */
  start[0] = 0;
  for (i=1; i<size; i++) {
    start[i] = start[i-1] + nprocs[2*i-2];
  }
  for (i=0; i<stash->n; i++) {
    j = owner[i];
    if (bs == 1) {
      svalues[start[j]] = stash->array[i];
    } else {
      PetscMemcpy(svalues+bs*start[j],stash->array+bs*i,bs*sizeof(PetscScalar));
    }
    sindices[start[j]] = stash->idx[i];
    start[j]++;
  }
  start[0] = 0;
  for (i=1; i<size; i++) { start[i] = start[i-1] + nprocs[2*i-2];}
  for (i=0,count=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      MPI_Isend(svalues+bs*start[i],bs*nprocs[2*i],MPIU_SCALAR,i,tag1,comm,send_waits+count++);
      MPI_Isend(sindices+start[i],nprocs[2*i],MPIU_INT,i,tag2,comm,send_waits+count++);
    }
  }
  PetscFree(owner);
  PetscFree(start);
  /* This memory is reused by VecStashScatterGetMesg_Private() to pair up
     the two receives coming from each sender */
  for (i=0; i<2*size; i++) nprocs[i] = -1;
  stash->nprocs     = nprocs;

  stash->svalues    = svalues;    stash->rvalues    = rvalues;
  stash->nsends     = nsends;     stash->nrecvs     = nreceives;
  stash->send_waits = send_waits; stash->recv_waits = recv_waits;
  stash->rmax       = nmax;
  return(0);
}

/*
  VecStashScatterGetMesg_Private - This function waits on the receives posted
  in the function VecStashScatterBegin_Private() and returns one message at
  a time to the calling function. If no messages are left, it indicates this
  by setting flg = 0, else it sets flg = 1.

  Input Parameter:
  stash - the stash

  Output Parameters:
  nvals - the number of entries in the current message.
  rows  - an array of row indices (or blocked indices) corresponding to the values
  vals  - the values
  flg   - 0 indicates no more messages are left, and the current call has no values associated.
          1 indicates that the current call successfully received a message, and the
          other output parameters nvals,rows,vals are set appropriately.
*/
PetscErrorCode VecStashScatterGetMesg_Private(VecStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscScalar **vals,PetscInt *flg)
{
  PetscMPIInt i;
  PetscInt    *flg_v;
  PetscInt    i1,i2,*rindices,bs=stash->bs;
  MPI_Status  recv_status;
  PetscTruth  match_found = PETSC_FALSE;

  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) { return(0); }

  flg_v = stash->nprocs;
  /* If a matching pair of receives is found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
    /* Now pack the received message into a structure which is usable by others */
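    /* Receives were posted in pairs (values on tag1 at even slots, indices on
       tag2 at odd slots), so i/2 identifies the receive slot and i%2 tells
       which half of the pair just completed. */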
    if (i % 2) {
      MPI_Get_count(&recv_status,MPIU_INT,nvals);
      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
    } else {
      MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);
      flg_v[2*recv_status.MPI_SOURCE] = i/2;
      *nvals = *nvals/bs;
    }

    /* Check if we have both the messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      rindices = (PetscInt*)(stash->rvalues + bs*stash->rmax*stash->nrecvs);
      *rows    = rindices + i2*stash->rmax;
      *vals    = stash->rvalues + i1*bs*stash->rmax;
      *flg     = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  return(0);
}