Actual source code: sbaijov.c


/*
   Routines to compute overlapping regions of a parallel MPI matrix.
   Used for finding submatrices that are shared across processors.
*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <petscbt.h>

static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat, PetscInt, IS *);
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Local(Mat, PetscInt *, PetscInt, PetscInt *, PetscBT *);

PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat C, PetscInt is_max, IS is[], PetscInt ov)
{
  PetscInt        i, N = C->cmap->N, bs = C->rmap->bs, M = C->rmap->N, Mbs = M / bs, *nidx, isz, iov;
  IS             *is_new, *is_row;
  Mat            *submats;
  Mat_MPISBAIJ   *c = (Mat_MPISBAIJ *)C->data;
  Mat_SeqSBAIJ   *asub_i;
  PetscBT         table;
  PetscInt       *ai, brow, nz, nis, l, nmax, nstages_local, nstages, max_no, pos;
  const PetscInt *idx;
  PetscBool       flg;

  PetscMalloc1(is_max, &is_new);
  /* Convert the indices into block format */
  ISCompressIndicesGeneral(N, C->rmap->n, bs, is_max, is, is_new);

  /* ----- previous non-scalable implementation ----- */
  flg = PETSC_FALSE;
  PetscOptionsHasName(NULL, NULL, "-IncreaseOverlap_old", &flg);
  if (flg) { /* previous non-scalable implementation */
    printf("use previous non-scalable implementation...\n");
    for (i = 0; i < ov; ++i) MatIncreaseOverlap_MPISBAIJ_Once(C, is_max, is_new);
  } else { /* implementation using modified BAIJ routines */

    PetscMalloc1(Mbs + 1, &nidx);
    PetscBTCreate(Mbs, &table); /* for column search */

    /* Create is_row */
    PetscMalloc1(is_max, &is_row);
    ISCreateStride(PETSC_COMM_SELF, Mbs, 0, 1, &is_row[0]);

    for (i = 1; i < is_max; i++) { is_row[i] = is_row[0]; /* reuse is_row[0] */ }

    /* Allocate memory to hold all the submatrices - Modified from MatCreateSubMatrices_MPIBAIJ() */
    PetscMalloc1(is_max + 1, &submats);

    /* Determine the number of stages through which submatrices are done */
    nmax = 20 * 1000000 / (c->Nbs * sizeof(PetscInt));
    if (!nmax) nmax = 1;
    nstages_local = is_max / nmax + ((is_max % nmax) ? 1 : 0);

    /* Make sure every processor loops through the nstages */
    MPIU_Allreduce(&nstages_local, &nstages, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)C));
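    /* Worked example (illustrative numbers, not from the source): with
       c->Nbs = 1000000 and sizeof(PetscInt) = 4, nmax = 20*1000000/4000000 = 5,
       so is_max = 12 index sets are handled in nstages_local = 12/5 + 1 = 3
       stages of at most nmax sets each. The allreduce makes every rank agree
       on nstages, since ranks with fewer index sets must still take part in
       each collective stage (with max_no = 0). */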

    for (iov = 0; iov < ov; ++iov) {
      /* 1) Get submats for column search */
      for (i = 0, pos = 0; i < nstages; i++) {
        if (pos + nmax <= is_max) max_no = nmax;
        else if (pos == is_max) max_no = 0;
        else max_no = is_max - pos;
        c->ijonly = PETSC_TRUE; /* only matrix data structures are requested */
        /* The resulting submatrices should be BAIJ, not SBAIJ, hence we change this value to trigger that */
        PetscStrcpy(((PetscObject)c->A)->type_name, MATSEQBAIJ);
        MatCreateSubMatrices_MPIBAIJ_local(C, max_no, is_row + pos, is_new + pos, MAT_INITIAL_MATRIX, submats + pos);
        PetscStrcpy(((PetscObject)c->A)->type_name, MATSEQSBAIJ);
        pos += max_no;
      }

      /* 2) Row search */
      MatIncreaseOverlap_MPIBAIJ_Once(C, is_max, is_new);
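      /* Because SBAIJ stores only the upper triangle, the row search above can
         miss neighbours reachable through the implicit lower triangle; the
         column search below recovers them from the BAIJ submatrices gathered
         in step 1, whose rows span all of [0, Mbs). */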

      /* 3) Column search: add rows whose entries fall in the columns of is_new */
      for (i = 0; i < is_max; i++) {
        asub_i = (Mat_SeqSBAIJ *)submats[i]->data;
        ai     = asub_i->i;

        /* put is_new obtained from MatIncreaseOverlap_MPIBAIJ() into the table */
        PetscBTMemzero(Mbs, table);

        ISGetIndices(is_new[i], &idx);
        ISGetLocalSize(is_new[i], &nis);
        for (l = 0; l < nis; l++) {
          PetscBTSet(table, idx[l]);
          nidx[l] = idx[l];
        }
        isz = nis;

        /* add column entries to table */
        for (brow = 0; brow < Mbs; brow++) {
          nz = ai[brow + 1] - ai[brow];
          if (nz) {
            if (!PetscBTLookupSet(table, brow)) nidx[isz++] = brow;
          }
        }
        ISRestoreIndices(is_new[i], &idx);
        ISDestroy(&is_new[i]);

        /* create updated is_new */
        ISCreateGeneral(PETSC_COMM_SELF, isz, nidx, PETSC_COPY_VALUES, is_new + i);
      }

      /* Free tmp spaces */
      for (i = 0; i < is_max; i++) MatDestroy(&submats[i]);
    }

    PetscBTDestroy(&table);
    PetscFree(submats);
    ISDestroy(&is_row[0]);
    PetscFree(is_row);
    PetscFree(nidx);
  }

  for (i = 0; i < is_max; i++) ISDestroy(&is[i]);
  ISExpandIndicesGeneral(N, N, bs, is_max, is_new, is);

  for (i = 0; i < is_max; i++) ISDestroy(&is_new[i]);
  PetscFree(is_new);
  return 0;
}

typedef enum {
  MINE,
  OTHER
} WhoseOwner;
/*  data1, odata1 and odata2 are packed in the format (for communication):
       data[0]          = is_max, no of is
       data[1]          = size of is[0]
        ...
       data[is_max]     = size of is[is_max-1]
       data[is_max + 1] = data(is[0])
        ...
       data[is_max + 1 + sum(size of is[k]), k=0,...,i-1] = data(is[i])
        ...
   data2 is packed in the format (for creating output is[]):
       data[0]          = is_max, no of is
       data[1]          = size of is[0]
        ...
       data[is_max]     = size of is[is_max-1]
       data[is_max + 1] = data(is[0])
        ...
       data[is_max + 1 + Mbs*i] = data(is[i])
        ...
*/
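/* Worked example (illustrative, not from the source): with is_max = 2,
   is[0] = {3,7} and is[1] = {5}, the communication format packs
       {2, 2, 1, 3, 7, 5}
   i.e. the header {is_max, |is[0]|, |is[1]|} followed by the concatenated
   index sets, while data2 reserves a fixed stride of Mbs entries per set, so
   data(is[i]) always begins at data2[is_max + 1 + Mbs*i] regardless of the
   sizes recorded in the header. */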
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat C, PetscInt is_max, IS is[])
{
  Mat_MPISBAIJ   *c = (Mat_MPISBAIJ *)C->data;
  PetscMPIInt     size, rank, tag1, tag2, *len_s, nrqr, nrqs, *id_r1, *len_r1, flag, len, *iwork;
  const PetscInt *idx_i;
  PetscInt        idx, isz, col, *n, *data1, **data1_start, *data2, *data2_i, *data, *data_i;
  PetscInt        Mbs, i, j, k, *odata1, *odata2;
  PetscInt        proc_id, **odata2_ptr, *ctable = NULL, *btable, len_max, len_est;
  PetscInt        proc_end = 0, len_unused, nodata2;
  PetscInt        ois_max; /* max number of is[] over all processors */
  char           *t_p;
  MPI_Comm        comm;
  MPI_Request    *s_waits1, *s_waits2, r_req;
  MPI_Status     *s_status, r_status;
  PetscBT        *table; /* mark indices of this processor's is[] */
  PetscBT         table_i;
  PetscBT         otable; /* mark indices of other processors' is[] */
  PetscInt        bs = C->rmap->bs, Bn = c->B->cmap->n, Bnbs = Bn / bs, *Bowners;
  IS              garray_local, garray_gl;

  PetscObjectGetComm((PetscObject)C, &comm);
  size = c->size;
  rank = c->rank;
  Mbs  = c->Mbs;

  PetscObjectGetNewTag((PetscObject)C, &tag1);
  PetscObjectGetNewTag((PetscObject)C, &tag2);

  /* create tables used in
     step 1: table[i] - mark c->garray of proc [i]
     step 3: table[i] - mark indices of is[i] when whose=MINE
             table[0] - mark indices of is[] when whose=OTHER */
  len = PetscMax(is_max, size);
  PetscMalloc2(len, &table, (Mbs / PETSC_BITS_PER_BYTE + 1) * len, &t_p);
  for (i = 0; i < len; i++) table[i] = t_p + (Mbs / PETSC_BITS_PER_BYTE + 1) * i;
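  /* Layout sketch: t_p is a single contiguous buffer carved into len bitmaps
     of Mbs/PETSC_BITS_PER_BYTE + 1 bytes each; e.g. Mbs = 100 gives 13 bytes
     (104 bits) per bitmap, so table[i] starts at byte offset 13*i. */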

  MPIU_Allreduce(&is_max, &ois_max, 1, MPIU_INT, MPI_MAX, comm);

  /* 1. Send this processor's is[] to other processors */
  /*---------------------------------------------------*/
  /* allocate spaces */
  PetscMalloc1(is_max, &n);
  len = 0;
  for (i = 0; i < is_max; i++) {
    ISGetLocalSize(is[i], &n[i]);
    len += n[i];
  }
  if (!len) {
    is_max = 0;
  } else {
    len += 1 + is_max; /* max length of data1 for one processor */
  }

  PetscMalloc1(size * len + 1, &data1);
  PetscMalloc1(size, &data1_start);
  for (i = 0; i < size; i++) data1_start[i] = data1 + i * len;

  PetscMalloc4(size, &len_s, size, &btable, size, &iwork, size + 1, &Bowners);

  /* gather c->garray from all processors */
  ISCreateGeneral(comm, Bnbs, c->garray, PETSC_COPY_VALUES, &garray_local);
  ISAllGather(garray_local, &garray_gl);
  ISDestroy(&garray_local);
  MPI_Allgather(&Bnbs, 1, MPIU_INT, Bowners + 1, 1, MPIU_INT, comm);

  Bowners[0] = 0;
  for (i = 0; i < size; i++) Bowners[i + 1] += Bowners[i];
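  /* e.g. (illustrative) if three ranks contribute Bnbs = {4,2,3} off-diagonal
     block columns, the prefix sum gives Bowners = {0,4,6,9}: rank i's entries
     occupy garray_gl[Bowners[i] .. Bowners[i+1]-1] */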

  if (is_max) {
    /* hash table ctable which maps a block row to its owner proc_id */
    PetscMalloc1(Mbs, &ctable);
    for (proc_id = 0, j = 0; proc_id < size; proc_id++) {
      for (; j < C->rmap->range[proc_id + 1] / bs; j++) ctable[j] = proc_id;
    }
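    /* e.g. (illustrative) with bs = 2 and scalar row ranges [0,12) and [12,20),
       block rows 0..5 belong to rank 0 and 6..9 to rank 1:
       ctable = {0,0,0,0,0,0,1,1,1,1} */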

    /* hash tables marking c->garray */
    ISGetIndices(garray_gl, &idx_i);
    for (i = 0; i < size; i++) {
      table_i = table[i];
      PetscBTMemzero(Mbs, table_i);
      for (j = Bowners[i]; j < Bowners[i + 1]; j++) { /* go through B cols of proc[i] */
        PetscBTSet(table_i, idx_i[j]);
      }
    }
    ISRestoreIndices(garray_gl, &idx_i);
  } /* if (is_max) */
  ISDestroy(&garray_gl);

  /* evaluate communication - mesg to who, length, and buffer space */
  for (i = 0; i < size; i++) len_s[i] = 0;

  /* header of data1 */
  for (proc_id = 0; proc_id < size; proc_id++) {
    iwork[proc_id]        = 0;
    *data1_start[proc_id] = is_max;
    data1_start[proc_id]++;
    for (j = 0; j < is_max; j++) {
      if (proc_id == rank) {
        *data1_start[proc_id] = n[j];
      } else {
        *data1_start[proc_id] = 0;
      }
      data1_start[proc_id]++;
    }
  }

  for (i = 0; i < is_max; i++) {
    ISGetIndices(is[i], &idx_i);
    for (j = 0; j < n[i]; j++) {
      idx                = idx_i[j];
      *data1_start[rank] = idx;
      data1_start[rank]++; /* for local processing */
      proc_end = ctable[idx];
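      /* Only ranks proc_id <= proc_end need idx: since just the upper triangle
         is stored, a block column idx can appear only in rows owned by idx's
         owner or by lower-ranked processes (whose garray may contain idx). */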
      for (proc_id = 0; proc_id <= proc_end; proc_id++) {                        /* for others to process */
        if (proc_id == rank) continue;                                           /* done before this loop */
        if (proc_id < proc_end && !PetscBTLookup(table[proc_id], idx)) continue; /* no need for sending idx to [proc_id] */
        *data1_start[proc_id] = idx;
        data1_start[proc_id]++;
        len_s[proc_id]++;
      }
    }
    /* update header data */
    for (proc_id = 0; proc_id < size; proc_id++) {
      if (proc_id == rank) continue;
      *(data1 + proc_id * len + 1 + i) = len_s[proc_id] - iwork[proc_id];
      iwork[proc_id]                   = len_s[proc_id];
    }
    ISRestoreIndices(is[i], &idx_i);
  }

  nrqs = 0;
  nrqr = 0;
  for (i = 0; i < size; i++) {
    data1_start[i] = data1 + i * len;
    if (len_s[i]) {
      nrqs++;
      len_s[i] += 1 + is_max; /* add no. of header msg */
    }
  }

  for (i = 0; i < is_max; i++) ISDestroy(&is[i]);
  PetscFree(n);
  PetscFree(ctable);

  /* Determine the number of messages to expect, their lengths, and their from-ids */
  PetscGatherNumberOfMessages(comm, NULL, len_s, &nrqr);
  PetscGatherMessageLengths(comm, nrqs, nrqr, len_s, &id_r1, &len_r1);

  /* Now post the sends */
  PetscMalloc2(size, &s_waits1, size, &s_waits2);
  k = 0;
  for (proc_id = 0; proc_id < size; proc_id++) { /* send data1 to processor [proc_id] */
    if (len_s[proc_id]) {
      MPI_Isend(data1_start[proc_id], len_s[proc_id], MPIU_INT, proc_id, tag1, comm, s_waits1 + k);
      k++;
    }
  }

  /* 2. Receive other's is[] and process. Then send back */
  /*-----------------------------------------------------*/
  len = 0;
  for (i = 0; i < nrqr; i++) {
    if (len_r1[i] > len) len = len_r1[i];
  }
  PetscFree(len_r1);
  PetscFree(id_r1);

  for (proc_id = 0; proc_id < size; proc_id++) len_s[proc_id] = iwork[proc_id] = 0;

  PetscMalloc1(len + 1, &odata1);
  PetscMalloc1(size, &odata2_ptr);
  PetscBTCreate(Mbs, &otable);

  len_max = ois_max * (Mbs + 1); /* max space storing all is[] for each receive */
  len_est = 2 * len_max;         /* estimated space of storing is[] for all receiving messages */
  PetscMalloc1(len_est + 1, &odata2);
  nodata2 = 0; /* nodata2+1 = number of chunks malloc'd for odata2_ptr[] */

  odata2_ptr[nodata2] = odata2;

  len_unused = len_est; /* unused space in the chunk odata2_ptr[nodata2] -- needs to be >= len_max */
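  /* odata2 works as a bump allocator: each processed message advances odata2
     past its reply and decrements len_unused; when less than len_max remains,
     a fresh chunk of len_est entries is malloc'd and recorded in odata2_ptr[],
     so earlier replies stay valid until their MPI_Isend completes. All chunks
     are freed together in step 5. */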

  k = 0;
  while (k < nrqr) {
    /* Receive messages */
    MPI_Iprobe(MPI_ANY_SOURCE, tag1, comm, &flag, &r_status);
    if (flag) {
      MPI_Get_count(&r_status, MPIU_INT, &len);
      proc_id = r_status.MPI_SOURCE;
      MPI_Irecv(odata1, len, MPIU_INT, proc_id, r_status.MPI_TAG, comm, &r_req);
      MPI_Wait(&r_req, &r_status);

      /* Process messages */
      /* make sure there is enough unused space in odata2 array */
      if (len_unused < len_max) { /* allocate more space for odata2 */
        PetscMalloc1(len_est + 1, &odata2);

        odata2_ptr[++nodata2] = odata2;

        len_unused = len_est;
      }

      MatIncreaseOverlap_MPISBAIJ_Local(C, odata1, OTHER, odata2, &otable);
      len = 1 + odata2[0];
      for (i = 0; i < odata2[0]; i++) len += odata2[1 + i];

      /* Send messages back */
      MPI_Isend(odata2, len, MPIU_INT, proc_id, tag2, comm, s_waits2 + k);
      k++;
      odata2 += len;
      len_unused -= len;
      len_s[proc_id] = len; /* length of the message sent back to [proc_id] by this proc */
    }
  }
  PetscFree(odata1);
  PetscBTDestroy(&otable);

  /* 3. Do local work on this processor's is[] */
  /*-------------------------------------------*/
  /* make sure there is enough unused space in odata2(=data) array */
  len_max = is_max * (Mbs + 1); /* max space storing all is[] for this processor */
  if (len_unused < len_max) {   /* allocate more space for odata2 */
    PetscMalloc1(len_est + 1, &odata2);

    odata2_ptr[++nodata2] = odata2;
  }

  data = odata2;
  MatIncreaseOverlap_MPISBAIJ_Local(C, data1_start[rank], MINE, data, table);
  PetscFree(data1_start);

  /* 4. Receive work done on other processors, then merge */
  /*------------------------------------------------------*/
  /* get max length of the reply messages this processor expects to recv */
  MPIU_Allreduce(len_s, iwork, size, MPI_INT, MPI_MAX, comm);
  PetscMalloc1(iwork[rank] + 1, &data2);
  PetscFree4(len_s, btable, iwork, Bowners);

  k = 0;
  while (k < nrqs) {
    /* Receive messages */
    MPI_Iprobe(MPI_ANY_SOURCE, tag2, comm, &flag, &r_status);
    if (flag) {
      MPI_Get_count(&r_status, MPIU_INT, &len);

      proc_id = r_status.MPI_SOURCE;

      MPI_Irecv(data2, len, MPIU_INT, proc_id, r_status.MPI_TAG, comm, &r_req);
      MPI_Wait(&r_req, &r_status);
      if (len > 1 + is_max) { /* Add data2 into data */
        data2_i = data2 + 1 + is_max;
        for (i = 0; i < is_max; i++) {
          table_i = table[i];
          data_i  = data + 1 + is_max + Mbs * i;
          isz     = data[1 + i];
          for (j = 0; j < data2[1 + i]; j++) {
            col = data2_i[j];
            if (!PetscBTLookupSet(table_i, col)) data_i[isz++] = col;
          }
          data[1 + i] = isz;
          if (i < is_max - 1) data2_i += data2[1 + i];
        }
      }
      k++;
    }
  }
  PetscFree(data2);
  PetscFree2(table, t_p);

  /* phase 1 sends are complete */
  PetscMalloc1(size, &s_status);
  if (nrqs) MPI_Waitall(nrqs, s_waits1, s_status);
  PetscFree(data1);

  /* phase 2 sends are complete */
  if (nrqr) MPI_Waitall(nrqr, s_waits2, s_status);
  PetscFree2(s_waits1, s_waits2);
  PetscFree(s_status);

  /* 5. Create new is[] */
  /*--------------------*/
  for (i = 0; i < is_max; i++) {
    data_i = data + 1 + is_max + Mbs * i;
    ISCreateGeneral(PETSC_COMM_SELF, data[1 + i], data_i, PETSC_COPY_VALUES, is + i);
  }
  for (k = 0; k <= nodata2; k++) PetscFree(odata2_ptr[k]);
  PetscFree(odata2_ptr);
  return 0;
}

/*
   MatIncreaseOverlap_MPISBAIJ_Local - Called by MatIncreaseOverlap_MPISBAIJ_Once() to do
       the work on the local processor.

     Inputs:
      C      - MAT_MPISBAIJ;
      data   - holds is[]. See MatIncreaseOverlap_MPISBAIJ_Once() for the format.
      whose  - whose is[] is to be processed,
               MINE:  this processor's is[]
               OTHER: another processor's is[]
     Output:
       nidx  - whose = MINE:
                     holds input and newly found indices in the same format as data
               whose = OTHER:
                     only holds the newly found indices
       table - table[i]: mark the indices of is[i], i=0,...,is_max. Used only in the case whose=MINE.
*/
/* Would computation be reduced by swapping the loops 'for each is' and 'for each row'? */
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Local(Mat C, PetscInt *data, PetscInt whose, PetscInt *nidx, PetscBT *table)
{
  Mat_MPISBAIJ *c = (Mat_MPISBAIJ *)C->data;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)(c->A)->data;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(c->B)->data;
  PetscInt      row, mbs, Mbs, *nidx_i, col, col_max, isz, isz0, *ai, *aj, *bi, *bj, *garray, rstart, l;
  PetscInt      a_start, a_end, b_start, b_end, i, j, k, is_max, *idx_i, n;
  PetscBT       table0;  /* mark the indices of input is[] for look up */
  PetscBT       table_i; /* points to i-th table. When whose=OTHER, a single table is used for all is[] */

  Mbs    = c->Mbs;
  mbs    = a->mbs;
  ai     = a->i;
  aj     = a->j;
  bi     = b->i;
  bj     = b->j;
  garray = c->garray;
  rstart = c->rstartbs;
  is_max = data[0];

  PetscBTCreate(Mbs, &table0);

  nidx[0] = is_max;
  idx_i   = data + is_max + 1;   /* ptr to input is[0] array */
  nidx_i  = nidx + is_max + 1;   /* ptr to output is[0] array */
  for (i = 0; i < is_max; i++) { /* for each is */
    isz = 0;
    n   = data[1 + i]; /* size of input is[i] */

    /* initialize and set table_i (mark idx and nidx) and table0 (only mark idx) */
    if (whose == MINE) { /* process this processor's is[] */
      table_i = table[i];
      nidx_i  = nidx + 1 + is_max + Mbs * i;
    } else { /* process another processor's is[] - only use one temp table */
      table_i = table[0];
    }
    PetscBTMemzero(Mbs, table_i);
    PetscBTMemzero(Mbs, table0);
    if (n == 0) {
      nidx[1 + i] = 0; /* size of new is[i] */
      continue;
    }

    isz0    = 0;
    col_max = 0;
    for (j = 0; j < n; j++) {
      col = idx_i[j];
      if (!PetscBTLookupSet(table_i, col)) {
        PetscBTSet(table0, col);
        if (whose == MINE) nidx_i[isz0] = col;
        if (col_max < col) col_max = col;
        isz0++;
      }
    }

    if (whose == MINE) isz = isz0;
    k = 0; /* no. of indices from input is[i] that have been examined */
    for (row = 0; row < mbs; row++) {
      a_start = ai[row];
      a_end   = ai[row + 1];
      b_start = bi[row];
      b_end   = bi[row + 1];
      if (PetscBTLookup(table0, row + rstart)) { /* row is in input is[i]:
                                                do row search: collect all cols in this row */
        for (l = a_start; l < a_end; l++) {      /* Amat */
          col = aj[l] + rstart;
          if (!PetscBTLookupSet(table_i, col)) nidx_i[isz++] = col;
        }
        for (l = b_start; l < b_end; l++) { /* Bmat */
          col = garray[bj[l]];
          if (!PetscBTLookupSet(table_i, col)) nidx_i[isz++] = col;
        }
        k++;
        if (k >= isz0) break;               /* break the row loop: all input indices have been examined */
      } else {                              /* row is not in input is[i]:
                  do col search: add row to nidx_i if the row has a col in input is[i] */
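        /* Note: the early 'col > col_max' breaks below assume that column
           indices within each row of aj[] and bj[] are stored in increasing
           order (as they are for SeqSBAIJ/SeqBAIJ storage), and that garray[]
           is sorted ascending. */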
        for (l = a_start; l < a_end; l++) { /* Amat */
          col = aj[l] + rstart;
          if (col > col_max) break;
          if (PetscBTLookup(table0, col)) {
            if (!PetscBTLookupSet(table_i, row + rstart)) nidx_i[isz++] = row + rstart;
            break; /* for (l = a_start; l < a_end; l++) */
          }
        }
        for (l = b_start; l < b_end; l++) { /* Bmat */
          col = garray[bj[l]];
          if (col > col_max) break;
          if (PetscBTLookup(table0, col)) {
            if (!PetscBTLookupSet(table_i, row + rstart)) nidx_i[isz++] = row + rstart;
            break; /* for (l = b_start; l < b_end; l++) */
          }
        }
      }
    }

    if (i < is_max - 1) {
      idx_i += n;    /* ptr to input is[i+1] array */
      nidx_i += isz; /* ptr to output is[i+1] array */
    }
    nidx[1 + i] = isz; /* size of new is[i] */
  }                    /* for each is */
  PetscBTDestroy(&table0);
  return 0;
}
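
/*
   Usage sketch (illustrative; 'nloc' and 'rows' are placeholder names and
   error checking is omitted): these routines are reached through the generic
   interface when C has type MATMPISBAIJ, e.g.

     IS is;
     ISCreateGeneral(PETSC_COMM_SELF, nloc, rows, PETSC_COPY_VALUES, &is);
     MatIncreaseOverlap(C, 1, &is, 2);     <-- enlarge is by 2 levels of overlap
     ISDestroy(&is);
*/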