Actual source code: Index.c
2: #include <petscsys.h>
3: #include <petsctime.h>
5: extern int BlastCache(void);
6: extern int test1(void);
7: extern int test2(void);
9: int main(int argc,char **argv)
10: {
12: PetscInitialize(&argc,&argv,0,0);
13: test1();
14: test2();
15: PetscFinalize();
16: return 0;
17: }
19: int test1(void)
20: {
21: PetscLogDouble t1,t2;
22: double value;
23: int i,ierr,*z,*zi,intval;
24: PetscScalar *x,*y;
25: PetscRandom r;
27: PetscRandomCreate(PETSC_COMM_SELF,&r);
28: PetscRandomSetFromOptions(r);
29: PetscMalloc1(20000,&x);
30: PetscMalloc1(20000,&y);
32: PetscMalloc1(2000,&z);
33: PetscMalloc1(2000,&zi);
35: /* Take care of paging effects */
36: PetscTime(&t1);
38: /* Form the random set of integers */
39: for (i=0; i<2000; i++) {
40: PetscRandomGetValue(r,&value);
41: intval = (int)(value*20000.0);
42: z[i] = intval;
43: }
45: for (i=0; i<2000; i++) {
46: PetscRandomGetValue(r,&value);
47: intval = (int)(value*20000.0);
48: zi[i] = intval;
49: }
50: /* fprintf(stdout,"Done setup\n"); */
52: BlastCache();
54: PetscTime(&t1);
55: for (i=0; i<2000; i++) x[i] = y[i];
56: PetscTime(&t2);
57: fprintf(stdout,"%-27s : %e sec\n","x[i] = y[i]",(t2-t1)/2000.0);
59: BlastCache();
61: PetscTime(&t1);
62: for (i=0; i<500; i+=4) {
63: x[i] = y[z[i]];
64: x[1+i] = y[z[1+i]];
65: x[2+i] = y[z[2+i]];
66: x[3+i] = y[z[3+i]];
67: }
68: PetscTime(&t2);
69: fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]] - unroll 4",(t2-t1)/2000.0);
71: BlastCache();
73: PetscTime(&t1);
74: for (i=0; i<2000; i++) x[i] = y[z[i]];
75: PetscTime(&t2);
76: fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]]",(t2-t1)/2000.0);
78: BlastCache();
80: PetscTime(&t1);
81: for (i=0; i<1000; i+=2) { x[i] = y[z[i]]; x[1+i] = y[z[1+i]]; }
82: PetscTime(&t2);
83: fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]] - unroll 2",(t2-t1)/2000.0);
85: BlastCache();
87: PetscTime(&t1);
88: for (i=0; i<2000; i++) x[z[i]] = y[i];
89: PetscTime(&t2);
90: fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[i]",(t2-t1)/2000.0);
92: BlastCache();
94: PetscTime(&t1);
95: for (i=0; i<2000; i++) x[z[i]] = y[zi[i]];
96: PetscTime(&t2);
97: fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[zi[i]]",(t2-t1)/2000.0);
99: PetscArraycpy(x,y,10);
100: PetscArraycpy(z,zi,10);
101: PetscFree(z);
102: PetscFree(zi);
103: PetscFree(x);
104: PetscFree(y);
105: PetscRandomDestroy(&r);
106: return 0;
107: }
109: int test2(void)
110: {
111: PetscLogDouble t1,t2;
112: double value;
113: int i,ierr,z[20000],zi[20000],intval,tmp;
114: PetscScalar x[20000],y[20000];
115: PetscRandom r;
117: PetscRandomCreate(PETSC_COMM_SELF,&r);
118: PetscRandomSetFromOptions(r);
120: /* Take care of paging effects */
121: PetscTime(&t1);
123: for (i=0; i<20000; i++) {
124: x[i] = i;
125: y[i] = i;
126: z[i] = i;
127: zi[i] = i;
128: }
130: /* Form the random set of integers */
131: for (i=0; i<20000; i++) {
132: PetscRandomGetValue(r,&value);
133: intval = (int)(value*20000.0);
134: tmp = z[i];
135: z[i] = z[intval];
136: z[intval] = tmp;
137: }
139: for (i=0; i<20000; i++) {
140: PetscRandomGetValue(r,&value);
141: intval = (int)(value*20000.0);
142: tmp = zi[i];
143: zi[i] = zi[intval];
144: zi[intval] = tmp;
145: }
146: /* fprintf(stdout,"Done setup\n"); */
148: /* BlastCache(); */
150: PetscTime(&t1);
151: for (i=0; i<2000; i++) x[i] = y[i];
152: PetscTime(&t2);
153: fprintf(stdout,"%-27s : %e sec\n","x[i] = y[i]",(t2-t1)/2000.0);
155: /* BlastCache(); */
157: PetscTime(&t1);
158: for (i=0; i<2000; i++) y[i] = x[z[i]];
159: PetscTime(&t2);
160: fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]]",(t2-t1)/2000.0);
162: /* BlastCache(); */
164: PetscTime(&t1);
165: for (i=0; i<2000; i++) x[z[i]] = y[i];
166: PetscTime(&t2);
167: fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[i]",(t2-t1)/2000.0);
169: /* BlastCache(); */
171: PetscTime(&t1);
172: for (i=0; i<2000; i++) y[z[i]] = x[zi[i]];
173: PetscTime(&t2);
174: fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[zi[i]]",(t2-t1)/2000.0);
176: PetscRandomDestroy(&r);
177: return 0;
178: }
180: int BlastCache(void)
181: {
182: int i,ierr,n = 1000000;
183: PetscScalar *x,*y,*z,*a,*b;
185: PetscMalloc1(5*n,&x);
186: y = x + n;
187: z = y + n;
188: a = z + n;
189: b = a + n;
191: for (i=0; i<n; i++) {
192: a[i] = (PetscScalar) i;
193: y[i] = (PetscScalar) i;
194: z[i] = (PetscScalar) i;
195: b[i] = (PetscScalar) i;
196: x[i] = (PetscScalar) i;
197: }
199: for (i=0; i<n; i++) a[i] = 3.0*x[i] + 2.0*y[i] + 3.3*z[i] - 25.*b[i];
200: for (i=0; i<n; i++) b[i] = 3.0*x[i] + 2.0*y[i] + 3.3*a[i] - 25.*b[i];
201: for (i=0; i<n; i++) z[i] = 3.0*x[i] + 2.0*y[i] + 3.3*a[i] - 25.*b[i];
202: PetscFree(x);
203: return 0;
204: }