(original) (raw)
struct ip { float *t_rp; /* time the reflected p-wave to reach nodes */ float *t_is; /* time the incident s-wave to reach nodes */ float *t_rs; /* time the reflected s-wave to reach nodes */ float c_s; /* speed of s-wave = sqrt(mv/E) */ float c_p; /* speed of p-wave */ float rho; /* speed of p-wave */ float cosG; /* cos(Gamma), Gamma denotes the plane */ /* incident wave comes along */ float sinG; /* sin(Gamma) */ float cosB; /* cos(Beta), angle between wave and the norm /* direction of the surface */ float sinB; /* sin(Beta) */ float cosA; /* cos(Alpha), reflected angle of the wave */ float sinA; /* sin(Alpha) */ float cAB; /* cos(Alpha+Beta) */ float kapa; /* c_s/c_p */ float d0; /* d0 = -x0*sinB + z0*cosB, x0,z0 are location of source of wave */ float f1; /* characteristic frequency of incident wave */ float ArBi; /* coefficient for computing free */ /* field motion */ float BrBi; /* coefficient for computing free */ /* field motion */ float dt; /* time step */ float duration; /* duration to be run, number of steps */ /* = duration/dt */ float dip; float strike; float rake; float T0; }; struct properties { float p_v; float s_v; float den; }; struct source { float dip, strike, rake; float x0[3]; }; struct source_node { int number; float x0[3]; float dist; }; struct triangle { float area; float cent[3]; int sign; }; static int ijk[4][3] = {{1,2,3},{2,3,0},{3,0,1},{0,1,2}}; #define PI 3.1415927 #define EPS 0.0001 #define COEF 0.017453293 #define DELTA 4 #define IDENTITY(x) x /* data types of output records */ #define OUT_CHAR 0 #define OUT_INT 1 #define OUT_FLOAT 2 struct source s; struct source_node sn; struct ip IP; void KMC_maker(); void prepare(); void iso3d(); void d_shape(); void inv_J(); float phi0(float); float phi1(float); float phi2(float); void cal_uvw(); void abc_damper(); void mv12x12(); void vv12x12(); float point2fault(float, float, float); float area(); void centroid(); void print_record_header(int, int); void print_file_header(); float dispx,dispy,dispz,velx,vely,velz; float ampl_h_dsp,ampl_h_vel,ampl_v_dsp,ampl_v_vel; float *max_h_dsp, *ang_h_dsp, *max_h_vel, *ang_h_vel; float *max_v_dsp, *max_v_vel, *energy_integral, *ampl_h_vel_prv; /* Define Archimedes vectors/matrices */ INODEVECTOR sourcenode; INODEVECTOR nodekind; NODEVECTOR nodekindf; IELEMVECTOR forcele; ELEMVECTOR elemkindf[3]; NODEVECTOR3 disp[3], vel, M, C; NODEVECTOR3 M23, C23, V23; MATRIX3 K; void main(int argc, char **argv) { int i, j, k, iter, timesteps; int NumNeighbor = 0; int NumPlus = 0; int NumMinus = 0; int globalNumNeighbor = 0; int globalNumPlus = 0; int disptplus, dispt, disptminus; int verticesonbnd; int cor[4], bv[3]; int Np_step; float c1[3], c2[3], c3[3], c4[3]; float time; float *Ke[12], Kee[12][12]; float Me[12], Ce[12], Mexv[12], Cexv[12], v[12]; float zeta, d_alpha, const_a, const_b; /* Rayleigh damping factors */ float x[3][4], xc[3]; float fr; /* Characteristic frequency for buffer zone */ float fault_u0; float uf[3]; struct properties prop; /* output related variables */ int FLOATS_PER_NODE = 7; /* output buffers */ int *ibuf; float *fbuf; int count; int surface_nodes; /* timing variables */ double starttime; double endtime; double totalstarttime; double totalendtime; double totaltime = 0.0; double totaltime1 = 0.0; double totaltime2 = 0.0; double packtime = 0.0; double readtime = 0.0; double printtime = 0.0; double inittime = 0.0; double sourcesetuptime = 0.0; double boundarytime = 0.0; double assembletime = 0.0; double mvtime = 0.0; double vvtime = 0.0; double misctime = 0.0; double mvflops; double mvmflops; double vvflops; double vvmflops; double localmflops; double globalmflops; double commbytes; double commthruput; /* NOTE: it has been assumed that there are 5 possible flag values for the node data, that is: 1 if the node is in the interior 4 if the node is on a x=const boundary surface 5 if the node is on a y=const boundary surface 6 if the node is on the bottom surface (z=z_lower) 3 if the node is on the surface (z=0) (but not along the edges) */ /*---------------------------------------------------------------*/ /* start timings */ TIME(totalstarttime); totalstarttime = totalstarttime - ARCHiotime; /* packfile loading time */ packtime = ARCHiotime; /* read data in from pack file */ TIME(ARCHstarttime); READNODEVECTOR(nodekindf); READELEMVECTOR(elemkindf[0]); READELEMVECTOR(elemkindf[1]); READELEMVECTOR(elemkindf[2]); TIME(ARCHendtime); ARCHiotime += (ARCHendtime - ARCHstarttime); readtime += (ARCHendtime - ARCHstarttime); TIME(ARCHstarttime); TIME(starttime); /* initializations */ /* dynamic parameters */ IP.dt = 0.0024; IP.duration = 40.0; IP.T0 = 0.6; Np_step = 30; timesteps = IP.duration / IP.dt + 1; /* source characteristics */ s.strike = 111.0; s.dip = 44.0; s.rake = 70.0; s.x0[0] = 32.264153; s.x0[1] = 23.814432; s.x0[2] = -11.25; IP.strike = s.strike * PI / 180.0; IP.dip = s.dip * PI / 180.0; IP.rake = s.rake * PI / 180.0; fault_u0 = 29.640788; /* prescribe slip motion */ uf[0] = uf[1] = uf[2] = 0.0; cal_uvw(&uf[0], &uf[1], &uf[2]); uf[0] *= fault_u0; uf[1] *= fault_u0; uf[2] *= fault_u0; /* damping constants */ zeta = 30.0; /* Damping factor FOR BUFFER ZONE in percentage(%) */ const_a = 0.00533333; const_b = 0.06666667; fr = 0.5; /* vector and array initialization */ M = 0.0; C = 0.0; M23 = 0.0; C23 = 0.0; V23 = 0.0; disp[0] = 0.0; disp[1] = 0.0; disp[2] = 0.0; nodekind = 0; K = 0.0; for (i = 0; i < 12; i++) Ke[i] = Kee[i]; disptplus = 0; dispt = 1; disptminus = 2; TIME(ARCHendtime); TIME(endtime); ARCHcomptime += (ARCHendtime - ARCHstarttime); inittime += (endtime - starttime); TIME(ARCHstarttime); TIME(starttime); /* Print out case info */ if (ARCHself == 0) { fprintf(stderr, " CASE SUMMARY \n\n"); fprintf(stderr, " Fault Information\n"); fprintf(stderr, " Orientation: strike: %f\n",s.strike); fprintf(stderr, " dip: %f\n",s.dip); fprintf(stderr, " rake: %f\n",s.rake); fprintf(stderr, " Location: (%f, %f, %f), in Km\n", s.x0[0], s.x0[1], s.x0[2]); fprintf(stderr, " Dislocation: %f, cm\n", fault_u0); fprintf(stderr, " Ramp rise time: %f, sec\n", IP.T0); fprintf(stderr, " Time step: %f, sec\n", IP.dt); fprintf(stderr, " Duration: %f, sec\n", IP.duration); fflush(stderr); } TIME(ARCHendtime); TIME(endtime); ARCHiotime += (ARCHendtime - ARCHstarttime); printtime += (endtime - starttime); TIME(ARCHstarttime); TIME(starttime); /* redefine nodekind to be 1 for all surface nodes */ /* this is temp work-around */ FORNODE(i) { nodekind[i] = (int) nodekindf[i]; if (nodekind[i] == 3) nodekind[i] = 1; } /* Count surface nodes */ surface_nodes = 0; for (i = 0; i < ARCHmine; i++) { GETCOORD3 (i,c1); if (c1[2] == 0.0) surface_nodes++; } TIME(ARCHendtime); TIME(endtime); ARCHcomptime += (ARCHendtime - ARCHstarttime); inittime += (endtime - starttime); TIME(ARCHstarttime); TIME(starttime); fprintf(stderr, "For subdomain %d there are %d surface nodes\n", ARCHself, surface_nodes); fflush(stderr); TIME(ARCHendtime); TIME(endtime); ARCHiotime += (ARCHendtime - ARCHstarttime); printtime += (endtime - starttime); TIME(ARCHstarttime); TIME(starttime); /* various memory allocations */ if (surface_nodes != 0) { fbuf = (float *) malloc(surface_nodes * FLOATS_PER_NODE * sizeof(float)); if ( fbuf == (float *) NULL) { fprintf(stderr, "malloc failed for fbuf on processor = %d \n",ARCHself); fflush(stderr); exit(0); } ibuf = (int *) malloc(surface_nodes * sizeof(int)); if ( ibuf == (int *) NULL) { fprintf(stderr, "malloc failed for ibuf on processor = %d \n",ARCHself); fflush(stderr); exit(0); } max_h_dsp = (float *) malloc(surface_nodes * sizeof(float)); if ( max_h_dsp == (float *) NULL) { fprintf (stderr,"malloc failed for max_h_dsp on processor = %d \n",ARCHself); fflush(stderr); exit(0); } ang_h_dsp = (float *) malloc(surface_nodes * sizeof(float)); if ( ang_h_dsp == (float *) NULL) { fprintf (stderr,"malloc failed for ang_h_dsp on processor = %d \n",ARCHself); fflush(stderr); exit(0); } max_h_vel = (float *) malloc(surface_nodes * sizeof(float)); if ( max_h_vel == (float *) NULL) { fprintf (stderr,"malloc failed for max_h_vel on processor = %d \n",ARCHself); fflush(stderr); exit(0); } ang_h_vel = (float *) malloc(surface_nodes * sizeof(float)); if ( ang_h_vel == (float *) NULL) { fprintf (stderr,"malloc failed for ang_h_vel on processor = %d \n",ARCHself); fflush(stderr); exit(0); } max_v_dsp = (float *) malloc(surface_nodes * sizeof(float)); if ( max_v_dsp == (float *) NULL) { fprintf (stderr,"malloc failed for max_v_dsp on processor = %d \n",ARCHself); fflush(stderr); exit(0); } max_v_vel = (float *) malloc(surface_nodes * sizeof(float)); if ( max_v_vel == (float *) NULL) { fprintf (stderr,"malloc failed for max_v_vel on processor = %d \n",ARCHself); fflush(stderr); exit(0); } energy_integral = (float *) malloc(surface_nodes * sizeof(float)); if ( energy_integral == (float *) NULL) { fprintf (stderr,"malloc failed for energy_integral on processor = %d \n",ARCHself); fflush(stderr); exit(0); } ampl_h_vel_prv = (float *) malloc(surface_nodes * sizeof(float)); if ( ampl_h_vel_prv == (float *) NULL) { fprintf (stderr,"malloc failed for ampl_h_vel_prv on processor = %d \n",ARCHself); fflush(stderr); exit(0); } } else { fbuf = (float *) malloc (FLOATS_PER_NODE * sizeof(float)); ibuf = (int *) malloc (sizeof(int)); max_h_dsp = (float *) malloc(sizeof(float)); ang_h_dsp = (float *) malloc(sizeof(float)); max_h_vel = (float *) malloc(sizeof(float)); ang_h_vel = (float *) malloc(sizeof(float)); max_v_dsp = (float *) malloc(sizeof(float)); max_v_vel = (float *) malloc(sizeof(float)); energy_integral = (float *) malloc(sizeof(float)); ampl_h_vel_prv = (float *) malloc(sizeof(float)); } /* initialize output quantities */ for (i = 0; i < surface_nodes; i++) { max_h_dsp[i] = -1.0; max_h_vel[i] = -1.0; ang_h_dsp[i] = 0.0; ang_h_vel[i] = 0.0; max_v_dsp[i] = -1.0; max_v_vel[i] = -1.0; energy_integral[i] = 0.0; ampl_h_vel_prv[i] = 0.0; } TIME(ARCHendtime); TIME(endtime); ARCHcomptime += (ARCHendtime - ARCHstarttime); inittime += (endtime - starttime); TIME(ARCHstarttime); TIME(starttime); /* search for the node closest to the point source */ sourcenode = 1; forcele = 1; sn.number = 0; FORNODE(i) if (ARCHglobalnode[i] == 254216) { sn.number = i; sourcenode[i] = 10; } /* search for all the elements that contain the source node */ if (sn.number != 0) { FORELEM(i) { GETCORNERS(i, cor); if (cor[0] == sn.number || cor[1] == sn.number || cor[2] == sn.number || cor[3] == sn.number) { GETCOORD3(cor[0],c2); GETCOORD3(cor[1],c1); GETCOORD3(cor[2],c3); GETCOORD3(cor[3],c4); for(j = 0; j < 3; j++) { x[j][0] = c1[j]; x[j][1] = c2[j]; x[j][2] = c3[j]; x[j][3] = c4[j]; } centroid(x, xc); if (point2fault(xc[0], xc[1], xc[2]) >= 0) { NumPlus++; forcele[i] = 3; } else { NumMinus++; forcele[i] = 2; } NumNeighbor++; TIME(ARCHendtime); TIME(endtime); ARCHcomptime += (ARCHendtime - ARCHstarttime); sourcesetuptime += (endtime - starttime); TIME(ARCHstarttime); TIME(starttime); fprintf(stderr, "ele %d at (%f %f %f) %d (subdomain %d)\n", i+1, xc[0], xc[1], xc[2], forcele[i],ARCHself); fprintf(stderr, "%d %d %d %d %d\n", i+1, cor[0], cor[1], cor[2], cor[3]); fflush(stderr); TIME(ARCHendtime); TIME(endtime); ARCHiotime += (ARCHendtime - ARCHstarttime); printtime += (endtime - starttime); TIME(ARCHstarttime); TIME(starttime); } } } TIME(ARCHendtime); TIME(endtime); ARCHcomptime += (ARCHendtime - ARCHstarttime); sourcesetuptime += (endtime - starttime); TIME(ARCHstarttime); TIME(starttime); ISUMREDUCE(&NumNeighbor, &globalNumNeighbor, 1); ISUMREDUCE(&NumPlus, &globalNumPlus, 1); if (ARCHself == 0) { fprintf(stderr, "There are %d elements adjacent to the source\n", globalNumNeighbor); fprintf(stderr, " %d on the *plus* side of the mesh. \n", globalNumPlus); fflush(stderr); } TIME(ARCHendtime); TIME(endtime); ARCHiotime += (ARCHendtime - ARCHstarttime); printtime += (endtime - starttime); TIME(ARCHstarttime); TIME(starttime); /* simulation */ FORELEM(i) { for (j = 0; j < 12; j++) { Me[j] = 0.0; Ce[j] = 0.0; v[j] = 0.0; for (k = 0; k < 12; k++) Kee[j][k] = 0.0; } GETCORNERS(i, cor); /* inhomogeneous material */ prop.p_v = elemkindf[0][i]; prop.s_v = elemkindf[1][i]; prop.den = elemkindf[2][i]; /* check if it is a boundary element */ TIME(ARCHendtime); TIME(endtime); ARCHcomptime += (ARCHendtime - ARCHstarttime); assembletime += (endtime - starttime); TIME(ARCHstarttime); TIME(starttime); verticesonbnd = 0; for (j = 0; j < 4; j++) if (nodekind[cor[j]] != 1) bv[verticesonbnd++] = j; if (verticesonbnd == 3) { GETCOORD3(cor[bv[0]], c1); GETCOORD3(cor[bv[1]], c2); GETCOORD3(cor[bv[2]], c3); abc_damper(c1, c2, c3, bv, &prop, Ce); } TIME(ARCHendtime); TIME(endtime); ARCHcomptime += (ARCHendtime - ARCHstarttime); boundarytime += (endtime - starttime); TIME(ARCHstarttime); TIME(starttime); GETCOORD3(cor[0], c1); GETCOORD3(cor[1], c2); GETCOORD3(cor[2], c3); GETCOORD3(cor[3], c4); KMC_maker(c1, c2, c3, c4, &prop, Ke, Me, Ce); /* Here put damping into it with alpha only */ for(j = 0; j < 3; j++) { x[j][0] = c1[j]; x[j][1] = c2[j]; x[j][2] = c3[j]; x[j][3] = c4[j]; } centroid(x, xc); if (xc[2] < -11.5) d_alpha = 2.0 * zeta / 100.0 * (2.0 * PI * fr); else d_alpha = 4.0 * PI * const_a * 0.95 / (prop.s_v + const_b); for (j = 0; j < 12; j++) Ce[j] = Ce[j] + d_alpha * Me[j]; if (forcele[i] == 2 || forcele[i] == 3) { for (j = 0; j < 4; j++) { if (sourcenode[cor[j]] == 10) { v[3*j] = uf[0]; v[3*j+1] = uf[1]; v[3*j+2] = uf[2]; } else { v[3*j] = 0; v[3*j+1] = 0; v[3*j+2] = 0; } } vv12x12(Me, v, Mexv); vv12x12(Ce, v, Cexv); mv12x12(Kee, v); if (forcele[i] == 3) for (j = 0; j < 12; j++) { v[j] = -v[j]; Mexv[j] = -Mexv[j]; Cexv[j] = -Cexv[j]; } ASSEMBLEVECTOR3(v , i, V23); ASSEMBLEVECTOR3(Mexv, i, M23); ASSEMBLEVECTOR3(Cexv, i, C23); } ASSEMBLEVECTOR3(Me, i, M); ASSEMBLEVECTOR3(Ce, i, C); ASSEMBLEMATRIX3(Kee, i, K); } FULLASSEMBLE3(M); FULLASSEMBLE3(C); FULLASSEMBLE3(V23); FULLASSEMBLE3(M23); FULLASSEMBLE3(C23); TIME(ARCHendtime); TIME(endtime); ARCHcomptime += (ARCHendtime - ARCHstarttime); assembletime += (endtime - starttime); TIME(ARCHstarttime); TIME(starttime); /* redefine nodekind to be 3 for all surface nodes */ count = 0; FORNODE(i) { if (i == ARCHmine) break; /* necessary to avoid overallocating ibuf,fbuf */ GETCOORD3 (i,c1); if (c1[2] == 0.0) { nodekind[i] = 3; ibuf[count] = ARCHglobalnode[i]; fbuf[2*count] = c1[0]; fbuf[2*count+1] = c1[1]; count++; /* on exit, count should be equal to surface_nodes */ } } TIME(ARCHendtime); TIME(endtime); ARCHcomptime += (ARCHendtime - ARCHstarttime); inittime += (endtime - starttime); /* print the surface nodes ids and coordinates */ print_file_header(); TIME(ARCHstarttime); TIME(starttime); print_record_header(OUT_INT, surface_nodes); PRINTBUF(ibuf, surface_nodes*sizeof(int)); print_record_header(OUT_FLOAT, 2 * surface_nodes); PRINTBUF(fbuf, 2*surface_nodes*sizeof(float)); fbuf[0] = IP.dt; print_record_header(OUT_FLOAT, 1); PRINTBUF(fbuf, sizeof(float)); ibuf[0] = Np_step; ibuf[1] = timesteps; print_record_header(OUT_INT, 2); PRINTBUF(ibuf, 2*sizeof(int)); TIME(ARCHendtime); TIME(endtime); ARCHiotime += (ARCHendtime - ARCHstarttime); printtime += (endtime - starttime); /* time integration loop */ for (iter = 1; iter <= timesteps; iter++) { TIME(ARCHstarttime); TIME(starttime); MV3PRODUCT(K, disp[dispt], disp[disptplus]); TIME(ARCHendtime); TIME(endtime); ARCHcomptime += (ARCHendtime - ARCHstarttime); mvtime += (endtime - starttime); TIME(ARCHstarttime); TIME(starttime); time = iter * IP.dt; disp[disptplus] *= - IP.dt * IP.dt; disp[disptplus] += 2.0 * M * disp[dispt] - (M - IP.dt / 2.0 * C) * disp[disptminus] - IP.dt * IP.dt * (M23 * phi2(time) / 2.0 + C23 * phi1(time) / 2.0 + V23 * phi0(time) / 2.0); disp[disptplus] = disp[disptplus] / (M + IP.dt / 2.0 * C); vel = 0.5 / IP.dt * (disp[disptplus] - disp[disptminus]); TIME(ARCHendtime); TIME(endtime); ARCHcomptime += (ARCHendtime - ARCHstarttime); vvtime += (endtime - starttime); TIME(ARCHstarttime); TIME(starttime); j = 0; for (i = 0; i < ARCHmine; i++) { if (nodekind[i] == 3) { dispx = IDENTITY(disp[disptplus][i][0]); dispy = IDENTITY(disp[disptplus][i][1]); dispz = IDENTITY(disp[disptplus][i][2]); velx = IDENTITY(vel[i][0]); vely = IDENTITY(vel[i][1]); velz = IDENTITY(vel[i][2]); /* find maximum surface horizontal displacement; store amplitude and direction */ ampl_h_dsp = dispx * dispx + dispy * dispy; if (ampl_h_dsp > max_h_dsp[j]) { max_h_dsp[j] = ampl_h_dsp; ang_h_dsp[j] = atan2f(dispy,dispx); } /* find maximum surface horizontal velocity; store amplitude and direction */ ampl_h_vel = velx * velx + vely * vely; if (ampl_h_vel > max_h_vel[j]) { max_h_vel[j] = ampl_h_vel; ang_h_vel[j] = atan2f(vely,velx); } /* find maximum surface vertical displacement; store amplitude */ ampl_v_dsp = fabs(dispz); if (ampl_v_dsp > max_v_dsp[j]) max_v_dsp[j] = ampl_v_dsp; /* find maximum surface vertical velocity; store amplitude */ ampl_v_vel = fabs(velz); if (ampl_v_vel > max_v_vel[j]) max_v_vel[j] = ampl_v_vel; /* energy integral = int v^2 dt */ energy_integral[j] += 0.5 * (ampl_h_vel + ampl_h_vel_prv[j]) * IP.dt; ampl_h_vel_prv[j] = ampl_h_vel; j++; } } TIME(ARCHendtime); TIME(endtime); ARCHcomptime += (ARCHendtime - ARCHstarttime); misctime += (endtime - starttime); /* pack the output buffer and print */ TIME(ARCHstarttime); TIME(starttime); if (iter % Np_step == 0) { if (ARCHself == 0) { count = 1; fprintf(stderr,"Iteration = %d\n",iter); } else count = 0; ibuf[0] = iter; print_record_header(OUT_INT, count); PRINTBUF(ibuf, count*sizeof(int)); /* print response for all surface nodes */ count = 0; for (i = 0; i < ARCHmine; i++) { if (nodekind[i] == 3) { fbuf[count++] = IDENTITY(disp[disptplus][i][0]); fbuf[count++] = IDENTITY(disp[disptplus][i][1]); fbuf[count++] = IDENTITY(disp[disptplus][i][2]); fbuf[count++] = IDENTITY(vel[i][0]); fbuf[count++] = IDENTITY(vel[i][1]); fbuf[count++] = IDENTITY(vel[i][2]); } } print_record_header(OUT_FLOAT, count); PRINTBUF(fbuf, count*sizeof(float)); /* print response at the source node */ count = 0; for (i = 0; i < ARCHmine; i++) { if (sourcenode[i] == 10) { fbuf[count++] = IDENTITY(disp[disptplus][i][0]); fbuf[count++] = IDENTITY(disp[disptplus][i][1]); fbuf[count++] = IDENTITY(disp[disptplus][i][2]); fbuf[count++] = IDENTITY(vel[i][0]); fbuf[count++] = IDENTITY(vel[i][1]); fbuf[count++] = IDENTITY(vel[i][2]); } } print_record_header(OUT_FLOAT, count); PRINTBUF(fbuf, count*sizeof(float)); } TIME(ARCHendtime); TIME(endtime); ARCHiotime += (ARCHendtime - ARCHstarttime); printtime += (endtime - starttime); TIME(ARCHstarttime); TIME(starttime); i = disptminus; disptminus = dispt; dispt = disptplus; disptplus = i; TIME(ARCHendtime); TIME(endtime); ARCHcomptime += (ARCHendtime - ARCHstarttime); misctime += (endtime - starttime); } /* print maxima */ TIME(ARCHstarttime); TIME(starttime); count = 0; j = 0; for (i = 0; i < ARCHmine; i++) { if (nodekind[i] == 3) { fbuf[count++] = sqrtf(max_h_dsp[j]); fbuf[count++] = ang_h_dsp[j]; fbuf[count++] = sqrtf(max_h_vel[j]); fbuf[count++] = ang_h_vel[j]; fbuf[count++] = max_v_dsp[j]; fbuf[count++] = max_v_vel[j]; fbuf[count++] = energy_integral[j]; j++; } } print_record_header(OUT_FLOAT, count); PRINTBUF(fbuf, count*sizeof(float)); TIME(ARCHendtime); TIME(endtime); ARCHiotime += (ARCHendtime - ARCHstarttime); printtime += (endtime - starttime); TIME(totalendtime); /* various summaries of timings */ totaltime = (totalendtime - totalstarttime); totaltime1 = ARCHcomptime + ARCHcommtime + ARCHiotime; totaltime2 = packtime + readtime + printtime + inittime + sourcesetuptime + boundarytime + assembletime + mvtime + vvtime + misctime; /* flops and mflops per processor for matrix-vector operations */ mvflops = timesteps * 1.0 * /* 1 mvproduct/timestep */ (18 * /* 2 * dof^2 operations/entry */ (2*ARCHmatrixlen - ARCHnodes)); /* number of nonzero entries */ mvmflops = (mvflops/1000000.0)/mvtime; /* flops and mflops per processor for vector-vector operations */ vvflops = timesteps * 20.0 * /* twenty vv operations */ (ARCHnodes*3.0); /* three dof per operation */ vvmflops = (vvflops/1000000.0)/vvtime; /* total local mflops/processor (including all io and communication) */ localmflops = ((mvflops + vvflops)/1000000.0)/totaltime; /* aggregate global mflops (approximate because we assume */ /* equal work on each processor) */ globalmflops = (((mvflops + vvflops) * ARCHsubdomains) / 1000000.0) / totaltime; /* per processor communication during vector assembly */ commbytes = timesteps * 1.0 * /* one assembly ops per timestep */ ARCHcommlen * 6.0 * /* three items sent and received */ sizeof(float); /* bytes per item */ commthruput = (commbytes/(1<<20))/ARCHcommtime; if (ARCHself == 0) { fprintf(stderr, "%s: %d subdomains %d nodes %d elems %d timesteps\n", progname, ARCHsubdomains, ARCHglobalnodes, ARCHglobalelems, timesteps); fprintf(stderr, "\n"); fprintf(stderr, "comp : %12.2fs (%2d%%)\n", ARCHcomptime, (int)((ARCHcomptime/totaltime)*100.0)); fprintf(stderr, "comm : %12.2fs (%2d%%)\n", ARCHcommtime, (int)((ARCHcommtime/totaltime)*100.0)); fprintf(stderr, "io : %12.2fs (%2d%%)\n", ARCHiotime, (int)((ARCHiotime/totaltime)*100.0)); fprintf(stderr, "total : %12.2fs (%.0f min)[%.2f][%.2f]\n", totaltime, totaltime/60.0, totaltime1/totaltime, totaltime2/totaltime); fprintf(stderr, "total : %12.2fs\n",totaltime); fprintf(stderr, "total1 : %12.2fs\n",totaltime1); fprintf(stderr, "total2 : %12.2fs\n",totaltime2); fprintf(stderr, "pack file loading : %12.2fs (%2d%%)\n", packtime, (int)((packtime/totaltime)*100.0)); fprintf(stderr, "reading : %12.2fs (%2d%%)\n", readtime, (int)((readtime/totaltime)*100.0)); fprintf(stderr, "printing : %12.2fs (%2d%%)\n", printtime, (int)((printtime/totaltime)*100.0)); fprintf(stderr, "initializations : %12.2fs (%2d%%)\n", inittime, (int)((inittime/totaltime)*100.0)); fprintf(stderr, "source setup : %12.2fs (%2d%%)\n", sourcesetuptime, (int)((sourcesetuptime/totaltime)*100.0)); fprintf(stderr, "boundary setup : %12.2fs (%2d%%)\n", boundarytime, (int)((boundarytime/totaltime)*100.0)); fprintf(stderr, "assembling : %12.2fs (%2d%%)\n", assembletime, (int)((assembletime/totaltime)*100.0)); fprintf(stderr, "mv operations : %12.2fs (%2d%%)\n", mvtime, (int)((mvtime/totaltime)*100.0)); fprintf(stderr, "vv operations : %12.2fs (%2d%%)\n", vvtime, (int)((vvtime/totaltime)*100.0)); fprintf(stderr, "misc : %12.2fs (%2d%%)\n", misctime, (int)((misctime/totaltime)*100.0)); fprintf(stderr, "\n"); fprintf(stderr, "MFlops/sec/proc : %.1f\n", localmflops); fprintf(stderr, "MFlops/sec/proc (mv) : %.1f\n", mvmflops); fprintf(stderr, "MFlops/sec/proc (vv) : %.1f\n", vvmflops); fprintf(stderr, "Global MFlops/sec : %.1f\n", globalmflops); fprintf(stderr, "\n"); fprintf(stderr, "MBytes/sec/proc (as) : %.1f\n", commthruput); fprintf(stderr, "\n"); fprintf(stderr, "mvflops: %.0f mvtime: %.0f\n", mvflops, mvtime); fprintf(stderr, "vvflops: %.0f vvtime: %.0f\n", vvflops, vvtime); fprintf(stderr, "commbytes: %d\n", (int)commbytes); fflush(stderr); } } /* generates Ke[12][12], Me[12] and Ce[12] */ void KMC_maker(c1, c2, c3, c4, prop, Ke, Me, Ce) float *c1, *c2, *c3, *c4, **Ke, *Me, *Ce; struct properties *prop; { float jicob[3][5], ds[4][4], xyz[8][3], elas[4]; prepare(c1, c2, c3, c4, xyz, elas, prop); iso3d(xyz, elas, jicob, ds, Ke, Me); } /* returns material properties */ void Enu(e, v, prop) float *e, *v; struct properties *prop; { float x; x = prop->p_v / prop->s_v; x = x * x; *v = 0.5 * (x - 2) / (x - 1); *e = 2 * prop->den * prop->s_v * prop->s_v * (1 + *v); } /* returns corner coords and material properties */ void prepare(c1, c2, c3, c4, xyz, elas, prop) float *c1, *c2, *c3, *c4; float xyz[8][3]; float elas[]; struct properties *prop; { int i; float e, v; Enu(&e, &v, prop); elas[0] = e / (2.0 * (v + 1.0) * (1.0 - v * 2.0)); elas[1] = e * v / ((v + 1.0) * (1.0 - v * 2.0)); elas[2] = e / ((v + 1.0) * 2.0); elas[3] = prop->den; for (i = 0; i < 3; i++) { /* which defines corner coordinates */ xyz[0][i] = c1[i]; xyz[1][i] = c2[i]; xyz[2][i] = c3[i]; xyz[3][i] = c4[i]; } } /* generate element stiffness (Ke) and mass matrices (Me) for a 3d isoparameteric element (elasticity) stiffness matrix Ke and Me */ void iso3d(xyz, elas, jicob, ds, Ke, Me) float xyz[8][3]; float elas[]; float jicob[3][5]; float ds[4][4]; float **Ke; float *Me; { float det, tt, ts, c1, c2, c3; float volume; int i, j, m, n, row, column; d_shape(ds); /* compute derivertives of shapes */ for (i = 0; i < 3; ++i) { for (j = 0; j < 3; ++j) { det = 0.0; for (m = 0; m < 4; ++m) { det = det + ds[i][m]*xyz[m][j]; } jicob[j][i] = det; /* form Jacobian=dXj/dXIi */ } } inv_J(jicob, &det); /* find the J^-1 & its determinant */ for (m = 0; m < 4; ++m) { for (i = 0; i < 3; ++i) { jicob[i][3] = 0.0; for (j = 0; j < 3; ++j) { jicob[i][3] = jicob[i][3] + jicob[j][i]*ds[j][m]; } } for (i = 0; i < 3; ++i) { ds[i][m] = jicob[i][3]; /* d(shapes)/d(x,y,z) */ } } volume = det/6; /* volume of a tetrahedron in xyz coordinates */ /* printf("%f\n", volume); */ if (volume <= 0) { /* Something wrong w/ the numbering of ele vertices. */ printf("Warning: volume of the element = %f !\n", volume); /* exit(-1); */ } c1 = elas[0]*volume; /* volume in xi-coordinates: 1/6 */ c2 = elas[1]*volume; c3 = elas[2]*volume; row = -1; for (m = 0; m < 4; m++) { /* generate low-half matrix */ for (i = 0; i < 3; ++i) { ++row; column = -1; for (n = 0; n <= m; n++) { for (j = 0; j < 3; j++) { ++column; ts = ds[i][m]*ds[j][n]; if (i == j) { ts = ts*c1; tt = (ds[0][m]*ds[0][n] + ds[1][m]*ds[1][n] + ds[2][m]*ds[2][n])*c3; } else { if(m==n){ ts=ts*c1; tt=0; } else { ts = ts * c2; tt = ds[j][m] * ds[i][n] * c3; } } Ke[row][column] = Ke[row][column] + ts + tt; } } } } tt = elas[3] * volume / 4.0; for (i = 0; i < 12; i++) Me[i] = tt; for (i = 0; i < 12; ++i) for (j = 0; j <= i; j++) Ke[j][i] = Ke[i][j]; } /* calculate D(N_m)/D(XI_j) into ds[0..2][*] */ void d_shape(ds) float ds[4][4]; { ds[0][0] = -1; ds[1][0] = -1; ds[2][0] = -1; ds[0][1] = 1; ds[1][1] = 0; ds[2][1] = 0; ds[0][2] = 0; ds[1][2] = 1; ds[2][2] = 0; ds[0][3] = 0; ds[1][3] = 0; ds[2][3] = 1; } /* calculating the inverse and the determinant of the jacobian */ void inv_J(a, det) float a[3][5]; float *det; { float d1; float c[3][3]; int i,j; c[0][0] = a[1][1]*a[2][2] - a[2][1]*a[1][2]; c[0][1] = a[0][2]*a[2][1] - a[0][1]*a[2][2]; c[0][2] = a[0][1]*a[1][2] - a[0][2]*a[1][1]; c[1][0] = a[1][2]*a[2][0] - a[1][0]*a[2][2]; c[1][1] = a[0][0]*a[2][2] - a[0][2]*a[2][0]; c[1][2] = a[0][2]*a[1][0] - a[0][0]*a[1][2]; c[2][0] = a[1][0]*a[2][1] - a[1][1]*a[2][0]; c[2][1] = a[0][1]*a[2][0] - a[0][0]*a[2][1]; c[2][2] = a[0][0]*a[1][1] - a[0][1]*a[1][0]; *det = a[0][0]*c[0][0] + a[0][1]*c[1][0] + a[0][2]*c[2][0]; d1 = 1.0 / *det; for(i=0;i<3;i++) for(j=0;j<3;j++) a[i][j]= c[i][j]*d1; } /* calculate the area of a triangle given the coordinates of its three vertices (c1,c2,c3) */ float cal_area(c1, c2, c3) float *c1, *c2, *c3; { float a, b, c; float x2, y2, z2; float p; float area; x2 = (c1[0]-c2[0])*(c1[0]-c2[0]); y2 = (c1[1]-c2[1])*(c1[1]-c2[1]); z2 = (c1[2]-c2[2])*(c1[2]-c2[2]); a = sqrt(x2 + y2 + z2); x2 = (c3[0]-c2[0])*(c3[0]-c2[0]); y2 = (c3[1]-c2[1])*(c3[1]-c2[1]); z2 = (c3[2]-c2[2])*(c3[2]-c2[2]); b = sqrt(x2 + y2 + z2); x2 = (c1[0]-c3[0])*(c1[0]-c3[0]); y2 = (c1[1]-c3[1])*(c1[1]-c3[1]); z2 = (c1[2]-c3[2])*(c1[2]-c3[2]); c = sqrt(x2 + y2 + z2); p = (a + b + c) / 2; area = sqrt(p*(p-a)*(p-b)*(p-c)); if (area <= 0) { fprintf(stderr, "Error: negative area or zero area %f\n", p); exit(-1); } return area; } /* absorbing boundary element */ void abc_damper(c1, c2, c3, bv, prop, Ce) int bv[3]; float *c1, *c2, *c3; float Ce[12]; struct properties *prop; { int i, m0; float area; area = cal_area(c1, c2, c3); for (i = 0; i < 3; i++) { m0 = 3 * bv[i]; Ce[m0] = Ce[m0] + prop->s_v * prop->den * area / 3; Ce[m0+1] = Ce[m0+1] + prop->s_v * prop->den * area / 3; Ce[m0+2] = Ce[m0+2] + prop->p_v * prop->den * area / 3; } } /* ramp function */ float phi0(float t) { float value; if ( t <= IP.T0 ) { value = 0.5/PI*(2.0*PI*t/IP.T0-sinf(2.0*PI*t/IP.T0)); return value; } else return 1.0; } /* velocity of ramp function */ float phi1(float t) { float value; if (t <= IP.T0) { value = (1.0 - cosf(2.0 * PI * t / IP.T0)) / IP.T0; return value; } else return 0; } /* acceleration of ramp function */ float phi2(float t) { float value; if (t <= IP.T0) { value = 2 * PI / IP.T0 / IP.T0 * sinf(2 * PI * t / IP.T0); return value; } else return 0; } /* calculates the slip motion at the source node */ void cal_uvw(float *u, float *v, float *w) { *u = *v = *w = 0.0; *u = (cos(IP.rake)*sin(IP.strike)-sin(IP.rake)* cos(IP.strike)*cos(IP.dip)); *v = (cos(IP.rake)*cos(IP.strike)+sin(IP.rake)* sin(IP.strike)*cos(IP.dip)); *w = sin(IP.rake)*sin(IP.dip); } /* calculate the centroid of a tetrahedron */ void centroid(float x[3][4], float x0[3]) { int i; for (i = 0; i < 3; i++) x0[i] = (x[i][0] + x[i][1] + x[i][2] + x[i][3]) / 4; } /* calculate the distance to the fault from a given point (x,y,z) */ float point2fault(float x, float y, float z) { float nx, ny, nz; float d0; nx = cos(IP.strike) * sin(IP.dip); ny = - sin(IP.strike) * sin(IP.dip); nz = cos(IP.dip); d0 = - (nx * s.x0[0] + ny * s.x0[1] + nz * s.x0[2]); return (float) nx * x + ny * y + nz * z + d0; } /* matrix (12x12) times vector (12x1) product */ void mv12x12(float m[12][12], float v[12]) { int i, j; float u[12]; for (i = 0; i < 12; i++) { u[i] = 0; for (j = 0; j < 12; j++) u[i] += m[i][j] * v[j]; } for (i = 0; i < 12; i++) v[i] = u[i]; } /* vector (12x1) times vector (12x1) product */ void vv12x12 (float v1[12], float v2[12], float u[12]) { int i; for (i = 0; i < 12; i++) u[i] = v1[i] * v2[i]; } void print_record_header(int type, int items) { int rhdr[2], globalitems; ISUMREDUCE(&items, &globalitems, 1); if (ARCHself == 0) { rhdr[0] = type; rhdr[1] = globalitems; fwrite(rhdr, sizeof(int), 2, stdout); } } void print_file_header() { char buf[8]; buf[0] = 'A'; buf[1] = 'O'; buf[2] = '1'; if (ARCHself == 0) { fwrite(buf, sizeof(char), 8, stdout); } }