tSpeed up code by avoiding use of index function idx1g - cngf-pf - continuum model for granular flows with pore-pressure dynamics (renamed from 1d_fd_simple_shear)
 (HTM) git clone git://src.adamsgaard.dk/cngf-pf
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit aa7e4acb6d8f1d2cda21c117a363184f94ed2499
 (DIR) parent 231ab304381ef266046dffb7a26abc5833c32c7b
 (HTM) Author: Anders Damsgaard <anders@adamsgaard.dk>
       Date:   Fri, 30 Aug 2019 12:58:03 +0200
       
       Speed up code by avoiding use of index function idx1g
       
       Diffstat:
         M Makefile                            |       2 +-
         M fluid.c                             |      75 +++++++++++++++++++++++---------
         M main.c                              |       1 +
         M simulation.c                        |      34 +++++++++++++++----------------
       
       4 files changed, 73 insertions(+), 39 deletions(-)
       ---
 (DIR) diff --git a/Makefile b/Makefile
       t@@ -1,4 +1,4 @@
       -CFLAGS = -g -std=c99 -pedantic -Wall
       +CFLAGS = -std=c99 -pedantic -Wall -O3
        LDFLAGS = -lm
        SRC = $(wildcard *.c)
        OBJ = $(patsubst %.c,%.o,$(SRC))
 (DIR) diff --git a/fluid.c b/fluid.c
       t@@ -10,9 +10,9 @@ hydrostatic_fluid_pressure_distribution(struct simulation* sim)
        {
                int i;
                for (i=0; i<sim->nz; ++i)
       -                sim->p_f_ghost[idx1g(i)] = sim->p_f_top +
       -                                           sim->phi[i]*sim->rho_f*sim->G*
       -                                           (sim->L_z - sim->z[i]);
       +                sim->p_f_ghost[i+1] = sim->p_f_top +
       +                                      sim->phi[i]*sim->rho_f*sim->G*
       +                                      (sim->L_z - sim->z[i]);
        }
        
        /* Determines the largest time step for the current simulation state. Note 
       t@@ -64,7 +64,7 @@ static void
        set_fluid_bcs(struct simulation* sim, const double p_f_top)
        {
                set_bc_dirichlet(sim->p_f_ghost, sim->nz, +1, p_f_top);
       -        sim->p_f_ghost[idx1g(sim->nz-1)] = p_f_top; /* Include top node in BC */
       +        sim->p_f_ghost[sim->nz] = p_f_top; /* Include top node in BC */
                set_bc_neumann(sim->p_f_ghost,
                               sim->nz,
                               -1,
       t@@ -82,24 +82,24 @@ darcy_pressure_change_1d(const int i,
                                 const double beta_f,
                                 const double mu_f)
        {
       -        double p, p_zn, p_zp, k_, div_k_grad_p, k_zn, k_zp;
       -
       -        p    = p_f_ghost_in[idx1g(i)];
       -        p_zn = p_f_ghost_in[idx1g(i-1)];
       -        p_zp = p_f_ghost_in[idx1g(i+1)];
       +        double k_, div_k_grad_p, k_zn, k_zp;
        
                k_   = k[i];
                if (i==0) k_zn = k_; else k_zn = k[i-1];
                if (i==nz-1) k_zp = k_; else k_zp = k[i+1];
        #ifdef DEBUG
                printf("%d->%d: p=[%g, %g, %g]\tk=[%g, %g, %g]\n",
       -                i, idx1g(i),
       +                i, i+1,
                        p_zn, p, p_zp,
                        k_zn, k_, k_zp);
        #endif
        
       -        div_k_grad_p = (2.0*k_zp*k_/(k_zp + k_) * (p_zp - p)/dz - 
       -                        2.0*k_zn*k_/(k_zn + k_) * (p - p_zn)/dz 
       +        div_k_grad_p = (2.0*k_zp*k_/(k_zp + k_)
       +                        * (p_f_ghost_in[i+2]
       +                           - p_f_ghost_in[i+1])/dz
       +                        - 2.0*k_zn*k_/(k_zn + k_)
       +                        * (p_f_ghost_in[i+1]
       +                           - p_f_ghost_in[i])/dz 
                               )/dz; 
        
        #ifdef DEBUG
       t@@ -110,6 +110,38 @@ darcy_pressure_change_1d(const int i,
                return 1.0/(beta_f*phi[i]*mu_f)*div_k_grad_p; 
        }
        
       +/*void
       +darcy_pressure_change_1d(double* dp_f_dt,
       +                         const int nz,
       +                         const double* p_f_ghost_in,
       +                         const double* phi,
       +                         const double* k,
       +                         const double dz,
       +                         const double beta_f,
       +                         const double mu_f)
       +{
       +        int i;
       +        double k_, div_k_grad_p, k_zn, k_zp;
       +
       +#pragma omp parallel for private(i, k_, div_k_grad_p, k_zn, k_zp) if(nz>12)
       +        for (i=0; i<nz; ++i) {
       +
       +                k_   = k[i];
       +                if (i==0) k_zn = k_; else k_zn = k[i-1];
       +                if (i==nz-1) k_zp = k_; else k_zp = k[i+1];
       +
       +                div_k_grad_p = (2.0*k_zp*k_/(k_zp + k_)
       +                                * (p_f_ghost_in[i+2]
       +                                   - p_f_ghost_in[i+1])/dz
       +                                - 2.0*k_zn*k_/(k_zn + k_)
       +                                * (p_f_ghost_in[i+1]
       +                                   - p_f_ghost_in[i])/dz 
       +                               )/dz; 
       +
       +                dp_f_dt[i] = 1.0/(beta_f*phi[i]*mu_f)*div_k_grad_p; 
       +        }
       +}*/
       +
        int
        darcy_solver_1d(struct simulation* sim,
                        const int max_iter,
       t@@ -119,6 +151,8 @@ darcy_solver_1d(struct simulation* sim,
                double epsilon, theta, p_f_top, r_norm_max;
                double *dp_f_dt_expl;
                double *p_f_ghost_old, *dp_f_dt_impl, *p_f_ghost_new, *r_norm;
       +        
       +        r_norm_max = NAN;
        
                /* choose integration method, parameter in [0.0; 1.0]
                 *     epsilon = 0.0: explicit
       t@@ -184,25 +218,26 @@ darcy_solver_1d(struct simulation* sim,
                                                                                   sim->dz,
                                                                                   sim->beta_f,
                                                                                   sim->mu_f);
       +
                                for (i=0; i<sim->nz-1; ++i) {
        #ifdef DEBUG
                                        printf("dp_f_expl[%d] = %g\ndp_f_impl[%d] = %g\n",
                                               i, dp_f_dt_expl[i], i, dp_f_dt_impl[i]);
        #endif
        
       -                                p_f_ghost_new[idx1g(i)] = p_f_ghost_old[idx1g(i)]
       +                                p_f_ghost_new[i+1] = p_f_ghost_old[i+1]
                                                                  + epsilon*dp_f_dt_impl[i]*sim->dt;
        
                                        if (epsilon < 1.0)
       -                                        p_f_ghost_new[idx1g(i)] += (1.0 - epsilon)
       +                                        p_f_ghost_new[i+1] += (1.0 - epsilon)
                                                                           *dp_f_dt_expl[i]*sim->dt;
        
       -                                p_f_ghost_new[idx1g(i)] = p_f_ghost_old[idx1g(i)]*(1.0 - theta)
       -                                                          + p_f_ghost_new[idx1g(i)]*theta;
       +                                p_f_ghost_new[i+1] = p_f_ghost_old[i+1]*(1.0 - theta)
       +                                                          + p_f_ghost_new[i+1]*theta;
        
       -                                r_norm[i] = (p_f_ghost_new[idx1g(i)]
       -                                            - sim->p_f_ghost[idx1g(i)])
       -                                            /(sim->p_f_ghost[idx1g(i)] + 1e-16);
       +                                r_norm[i] = (p_f_ghost_new[i+1]
       +                                            - sim->p_f_ghost[i+1])
       +                                            /(sim->p_f_ghost[i+1] + 1e-16);
                                }
        
                                r_norm_max = max(r_norm, sim->nz);
       t@@ -237,7 +272,7 @@ darcy_solver_1d(struct simulation* sim,
                        }
                } else {
                        for (i=0; i<sim->nz; ++i)
       -                        sim->p_f_ghost[idx1g(i)] += dp_f_dt_expl[i]*sim->dt;
       +                        sim->p_f_ghost[i+1] += dp_f_dt_expl[i]*sim->dt;
                        solved = 1;
        #ifdef DEBUG
                        puts(".. dp_f_dt_expl:");
 (DIR) diff --git a/main.c b/main.c
       t@@ -311,6 +311,7 @@ main(int argc, char* argv[])
                filetimeclock = 0.0;
                iter = 0;
                mu_wall_orig = sim.mu_wall;
       +        res_norm = NAN;
                while (sim.t <= sim.t_end) {
        
                        stressiter = 0;
 (DIR) diff --git a/simulation.c b/simulation.c
       t@@ -253,7 +253,7 @@ compute_shear_strain_rate_plastic(struct simulation* sim)
        {
                int i;
                for (i=0; i<sim->nz; ++i)
       -                sim->gamma_dot_p[i] = shear_strain_rate_plastic(sim->g_ghost[idx1g(i)],
       +                sim->gamma_dot_p[i] = shear_strain_rate_plastic(sim->g_ghost[i+1],
                                                                        sim->mu[i]);
        }
        
       t@@ -276,7 +276,7 @@ compute_effective_stress(struct simulation* sim)
                int i;
                if (sim->fluid)
                        for (i=0; i<sim->nz; ++i)
       -                        sim->sigma_n_eff[i] = sim->sigma_n[i] - sim->p_f_ghost[idx1g(i)];
       +                        sim->sigma_n_eff[i] = sim->sigma_n[i] - sim->p_f_ghost[i+1];
                else
                        for (i=0; i<sim->nz; ++i)
                                sim->sigma_n_eff[i] = sim->sigma_n[i];
       t@@ -326,13 +326,13 @@ compute_local_fluidity(struct simulation* sim)
        {
                int i;
                for (i=0; i<sim->nz; ++i)
       -                sim->g_ghost[idx1g(i)] = local_fluidity(sim->sigma_n_eff[i],
       -                                                        sim->mu[i],
       -                                                        sim->mu_s,
       -                                                        sim->C,
       -                                                        sim->b,
       -                                                        sim->rho_s,
       -                                                        sim->d);
       +                sim->g_ghost[i+1] = local_fluidity(sim->sigma_n_eff[i],
       +                                                   sim->mu[i],
       +                                                   sim->mu_s,
       +                                                   sim->C,
       +                                                   sim->b,
       +                                                   sim->rho_s,
       +                                                   sim->d);
        }
        
        void
       t@@ -384,23 +384,21 @@ poisson_solver_1d_cell_update(int i,
                                      const double d)
        {
                double coorp_term;
       -        int gi;
        
                coorp_term = dz*dz/(2.0*pow(xi[i], 2.0));
       -        gi = idx1g(i);
       -        g_out[gi] = 1.0/(1.0 + coorp_term)*(coorp_term*
       +        g_out[i+1] = 1.0/(1.0 + coorp_term)*(coorp_term*
                            local_fluidity(p[i], mu[i], mu_s, C, b, rho_s, d)
       -                    + g_in[gi+1]/2.0
       -                    + g_in[gi-1]/2.0);
       +                    + g_in[i+2]/2.0
       +                    + g_in[i]/2.0);
        
       -        r_norm[i] = pow(g_out[gi] - g_in[gi], 2.0) / (pow(g_out[gi], 2.0) + 1e-16);
       +        r_norm[i] = pow(g_out[i+1] - g_in[i+1], 2.0) / (pow(g_out[i+1], 2.0) + 1e-16);
        
        #ifdef DEBUG
                printf("-- %d --------------\n", i);
                printf("coorp_term: %g\n", coorp_term);
                printf("   g_local: %g\n", local_fluidity(p[i], mu[i], mu_s, b, rho_s, d));
       -        printf("      g_in: %g\n", g_in[gi]);
       -        printf("     g_out: %g\n", g_out[gi]);
       +        printf("      g_in: %g\n", g_in[i+1]);
       +        printf("     g_out: %g\n", g_out[i+1]);
                printf("    r_norm: %g\n", r_norm[i]);
        #endif
        }
       t@@ -521,7 +519,7 @@ print_wet_output(FILE* fp, struct simulation* sim, const int norm)
                                sim->z[i],
                                v_x_out[i],
                                sim->sigma_n_eff[i],
       -                        sim->p_f_ghost[idx1g(i)],
       +                        sim->p_f_ghost[i+1],
                                sim->mu[i],
                                sim->gamma_dot_p[i]);