/*File: lennard-jones_ga.c Purpose: to test Lennard-Jones problem using Tao/GA vectors using multiple processors. Date created: August 16, 2002 */ /************************************************************** Author: Limin Zhang, Ph.D. Mathematics Department Columbia Basin College Pasco, WA 99301 Limin.Zhang@cbc2.org Mentor: Jarek Naplocha, Ph.D. Environmental Molecular Science Laboratory Pacific Northwest National Laboratory Richland, WA 99352 Date: 4/22/2002 Purpose: to design and implement an application for between TAO and global arrays. **************************************************************/ /* Program usage: mpirun -machinefile m -np 1 lennard-jones_ga2 [-help] [all TAO options] */ /* Program usage: mpirun -machinefile m -np 4 lennard-jones_ga2 [-help] [all TAO options] */ /* Include MPI header files */ #include <mpi.h> /* Include tao header files so we can use TAO */ #include "tao.h" #include "taoapp_ga.h" /* Include ga header files so that we can use GA */ #include "ga.h" #include "taovec_ga.h" #include "macdecls.h" /*some standard header files */ #include <iostream> #include <stdio.h> #include <stdlib.h> //for malloc function #include <math.h> #define NDIM 2 #define NATOMS 1000 #define BLOCKSIZE 500 #define MAX_BLOCKS 25600 static char help[] = "This example demonstrates use of the TAO package to \n\ solve an unconstrained minimization problem on multiple processors. We \n\ minimize the extended Lennard-Jones function.\n"; /*T Concepts: TAO - Solving an unconstrained minimization problem Routines: TaoInitialize(); TaoFinalize(); TaoSetFromOptions(); Routines: TaoGAApplicationCreate(); TaoSetApplication(); Routines: TaoCreate(); TaoSetGAFunctionGradient(); Routines: TaoSetGAHessian(); TaoSetGAInitialVector(); Routines: TaoSolve(); TaoDestroy(); TaoApplicationDestroy(); Routines: TaoGetTerminationReason(); Processors: >=1 T*/ /* User-defined application context - contains data needed by the application-provided call-back routines that evaluate the function, gradient, and hessian. */ typedef struct { int x; int y; } topo_t; typedef struct { int ndim; /* dimension n = NDIM*NATOMS */ int natoms; /* number of atoms */ int me; /* proc id */ int nproc; /* number of processes */ int BlockSize; int nBlocks; int memHandle; /* memory handle for MA allocated work space */ int TopoHandle; /* memory handle for allocated btopo */ double *x1,*x2,*grad; /* work arrays */ topo_t btopo[MAX_BLOCKS]; /* block topography array */ int atomicTask; /* Atomic Task: 1-d array whose size=1 */ } AppCtx; /* -------------- User-defined routines ---------- */ int FormFunctionGradient (TAO_GA_APPLICATION gaapp, GAVec ga_X, double *f, GAVec ga_G, void *ptr); int InitializeVariables(GAVec ga_x, AppCtx *user); int SetupBlocks(AppCtx *user); int getBlock(GAVec ga_x, int taskId, AppCtx *user); int main (int argc, char **argv) { double startTime; int info; /* used to check for functions returning nonzeros */ GAVec ga_x; /* solution vector */ TAO_SOLVER tao; /* TAO_SOLVER solver context */ TAO_GA_APPLICATION taoapp; /* TAO application context */ TaoTerminateReason reason; AppCtx user; /* user-defined application context */ /*initialize GA and MPI */ int heap = 400000, stack = 400000; MPI_Init (&argc, &argv); /* initialize MPI */ GA_Initialize (); /* initialize GA */ user.me = GA_Nodeid (); user.nproc = GA_Nnodes (); startTime = MPI_Wtime(); if (user.me == 0) { if (GA_Uses_fapi ()) GA_Error ("Program runs with C array API only", 0); printf ("Using %ld processes\n", (long) user.nproc); fflush (stdout); } heap /= user.nproc; stack /= user.nproc; if (!MA_init (MT_F_DBL, stack, heap)) GA_Error ("MA_init failed", stack + heap); /* initialize memory allocator */ /* Initialize TAO */ TaoInitialize (&argc, &argv, (char *) 0, help); /* Initialize problem parameters */ user.ndim = NDIM; user.natoms = NATOMS; user.BlockSize = BLOCKSIZE; /* Allocate vectors for the solution and gradient */ int dims[2]; dims[0] = user.ndim*user.natoms; ga_x = NGA_Create (C_DBL, 1, dims, "GA_X", NULL); if (!ga_x) GA_Error ("lennard-jones.main::NGA_Create ga_x", ga_x); /* Set up structures for data distribution */ info = SetupBlocks(&user); CHKERRQ(info); /* The TAO code begins here */ /* Create TAO solver with desired solution method */ info = TaoCreate (MPI_COMM_WORLD, "tao_lmvm", &tao); CHKERRQ(info); info = TaoGAApplicationCreate (MPI_COMM_WORLD, &taoapp); CHKERRQ(info); /* Set the initial solution */ info = InitializeVariables(ga_x, &user); CHKERRQ(info); info = TaoGAAppSetInitialSolutionVec(taoapp, ga_x); CHKERRQ(info); /* Set routines for function, gradient */ info = TaoGAAppSetObjectiveAndGradientRoutine (taoapp, FormFunctionGradient, (void *) &user); CHKERRQ(info); /* Check for TAO command line options */ info = TaoSetFromOptions (tao); CHKERRQ(info); /* SOLVE THE APPLICATION */ info = TaoSolveGAApplication (taoapp, tao); CHKERRQ(info); /* To View TAO solver information use */ info = TaoView(tao); CHKERRQ(info); /* Get termination information */ info = TaoGetTerminationReason (tao, &reason); if(info) GA_Error("lennard-jones.main.TaoGetTerminationReason",info); if (user.me == 0) { if (reason <= 0) printf("Try a different TAO method, adjust some parameters, or check the function evaluation routines\n"); printf("WALL TIME TAKEN = %lf\n", MPI_Wtime()-startTime); /*output the solutions */ printf ("The solution is :\n"); } GA_Print (ga_x); /* Free TAO data structures */ info = TaoDestroy (tao); CHKERRQ(info); info = TaoGAAppDestroy (taoapp); CHKERRQ(info); /* Free GA data structures */ GA_Destroy (ga_x); if (!MA_pop_stack(user.memHandle)) ga_error("Main::MA_pop_stack for memHandle failed",0); /* Finalize TAO, GA, and MPI */ TaoFinalize (); GA_Terminate (); MPI_Finalize (); return 0; } #define FUNCTION_GRAD_2D(is, js) \ xx = x_j[2*j] - x_i[2*i]; \ yy = x_j[2*j+1] - x_i[2*i+1]; \ rij = xx*xx + yy*yy; \ temp = 1.0/rij/rij/rij; \ *f += temp*(temp-2.0); \ temp *= 12.0*(temp-1.0)/rij; \ g[js + 2*j] -= xx*temp; \ g[js + 2*j + 1] -= yy*temp; \ g[is+2*i] += xx*temp; \ g[is + 2*i +1] += yy*temp; int LJFG_2D(int taskId,double *f,AppCtx *user) { int b_x, b_y; /* block topology */ int i, j, start_i, start_j; double xx, yy, rij, temp; double *g, *x_i, *x_j; b_x = user->btopo[taskId].x; b_y = user->btopo[taskId].y; start_i = user->BlockSize * b_x * 2; start_j = user->BlockSize * b_y * 2; g = user->grad; x_i = user->x1; x_j = user->x2; if (b_x == b_y) /* compute lower triangular matrix only */ for (i=1; i<user->BlockSize; i++) for (j=0; j<i; j++) { FUNCTION_GRAD_2D(start_i, start_j); } else if (b_x > b_y) /* compute right half of the block */ for (i=0; i<user->BlockSize; i++) for (j=user->BlockSize/2; j<user->BlockSize; j++) { FUNCTION_GRAD_2D(start_i, start_j); } else /* Compute upper half of the block */ for (i=0; i<user->BlockSize/2; i++) for (j=0; j<user->BlockSize; j++) { FUNCTION_GRAD_2D(start_i, start_j); } return 0; } #define FUNCTION_GRAD_3D(is, js) \ xx = x_j[3*j] - x_i[3*i]; \ yy = x_j[3*j+1] - x_i[3*i+1]; \ zz = x_j[3*j+2] - x_i[3*i+2]; \ rij = xx*xx + yy*yy + zz*zz; \ temp = 1.0/rij/rij/rij; \ *f += temp*(temp-2.0); \ temp *= 12.0*(temp-1.0)/rij; \ g[js + 3*j] -= xx*temp; \ g[js + 3*j + 1] -= yy*temp; \ g[js + 3*j + 2] -= zz*temp; \ g[is + 3*i] += xx*temp; \ g[is + 3*i +1] += yy*temp; \ g[is + 3*i +2] += zz*temp; int LJFG_3D(int taskId,double *f,AppCtx *user) { int b_x, b_y; /* block topology */ int i, j, start_i, start_j; double xx, yy, zz, rij, temp; double *g, *x_i, *x_j; b_x = user->btopo[taskId].x; b_y = user->btopo[taskId].y; start_i = user->BlockSize * b_x * 3; start_j = user->BlockSize * b_y * 3; g = user->grad; x_i = user->x1; x_j = user->x2; if (b_x == b_y) /* compute lower triangular matrix only */ for (i=1; i<user->BlockSize; i++) for (j=0; j<i; j++) { FUNCTION_GRAD_3D(start_i, start_j); } else if (b_x > b_y) /* compute right half of the block */ for (i=0; i<user->BlockSize; i++) for (j=user->BlockSize/2; j<user->BlockSize; j++) { FUNCTION_GRAD_3D(start_i, start_j); } else /* Compute upper half of the block */ for (i=0; i<user->BlockSize/2; i++) for (j=0; j<user->BlockSize; j++) { FUNCTION_GRAD_3D(start_i, start_j); } return 0; } /* -------------------------------------------------------------------- */ /* FormFunctionGradient - Evaluates the function, f(X), and gradient, G(X). Input Parameters: . tao - the TAO_SOLVER context . ga_X - input vector . ptr - optional user-defined context, as set by TaoSetFunctionGradient() Output Parameters: . ga_G - vector containing the newly evaluated gradient . f - function value Note: Some optimization methods ask for the function and the gradient evaluation at the same time. Evaluating both at once may be more efficient that evaluating each separately. */ int FormFunctionGradient (TAO_GA_APPLICATION gaapp, GAVec ga_X, double *f, GAVec ga_G, void *ptr) { AppCtx *user = (AppCtx *) ptr; int lo, hi; int taskId=user->me; //Which task am I running int i; int zero = 0; /* reset atomicTask to nproc */ if (user->me == 0) NGA_Put(user->atomicTask, &zero, &zero, &user->nproc, &zero); for (i=0;i<user->natoms*user->ndim; i++) user->grad[i] = 0.0; *f = 0.0; while (taskId < user->nBlocks) { getBlock(ga_X, taskId, user); if (user->ndim == 2) LJFG_2D(taskId,f, user); else LJFG_3D(taskId,f, user); /* Get next block */ taskId += user->nproc; //NGA_Read_inc(user->atomicTask, &zero, 1); } /* gather function */ GA_Dgop(f, 1, "+"); /* gather gradient */ GA_Dgop(user->grad, user->natoms*user->ndim, "+"); NGA_Distribution(ga_G, user->me, &lo, &hi); NGA_Put(ga_G, &lo, &hi, user->grad+lo, &hi); GA_Sync(); return 0; } InitializeVariables">int InitializeVariables(GAVec ga_x, AppCtx *user) { double *x; int lo, hi, n, handle; double xx,yy,zz; int i,j,k, il, jl, ctr, icrtn, ileft, isqrtn; // This method is not parallelized if (user->me != 0) { GA_Sync(); return 0; } n = user->ndim * user->natoms + 1; if (MA_push_stack(C_DBL, n, "InitializeVariables buffer", &handle)) MA_get_pointer(handle, &x); else ga_error("ma_alloc_get failed", n); lo = 0; hi = n-2; if (user->ndim == 2) { isqrtn = int(sqrt(user->natoms)); ileft = user->natoms - isqrtn * isqrtn; xx = yy = 0.0; for (j=0; j <= isqrtn + ileft/isqrtn; j++) { for (i=0; i<TaoMin(isqrtn, user->natoms - j * isqrtn); i++) { ctr = j * isqrtn + i; x[2*ctr] = xx; x[2*ctr + 1] = yy; xx += 1.0; } yy += 1.0; xx = 0.0; } } else if (user->ndim == 3) { icrtn = (int)pow((int)(user->natoms + 0.5),1.0/3.0); ileft = user->natoms - icrtn*icrtn*icrtn; xx = yy = zz = 0.0; for (k=0; k<=icrtn + ileft/(icrtn*icrtn); k++) { jl = TaoMin(icrtn, (user->natoms - k*icrtn*icrtn)/icrtn + 1); for (j=0; j<jl; j++) { il = TaoMin(icrtn, user->natoms - k*icrtn*icrtn - j*icrtn); for (i = 0; i<il; i++) { ctr = k*icrtn*icrtn + j*icrtn + i; x[3*ctr] = xx; x[3*ctr + 1] = yy; x[3*ctr + 2] = zz; xx += 1.0; } yy += 1.0; xx = 0.0; } zz += 1.0; yy = 0.0; } } // Distribute the array NGA_Put(ga_x, &lo, &hi, x, &hi); if (!MA_pop_stack(handle)) ga_error("InitializeVariables:MA_pop_stack failed",0); GA_Sync(); return 0; } /** * Block Topology (of Force Matrix): * Say for example: If there are 4 block and 100 atoms, the size of * the force matrix is 100x100 and each block size is 50x50. * * ----------- * | | | * | 0,0 | 0,1 | * ----------- * | 1,0 | 1,1 | * | | | * ----------- */ int SetupBlocks(AppCtx *user) { int i,j,k=0; int n; int zero = 0; int x_space, g_space; if (user->natoms % user->BlockSize) { GA_Error("Number of atoms should be a multiple of block size. Choose a different block size.", 0L); } n = user->natoms / user->BlockSize; user->nBlocks = n*n; if (user->nBlocks > MAX_BLOCKS) GA_Error("Number of blocks is greater that MAX_BLOCKS: Solution is either to increase the defined MAX_BLOCKS or increase your block size",0L); if (user->nBlocks < user->nproc) GA_Error("Number of blocks should be greater than or equal to the number of processors",0L); for (i=0;i<n;i++) for (j=0;j<n;j++,k++) { user->btopo[k].x = i; user->btopo[k].y = j; } /* Create task array */ n = 1; user->atomicTask = NGA_Create(C_INT, 1, &n, "Atomic Task", NULL); if (!user->atomicTask) GA_Error("NGA_Create failed for Atomic Task",0); if (user->me == 0) NGA_Put(user->atomicTask, &zero, &zero, &user->nproc, &zero); /* space for x values from two processors */ x_space = 2 * user->BlockSize * user->ndim; /* space for ALL gradient value */ g_space = user->natoms * user->ndim; if (MA_push_stack(C_DBL, x_space + g_space+3, "GA LJ bufs", &user->memHandle)) MA_get_pointer(user->memHandle, &user->x1); else GA_Error("ma_alloc_get failed",x_space + g_space); user->x2 = user->x1 + x_space/2 + 1; user->grad = user->x2 + x_space/2 + 1; GA_Sync(); return 0; } int getBlock(GAVec g_x, int taskId, AppCtx *user) { int lo, hi; int size; size = user->BlockSize * user->ndim; /* get the coordinates of the atoms in the corresponding rows in the block */ lo = user->btopo[taskId].x*size; hi = lo + size -1; NGA_Get(g_x, &lo, &hi, user->x1, &hi); /* get the coordinates of the atoms in the corresponding cols in the block */ lo = user->btopo[taskId].y*size; hi = lo + size - 1; NGA_Get(g_x, &lo, &hi, user->x2, &hi); return 0; }