/*File: lennard-jones_ga.c
Purpose: to test Lennard-Jones problem using Tao/GA vectors using multiple processors.
Date created: August 16, 2002
*/
/**************************************************************
Author: Limin Zhang, Ph.D.
Mathematics Department
Columbia Basin College
Pasco, WA 99301
Limin.Zhang@cbc2.org
Mentor: Jarek Naplocha, Ph.D.
Environmental Molecular Science Laboratory
Pacific Northwest National Laboratory
Richland, WA 99352
Date: 4/22/2002
Purpose:
to design and implement an application for between TAO and
global arrays.
**************************************************************/
/* Program usage: mpirun -machinefile m -np 1 lennard-jones_ga2 [-help] [all TAO options] */
/* Program usage: mpirun -machinefile m -np 4 lennard-jones_ga2 [-help] [all TAO options] */
/* Include MPI header files */
#include <mpi.h>
/* Include tao header files so we can use TAO */
#include "tao.h"
#include "taoapp_ga.h"
/* Include ga header files so that we can use GA */
#include "ga.h"
#include "taovec_ga.h"
#include "macdecls.h"
/*some standard header files */
#include <iostream>
#include <stdio.h>
#include <stdlib.h> //for malloc function
#include <math.h>
#define NDIM 2
#define NATOMS 1000
#define BLOCKSIZE 500
#define MAX_BLOCKS 25600
static char help[] = "This example demonstrates use of the TAO package to \n\
solve an unconstrained minimization problem on multiple processors. We \n\
minimize the extended Lennard-Jones function.\n";
/*T
Concepts: TAO - Solving an unconstrained minimization problem
Routines: TaoInitialize(); TaoFinalize(); TaoSetFromOptions();
Routines: TaoGAApplicationCreate(); TaoSetApplication();
Routines: TaoCreate(); TaoSetGAFunctionGradient();
Routines: TaoSetGAHessian(); TaoSetGAInitialVector();
Routines: TaoSolve(); TaoDestroy(); TaoApplicationDestroy();
Routines: TaoGetTerminationReason();
Processors: >=1
T*/
/*
User-defined application context - contains data needed by the
application-provided call-back routines that evaluate the function,
gradient, and hessian.
*/
typedef struct {
int x;
int y;
} topo_t;
typedef struct
{
int ndim; /* dimension n = NDIM*NATOMS */
int natoms; /* number of atoms */
int me; /* proc id */
int nproc; /* number of processes */
int BlockSize;
int nBlocks;
int memHandle; /* memory handle for MA allocated work space */
int TopoHandle; /* memory handle for allocated btopo */
double *x1,*x2,*grad; /* work arrays */
topo_t btopo[MAX_BLOCKS]; /* block topography array */
int atomicTask; /* Atomic Task: 1-d array whose size=1 */
} AppCtx;
/* -------------- User-defined routines ---------- */
int FormFunctionGradient (TAO_GA_APPLICATION gaapp, GAVec ga_X, double *f,
GAVec ga_G, void *ptr);
int InitializeVariables(GAVec ga_x, AppCtx *user);
int SetupBlocks(AppCtx *user);
int getBlock(GAVec ga_x, int taskId, AppCtx *user);
int main (int argc, char **argv)
{
double startTime;
int info; /* used to check for functions returning nonzeros */
GAVec ga_x; /* solution vector */
TAO_SOLVER tao; /* TAO_SOLVER solver context */
TAO_GA_APPLICATION taoapp; /* TAO application context */
TaoTerminateReason reason;
AppCtx user; /* user-defined application context */
/*initialize GA and MPI */
int heap = 400000, stack = 400000;
MPI_Init (&argc, &argv); /* initialize MPI */
GA_Initialize (); /* initialize GA */
user.me = GA_Nodeid ();
user.nproc = GA_Nnodes ();
startTime = MPI_Wtime();
if (user.me == 0) {
if (GA_Uses_fapi ())
GA_Error ("Program runs with C array API only", 0);
printf ("Using %ld processes\n", (long) user.nproc);
fflush (stdout);
}
heap /= user.nproc;
stack /= user.nproc;
if (!MA_init (MT_F_DBL, stack, heap))
GA_Error ("MA_init failed", stack + heap); /* initialize memory allocator */
/* Initialize TAO */
TaoInitialize (&argc, &argv, (char *) 0, help);
/* Initialize problem parameters */
user.ndim = NDIM;
user.natoms = NATOMS;
user.BlockSize = BLOCKSIZE;
/* Allocate vectors for the solution and gradient */
int dims[2];
dims[0] = user.ndim*user.natoms;
ga_x = NGA_Create (C_DBL, 1, dims, "GA_X", NULL);
if (!ga_x) GA_Error ("lennard-jones.main::NGA_Create ga_x", ga_x);
/* Set up structures for data distribution */
info = SetupBlocks(&user); CHKERRQ(info);
/* The TAO code begins here */
/* Create TAO solver with desired solution method */
info = TaoCreate (MPI_COMM_WORLD, "tao_lmvm", &tao); CHKERRQ(info);
info = TaoGAApplicationCreate (MPI_COMM_WORLD, &taoapp); CHKERRQ(info);
/* Set the initial solution */
info = InitializeVariables(ga_x, &user); CHKERRQ(info);
info = TaoGAAppSetInitialSolutionVec(taoapp, ga_x); CHKERRQ(info);
/* Set routines for function, gradient */
info = TaoGAAppSetObjectiveAndGradientRoutine (taoapp, FormFunctionGradient,
(void *) &user); CHKERRQ(info);
/* Check for TAO command line options */
info = TaoSetFromOptions (tao); CHKERRQ(info);
/* SOLVE THE APPLICATION */
info = TaoSolveGAApplication (taoapp, tao); CHKERRQ(info);
/* To View TAO solver information use */
info = TaoView(tao); CHKERRQ(info);
/* Get termination information */
info = TaoGetTerminationReason (tao, &reason);
if(info) GA_Error("lennard-jones.main.TaoGetTerminationReason",info);
if (user.me == 0) {
if (reason <= 0)
printf("Try a different TAO method, adjust some parameters, or check the function evaluation routines\n");
printf("WALL TIME TAKEN = %lf\n", MPI_Wtime()-startTime);
/*output the solutions */
printf ("The solution is :\n");
}
GA_Print (ga_x);
/* Free TAO data structures */
info = TaoDestroy (tao); CHKERRQ(info);
info = TaoGAAppDestroy (taoapp); CHKERRQ(info);
/* Free GA data structures */
GA_Destroy (ga_x);
if (!MA_pop_stack(user.memHandle))
ga_error("Main::MA_pop_stack for memHandle failed",0);
/* Finalize TAO, GA, and MPI */
TaoFinalize ();
GA_Terminate ();
MPI_Finalize ();
return 0;
}
#define FUNCTION_GRAD_2D(is, js) \
xx = x_j[2*j] - x_i[2*i]; \
yy = x_j[2*j+1] - x_i[2*i+1]; \
rij = xx*xx + yy*yy; \
temp = 1.0/rij/rij/rij; \
*f += temp*(temp-2.0); \
temp *= 12.0*(temp-1.0)/rij; \
g[js + 2*j] -= xx*temp; \
g[js + 2*j + 1] -= yy*temp; \
g[is+2*i] += xx*temp; \
g[is + 2*i +1] += yy*temp;
int LJFG_2D(int taskId,double *f,AppCtx *user)
{
int b_x, b_y; /* block topology */
int i, j, start_i, start_j;
double xx, yy, rij, temp;
double *g, *x_i, *x_j;
b_x = user->btopo[taskId].x;
b_y = user->btopo[taskId].y;
start_i = user->BlockSize * b_x * 2;
start_j = user->BlockSize * b_y * 2;
g = user->grad;
x_i = user->x1;
x_j = user->x2;
if (b_x == b_y) /* compute lower triangular matrix only */
for (i=1; i<user->BlockSize; i++)
for (j=0; j<i; j++) {
FUNCTION_GRAD_2D(start_i, start_j);
}
else if (b_x > b_y) /* compute right half of the block */
for (i=0; i<user->BlockSize; i++)
for (j=user->BlockSize/2; j<user->BlockSize; j++) {
FUNCTION_GRAD_2D(start_i, start_j);
}
else /* Compute upper half of the block */
for (i=0; i<user->BlockSize/2; i++)
for (j=0; j<user->BlockSize; j++) {
FUNCTION_GRAD_2D(start_i, start_j);
}
return 0;
}
#define FUNCTION_GRAD_3D(is, js) \
xx = x_j[3*j] - x_i[3*i]; \
yy = x_j[3*j+1] - x_i[3*i+1]; \
zz = x_j[3*j+2] - x_i[3*i+2]; \
rij = xx*xx + yy*yy + zz*zz; \
temp = 1.0/rij/rij/rij; \
*f += temp*(temp-2.0); \
temp *= 12.0*(temp-1.0)/rij; \
g[js + 3*j] -= xx*temp; \
g[js + 3*j + 1] -= yy*temp; \
g[js + 3*j + 2] -= zz*temp; \
g[is + 3*i] += xx*temp; \
g[is + 3*i +1] += yy*temp; \
g[is + 3*i +2] += zz*temp;
int LJFG_3D(int taskId,double *f,AppCtx *user)
{
int b_x, b_y; /* block topology */
int i, j, start_i, start_j;
double xx, yy, zz, rij, temp;
double *g, *x_i, *x_j;
b_x = user->btopo[taskId].x;
b_y = user->btopo[taskId].y;
start_i = user->BlockSize * b_x * 3;
start_j = user->BlockSize * b_y * 3;
g = user->grad;
x_i = user->x1;
x_j = user->x2;
if (b_x == b_y) /* compute lower triangular matrix only */
for (i=1; i<user->BlockSize; i++)
for (j=0; j<i; j++) {
FUNCTION_GRAD_3D(start_i, start_j);
}
else if (b_x > b_y) /* compute right half of the block */
for (i=0; i<user->BlockSize; i++)
for (j=user->BlockSize/2; j<user->BlockSize; j++) {
FUNCTION_GRAD_3D(start_i, start_j);
}
else /* Compute upper half of the block */
for (i=0; i<user->BlockSize/2; i++)
for (j=0; j<user->BlockSize; j++) {
FUNCTION_GRAD_3D(start_i, start_j);
}
return 0;
}
/* -------------------------------------------------------------------- */
/*
FormFunctionGradient - Evaluates the function, f(X), and gradient, G(X).
Input Parameters:
. tao - the TAO_SOLVER context
. ga_X - input vector
. ptr - optional user-defined context, as set by TaoSetFunctionGradient()
Output Parameters:
. ga_G - vector containing the newly evaluated gradient
. f - function value
Note:
Some optimization methods ask for the function and the gradient evaluation
at the same time. Evaluating both at once may be more efficient that
evaluating each separately.
*/
int FormFunctionGradient (TAO_GA_APPLICATION gaapp, GAVec ga_X, double *f, GAVec ga_G, void *ptr)
{
AppCtx *user = (AppCtx *) ptr;
int lo, hi;
int taskId=user->me; //Which task am I running
int i;
int zero = 0;
/* reset atomicTask to nproc */
if (user->me == 0)
NGA_Put(user->atomicTask, &zero, &zero, &user->nproc, &zero);
for (i=0;i<user->natoms*user->ndim; i++)
user->grad[i] = 0.0;
*f = 0.0;
while (taskId < user->nBlocks) {
getBlock(ga_X, taskId, user);
if (user->ndim == 2)
LJFG_2D(taskId,f, user);
else
LJFG_3D(taskId,f, user);
/* Get next block */
taskId += user->nproc;
//NGA_Read_inc(user->atomicTask, &zero, 1);
}
/* gather function */
GA_Dgop(f, 1, "+");
/* gather gradient */
GA_Dgop(user->grad, user->natoms*user->ndim, "+");
NGA_Distribution(ga_G, user->me, &lo, &hi);
NGA_Put(ga_G, &lo, &hi, user->grad+lo, &hi);
GA_Sync();
return 0;
}
InitializeVariables">int InitializeVariables(GAVec ga_x, AppCtx *user) {
double *x;
int lo, hi, n, handle;
double xx,yy,zz;
int i,j,k, il, jl, ctr, icrtn, ileft, isqrtn;
// This method is not parallelized
if (user->me != 0) {
GA_Sync();
return 0;
}
n = user->ndim * user->natoms + 1;
if (MA_push_stack(C_DBL, n, "InitializeVariables buffer", &handle))
MA_get_pointer(handle, &x);
else
ga_error("ma_alloc_get failed", n);
lo = 0;
hi = n-2;
if (user->ndim == 2) {
isqrtn = int(sqrt(user->natoms));
ileft = user->natoms - isqrtn * isqrtn;
xx = yy = 0.0;
for (j=0; j <= isqrtn + ileft/isqrtn; j++) {
for (i=0; i<TaoMin(isqrtn, user->natoms - j * isqrtn); i++) {
ctr = j * isqrtn + i;
x[2*ctr] = xx;
x[2*ctr + 1] = yy;
xx += 1.0;
}
yy += 1.0;
xx = 0.0;
}
} else if (user->ndim == 3) {
icrtn = (int)pow((int)(user->natoms + 0.5),1.0/3.0);
ileft = user->natoms - icrtn*icrtn*icrtn;
xx = yy = zz = 0.0;
for (k=0; k<=icrtn + ileft/(icrtn*icrtn); k++) {
jl = TaoMin(icrtn, (user->natoms - k*icrtn*icrtn)/icrtn + 1);
for (j=0; j<jl; j++) {
il = TaoMin(icrtn, user->natoms - k*icrtn*icrtn - j*icrtn);
for (i = 0; i<il; i++) {
ctr = k*icrtn*icrtn + j*icrtn + i;
x[3*ctr] = xx;
x[3*ctr + 1] = yy;
x[3*ctr + 2] = zz;
xx += 1.0;
}
yy += 1.0;
xx = 0.0;
}
zz += 1.0;
yy = 0.0;
}
}
// Distribute the array
NGA_Put(ga_x, &lo, &hi, x, &hi);
if (!MA_pop_stack(handle))
ga_error("InitializeVariables:MA_pop_stack failed",0);
GA_Sync();
return 0;
}
/**
* Block Topology (of Force Matrix):
* Say for example: If there are 4 block and 100 atoms, the size of
* the force matrix is 100x100 and each block size is 50x50.
*
* -----------
* | | |
* | 0,0 | 0,1 |
* -----------
* | 1,0 | 1,1 |
* | | |
* -----------
*/
int SetupBlocks(AppCtx *user)
{
int i,j,k=0;
int n;
int zero = 0;
int x_space, g_space;
if (user->natoms % user->BlockSize) {
GA_Error("Number of atoms should be a multiple of block size. Choose a different block size.", 0L);
}
n = user->natoms / user->BlockSize;
user->nBlocks = n*n;
if (user->nBlocks > MAX_BLOCKS)
GA_Error("Number of blocks is greater that MAX_BLOCKS: Solution is either to increase the defined MAX_BLOCKS or increase your block size",0L);
if (user->nBlocks < user->nproc)
GA_Error("Number of blocks should be greater than or equal to the number of processors",0L);
for (i=0;i<n;i++)
for (j=0;j<n;j++,k++) {
user->btopo[k].x = i;
user->btopo[k].y = j;
}
/* Create task array */
n = 1;
user->atomicTask = NGA_Create(C_INT, 1, &n, "Atomic Task", NULL);
if (!user->atomicTask)
GA_Error("NGA_Create failed for Atomic Task",0);
if (user->me == 0)
NGA_Put(user->atomicTask, &zero, &zero, &user->nproc, &zero);
/* space for x values from two processors */
x_space = 2 * user->BlockSize * user->ndim;
/* space for ALL gradient value */
g_space = user->natoms * user->ndim;
if (MA_push_stack(C_DBL, x_space + g_space+3, "GA LJ bufs", &user->memHandle))
MA_get_pointer(user->memHandle, &user->x1);
else
GA_Error("ma_alloc_get failed",x_space + g_space);
user->x2 = user->x1 + x_space/2 + 1;
user->grad = user->x2 + x_space/2 + 1;
GA_Sync();
return 0;
}
int getBlock(GAVec g_x, int taskId, AppCtx *user)
{
int lo, hi;
int size;
size = user->BlockSize * user->ndim;
/* get the coordinates of the atoms in the corresponding rows in the block */
lo = user->btopo[taskId].x*size;
hi = lo + size -1;
NGA_Get(g_x, &lo, &hi, user->x1, &hi);
/* get the coordinates of the atoms in the corresponding cols in the block */
lo = user->btopo[taskId].y*size;
hi = lo + size - 1;
NGA_Get(g_x, &lo, &hi, user->x2, &hi);
return 0;
}