! "$Id: plate2f.F 1.47 05/05/12 13:59:03-05:00 sarich@zorak.(none) $";
!
!  Program usage: mpirun -np <proc> plate2f [all TAO options] 
!
!  This example demonstrates use of the TAO package to solve a bound constrained
!  minimization problem.  This example is based on a problem from the
!  MINPACK-2 test suite.  Given a rectangular 2-D domain and boundary values
!  along the edges of the domain, the objective is to find the surface
!  with the minimal area that satisfies the boundary conditions.
!  The command line options are:
!    -mx <xg>, where <xg> = number of grid points in the 1st coordinate direction
!    -my <yg>, where <yg> = number of grid points in the 2nd coordinate direction
!    -bmx <bxg>, where <bxg> = number of grid points under plate in 1st direction
!    -bmy <byg>, where <byg> = number of grid points under plate in 2nd direction
!    -bheight <ht>, where <ht> = height of the plate
!
!/*T
!   Concepts: TAO - Solving a bound constrained minimization problem
!   Routines: TaoInitialize(); TaoFinalize();
!   Routines: TaoCreate(); TaoDestroy();
!   Routines: TaoAppSetObjectiveAndGradientRoutine(); 
!   Routines: TaoAppSetHessianMat(); TaoAppSetHessianRoutine();
!   Routines: TaoAppSetInitialSolutionVec(); TaoAppSetVariableBounds();
!   Routines: TaoSetApplication(); TaoSetOptions();
!   Routines: TaoApplicationCreate(); TaoSolve();
!   Routines: TaoView(); TaoAppDestroy();
!   Processors: n
!T*/



      implicit none

#include "plate2f.h"

! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
!                   Variable declarations
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
!
!  Variables:
!    (common from plate2f.h):
!    Nx, Ny           number of processors in x- and y- directions
!    mx, my           number of grid points in x,y directions
!    N    global dimension of vector

      integer          info          ! used to check for functions returning nonzeros
      Vec              x             ! solution vector
      Vec              xl, xu        ! lower and upper bounds vectorsp
      integer          m             ! number of local elements in vector
      TAO_SOLVER       tao           ! TAO_SOLVER solver context
      TAO_APPLICATION  plateapp      ! PETSc application
      Mat              H             ! Hessian matrix
      ISLocalToGlobalMapping isltog  ! local to global mapping object
      PetscTruth       flg


      external FormFunctionGradient
      external FormHessian
      external MSA_BoundaryConditions
      external MSA_Plate
      external MSA_InitialPoint
! Initialize Tao
      call PetscInitialize(PETSC_NULL_CHARACTER,info)
      call TaoInitialize(PETSC_NULL_CHARACTER,info)
      

! Specify default dimensions of the problem
      mx = 10
      my = 10
      bheight = 0.1

! Check for any command line arguments that override defaults      
      
      call PetscOptionsGetInt(TAO_NULL_CHARACTER,"-mx",mx,flg,info)
      call PetscOptionsGetInt(TAO_NULL_CHARACTER,"-my",my,flg,info)
      
      bmx = mx/2
      bmy = my/2

      call PetscOptionsGetInt(TAO_NULL_CHARACTER,"-bmx",bmx,flg,info)
      call PetscOptionsGetInt(TAO_NULL_CHARACTER,"-bmy",bmy,flg,info)
      call PetscOptionsGetReal(TAO_NULL_CHARACTER,"-bheight",bheight,   &
     &      flg,info)
      

! Calculate any derived values from parameters
      N = mx*my

! Let Petsc determine the dimensions of the local vectors 
      Nx = PETSC_DECIDE
      NY = PETSC_DECIDE

! A two dimensional distributed array will help define this problem, which
! derives from an elliptic PDE on a two-dimensional domain.  From the 
! distributed array, create the vectors

      call DACreate2d(MPI_COMM_WORLD,DA_NONPERIODIC,DA_STENCIL_BOX,         &
     &     mx,my,Nx,Ny,1,1,PETSC_NULL_INTEGER,PETSC_NULL_INTEGER,           &
     &     da,info)
      

! Extract global and local vectors from DA; The local vectors are
! used solely as work space for the evaluation of the function, 
! gradient, and Hessian.  Duplicate for remaining vectors that are 
! the same types.

      call DACreateGlobalVector(da,x,info)
      call DACreateLocalVector(da,localX,info)
      call VecDuplicate(localX,localV,info)

! Create a matrix data structure to store the Hessian.
! Here we (optionally) also associate the local numbering scheme
! with the matrix so that later we can use local indices for matrix
! assembly

      call VecGetLocalSize(x,m,info)
      call MatCreateMPIAIJ(MPI_COMM_WORLD,m,m,N,N,7,PETSC_NULL_INTEGER,  &
     &     3,PETSC_NULL_INTEGER,H,info)

      call MatSetOption(H,MAT_SYMMETRIC,info)
      call DAGetISLocalToGlobalMapping(da,isltog,info)
      call MatSetLocalToGlobalMapping(H,isltog,info)
      

! The Tao code begins here
! Create TAO solver and set desired solution method.
! This problems uses bounded variables, so the
! method must either be 'tao_tron' or 'tao_blmvm'

      call TaoCreate(MPI_COMM_WORLD,'tao_blmvm',tao,info)
      call TaoApplicationCreate(MPI_COMM_WORLD,plateapp,info)
      

!     Set minimization function and gradient, hessian evaluation functions

!     TaoAppSetObjectiveAndGradientRoutine is shortened to 31 chars to comply with some compilers
      call TaoAppSetObjectiveAndGradientRo(plateapp,                    &
     &     FormFunctionGradient,PETSC_NULL_OBJECT,info)

      call TaoAppSetHessianMat(plateapp,H,H,info)
      call TaoAppSetHessianRoutine(plateapp,FormHessian,                 &
     &     PETSC_NULL_OBJECT, info)
      

! Set Variable bounds 
      call MSA_BoundaryConditions(info)
      call VecDuplicate(x,xl,info)
      call VecDuplicate(x,xu,info)
      call MSA_Plate(xl,xu,info)
      call TaoAppSetVariableBounds(plateapp,xl,xu,info)

! Set the initial solution guess
      call MSA_InitialPoint(x, info)
      call TaoAppSetInitialSolutionVec(plateapp,x,info)

! Now that the PETSc application is all set, attach to tao context 
      call TaoSetApplication(tao,plateapp,info)

! Check for any tao command line options
      call TaoSetOptions(plateapp,tao,info)

! Solve the application
      call TaoSolveApplication(plateapp,tao,info)

!     View TAO solver information
!      call TaoView(tao,info)

! Free TAO data structures
      call TaoDestroy(tao,info)
      call TaoAppDestroy(plateapp,info)  

! Free PETSc data structures
      call VecDestroy(x,info)
      call VecDestroy(xl,info)
      call VecDestroy(xu,info)
      call VecDestroy(Top,info)
      call VecDestroy(Bottom,info)
      call VecDestroy(Left,info)
      call VecDestroy(Right,info)
      call MatDestroy(H,info)
      call VecDestroy(localX,info)
      call VecDestroy(localV,info)
      call DADestroy(da,info)

! Finalize TAO 

      call TaoFinalize(info)
      call PetscFinalize(info)

      end

! ---------------------------------------------------------------------
!
!  FormFunctionGradient - Evaluates function f(X). 
!    
!  Input Parameters:
!  tao   - the TAO_SOLVER context
!  X     - the input vector 
!  dummy - optional user-defined context, as set by TaoSetFunction()
!          (not used here)
!
!  Output Parameters:
!  fcn     - the newly evaluated function
!  G       - the gradient vector
!  info  - error code
!


      subroutine FormFunctionGradient(tao,X,fcn,G,dummy,info)
      implicit none

! da, localX, localG, Top, Bottom, Left, Right defined in plate2f.h
#include "plate2f.h"
      
! Input/output variables

      TAO_SOLVER       tao
      PetscScalar fcn
      Vec              X, G
      integer          dummy, info
      
      integer          i,j,row
      integer          xs, xm, gxs, gxm, ys, ym, gys, gym
      PetscScalar      ft,zero,hx,hy,hydhx,hxdhy,area,rhx,rhy
      PetscScalar      f1,f2,f3,f4,f5,f6,d1,d2,d3,d4,d5,d6,d7,d8
      PetscScalar      df1dxc,df2dxc,df3dxc,df4dxc,df5dxc,df6dxc
      PetscScalar      xc,xl,xr,xt,xb,xlt,xrb


! PETSc's VecGetArray acts differently in Fortran than it does in C.
! Calling VecGetArray((Vec) X, (PetscScalar) x_array(0:1), (PetscOffset) x_index, info)
! will return an array of doubles referenced by x_array offset by x_index.
!  i.e.,  to reference the kth element of X, use x_array(k + x_index).
! Notice that by declaring the arrays with range (0:1), we are using the C 0-indexing practice.
      PetscScalar          g_v(0:1),x_v(0:1), top_v(0:1)
      PetscScalar          left_v(0:1), right_v(0:1),bottom_v(0:1)
      PetscOffset          g_i,left_i,right_i,bottom_i,top_i
      PetscOffset          x_i

      ft = 0.0d0
      zero = 0.0d0
      hx = 1.0d0/(mx + 1)
      hy = 1.0d0/(my + 1)
      hydhx = hy/hx
      hxdhy = hx/hy
      area = 0.5d0 * hx * hy
      rhx = mx + 1.0d0
      rhy = my + 1.0d0


! Get local mesh boundaries
      call DAGetCorners(da,xs,ys,PETSC_NULL_INTEGER,xm,ym,              &
     &                  PETSC_NULL_INTEGER,info)
      call DAGetGhostCorners(da,gxs,gys,PETSC_NULL_INTEGER,             &
     &                       gxm,gym,PETSC_NULL_INTEGER,info)

! Scatter ghost points to local vector
      call DAGlobalToLocalBegin(da,X,INSERT_VALUES,localX,info)
      call DAGlobalToLocalEnd(da,X,INSERT_VALUES,localX,info)

! Initialize the vector to zero
      call VecSet(localV,zero,info)

! Get arrays to vector data (See note above about using VecGetArray in Fortran)
      call VecGetArray(localX,x_v,x_i,info)
      call VecGetArray(localV,g_v,g_i,info)
      call VecGetArray(Top,top_v,top_i,info)
      call VecGetArray(Bottom,bottom_v,bottom_i,info)
      call VecGetArray(Left,left_v,left_i,info)
      call VecGetArray(Right,right_v,right_i,info)

! Compute function over the locally owned part of the mesh       
      do j = ys,ys+ym-1
         do i = xs,xs+xm-1
            row = (j-gys)*gxm + (i-gxs)
            xc = x_v(row+x_i)
            xt = xc
            xb = xc
            xr = xc
            xl = xc
            xrb = xc
            xlt = xc

            if (i .eq. 0) then !left side
               xl = left_v(j - ys + 1 + left_i)
               xlt = left_v(j - ys + 2 + left_i)
            else
               xl = x_v(row - 1 + x_i)
            endif

            if (j .eq. 0) then !bottom side
               xb = bottom_v(i - xs + 1 + bottom_i)
               xrb = bottom_v(i - xs + 2 + bottom_i)
            else
               xb = x_v(row - gxm + x_i)
            endif

            if (i + 1 .eq. gxs + gxm) then !right side
               xr = right_v(j - ys + 1 + right_i)
               xrb = right_v(j - ys + right_i)
            else
               xr = x_v(row + 1 + x_i)
            endif

            if (j + 1 .eq. gys + gym) then !top side
               xt = top_v(i - xs + 1 + top_i)
               xlt = top_v(i - xs + top_i)
            else
               xt = x_v(row + gxm + x_i)
            endif

            if ((i .gt. gxs ) .and. (j + 1 .lt. gys + gym)) then
               xlt = x_v(row - 1 + gxm + x_i)
            endif

            if ((j .gt. gys) .and. (i + 1 .lt. gxs + gxm)) then
               xrb = x_v(row + 1 - gxm + x_i)
            endif

            d1 = xc-xl
            d2 = xc-xr
            d3 = xc-xt
            d4 = xc-xb
            d5 = xr-xrb
            d6 = xrb-xb
            d7 = xlt-xl
            d8 = xt-xlt

            df1dxc = d1 * hydhx
            df2dxc = d1 * hydhx + d4 * hxdhy
            df3dxc = d3 * hxdhy
            df4dxc = d2 * hydhx + d3 * hxdhy
            df5dxc = d2 * hydhx
            df6dxc = d4 * hxdhy

            d1 = d1 * rhx
            d2 = d2 * rhx
            d3 = d3 * rhy
            d4 = d4 * rhy
            d5 = d5 * rhy
            d6 = d6 * rhx
            d7 = d7 * rhy
            d8 = d8 * rhx

            f1 = sqrt(1.0d0 + d1*d1 + d7*d7)
            f2 = sqrt(1.0d0 + d1*d1 + d4*d4)
            f3 = sqrt(1.0d0 + d3*d3 + d8*d8)
            f4 = sqrt(1.0d0 + d3*d3 + d2*d2)
            f5 = sqrt(1.0d0 + d2*d2 + d5*d5)
            f6 = sqrt(1.0d0 + d4*d4 + d6*d6)

            ft = ft + f2 + f4

            df1dxc = df1dxc / f1
            df2dxc = df2dxc / f2
            df3dxc = df3dxc / f3
            df4dxc = df4dxc / f4
            df5dxc = df5dxc / f5
            df6dxc = df6dxc / f6

            g_v(row + g_i) = 0.5 * (df1dxc + df2dxc + df3dxc + df4dxc +  &
     &                              df5dxc + df6dxc)
         enddo
      enddo

! Compute triangular areas along the border of the domain. 
      if (xs .eq. 0) then  ! left side
         do j=ys,ys+ym-1
            d3 = (left_v(j-ys+1+left_i) - left_v(j-ys+2+left_i))         &
     &                 * rhy
            d2 = (left_v(j-ys+1+left_i) - x_v((j-gys)*gxm + x_i))        &
     &                 * rhx
            ft = ft + sqrt(1.0d0 + d3*d3 + d2*d2)
         enddo
      endif

      
      if (ys .eq. 0) then !bottom side
         do i=xs,xs+xm-1
            d2 = (bottom_v(i+1-xs+bottom_i)-bottom_v(i-xs+2+bottom_i))    &
     &                    * rhx
            d3 = (bottom_v(i-xs+1+bottom_i)-x_v(i-gxs+x_i))*rhy
            ft = ft + sqrt(1.0 + d3*d3 + d2*d2)
         enddo
      endif

      
      if (xs + xm .eq. mx) then ! right side
         do j=ys,ys+ym-1
            d1 = (x_v((j+1-gys)*gxm-1+x_i)-right_v(j-ys+1+right_i))*rhx
            d4 = (right_v(j-ys+right_i) - right_v(j-ys+1+right_i))*rhy
            ft = ft + sqrt(1.0d0 + d1*d1 + d4*d4)
         enddo
      endif

      
      if (ys + ym .eq. my) then
         do i=xs,xs+xm-1
            d1 = (x_v((gym-1)*gxm+i-gxs+x_i) - top_v(i-xs+1+top_i))*rhy
            d4 = (top_v(i-xs+1+top_i) - top_v(i-xs+top_i))*rhx
            ft = ft + sqrt(1.0d0 + d1*d1 + d4*d4)
         enddo
      endif

      
      if ((ys .eq. 0) .and. (xs .eq. 0)) then
         d1 = (left_v(0 + left_i) - left_v(1 + left_i)) * rhy
         d2 = (bottom_v(0+bottom_i)-bottom_v(1+bottom_i))*rhx
         ft = ft + sqrt(1.0d0 + d1*d1 + d2*d2)
      endif

      if ((ys + ym .eq. my) .and. (xs + xm .eq. mx)) then
         d1 = (right_v(ym+1+right_i) - right_v(ym+right_i))*rhy
         d2 = (top_v(xm+1+top_i) - top_v(xm + top_i))*rhx
         ft = ft + sqrt(1.0d0 + d1*d1 + d2*d2)
      endif

      ft = ft * area
      call MPI_Allreduce(ft,fcn,1,MPI_DOUBLE_PRECISION,                  &
     &             MPI_SUM,MPI_COMM_WORLD,info)



! Restore vectors
      call VecRestoreArray(localX,x_v,x_i,info)
      call VecRestoreArray(localV,g_v,g_i,info)
      call VecRestoreArray(Left,left_v,left_i,info)
      call VecRestoreArray(Top,top_v,top_i,info)
      call VecRestoreArray(Bottom,bottom_v,bottom_i,info)
      call VecRestoreArray(Right,right_v,right_i,info)

! Scatter values to global vector
      call DALocalToGlobal(da,localV,INSERT_VALUES,G,info)

      call PetscLogFlops(70*xm*ym,info)

      return
      end  !FormFunctionGradient
      




! ----------------------------------------------------------------------------
! 
!/*
!   FormHessian - Evaluates Hessian matrix.
!
!   Input Parameters:
!.  tao  - the TAO_SOLVER context
!.  X    - input vector
!.  dummy  - not used 
!
!   Output Parameters:
!.  Hessian    - Hessian matrix
!.  Hpc    - optionally different preconditioning matrix
!.  flag - flag indicating matrix structure
!
!   Notes:
!   Due to mesh point reordering with DAs, we must always work
!   with the local mesh points, and then transform them to the new
!   global numbering with the local-to-global mapping.  We cannot work
!   directly with the global numbers for the original uniprocessor mesh!  
!
!   Two methods are available for imposing this transformation
!   when setting matrix entries:
!     (A) MatSetValuesLocal(), using the local ordering (including
!         ghost points!)
!         - Do the following two steps once, before calling TaoSolve()
!           - Use DAGetISLocalToGlobalMapping() to extract the
!             local-to-global map from the DA
!           - Associate this map with the matrix by calling
!             MatSetLocalToGlobalMapping() 
!         - Then set matrix entries using the local ordering
!           by calling MatSetValuesLocal()
!     (B) MatSetValues(), using the global ordering 
!         - Use DAGetGlobalIndices() to extract the local-to-global map
!         - Then apply this map explicitly yourself
!         - Set matrix entries using the global ordering by calling
!           MatSetValues()
!   Option (A) seems cleaner/easier in many cases, and is the procedure
!   used in this example.
*/
      subroutine FormHessian(tao, X, Hessian, Hpc, flg, dummy, info)
      implicit none

! da,Top,Left,Right,Bottom,mx,my,localX defined in plate2f.h
#include "plate2f.h"
      
      TAO_SOLVER       tao
      Vec              X
      Mat              Hessian,Hpc
      MatStructure     flg
      integer          dummy,info

      integer          i,j,k,row
      integer          xs,xm,gxs,gxm,ys,ym,gys,gym,col(0:6)
      PetscScalar      hx,hy,hydhx,hxdhy,rhx,rhy
      PetscScalar      f1,f2,f3,f4,f5,f6,d1,d2,d3,d4,d5,d6,d7,d8
      PetscScalar      xc,xl,xr,xt,xb,xlt,xrb
      PetscScalar      hl,hr,ht,hb,hc,htl,hbr

! PETSc's VecGetArray acts differently in Fortran than it does in C.
! Calling VecGetArray((Vec) X, (PetscScalar) x_array(0:1), (PetscOffset) x_index, info)
! will return an array of doubles referenced by x_array offset by x_index.
!  i.e.,  to reference the kth element of X, use x_array(k + x_index).
! Notice that by declaring the arrays with range (0:1), we are using the C 0-indexing practice.
      PetscScalar      right_v(0:1),left_v(0:1),bottom_v(0:1),top_v(0:1)
      PetscScalar      x_v(0:1)
      PetscOffset      x_i,right_i,left_i,bottom_i,top_i
      PetscScalar      v(0:6)
      PetscTruth       assembled
      
! Set various matrix options 
      call MatSetOption(Hessian,MAT_IGNORE_OFF_PROC_ENTRIES,info)
      call MatSetOption(Hessian,MAT_COLUMNS_SORTED,info)
      call MatSetOption(Hessian,MAT_ROWS_SORTED,info)

! Get local mesh boundaries
      call DAGetCorners(da,xs,ys,PETSC_NULL_INTEGER,xm,ym,              &
     &                  PETSC_NULL_INTEGER,info)
      call DAGetGhostCorners(da,gxs,gys,PETSC_NULL_INTEGER,gxm,gym,     &
     &                       PETSC_NULL_INTEGER,info)

! Scatter ghost points to local vectors
      call DAGlobalToLocalBegin(da,X,INSERT_VALUES,localX,info)
      call DAGlobalToLocalEnd(da,X,INSERT_VALUES,localX,info)

! Get pointers to vector data (see note on Fortran arrays above)
      call VecGetArray(localX,x_v,x_i,info)
      call VecGetArray(Top,top_v,top_i,info)
      call VecGetArray(Bottom,bottom_v,bottom_i,info)
      call VecGetArray(Left,left_v,left_i,info)
      call VecGetArray(Right,right_v,right_i,info)

! Initialize matrix entries to zero
      call MatAssembled(Hessian,assembled,info)
      if (assembled .eq. PETSC_TRUE) call MatZeroEntries(Hessian,info)


      rhx = mx + 1.0
      rhy = my + 1.0
      hx = 1.0/rhx
      hy = 1.0/rhy
      hydhx = hy/hx
      hxdhy = hx/hy
! compute Hessian over the locally owned part of the mesh

      do  i=xs,xs+xm-1
         do  j=ys,ys+ym-1
            row = (j-gys)*gxm + (i-gxs)
            
            xc = x_v(row + x_i)
            xt = xc
            xb = xc
            xr = xc
            xl = xc
            xrb = xc
            xlt = xc

            if (i .eq. gxs) then   ! Left side
               xl = left_v(left_i + j - ys + 1)
               xlt = left_v(left_i + j - ys + 2)
            else
               xl = x_v(x_i + row -1 )
            endif

            if (j .eq. gys) then ! bottom side
               xb = bottom_v(bottom_i + i - xs + 1)
               xrb = bottom_v(bottom_i + i - xs + 2)
            else
               xb = x_v(x_i + row - gxm)
            endif

            if (i+1 .eq. gxs + gxm) then !right side
               xr = right_v(right_i + j - ys + 1)
               xrb = right_v(right_i + j - ys)
            else
               xr = x_v(x_i + row + 1)
            endif

            if (j+1 .eq. gym+gys) then !top side
               xt = top_v(top_i +i - xs + 1)
               xlt = top_v(top_i + i - xs)
            else
               xt = x_v(x_i + row + gxm)
            endif

            if ((i .gt. gxs) .and. (j+1 .lt. gys+gym)) then
               xlt = x_v(x_i + row - 1 + gxm)
            endif
            
            if ((i+1 .lt. gxs+gxm) .and. (j .gt. gys)) then
               xrb = x_v(x_i + row + 1 - gxm)
            endif

            d1 = (xc-xl)*rhx
            d2 = (xc-xr)*rhx
            d3 = (xc-xt)*rhy
            d4 = (xc-xb)*rhy
            d5 = (xrb-xr)*rhy
            d6 = (xrb-xb)*rhx
            d7 = (xlt-xl)*rhy
            d8 = (xlt-xt)*rhx
            
            f1 = sqrt( 1.0d0 + d1*d1 + d7*d7)
            f2 = sqrt( 1.0d0 + d1*d1 + d4*d4)
            f3 = sqrt( 1.0d0 + d3*d3 + d8*d8)
            f4 = sqrt( 1.0d0 + d3*d3 + d2*d2)
            f5 = sqrt( 1.0d0 + d2*d2 + d5*d5)
            f6 = sqrt( 1.0d0 + d4*d4 + d6*d6)
            
            
            hl = (-hydhx*(1.0+d7*d7)+d1*d7)/(f1*f1*f1)+                 &
     &              (-hydhx*(1.0+d4*d4)+d1*d4)/(f2*f2*f2)

            hr = (-hydhx*(1.0+d5*d5)+d2*d5)/(f5*f5*f5)+                 &
     &            (-hydhx*(1.0+d3*d3)+d2*d3)/(f4*f4*f4)

            ht = (-hxdhy*(1.0+d8*d8)+d3*d8)/(f3*f3*f3)+                 &
     &                (-hxdhy*(1.0+d2*d2)+d2*d3)/(f4*f4*f4)

            hb = (-hxdhy*(1.0+d6*d6)+d4*d6)/(f6*f6*f6)+                 &
     &              (-hxdhy*(1.0+d1*d1)+d1*d4)/(f2*f2*f2)
            
            hbr = -d2*d5/(f5*f5*f5) - d4*d6/(f6*f6*f6)
            htl = -d1*d7/(f1*f1*f1) - d3*d8/(f3*f3*f3)
            
            hc = hydhx*(1.0+d7*d7)/(f1*f1*f1) +                         &
     &              hxdhy*(1.0+d8*d8)/(f3*f3*f3) +                      &
     &              hydhx*(1.0+d5*d5)/(f5*f5*f5) +                      &
     &              hxdhy*(1.0+d6*d6)/(f6*f6*f6) +                      &
     &              (hxdhy*(1.0+d1*d1)+hydhx*(1.0+d4*d4)-               &
     &              2*d1*d4)/(f2*f2*f2) +  (hxdhy*(1.0+d2*d2)+          &
     &              hydhx*(1.0+d3*d3)-2*d2*d3)/(f4*f4*f4)               
            
            hl = hl * 0.5
            hr = hr * 0.5
            ht = ht * 0.5
            hb = hb * 0.5
            hbr = hbr * 0.5
            htl = htl * 0.5
            hc = hc * 0.5 

            k = 0

            if (j .gt. 0) then
               v(k) = hb
               col(k) = row - gxm
               k=k+1
            endif

            if ((j .gt. 0) .and. (i .lt. mx-1)) then
               v(k) = hbr
               col(k) = row-gxm+1
               k=k+1
            endif

            if (i .gt. 0) then
               v(k) = hl
               col(k) = row - 1
               k = k+1
            endif

            v(k) = hc
            col(k) = row
            k=k+1

            if (i .lt. mx-1) then
               v(k) = hr
               col(k) = row + 1
               k=k+1
            endif

            if ((i .gt. 0) .and. (j .lt. my-1)) then
               v(k) = htl
               col(k) = row + gxm - 1
               k=k+1
            endif

            if (j .lt. my-1) then
               v(k) = ht
               col(k) = row + gxm
               k=k+1
            endif

! Set matrix values using local numbering, defined earlier in main routine
            call MatSetValuesLocal(Hessian,1,row,k,col,v,INSERT_VALUES,       &
     &                              info)

            

         enddo
      enddo
      
! restore vectors
      call VecRestoreArray(localX,x_v,x_i,info)
      call VecRestoreArray(Left,left_v,left_i,info)
      call VecRestoreArray(Right,right_v,right_i,info)
      call VecRestoreArray(Top,top_v,top_i,info)
      call VecRestoreArray(Bottom,bottom_v,bottom_i,info)


! Assemble the matrix
      call MatAssemblyBegin(Hessian,MAT_FINAL_ASSEMBLY,info)
      call MatAssemblyEnd(Hessian,MAT_FINAL_ASSEMBLY,info)

      call PetscLogFlops(199*xm*ym,info)

      return
      end  
      
      



! Top,Left,Right,Bottom,bheight,mx,my,bmx,bmy,H, defined in plate2f.h

! ----------------------------------------------------------------------------
!
!/*
!     MSA_BoundaryConditions - calculates the boundary conditions for the region
!
!
!*/

      subroutine MSA_BoundaryConditions(info)
      implicit none

! Top,Left,Right,Bottom,bheight,mx,my,bmx,bmy defined in plate2f.h
#include "plate2f.h"

      integer i,j,k,limit,info,maxits
      integer          xs, xm, gxs, gxm, ys, ym, gys, gym
      integer bsize, lsize, tsize, rsize
      PetscScalar           one, two, three, tol
      PetscScalar           scl,fnorm,det,xt,yt,hx,hy
      PetscScalar           u1,u2,nf1,nf2,njac11,njac12,njac21,njac22
      PetscScalar           b, t, l, r
      PetscScalar           boundary_v(0:1)
      PetscOffset           boundary_i
      logical exitloop
      TaoTruth flg

      limit=0
      maxits = 5
      tol=1e-10
      b=-0.5d0
      t= 0.5d0
      l=-0.5d0
      r= 0.5d0
      xt=0
      yt=0
      one=1.0d0
      two=2.0d0
      three=3.0d0


      call DAGetCorners(da,xs,ys,PETSC_NULL_INTEGER,xm,ym,               &
     &                  PETSC_NULL_INTEGER,info)
      call DAGetGhostCorners(da,gxs,gys,PETSC_NULL_INTEGER,              &
     &                       gxm,gym,PETSC_NULL_INTEGER,info)

      bsize = xm + 2
      lsize = ym + 2
      rsize = ym + 2
      tsize = xm + 2
      

      call VecCreateMPI(MPI_COMM_WORLD,bsize,PETSC_DECIDE,Bottom,info)
      call VecCreateMPI(MPI_COMM_WORLD,tsize,PETSC_DECIDE,Top,info)
      call VecCreateMPI(MPI_COMM_WORLD,lsize,PETSC_DECIDE,Left,info)
      call VecCreateMPI(MPI_COMM_WORLD,rsize,PETSC_DECIDE,Right,info)

      hx= (r-l)/(mx+1)
      hy= (t-b)/(my+1)

      do j=0,3
         
         if (j.eq.0) then
            yt=b
            xt=l+hx*xs
            limit=bsize
            call VecGetArray(Bottom,boundary_v,boundary_i,info)
            

         elseif (j.eq.1) then
            yt=t
            xt=l+hx*xs
            limit=tsize
            call VecGetArray(Top,boundary_v,boundary_i,info)

         elseif (j.eq.2) then
            yt=b+hy*ys
            xt=l
            limit=lsize
            call VecGetArray(Left,boundary_v,boundary_i,info)

         elseif (j.eq.3) then
            yt=b+hy*ys
            xt=r
            limit=rsize
            call VecGetArray(Right,boundary_v,boundary_i,info)
         endif
         

         do i=0,limit-1
            
            u1=xt
            u2=-yt
            k = 0
            exitloop = .false.
            do while (k .lt. maxits .and. (.not. exitloop) )

               nf1=u1 + u1*u2*u2 - u1*u1*u1/three-xt
               nf2=-u2 - u1*u1*u2 + u2*u2*u2/three-yt
               fnorm=sqrt(nf1*nf1+nf2*nf2)
               if (fnorm .gt. tol) then
                  njac11=one+u2*u2-u1*u1
                  njac12=two*u1*u2
                  njac21=-two*u1*u2
                  njac22=-one - u1*u1 + u2*u2
                  det = njac11*njac22-njac21*njac12
                  u1 = u1-(njac22*nf1-njac12*nf2)/det
                  u2 = u2-(njac11*nf2-njac21*nf1)/det
               else 
                  exitloop = .true.
               endif
               k=k+1
            enddo

            boundary_v(i + boundary_i) = u1*u1-u2*u2
            if ((j .eq. 0) .or. (j .eq. 1)) then
               xt = xt + hx
            else
               yt = yt + hy
            endif

         enddo
               

         if (j.eq.0) then
            call VecRestoreArray(Bottom,boundary_v,boundary_i,info)
         elseif (j.eq.1) then
            call VecRestoreArray(Top,boundary_v,boundary_i,info)
         elseif (j.eq.2) then
            call VecRestoreArray(Left,boundary_v,boundary_i,info)
         elseif (j.eq.3) then
            call VecRestoreArray(Right,boundary_v,boundary_i,info)
         endif
         
      enddo


! Scale the boundary if desired
      call PetscOptionsGetReal(PETSC_NULL_CHARACTER,"-bottom",            &
     &                         scl,flg,info)
      if (flg .eq. PETSC_TRUE) then
         call VecScale(scl,Bottom,info)
      endif

      call PetscOptionsGetReal(PETSC_NULL_CHARACTER,"-top",               &
     &                         scl,flg,info)
      if (flg .eq. PETSC_TRUE) then
         call VecScale(scl,Top,info)
      endif

      call PetscOptionsGetReal(PETSC_NULL_CHARACTER,"-right",             &
     &                         scl,flg,info)
      if (flg .eq. PETSC_TRUE) then
         call VecScale(scl,Right,info)
      endif

      call PetscOptionsGetReal(PETSC_NULL_CHARACTER,"-left",              &
     &                         scl,flg,info)
      if (flg .eq. PETSC_TRUE) then
         call VecScale(scl,Left,info)
      endif
         
      
      return
      end

! ----------------------------------------------------------------------------
!
!/*
!     MSA_Plate - Calculates an obstacle for surface to stretch over
!
!     Output Parameter:
!.    xl - lower bound vector
!.    xu - upper bound vector
!
!*/

      subroutine MSA_Plate(xl,xu,info)
      implicit none

! mx,my,bmx,bmy,da,bheight defined in plate2f.h
#include "plate2f.h"
      Vec              xl,xu
      integer          i,j,row,info
      integer          xs, xm, ys, ym
      PetscScalar      lb,ub

! PETSc's VecGetArray acts differently in Fortran than it does in C.
! Calling VecGetArray((Vec) X, (PetscScalar) x_array(0:1), (PetscOffset) x_index, info)
! will return an array of doubles referenced by x_array offset by x_index.
!  i.e.,  to reference the kth element of X, use x_array(k + x_index).
! Notice that by declaring the arrays with range (0:1), we are using the C 0-indexing practice.
      PetscScalar      xl_v(0:1)
      PetscOffset      xl_i


      lb = -1.0d300
      ub = 1.0d300

      if (bmy .lt. 0) bmy = 0
      if (bmy .gt. my) bmy = my
      if (bmx .lt. 0) bmx = 0
      if (bmx .gt. mx) bmx = mx
      

      call DAGetCorners(da,xs,ys,PETSC_NULL_INTEGER,xm,ym,               &
     &             PETSC_NULL_INTEGER,info)

      call VecSet(xl,lb,info)
      call VecSet(xu,ub,info)

      call VecGetArray(xl,xl_v,xl_i,info)
      

      do i=xs,xs+xm-1

         do j=ys,ys+ym-1
            
            row=(j-ys)*xm + (i-xs)

            if (i.ge.((mx-bmx)/2) .and. i.lt.(mx-(mx-bmx)/2) .and.           &
     &          j.ge.((my-bmy)/2) .and. j.lt.(my-(my-bmy)/2)) then  
               xl_v(xl_i+row) = bheight

            endif

         enddo
      enddo


      call VecRestoreArray(xl,xl_v,xl_i,info)
      
      return
      end



      
      
! ----------------------------------------------------------------------------
!
!/*
!     MSA_InitialPoint - Calculates an obstacle for surface to stretch over
!
!     Output Parameter:
!.    X - vector for initial guess
!
!*/

      subroutine MSA_InitialPoint(X, info)
      implicit none

! mx,my,localX,da,Top,Left,Bottom,Right defined in plate2f.h
#include "plate2f.h"
      Vec               X

      integer           start,i,j,info
      integer           row,xs,xm,gxs,gxm,ys,ym,gys,gym
      PetscScalar       zero, np5

! PETSc's VecGetArray acts differently in Fortran than it does in C.
! Calling VecGetArray((Vec) X, (PetscScalar) x_array(0:1), (integer) x_index, info)
! will return an array of doubles referenced by x_array offset by x_index.
!  i.e.,  to reference the kth element of X, use x_array(k + x_index).
! Notice that by declaring the arrays with range (0:1), we are using the C 0-indexing practice.
      PetscScalar       left_v(0:1), right_v(0:1), top_v(0:1)
      PetscScalar       x_v(0:1), bottom_v(0:1)
      PetscOffset       left_i, right_i, top_i, bottom_i, x_i
      PetscTruth        flg
      PetscRandom       rctx
      
      zero = 0.0d0
      np5 = -0.5d0

      call PetscOptionsGetInt(PETSC_NULL_CHARACTER,"-start",            &
     &                        start,flg,info)

      if ((flg .eq. PETSC_TRUE) .and. (start .eq. 0)) then  ! the zero vector is reasonable
         call VecSet(X,zero,info)

      elseif ((flg .eq. PETSC_TRUE) .and. (start .gt. 0)) then  ! random start -0.5 < xi < 0.5 
         call PetscRandomCreate(MPI_COMM_WORLD,RANDOM_DEFAULT,rctx,info)
         do i=0,start-1
            call VecSetRandom(X,rctx,info)
         enddo

         call PetscRandomDestroy(rctx,info)
         call VecShift(X,np5,info)

      else   ! average of boundary conditions
         
!        Get Local mesh boundaries
         call DAGetCorners(da,xs,ys,PETSC_NULL_INTEGER,xm,ym,             &
     &                     PETSC_NULL_INTEGER,info) 
         call DAGetGhostCorners(da,gxs,gys,PETSC_NULL_INTEGER,gxm,gym,    &
     &                     PETSC_NULL_INTEGER,info)



!        Get pointers to vector data
         call VecGetArray(Top,top_v,top_i,info)
         call VecGetArray(Bottom,bottom_v,bottom_i,info)
         call VecGetArray(Left,left_v,left_i,info)
         call VecGetArray(Right,right_v,right_i,info)
         
         call VecGetArray(localX,x_v,x_i,info)
      
!        Perform local computations 
         do  j=ys,ys+ym-1
            do i=xs,xs+xm-1
               row = (j-gys)*gxm  + (i-gxs)
               x_v(x_i + row) = ((j+1)*bottom_v(bottom_i +i-xs+1)/my        &
     &             + (my-j+1)*top_v(top_i+i-xs+1)/(my+2) +                  &
     &              (i+1)*left_v(left_i+j-ys+1)/mx       +                  &
     &              (mx-i+1)*right_v(right_i+j-ys+1)/(mx+2))*0.5
            enddo
         enddo

!        Restore vectors
         call VecRestoreArray(localX,x_v,x_i,info)

         call VecRestoreArray(Left,left_v,left_i,info)
         call VecRestoreArray(Top,top_v,top_i,info)
         call VecRestoreArray(Bottom,bottom_v,bottom_i,info)
         call VecRestoreArray(Right,right_v,right_i,info)

         call DALocalToGlobal(da,localX,INSERT_VALUES,X,info)

      endif

      return
      end