*  This is a general template script to create a solvated 
*  simulation "box" using periodic boundaries with a few specific volumes.
*  The script will generate, minimize (or alternatively read in a psf 
*  file for a solute system already generated) a solute system in a:
*  1) cubic volume w/ images or crystal
*  2) truncated octahedral box w/ images
*  3) truncated octahedral box w/ crystal (reoriented from images)
***
*  Required files: top_all22_prot.inp, par_all22_prot.inp
**                 octa.img, tip125.crd, tip216.crd
*


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!*  IMPORTANT USAGE NOTE:  ONLY SINGLE CHAIN PROTEINS CAN BE PROCESSED
!*  WITH GENERATION/MINIMAZATION STAGE BECAUSE OF PSF GENERATION PROCEDURE
!*  USED, I.E. ASSUMPTION THAT PSF CAN BE GENERATED BY READING SEQUENCE OF
!*  PROTEIN FROM PDB FILE.
!*  FOR MORE GENERAL SOLVATION PROBLEMS, I.E. MULTI-CHAIN PROTEINS,
!*  SIMPLY JUST USE THE PSF=TRUE KEY AND THE SOLVATION PART.
!**_______________________________________________________________________**
!**                    WRITTEN BY C.L. BROOKS, III                        **
!**                           OCTOBER, 1999                               **
!**                                                                       **
!**     COPYRIGHT C.L. BROOKS, III, THE SCRIPPS RESEARCH INSTITUTE        **
!**_______________________________________________________________________**
!********
!*  The usage requires that several variables either be set in the
!*  input script or on the CHARMM command line at execution.
!*  The needed variables are:
!**  Variable   Default               Function
!*__________________________________________________________________________
!*   toppar   ($toppar)  ==> path to parameter and topology files
!**
!*   pdbfile     (...)   ==> name of pdb coordinate file containing 
!**                          coordinates and sequence information
!**                          (for psf=true key this is the name of the
!**                          coordinate file in pdb format (pdbfile.pdb)
!**                          and the pre-generated psf file (pdbfile.psf))
!*   psf       (false)   ==> key specifying use of pre-generated psf and
!**                          pdb coordinate file for use w/ multi-segid
!**                          solute systems
!*   seqname  (pdbfile)  ==> name of protein sequence (segid)
!**
!*   shape      (cubic)  ==> variable specifying whether truncated octahedral or
!**                          cubic volume used (octahedral/cubic)
!*   crystal    (false)  ==> logical variable true/false specifying whether
!**                          to use crystal orientation of simulation volume
!*   cutoff      (9)    ==> value of cutoff to be used in establishing how
!**                          far images are to be from volume "edges", often set
!**                          at or near nonbonded list cutoff value
!**                          to use crystal orientation of simulation volume
!*   solvate    (false) ==> logical flag which determines whether you stop after
!**                          initial minimization and psf generation or proceed 
!**                          to solvation component.               
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

if @?pdbfile eq 0 goto iamdead
if @?seqname eq 0 set seqname = @pdbfile
if @?psf eq 0 set psf = false
if @?toppar eq 0 set toppar = "$toppar"
if @?shape eq 0 set shape = cubic
if @?crystal eq 0 set crystal = false
if @?cutoff eq 0 set cutoff = 9
if @?solvate eq 0 set solvate = false
!  Read the parameter and topology files:
goto readparams
label fromreadparams

if psf ne true goto buildprotein
label frombuildprotein


stop


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
label readparams
prnlev 0 node 0
wrnlev 0 node 0
open unit 1 read form name "@TOPPAR/top_all22_prot.inp"
read rtf card unit 1
close unit 1

open unit 1 read form name "@TOPPAR/par_all22_prot.inp"
read param card unit 1
close unit 1

prnlev 5 node 0
wrnlev 2 node 0
goto fromreadparams
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!


label buildprotein
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!  SOLUTE GENERATION COMPONENT:  ASSUMES SINGLE CHAIN SOLUTE IN PDB               !
!  FILE GIVEN BY KEY PDBFILE.  IF MORE THAN ONE CHAIN IS PRESENT                  !
!  THE GENERATION ASPECTS WILL BE ERRONEOUS.                                      !
!  THIS COMPONENT OF THE SCRIPT:                                                  !
!       1) GENERATES PROTEIN SEQUENCE                                             !
!       2) BUILDS ALL MISSING ATOMS                                               !
!       3) MINIMIZES PROTEIN SUBJECT TO SUCCESSIVELY REDUCED HARMONIC RESTRAINTS  !
!       4) WRITES THE MINIMIZED PDB FILE                                          !
!  CHECKS ARE DONE ON NUMBER OF MISSING ATOMS TO GUESS WHETHER FILE WAS           !
!  PROCESSED CORRECTLY.  EXAMINE OUTPUT FILE FOR POTENTIAL PROBLEMS!              !
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!  Copy original file for safe keeping then "fix" pdb file
system "echo @PDBFILE.pdb > filename"
system "cp `awk '{print tolower($0)}' filename` pdb_orig"

!  Make "standard" changes to accomodate pdb -> top_all22/28 atom/residue renames.
system "echo 'BEGIN {write = 0}' > awk.file"
system "echo '$1 == "TER"  {write += 1;}'>> awk.file"
system "echo '{' >> awk.file"
system "echo 'if ($4 == "HIS") sub("HIS","HSD");' >> awk.file"
system "echo 'if ( write == 0 ) {print $0 "@PDBFILE" ;}' >> awk.file"
system "echo '}'>> awk.file"
system "echo '{if ( write == 1 ) {' >> awk.file"
system "echo 'if ($1 == "TER")  {print "END" ;}'>> awk.file"
system "echo '}}'>> awk.file"
!  Beyond column 72 we should have the segid, some NMR files don't do this.  Add
!  segid after column 72 using cut and awk.
system "cut -b 1-72 pdb_orig | awk -f awk.file > pdb_new"

!  Clean-up temporary files
system "rm awk.file"
system "mv pdb_new `(awk '{print tolower($0)}' filename)`"

open unit 1 read form name @PDBFILE.pdb
read sequ pdb unit 1
close unit 1

generate @seqname setup first nter last cter

!  Check for disulphide bonds in SSBOND records
system "echo "* Disuplhides for @pdbfile" > disu.strm"
system "echo "* " >> disu.strm"
system "awk '$1 == "SSBOND" {print "patch disu @pdbfile",$4," @pdbfile",$6}' pdb_orig >> disu.strm"

stream "disu.strm"

system "rm disu.strm"

rename atom cd1 select resname ile .and. type cd end
rename atom oxt select type ot1 end
rename atom o select ires ?nres .and. type ot2 end
open unit 1 read form name @PDBFILE.pdb
read coor pdb unit 1
close unit 1
rename atom cd select resname ile .and. type cd1 end
rename atom ot1 select type oxt end
rename atom ot2 select ires ?nres .and. type o end

! Now move original pdb file back and fnish clean-up
system "mv pdb_orig `awk '{print tolower($0)}' filename`"
system "rm filename"

define nonH select .not. hydrogen end
set nonHatoms = ?nsel
define missing select .not. hydrogen .and. .not. initialized end
set missingatoms = ?nsel

Calc fraction = (@nonHatoms - @missingatoms) / @nonHatoms
if fraction le 0.8 goto toomanywrong

print coor select missing end

!  Build missing atoms if just a few
ic param
ic build

hbuild

define missing select .not. initialized end
if nsel ne 0 goto skipthis

   system "banner Some atoms not built"
   print coor select missing end
   stop

label skipthis
coor copy compare

coor orie
coor stat
faster on

open unit 1 write form name @pdbfile.psf
write psf card unit 1
*  PSF for @pdbfile sequence
*
close unit 1
open unit 1 write form name @pdbfile_notmin.pdb
write coor pdb unit 1
*  Coordinates for @pdbfile before minimization 
*  
close unit 1

prnlev 0
define hisdone sele none end
define hiss sele resname HSD end
set hisn ?selresi
label nexthis
prnlev 5
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!          HISTIDINE distance checks
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  coor mindist sele resi @hisn .and. type ND1 end -
               sele .not. resi @hisn end
  coor mindist sele resi @hisn .and. type NE2  end -
               sele .not. resi @hisn end
prnlev 0
  define hisdone sele hisdone .or. resi @hisn end
  define hiss sele hiss .and. .not. resi @hisn end
  if ?nsel eq 0 goto donehis
  set hisn ?selresi
  goto nexthis
label donehis
prnlev 5

coor dist near cut 3.0 sele resn HS* .and. (type NE2 .or. type ND1 ) end -
                       sele .not. resn HS* end

stop