#!/usr/bin/env python
#
# Script to run first the analysis portion on a set of NMR strucutres
# followed by XPLOR water refinement and analysis of resulting structures
#
# Usage: edit pathnames in refine.pars file
#       execute script 'nmr_waterrefine.py'
#

import string,os,sys,profile,cPickle,math,socket,time,shutil,fnmatch,glob,random,pdb_file
import nmv_dct,nmv_dsc,nmv_xplor

# MAIN SCRIPT
# ===============================

# SET THE GENERAL CONFIGURATION FILE AND INPUT PARAMETER FILE
configfile                  = os.path.join(os.getcwd(),'nmr_refine.conf')
parameterfile               = os.path.join(os.getcwd(),'refine.pars')

# READ THE GENERAL CONFIGURATION FILE
print 'Reading CONFIGURATION file %s...\n'%configfile
config                      = nmv_dct.read(configfile)

# READ THE REFINEMENT PARAMETER FILE
print 'Reading PARAMETER file %s...\n'%parameterfile
refinepars                  = nmv_dct.read(parameterfile)

# SET SOFTWARE AND ACCOMPANYING PARAMETER LOCATIONS
xplor                       = config['XPLOR']
topparpath                  = config['XPLOR_TOPPAR']
protocolspath               = config['XPLOR_SOLVENT']

# SET PATHNAMES
path                        = refinepars['PROJECTDIR']
tmppath                     = refinepars['TMPDIR']
psfpath                     = refinepars['INPUTDIR']
tablepath                   = refinepars['INPUTDIR']
inputcoordinatespath        = refinepars['INPUTDIR']
scriptspath                 = refinepars['SCRIPTSDIR']

# SET INITIAL PARAMETERS FOR THE XPLOR RUN
ninputstructures            = int(refinepars['NINPUTSTRUCTURES'])
pdb_base                    = refinepars['PDB_BASE']

# SET FILES AND PARAMETERS FOR THE XPLOR RUNS THAT REMAIN UNCHANGED
psf_file                    = refinepars['PSF_FILE']
noe_file                    = refinepars['NOE_FILE']
cdih_file                   = refinepars['CDIH_FILE']
coup_file                   = refinepars['COUP_FILE']
sani_file                   = refinepars['SANI_FILE']
vean_file                   = refinepars['VEAN_FILE']
noe_accept                  = float(refinepars['NOE_ACCEPT'])
cdih_accept                 = float(refinepars['CDIH_ACCEPT'])
coup_accept                 = float(refinepars['COUP_ACCEPT'])
sani_accept                 = float(refinepars['SANI_ACCEPT'])
vean_accept                 = float(refinepars['VEAN_ACCEPT'])
noe_ave                     = refinepars['NOE_AVE']
heat_steps                  = int(refinepars['HEAT_STEPS'])
hot_steps                   = int(refinepars['HOT_STEPS'])
cool_steps                  = int(refinepars['COOL_STEPS'])
seed                        = int(refinepars['SEED'])

# SET BASE FILENAMES
analyzed_pdb_base           = 'analyzed'
refined_pdb_base            = 'refined'
analyzed_subdirname         = 'analyzed_input'
refined_subdirname          = 'refined_input'


# SET SOFTWARE AND PARAMETERS FOR THE VALIDATION ROUTINES
profit                      = config['PROFIT']
procheck_run                = config['PROCHECK_RUN']
prochecknmr_run             = config['PROCHECKNMR_RUN']
procheck_dir                = config['PROCHECK_DIR']
whatif_run                  = config['WHATIF_RUN']
zonefile                    = refinepars['ZONEFILE']
energysort                  = refinepars['ENERGYSORT']
selection                   = refinepars['SELECTION']

# SET FURTHER OPTIONS
run_cluster                 = refinepars['RUN_CLUSTER']
queu_cluster                = refinepars['QUEU_CLUSTER']


#############################
# ANALYSIS INPUT STRUCTURES #
#############################

# SET THE OUTPUTCOORDINATESPATH FOR THE ANALYSIS OF THE INPUT STRUCTURES
outputcoordinatespath  = os.path.join(path,analyzed_subdirname)

# CHECK WHETHER ANALYSIS WAS DONE
filename = os.path.join(outputcoordinatespath,'summary_%s.txt'%analyzed_pdb_base)
if os.path.exists(filename):
  print 'Analysis of the input structures is finished'
else:
  # SET TEMPORARY FILES
  checkscriptfilename    = os.path.join(outputcoordinatespath,'checkscr.tmp')
  runpath                = os.path.join(outputcoordinatespath,'jobs')
  procheckpath           = os.path.join(outputcoordinatespath,'procheck')
  whatcheckpath          = os.path.join(outputcoordinatespath,'whatcheck')

  # RE-CREATE ANALYSIS DIRECTORY/CREATE ANALYSIS DIRECTORY IF NOT PRESENT:
  # REMOVE ALL PREVIOUS TEMPORARY FILES FOR CLEAN START
  # CREATE RUNPATH
  if os.path.exists(outputcoordinatespath):
    print 'Cleaning directory %s'%outputcoordinatespath
    nmv_dsc.removedir(outputcoordinatespath)
    os.mkdir(outputcoordinatespath)
    os.mkdir(runpath)
  else:
    print 'Creating directory %s'%outputcoordinatespath
    os.mkdir(outputcoordinatespath)
    os.mkdir(runpath)

  # ANALYZE THE INPUT STRUCTURES IN THE XPLOR FORCEFIELD USED FOR THE WATERREFINEMENT
  # INCLUDES VIOLATION AND ENERGY ANALYSIS
  # NOTE THAT THE XPLOR PROTOCOLS DO NOT YET INCLUDE THE RDC OR COUP RESTRAINTS.
  # THE NMV_XPLOR MODULE CREATES THE SCRIPTS NEEDED TO CALCULATE STRUCTURES ON A CLUSTER
  nmv_xplor.analyze(xplor,
                    topparpath,
                    protocolspath,
                    inputcoordinatespath,
                    outputcoordinatespath,
                    runpath,
                    ninputstructures,
                    pdb_base,
                    analyzed_pdb_base,
                    psfpath,
                    psf_file,
                    tablepath,
                    noe_file=noe_file,
                    noe_ave=noe_ave,
                    noe_accept=noe_accept,
                    cdih_file=cdih_file,
                    cdih_accept=cdih_accept,
                    coup_file=coup_file,
                    coup_accept=coup_accept,
                    sani_file=sani_file,
                    sani_accept=sani_accept,
                    vean_file=vean_file,
                    vean_accept=vean_accept,
                    run_cluster=run_cluster,
                    queu_cluster=queu_cluster)

  # CHECK EVERY 10 SECS WHETHER ALL STRUCTURES ARE ANALYZED (NEEDED WHEN RUNNING ON A CLUSTER)
  # todo: check acceptance criteria and loop until we have enough accepted structures
  done=0
  while not done:
    time.sleep(10)
    if len(glob.glob(os.path.join(outputcoordinatespath,analyzed_pdb_base+'*.pdb')))==ninputstructures:
      done=1
  print 'Done with the analysis of the input coordinate and constraints'

  # RUN THE OTHER CHECKS
  # CREATE INPUT SCRIPTS FOR THE CHECKING ROUTINES
  print 'Starting protein structure checks of the analyzed input structures'
  checkscript=open(checkscriptfilename,'w')
  checkscript.write('#!/bin/tcsh -f\n')

  # SET ENVIRONMENT VARIABLES
  checkscript.write('setenv prodir %s\n'%procheck_dir)

  # WRITE A XPLOR FILE-LIST CONTAINING THE INFORMATION ABOUT THE ANALYZED INPUT STRUCTURES
  # FILES ARE SORTED ON RESTRAINT ENERGY (SET ENERGYSORT), THE FUNCTION USES THE PDB-HEADER INFORMATION
  print 'DEBUG: doing writing xplor file list'
  checkscript.write('%s -writexplorfilelist %s %s %s\n'%(os.path.join(scriptspath,'nmv_nmrcheck.py'),\
                    outputcoordinatespath,\
                    analyzed_pdb_base,\
                    energysort))
  print 'DEBUG: done with writing xplor file list'
  # AUTOMATICALLY CHECK THE ANALYZED INPUT STRUCTURES WITH PROCHECK AND WHATIF
  # PROFIT IS USED FOR RMSD CALCULATIONS
  checkscript.write('%s -checkall %s %s %s %s %s %s %s %s %s %s\n'%(os.path.join(scriptspath,'nmv_nmrcheck.py'),\
                    profit,\
                    procheck_run,\
                    prochecknmr_run,\
                    procheck_dir,\
                    whatif_run,\
                    outputcoordinatespath,\
                    analyzed_pdb_base,\
                    zonefile,\
                    selection,
                    tmppath))

  # CREATE A SUMMARY FILE THAT CONTAINS ALL INFORMATION
  checkscript.write('%s -summary %s %s \n'%(os.path.join(scriptspath,'nmv_nmrcheck.py'),\
                    outputcoordinatespath,\
                    analyzed_pdb_base))

  # CLOSE THE SCRIPTFILE
  checkscript.close()

  # SUBMIT THE SCRIPT EITHER TO THE CLUSTER OR ON A SINGLE PROCESSOR MACHINE
  os.system('/bin/chmod +x %s'%checkscriptfilename)
  #print 'Not doing the procheck validation analysis'
  print 'Doing the procheck validation analysis'
  if run_cluster=='y':
    os.system('%s %s &'%(queu_cluster,checkscriptfilename))
  else:
    os.system('%s'%checkscriptfilename)

  # CHECK EVERY 10 SECS WHETHER THE CHECKING ROUTINES ARE FINISHED (NEEDED WHEN RUNNING ON A CLUSTER)
  done=0
  while not done:
    time.sleep(10)
    if os.path.exists(os.path.join(outputcoordinatespath,'summary_%s.txt'%analyzed_pdb_base)):
      done=1

  # COMPRESS JOBS, PROCHECK AND WHATCHECK SUBDIRECTORIES
  current_location=os.getcwd()
  os.chdir(outputcoordinatespath)
  #os.system('tar cfz jobs.tgz jobs')
  #os.system('tar cfz procheck.tgz procheck')
  #os.system('tar cfz whatcheck.tgz whatcheck')
  os.chdir(current_location)

  # REMOVE THE SCRIPTFILES AND PROCHECK AND WHATCHECK DIRECTORIES
  # IN THE FINISHED CYCLE
  #for file in glob.glob(os.path.join(outputcoordinatespath,'*.tmp*')):
  #  os.remove(file)
  #nmv_dsc.removedir(runpath)
  #nmv_dsc.removedir(procheckpath)
  #nmv_dsc.removedir(whatcheckpath)
  print 'Analysis of the input structures is finished'

#  print 'DEBUG: doing premature exit for debugging'
#  sys.exit(1)
##################################
# REFINEMENT OF INPUT STRUCTURES #
##################################

# SET THE OUTPUTCOORDINATESPATH FOR THE ACTUAL REFINEMENT OF THE ANALYZED INPUT STRUCTURES
inputcoordinatespath   = os.path.join(path,analyzed_subdirname)
outputcoordinatespath  = os.path.join(path,refined_subdirname)

# CHECK WHETHER ANALYSIS WAS DONE
filename = os.path.join(outputcoordinatespath,'summary_%s.txt'%refined_pdb_base)
if os.path.exists(filename):
  print 'Analysis of refined structures is finished'
else:
  # SET TEMPORARY FILES
  checkscriptfilename    = os.path.join(outputcoordinatespath,'checkscr.tmp')
  runpath                = os.path.join(outputcoordinatespath,'jobs')
  procheckpath           = os.path.join(outputcoordinatespath,'procheck')
  whatcheckpath          = os.path.join(outputcoordinatespath,'whatcheck')

  # CREATE REFINED STRUCTURES DIRECTORY IF NOT PRESENT:
  if not os.path.exists(outputcoordinatespath):
    print 'Creating directory %s'%outputcoordinatespath
    os.mkdir(outputcoordinatespath)
    os.mkdir(runpath)

  # CHECK HOW MANY STRUCTURES HAVE ALREADY BEEN REFINED
  nrefinedstructures=len(glob.glob(os.path.join(outputcoordinatespath,refined_pdb_base)+'_*.pdb'))
  # SET NSTRUCTURES AND START COUNTER FOR NMV_XPLOR.WATERREFINE
  nstructures            = ninputstructures-nrefinedstructures
  start_count            = nrefinedstructures+1
  if nrefinedstructures<ninputstructures and not nrefinedstructures==0:
    print '%i structures have already been refined, continueing at structure %i'%(nrefinedstructures,start_count)
  elif nrefinedstructures==ninputstructures:
    print 'All structures have been refined, starting analysis of refined structures'

  # DO THE ACTUAL WATERREFINEMENT
  # INPUTSTRUCTURES ARE NOW TAKEN FROM RUNPATH
  # OUTPUTSTRUCTURES ARE WRITTEN TO CURRENT CYCLE DIRECTORY
  nmv_xplor.waterrefine(xplor,
                        topparpath,
                        protocolspath,
                        inputcoordinatespath,
                        outputcoordinatespath,
                        runpath,
                        nstructures,
                        start_count,
                        analyzed_pdb_base,
                        refined_pdb_base,
                        psfpath,
                        psf_file,
                        tablepath,
                        noe_file=noe_file,
                        noe_ave=noe_ave,
                        noe_accept=noe_accept,
                        cdih_file=cdih_file,
                        cdih_accept=cdih_accept,
                        coup_file=coup_file,
                        coup_accept=coup_accept,
                        sani_file=sani_file,
                        sani_accept=sani_accept,
                        vean_file=vean_file,
                        vean_accept=vean_accept,
                        seed=seed,
                        heat_steps=heat_steps,
                        hot_steps=hot_steps,
                        cool_steps=cool_steps,
                        run_cluster=run_cluster,
                        queu_cluster=queu_cluster)

  # CHECK EVERY 10 SECS WHETHER ALL STRUCTURES ARE CALCULATED (NEEDED WHEN RUNNING ON A CLUSTER)
  # check acceptance criteria and loop until we have enough accepted structures
  done=0
  while not done:
    time.sleep(10)
    if len(glob.glob(os.path.join(outputcoordinatespath,refined_pdb_base+'*.pdb*')))==ninputstructures:
      done=1

  # RUN THE OTHER CHECKS
  # CREATE INPUT SCRIPTS FOR THE CHECKING ROUTINES
  checkscript=open(checkscriptfilename,'w')
  checkscript.write('#!/bin/tcsh -f\n')
  # SET ENVIRONMENT VARIABLES
  checkscript.write('setenv prodir %s\n'%procheck_dir)
  # WRITE A XPLOR FILE-LIST CONTAINING THE INFORMATION ABOUT THE ANALYZED INPUT STRUCTURES
  # FILES ARE SORTED ON RESTRAINT ENERGY (SET ENERGYSORT), THE FUNCTION USES THE PDB-HEADER INFORMATION
  checkscript.write('%s -writexplorfilelist %s %s %s\n'%(os.path.join(scriptspath,'nmv_nmrcheck.py'),\
                    outputcoordinatespath,\
                    refined_pdb_base,\
                    energysort))

  # AUTOMATICALLY CHECK THE INPUT STRUCTURES WITH PROCHECK AND WHATIF
  # PROFIT IS USED FOR RMSD CALCULATIONS
  checkscript.write('%s -checkall %s %s %s %s %s %s %s %s %s %s\n'%(os.path.join(scriptspath,'nmv_nmrcheck.py'),\
                    profit,\
                    procheck_run,\
                    prochecknmr_run,\
                    procheck_dir,\
                    whatif_run,\
                    outputcoordinatespath,\
                    refined_pdb_base,\
                    zonefile,\
                    selection,
                    tmppath))

  # CREATE A SUMMARY FILE THAT CONTAINS ALL INFORMATION
  checkscript.write('%s -summary %s %s \n'%(os.path.join(scriptspath,'nmv_nmrcheck.py'),\
                    outputcoordinatespath,\
                    refined_pdb_base))
  checkscript.close()

  # SUBMIT THE SCRIPT EITHER TO THE CLUSTER OR ON A SINGLE PROCESSOR MACHINE
  os.system('/bin/chmod +x %s'%checkscriptfilename)
  if run_cluster=='y':
    os.system('%s %s &'%(queu_cluster,checkscriptfilename))
  else:
    os.system('%s'%checkscriptfilename)

  # CHECK EVERY 10 SECS WHETHER THE CHECKING ROUTINES ARE FINISHED (NEEDED WHEN RUNNING ON A CLUSTER)
  done=0
  while not done:
    time.sleep(10)
    if os.path.exists(os.path.join(outputcoordinatespath,'summary_%s.txt'%refined_pdb_base)):
      done=1

  # COMPRESS JOBS, PROCHECK AND WHATCHECK SUBDIRECTORIES
  current_location=os.getcwd()
  os.chdir(outputcoordinatespath)
  #os.system('tar cfz jobs.tgz jobs')
  #os.system('tar cfz procheck.tgz procheck')
  #os.system('tar cfz whatcheck.tgz whatcheck')
  os.chdir(current_location)

  # REMOVE THE SCRIPTFILES AND PROCHECK AND WHATCHECK DIRECTORIES
  # IN THE FINISHED CYCLE
  for file in glob.glob(os.path.join(outputcoordinatespath,'*.tmp*')):
    os.remove(file)
  #nmv_dsc.removedir(runpath)
  #nmv_dsc.removedir(procheckpath)
  #nmv_dsc.removedir(whatcheckpath)
  print 'Refinement and analysis of the input structures is finished'