#!/bin/sh
# PURPOSE: 	createJob: prepares jobs for off-line execution
# AUTHOR:       Reinhard Hanuschik / ESO
# VERSIONS:     1.0 -- rewritten from scratch (2004-08-11)
#		       writes DFO status, uses dfosLog 
#		1.0.1- supported DRS_TYPEs read from config.processAB (2005-04-05)
#		1.1 -- support for CONDOR added (2005-06-03)
#		1.1.1- added $JOB_ID for CONDOR; removes orphan WAITFORs; REI support switched off (2005-10-20)
#		1.1.2- small bug fixed if no TRIGGER defined (2005-10-22)
#		1.1.3- call of dfoMonitor added after measureQuality (2006-01-17)
#		1.2 -- compatible with createAB v2.0 (2006-03-03)
#		1.2.1- compatible with new version of CONDOR (2006-03-13)
#		1.3 -- supports secondary cascades for MEF files; subcascades supported; dfosLog suspended (2006-05-15)
#		1.4 -- creates execQC job file instead of calling measureQuality (2006-09-20)
#		1.5 -- in CALIB mode: remove all raw fits files after QC processing if enabled; stop RBS support; checks for pre-existing entries with $MODE+$DATE in JOB_FILE_NAME; supports incremental createAB_v2.5 (2008-09-16)
#		1.5.1- final call of getStatusAB added (to update total score) (2009-01-12)
#		1.5.2- no entry in JOB_FILE if AB_list is empty (2009-01-21)
#		1.5.3- new option -F (to call getStatusAB -F), used for exporting links to safweb1 (2009-03-25)
#		1.6 -- support for creating execHC files; new key SCHED_TYPE; new option IMPLICIT for QCBQS_TYPE; joinAB improved (2009-07-21)
#		1.6.1- bug fixes in joinAB (2009-09-30) [WHu]
#		1.6.2- optional call of $PLUGIN (2009-10-26) [WHu]
#		1.7 -- new parameter -r, to support createAB v3.0 (2010-06-02)
#		1.8 -- INITIAL and FINAL standardized; JOB_FILE_NAME=JOBS_NIGHT hard-coded; FILE_MODE obsolete (2013-01-09)
#		2.0 -- DET_SPLITTING and ABscheduler support terminated, SCHED_TYPE obsolete; new mcalDown file; new: QC_CLUSTER and MONO_CALL (2013-01-24)
#		2.1 -- ready for muc: rawDown and mcalDown files, calls cascadeMonitor (2013-02-14)
#		2.1.1- calls cascadeMonitor -D in addition to -d; requires cascadeMonitor v1.1 (2013-03-05)
#		2.1.2- bug fix in line :841 ($CASC_CALL) (2013-03-06)
#		2.1.3- QC_CLUSTER removed, code cleaned for obsolete DET_SPLITTING; rawDown file also offered for DRS_TYPE=CPL (2014-05-29)
#		2.1.4- typo fixed (2014-07-14)
#		2.1.5- DRS_TYPE=INT supported (2014-07-22)
#		2.2 -- compatible with DFOS and PHOENIX; CHECK_MUC obsolete (2014-08-14)
#		2.2.1- minor bugs fixed in rawDown and execAB calls (2015-09-02)
#		2.2.2- small bug fix in code related to rawDown file (2015-10-19)
#		2.2.3- removed: MONO_CALL; added: option -l for DRS_TYPE=INT (2015-11-02)
#		2.3beta1 - OPSHUB aware, sending config.processAB to qcweb (2018-06-26)
#		2.3 -- fully OPSHUB aware; optional config keys MAX_JOBS and FULL_RAWDOWNLOAD, set by OPSHUB tool; DRS_TYPE=INT: evaluate dependencies (was missing before) (2018-10-22)
# 		2.3.1- drop the 'watcher' command in JOBS file, obsolete; slightly modified text in JOB_FILE_NAME (2018-11-16)
#		2.3.2- dependency iteration for DRS_TYPE=INT (2018-11-30)
# 		2.3.3- sequence of rawDown and getStatusAB calls switched; OPSHUB: progress bar for downloads (2019-03-25)
# 		2.3.4- OPSHUB: MAX_CORES can be configured, default is 8 (2019-04-16)
# 		2.3.5- removes dependencies for cascadeMonitor (2019-09-17)
#	
# PARAMETERS:	-m mode (CALIB or SCIENCE)
#     	   	-d date (DATE)
#		-a <ab name>, creates subcascade for parent AB
#		-F call getStatusAB -F (only within autoDaily!)
#
# CONFIG:	config.createJob; also: 
#			OCA/config.filterRaw	for HIDE_DIR
#			OCA/config.createAB 	for DRS_TYPE
#			config.processQC 	for QCBQS_TYPE
#
# TOOLS CALLED: none
#
# OUTPUT:	executable files
#		- rawDown_CALIB_<DATE>  in $DFO_JOB_DIR with raw file download requests (first 50 raw files for current cascade)
# 		- mcalDown_CALIB_<DATE> in $DFO_JOB_DIR with download requests (all for current cascade)
#		- execAB_<MODE>_<DATE>  in $DFO_JOB_DIR with pipeline recipe calls
#		- execQC_<MODE>_<DATE>  in $DFO_JOB_DIR with QC report calls
#               - command lines in JOBS_NIGHT
#		
# COMMENTS:	reads DRS_TYPE to find syntax for command lines
#		supported: CPL | CON | INT
#		supporting PHOENIX and OPSHUB environments.
#		MAX_CORES is configurable; default is 8 unless OPSHUB (1). 
# ======================================================================
TOOL_VERSION="2.3.5"

# Detect the runtime environment from exported flags; anything other than
# an explicit YES collapses to NO.
if [ Q$THIS_IS_OPSHUB != QYES ]
then
	THIS_IS_OPSHUB=NO
fi

if [ Q$THIS_IS_PHOENIX != QYES ]
then
	THIS_IS_PHOENIX=NO
fi

# Plain DFOS is the default: neither OPSHUB nor PHOENIX.
if [ $THIS_IS_OPSHUB = NO ] && [ $THIS_IS_PHOENIX = NO ]
then
	THIS_IS_DFOS=YES
else
	THIS_IS_DFOS=NO
fi

# OPSHUB uses per-project job files (JOBS_<PROJECT>); everything else uses
# the hard-coded JOBS_NIGHT. The -v/-h calls don't need PROJECT.
if [ $THIS_IS_OPSHUB = YES ] && [ Q$1 != Q-v ] && [ Q$1 != Q-h ]
then
	if [ Q$PROJECT = Q ]
	then
		echo "*** ERROR: PROJECT not defined. Exit."
		# bug fix: the message promised an exit but the script used to
		# continue with JOB_FILE_NAME unset
		exit 1
	else
		JOB_FILE_NAME=JOBS_$PROJECT
	fi
else
	JOB_FILE_NAME=JOBS_NIGHT
fi

# =========================================================================
# 0. Initialize
# 0.1 check for directory variables which should be set globally in .dfosrc
# =========================================================================

# Sanity check: at least one DFO_* variable must be in the environment,
# otherwise ~/.dfosrc was not sourced.
CHECK=`printenv | grep DFO`

if [ "Q$CHECK" = "Q" ]
then
        echo "***ERROR: DFO variables not defined. Check ~/.dfosrc and restart."
        exit -1
fi

# The tool's own config file must exist and be non-empty.
if [ ! -s $DFO_CONFIG_DIR/config.createJob ]
then
        echo "*** ERROR: No configuration file $DFO_CONFIG_DIR/config.createJob found. Check and re-start."
        exit 2
fi

# Target directory on the web server for the config.processAB upload (see 0.4).
WEB_DIR=/home/qc/qc/${DFO_INSTRUMENT}

# =========================================================================
# 0.2 read options and parameters
# =========================================================================

# No arguments at all: display the help file and exit.
if [ $# = 0 ] 
then
        cat $DFO_DOC_DIR/createJob.h | more
        exit 0
fi

# Defaults for the command-line switches (see the header for semantics).
AB_FLAG="NO"       # -a: interactively create a subcascade for a parent AB
SCP_YN="NO"        # -F: forward -F to getStatusAB (autoDaily only)
RECREATE="NO"      # -r: recreate mode, with explicitly given JOB_ID

# Parse the options; -m, -d and -r take an argument.
while getopts m:d:aFr:hv opt
do
	case "$opt" in
	 m ) export MODE=$OPTARG ;;
	 d ) export DATE=$OPTARG ;;
	 a ) AB_FLAG=YES ;;
	 F ) SCP_YN=YES ;;
	 r ) RECREATE=YES ; export JOB_ID=$OPTARG ;;
	 v ) echo $TOOL_VERSION
	     exit 0 ;;
	 h ) cat $DFO_DOC_DIR/createJob.h | more
	     exit 0 ;;
	 ? ) cat $DFO_DOC_DIR/createJob.h | more
	     exit 0 ;;
	esac
done

# =========================================================================
# 0.3 Check params
# =========================================================================

# MODE is mandatory and restricted to the two supported values.
if [ $MODE != "CALIB" ] && [ $MODE != "SCIENCE" ]
then
        echo "***ERROR: Mode has to be CALIB or SCIENCE. Re-start."
        exit -1
fi

# DATE must look like 2004-04-04 (10 characters + newline = 11 from wc -c).
CHECK=`echo $DATE | wc -c`
if [ $CHECK != 11 ]
then
	# bug fix: the message used to print $2 (a raw positional argument,
	# meaningless after getopts parsing) instead of the offending $DATE
        echo "***ERROR: wrong date format: $DATE; should be: 2004-04-04; start again."
        exit -1
fi

# -F is only meaningful for CALIB mode.
if [ $MODE != "CALIB" ] && [ $SCP_YN = YES ]
then
	echo "***ERROR: option -F can only be used for MODE = CALIB; start again."
	exit -1
fi

# SCP_FLAG is forwarded to getStatusAB later; SCP_ENABLE becomes a line
# written into the job file (section 5.4).
case $SCP_YN in
 "YES" ) SCP_FLAG="-F" ; SCP_ENABLE="cd $DFO_JOB_DIR; export CALL_ENABLED=YES # $DATE";;
 *     ) SCP_FLAG="" ;   SCP_ENABLE="cd $DFO_JOB_DIR # $DATE" ;;
esac

# =========================================================================
# 0.4 Initialize some parameters
# =========================================================================

SCHED_TYPE=VULTUR	# always VULTUR, no ABScheduler support with version 2.0

# DRS_TYPE (CPL|CON|INT) selects the command-line syntax for pipeline calls.
DRS_TYPE=`grep  "^DRS_TYPE" 	$DFO_CONFIG_DIR/OCA/config.createAB | awk '{print $2}'`
if [ $THIS_IS_OPSHUB = NO ]
then
	# HIDE_DIR: secondary raw data directory, used by the CAL_CLEANUP
	# raw file deletion in section 5.4
	HIDE_DIR=`grep  "^HIDE_DIR"  	$DFO_CONFIG_DIR/OCA/config.filterRaw | awk '{print $2}'`
fi

# Verify that the configured DRS_TYPE is listed as #SUPPORTED in createAB.
rm -f $TMP_DIR/list_supported
if [ $THIS_IS_PHOENIX = NO ]
then
	grep "^#SUPPORTED" 	$DFO_BIN_DIR/createAB | awk '{print $2}' > $TMP_DIR/list_supported
	CHECK_SUPPORT=`grep $DRS_TYPE $TMP_DIR/list_supported`

	if [ "Q$CHECK_SUPPORT" = "Q" ]
	then
		echo "***ERROR: unsupported DRS_TYPE $DRS_TYPE. Exit."
		exit -1
	fi
# THIS_IS_PHOENIX=YES: no check needed, strict control
else
	:
fi

# QCBQS_TYPE (SER|PAR|IMPLICIT) selects how QC report jobs are scheduled
# (sections 3.5/3.6/5.4); defaults to SER if not configured.
if [ $THIS_IS_OPSHUB = NO ]
then
	QCBQS_TYPE=`grep "^QCBQS_TYPE" 	$DFO_CONFIG_DIR/config.processQC | awk '{print $2}'`
	if [ "Q$QCBQS_TYPE" = "Q" ]
	then
		echo "***WARNING: no \$QCBQS_TYPE defined in config.processQC, set to SER."
		QCBQS_TYPE=SER
	fi

	echo "DRS_TYPE found for pipeline calls: $DRS_TYPE" 
	echo "QCBQS_TYPE used for QC reports:    $QCBQS_TYPE"
	echo ""
else
	echo "DRS_TYPE found for pipeline calls: $DRS_TYPE"
fi

# RECREATE=NO: standard JOB_ID, otherwise specified with -r
if [ $RECREATE = NO ]
then
	export JOB_ID="${MODE}_${DATE}"
fi

# CAL_CLEANUP=YES: delete raw fits files after QC processing (CALIB only).
CAL_CLEANUP=`grep "^CAL_CLEANUP"	$DFO_CONFIG_DIR/config.createJob | awk '{print $2}'`
if [ Q$CAL_CLEANUP != QYES ]
then
	CAL_CLEANUP=NO
fi

# MAX_JOBS (optional, set by the OPSHUB tool): cap on parallel SCIENCE jobs;
# UNDEF means unlimited (no splitting, see section 5.3).
MAX_JOBS=`grep "^MAX_JOBS"		$DFO_CONFIG_DIR/config.createJob | awk '{print $2}'`
if [ Q$MAX_JOBS = Q ]
then
	MAX_JOBS=UNDEF
fi

# FULL_RAWDOWNLOAD (optional, set by the OPSHUB tool): YES means all raw
# files are downloaded before processing starts (section 4.1).
FULL_RAWDOWNLOAD=`grep "^FULL_RAWDOWNLOAD"	$DFO_CONFIG_DIR/config.createJob | awk '{print $2}'`
# normalize: anything other than YES (including empty/unset) becomes NO;
# the former extra emptiness test was redundant
if [ Q$FULL_RAWDOWNLOAD != QYES ]
then
	FULL_RAWDOWNLOAD=NO
fi

VULTUR_CALL="vultur_exec_cascade"	# CONDOR cascade launcher command

# WORK_AREA from config.processAB; eval expands variables embedded in the value.
WORK_AREA=`grep "^WORK_AREA" $DFO_CONFIG_DIR/config.processAB | awk '{print $2}'`
export WORK_AREA=`eval "echo $WORK_AREA"`

# MAX_CORES (used for download requests): this is depending on the NGAS nodes and is hard-coded unless configured
MAX_CORES=`grep "^MAX_CORES"      $DFO_CONFIG_DIR/config.createJob | awk '{print $2}'`
if [ Q$MAX_CORES = Q ]
then
	MAX_CORES=8
fi

# upload config.processAB to the QC web server (plain DFOS only)
if [ $THIS_IS_DFOS = YES ]
then
	scp -o BatchMode=yes $DFO_CONFIG_DIR/config.processAB  ${DFO_WEB_SERVER}:${WEB_DIR}/ 1>/dev/null
fi

# ======================================================================
# 0.5  procedure createSub: create subcascade for given parent AB
#      requires an existing job file execAB defined by -d and -m
#      creates a new job file execAB_<mode>_<date>_sub
# ======================================================================

# Globals read: PARENT_AB, JOBFILE, TMP_DIR, VULTUR_CALL, JOB_ID.
# Output: ${JOBFILE}_sub containing the parent AB plus all ABs that
# reference it; the parent's own (presumably completed) WAITFORs are
# stripped or replaced by NONE.
createSub(){
# a) find and eliminate all ABs listed as WAITFORs for given parent AB
rm -f $TMP_DIR/list_mwaitfors
if [ ! -s $JOBFILE ]
then
	echo "***ERROR: $JOBFILE not found. Can't create subcascade."
	exit -1
fi

# field 10 of a CON job line is the WAITFOR list (standard line layout;
# NOTE(review): with the OPSHUB -i option the field count shifts -- verify)
grep "^$PARENT_AB" $JOBFILE | awk '{print $10}' | sort -u > $TMP_DIR/list_mwaitfors
cp $JOBFILE $TMP_DIR/jobfile

# drop the parent's WAITFOR ABs from the working copy; remaining references
# to them are removed from lists (",$W") or replaced by NONE
if [ -s $TMP_DIR/list_mwaitfors ]
then
	for W in `cat $TMP_DIR/list_mwaitfors`
	do
		grep -v "^$W" $TMP_DIR/jobfile | sed "s/,$W//" | sed "s/$W/NONE/" > $TMP_DIR/jobfile1
		mv $TMP_DIR/jobfile1 $TMP_DIR/jobfile
	done
fi

# b) Remove all ABs *not* having $PARENT_AB as WAITFOR 
cat > ${JOBFILE}_sub <<EOT
#!/bin/sh
# This is a subcascade extracted from $JOBFILE.
# PARENT AB: $PARENT_AB

EOT

# keep the parent's own line(s), then every other line that mentions the parent
grep "^$PARENT_AB" $TMP_DIR/jobfile >> ${JOBFILE}_sub
grep -v "^$PARENT_AB" $TMP_DIR/jobfile | grep "$PARENT_AB" >> ${JOBFILE}_sub
echo "created:   ${JOBFILE}_sub"
echo "Launch as: $VULTUR_CALL --dagId=${JOB_ID} --jobs=${JOBFILE}_sub --wait"
}

# Derived file names; JOB_ID is <MODE>_<DATE> unless overridden with -r.
JOBFILE="$DFO_JOB_DIR/execAB_${JOB_ID}"
QCFILE="$DFO_JOB_DIR/execQC_${JOB_ID}"
HC1FILE="execHC1_${JOB_ID}"

# ======================================================================
# START
# 1. PARENT_AB given: create subcascade and exit
# ======================================================================

# -a is interactive: list the ABs in the existing job file and ask for
# the parent of the subcascade. Supported for CONDOR (CON) only.
if [ "$AB_FLAG" = "YES" ] && [ $DRS_TYPE = CON ]
then
	echo "Creation of subcascade ..."
	echo ""
	cat $JOBFILE | grep -v "^#"  | awk '{print $1}'
	echo "Enter AB for subcascade:"
	read PARENT_AB
	if [ "Q$PARENT_AB" != "Q" ]
	then
		createSub
		exit 0
	else
		echo "***ERROR: No parent AB defined, can't do subcascade creation."
		exit -1
	fi
elif [ "$AB_FLAG" = "YES" ]
then
	echo "***ERROR: creation of subcascade supported for DRS_TYPE=CON only."
	exit -1
fi	

# ======================================================================
# 2. Find AB_list
# ======================================================================

# AB_list in $DFO_MON_DIR drives everything below (presumably written by
# createAB -- see header); nothing to do without it.
AB_LIST="$DFO_MON_DIR/AB_list_${JOB_ID}"
if [ $RECREATE = YES ]
then
        AB_LIST="$DFO_MON_DIR/AB_list_${MODE}_${DATE}_recreate"
fi

if [ ! -s $AB_LIST ]
then
	echo "***WARNING: No $AB_LIST found. Exit." 
	exit 0
fi

# ======================================================================
# 3. Create the launcher script
# 3.1 INITIAL/FINAL calls: obsolete 
# 3.2 Start filling $JOBFILE and $QCFILE
# 3.2.1 $JOBFILE
# ======================================================================
cat > $JOBFILE <<EOT
#!/bin/sh
# DFOS
# This is the list of AB calls in DRS command-line syntax.
# This file is executable.
# Used DRS_TYPE: $DRS_TYPE
# =================================================
EOT

# ======================================================================
# 3.2.2 $QCFILE
# ======================================================================

cat > $QCFILE <<EOT
#!/bin/sh
# DFOS
# This is the list of QC calls per AB.
# This file is executable.
# =================================================
EOT

# ======================================================================
# 3.3 Pipeline calls for DRS = CPL:
#     These calls are single AB calls, no dependencies to be extracted
# ======================================================================

# one 'processAB -a <AB>' line per pending AB; comment lines, DONE entries
# and empty lines in AB_LIST are skipped
if [ $DRS_TYPE = "CPL" ] 
then
	cat $AB_LIST | grep -v "^#" | grep -v DONE | sed "/^$/d" | awk '{print $1}' | sed "s/^.*/processAB -a &/" >> $JOBFILE
fi

# ======================================================================
# 3.4 Pipeline calls for CON or INT (extract dependencies)
# 3.4.1 find all waitfors
# ======================================================================

if [ $DRS_TYPE = "CON" ] || [ $DRS_TYPE = "INT" ]
then
	echo "- creating the primary cascade ..."
# tlist: list of all existing ABs
# wlist: list of ABs that are listed as WAITFORs 
	rm -f $TMP_DIR/all_tlist $TMP_DIR/all_wlist
	for AB in `cat $AB_LIST | grep -v "^#" | grep -v DONE | sed "/^$/d" | awk '{print $1}'`
	do
		# every AB listed in AB_LIST must exist as a file in $DFO_AB_DIR
		if [ ! -s $DFO_AB_DIR/$AB ]
		then
			echo "***ERROR: $AB not found in \$DFO_AB_DIR. Check!" 
			exit -1
		fi

		echo $AB >> $TMP_DIR/all_tlist
		grep "^WAITFOR" $DFO_AB_DIR/$AB | grep -v NONE | awk '{print $2}' >> $TMP_DIR/all_wlist
	done

# make entries unique
	if [ -s $TMP_DIR/all_tlist ]
	then
		cat $TMP_DIR/all_tlist | sort -u > $TMP_DIR/tmp_list
		mv $TMP_DIR/tmp_list $TMP_DIR/all_tlist
	fi

	if [ -s $TMP_DIR/all_wlist ]
	then
		cat $TMP_DIR/all_wlist | sort -u > $TMP_DIR/tmp_list
		mv  $TMP_DIR/tmp_list $TMP_DIR/all_wlist
	fi

# ======================================================================	
# 3.4.2 find orphan waitfors (w/o valid job_id) and remove them 
#       (CONDOR will otherwise stop and exit)
# ======================================================================	

	JOBFILE1="${JOBFILE}1"
	rm -f ${JOBFILE}1
	for AB in `cat $AB_LIST | grep -v "^#" | grep -v DONE | sed "/^$/d" | awk '{print $1}'`
	do
		rm -f $TMP_DIR/wlist
		# wlist: this AB's own WAITFOR dependencies
		grep "^WAITFOR" $DFO_AB_DIR/$AB | grep -v NONE | awk '{print $2}' > $TMP_DIR/wlist

		WAITFOR="NONE"

		if [ -s $TMP_DIR/wlist ]
		then
			rm -f $TMP_DIR/wlist1 $TMP_DIR/wlist2
			# wlist1 collects orphans: WAITFOR entries with no
			# corresponding AB in this cascade (all_tlist)
			for W in `cat $TMP_DIR/wlist`
			do
				CHECK=`grep "^${W}$" $TMP_DIR/all_tlist`
				if [ "Q$CHECK" = "Q" ]
				then
					echo $W >> $TMP_DIR/wlist1
				fi
			done

			# strip the orphans from wlist
			if [ -s $TMP_DIR/wlist1 ]
			then
				for W in `cat $TMP_DIR/wlist1`
				do
					grep -v "^${W}$" $TMP_DIR/wlist >> $TMP_DIR/wlist2
					mv $TMP_DIR/wlist2 $TMP_DIR/wlist
				done
			fi

			# surviving dependencies become a comma-separated list
			if [ -s $TMP_DIR/wlist ]
			then
				WAITFOR=`cat $TMP_DIR/wlist | tr "\012" "," | sed "s/,$//"`
			fi
		fi

		if [ $THIS_IS_OPSHUB = YES ]
		then
			OPSHUB_INSTR="-i $DFO_INSTRUMENT "
		else
			OPSHUB_INSTR=""
		fi

		# CON: final CONDOR job line; INT: intermediate "<AB> || <deps>"
		# line, resolved into an ordered list in 3.4.3 below
		case $DRS_TYPE in
		 "CON" ) echo "$AB || $DFO_BIN_DIR/processAB || -a $AB ${OPSHUB_INSTR}-u $USER || $WAITFOR" >> $JOBFILE ;;
		 #"INT" ) echo "processAB -a $AB -l" >> $JOBFILE 
		 "INT" ) echo "$AB || $WAITFOR " >> $JOBFILE1 ;;
		esac
	done

# ======================================================================	
# 3.4.3: evaluate dependencies for DRS_TYPE=INT
# ======================================================================	

	if [ $DRS_TYPE = INT ] 
	then

# no dependencies: these are the first ones to execute
# (a dependency is recognized as an AB name after '||'; AB names are
# assumed to contain $DFO_INSTRUMENT -- TODO confirm)
		grep -v "|| .*${DFO_INSTRUMENT}.*" $JOBFILE1 >> $JOBFILE
		for DEP in `grep -v "^#" $JOBFILE | awk '{print $1}'`
		do
			sed -i -e "/^$DEP/d" $JOBFILE1
			sed -i -e "s/$DEP/NONE/g" $JOBFILE1
		done

# do iterations until all ABs are analyzed
# NOTE(review): a dependency cycle among the remaining ABs would make this
# loop run forever -- assumed acyclic, verify upstream guarantees
		NO_ITERATION=1
		if [ -s $JOBFILE1 ]
		then
			echo "- DRS_TYPE=INT: iterate dependencies ..."
			while [ -s $JOBFILE1 ]
			do
				echo "	iteration #$NO_ITERATION ..."
				grep -v "|| .*${DFO_INSTRUMENT}.*" $JOBFILE1 >> $JOBFILE
				for DEP in `grep -v "|| .*${DFO_INSTRUMENT}.*" $JOBFILE1 | awk '{print $1}'`
				do
					sed -i -e "/^$DEP/d"      $JOBFILE1
					sed -i -e "s/$DEP/NONE/g" $JOBFILE1
				done
				NO_ITERATION=`echo $NO_ITERATION | awk '{print $1+1}'`
			done
		fi

# finish the JOBFILE: strip the '|| ...' part and turn each AB line into
# a sequential 'processAB -a <AB> -l' call
		sed -i -e "/^${DFO_INSTRUMENT}/s/||.*//" $JOBFILE
		sed -i -e "/^${DFO_INSTRUMENT}/s|^.*|processAB -a & -l|" $JOBFILE
		rm -f $JOBFILE1
	fi
fi

# ======================================================================	
# 3.5 QC jobs for SER (serial processing)
# ======================================================================	

if [ $THIS_IS_OPSHUB = NO ]
then
	# SER: plain sequential 'processQC -a <AB>' calls
	if [ $QCBQS_TYPE = "SER" ] 
	then
		cat $AB_LIST  | grep -v "^#" | grep -v DONE | sed "/^$/d" | awk '{print $1}' | sed "s/^.*/processQC -a &/" >> $QCFILE 
	fi

# ======================================================================	
# 3.6 QC jobs for PAR (parallel processing)
# ======================================================================	

	# PAR: CONDOR-style job lines without dependencies (WAITFOR = NONE)
	if [ $QCBQS_TYPE = "PAR" ] 
	then
		for AB in `cat $AB_LIST | grep -v "^#" | grep -v DONE | sed "/^$/d" | awk '{print $1}'`
		do
		  	echo "$AB || $DFO_BIN_DIR/processQC || -a $AB -u $USER || NONE " >> $QCFILE 
		done
	fi

	chmod u+x $JOBFILE $QCFILE
else
	# OPSHUB: no QC job file at all
	rm  $QCFILE
	chmod u+x $JOBFILE
fi

# ======================================================================
# 4. Prepare download jobs
# 4.1 Create the raw download job 
# We need to organize the raw data downloads a bit to avoid overloading
# ngasClient and NGAS, and also the CPUs if tile uncompression is done.
# Current model, unless FULL_RAWDOWNLOAD=YES: 
# - download the first 50 raw files in organized pattern of max. 8 downloads
# - all subsequent raw downloads will likely be spread in time and are not
#   critical
# If FULL_RAWDOWNLOAD=YES: 
# - download all N raw files in organized pattern of max. 8 downloads
# - start processing only when this is finished
# ======================================================================

RAWDOWNFILE=rawDown_${MODE}_$DATE
rm -f $TMP_DIR/cj_raw*
# collect all RAWFILE entries of the pending ABs
for AB in `cat $AB_LIST | grep -v "^#" | grep -v DONE | awk '{print $1}'`
do
	grep "^RAWFILE" $DFO_AB_DIR/$AB | awk '{print $2}' | grep -v NONE >> $TMP_DIR/cj_raw
done

# make it unique (certain rawfiles could be contained in multiple ABs: PHOENIX MUSE!)
if [ -s $TMP_DIR/cj_raw ]
then
	cat $TMP_DIR/cj_raw | sort -u > $TMP_DIR/cj_raw1
	mv $TMP_DIR/cj_raw1 $TMP_DIR/cj_raw
fi

# $JOBFILE could be empty since all processed already
if [ -s $TMP_DIR/cj_raw ]
then
	
	cat > $DFO_JOB_DIR/$RAWDOWNFILE <<EOT
#!/bin/sh
# DFOS
# This is the list of RAW download calls to be executed before starting the cascade.
# This file is executable.
# =================================================
if [ ! -d $DFO_RAW_DIR/$DATE ]
then
	mkdir $DFO_RAW_DIR/$DATE
fi
cd $DFO_RAW_DIR/$DATE
EOT

# first 50 raw files or ALL
# tag every MAX_CORESth file as FULL (synchronization point, no '&'),
# the rest as PARA (background) -> batches of MAX_CORES parallel downloads
	if [ $FULL_RAWDOWNLOAD = NO ]
	then
		cat $TMP_DIR/cj_raw | head -50 | awk '{if (NR==int(NR/maxcores)*maxcores) {print "FULL",$1} else {print "PARA",$1}}' maxcores=$MAX_CORES > $TMP_DIR/cj_raw1
	else
		cat $TMP_DIR/cj_raw | awk '{if (NR==int(NR/maxcores)*maxcores) {print "FULL",$1} else {print "PARA",$1}}' maxcores=$MAX_CORES > $TMP_DIR/cj_raw1
	fi

	for R in `cat $TMP_DIR/cj_raw1 | awk '{print $2}'`
	do
		R1=`basename $R`
		INDEX=`grep $R $TMP_DIR/cj_raw1 | awk '{print $1}'`

# feedback given on command line
# (OPSHUB: each download prints one dot as a progress indicator)
		if [ $THIS_IS_OPSHUB = YES ]
		then
			case $INDEX in
		 	 "FULL" ) echo "echo \"\" | tr \"\n\" \".\" ; ngasClient -f $R1 1>/dev/null "  >> $TMP_DIR/cj_raw2 ; echo "" >> $TMP_DIR/cj_raw2 ;;
		 	 "PARA" ) echo "echo \"\" | tr \"\n\" \".\" ; ngasClient -f $R1 1>/dev/null &" >> $TMP_DIR/cj_raw2 ;;
			esac
# normal cases: no feedback
		else
			case $INDEX in
		 	 "FULL" ) echo "ngasClient -f $R1" >> $TMP_DIR/cj_raw2 ; echo "" >> $TMP_DIR/cj_raw2 ;;
			 "PARA" ) echo "ngasClient -f $R1 &" >> $TMP_DIR/cj_raw2 ;;
			esac
		fi
	done
# last job: always w/o &
	sed -i -e "$,$ s/\&$//" $TMP_DIR/cj_raw2

	chmod u+x $DFO_JOB_DIR/$RAWDOWNFILE
	cat $TMP_DIR/cj_raw2 >> $DFO_JOB_DIR/$RAWDOWNFILE

# ======================================================================
# 4.2 Create the mcal download job (for MODE=CALIB only; OPSHUB: CALIB and SCIENCE)
# We want to download all required mcalibs first before we start processing
# (unless we work with DRS_TYPE=CPL).
# ======================================================================

	if ([ $MODE = CALIB ] || ([ $THIS_IS_OPSHUB = YES ] && [ $MODE = SCIENCE ])) && [ $DRS_TYPE != CPL ]
	then
		MCALDOWNFILE=mcalDown_${MODE}_$DATE
		rm -f $TMP_DIR/cj_mcalibs $TMP_DIR/$MCALDOWNFILE
		# collect all REAL master calibrations referenced by the ABs
		for AB in `cat $AB_LIST | grep -v "^#" | grep -v DONE | awk '{print $1}'`
		do
			grep "^MCALIB" $DFO_AB_DIR/$AB | grep REAL | awk '{print $3}' >> $TMP_DIR/cj_mcalibs
		done

		if [ -s $TMP_DIR/cj_mcalibs ]
		then
			cat $TMP_DIR/cj_mcalibs | sort -u > $TMP_DIR/cj_mcalibs1
			for MC in `cat $TMP_DIR/cj_mcalibs1`
			do
				MC1=`eval "echo $MC"`
				# download only mcalibs not already on disk
				if [ ! -s $MC1 ]
				then
					MC2=`basename $MC1`
					MCPATH=`dirname $MC1`
					if [ $THIS_IS_OPSHUB = YES ]
					then
						echo "cd $MCPATH; echo \"\" | tr \"\n\" \".\"; ngasClient -c $MC2 1>/dev/null; echo \"$MC1\" >> $DFO_MON_DIR/MCAL_DOWNLOAD" >> $TMP_DIR/$MCALDOWNFILE
					else
						echo "cd $MCPATH; ngasClient -c $MC2; echo \"$MC1\" >> $DFO_MON_DIR/MCAL_DOWNLOAD" >> $TMP_DIR/$MCALDOWNFILE
					fi
				fi
			done
	
			if [ -s $TMP_DIR/$MCALDOWNFILE ]
			then
				cat > $DFO_JOB_DIR/$MCALDOWNFILE <<EOT
#!/bin/sh
# DFOS
# This is the list of MCALIB download calls to be executed before starting the cascade.
# This file is executable.
# =================================================
EOT
				cat $TMP_DIR/$MCALDOWNFILE >> $DFO_JOB_DIR/$MCALDOWNFILE	
				echo "- $DFO_JOB_DIR/$MCALDOWNFILE created"
				chmod u+x $DFO_JOB_DIR/$MCALDOWNFILE
			else
				echo "No mcalDown file created, all downloads done at begin of processing."
			fi	
		else
			echo "No mcalDown file created, all downloads done at begin of processing."
		fi
	fi
fi

# ======================================================================
# 4.3 call optional plugin
# ======================================================================

# PLUGIN (optional config key): name of an executable in $DFO_BIN_DIR,
# called once after the job files have been written.
# Cleanup: direct grep instead of 'cat | grep'; consistent Q-quoting
# and indentation; behavior unchanged.
PLUGIN=`grep "^PLUGIN" $DFO_CONFIG_DIR/config.createJob | awk '{print $2}'`
if [ Q$PLUGIN != Q ]
then
	if [ -f $DFO_BIN_DIR/${PLUGIN} ]
	then
		echo "***INFO: \$DFO_BIN_DIR/$PLUGIN started ..."
		$DFO_BIN_DIR/$PLUGIN
		echo "***INFO: \$DFO_BIN_DIR/$PLUGIN finished."
	else
		echo "***ERROR: \$DFO_BIN_DIR/$PLUGIN configured but not found."
	fi
fi

# ======================================================================
# 4.4 Split QC jobs per detector 
# not enabled since unclear whether required/useful 
# if required, check versions earlier than v1.5  
# ======================================================================

# ======================================================================
# 5. Create entries in $JOB_FILE_NAME
# 5.1 Prepare
# ======================================================================

# create the job file with a header if it does not exist yet
if [ ! -s $DFO_JOB_DIR/$JOB_FILE_NAME ]
then
	cat > $DFO_JOB_DIR/$JOB_FILE_NAME <<EOT
#!/bin/sh
# This is the job file for off-line processing.
EOT
	if [ $THIS_IS_OPSHUB = NO ]
	then
		 cat >> $DFO_JOB_DIR/$JOB_FILE_NAME <<EOT
# You may edit this file. Outdated entries are removed by moveProducts.

EOT
	else
		 cat >> $DFO_JOB_DIR/$JOB_FILE_NAME <<EOT
# You may edit this file, at your own risk.

EOT
	fi
fi

# check for non-empty execAB; exit if none found
if [ ! -s $JOBFILE ]
then
	echo "***INFO: No $JOBFILE found. Exit."
	exit 0
fi

# at least one non-comment line in execAB means there is work to queue
CHECK_EMPTY=`grep -v "^#" $JOBFILE | head -1 | wc -l`
if [ $CHECK_EMPTY != 1 ]
then
	echo "***INFO: $JOBFILE is empty, no jobs created. Exit."
	exit 0
fi

# ======================================================================
# 5.2 For CONDOR: enter the download file
# ======================================================================

# check for pre-existing entry for $JOB_ID 
CHECK_EXIST=`grep -v "^#" $DFO_JOB_DIR/$JOB_FILE_NAME | grep $JOB_ID | head -1 | wc -l`
if ([ $CHECK_EXIST = 0 ]) 
then

# Enter download jobs first
	if [ Q$RAWDOWNFILE != Q ] && [ -s $DFO_JOB_DIR/$RAWDOWNFILE ]
	then
		if [ $THIS_IS_OPSHUB = YES ]
		then
			# OPSHUB progress bar: TOTAL_STRING is one 'X' per
			# ngasClient call; the downloads print dots beneath it
			NTOTAL=`grep       ngasClient $DFO_JOB_DIR/$RAWDOWNFILE | wc -l`
			TOTAL_STRING=`grep ngasClient $DFO_JOB_DIR/$RAWDOWNFILE | sed "s/^.*//" | tr "\n" "X" | sed "s/^.*/&\n/"`
			if [ $NTOTAL -lt 10 ]
			then
				SPACE_FIRST_LINE="   "
			elif [ $NTOTAL -lt 100 ]
			then
				SPACE_FIRST_LINE="  "
			else
				SPACE_FIRST_LINE=" "
			fi

			cat >> $DFO_JOB_DIR/$JOB_FILE_NAME <<EOT
echo "Download files for $MODE processing:"
echo "Raw data: $NTOTAL files to download  $SPACE_FIRST_LINE $TOTAL_STRING"
echo "Starting download                 " | tr "\n" " "; $DFO_JOB_DIR/$RAWDOWNFILE; echo " done."
EOT
		else
			cat >> $DFO_JOB_DIR/$JOB_FILE_NAME <<EOT
echo "starting raw data download ..."; $DFO_JOB_DIR/$RAWDOWNFILE 1>/dev/null
EOT
		fi
	fi

	# same scheme for the mcalib download file, if one was created
	if [ Q$MCALDOWNFILE != Q ] && [ -s $DFO_JOB_DIR/$MCALDOWNFILE ]
	then
		if [ $THIS_IS_OPSHUB = YES ]
		then
			NTOTAL=`grep       ngasClient $DFO_JOB_DIR/$MCALDOWNFILE | wc -l`
			TOTAL_STRING=`grep ngasClient $DFO_JOB_DIR/$MCALDOWNFILE | sed "s/^.*//" | tr "\n" "X" | sed "s/^.*/&\n/"`
			if [ $NTOTAL -lt 10 ]
			then
				SPACE_FIRST_LINE="   "
			elif [ $NTOTAL -lt 100 ]
			then
				SPACE_FIRST_LINE="  "
			else
				SPACE_FIRST_LINE=" "
			fi

			cat >> $DFO_JOB_DIR/$JOB_FILE_NAME <<EOT
echo "Mcalib data: $NTOTAL files to download${SPACE_FIRST_LINE}$TOTAL_STRING"
echo "Starting download                 " | tr "\n" " "; $DFO_JOB_DIR/$MCALDOWNFILE; echo " done."
EOT
		else
			cat >> $DFO_JOB_DIR/$JOB_FILE_NAME <<EOT
$DFO_JOB_DIR/$MCALDOWNFILE
EOT
		fi
	fi

# then recursive calls of AB monitor
	if [ $THIS_IS_OPSHUB = NO ]
	then
		cat >> $DFO_JOB_DIR/$JOB_FILE_NAME <<EOT
# Pipeline jobs for ${MODE}, $DATE 
getStatusAB -d $DATE -r &
EOT
	else
		cat >> $DFO_JOB_DIR/$JOB_FILE_NAME <<EOT
# Pipeline jobs for ${MODE}, $DATE 
getStatusAB -d $DATE -p $PROJECT -r &
EOT
	fi

# ===========================================================================================
# 5.3 Pipeline jobs (execAB)
#	For MAX_JOBS set, and OPSHUB and CONDOR and SCIENCE: we split the execAB calls into N
#	jobs to avoid overloading the host
# ===========================================================================================

	if [ $THIS_IS_OPSHUB = NO ] 
	then
		# DFOS/PHOENIX: CON goes through the CONDOR launcher, CPL/INT
		# run the execAB file directly
		case $DRS_TYPE in
	 	 CON ) echo "$VULTUR_CALL --dagId=${JOB_ID} --jobs=$JOBFILE --wait" >> $DFO_JOB_DIR/$JOB_FILE_NAME ;;
	 	 *   ) echo $JOBFILE >> $DFO_JOB_DIR/$JOB_FILE_NAME ;;
		esac
	elif [ $THIS_IS_OPSHUB = YES ] && ( [ $MAX_JOBS = UNDEF ] || [ $DRS_TYPE != CON ] || [ $MODE = CALIB ] )
	then
		case $DRS_TYPE in
	 	 CON ) echo "$VULTUR_CALL --dagId=${JOB_ID} --jobs=$JOBFILE --wait" >> $DFO_JOB_DIR/$JOB_FILE_NAME ;;
	 	 *   ) echo $JOBFILE >> $DFO_JOB_DIR/$JOB_FILE_NAME ;;
		esac
# OPSHUB and MAX_JOBS and CON set and SCIENCE: we execute only $MAX_JOBS in parallel and split the entire execAB file into N batches
	else
	 	echo "$VULTUR_CALL --dagId=${JOB_ID} --jobs=$JOBFILE --wait" >> $DFO_JOB_DIR/$JOB_FILE_NAME 
		NUM_JOBS=`cat $JOBFILE | grep -v "^#" | wc -l`

		if [ $NUM_JOBS -gt $MAX_JOBS ]
		then
# split into subjobs
		
			# tag every MAX_JOBSth AB as a batch boundary (FULL),
			# then number the batches consecutively (indexj)
			cat $JOBFILE | grep -v "^#" | awk '{if (NR==int(NR/max_jobs)*max_jobs) {print "FULL",$1} else {print "PARA",$1}}' max_jobs=$MAX_JOBS > $TMP_DIR/cj_maxjobs
			cat $TMP_DIR/cj_maxjobs | awk '{ {print indexj,$2}; if ( $1=="FULL" ) {indexj=indexj+1}}' indexj=1 > $TMP_DIR/cj_maxjobs1
			rm -f $TMP_DIR/cj_newjobfile*
			# write one execAB<N> file per batch, each starting with
			# the original execAB header comments
			for INDEXJ in `cat $TMP_DIR/cj_maxjobs1 | awk '{print $1}' | sort -nu`
			do
				JOBFILE1=`echo ${JOBFILE} | sed "s|execAB|&${INDEXJ}|"`
				JOBFILE2=`basename $JOBFILE1`
				grep "^#" ${JOBFILE} > ${JOBFILE1}
				grep "^$INDEXJ " $TMP_DIR/cj_maxjobs1 | awk '{print $2}' > $TMP_DIR/cj_maxjobs2
				for SELECTED in `cat $TMP_DIR/cj_maxjobs2`
				do
 					grep $SELECTED $JOBFILE >> ${JOBFILE1}
				done
				chmod u+x ${JOBFILE1}
				grep "execAB_SCIENCE_$DATE" $DFO_JOB_DIR/$JOB_FILE_NAME | sed "s|execAB_SCIENCE_${DATE}|${JOBFILE2}|" >> $TMP_DIR/cj_newjobfile
			done

			# splice: replace the single execAB line in the job file
			# by the N batch lines
			MAX_INDEX=`cat $TMP_DIR/cj_maxjobs1 | awk '{print $1}' | sort -nu | tail -1`		
			LINE_EXECAB=`grep -n execAB_SCIENCE_$DATE $DFO_JOB_DIR/$JOB_FILE_NAME  | sed "s/:.*//" | awk '{print $1-1}'`
			LINE_EXECAB1=`grep -n execAB_SCIENCE_$DATE $DFO_JOB_DIR/$JOB_FILE_NAME | sed "s/:.*//" | awk '{print $1+1}'`
			cat $DFO_JOB_DIR/$JOB_FILE_NAME | head -${LINE_EXECAB} > $TMP_DIR/cj_newjobfile1 
			cat $TMP_DIR/cj_newjobfile >> $TMP_DIR/cj_newjobfile1
			cat $DFO_JOB_DIR/$JOB_FILE_NAME | sed "1,$LINE_EXECAB1 d" >> $TMP_DIR/cj_newjobfile1
	
			mv $TMP_DIR/cj_newjobfile1 $DFO_JOB_DIR/$JOB_FILE_NAME
			echo "- execAB calls for SCIENCE in ${JOBFILE} split into $MAX_INDEX calls because of \$MAX_JOBS set to $MAX_JOBS."
		fi
	fi

# finish CONDOR queue
	if [ $DRS_TYPE = "CON" ]
	then
		echo "echo \"$VULTUR_CALL for $DATE done.\" " >> $DFO_JOB_DIR/$JOB_FILE_NAME
	fi

# ======================================================================
# 5.4 QC report jobs (execQC)
# ======================================================================

# set flag and close the QCJOB
	case $MODE in
       	 "CALIB" )       DFO_STATUS="cal_QC" ;;
       	 "SCIENCE" )     DFO_STATUS="sci_QC" ;;
	esac

	if [ $SCP_YN = YES ]
	then
		echo "$SCP_ENABLE " >> $DFO_JOB_DIR/$JOB_FILE_NAME
	fi

# QCBQS_TYPE: parallel, serial, or implicit (within processAB)
	if [ $THIS_IS_OPSHUB = NO ]
	then
		case $QCBQS_TYPE in
	 	PAR 	  ) echo "$VULTUR_CALL --dagId=QC${JOB_ID} --jobs=$QCFILE --wait" >> $DFO_JOB_DIR/$JOB_FILE_NAME ;;
	 	SER 	  ) echo $QCFILE >> $DFO_JOB_DIR/$JOB_FILE_NAME ;;
	 	IMPLICIT  ) echo "# QC jobs submitted implicitly (e.g. via processPost) # $DATE" >> $DFO_JOB_DIR/$JOB_FILE_NAME
			    # NOTE(review): the redirection below targets the *literal*
			    # file name '$DFO_MON_DIR/DFO_STATUS' in the cwd (the \$ is
			    # escaped outside a heredoc) and the \`date\` backticks stay
			    # literal; it looks as if this line was meant to be appended
			    # into $JOB_FILE_NAME like the heredoc further down -- verify
			    # the intent before changing.
			    echo "$DFO_STATUS $DATE \`date +%Y-%m-%d"T"%H:%M:%S\`" >> \$DFO_MON_DIR/DFO_STATUS
			    ;;
		esac
	fi

# MODE=CALIB: delete raw files if configured
	# DELETE_RAW is a command *string* that ends up in the job file below;
	# eval expands the variable names now, the glob stays literal
	if [ $CAL_CLEANUP = YES ] && [ $MODE = CALIB ]
	then
		if [ "Q$HIDE_DIR" != "Q" ]
		then
			DELETE_RAW=`eval "echo rm -f \$DFO_RAW_DIR/${DATE}/\*.fits"; eval "echo rm -f \$HIDE_DIR/${DATE}/\*.fits"`
		else
			DELETE_RAW=`eval "echo rm -f \$DFO_RAW_DIR/${DATE}/\*.fits"`
		fi
	else
		DELETE_RAW=""
	fi

	if [ $THIS_IS_OPSHUB = NO ]
	then	
		cat >> $DFO_JOB_DIR/$JOB_FILE_NAME <<EOT
echo "$DFO_STATUS $DATE \`date +%Y-%m-%d"T"%H:%M:%S\`" >> \$DFO_MON_DIR/DFO_STATUS
getStatusAB -d $DATE $SCP_FLAG &
$DELETE_RAW
EOT

# OPSHUB: no additional call of getStatusAB
	#else
		#cat >> $DFO_JOB_DIR/$JOB_FILE_NAME <<EOT
#getStatusAB -d $DATE -p $PROJECT &
#EOT
#		fi
	fi	
fi

# ======================================================================
# 5.5 Set DFO flag
# ======================================================================

# Append the 'Queued' flag for this mode and date to the DFO status file,
# stamped with the current time (ISO date, 'T' separator).
UPDATE=`date +%Y-%m-%d"T"%H:%M:%S`

if [ "$MODE" = "CALIB" ]
then
	DFO_STATUS="cal_Queued"
elif [ "$MODE" = "SCIENCE" ]
then
	DFO_STATUS="sci_Queued"
fi

echo "$DFO_STATUS $DATE $UPDATE" >> $DFO_MON_DIR/DFO_STATUS

# ======================================================================
# 5.6 RECREATE=YES: check ABs in $DFO_AB_DIR, refresh AB_list 
#     (to have it proper for certifyProducts)
# ======================================================================

if [ $RECREATE = YES ]
then
	cd $DFO_AB_DIR
	rm -f $TMP_DIR/cj_list_exist_ab $DFO_MON_DIR/AB_list_${MODE}_${DATE}
	# workaround: with exactly one AB file, 'grep ... *.ab' would omit the
	# "<file>:" prefix that the sed "s/:DATE//" below relies on; a dummy XX
	# copy forces grep into multi-file mode (XX entries are filtered out)
	CHECK_EXIST=`ls | grep .ab | wc -l`
	if [ $CHECK_EXIST = 1 ]
	then
		cp `ls | grep .ab` XX`ls | grep .ab`
	fi
		
	# all ABs whose DATE key matches $DATE
	grep "^DATE" *.ab | grep "[[:space:]]$DATE" | grep -v XX | awk '{print $1}' | sed "s/:DATE//" > $TMP_DIR/cj_list_exist_ab
	rm -f XX*
	
	# rebuild AB_list: "<ab> <raw_type> <setup>", where SETUP is the
	# concatenation of the RAW_MATCH_KEY values joined by '_'
	for AB in `cat $TMP_DIR/cj_list_exist_ab`
	do
		SETUP=`grep "^RAW_MATCH_KEY" $AB | grep -v UNDEFINED | grep -v TPL.START | grep -v ARCFILE | awk '{print $2}' | sed "s/^.*=//" | tr "\012" "_" | sed "s/_$//"`
		RAWTYPE=`grep "^RAW_TYPE"    $AB | awk '{print $2}'`
		echo "$AB $RAWTYPE $SETUP" >> $DFO_MON_DIR/AB_list_${MODE}_${DATE}
	done
fi

# ======================================================================
# 6. End
# ======================================================================

echo "... done." 

# DFOS/PHOENIX: tell the operator where the generated files ended up
if [ $THIS_IS_OPSHUB = NO ]
then
	echo ""
	echo "Find AB processing tasks in $JOBFILE "
	echo "     QC report tasks in     $QCFILE ." 
	echo "The execAB and execQC files can be launched in $DFO_JOB_DIR/$JOB_FILE_NAME ." 
fi
	
exit 0
