#!/bin/sh
# PURPOSE:      This tool checks for the latest available archive data and creates the NGAS monitor.
# AUTHOR:       Reinhard Hanuschik/DFO
# VERSION:      0.9 -- original version for UVES (2002-03)
#		1.0 -- turned into dfos tool dvdMonitor (2004-10-01)
#		1.0.1- auto_reload in interactive mode (2004-11-08)
#		1.0.2- bug removed with 'other' link (2004-11-12)
#		1.1 -- include case when no DVD is found in the selected time range (2004-11-30)
#		1.1.1- bug fixed with case v1.1 (2004-12-13)
#		1.1.2- query improved (2005-03-29)
#		1.1.3- writing into DFO_STATUS now done by dataclient (2005-05-24)
#		1.1.4- filters for new DVD being older than $N_DELAY hrs, to enforce NGAS downloads (2005-09-30)
#		1.2 -- supports autoDaily; has links to listings, ops_logs, nightlogs (2006-01-04)
#		1.2.1- connection error handling improved (2006-01-27)
#		1.2.2- browser calls generalized (2006-03-08)
#		1.2.3- qcdate replacing mjd2date and date2mjd (2006-05-15)
#		1.3 -- N_DELAY reduced because of DVD ingestion speeded up; bug for parameter -d fixed; checkConsist included (2006-06-01)
#		1.3.1- link to data tracking system included (2006-07-19)
#		1.3.2- link to nightlogs aligned with dfoMonitor (2006-11-14)
#               -------------------------------------------------------------
#		2.0 -- reborn as ngasMonitor (2006-12-14)
#		2.0.1- improved handling to suppress preImg/ToO data arriving thru ftp (2007-01-22)
#		2.0.2- detect filtered files (2007-02-26)
#		2.0.3- bug fixed with FILTER (2007-06-28)
#		2.0.4- improved error handling (2007-07-13)
#		2.1 -- process watcher and sciproc ingestion monitor (2007-11-29)
#		2.1.1- link to HTML data reports (2008-06-02)
#		2.1.2- updated info text (2008-06-12)
#		2.1.3- e-mail notification and autoDaily call decoupled (2008-07-31)
#		2.2 -- tool upgraded to report files rather than dates; launch_dataclient removed; NGAS access checker (2008-08-14)
#		2.2.1- bug fixed with NGAS_file_list (2008-09-26)
#		2.2.2- NGAS_diff more robust against timestamp changes in NGAS (line 541ff) (2008-09-30)
#		2.2.3- turned off rcp to dmdarc1; RCP_YN and QC_SERVER obsolete (2009-02-17)
#		2.3 -- called by autoDaily v2.0 (2009-07-21)
#		2.3.1- DFO_EDITOR replaces $CFG_EDITOR (2011-04-21)
#               -------------------------------------------------------------
#		3.0 -- html part and scp is obsolete, removed (2013-03-07)
#		3.0.1- improved handling of 'Cannot connect' issues (2016-02-23)
#		3.1 -- query to NGAS database changed due to change of format of ingestion_date column (2024-03-05)
#		
# CONFIG:	NONE
# TOOLS CALLED:	qcdate, isql (the ngasClient test download is no longer performed by this version)
# OUTPUT:	updated DFO_MON_DIR/NGAS_file_list and NGAS_STATUS 
# COMMENTS:	the tool is usually called within autoDaily.
# ===================================================================================
TOOL_VERSION="3.1"

# handling of preImg and TOO data
FTP_HOSTID="ngau1"	# host_id of host receiving ftp data; excluded from the NGAS query in fetchDateInfo

# cleanup: remove ngasMonitor.html, its help page and the tool config file.
# This runs before the environment check below, so each directory variable is
# tested first: with an empty variable the paths would otherwise collapse to
# /ngasMonitor.html etc. Paths are quoted to survive blanks in directory names.
if [ -n "$DFO_MON_DIR" ]
then
	rm -f "$DFO_MON_DIR/ngasMonitor.html" "$DFO_MON_DIR/ngasMonitor_help.html"
fi

if [ -n "$DFO_CONFIG_DIR" ]
then
	rm -f "$DFO_CONFIG_DIR/config.ngasMonitor"
fi

# ===================================================================================
# 0. Initialization
# 0.1 check for directory variables which should be set globally in .dfosrc
# ===================================================================================

# Collect every environment entry that mentions DFO; an empty result means
# that ~/.dfosrc has not been sourced and the tool cannot work.
CHECK=$(printenv | grep DFO)

if [ "Q$CHECK" = "Q" ]
then
        echo "*** ERROR: DFO variables not defined. Check ~/.dfosrc and restart."
        # 'exit -1' is not a valid exit status in POSIX sh (dash aborts with
        # "Illegal number: -1"); use a plain non-zero status instead.
        exit 1
fi

# =========================================================================
# 0.2 get options
# =========================================================================

# NDAYS_OFF turns to YES as soon as -d provides an explicit last date
NDAYS_OFF=NO

# -d <date> : scan back to <date> instead of the default number of days
# -h        : page the help text and stop
# -v        : print the tool version and stop
while getopts d:hv OPTION; do
	case "$OPTION" in
	 d ) NDAYS_OFF=YES
	     LAST_DATE=$OPTARG ;;
	 h ) cat $DFO_DOC_DIR/ngasMonitor.h | more
	     exit 0 ;;
	 v ) echo "$TOOL_VERSION"
	     exit 0 ;;
	esac
done

# =========================================================================
# 0.3 Initialize some parameters
# =========================================================================

# default depth of the scan (number of dates, counted backwards), used unless -d is given
N_LAST_DAYS=10

# =========================================================================
# 0.4 Subroutines
# 0.4.1 fetchDateInfo: get info from ngas db about availability of files
# 	all DATEs are read from $TMP_DIR/list_dates and queried at once.
# =========================================================================

# fetchDateInfo: write the isql input file $TMP_DIR/in_sql1.
# Reads:   $TMP_DIR/list_dates (one date per word), DFO_OFFSET, DFO_FILE_NAME,
#          FTP_HOSTID
# Calls:   qcdate (to obtain the day following each date)
# Writes:  $TMP_DIR/in_sql1 with one SELECT ... GO batch per date; each batch
#          selects the file_ids between <date>T<12+DFO_OFFSET>:00:00 and
#          <date+1>T<12+DFO_OFFSET-1>:59:59.999, excluding disks mounted on
#          host $FTP_HOSTID. Nothing is written if list_dates is empty.
# Sets the globals HOUR_MIN, HOUR_MAX, DATE, DATE_PLUS1, DATE_MIN, DATE_MAX.
fetchDateInfo(){
rm -f "$TMP_DIR/in_sql1"

# The hour limits depend on DFO_OFFSET only, so they are computed once.
# They are zero-padded to two digits: the limits are compared as strings
# against file_ids, and an hour like "9" instead of "09" would yield a
# wrong range.
HOUR_MIN=$(echo "12 $DFO_OFFSET" | awk '{printf "%02d\n", $1+$2}')
HOUR_MAX=$(echo "12 $DFO_OFFSET" | awk '{printf "%02d\n", $1+$2-1}')

# a for loop (not 'while read') keeps stdin untouched for the commands inside
for DATE in $(cat "$TMP_DIR/list_dates")
do
	DATE_PLUS1=$(qcdate $DATE +1)

	DATE_MIN="${DATE}T${HOUR_MIN}:00:00"
	DATE_MAX="${DATE_PLUS1}T${HOUR_MAX}:59:59.999"

	# unquoted here-document: the shell variables below are expanded
	cat >> "$TMP_DIR/in_sql1" <<EOT

SELECT DISTINCT
	"$DATE" , 
	convert(char(40), file_id),
	substring(ingestion_date,1,4)+'.'+substring(ingestion_date,6,2)+'.'+substring(ingestion_date,9,2),
	substring(ingestion_date,12,8)
FROM
	ngas..ngas_files f ,
        ngas..ngas_disks d ,
	ngas..ngas_hosts h
WHERE
	file_id between "${DFO_FILE_NAME}.${DATE_MIN}" and "${DFO_FILE_NAME}.${DATE_MAX}"
AND
	h.domain = "hq.eso.org"
AND
	d.disk_id = f.disk_id
AND 
	h.host_id = d.host_id
AND
	d.host_id != "${FTP_HOSTID}"
GO     
EOT
done
}

# =========================================================================
# 0.4.2 fetchError: track database errors
# =========================================================================

# fetchError: stop the tool if the isql output $TMP_DIR/out_sql1 signals a
# database problem.
# Reads:   $TMP_DIR/out_sql1, ARCH_SERVER
# Effects: an empty or missing out_sql1 is replaced by a one-line timeout
#          message. If that message or one of the known error markers is
#          found, "ERROR!" and the content of out_sql1 are printed and the
#          script exits with status 1. Otherwise the function returns.
# Sets the global ERROR (first matching line, empty if none).
fetchError(){
if [ ! -s "$TMP_DIR/out_sql1" ]
then
	echo "ngasMonitor: timeout, no database connection to ${ARCH_SERVER}!" > "$TMP_DIR/out_sql1"
fi

# "ngasMonitor: timeout" matches the placeholder written above; the other
# markers alone do not catch it ("Datab" is case-sensitive), which would let
# the message slip through and be processed as query output.
ERROR=$(grep -E "CT-LIBRARY error|ct_connect|Canno|Datab|ngasMonitor: timeout" "$TMP_DIR/out_sql1" | head -1)

if [ "Q$ERROR" != "Q" ]
then
	echo "ERROR!"
	cat "$TMP_DIR/out_sql1"
	# explicit non-zero status: a bare 'exit' would report the (successful)
	# status of the preceding cat
	exit 1
fi
}

# =========================================================================
# 1.  Query for available files
# 1.1 Determine dates 
# =========================================================================

echo "-query archive database ..."

# current UT date and hour, and the hour (12 + DFO_OFFSET) they are compared with
TODAY=$(date -u +%Y-%m-%d)
LAST_HOUR=$(date -u +%H)
BOUNDARY=$(echo 12 $DFO_OFFSET | awk '{print $1+$2}')

# unless the boundary hour has been reached, step back to the previous date
[ $LAST_HOUR -ge $BOUNDARY ] || TODAY=$(qcdate $TODAY -1)

# upper end of the scan: the number qcdate returns for TODAY, plus one
FIRST_MJD=$(qcdate $TODAY | awk '{print $1+1}')

# lower end (exclusive): derived from -d <date> if given, else N_LAST_DAYS back
case "Q$NDAYS_OFF" in
 QYES ) LAST_MJD=$(qcdate $LAST_DATE | awk '{print $1-1}') ;;
 * )    LAST_MJD=$(echo $FIRST_MJD $N_LAST_DAYS | awk '{print $1-$2-1}') ;;
esac

# write the dates, newest first, into a fresh $TMP_DIR/list_dates
rm -f $TMP_DIR/list_dates
MJD=$(echo $FIRST_MJD | awk '{print $1-1}')

while [ $MJD -gt $LAST_MJD ]
do
	qcdate $MJD >> $TMP_DIR/list_dates
	MJD=$(echo $MJD | awk '{print $1-1}')
done

# =========================================================================
# 1.2 Loop over dates
# =========================================================================

# build the query file $TMP_DIR/in_sql1
fetchDateInfo

rm -f "$TMP_DIR/out_sql1" "$TMP_DIR/check_no_data" "$TMP_DIR/out_sql"

# run all queries in one isql session; the password is read from the file
# named by ARCH_PWD (eval lets that name contain shell expansions).
# NOTE(review): passing the password via -P makes it visible in the process
# list while isql runs -- consider a safer mechanism if isql offers one.
isql -Uqc -P"$(eval "cat ${ARCH_PWD}")" -S"${ARCH_SERVER}" -w 999 -i "$TMP_DIR/in_sql1" -o "$TMP_DIR/out_sql1"

# stops the script on connection or database errors
fetchError

# keep the "(N rows affected)" / "(1 row affected)" lines separately
grep "rows affected)" "$TMP_DIR/out_sql1" >  "$TMP_DIR/check_no_data"
grep "row affected)"  "$TMP_DIR/out_sql1" >> "$TMP_DIR/check_no_data"

# Reduce the raw output to the result rows:
#  - drop message lines, row counts and the dashed separator lines
#  - drop empty lines and lines whose first and third characters are blanks
#    (presumably the blank column-header lines of the isql output)
#  - sort in reverse order of column 2 (file_id)
# The blank test is done directly in sed. The former detour of translating
# blanks to "Y" and back also turned every genuine "Y" in the data into a blank.
grep -v -e Canno -e Datab -e affected -e disk_id -e "---" "$TMP_DIR/out_sql1" |\
 sed -e "/^$/d" -e "/^ . /d" |\
 sort -k2,2 -r \
  > "$TMP_DIR/out_sql"

# =========================================================================
# 1.3 Check for new data
# =========================================================================
# find new files (modified WHu v2.3.1)
if [ ! -s $TMP_DIR/out_sql ]; then
	# the query gave no result rows: say so and reset an existing file list to empty
	echo "No data found in the selected time range. Try option 'earlier'."
	if [ -f $DFO_MON_DIR/NGAS_file_list ]; then
		rm $DFO_MON_DIR/NGAS_file_list
		touch $DFO_MON_DIR/NGAS_file_list
	fi
else
	# provide the reference list NGAS_file_list.old, in this order of preference:
	#  1. the current non-empty file list
	#  2. the non-empty safety copy
	#  3. the fresh query result itself
	if [ -s $DFO_MON_DIR/NGAS_file_list ]; then
		mv $DFO_MON_DIR/NGAS_file_list $DFO_MON_DIR/NGAS_file_list.old
	elif [ -s $DFO_MON_DIR/NGAS_file_list.save ]; then
		mv $DFO_MON_DIR/NGAS_file_list.save $DFO_MON_DIR/NGAS_file_list.old
	else
		cp $TMP_DIR/out_sql $DFO_MON_DIR/NGAS_file_list.old
	fi

	# the fresh query result becomes the new file list
	cp $TMP_DIR/out_sql $DFO_MON_DIR/NGAS_file_list
fi

# Smallest value found in column 1 of the previous list, with all dashes
# removed. The first two lines are skipped: a list written by a previous run
# carries the two header lines added in the "nicer output" step further below.
# NOTE(review): when NGAS_file_list.old was just copied from out_sql it has no
# header, so two data lines are skipped instead -- confirm this is acceptable.
OLDEST_DATE=`cat $DFO_MON_DIR/NGAS_file_list.old | sed "1,2 d" | awk '{print $1}' | sort -u | head -1 | sed "s/-//g"`

# find new files; evaluate old and new entries (DATE and FILE_ID only)
# Steps: remove the first two dashes of each line, keep the lines whose first
# column is >= OLDEST_DATE, then re-insert a dash after the 5th and after the
# 8th character. Result goes to NGAS_file_list1.
# NOTE(review): re-inserting by position assumes one leading character in
# front of a YYYY-MM-DD date on every line -- TODO confirm.
cat $DFO_MON_DIR/NGAS_file_list | sed "s/-//" | sed "s/-//" | awk '{ if ($1 >= old) {print $0}}' old=$OLDEST_DATE | sed "s/^...../&-/" | sed "s/^......../&-/" > $DFO_MON_DIR/NGAS_file_list1

# remove results of earlier comparisons
rm -f $TMP_DIR/NGAS_diff $DFO_MON_DIR/NGAS_diff $TMP_DIR/ngas_new $TMP_DIR/ngas_old

# file entries could come in duplicates, with different timestamps ...
# ... therefore only columns 1 and 2 are compared, as unique, reverse-sorted lists
cat $DFO_MON_DIR/NGAS_file_list1    | awk '{print $1,$2}' | sort -ur > $TMP_DIR/ngas_new
cat $DFO_MON_DIR/NGAS_file_list.old | awk '{print $1,$2}' | sort -ur > $TMP_DIR/ngas_old

# lines marked "<" by diff exist in the new list only; keep their two columns
diff $TMP_DIR/ngas_new $TMP_DIR/ngas_old | grep "^<" | awk '{print $2,$3}' > $TMP_DIR/NGAS_diff

# a non-empty difference means new files; it is then moved to $DFO_MON_DIR
if [ -s $TMP_DIR/NGAS_diff ]
then
	mv $TMP_DIR/NGAS_diff $DFO_MON_DIR/
	NEW_FILES=YES
else
	NEW_FILES=NO
fi

# a missing or empty previous list also counts as "new files"
# NOTE(review): in that case $DFO_MON_DIR/NGAS_diff may not exist
if [ ! -s $DFO_MON_DIR/NGAS_file_list.old ]
then
	NEW_FILES=YES
fi

# old scheme, assumes sequence
# old scheme, assumes sequence: the first line of out_sql is taken as the latest file
if [ -s $TMP_DIR/out_sql ]
then
	# file name (column 2) and the same string without the "<DFO_FILE_NAME>." part
	NEW_LATEST_FILENAME=$(head -1 $TMP_DIR/out_sql | awk '{print $2}')
	NEW_LATEST_TIMESTAMP=$(echo "$NEW_LATEST_FILENAME" | sed "s/${DFO_FILE_NAME}.//")

	# split the timestamp by character position: date, hours, minutes, seconds
	NEW_LATEST_DATE=$(echo $NEW_LATEST_TIMESTAMP | cut -c1-10)
	HH=$(echo $NEW_LATEST_TIMESTAMP | cut -c12-13)
	MM=$(echo $NEW_LATEST_TIMESTAMP | cut -c15-16)
	SS=$(echo $NEW_LATEST_TIMESTAMP | cut -c18-19)

	# number returned by qcdate for the date, plus the time of day as fraction of a day
	NEW_LATEST_MJD=$(qcdate $NEW_LATEST_DATE)
	NEW_LATEST_MJD=$(echo $NEW_LATEST_MJD $HH $MM $SS | awk '{printf"%10.6f\n", $1+$2/24+$3/24/60+$4/24/60/60}')

	if [ -s $DFO_MON_DIR/NGAS_STATUS ]
	then
		# append "<file name> <MJD>" only if it is later than the last recorded MJD
		OLD_LATEST_MJD=$(tail -1 $DFO_MON_DIR/NGAS_STATUS | awk '{print $2}')
		CHECK_NEWENTRY=$(echo $OLD_LATEST_MJD $NEW_LATEST_FILENAME $NEW_LATEST_MJD | awk '{ if ( $1 < $3 ) {print $2,$3} }')
		[ -n "$CHECK_NEWENTRY" ] && echo $CHECK_NEWENTRY >> $DFO_MON_DIR/NGAS_STATUS
	else
		# no status file yet: start one with a header line and the first entry
		{
			echo "#last_NGAS_FILENAME MJD"
			echo "$NEW_LATEST_TIMESTAMP $NEW_LATEST_MJD"
		} > $DFO_MON_DIR/NGAS_STATUS
		echo "No status file \$DFO_MON_DIR/NGAS_STATUS found, created."
	fi
fi

# row-count lines other than "(0 rows affected)"; not evaluated further in this file
ROWS_EXIST=$(grep -v "(0 rows affected)" $TMP_DIR/check_no_data)

# nicer output: put a two-line header in front of the file list
{
	echo "   DFO_DATE FILE_ID                                  Ingestion_timestamp"
	echo "========================================================================"
	cat $DFO_MON_DIR/NGAS_file_list
} > $TMP_DIR/out_sql2
mv $TMP_DIR/out_sql2 $DFO_MON_DIR/NGAS_file_list

# additional safety copy, used as fallback reference list by the next run
cp $DFO_MON_DIR/NGAS_file_list $DFO_MON_DIR/NGAS_file_list.save

# ===================================================================================
# 1.4 end
# ===================================================================================

# start without a date list; it is rebuilt only when new files were detected
rm -f $DFO_MON_DIR/list_data_dates

case $NEW_FILES in
 YES )
	echo " ... new data found."
	# unique values of column 1 of the difference list
	cat $DFO_MON_DIR/NGAS_diff | awk '{print $1}' | sort -u > $DFO_MON_DIR/list_data_dates
	;;
 * )
	echo " ... no new data found."
	;;
esac

exit 
