#!/bin/bash -l
##################################################################################
#Andy Rampersaud, 03.11.16
#This script is called by DiffExp.sh
#06.29.2015: Tisha edited to accomodate edgeR results
##################################################################################
# Specify which shell to use
#$ -S /bin/bash
# Run on the current working directory
#$ -cwd

# Join standard output and error to a single file  
#$ -j n
# change to y if you want a single qlog file 
##################################################################################
#Initialize variables from DiffExp.sh
##################################################################################
#checking the command line arg
#-ne : "is not equal to"
if [ $# -ne 14 ] ; then
      echo "Need 14 arguments for the qsub command:"
      echo "qsub -N ${Job_Name}'_'${DiffExp_Index} -P waxmanlab -l h_rt=${TIME_LIMIT} DiffExp.qsub ${SCRIPT_DIR} ${Dataset_DIR} ${Dataset_Label} ${GTF_Files_DIR} ${ANNOTATION_FILE}  ${CONDITION_1_NAME} ${CONDITION_2_NAME} ${Lengths_DIR} ${GENE_LENGTHS_FILE} ${COUNT_DIR} ${OUTPUT_PREFIX} ${DiffExp_Index} ${COL_SUFFIX} ${COUNT_PROGRAM}"
      exit 0
fi

#http://www.ibm.com/developerworks/library/l-bash-parameters/
#Note: If you have more than 9 parameters, you cannot use $10 to refer to the tenth one. You must first either process or save the first parameter ($1), then use the shift command to drop parameter 1 and move all remaining parameters down 1, so that $10 becomes $9 and so on.

#http://unix.stackexchange.com/questions/104420/how-to-use-command-line-arguments-in-a-shell-script
#If you need access more than 9 command line arguments, you can use the shift command. Example: shift 2 renames $3 to $1, $4 to $2 etc.

#process the command line arguments
SCRIPT_DIR=$1
Dataset_DIR=$2
Dataset_Label=$3
GTF_Files_DIR=$4
ANNOTATION_FILE=$5
CONDITION_1_NAME=$6
CONDITION_2_NAME=$7
Lengths_DIR=$8
GENE_LENGTHS_FILE=$9
#COUNT_DIR=$10
#OUTPUT_PREFIX=$11
#DiffExp_Index=$12
#COL_SUFFIX=$13
#COUNT_PROGRAM=$14

echo "-----------------------"
echo "Need shift 5 command (have more than 9 arguments):"
#The "shift" command allows users to move through an argument list (if there are more than 9 arguments)
#It removes the arguments that occur at the beginning of the list and shifts arguments at the end of the list so that they are now within the list of 9 arguments
#Example: shift 1
#This will remove arg #1 then shift args #2 - #10 so that they are numbered $1 - $9
#Now the user can save the arg #10 as $9
#Shift_Number = (Total number of args) - (9)
#For this script:
#Shift_Number = (Total number of args) - (9) = (14) - (9) = 5
#Hence: Need shift 5 command
#Now I can save arg:
#10 as $5
#11 as $6
#12 as $7
#13 as $8
#14 as $9
echo "-----------------------"

shift 5

#process the command line arguments
COUNT_DIR=$5
OUTPUT_PREFIX=$6
DiffExp_Index=$7
COL_SUFFIX=$8
COUNT_PROGRAM=$9

#Print variables (make sure they appear correctly):
echo "-----------------------"
echo "Start of variable list:"
echo "-----------------------"
echo "SCRIPT_DIR:"
echo ${SCRIPT_DIR}
echo "Dataset_DIR:"
echo ${Dataset_DIR}
echo "Dataset_Label:"
echo ${Dataset_Label}
echo "GTF_Files_DIR:"
echo ${GTF_Files_DIR}
echo "ANNOTATION_FILE:"
echo ${ANNOTATION_FILE}
echo "CONDITION_1_NAME:"
echo ${CONDITION_1_NAME}
echo "CONDITION_2_NAME:"
echo ${CONDITION_2_NAME}
echo "Lengths_DIR:"
echo ${Lengths_DIR}
echo "GENE_LENGTHS_FILE:"
echo ${GENE_LENGTHS_FILE}
echo "COUNT_DIR:"
echo ${COUNT_DIR}
echo "OUTPUT_PREFIX:"
echo ${OUTPUT_PREFIX}
echo "DiffExp_Index:"
echo ${DiffExp_Index}
echo "COL_SUFFIX:"
echo ${COL_SUFFIX}
echo "COUNT_PROGRAM:"
echo ${COUNT_PROGRAM}
echo "-----------------------"
echo "End of variable list"
echo "-----------------------"

# Now let's keep track of some information just in case anything goes wrong
echo "=========================================================="
#Use to calculate job time:
#Start_Time in seconds
Start_Time=$(date +"%s")
echo "Starting on : $(date)"
echo "Running on node : $(hostname)"
echo "Current directory : $(pwd)"
echo "Current job ID : $JOB_ID"
echo "Current job name : $JOB_NAME"
echo "Task index number : $SGE_TASK_ID"
echo "Parameter for multiple cores : $NSLOTS"
echo "=========================================================="

# Go to local scratch directory
echo
echo 'Change dir to scratch directory'
echo
cd ${TMPDIR}
echo
echo 'Print scratch directory location:'
echo
echo $TMPDIR
#--------------------------------------
echo
echo 'Loading required modules...'
echo
#Make sure the shebang line = #!/bin/bash -l
#Need the -l option to load modules
#Search for latest program installed:
#For captial "R" remove (-i Ignore case sensitivity.) option
#module avail -t 2>&1 | grep R
module load R/R-3.1.1
#--------------------------------------
#module help R/R-3.1.1
#----------- Module Specific Help for 'R/R-3.1.1' ------------------
#R R-3.1.1: R is a system/language for statistical computation and graphics.
#R 3.1.1  is a system (language) for statistical computation and graphics
#R consists of a language plus run-time enviroment with graphics, a debugger,
#access to certain system functions and the ability to run programs stored in
#script files
module load samtools
#----------------------------------------------------------------------------------
#Output dir:
OUTPUT_DIR=${SCRIPT_DIR}/'Output_'${DiffExp_Index}'_'${COUNT_PROGRAM}'_'${COL_SUFFIX}
#Using an if statement to make the output folder if it does not exists
if [ ! -d $OUTPUT_DIR ]; then
	mkdir $OUTPUT_DIR; 
fi
#-----------------------------------------------------
#Create folders to store input files:
mkdir -p Input/Condition_1
mkdir -p Input/Condition_2
#-----------------------------------------------------
#Copy gtf file (annotation file):
cp ${GTF_Files_DIR}/${ANNOTATION_FILE} .
#Copy over text files indicating Condition_1 and Condition_2 samples:
cp ${SCRIPT_DIR}/Condition_1.txt .
cp ${SCRIPT_DIR}/Condition_2.txt .
#Copy Rscripts:
cp ${SCRIPT_DIR}/Scripts/differentialAnalysis.R .
cp ${SCRIPT_DIR}/Scripts/formatForSegex_ver3.R .
cp ${SCRIPT_DIR}/Scripts/Diff_Genes.R .
cp ${SCRIPT_DIR}/Scripts/Venn_Diff_Genes.R . 
#----------------------------------------------------------------------------------
#Use text files to indicate Condition_1 and Condition_2 samples
#Condition_1.txt
#Condition_2.txt
#----------------------------------------------------------------------------------
#Need to preprocess the Condition_*.txt files to make sure they will work:
#This will guarantee that only tabs exists between columns:
awk '{print $1"\t"$2"\t"$3}' Condition_1.txt > Condition_1.temp1
mv Condition_1.temp1 Condition_1.txt
awk '{print $1"\t"$2"\t"$3}' Condition_2.txt > Condition_2.temp1
mv Condition_2.temp1 Condition_2.txt
#----------------------------------------------------------------------------------
#-----------------------------------------------
#Copy Condition_1 sample count files to Condition_1 folder
################################################
#The text file is formatted like the following:
#----------------------------------------------
#Sample_DIR	Sample_ID	Description
#Sample_Waxman-TP17	G83_M1	Male 8wk-pool 1
#Sample_Waxman-TP18	G83_M2	Male 8wk-pool 2
#Sample_Waxman-TP19	G83_M3	Female 8wk-pool 1
#Sample_Waxman-TP20	G83_M4	Female 8wk-pool 2	
#----------------------------------------------
#The 1st column: The Sample_DIR name
#The 2nd column: Waxman Lab Sample_ID 
#The 3rd column: Sample's description 
################################################
#Text file has a header line to ignore:
tail -n +2 Condition_1.txt > Condition_1.temp
echo "------------------------------------------"
#Use a while loop to run jobs
while IFS=$'\t' read -r -a myArray
do
#---------------------------
##Check that text file is read in properly:
echo 'Sample_DIR:'
Sample_DIR=${myArray[0]}
echo $Sample_DIR
echo 'Sample_ID:'
Sample_ID=${myArray[1]}
echo $Sample_ID
echo 'Description:'
Description=${myArray[2]}
echo $Description
#---------------------------
#Split by underscore and get the 2nd part (M number)
M_Num=$(echo $Sample_ID | cut -d'_' -f 2)
echo "M_Num:"
echo ${M_Num}
#Append variable string to itself:
M_Num_Cond1_List+=${M_Num}
#---------------------------
echo 'Copy Condition_1 sample count files to Condition_1 folder'
cp ${Dataset_DIR}/${Sample_ID}/fastq/tophat2/HTSeq/${COUNT_DIR}/${Sample_ID}'_HTSeq.out' ./Input/Condition_1
#---------------------------
echo 'calculate mapped reads: '${Sample_ID}
#---------------------------
#The following samtools command is too time-consuming
#samtools view -c ${Dataset_DIR}/${Sample_ID}/fastq/tophat2/${Sample_ID}'_primary_unique.bam' > ./Input/Condition_1/${Sample_ID}'_num_mapped_reads.txt'
#---------------------------
#The TopHat_Paired_End job already created a *_statistics_for_primary_unique_reads.txt
#Extract the number of uniquely_mapped_reads, then save it to a text file:
#Copy *_statistics_for_primary_unique_reads.txt to compute node:
cp ${Dataset_DIR}/${Sample_ID}/fastq/tophat2/$Sample_ID'_statistics_for_primary_unique_reads.txt' .
#Extract the number of uniquely_mapped_reads:
uniquely_mapped_reads=$(grep 'total' $Sample_ID'_statistics_for_primary_unique_reads.txt'  | awk '{print $1}')
#save it to a text file:
echo $uniquely_mapped_reads >> ./Input/Condition_1/${Sample_ID}'_num_mapped_reads.txt'
#Remove the *_statistics_for_primary_unique_reads.txt
rm $Sample_ID'_statistics_for_primary_unique_reads.txt'
#---------------------------
done < Condition_1.temp
#---------------------------
echo "M_Num_Cond1_List:"
echo ${M_Num_Cond1_List}
#---------------------------
#Remove the temp file:
rm Condition_1.temp
echo "------------------------------------------"
###############################################
#----------------------------------------------------------------------------------
#Copy Condition_2 sample count files to Condition_2 folder
################################################
#The text file is formatted like the following:
#----------------------------------------------
#Sample_DIR	Sample_ID	Description
#Sample_Waxman-TP17	G83_M1	Male 8wk-pool 1
#Sample_Waxman-TP18	G83_M2	Male 8wk-pool 2
#Sample_Waxman-TP19	G83_M3	Female 8wk-pool 1
#Sample_Waxman-TP20	G83_M4	Female 8wk-pool 2	
#----------------------------------------------
#The 1st column: The Sample_DIR name
#The 2nd column: Waxman Lab Sample_ID 
#The 3rd column: Sample's description 
################################################
#Text file has a header line to ignore:
tail -n +2 Condition_2.txt > Condition_2.temp
echo "------------------------------------------"
#Use a while loop to run jobs
while IFS=$'\t' read -r -a myArray
do
#---------------------------
##Check that text file is read in properly:
echo 'Sample_DIR:'
Sample_DIR=${myArray[0]}
echo $Sample_DIR
echo 'Sample_ID:'
Sample_ID=${myArray[1]}
echo $Sample_ID
echo 'Description:'
Description=${myArray[2]}
echo $Description
#---------------------------
#Split by underscore and get the 2nd part (M number)
M_Num=$(echo $Sample_ID | cut -d'_' -f 2)
echo "M_Num:"
echo ${M_Num}
#Append variable string to itself:
M_Num_Cond2_List+=${M_Num}
#---------------------------
echo 'Copy Condition_2 sample count files to Condition_2 folder'
cp ${Dataset_DIR}/${Sample_ID}/fastq/tophat2/HTSeq/${COUNT_DIR}/${Sample_ID}'_HTSeq.out' ./Input/Condition_2
#---------------------------
echo 'calculate mapped reads: '${Sample_ID}
#---------------------------
#The following samtools command is too time-consuming
#samtools view -c ${Dataset_DIR}/${Sample_ID}/fastq/tophat2/${Sample_ID}'_primary_unique.bam' > ./Input/Condition_2/${Sample_ID}'_num_mapped_reads.txt'
#---------------------------
#The TopHat_Paired_End job already created a *_statistics_for_primary_unique_reads.txt
#Extract the number of uniquely_mapped_reads, then save it to a text file:
#Copy *_statistics_for_primary_unique_reads.txt to compute node:
cp ${Dataset_DIR}/${Sample_ID}/fastq/tophat2/$Sample_ID'_statistics_for_primary_unique_reads.txt' .
#Extract the number of uniquely_mapped_reads:
uniquely_mapped_reads=$(grep 'total' $Sample_ID'_statistics_for_primary_unique_reads.txt'  | awk '{print $1}')
#save it to a text file:
echo $uniquely_mapped_reads >> ./Input/Condition_2/${Sample_ID}'_num_mapped_reads.txt'
#Remove the *_statistics_for_primary_unique_reads.txt
rm $Sample_ID'_statistics_for_primary_unique_reads.txt'
#---------------------------
done < Condition_2.temp
#---------------------------
echo "M_Num_Cond2_List:"
echo ${M_Num_Cond2_List}
#---------------------------
#Remove the temp file:
rm Condition_2.temp
echo "------------------------------------------"
###############################################
echo "=========================================================="
echo
echo 'Number of replicates in each condition:'
echo
cd Input/Condition_1
NUM_REP_CONDITION1=$(ls -l | grep '.out' | wc -l)
echo 'NUM_REP_CONDITION1: '$NUM_REP_CONDITION1
cd ../../
cd Input/Condition_2
NUM_REP_CONDITION2=$(ls -l | grep '.out' | wc -l)
echo 'NUM_REP_CONDITION1: '$NUM_REP_CONDITION1
#----------------------------------------------------------------------------------
#Need to check if NUM_REP_CONDITION* equals 1
#If there's only 1 replicate (1 sample) per condition can't run current DE analysis
#------------------------------------------
if [ "${NUM_REP_CONDITION1}" == "1" ] || [ "${NUM_REP_CONDITION2}" == "1" ] ;
then
echo
echo "WARNING:"
echo
echo "Either your Condition_1 or Condition_2 has only 1 sample (no replicates)"
echo "The differential expression analysis is designed for cases where there are at least 2 replicates/biological condition."
echo "The resulting differential expression analysis will be converted to a stringent version due to absence of replicate samples."
echo "Samples from this comparison will be treated as \"replicates\" to estimate the gene level variance in edgeR and, separately in, DESeq2."
echo "Expect very few to zero DE genes from DESeq2, EdgeR should yield a DE gene count."
echo "Interpreting the results: use the edgeR fold change or DESeq ratio (not DESeq fold change)"
echo "Note regarding the SEGEX upload file(s):"
echo "Since DESeq2 fold changes are not the same as the ratio of the normalized counts between the conditions, so the output of the segex upload file will be different between \"DESeq ratio\" and \"DESeq foldChange\". The former is truly the ratio between the normalized counts between the two conditions (plus pseudocounts) whereas the latter is the fold change estimated by DESeq."
fi
echo "=========================================================="
#----------------------------------------------------------------------------------
cd ../../
#Rename folder names to match CONDITION_1_NAME and CONDITION_2_NAME
mv ./Input/Condition_1 ./Input/$CONDITION_1_NAME
mv ./Input/Condition_2 ./Input/$CONDITION_2_NAME
#----------------------------------------------------------------------------------
echo
echo 'Renaming input count files'
echo
#The differentialAnalysis.R is expecting file names within each folder to also be a function of the condition names
#Use an array to rename the files
cd ${TMPDIR}/Input/$CONDITION_1_NAME
arr=(*.out)
for ((i=0; i<${#arr[@]}; i++)); do
#do something to each element of array
#echo 'Original file name:'
filename=${arr[$i]}
#echo "${filename}"
#Extract extension
name=${filename%\.out}
#echo $name
#----------------------------------------------------------------------------------
#Need an if statement to check if HTSeq was used
#If HTSeq was used, then remove last 5 lines (remove special counters)
if [[ $name == *"HTSeq"* ]] ; then
echo "Counting program: HTSeq"
echo "Removing last 5 lines (remove special counters)"
head -n -5 ${name}.out > ${name}.temp1
mv ${name}.temp1 ${name}.out
else
echo "Counting program: Not HTSeq"
echo "No lines removed."
fi
#----------------------------------------------------------------------------------
#echo 'Modified file name:'
mod_name="${CONDITION_1_NAME}${i}.out"
#echo ${mod_name}
#Rename the file
mv ${filename} ${TMPDIR}/Input/${mod_name}
mv ${filename%\_HTSeq.out}'_num_mapped_reads.txt' ${TMPDIR}/Input/${CONDITION_1_NAME}${i}'_num_mapped_reads.txt'
done
#Use an array to rename the files
cd ${TMPDIR}/Input/$CONDITION_2_NAME
arr=(*.out)
for ((i=0; i<${#arr[@]}; i++)); do
#do something to each element of array
#echo 'Original file name:'
filename=${arr[$i]}
#echo "${filename}"
#Extract extension
name=${filename%\.out}
#echo $name
#----------------------------------------------------------------------------------
#Need an if statement to check if HTSeq was used
#If HTSeq was used, then remove last 5 lines (remove special counters)
if [[ $name == *"HTSeq"* ]] ; then
echo "Counting program: HTSeq"
echo "Removing last 5 lines (remove special counters)"
head -n -5 ${name}.out > ${name}.temp1
mv ${name}.temp1 ${name}.out
else
echo "Counting program: Not HTSeq"
echo "No lines removed."
fi
#----------------------------------------------------------------------------------
#echo 'Modified file name:'
mod_name="${CONDITION_2_NAME}${i}.out"
#echo ${mod_name}
#Rename the file
mv ${filename} ${TMPDIR}/Input/${mod_name}
mv ${filename%\_HTSeq.out}'_num_mapped_reads.txt' ${TMPDIR}/Input/${CONDITION_2_NAME}${i}'_num_mapped_reads.txt'
done
#----------------------------------------------------------------------------------
#copying lengths
cp ${Lengths_DIR}/${GENE_LENGTHS_FILE} ${TMPDIR}/Input/.
echo "number of mapped reads"
cat ${TMPDIR}/Input/*_num_mapped_reads.txt
echo "=========================================================="
echo
echo "List files in Input"
echo
ls -al ${TMPDIR}/Input/*
echo "=========================================================="
#Initialize variables:
OUTPUT_COUNT_FOLDER=${TMPDIR}/Input
#----------------------------------------------------------------------------------
cd ${TMPDIR}
echo
echo 'Starting to run my commands'
echo
# run my commands.
#----------------------------------------------------------------------------------
echo 'Printing Rscript command:'
echo "Rscript differentialAnalysis.R $CONDITION_1_NAME $CONDITION_2_NAME ${NUM_REP_CONDITION1} ${NUM_REP_CONDITION2} $ANNOTATION_FILE $OUTPUT_COUNT_FOLDER $OUTPUT_PREFIX $GENE_LENGTHS_FILE"
#Run the command:
Rscript differentialAnalysis.R $CONDITION_1_NAME $CONDITION_2_NAME ${NUM_REP_CONDITION1} ${NUM_REP_CONDITION2} $ANNOTATION_FILE $OUTPUT_COUNT_FOLDER $OUTPUT_PREFIX $GENE_LENGTHS_FILE
#----------------------------------------------------------------------------------
#Output file will be in "Input folder"
echo "=========================================================="
echo
echo "Create SEGEX formatted file:"
echo
#cd in to Input folder to run script:
cd ${TMPDIR}/Input
#Move formatForSegex_ver3.R to Input folder
mv ${TMPDIR}/formatForSegex_ver3.R ${TMPDIR}/Input/
#Call the Rscript
#Rscript formatForSegex_ver3.R <input file> <output file> <pseudocount>
#<output file> = <SEGEX#>_<Condition 1 Description>_vs_<Condition 2 Description>_refseq_ForSegexUpload.txt
#For the DiffExp file, do not change the following line:
Differential_Expression_File=$OUTPUT_PREFIX'_'$CONDITION_1_NAME'_'$CONDITION_2_NAME.txt
SEGEX_File=${CONDITION_2_NAME}'_'${Dataset_Label}'_'${M_Num_Cond2_List}'_vs_'${CONDITION_1_NAME}'_'${Dataset_Label}'_'${M_Num_Cond1_List}'_'$OUTPUT_PREFIX'_forSEGEXUpload'
pseudocount=1
echo 'Printing Rscript command:'
echo "Rscript formatForSegex_ver3.R  $Differential_Expression_File $SEGEX_File $pseudocount $COL_SUFFIX"
Rscript formatForSegex_ver3.R  $Differential_Expression_File $SEGEX_File $pseudocount $COL_SUFFIX
echo "=========================================================="
Comparison_Info=${CONDITION_2_NAME}'_'${Dataset_Label}'_'${M_Num_Cond2_List}'_'${CONDITION_1_NAME}'_'${Dataset_Label}'_'${M_Num_Cond1_List}
echo 'Comparison_Info:'
echo ${Comparison_Info}
echo '#----------------------------------------------------------------------------------'
echo 'Running Diff_Genes.R'
echo '#----------------------------------------------------------------------------------'
#Copy Diff_Genes.R to Input folder
mv ${TMPDIR}/Diff_Genes.R ${TMPDIR}/Input/
#Call the Rscript
echo 'Printing Rscript command:'
echo "Rscript Diff_Genes.R ${SEGEX_File}_DESeq.txt 2 0.05 DESeq ${COL_SUFFIX}'_'${Comparison_Info}"
Rscript Diff_Genes.R ${SEGEX_File}_DESeq.txt 2 0.05 DESeq ${COL_SUFFIX}'_'${Comparison_Info}
echo "Rscript Diff_Genes.R ${SEGEX_File}_EdgeR.txt 2 0.05 EdgeR ${COL_SUFFIX}'_'${Comparison_Info}"
Rscript Diff_Genes.R ${SEGEX_File}_EdgeR.txt 2 0.05 EdgeR ${COL_SUFFIX}'_'${Comparison_Info}
echo '#----------------------------------------------------------------------------------'
echo 'Running Venn_Diff_Genes.R'
echo '#----------------------------------------------------------------------------------'
#Move Venn_Diff_Genes.R to Input folder
mv ${TMPDIR}/Venn_Diff_Genes.R ${TMPDIR}/Input/
echo 'Printing Rscript command:'
echo '#----------------------------------------------------------------------------------'
#Down DE gene comparison:
Down_DE_Gene_File1="Down_Genes_DESeq_"${COL_SUFFIX}'_'${Comparison_Info}".txt"
Down_DE_Gene_File2="Down_Genes_EdgeR_"${COL_SUFFIX}'_'${Comparison_Info}".txt"
echo "Rscript Venn_Diff_Genes.R "${Down_DE_Gene_File1} ${Down_DE_Gene_File2} ${COL_SUFFIX}'_Counting' ${DiffExp_Index}
Rscript Venn_Diff_Genes.R ${Down_DE_Gene_File1} ${Down_DE_Gene_File2} ${COL_SUFFIX}'_Counting' ${DiffExp_Index}
echo '#----------------------------------------------------------------------------------'
#Up DE gene comparison:
Up_DE_Gene_File1="Up_Genes_DESeq_"${COL_SUFFIX}'_'${Comparison_Info}".txt"
Up_DE_Gene_File2="Up_Genes_EdgeR_"${COL_SUFFIX}'_'${Comparison_Info}".txt"
echo "Rscript Venn_Diff_Genes.R "${Up_DE_Gene_File1} ${Up_DE_Gene_File2} ${COL_SUFFIX}'_Counting' ${DiffExp_Index}
Rscript Venn_Diff_Genes.R ${Up_DE_Gene_File1} ${Up_DE_Gene_File2} ${COL_SUFFIX}'_Counting' ${DiffExp_Index}
echo '#----------------------------------------------------------------------------------'
#echo
#echo 'Merging png images into one pdf'
#echo 
#convert *.png ${DiffExp_Index}'_Venn_Diagrams'.pdf
#Since I want to have all *.png on a single page after the DiffExp_Summary.sh, I just want to copy the *.png to storage
echo '#----------------------------------------------------------------------------------'
echo 'Merging Count.Table(s) into one text file'
echo '#----------------------------------------------------------------------------------'
cat Count.Table.*.txt > ${DiffExp_Index}'_Venn_Tables_'${Comparison_Info}.txt
rm Count.Table.*.txt
echo "=========================================================="
echo "Renaming the Differential_Expression_File"
mv ${Differential_Expression_File} ${OUTPUT_PREFIX}'_'${Comparison_Info}.txt
Differential_Expression_File=${OUTPUT_PREFIX}'_'${Comparison_Info}.txt
echo "=========================================================="
cd ${TMPDIR}
echo
echo "List files in Input"
echo
ls -al ${TMPDIR}/Input/*
echo "=========================================================="
#Copy files back to storage
cp ${TMPDIR}/Input/$Differential_Expression_File $OUTPUT_DIR
cp ${TMPDIR}/Input/${SEGEX_File}* $OUTPUT_DIR
cp ${TMPDIR}/Input/*_Genes*.txt $OUTPUT_DIR
cp ${TMPDIR}/Input/*.png $OUTPUT_DIR
cp ${TMPDIR}/Input/${DiffExp_Index}'_Venn_Tables_'${Comparison_Info}.txt $OUTPUT_DIR

echo
echo "List files in scratch"
echo
ls -alh
echo

echo "=========================================================="
echo "Re-naming files in OUTPUT_DIR"
echo "Need to append the COUNT_PROGRAM name to all output files"
cd ${OUTPUT_DIR}
#----------------------------------------------------------------
Text_file_list=*.txt
for Text_file in ${Text_file_list}
do
#echo ${Text_file}
#Extract file name:
file_name=${Text_file%\.txt}
#echo $file_name
#Rename files:
mv ${Text_file} ${file_name}'_'${COUNT_PROGRAM}'.txt'
done
#----------------------------------------------------------------
PNG_file_list=*.png
for PNG_file in ${PNG_file_list}
do
#echo ${PNG_file}
#Extract file name:
file_name=${PNG_file%\.png}
#echo $file_name
#Rename files:
mv ${PNG_file} ${file_name}'_'${COUNT_PROGRAM}'.png'
done
#----------------------------------------------------------------
echo
echo "List files in OUTPUT_DIR"
echo
ls -alh
echo "=========================================================="

echo "=========================================================="
echo "Finished on : $(date)"
#Use to calculate job time:
#End_Time in seconds
End_Time=$(date +"%s")
diff=$(($End_Time-$Start_Time))
echo "$(($diff / 60)) minutes and $(($diff % 60)) seconds elapsed."
echo "=========================================================="