#!/bin/bash -l
##################################################################################
#Andy Rampersaud, 02.22.16
#This script is called by setup_CollectInsertSizeMetrics.sh
#
#Purpose:
#  SGE job script that runs CollectInsertSizeMetrics on one sample's BAM file.
#Positional arguments (exactly 4 are required):
#  $1 Sample_ID          $2 Dataset_DIR
#  $3 Sample_Labels_DIR  $4 SCRIPT_DIR
#Exit status:
#  1 when the argument count is wrong.
##################################################################################
# Specify which shell to use
#$ -S /bin/bash
# Run on the current working directory
#$ -cwd
# Join standard output and error to a single file
# (change "n" to "y" if you want a single qlog file)
# The note above is kept off the directive line so that the scheduler only ever
# sees the option itself.
#$ -j n
##################################################################################
#Initialize variables from CollectInsertSizeMetrics.sh
##################################################################################
#checking the command line arg
#-ne : "is not equal to"
if [ "$#" -ne 4 ]; then
    # The here-document delimiter is quoted so the ${...} placeholders are shown
    # literally as a template; nothing in this script has assigned them yet, so
    # expanding them would normally print blanks.
    # Usage errors go to stderr and the job ends with a non-zero status so that
    # the failure is visible to the scheduler and to the calling setup script.
    cat >&2 <<'USAGE'
Need 4 arguments for the qsub command:
qsub -N ${Job_Name}'_'${Sample_ID} -P waxmanlab -l h_rt=${TIME_LIMIT} CollectInsertSizeMetrics.qsub ${Sample_ID} ${Dataset_DIR} ${Sample_Labels_DIR} ${SCRIPT_DIR}
USAGE
    exit 1
fi
#http://www.ibm.com/developerworks/library/l-bash-parameters/
#Note: If you have more than 9 parameters, you cannot use $10 to refer to the
#tenth one. You must first either process or save the first parameter ($1), then
#use the shift command to drop parameter 1 and move all remaining parameters
#down 1, so that $10 becomes $9 and so on.
#(Bash also accepts the braced form ${10}.)
#http://unix.stackexchange.com/questions/104420/how-to-use-command-line-arguments-in-a-shell-script
#If you need access more than 9 command line arguments, you can use the shift
#command. Example: shift 2 renames $3 to $1, $4 to $2 etc.
#process the command line arguments
Sample_ID=$1
Dataset_DIR=$2
Sample_Labels_DIR=$3
SCRIPT_DIR=$4
#Print variables (make sure they appear correctly):
# Values are quoted so they are logged exactly as received (no word splitting
# or glob expansion).
echo "-----------------------"
echo "Start of variable list:"
echo "-----------------------"
echo "Sample_ID:"
echo "${Sample_ID}"
echo "Dataset_DIR:"
echo "${Dataset_DIR}"
echo "Sample_Labels_DIR:"
echo "${Sample_Labels_DIR}"
echo "SCRIPT_DIR:"
echo "${SCRIPT_DIR}"
echo "-----------------------"
echo "End of variable list"
echo "-----------------------"
# Now let's keep track of some information just in case anything goes wrong
echo "=========================================================="
#Use to calculate job time:
#Start_Time in seconds
Start_Time=$(date +"%s")
echo "Starting on : $(date)"
echo "Running on node : $(hostname)"
echo "Current directory : $(pwd)"
echo "Current job ID : $JOB_ID"
echo "Current job name : $JOB_NAME"
echo "Task index number : $SGE_TASK_ID"
echo "Parameter for multiple cores : $NSLOTS"
echo "=========================================================="
# Go to local scratch directory
echo
echo 'Change dir to scratch directory'
echo
# An empty TMPDIR would make a bare "cd" switch to $HOME and the job would then
# stage its files there, so stop instead of continuing in the wrong directory.
if [ -z "${TMPDIR:-}" ] || ! cd "${TMPDIR}"; then
    echo "ERROR: cannot change to scratch directory (TMPDIR='${TMPDIR:-}')" >&2
    exit 1
fi
echo
echo 'Print scratch directory location:'
echo
echo "${TMPDIR}"
#--------------------------------------
# The banner below used to be printed twice in a row; once is enough.
echo
echo 'Loading required modules...'
echo
#Make sure the shebang line = #!/bin/bash -l
#Need the -l option to load modules
#Search for latest program installed:
#module avail -t 2>&1 | grep -i picard
#picard/1.123(default)
#picard/picard-1.81_jar_x86_64
#Load module:
# NOTE(review): whether "module load" reports failure through its exit status
# depends on the installed modules implementation - confirm on the cluster. The
# check is harmless when the status is always 0.
if ! module load picard/1.123; then
    echo "ERROR: failed to load module picard/1.123" >&2
    exit 1
fi
#module help picard/1.123
#---------------------------------------------------------------------------------
#----------- Module Specific Help for 'picard/1.123' ---------------
#picard 1.123 Tools for manipulating high-throughput sequence data/formats
#Picard comprises Java-based command-line utilities that manipulate SAM files, and a Java API (SAM-JDK) for creating new programs that read and write SAM files.
#Both SAM text format and SAM binary (BAM) format are supported.
#For more information on picard, please see www.broadinstitute.github.io/picard
#---------------------------------------------------------------------------------
#Initialize INPUT_BAM:
INPUT_BAM=${Sample_ID}'_primary_unique.bam'
# Directory that holds the input BAM; the results are copied back here later.
STORAGE_DIR=${Dataset_DIR}/${Sample_ID}/fastq/tophat2
# copy user input data files to scratch
# Stop when the BAM cannot be staged: every later step needs it.
if ! cp -- "${STORAGE_DIR}/${INPUT_BAM}" .; then
    echo "ERROR: could not copy ${STORAGE_DIR}/${INPUT_BAM} to $(pwd)" >&2
    exit 1
fi
#Create CollectInsertSizeMetrics output folder to store files:
OUTPUT_DIR='CollectInsertSizeMetrics'
###############################
# mkdir -p succeeds when the folder already exists, so no separate test is needed.
if ! mkdir -p -- "${OUTPUT_DIR}"; then
    echo "ERROR: could not create output folder ${OUTPUT_DIR}" >&2
    exit 1
fi
###############################
echo
echo 'List files in scratch directory:'
echo
ls -alh
echo
echo 'Starting to run my commands'
echo
echo
echo 'Starting CollectInsertSizeMetrics command'
echo
#--------------------------------------------------------------------------------
#CollectInsertSizeMetrics --help
#USAGE: CollectInsertSizeMetrics [options]
#Documentation: http://broadinstitute.github.io/picard/command-line-overview.html#CollectInsertSizeMetrics
#Reads a SAM or BAM file and writes a file containing metrics about the statistical distribution of insert size (excluding duplicates) and generates a Histogram plot.
#Version: 1.123(286a232caea2fdc8fdd88574c09c460b46386fff_1413818736)
#Options:
#--help
#-h                            Displays options specific to this tool.
#--stdhelp
#-H                            Displays options specific to this tool AND options common to all Picard command line
#                              tools.
#--version                     Displays program version.
#HISTOGRAM_FILE=File
#H=File                        File to write insert size Histogram chart to. Required.
#DEVIATIONS=Double             Generate mean, sd and plots by trimming the data down to MEDIAN +
#                              DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. This is done because insert size data typically
#                              includes enough anomalous values from chimeras and other artifacts to make the mean and
#                              sd grossly misleading regarding the real distribution. Default value: 10.0.
#                              This option can be set to 'null' to clear the default value.
#HISTOGRAM_WIDTH=Integer
#W=Integer                     Explicitly sets the Histogram width, overriding automatic truncation of Histogram tail.
#                              Also, when calculating mean and standard deviation, only bins <= HISTOGRAM_WIDTH will be
#                              included. Default value: null.
#MINIMUM_PCT=Float
#M=Float                       When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that
#                              have fewer than this percentage of overall reads. (Range: 0 to 1). Default value: 0.05.
#                              This option can be set to 'null' to clear the default value.
#METRIC_ACCUMULATION_LEVEL=MetricAccumulationLevel
#LEVEL=MetricAccumulationLevel The level(s) at which to accumulate metrics. Possible values: {ALL_READS, SAMPLE,
#                              LIBRARY, READ_GROUP} This option may be specified 0 or more times. This option can be set
#                              to 'null' to clear the default list.
#INPUT=File
#I=File                        Input SAM or BAM file. Required.
#OUTPUT=File
#O=File                        File to write the output to. Required.
#REFERENCE_SEQUENCE=File
#R=File                        Reference sequence fasta Default value: null.
#ASSUME_SORTED=Boolean
#AS=Boolean                    If true (default), then the sort order in the header file will be ignored. Default
#                              value: true. This option can be set to 'null' to clear the default value. Possible
#                              values: {true, false}
#STOP_AFTER=Long               Stop after processing N reads, mainly for debugging. Default value: 0. This option can
#                              be set to 'null' to clear the default value.
#--------------------------------------------------------------------------------
# Build the command once as an array so that the line written to the log is
# exactly what gets executed, and so that values containing spaces stay intact.
# Reads Sample_ID, INPUT_BAM, OUTPUT_DIR, STORAGE_DIR and Start_Time, which are
# set earlier in this script.
insert_size_cmd=(
    CollectInsertSizeMetrics
    "HISTOGRAM_FILE=${OUTPUT_DIR}/${Sample_ID}_hist.pdf"
    "INPUT=${INPUT_BAM}"
    "OUTPUT=${OUTPUT_DIR}/${Sample_ID}_metrics"
)
echo 'Printing command:'
echo "${insert_size_cmd[*]}"
echo
#Run command:
# A failed run must not be reported as a successful job, and its partial output
# must not be copied back to storage.
if ! "${insert_size_cmd[@]}"; then
    echo "ERROR: CollectInsertSizeMetrics failed for sample '${Sample_ID}'" >&2
    exit 1
fi
#--------------------------------------------------------------------------------
echo
echo 'Finished CollectInsertSizeMetrics command'
echo
echo
echo 'Copy output to storage dir'
echo
# Scratch space is temporary, so losing this copy means losing the results.
if ! cp -r -- "${OUTPUT_DIR}" "${STORAGE_DIR}"; then
    echo "ERROR: could not copy ${OUTPUT_DIR} to ${STORAGE_DIR}" >&2
    exit 1
fi
echo
echo "List files in scratch"
echo
ls -alh
echo "=========================================================="
echo "Finished on : $(date)"
#Use to calculate job time:
#End_Time in seconds
End_Time=$(date +"%s")
# Bare names inside $(( )) evaluate an unset or empty variable as 0 instead of
# raising a syntax error.
diff=$(( End_Time - Start_Time ))
echo "$(( diff / 3600 )) hours, $(( (diff / 60) % 60 )) minutes and $(( diff % 60 )) seconds elapsed."
echo "=========================================================="