root/trunk/narwhal/bin/run_tools.sh @ 10

Revision 10, 4.1 KB (checked in by r.w.w.brouwer@…, 3 years ago)

Deconvoluted runtools.sh install directory function

  • Property svn:executable set to *
Line 
#!/usr/bin/env bash
#
# run_tools.sh - run the narwhal processing steps for a single run folder.
#
# Each step looks for its '*.param' file below RUNFOLDER and hands that file
# to `parallel`; a step whose parameter file is not found is skipped.
#

#
#
# FUNCTIONS
#
#

#######################################
# Print a message followed by the usage text, then terminate the script.
# Arguments: $1 - message explaining why the script stops (may be omitted)
# Outputs:   the message and the usage text on stdout
# Returns:   does not return; exits with status 2
#######################################
usage() {
    # printf with a quoted argument keeps whitespace and glob characters in
    # the message exactly as the caller passed them.
    printf '%s\n' "${1-}"
    echo "
Usage: run_tools.sh [-j maxjobs] [-m mismatches] RUNFOLDER

    -j maxjobs      maximum number of parallel jobs (default: 8)
    -m mismatches   value handed to 'demultiplex index -m' (default: 0)

"
    exit 2
}
23
24
# VARIABLES
#
# Derive the installation directory from the location of this script: resolve
# the script path, take its directory and strip a trailing "bin". For a script
# living in <root>/bin this leaves "<root>/" (with the trailing slash), which
# is why the paths below are appended without a separator.
idir=$( readlink -f -- "$0" )
idir=$( dirname -- "$idir" )
idir=${idir%bin}

align="${idir}tools/align.py"                   # the location of the alignment script
profile="${idir}conf/profiles.json"             # the default location for the profiles
MAXJOBS=8                                       # run at most 8 parallel processes
RUNFOLDER=""                                    # the run folder to write the results in
mismatches=0                                    # default for -m, handed to 'demultiplex index'

echo "$(date) Started"
38
#
# PARSE THE OPTIONS
#
#   -j N   maximum number of parallel jobs (stored in MAXJOBS)
#   -m N   number of mismatches (stored in mismatches)
#
while getopts 'j:m:' opt; do
    case "$opt" in
        j)
            MAXJOBS=$OPTARG
            ;;
        m)
            mismatches=$OPTARG
            ;;
        *)
            # Unknown option or missing option argument; getopts has already
            # written its own diagnostic to stderr.
            usage "Invalid command line option"
            ;;
    esac
done

# Drop the parsed options so that $1 is the run folder.
shift $((OPTIND - 1))

# Without a run folder RUNFOLDER would end up empty and the find calls later
# in the script would scan the current directory instead, so stop here.
(( $# >= 1 )) || usage "No RUNFOLDER specified"
[[ -d "$1" ]] || usage "RUNFOLDER '$1' is not a directory"

RUNFOLDER=$( readlink -f -- "$1" )
echo "$(date) Processing folder ${RUNFOLDER}"
57
# ADD ENVIRONMENT VARIABLES
#
#

# Modify the PATH so that it includes the narwhal tools directory; it is put
# in front, so it is searched before the existing entries.
NARWHAL_TOOLS="${idir}tools/"
export PATH="${NARWHAL_TOOLS}:${PATH}"

# Append the python library directory of the installation. The ':' separator
# is only emitted when PYTHONPATH already holds a value, so an unset
# PYTHONPATH does not produce a leading empty entry.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${idir}/lib/python/"
67
68
# PREFLIGHT CHECKS
#
# determine whether the necessary tools are in the path; the first missing
# tool ends the script through usage()
tools=( nice cat sync qseq2fastq samtools demultiplex bamstats parallel )
for t in "${tools[@]}" ; do
    # command -v is the shell's own lookup, so no external 'which' is needed
    command -v "$t" > /dev/null 2>&1 || usage "Tool ${t} is not present in the PATH"
done
77
# DATA PREPROCESSING
#
# Every step looks up its parameter file below RUNFOLDER and, when that file
# exists and is not empty, passes its ';'-separated columns to parallel.
# When find reports more than one match, the -f test fails on the combined
# multi-line string and the step is skipped.
#

# Convert the qseq to fastq files
optfile="$( find "$RUNFOLDER" -name 'qseq2fastq.param' )"

if [[ -f "$optfile" && -s "$optfile" ]] ; then
        echo "$(date) Processing qseq conversions from ${optfile}"
        parallel -a "$optfile" --colsep ';' -j "$MAXJOBS" qseq2fastq {1} {2}

fi

# Concatenate the tile FastQ files to the correct sample files. Parallelisation of this step
# should be on the output file level.
optfile="$( find "$RUNFOLDER" -name 'concatenate.param' )"
if [[ -f "$optfile" && -s "$optfile" ]] ; then
        echo "$(date) Processing non multiplexed reads from ${optfile}"
        # -j 1: one job at a time, each appending {1} to {2}
        parallel -a "$optfile" --colsep ';' -j 1 "cat {1} >> {2}; sync;"
fi


# Index the tiled FastQ reads. For each tile containing a barcode an independent index will be created.
optfile="$( find "$RUNFOLDER" -name 'demultiplex_index.param' )"
if [[ -f "$optfile" && -s "$optfile" ]] ; then
        echo "$(date) Indexing multiplexed FastQ files from ${optfile}"
        parallel -a "$optfile" --colsep ';' -j "$MAXJOBS" demultiplex index -f {1}  -i {2} -b {3} -s {4} -c {5} -m "$mismatches"
fi

# Divide the reads
optfile="$( find "$RUNFOLDER" -name 'demultiplex_divide.param' )"
if [[ -f "$optfile" && -s "$optfile" ]] ; then
        echo "$(date) Dividing multiplexed FastQ files from ${optfile}"
        parallel -a "$optfile" --colsep ';' -j 1 demultiplex divide -f {1}  -i {2} -o {3} -c {4}
fi
114
# ALIGNMENT
#
# Runs only when exactly one non-empty 'alignment.param' is found below
# RUNFOLDER; each of its lines supplies six ';'-separated columns.
#

# Perform the alignment by running the python alignment script
optfile="$( find "$RUNFOLDER" -name 'alignment.param' )"
if [[ -f "$optfile" && -s "$optfile" ]] ; then
        echo "$(date) Aligning samples from ${optfile}"
        # -j 1: the alignments are started one after another
        parallel -a "$optfile" --colsep ';' -j 1 "$align" -p "$profile" -f "{1}" -s "{2}" -r "{3}" -t "{4}" -o "{5}" -P "{6}"
fi
126
# DATA POSTPROCESSING
#
# Both steps are skipped unless their parameter file is found below RUNFOLDER
# and is not empty.
#

# Convert the SAM files to sorted BAM files etc.
optfile="$( find "$RUNFOLDER" -name 'sambam.param' )"
if [[ -f "$optfile" && -s "$optfile" ]] ; then
        echo "$(date) Performing file format conversions from ${optfile}"
        # per line: view {1} -> {2}, sort {2} -> {3}, index {4}
        parallel -a "$optfile" --colsep ';' -j "$MAXJOBS" "samtools view -Sbo {2} {1}; samtools sort {2} {3}; samtools index {4};"
fi

# Calculate statistics over the BAM files
optfile="$( find "$RUNFOLDER" -name 'bamstats.param' )"
if [[ -f "$optfile" && -s "$optfile" ]] ; then
        echo "$(date) Generating statistics from ${optfile}"
        parallel -a "$optfile" --colsep ';' -j "$MAXJOBS" "bamstats {1} {3} >> {4} ; plotter.R --prefix {3} ; quality_info.sh -p {3} -n {2} -s {4} -o {5}"
fi
145
# DATA PACKAGING
#
#
#

if [[ -z "$RUNFOLDER" ]] ; then
        # With an empty RUNFOLDER the find calls would scan the current
        # directory and the list would be written to /pack.param, so the
        # packing step is refused.
        echo "$(date) No run folder set, skipping the packing step" >&2
else
        # pack the largest data: list every SAM, FastQ and index file below
        # the run folder. The list is appended to, so entries that are already
        # in pack.param are kept.
        {
                find "$RUNFOLDER" -name '*.sam'
                find "$RUNFOLDER" -name '*.fastq'
                find "$RUNFOLDER" -name '*.index'
        } >> "$RUNFOLDER/pack.param"

        echo "$(date) Packing output data"
        parallel -a "$RUNFOLDER/pack.param" -j "$MAXJOBS" gzip --best {}
fi
echo "$(date) Finished"
159
Note: See TracBrowser for help on using the browser.