Mercurial > repos > portiahollyoak > temp
comparison scripts/TEMP_Absence.sh @ 0:28d1a6f8143f draft
planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
| author | portiahollyoak |
|---|---|
| date | Mon, 25 Apr 2016 13:08:56 -0400 |
| parents | |
| children | ca36262102d8 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:28d1a6f8143f |
|---|---|
| 1 #!/bin/bash -x | |
| 2 # TEMP (Transposable Element Movement present in a Population) | |
| 3 # 2013-06-14 | |
| 4 # Jiali Zhuang(jiali.zhuang@umassmed.edu) | |
| 5 # Zhiping Weng Lab | |
| 6 # Programs in Bioinformatics and Integrative Biology | |
| 7 # University of Massachusetts Medical School | |
| 8 | |
| 9 #usage function | |
| 10 usage() { | |
| 11 echo -en "\e[1;36m" | |
| 12 cat <<EOF | |
| 13 | |
| 14 usage: $0 -i input_file.sorted.bam -s scripts_directory -o output_directory -r transposon_rpmk.bed -t reference.2bit -f fragment_size -c CPUs -h | |
| 15 | |
| 16 TEMP is a software package for detecting transposable elements (TEs) | |
| 17 insertions and excisions from pooled high-throughput sequencing data. | |
| 18 Please send questions, suggestions and bug reports to: | |
| 19 jiali.zhuang@umassmed.edu | |
| 20 | |
| 21 Options: | |
| 22 -i Input file in bam format with full path. Please sort and index the file before calling this program. | |
| 23 Sorting and indexing can be done by 'samtools sort' and 'samtools index' | |
| 24 -s Directory where all the scripts are | |
| 25 -o Path to output directory. Default is current directory | |
| 26 -r Annotated transposon positions in the genome (e.g., repeakMask) in bed6 format with full path | |
| 27 -t 2bit file for the reference genome (can be downloaded from UCSC Genome Browser) | |
| 28 -f An integer specifying the length of the fragments (inserts) of the library. Default is 500 | |
| 29 -c An integer specifying the number of CUPs used. Default is 4 | |
| 30 -h Show help message | |
| 31 | |
| 32 EOF | |
| 33 echo -en "\e[0m" | |
| 34 } | |
| 35 | |
| 36 # taking options | |
| 37 while getopts "hi:c:f:o:r:s:t:" OPTION | |
| 38 do | |
| 39 case $OPTION in | |
| 40 h) | |
| 41 usage && exit 1 | |
| 42 ;; | |
| 43 i) | |
| 44 BAM=$OPTARG | |
| 45 ;; | |
| 46 f) | |
| 47 INSERT=$OPTARG | |
| 48 ;; | |
| 49 o) | |
| 50 OUTDIR=$OPTARG | |
| 51 ;; | |
| 52 c) | |
| 53 CPU=$OPTARG | |
| 54 ;; | |
| 55 s) | |
| 56 BINDIR=$OPTARG | |
| 57 ;; | |
| 58 r) | |
| 59 TEBED=$OPTARG | |
| 60 ;; | |
| 61 t) | |
| 62 REF=$OPTARG | |
| 63 ;; | |
| 64 ?) | |
| 65 usage && exit 1 | |
| 66 ;; | |
| 67 esac | |
| 68 done | |
| 69 | |
| 70 if [[ -z $BAM ]] || [[ -z $BINDIR ]] || [[ -z $TEBED ]] || [[ -z $REF ]] | |
| 71 then | |
| 72 usage && exit 1 | |
| 73 fi | |
| 74 [ ! -z "${CPU##*[!0-9]*}" ] || CPU=4 | |
| 75 [ ! -z "${INSERT##*[!0-9]*}" ] || INSERT=500 | |
| 76 [ ! -z $OUTDIR ] || OUTDIR=$PWD | |
| 77 | |
| 78 mkdir -p "${OUTDIR}" || echo -e "\e[1;31mWarning: Cannot create directory ${OUTDIR}. Using the direcory of input fastq file\e[0m" | |
| 79 cd ${OUTDIR} || echo -e "\e[1;31mError: Cannot access directory ${OUTDIR}... Exiting...\e[0m" || exit 1 | |
| 80 touch ${OUTDIR}/.writting_permission && rm -rf ${OUTDIR}/.writting_permission || echo -e "\e[1;31mError: Cannot write in directory ${OUTDIR}... Exiting...\e[0m" || exit 1 | |
| 81 | |
| 82 function checkExist { | |
| 83 echo -ne "\e[1;32m\"${1}\" is using: \e[0m" && which "$1" | |
| 84 [[ $? != 0 ]] && echo -e "\e[1;36mError: cannot find software/function ${1}! Please make sure that you have installed the pipeline correctly.\nExiting...\e[0m" && exit 1 | |
| 85 } | |
| 86 echo -e "\e[1;35mTesting required softwares/scripts:\e[0m" | |
| 87 checkExist "echo" | |
| 88 checkExist "rm" | |
| 89 checkExist "mkdir" | |
| 90 checkExist "date" | |
| 91 checkExist "mv" | |
| 92 checkExist "sort" | |
| 93 checkExist "touch" | |
| 94 checkExist "awk" | |
| 95 checkExist "grep" | |
| 96 checkExist "bwa" | |
| 97 checkExist "samtools" | |
| 98 echo -e "\e[1;35mDone with testing required softwares/scripts, starting pipeline...\e[0m" | |
| 99 | |
| 100 name=`basename $BAM` | |
| 101 i=${name/.sorted.bam/} | |
| 102 echo $name | |
| 103 echo $i | |
| 104 if [[ ! -s $name ]] | |
| 105 then | |
| 106 cp $BAM ./ | |
| 107 fi | |
| 108 if [[ ! -s $name.bai ]] | |
| 109 then cp $BAM.bai ./ | |
| 110 fi | |
| 111 | |
| 112 #Detect excision sites | |
| 113 samtools view -XF 0x2 $name > $i.unpair.sam | |
| 114 awk -F "\t" '{OFS="\t"; if ($9 != 0) print $0}' $i.unpair.sam > temp1.sam | |
| 115 perl $BINDIR/pickUniqIntervalPos.pl temp1.sam $INSERT > $i.unproper.uniq.interval.bed | |
| 116 | |
| 117 rm temp1.sam $i.unpair.sam | |
| 118 | |
| 119 # Sometimes $i.unproper.uniq.interval.bed contains malformed bed entries | |
| 120 # These must be removed to prevent the script failing | |
| 121 awk '{if ($3>=$2 && $3 > 0 && $2 > 0) print $0}' $i.unproper.uniq.interval.bed > $i.unproper.uniq.interval.fixed.bed | |
| 122 mv $i.unproper.uniq.interval.fixed.bed $i.unproper.uniq.interval.bed | |
| 123 | |
| 124 # Map to transposons | |
| 125 bedtools intersect -a $TEBED -b $i.unproper.uniq.interval.bed -f 1.0 -wo > temp | |
| 126 perl $BINDIR/filterFalsePositive.ex.pl temp $INSERT $i.final.pairs.rpmk.bed | |
| 127 bedtools intersect -a $TEBED -b $i.final.pairs.rpmk.bed -f 1.0 -wo > temp2 | |
| 128 | |
| 129 perl $BINDIR/excision.clustering.pl temp2 $i.excision.cluster.rpmk | |
| 130 rm temp temp2 $i.unproper.uniq.interval.bed $i.final.pairs.rpmk.bed | |
| 131 | |
| 132 # Identify breakpoints using soft-clipping information | |
| 133 perl $BINDIR/pickSoftClipping.over.pl $i.excision.cluster.rpmk $REF > $i.excision.cluster.rpmk.sfcp | |
| 134 perl $BINDIR/refine_breakpoint.ex.pl | |
| 135 | |
| 136 # Estimate excision sites frequencies | |
| 137 perl $BINDIR/pickOverlapPair.ex.pl $i.excision.cluster.rpmk.refined.bp > $i.excision.cluster.rpmk.refined.bp.refsup | |
| 138 perl $BINDIR/summarize_excision.pl |
