annotate TEfinder @ 1:2edb80d68a1b draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit 5d19bf766a0ac0121ce45b5c532ea5e43825082b
author iuc
date Fri, 23 Sep 2022 11:00:13 +0000
parents 838fb3a1678f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
1 #!/usr/bin/env bash
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
2
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
3 ##
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
4 ##
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
5 ## Authors: Vista Sohrab & Dilay Hazal Ayhan
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
6 ## Date: January 15, 2021
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
7 ## Description: TEfinder uses discordant reads to detect novel transposable element insertion events in short read paired-end sample sequencing data.
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
8 ## Software dependencies include bedtools 2.28.0 or later, samtools 1.3 or later, picard 2.0.1 or later
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
9 ## Required inputs include sample alignment file (.bam|.sam), reference genome FASTA (.fa), reference TE annotation in GFF/GTF or GFF3 (.gff|.gtf), and TEs of interest (.txt)
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
10 ##
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
11 ## University of Massachusetts Amherst
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
12 ##
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
13 ##
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
14 ##
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
15 ##
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
16
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
17 set -e
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
18
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
19 margs=4
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
20
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
21 # Functions
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
22 function example {
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
23 echo -e "example: TEfinder -alignment sample.bam -fa reference.fa -gtf TEs.gtf -te List_of_TEs.txt"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
24 }
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
25
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
26 function help {
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
27 echo -e "REQUIRED:"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
28 echo -e " -alignment, --alignmentFile STR sample reads aligned to reference genome (BAM/SAM file)"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
29 echo -e " -fa, --FastaFile STR reference genome FASTA index (FA file)"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
30 echo -e " -gtf, --TransposonsInGenome STR reference genome TE annotation (GFF2/GTF file)"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
31 echo -e " -te, --TransposonsToSearch STR TE names (single column text file)\n"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
32 echo -e "OPTIONAL:"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
33 echo -e " -bamo, --DiscordantReads STR BAM output\n"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
34 echo -e " -bedo, --bTEinsertions STR TEinsertions BED output\n"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
35 echo -e " -gtfo, --gTEinsertions STR TEinsertions GTF output\n"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
36 echo -e " -fis, --FragmentInsertSize INT short-read sequencing fragment insert size [400]"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
37 echo -e " -picard, --pathToPicardjar STR path to picard tools .jar file [picard.jar]"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
38 echo -e " -md, --MaxDistanceForMerge INT maximum distance between reads for bedtools merge [150]"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
39 echo -e " -k, --MaxTSDLength INT maximum TE target site duplication (TSD) length [20]"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
40 echo -e " -maxHeapMem, --MaxHeapMemory INT java maximum heap memory allocation for picard in Mb [2000]"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
41 echo -e " -workingdir, --WorkingDirectory STR working directory name [TEfinder_<Date>]"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
42 echo -e " -out, --OutputFormat STR output format as GTF [BED]"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
43 echo -e " -outname, --OutputName STR output name prefix added to file names [null]"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
44 echo -e " -threads, --Threads INT number of threads for samtools multi-threading [1]"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
45 echo -e " -intermed --IntermediateFiles STR keep intermediate files created by pipeline [no]"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
46 echo -e " -h, --help prints help\n"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
47 example
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
48 }
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
49
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
50 # check that at least $margs arguments were supplied; otherwise print the usage example and exit with status 1
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
51 function margs_check {
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
52 if [ $# -lt $margs ]; then
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
53 echo -e "One or more required parameters are missing."
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
54 example
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
55 exit 1 # error
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
56 fi
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
57 }
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
58
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
59 # main workflow
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
60 # legend: lines ending in #### (intermediate progress messages) can be commented out
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
61 function pipeline() {
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
62 mkdir ${workingdir}/${line}
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
63 currdir=${workingdir}/${line}
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
64 echo -e $(date) " Transposon analysis for "${line}" has started\n"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
65
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
66 grep -P '[^(\w|\d|\-|\_|\#|\.)]'${line}'[^(\w|\d|\-|\_|\#|\.)]' $gtf > ${currdir}/${line}_TE.gff
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
67 echo -e $(date) " Individual TE GFF has been created for "${line}"\n" ####
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
68
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
69 bedtools intersect -abam ${workingdir}/${outname}Alignments.bam -b ${currdir}/${line}_TE.gff -wa > ${currdir}/${line}_MappedReadsToTE.bam
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
70 echo -e $(date) " Mapped reads to TE via bedtools intersect has been completed for "${line}"\n" ####
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
71 samtools view -@ $threads ${currdir}/${line}_MappedReadsToTE.bam | \
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
72 awk -v Ins=`expr $fis \* 10` '{if (($7 != "=") || ($9 > Ins) || ($9 < -Ins)) print $1}' > ${currdir}/${line}_ReadID.txt
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
73 echo -e $(date) " Identifying discordant read IDs has been completed for "${line}"\n" ####
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
74
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
75 # if the discordant read ID file exists and is non-empty, continue with the remainder of the TE analysis
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
76 if [[ -s ${currdir}/${line}_ReadID.txt ]]
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
77 then
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
78 #java $maxHeapMem -jar $picard FilterSamReads I=${workingdir}/${outname}Alignments.bam O=${currdir}/${line}_DiscordantPairs.bam READ_LIST_FILE=${currdir}/${line}_ReadID.txt FILTER=includeReadList WRITE_READS_FILES=false
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
79 $picard $maxHeapMem FilterSamReads -I ${workingdir}/${outname}Alignments.bam -O ${currdir}/${line}_DiscordantPairs.bam -READ_LIST_FILE ${currdir}/${line}_ReadID.txt -FILTER includeReadList -WRITE_READS_FILES false &>/dev/null
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
80 echo -e $(date) " Filtering original alignment based on discordant reads IDs is complete for "${line}"\n" ####
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
81
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
82 bedtools merge -d $md -S + -c 1 -o count -i ${currdir}/${line}_DiscordantPairs.bam | \
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
83 awk '{if ($4 > 3) print $0}' > ${currdir}/${line}_plusCluster.bed
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
84 echo -e $(date) " Primary reads from the + strand have been merged if read count greater than 3 for "${line}"\n" ####
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
85
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
86 bedtools merge -d $md -S - -c 1 -o count -i ${currdir}/${line}_DiscordantPairs.bam | \
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
87 awk '{if ($4 > 3) print $0}' > ${currdir}/${line}_minusCluster.bed
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
88 echo -e $(date) " Primary reads from the - strand have been merged if read count greater than 3 for "${line}"\n" ####
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
89
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
90 # filtering edges piped into bedtools merge (keeping read counts greater than 3 in the line above)
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
91 ## find the closest minus strand to the plus strand in the cluster
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
92 ## filter by the distance between the plus and minus clusters - only retain pairs whose clusters are 0 to k bases apart (k is the -k / MaxTSDLength option)
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
93 ## if plus strand start is less than minus strand start and plus strand end is less than minus strand end then in proper orientation
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
94 bedtools closest -d -g ${workingdir}/reference.fa.fai -t first -a ${currdir}/${line}_plusCluster.bed -b ${currdir}/${line}_minusCluster.bed | \
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
95 awk -v TSD=$k '{if ($9 <= TSD && $9 >= 0) print $0}' | \
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
96 awk '{if ($2 < $6 && $3 < $7) print $0}' > ${currdir}/${line}_plusminus.bed
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
97 echo -e $(date) " Filtration of clusters in proper orientation using bedtools closest has been completed for "${line}"\n" ####
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
98
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
99 # if plus strand end is greater than minus strand start, then report the pair
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
100 awk '{if ($3 > $6) print $1"\t"$6"\t"$3"\t"$0}' ${currdir}/${line}_plusminus.bed > ${currdir}/${line}_plusminus_1.bed
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
101 echo -e $(date) " Overlapping reads TE insertions reported for "${line}"\n" ####
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
102
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
103 #if plus strand end is less than or equal to minus strand start and the region in between is less than a user-defined value k, report the pair
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
104 awk -v TSD=$k '{if ($3 <= $6 && $6 - $3 < TSD) print $1"\t"$3 - 1"\t"$6 + 1"\t"$0}' ${currdir}/${line}_plusminus.bed > \
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
105 ${currdir}/${line}_plusminus_2.bed
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
106 echo -e $(date) " Non-overlapping reads TE insertions reported for "${line}"\n" ####
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
107
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
108 #combine reported TE insertions
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
109 cat ${currdir}/${line}_plusminus_1.bed ${currdir}/${line}_plusminus_2.bed | \
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
110 awk -v TEname=$line '{$0=TEname"\t"$0}1' | sort -k 1 | sort -k 2 > \
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
111 ${currdir}/${line}_insertionRegion.txt
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
112
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
113
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
114 cat ${currdir}/${line}_insertionRegion.txt >> ${workingdir}/insertions.txt
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
115 echo -e $(date) " TE insertions for "${line}" have been reported.\n" ####
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
116
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
117
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
118 echo -e $(date) " Transposon named "${line}" is processed.\n"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
119 else
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
120 echo -e $(date) " Transposon named "${line}" is processed. No discordant reads found.\n"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
121 rm -r ${currdir}
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
122 fi
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
123 }
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
124
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
125 # functions end
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
126
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
# get arguments
# Default value for every setting; the option parser that follows overwrites
# whichever ones are supplied on the command line.

# Input files (-fa, -alignment, -gtf, -te). They start empty and are handed to
# margs_check once parsing is done.
fa=""
alignment=""
gtf=""
te=""

# Settings without a default (-out, -intermed, -outname).
out=""
intermed=""
outname=""

# Output file names (-bamo, -bedo, -gtfo).
bamo="DiscordantReads.bam"
bedo="TEinsertions.bed"
gtfo="TEinsertions.gtf"

# Numeric settings: -fis/--FragmentInsertSize, -md/--MaxDistanceForMerge,
# -k/--MaxTSDLength and -threads/--Threads.
fis="400"
md="150"
k="20"
threads="1"

# Picard command and the heap option set through -maxHeapMem/--MaxHeapMemory.
picard="picard"
maxHeapMem="-Xmx2000m"

# Start timestamp; only the commented-out directory name below refers to it
# within this part of the script.
d="$(date '+%Y%m%d%H%M%S')"
# workingdir=TEfinder_${d}
workingdir="TEfinder"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
147
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
# Parse command-line options into the global setting variables.
#
# Arguments: the script's own argument list, passed as "$@".
#
# Every option except -h/--help takes exactly one value (an empty string is an
# accepted value). The loop is driven by the argument count rather than by
# testing "$1" against "", so an empty argument can no longer end parsing early
# and hide the options that follow it.
#
# Exits the script:
#   - after printing help for -h/--help (via the help function defined
#     elsewhere in this file);
#   - with status 1, after printing a message and the usage example (via the
#     example function defined elsewhere in this file), for an unknown option
#     or for an option that is not followed by a value.
parse_args() {
    while [ $# -gt 0 ]
    do
        case $1 in
            -h | --help )
                help
                exit ;;
            -fa | --FastaFile )
                fa=${2-} ;;
            -alignment | --alignmentFile )
                alignment=${2-} ;;
            -gtf | --TransposonsInGenome )
                gtf=${2-} ;;
            -te | --TransposonsToSearch )
                te=${2-} ;;
            -fis | --FragmentInsertSize )
                fis=${2-} ;;
            -picard | --pathToPicardjar )
                picard=${2-} ;;
            -md | --MaxDistanceForMerge )
                md=${2-} ;;
            -k | --MaxTSDLength )
                k=${2-} ;;
            -bamo | --DiscordantReads )
                bamo=${2-} ;;
            -bedo | --bTEinsertions )
                bedo=${2-} ;;
            -gtfo | --gTEinsertions )
                gtfo=${2-} ;;
            -maxHeapMem | --MaxHeapMemory )
                # The value is wrapped into a -Xmx<value>m heap option.
                maxHeapMem="-Xmx${2-}m" ;;
            -workingdir | --WorkingDirectory )
                workingdir=${2-} ;;
            -out | --OutputFormat )
                out=${2-} ;;
            -outname | --OutputName )
                outname=${2-} ;;
            -threads | --Threads )
                threads=${2-} ;;
            -intermed | --IntermediateFiles )
                intermed=${2-} ;;
            *) # error
                echo "TEfinder: illegal option $1"
                example
                exit 1 ;;
        esac

        # Only value-taking options reach this point; refuse one that was
        # given as the very last argument instead of silently storing "".
        if [ $# -lt 2 ]
        then
            echo "TEfinder: option $1 requires a value"
            example
            exit 1
        fi
        shift 2
    done
}

parse_args "$@"
# The inline loop this replaces consumed the script's positional parameters;
# clear them so any later code sees the same state.
shift $#
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
# Hand the four input settings to margs_check (defined elsewhere in this file).
# NOTE(review): the expansions are deliberately left unquoted, as in the
# original; margs_check presumably inspects how many arguments it receives, and
# quoting would turn an unset value into an empty argument - confirm before
# changing.
margs_check $fa $alignment $gtf $te

# main

# Create the working directory. -p lets a nested -workingdir path be created
# and avoids an error when the directory is already there.
# NOTE(review): an already existing directory is reused as is, including any
# files left in it by an earlier run.
mkdir -p "${workingdir}"

# remove empty lines from user provided TE list if present
sed '/^$/d' "$te" > "${workingdir}/userTE_noEmptyLines.txt"

# create output file: a single tab-separated track header line
{
    printf "%s\t" "track name=TEfinder" "type=bedDetail" "description=FR:forward read, RR:reverse read, InsRegion:insertion region start and end positions, FILTER:comma separated filters"
    printf "\n"
} > "${workingdir}/${outname}TEinsertions.bed"

# create fasta index (fai) file next to a private copy of the reference
cp "$fa" "${workingdir}/reference.fa"
samtools faidx "${workingdir}/reference.fa"

# sort alignment input; stop here on failure instead of reporting success
if ! samtools sort -@ "$threads" -o "${workingdir}/alignmentInput.sorted.bam" "${alignment}"
then
    echo -e $(date) " ERROR: sorting the alignment file failed.\n" >&2
    exit 1
fi
echo -e $(date) " Alignment file sorted successfully.\n"

# remove secondary and supplementary alignments from sorted bam
# (-F 2304 excludes reads with flag 256 or 2048 set)
if ! samtools view -F 2304 -@ "$threads" -o "${workingdir}/${outname}Alignments.bam" "${workingdir}/alignmentInput.sorted.bam"
then
    echo -e $(date) " ERROR: filtering the sorted alignment file failed.\n" >&2
    exit 1
fi
echo -e $(date) " Alignments are filtered - secondary and supplementary alignments have been removed. \n"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
236
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
# run pipeline for each TE
# Each line of the cleaned TE list is stored in $line and pipeline is started
# for it as a background job. The extra -n test keeps a final line that has no
# trailing newline.
while IFS= read -r line || [[ -n ${line} ]]; do
    pipeline &
done < ${workingdir}/userTE_noEmptyLines.txt

# Block until every background job has finished before finalizing.
wait
echo -e $(date) " All transposons are processed. Finalizing...\n"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
244
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
# combine discordant bam files
# Merges every <workingdir>/*/*_DiscordantPairs.bam into one BAM. Only the
# variable part of the pattern is quoted so the glob still expands while a
# working directory containing spaces stays a single word.
samtools merge -@ "$threads" -r "${workingdir}/${outname}DiscordantReads.bam" "${workingdir}"/*/*_DiscordantPairs.bam
echo -e $(date) " BAM Output: Discordant pair alignment file is now available.\n"

# Sorting by position, written out as SAM with its header
samtools sort -@ "$threads" "${workingdir}/${outname}DiscordantReads.bam" | samtools view -h -o "${workingdir}/${outname}DiscordantReads.sam"

# Drop the @PG header lines, then remove the intermediate SAM.
grep -v '^@PG' "${workingdir}/${outname}DiscordantReads.sam" > "${workingdir}/${outname}DiscordantReadsNoPG.sam"
rm "${workingdir}/${outname}DiscordantReads.sam"

# Convert back to BAM at the requested output path without adding a new @PG
# line (--no-PG) and without PG aux tags (-x "PG").
# NOTE(review): --remove-flags is documented as taking a FLAG value; "PG" is
# passed through exactly as in the original - confirm it is intended.
samtools view -hb -x "PG" --no-PG --remove-flags "PG" -O BAM "${workingdir}/${outname}DiscordantReadsNoPG.sam" -o "${bamo}"
rm "${workingdir}/${outname}DiscordantReadsNoPG.sam"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
254
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
# update output BED file with TEfinder results: organize the starting file
# Reorders the columns of insertions.txt into BED order ($2 $3 $4 as the
# interval, $1 as the name), uses $8+$12 as the score and records $8, $12 and
# $6-$11 in the FR=, RR= and InsRegion= fields. FILTER= is left empty here.
awk '{print $2"\t"$3"\t"$4"\t"$1"\t"$8+$12"\t.\tFR="$8";RR="$12";InsRegion="$6"-"$11";FILTER="}' "${workingdir}/insertions.txt" > "${workingdir}/TEinsertions_putative.bed"

# find the entries in repeat regions for filtering
# -wa -u reports each putative insertion once if it overlaps any feature in
# the user-supplied GTF; paths are quoted so spaces in them are preserved.
bedtools intersect -wa -u -a "${workingdir}/TEinsertions_putative.bed" -b "$gtf" > "${workingdir}/TEinsertions_putative_inrepeat.bed"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
259 # filtering process
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
260 while IFS="" read -r line || [ -n "$line" ]
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
261 do
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
262 #located in repeat region
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
263 if (grep -Fxq "$line" "${workingdir}/TEinsertions_putative_inrepeat.bed")
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
264 then
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
265 line=$line"in_repeat,"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
266 fi
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
267
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
268 #weak evidence
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
269 readc=$(echo $line | awk '{print $5}')
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
270 if (( $readc < 10 ))
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
271 then
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
272 line=$line"weak_evidence,"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
273 fi
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
274
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
275 #strand-biased
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
276 FR=$(echo $line | grep -o 'FR=[[:digit:]]*' | cut -f2 -d'=')
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
277 RR=$(echo $line | grep -o 'RR=[[:digit:]]*' | cut -f2 -d'=')
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
278 var1=$(echo 'e(l('$FR')*1.25)' | bc -l)
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
279 var2=$(echo 'e(l('$FR')*0.8)' | bc -l)
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
280
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
281 if [ $(echo "$RR > $var1" | bc) -eq 1 ] || [ $(echo "$RR < $var2" | bc) -eq 1 ]
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
282 then
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
283 line=$line"strand_bias,"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
284 fi
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
285
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
286 #pass
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
287 lastchar=${line: -1}
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
288 if [ $lastchar == "," ]
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
289 then
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
290 line=${line::${#line}-1}
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
291 else
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
292 line=$line"PASS"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
293 fi
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
294
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
295 #write to final output
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
296 printf "%s\n" "$line" >> ${workingdir}/${outname}TEinsertions.bed
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
297
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
298 done < ${workingdir}/TEinsertions_putative.bed
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
# Wait for any background jobs still running in this shell before reporting.
wait
echo -e $(date) " BED Output: TEfinder output BED file is now available.\n"

# Sort the accumulated BED records (by chromosome, then by ascending feature
# size) and write the result to ${bedo}.
# Every path is quoted so a working directory, output prefix or destination
# containing spaces or glob characters is passed through as a single word.
bedtools sort -chrThenSizeA -i "${workingdir}/${outname}TEinsertions.bed" > "${bedo}"
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
305
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
# gtf option - create output GTF files with TEfinder results.
# Runs only when $out is non-empty.
if [ -n "$out" ]
then
    # Convert the BED records to GTF:
    #   - FNR > 1 skips the first line of the BED file;
    #   - $2 + 1 shifts the BED start column by one (BED starts are 0-based,
    #     GTF starts are 1-based);
    #   - BED column 5 becomes the GTF score, column 4 the te_name attribute
    #     and column 7 the tags attribute.
    # Paths are quoted so spaces or glob characters in them cannot split or
    # expand the arguments.
    awk 'FNR > 1 {print $1"\tTEfinder\tTIP\t"$2 + 1"\t"$3"\t"$5"\t.\t.\tte_name \""$4"\"; tags \""$7"\""}' "${workingdir}/${outname}TEinsertions.bed" > "${workingdir}/${outname}TEinsertions.gtf"

    # Sort the GTF the same way as the BED output and write it to ${gtfo}.
    bedtools sort -chrThenSizeA -i "${workingdir}/${outname}TEinsertions.gtf" > "${gtfo}"

    echo -e "\n\n"
    echo -e $(date) " GTF Output: TEfinder output GTF file is now available.\n"
fi
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
319
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
# clean working directory
# Intermediate files are removed only when $intermed is empty.
if [ -z "$intermed" ]
then
    # ${workingdir:?} makes the shell abort with an error instead of expanding
    # to an empty string; without it an unset or empty workingdir would turn the
    # commands below into "rm /TEinsertions_putative.bed ..." and "rm -r /*/".
    # "--" ends option parsing so a path beginning with "-" is never read as
    # an rm option, and quoting keeps paths with spaces intact.
    rm -- "${workingdir:?}/TEinsertions_putative.bed" \
          "${workingdir:?}/TEinsertions_putative_inrepeat.bed" \
          "${workingdir:?}/reference.fa" \
          "${workingdir:?}/reference.fa.fai" \
          "${workingdir:?}/alignmentInput.sorted.bam" \
          "${workingdir:?}/insertions.txt" \
          "${workingdir:?}/${outname}Alignments.bam" \
          "${workingdir:?}/userTE_noEmptyLines.txt"

    # Remove every sub-directory of the working directory. The glob stays
    # outside the quotes so it still expands to the directory list.
    rm -r -- "${workingdir:?}"/*/
fi
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
327
838fb3a1678f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
iuc
parents:
diff changeset
# Final status report, based on the number of lines in the BED output.
# One line or fewer is reported as an unsuccessful run.
#
# The count defaults to 0 when the file is missing or unreadable. Previously
# an absent file made the command substitution empty, the numeric test failed
# with a syntax error, and control fell into the "else" branch, so a run that
# produced no output at all was reported as successful.
# The script's exit status is intentionally left as it was: only the message
# selection is corrected.
tefinder_final_bed="${workingdir}/${outname}TEinsertions.bed"
tefinder_bed_lines=0
if [ -r "$tefinder_final_bed" ]
then
    tefinder_bed_lines=$(wc -l < "$tefinder_final_bed")
fi

if [ "$tefinder_bed_lines" -le 1 ]
then
    echo -e $(date) " Error: TEfinder run unsuccessful."
else
    echo -e $(date) " TE insertion output files have been created. TEfinder completed successfully."
fi