Mercurial > repos > sarahinraauzeville > rnaseq_tophat2
comparison sm_tophat2_toolshed.pl @ 2:f50a064ebd1c draft
Uploaded
author | sarahinraauzeville |
---|---|
date | Thu, 11 Feb 2016 08:45:37 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:038c61725cfb | 2:f50a064ebd1c |
---|---|
1 #!/usr/bin/perl -w | |
2 | |
3 # usage : perl sm_tophat.pl <read1 file> <read2 file> | |
4 # Sarah Maman - 2016 | |
5 # Copyright (C) 2016 INRA | |
6 # This program is free software: you can redistribute it and/or modify | |
7 # it under the terms of the GNU General Public License as published by | |
8 # the Free Software Foundation, either version 3 of the License, or | |
9 # (at your option) any later version. | |
10 # | |
11 # This program is distributed in the hope that it will be useful, | |
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 # GNU General Public License for more details. | |
15 # | |
16 # You should have received a copy of the GNU General Public License | |
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 # | |
19 | |
20 | |
21 use strict; | |
22 use File::Basename; | |
23 use Config::IniFiles; | |
24 | |
25 my $cfg = Config::IniFiles->new( -file => "/path/to/PATH.ini" ); | |
26 my $path = $cfg->val( 'workPath', 'FILEPATH_DEV' ); | |
27 | |
28 my $TOPHAT = $cfg->val( 'toolsPath', 'TOPHAT2_PATH' ); | |
29 my $bowtie2build = $cfg->val( 'toolsPath', 'BOWTIE2_INDEXATION_PATH' ); | |
30 | |
31 my $lib = $ARGV[0]; | |
32 my $input_read1 = $ARGV[1]; | |
33 my $input_read2 = $ARGV[2]; | |
34 my $reference_selector = $ARGV[3]; | |
35 my $input_reference = $ARGV[4]; | |
36 my $p = $ARGV[5]; | |
37 my $r = $ARGV[6]; | |
38 my $max_intron = $ARGV[7]; | |
39 my $output_bam = $ARGV[8]; | |
40 my $output_bed = $ARGV[9]; | |
41 my $output_unmapped_bam = $ARGV[10]; | |
42 my $zip = $ARGV[11]; | |
43 my $gtf_cond = $ARGV[12]; | |
44 my $inputGTF = $ARGV[13]; | |
45 | |
46 | |
47 print STDOUT "REFERENCE: *$input_reference*\n\n"; | |
48 | |
49 my $cmd = ''; my $cmd2 = ''; | |
50 my $poption ="", | |
51 my $roption =""; | |
52 my $max_intron_option =""; | |
53 my ($nb) = ($output_bam=~/galaxy_dataset_(\d+)\.\S+$/); | |
54 my $liboption=""; | |
55 my $genome_index_base=""; | |
56 | |
57 if (!$lib eq ""){$liboption = "--library-type $lib";} | |
58 if (!$p eq ""){$poption = "-p $p";} | |
59 if (!$r eq ""){$roption = "-r $r";} | |
60 if (!$max_intron eq ""){$max_intron_option = "--max-intron-length $max_intron";} | |
61 | |
62 my $gtfoption =''; | |
63 if (!$gtf_cond eq "F"){$gtfoption = "-G $inputGTF";} | |
64 else {$gtfoption ="";} | |
65 | |
66 #Creation du repertoire de sortie des resultats | |
67 `cd $path/; mkdir $nb/; chmod -R 777 $nb/;`; | |
68 | |
69 my $working_dir="$path/$nb/"; | |
70 # if the Biologist has his own reference file | |
71 # generate the bowtie index for that reference | |
72 print STDOUT "reference_selector : *$reference_selector*\n\n"; | |
73 if($reference_selector eq "history"){ | |
74 | |
75 # copy your fasta file to the working directory | |
76 `cp $input_reference "$working_dir/reference.fasta"`; | |
77 chdir($working_dir) or die "system failed: chdir($working_dir) : $?"; | |
78 my $info=`pwd`; | |
79 print STDOUT "INFO:Changed to working directory: $info \n "; | |
80 | |
81 # index the reference | |
82 # get the "genome_index_base" ? | |
83 $cmd = "($bowtie2build reference.fasta genome_index_base ) >& ./tophat.log 2>&1"; | |
84 | |
85 #Info pour les biologistes | |
86 print STDOUT "Tophat : \n\n $cmd \n\n "; | |
87 system $cmd; | |
88 | |
89 # retrieve the new reference path | |
90 $genome_index_base="$path/$nb/genome_index_base"; | |
91 } | |
92 else{ | |
93 $genome_index_base=$input_reference; | |
94 } | |
95 | |
96 print STDOUT "genome_index_base: *$genome_index_base*\n\n"; | |
97 | |
98 #Donner l extension fastq.gz si les fichiers sont zippes | |
99 if ($zip eq "YES"){ | |
100 | |
101 print STDOUT "FASTQ files zipped ? $zip \n\n "; | |
102 `cp $input_read1 $input_read1.fastq.gz ; `; | |
103 `cp $input_read2 $input_read2.fastq.gz ; `; | |
104 | |
105 $cmd = "($TOPHAT -o '$path/$nb/' $poption $max_intron_option $roption $gtfoption $genome_index_base $input_read1.fastq.gz $input_read2.fastq.gz) >& ./tophat.log 2>&1"; | |
106 | |
107 #Info pour les biologistes | |
108 print STDOUT "Tophat : \n\n $cmd \n\n "; | |
109 system $cmd; | |
110 } | |
111 else | |
112 { | |
113 $cmd2 = "($TOPHAT -o '$path/$nb/' $liboption $poption $max_intron_option $roption $gtfoption $genome_index_base $input_read1 $input_read2) >& ./tophat.log 2>&1"; | |
114 | |
115 #Info pour les biologistes | |
116 print STDOUT "Tophat : \n\n $cmd2 \n\n "; | |
117 system $cmd2; | |
118 } | |
119 | |
120 | |
121 | |
122 if (! -e "$path/$nb/accepted_hits.bam") | |
123 { | |
124 print STDERR "BAM FILE NOT FOUND\n"; | |
125 } | |
126 else | |
127 { | |
128 `mv "$path/$nb/accepted_hits.bam" $output_bam`; | |
129 } | |
130 | |
131 if (! -e "$path/$nb/unmapped.bam") | |
132 { | |
133 print STDERR "unmapped.bam FILE NOT FOUND\n"; | |
134 } | |
135 else | |
136 { | |
137 `mv "$path/$nb/unmapped.bam" $output_unmapped_bam`; | |
138 } | |
139 | |
140 | |
141 if (! -e "$path/$nb/junctions.bed") | |
142 { | |
143 print STDERR "JUNCTIONS BED FILE NOT FOUND\n"; | |
144 } | |
145 else | |
146 { | |
147 `mv "$path/$nb/junctions.bed" $output_bed`; | |
148 } |