annotate sm_tophat2_toolshed.pl @ 2:f50a064ebd1c draft

Uploaded
author sarahinraauzeville
date Thu, 11 Feb 2016 08:45:37 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
1 #!/usr/bin/perl -w
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
2
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
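3 # usage : perl sm_tophat2_toolshed.pl <library type> <read1 file> <read2 file> <reference selector> <reference> <-p value> <-r value> <max intron length> <output bam> <output junctions bed> <output unmapped bam> <zipped: YES|NO> <gtf switch> <gtf file>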
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
4 # Sarah Maman - 2016
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
5 # Copyright (C) 2016 INRA
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
6 # This program is free software: you can redistribute it and/or modify
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
7 # it under the terms of the GNU General Public License as published by
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
8 # the Free Software Foundation, either version 3 of the License, or
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
9 # (at your option) any later version.
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
10 #
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
11 # This program is distributed in the hope that it will be useful,
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
14 # GNU General Public License for more details.
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
15 #
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
16 # You should have received a copy of the GNU General Public License
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
18 #
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
19
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
20
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
21 use strict;
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
22 use File::Basename;
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
23 use Config::IniFiles;
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
24
f50a064ebd1c Uploaded
sarahinraauzeville
parents:
diff changeset
# ---------------------------------------------------------------------------
# Configuration, command-line arguments and TopHat option strings.
#
# Positional arguments:
#    0  library type                  -> --library-type
#    1  first reads file
#    2  second reads file
#    3  reference selector ("history" = reference FASTA to be indexed here)
#    4  reference (FASTA file, or an already built index base otherwise)
#    5  value passed to -p
#    6  value passed to -r
#    7  value passed to --max-intron-length
#    8  output BAM path
#    9  output junctions BED path
#   10  output unmapped BAM path
#   11  "YES" when the read files are gzip-compressed
#   12  GTF switch ("F" disables the annotation)
#   13  GTF annotation file           -> -G
# ---------------------------------------------------------------------------

# Config::IniFiles->new returns undef on failure; stop with a clear message
# instead of failing later on "Can't call method val on an undefined value".
my $cfg = Config::IniFiles->new( -file => "/path/to/PATH.ini" )
    or die "Cannot read configuration file /path/to/PATH.ini: "
    . join( '; ', @Config::IniFiles::errors ) . "\n";

my $path         = $cfg->val( 'workPath',  'FILEPATH_DEV' );
my $TOPHAT       = $cfg->val( 'toolsPath', 'TOPHAT2_PATH' );
my $bowtie2build = $cfg->val( 'toolsPath', 'BOWTIE2_INDEXATION_PATH' );

my (
    $lib,                 $input_read1, $input_read2,
    $reference_selector,  $input_reference,
    $p,                   $r,           $max_intron,
    $output_bam,          $output_bed,  $output_unmapped_bam,
    $zip,                 $gtf_cond,    $inputGTF,
) = @ARGV;

print STDOUT "REFERENCE: *$input_reference*\n\n";

# Command buffers shared with the rest of the script.
my $cmd  = '';
my $cmd2 = '';

# Dataset number taken from the output BAM file name; it names the per-job
# working directory. Stays undefined when the name does not match.
my ($nb) = ( defined $output_bam ? $output_bam : '' )
    =~ /galaxy_dataset_(\d+)\.\S+$/;

my $genome_index_base = "";

# Optional TopHat switches: emitted only when a value was supplied.
# The tests are explicit "defined and not empty" checks. The former
# `!$x eq ""` parsed as `(!$x) eq ""`, i.e. a truthiness test, which
# silently discarded a legitimate value of 0 (for instance "-r 0").
my $liboption         = "";
my $poption           = "";
my $roption           = "";
my $max_intron_option = "";

if ( defined $lib && $lib ne "" ) {
    $liboption = "--library-type $lib";
}
if ( defined $p && $p ne "" ) {
    $poption = "-p $p";
}
if ( defined $r && $r ne "" ) {
    $roption = "-r $r";
}
if ( defined $max_intron && $max_intron ne "" ) {
    $max_intron_option = "--max-intron-length $max_intron";
}

# GTF annotation: used unless the switch is "F".
# The former test `!$gtf_cond eq "F"` compared the result of a boolean
# negation ("" or 1) with "F" and therefore never enabled -G.
# NOTE(review): assumes any switch value other than "F" means "use the GTF";
# confirm against the tool wrapper.
my $gtfoption = '';
if (   defined $gtf_cond
    && $gtf_cond ne "F"
    && defined $inputGTF
    && $inputGTF ne "" )
{
    $gtfoption = "-G $inputGTF";
}

# ---------------------------------------------------------------------------
# Create the per-job results directory and, when the reference comes from the
# user's history, build a bowtie2 index for it inside that directory.
# ---------------------------------------------------------------------------
use File::Copy qw(copy);

# Without a dataset number the directory name would be empty: the working
# directory would collapse to $path itself and a recursive chmod on "$nb/"
# would target "/". Refuse to go any further in that case.
die "Cannot determine the dataset number from the output BAM path '"
    . ( defined $output_bam ? $output_bam : '' ) . "'\n"
    unless defined $nb && $nb ne '';

my $working_dir = "$path/$nb/";

# Directory creation is done in Perl rather than through the shell so that
# unusual characters in the configured path cannot be misinterpreted.
if ( !-d $working_dir ) {
    mkdir $working_dir
        or die "Cannot create working directory $working_dir: $!\n";
}

# Kept world-accessible, as before.
chmod 0777, $working_dir
    or warn "Cannot change permissions of $working_dir: $!\n";

# If the biologist supplied his own reference file,
# generate the bowtie index for that reference.
print STDOUT "reference_selector : *$reference_selector*\n\n";
if ( $reference_selector eq "history" ) {

    # Copy the FASTA file to the working directory.
    copy( $input_reference, "${working_dir}reference.fasta" )
        or die "Cannot copy $input_reference to ${working_dir}reference.fasta: $!\n";

    # chdir reports its error in $!, not in $? (which belongs to child processes).
    chdir($working_dir) or die "system failed: chdir($working_dir) : $!";
    my $info = `pwd`;
    print STDOUT "INFO:Changed to working directory: $info \n ";

    # Index the reference. "> file 2>&1" is the portable spelling of the
    # bash-only ">& file" and also works when /bin/sh is not bash.
    $cmd = "($bowtie2build reference.fasta genome_index_base ) > ./tophat.log 2>&1";

    # Information for the biologists
    print STDOUT "Tophat : \n\n $cmd \n\n ";

    # Mapping cannot succeed without the index, so stop here on failure.
    system($cmd) == 0
        or die "Index construction failed ("
        . ( $? == -1 ? "could not start: $!" : "exit status " . ( $? >> 8 ) )
        . "), see ${working_dir}tophat.log\n";

    # The freshly built index becomes the reference used for mapping.
    $genome_index_base = "$path/$nb/genome_index_base";
}
else {
    # Otherwise the reference argument is used directly as the index base.
    $genome_index_base = $input_reference;
}

print STDOUT "genome_index_base: *$genome_index_base*\n\n";

# ---------------------------------------------------------------------------
# Run TopHat on the two read files.
#
# When the reads are gzip-compressed ($zip is "YES") they are first copied to
# names ending in ".fastq.gz" and the copies are passed to TopHat.
# Both cases now share one command line, so --library-type is passed for
# compressed reads as well (it used to be omitted in that case).
# ---------------------------------------------------------------------------
use File::Copy qw(copy);

# Quote one value for safe use as a single word on a /bin/sh command line.
my $sh_quote = sub {
    my ($value) = @_;
    $value = '' unless defined $value;
    $value =~ s/'/'\\''/g;
    return "'$value'";
};

my $tophat_read1 = $input_read1;
my $tophat_read2 = $input_read2;

# Give the files a fastq.gz extension if they are zipped
if ( defined $zip && $zip eq "YES" ) {

    print STDOUT "FASTQ files zipped ? $zip \n\n ";

    $tophat_read1 = "$input_read1.fastq.gz";
    $tophat_read2 = "$input_read2.fastq.gz";

    copy( $input_read1, $tophat_read1 )
        or die "Cannot copy $input_read1 to $tophat_read1: $!\n";
    copy( $input_read2, $tophat_read2 )
        or die "Cannot copy $input_read2 to $tophat_read2: $!\n";
}

# Option strings are inserted as built ("flag value"); empty ones are dropped.
# File paths are shell-quoted. "> file 2>&1" replaces the bash-only ">& file".
my @tophat_options = grep { defined $_ && $_ ne '' }
    ( $liboption, $poption, $max_intron_option, $roption, $gtfoption );

$cmd = "(" . join(
    ' ',
    $TOPHAT,
    '-o', $sh_quote->("$path/$nb/"),
    @tophat_options,
    $sh_quote->($genome_index_base),
    $sh_quote->($tophat_read1),
    $sh_quote->($tophat_read2),
) . ") > ./tophat.log 2>&1";

# Information for the biologists
print STDOUT "Tophat : \n\n $cmd \n\n ";

# Report a failing run but carry on, so that the per-file checks that follow
# still state which outputs are missing.
system($cmd) == 0
    or print STDERR "Tophat failed ("
    . ( $? == -1 ? "could not start: $!" : "exit status " . ( $? >> 8 ) )
    . "), see ./tophat.log\n";

# ---------------------------------------------------------------------------
# Move the TopHat result files from the working directory to the output paths
# given on the command line. A missing result is reported on STDERR and the
# remaining files are still processed.
# ---------------------------------------------------------------------------
use File::Copy qw(move);

# [ file produced by TopHat, destination, message when it is missing ]
my @tophat_results = (
    [ 'accepted_hits.bam', $output_bam,          "BAM FILE NOT FOUND\n" ],
    [ 'unmapped.bam',      $output_unmapped_bam, "unmapped.bam FILE NOT FOUND\n" ],
    [ 'junctions.bed',     $output_bed,          "JUNCTIONS BED FILE NOT FOUND\n" ],
);

for my $result (@tophat_results) {
    my ( $produced, $destination, $missing_message ) = @{$result};
    my $source = "$path/$nb/$produced";

    if ( !-e $source ) {
        print STDERR $missing_message;
        next;
    }

    # move() avoids the shell, so paths need no quoting, and it reports
    # failure through its return value instead of failing silently.
    move( $source, $destination )
        or print STDERR "Cannot move $source to "
        . ( defined $destination ? $destination : '(undefined)' )
        . ": $!\n";
}