Mercurial > repos > sarahinraauzeville > rnaseq_tophat2
diff sm_tophat2_toolshed.pl @ 2:f50a064ebd1c draft
Uploaded
author | sarahinraauzeville |
---|---|
date | Thu, 11 Feb 2016 08:45:37 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sm_tophat2_toolshed.pl Thu Feb 11 08:45:37 2016 -0500 @@ -0,0 +1,148 @@ +#!/usr/bin/perl -w + +# usage : perl sm_tophat.pl <read1 file> <read2 file> +# Sarah Maman - 2016 +# Copyright (C) 2016 INRA +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + + +use strict; +use File::Basename; +use Config::IniFiles; + +my $cfg = Config::IniFiles->new( -file => "/path/to/PATH.ini" ); +my $path = $cfg->val( 'workPath', 'FILEPATH_DEV' ); + +my $TOPHAT = $cfg->val( 'toolsPath', 'TOPHAT2_PATH' ); +my $bowtie2build = $cfg->val( 'toolsPath', 'BOWTIE2_INDEXATION_PATH' ); + +my $lib = $ARGV[0]; +my $input_read1 = $ARGV[1]; +my $input_read2 = $ARGV[2]; +my $reference_selector = $ARGV[3]; +my $input_reference = $ARGV[4]; +my $p = $ARGV[5]; +my $r = $ARGV[6]; +my $max_intron = $ARGV[7]; +my $output_bam = $ARGV[8]; +my $output_bed = $ARGV[9]; +my $output_unmapped_bam = $ARGV[10]; +my $zip = $ARGV[11]; +my $gtf_cond = $ARGV[12]; +my $inputGTF = $ARGV[13]; + + +print STDOUT "REFERENCE: *$input_reference*\n\n"; + +my $cmd = ''; my $cmd2 = ''; +my $poption ="", +my $roption =""; +my $max_intron_option =""; +my ($nb) = ($output_bam=~/galaxy_dataset_(\d+)\.\S+$/); +my $liboption=""; +my $genome_index_base=""; + +if (!$lib eq ""){$liboption = "--library-type $lib";} +if (!$p eq ""){$poption = "-p $p";} +if (!$r eq ""){$roption = "-r $r";} +if (!$max_intron eq ""){$max_intron_option = "--max-intron-length $max_intron";} + +my $gtfoption =''; +if (!$gtf_cond eq "F"){$gtfoption = "-G $inputGTF";} +else {$gtfoption ="";} + +#Creation du repertoire de sortie des resultats +`cd $path/; mkdir $nb/; chmod -R 777 $nb/;`; + +my $working_dir="$path/$nb/"; +# if the Biologist has his own reference file +# generate the bowtie index for that reference +print STDOUT "reference_selector : *$reference_selector*\n\n"; +if($reference_selector eq "history"){ + + # copy your fasta file to the working directory + `cp $input_reference "$working_dir/reference.fasta"`; + chdir($working_dir) or die "system failed: chdir($working_dir) : $?"; + my $info=`pwd`; + print STDOUT "INFO:Changed to working directory: $info \n "; + + # index the reference + # get the "genome_index_base" ? + $cmd = "($bowtie2build reference.fasta genome_index_base ) >& ./tophat.log 2>&1"; + + #Info pour les biologistes + print STDOUT "Tophat : \n\n $cmd \n\n "; + system $cmd; + + # retrieve the new reference path + $genome_index_base="$path/$nb/genome_index_base"; +} +else{ + $genome_index_base=$input_reference; +} + +print STDOUT "genome_index_base: *$genome_index_base*\n\n"; + +#Donner l extension fastq.gz si les fichiers sont zippes +if ($zip eq "YES"){ + + print STDOUT "FASTQ files zipped ? $zip \n\n "; + `cp $input_read1 $input_read1.fastq.gz ; `; + `cp $input_read2 $input_read2.fastq.gz ; `; + + $cmd = "($TOPHAT -o '$path/$nb/' $poption $max_intron_option $roption $gtfoption $genome_index_base $input_read1.fastq.gz $input_read2.fastq.gz) >& ./tophat.log 2>&1"; + + #Info pour les biologistes + print STDOUT "Tophat : \n\n $cmd \n\n "; + system $cmd; +} +else +{ + $cmd2 = "($TOPHAT -o '$path/$nb/' $liboption $poption $max_intron_option $roption $gtfoption $genome_index_base $input_read1 $input_read2) >& ./tophat.log 2>&1"; + + #Info pour les biologistes + print STDOUT "Tophat : \n\n $cmd2 \n\n "; + system $cmd2; +} + + + +if (! -e "$path/$nb/accepted_hits.bam") +{ +print STDERR "BAM FILE NOT FOUND\n"; +} +else +{ +`mv "$path/$nb/accepted_hits.bam" $output_bam`; +} + +if (! -e "$path/$nb/unmapped.bam") +{ +print STDERR "unmapped.bam FILE NOT FOUND\n"; +} +else +{ +`mv "$path/$nb/unmapped.bam" $output_unmapped_bam`; +} + + +if (! -e "$path/$nb/junctions.bed") +{ +print STDERR "JUNCTIONS BED FILE NOT FOUND\n"; +} +else +{ +`mv "$path/$nb/junctions.bed" $output_bed`; +}