Mercurial > repos > sarahinraauzeville > rnaseq_tophat2
view sm_tophat2_toolshed.pl @ 3:b89fa46df3a7 draft
Uploaded
author | sarahinraauzeville |
---|---|
date | Thu, 11 Feb 2016 08:45:52 -0500 |
parents | f50a064ebd1c |
children |
line wrap: on
line source
#!/usr/bin/perl -w # usage : perl sm_tophat.pl <read1 file> <read2 file> # Sarah Maman - 2016 # Copyright (C) 2016 INRA # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # use strict; use File::Basename; use Config::IniFiles; my $cfg = Config::IniFiles->new( -file => "/path/to/PATH.ini" ); my $path = $cfg->val( 'workPath', 'FILEPATH_DEV' ); my $TOPHAT = $cfg->val( 'toolsPath', 'TOPHAT2_PATH' ); my $bowtie2build = $cfg->val( 'toolsPath', 'BOWTIE2_INDEXATION_PATH' ); my $lib = $ARGV[0]; my $input_read1 = $ARGV[1]; my $input_read2 = $ARGV[2]; my $reference_selector = $ARGV[3]; my $input_reference = $ARGV[4]; my $p = $ARGV[5]; my $r = $ARGV[6]; my $max_intron = $ARGV[7]; my $output_bam = $ARGV[8]; my $output_bed = $ARGV[9]; my $output_unmapped_bam = $ARGV[10]; my $zip = $ARGV[11]; my $gtf_cond = $ARGV[12]; my $inputGTF = $ARGV[13]; print STDOUT "REFERENCE: *$input_reference*\n\n"; my $cmd = ''; my $cmd2 = ''; my $poption ="", my $roption =""; my $max_intron_option =""; my ($nb) = ($output_bam=~/galaxy_dataset_(\d+)\.\S+$/); my $liboption=""; my $genome_index_base=""; if (!$lib eq ""){$liboption = "--library-type $lib";} if (!$p eq ""){$poption = "-p $p";} if (!$r eq ""){$roption = "-r $r";} if (!$max_intron eq ""){$max_intron_option = "--max-intron-length $max_intron";} my $gtfoption =''; if (!$gtf_cond eq "F"){$gtfoption = "-G $inputGTF";} else {$gtfoption ="";} #Creation du repertoire de sortie des resultats `cd $path/; mkdir $nb/; chmod -R 777 $nb/;`; my $working_dir="$path/$nb/"; # if the Biologist has his own reference file # generate the bowtie index for that reference print STDOUT "reference_selector : *$reference_selector*\n\n"; if($reference_selector eq "history"){ # copy your fasta file to the working directory `cp $input_reference "$working_dir/reference.fasta"`; chdir($working_dir) or die "system failed: chdir($working_dir) : $?"; my $info=`pwd`; print STDOUT "INFO:Changed to working directory: $info \n "; # index the reference # get the "genome_index_base" ? $cmd = "($bowtie2build reference.fasta genome_index_base ) >& ./tophat.log 2>&1"; #Info pour les biologistes print STDOUT "Tophat : \n\n $cmd \n\n "; system $cmd; # retrieve the new reference path $genome_index_base="$path/$nb/genome_index_base"; } else{ $genome_index_base=$input_reference; } print STDOUT "genome_index_base: *$genome_index_base*\n\n"; #Donner l extension fastq.gz si les fichiers sont zippes if ($zip eq "YES"){ print STDOUT "FASTQ files zipped ? $zip \n\n "; `cp $input_read1 $input_read1.fastq.gz ; `; `cp $input_read2 $input_read2.fastq.gz ; `; $cmd = "($TOPHAT -o '$path/$nb/' $poption $max_intron_option $roption $gtfoption $genome_index_base $input_read1.fastq.gz $input_read2.fastq.gz) >& ./tophat.log 2>&1"; #Info pour les biologistes print STDOUT "Tophat : \n\n $cmd \n\n "; system $cmd; } else { $cmd2 = "($TOPHAT -o '$path/$nb/' $liboption $poption $max_intron_option $roption $gtfoption $genome_index_base $input_read1 $input_read2) >& ./tophat.log 2>&1"; #Info pour les biologistes print STDOUT "Tophat : \n\n $cmd2 \n\n "; system $cmd2; } if (! -e "$path/$nb/accepted_hits.bam") { print STDERR "BAM FILE NOT FOUND\n"; } else { `mv "$path/$nb/accepted_hits.bam" $output_bam`; } if (! -e "$path/$nb/unmapped.bam") { print STDERR "unmapped.bam FILE NOT FOUND\n"; } else { `mv "$path/$nb/unmapped.bam" $output_unmapped_bam`; } if (! -e "$path/$nb/junctions.bed") { print STDERR "JUNCTIONS BED FILE NOT FOUND\n"; } else { `mv "$path/$nb/junctions.bed" $output_bed`; }