changeset 26:5c0e8f82bbbf draft

Uploaded
author mcharles
date Wed, 10 Sep 2014 05:14:53 -0400
parents 39376c7204be
children 5b6c476166a7
files rapsodyn/mpileupfilteronblastxml.pl rapsodyn/mpileupfilteronblastxml.xml
diffstat 2 files changed, 0 insertions(+), 308 deletions(-) [+]
line wrap: on
line diff
--- a/rapsodyn/mpileupfilteronblastxml.pl	Wed Sep 10 05:12:05 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,287 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use warnings;
-use Getopt::Long;
-
-my $input_variant_file;
-my $input_blastxml_file;
-my $window_length = 50;
-my $nb_mismatch_max = 2;
-
-GetOptions (
-"input_variant_file=s" => \$input_variant_file,
-"input_blastxml_file=s" => \$input_blastxml_file,
-"window_length=i" => \$window_length,
-"nb_mismatch_max=i" => \$nb_mismatch_max
-) or die("Error in command line arguments\n");
-
-
-open(INB, $input_blastxml_file) or die ("Can't open $input_blastxml_file\n");
-
-
-my $iteration_stop="</Iteration>";
-my $hit_stop="</Hit>";
-my $hsp_stop="</Hsp>";
-my $query_flag_start="<Iteration_query-def>";
-my $query_flag_stop="</Iteration_query-def>";
-my $subject_flag_start="<Hit_def>";
-my $subject_flag_stop="</Hit_def>";
-my $query_HSP_flag_from_start="<Hsp_query-from>";
-my $query_HSP_flag_from_stop="</Hsp_query-from>";
-my $query_HSP_flag_to_start="<Hsp_query-to>";
-my $query_HSP_flag_to_stop="</Hsp_query-to>";
-my $subject_HSP_flag_from_start="<Hsp_hit-from>";
-my $subject_HSP_flag_from_stop="</Hsp_hit-from>";
-my $subject_HSP_flag_to_start="<Hsp_hit-to>";
-my $subject_HSP_flag_to_stop="</Hsp_hit-to>";
-my $query_HSP_flag_seq_start="<Hsp_qseq>";
-my $query_HSP_flag_seq_stop="</Hsp_qseq>";
-my $subject_HSP_flag_seq_start="<Hsp_hseq>";
-my $subject_HSP_flag_seq_stop="</Hsp_hseq>";
-my $HSP_midline_flag_start="<Hsp_midline>";
-my $HSP_midline_flag_stop="</Hsp_midline>";
-
-my %hash;
-
-my $compt=0;
-my $current_query="";
-my $current_subject="";
-my $current_HSP_query_from;
-my $current_HSP_query_to;
-my $current_HSP_subject_from;
-my $current_HSP_subject_to;
-my $current_HSP_midline;
-my $current_HSP_qseq;
-my $current_HSP_hseq;
-my $current_query_short;
-
-while (my $ligne = <INB>) {
-
-	if ($ligne=~/$subject_flag_start(.*?)$subject_flag_stop/){
-		$current_subject=$1;
-		#print "--",$1,"\n";
-	}
-	
-	if ($ligne=~/$query_flag_start(.*?)$query_flag_stop/){
-		$current_query=$1;
-		$current_query_short=$current_query;
-		if ($current_query =~ /^(.*?_\d+)\_/){
-			$current_query_short=$1;
-		}
-		if (!$hash{$current_query_short}){
-			$hash{$current_query_short}=0;
-		}
-			
-
-		#print "--",$1,"\n";
-	}
-	if ($ligne=~/$query_HSP_flag_from_start(.*?)$query_HSP_flag_from_stop/){
-		$current_HSP_query_from=$1;
-		#print "--",$1,"...";
-	}
-	if ($ligne=~/$query_HSP_flag_to_start(.*?)$query_HSP_flag_to_stop/){
-		$current_HSP_query_to=$1;
-		#print $1,"\n";
-	}
-	if ($ligne=~/$subject_HSP_flag_from_start(.*?)$subject_HSP_flag_from_stop/){
-		$current_HSP_subject_from=$1;
-		#print "--",$1,"...";
-	}
-	if ($ligne=~/$subject_HSP_flag_to_start(.*?)$subject_HSP_flag_to_stop/){
-		$current_HSP_subject_to=$1;
-		#print $1,"\n";
-	}
-	if ($ligne=~/$query_HSP_flag_seq_start(.*?)$query_HSP_flag_seq_stop/){
-		$current_HSP_qseq=$1;
-		#print "--",$1,"\n";
-	}
-	if ($ligne=~/$subject_HSP_flag_seq_start(.*?)$subject_HSP_flag_seq_stop/){
-		$current_HSP_hseq=$1;
-		#print "--",$1,"\n";
-	}
-	if ($ligne=~/$HSP_midline_flag_start(.*?)$HSP_midline_flag_stop/){
-		$current_HSP_midline=$1;
-		#print "--",$1,"\n";
-	}
-	
-	if ($ligne=~/$hsp_stop/){
-		if ($current_HSP_query_from){
-			#print "\ntest1\n";
-			#print "Query : $current_query\n";
-			#print "Subject : $current_subject\n";
-			#print "$current_HSP_query_from ... $current_HSP_query_to\n";
-			#print "$current_HSP_subject_from ... $current_HSP_subject_to\n";
-			for (my $i=1;$i<$current_HSP_query_from;$i++){
-				$current_HSP_qseq = "N".$current_HSP_qseq;
-				$current_HSP_midline = " ".$current_HSP_midline;
-				$current_HSP_hseq = "N".$current_HSP_hseq;
-			}
-			for (my $i=$current_HSP_query_to+1;$i<=$window_length*2+1;$i++){
-				$current_HSP_qseq .= "N";
-				$current_HSP_midline .= " ";
-				$current_HSP_hseq .= "N";
-			}
-			
-			my @qseq = split(//,$current_HSP_qseq);
-			my @midline = split(//,$current_HSP_midline);
-			my @hseq = split(//,$current_HSP_hseq);
-			
-			my $comptbase=0;
-			my $compt5p=0;
-			my $compt3p=0;
-			for (my $i=0;$i<=$#qseq;$i++){
-				if ($qseq[$i] ne "-"){
-					$comptbase++; # Va de 1 -> $window_length *2 +1
-				}
-				if ($midline[$i] eq " "){
-					if ($comptbase<=$window_length){ #1 -> $window_length
-						$compt5p++;
-					}
-					elsif ($comptbase>=$window_length+2){ #$window_length+2 -> $window_length *2 + 1;
-						$compt3p++;
-					}
-					else { #+1-$window_length*2+1
-						
-					}
-				}
-			}
-			if (($compt3p<=$nb_mismatch_max)||($compt5p<=$nb_mismatch_max)){
-				$hash{$current_query_short}++;
-			}
-			
-			#print "$current_HSP_qseq\n";
-			#print "$current_HSP_midline\n";
-			#print "$current_HSP_hseq\n";
-			#print "$compt5p // $compt3p\n";
-			#print $hash{$current_query_short},"\n";
-
-		}
-		
-		undef $current_HSP_query_from;
-		undef $current_HSP_query_to;
-		undef $current_HSP_subject_from;
-		undef $current_HSP_subject_to;
-		$current_HSP_midline="";
-		$current_HSP_qseq="";
-		$current_HSP_hseq="";
-	}
-	
-	if ($ligne=~/$iteration_stop/){
-		if ($current_HSP_query_from){
-			#print "\ntest2\n";
-			#print "Query : $current_query\n";
-			#print "Subject : $current_subject\n";
-			#print "$current_HSP_query_from ... $current_HSP_query_to\n";
-			#print "$current_HSP_subject_from ... $current_HSP_subject_to\n";
-			for (my $i=1;$i<$current_HSP_query_from;$i++){
-				$current_HSP_qseq = "N".$current_HSP_qseq;
-				$current_HSP_midline = " ".$current_HSP_midline;
-				$current_HSP_hseq = "N".$current_HSP_hseq;
-			}
-			for (my $i=$current_HSP_query_to+1;$i<=$window_length*2+1;$i++){
-				$current_HSP_qseq .= "N";
-				$current_HSP_midline .= " ";
-				$current_HSP_hseq .= "N";
-			}
-			
-			my @qseq = split(//,$current_HSP_qseq);
-			my @midline = split(//,$current_HSP_midline);
-			my @hseq = split(//,$current_HSP_hseq);
-			
-			my $comptbase=0;
-			my $compt5p=0;
-			my $compt3p=0;
-			for (my $i=0;$i<=$#qseq;$i++){
-				if ($qseq[$i] ne "-"){
-					$comptbase++; # Va de 1 -> $window_length *2 +1
-				}
-				if ($midline[$i] eq " "){
-					if ($comptbase<=$window_length){ #1 -> $window_length
-						$compt5p++;
-					}
-					elsif ($comptbase>=$window_length+2){ #$window_length+2 -> $window_length *2 + 1;
-						$compt3p++;
-					}
-					else { #+1-$window_length*2+1
-						
-					}
-				}
-			}
-			if (($compt3p<=$nb_mismatch_max)||($compt5p<=$nb_mismatch_max)){
-				$hash{$current_query_short}++;
-			}
-			
-			#print "$current_HSP_qseq\n";
-			#print "$current_HSP_midline\n";
-			#print "$current_HSP_hseq\n";
-			#print "$compt5p // $compt3p\n";
-			#print $hash{$current_query_short},"\n";
-		}
-		$current_query="";
-		$current_query_short="";
-		$current_subject="";
-		undef $current_HSP_query_from;
-		undef $current_HSP_query_to;
-		undef $current_HSP_subject_from;
-		undef $current_HSP_subject_to;
-		$current_HSP_midline="";
-		$current_HSP_qseq="";
-		$current_HSP_hseq="";
-	}
-	
-}
-
-close (INB);
-
-# foreach my $key (sort trinombre keys %hash){
-	# print $key," ",$hash{$key},"\n";
-	
-# }
-# exit(0);
-
-open(INV, $input_variant_file) or die ("Can't open $input_variant_file\n");
-
-while (my $ligne = <INV>) {
-	my @champs = split (/\s+/,$ligne);
-	my $header = $champs[0]."_".$champs[1];
-
-	if ($hash{$header}){
-		if ($hash{$header}==1){
-			print "$ligne";
-		}
-		else {
-			#print $hash{$header}," $ligne";
-		}
-	}
-	else {
-		print STDERR "No blast result for ",$header,"\n";
-	}
-
-	
-}
-
-close(INV);
-
-
-
-sub trinombre {
-	my $chra=$a;
-	my $posa=0;
-	my $chrb=$b;
-	my $posb=0;
-	
-	if ($a =~/(.*?)\_(\d+)/){
-		$chra=$1;
-		$posa=$2;
-	}
-	if ($b =~/(.*?)\_(\d+)/){
-		$chrb=$1;
-		$posb=$2;
-	}
-	
-	
-	$chra cmp $chrb
-	||
-	$posa <=> $posb;
-}
-
--- a/rapsodyn/mpileupfilteronblastxml.xml	Wed Sep 10 05:12:05 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-<tool id="mpileupfilteronblastxml" name="mpileupfilteronblastxml" version="0.03">
-<description>Filter mpileup with blast results</description>
-<command interpreter="perl">
-    mpileupfilteronblastxml.pl -input_variant_file $input_variant_file -input_blastxml_file $input_blastxml_file -window_length $window_length -nb_mismatch_max $nb_mismatch_max > $output_file 
-</command>
-<inputs>
-<param name="input_variant_file"  type="data" format="pileup" label="Select a suitable input VARIANT file from your history"/>
-<param name="input_blastxml_file"  type="data" format="xml" label="Select a suitable input BLASTXML file from your history"/>
-<param name="window_length" type="integer" value="50" label="Number of bases extracted before and after the variant position"/>
-<param name="nb_mismatch_max" type="integer" value="3" label="Threshold for mismatch filter"/>
-</inputs>
-<outputs>
- <data name="output_file" format="pileup" label="${tool.name} on ${on_string}"/>
-</outputs>
-
-<help>
-
-
-
-</help>
-</tool>