clifinder: script/CLIFinder.pl comparison

comparison script/CLIFinder.pl @ 15:6d7caeea1e74 draft

"planemo upload for repository https://github.com/GReD-Clermont/CLIFinder/ commit 2a3a263fc5a2123e6988872d23057f8797231a76"

author	clifinder
date	Wed, 12 Feb 2020 05:32:38 -0500
parents	feecd33c8390
children	40695c10cfbd

comparison

equal deleted inserted replaced

-:feecd33c8390
+:6d7caeea1e74
 "size_read:i"   => \(my $size_reads = 100),
 "BDir:i"        => \(my $Bdir = 0),
 "min_L1:i"      => \(my $min_L1 = 50),
 "mis_L1:i"      => \(my $mis_L1 = 2),
 "threads:i"     => \(my $threads = 1),
-'help'          => sub { HelpMessage(0); },
+"help"          => sub { HelpMessage(0); },
-'version'       => sub { VersionMessage(0); },
+"version"       => sub { VersionMessage(0); },
 ) or HelpMessage(1);
 HelpMessage(1) unless @fastq1 && @fastq2 && @name && defined($TE) && defined($ref) && defined($rmsk_source) && defined($refseq) && defined($html) && defined($html_repertory);
 my $iprct = 100 - (($prct / $size_reads)*100) ;
 {
 ###################################################
 # Paired end mapping against L1 promoter sequences#
 ###################################################
-print STDOUT "Alignement of $name[$tabR] to L1\n";
+print STDOUT "Alignment of $name[$tabR] to L1\n";
 my $sam = $html_repertory.'/'.$name[$tabR]."_L1.sam"; push(@garbage, $sam);
 align_paired( $TE, $fastq1[$tabR], $fastq2[$tabR], $sam, $threads, $mis_auth);
-print STDOUT "Alignement done\n";
+print STDOUT "Alignment done\n";
 ##################################################
 # Creation of two fastq for half-mapped pairs:   #
 # - _1 corresponds to sequences mapped to L1     #
 # - _2 corresponds to sequences unmapped to L1   #
 ##################################################
 print STDOUT "Getting pairs with one mate matched to L1 and the other mate undetected by repeatmasker as a repeat sequence\n";
 my $out_ASP_1 = $html_repertory.'/'.$name[$tabR]."_1.fastq"; push(@garbage, $out_ASP_1);
 my $out_ASP_2 = $html_repertory.'/'.$name[$tabR]."_2.fastq"; push(@garbage, $out_ASP_2);
 ##split mate that matched to L1 and others##
 my ($ASP_readsHashR, $half_num_out) = get_half($sam, $mis_L1, $min_L1, $Bdir);
-# $ASP_reads{$line[0]}[0] mapped - $ASP_reads{$line[0]}[1] unmapped
+print STDOUT "Number of half mapped pairs: $half_num_out\n";
 ##pairs obtained after repeatmasker on the other mate##
 my $left = sort_out($threads, $out_ASP_1, $out_ASP_2, $dprct, $eprct, $ASP_readsHashR, $html_repertory);
-print STDOUT "Number of half mapped pairs : $half_num_out\n";
 print STDOUT "Number of pairs after repeatmasker: $left\n";
 ##################################################
 # Alignment of halfed mapped pairs on genome     #
 ##################################################
 my (%frag_uni, @second_R, @second_exp, @results);
 my $merge_target = $html_repertory.'/target_merged.bed'; push(@garbage, $merge_target);
 my $merge = $html_repertory.'/merged.bed'; push(@garbage, $merge);
-open (my $mT, ">".$merge_target) || die "cannot open $merge_target\n";
+open (my $mT, ">".$merge_target) || die "Cannot open $merge_target\n";
-open (my $m, ">".$merge) || die "cannot open $merge\n";
+open (my $m, ">".$merge) || die "Cannot open $merge\n";
-open (my $in, $repMsecond) || die "cannot open secondM\n";
+open (my $in, $repMsecond) || die "Cannot open $repMsecond\n";
 my $cmp = 0;
 while (<$in>)
 {
 chomp $_;
 my @tmp = (0) x scalar(@fastq1);
 $cmp++;
 push @second_R, [$line[0],$line[1],$line[2],$line[3]];
 }
 $cmp = 0;
-open ($in, $repMfirst) || die "cannot open firstM\n";
+open ($in, $repMfirst) || die "Cannot open $repMfirst\n";
 while (<$in>)
 {
 chomp $_;
 my %sec;
 my @line = split /\t/, $_;
 ############################################################
 sub filter_convert_rmsk
 {
 my ($source, $bed, $line_only) = @_;
-open(my $input, $source) || die "cannot open rmsk file! $!\n"; ## Open source file
+open(my $input, $source) || die "Cannot open rmsk file! $!\n"; ## Open source file
-open(my $bedfile, ">".$bed) || die "cannot open output bed file for rmsk! $!\n"; ## Open bed file
+open(my $bedfile, ">".$bed) || die "Cannot open output bed file for rmsk! $!\n"; ## Open bed file
-open(my $linefile, ">".$line_only) || die "cannot open output LINE-only file for rmsk! $!\n"; ## Open line_only file
+open(my $linefile, ">".$line_only) || die "Cannot open output LINE-only file for rmsk! $!\n"; ## Open line_only file
 my @headers;
 my %indices;
 print $linefile "#filter: rmsk.repClass = 'LINE'\n";
 ## store name of file
 my $sam = shift;
 my $mis_L1 = shift;
 my $min_L1 = shift;
 my $Bdir = shift;
-open(my $fic, $sam) || die "cannot open sam file! $!\n"; ## Open file
+open(my $fic, $sam) || die "Cannot open sam file! $!\n"; ## Open file
 my (%ASP_reads); my $cmp = 0; ## Declare variables for
 my $sequence = '';
 my $score = '';
 ##read file##
 mkdir $repout;
 my %notLine;
 ##Write the [1] entries of readsHashTabR to a temp file
-open(my $tmp, ">".$second) || die "cannot open temp file $second\n";
+open(my $tmp, ">".$second) || die "Cannot open temp file $second\n";
 while ( my ($k,$v) = each %{$readsHashTabR} )
 {
 print $tmp ${$v}[1] if defined(${$v}[1]);
 }
 close $tmp;
 ##Launch RepeatMasker on fasta file
 `RepeatMasker -s -pa $threads -dir $repout -engine hmmer -species human $fa`;
 my $repfile = $repout.$name.".fa.out";
-open (my $rep, $repfile) || die "cannot open $repfile $!\n";
+open (my $rep, $repfile) || die "Cannot open $repfile $!\n";
 while(<$rep>)
 {
 chomp;
 ## test the percent of repeats ##
 my $string = $_;
 {
 $notLine{$k} = 1 unless ($v->[0] > $dprct || $v->[1] < $eprct);
 }
 ##write resulting reads in both files for paired ##
-open(my $accepted_1, ">".$out1 ) || die "cannot open $out1 file $!\n";
+open(my $accepted_1, ">".$out1 ) || die "Cannot open $out1 file $!\n";
-open(my $accepted_2, ">".$out2 ) || die "cannot open $out2 file $!\n";
+open(my $accepted_2, ">".$out2 ) || die "Cannot open $out2 file $!\n";
 while ( my ($k,$v) = each %{$readsHashTabR} )
 {
 if ( defined (${$v}[0]) && defined (${$v}[1]) )
 {
 unless (defined ($notLine{$k}) && $notLine{$k} == 1)
 $IdCov{$1} = $split_cov[-1] if $split_cov[-1] > $IdCov{$1};
 }
 }
 ## get only first mate that have less than $iprct repeats ##
-open (my $tmp_fi, 'temp_name_first') || die "cannot open $namefirst!\n";
+open (my $tmp_fi, 'temp_name_first') || die "Cannot open $namefirst!\n";
-open (my $nam_fi, ">".$namefirst) || die "cannot open $namefirst!\n";
+open (my $nam_fi, ">".$namefirst) || die "Cannot open $namefirst!\n";
 while (<$tmp_fi>)
 {
 my @line = split /\t/, $_;
 $line[3] =~ /(.*?)\/[12]/;
 close $tmp_fi; close $nam_fi;
 ## get only  second mate that have less than $iprct repeats ##
-open (my $tmp_sec, 'temp_name_second') || die "cannot open $namesecond!\n";
+open (my $tmp_sec, 'temp_name_second') || die "Cannot open $namesecond!\n";
-open (my $nam_sec, ">".$namesecond) || die "cannot open $namesecond!\n";
+open (my $nam_sec, ">".$namesecond) || die "Cannot open $namesecond!\n";
 while (<$tmp_sec>)
 {
 my @line = split /\t/, $_;
 $line[3] =~ /(.*?)\/[12]/;
 if ($IdCov{$1} <= $iprct/100)
 print $nam_sec $_;
 }
 }
 close $tmp_sec; close $nam_sec;
 }
-#sub results
-#{
-#  my ($out_repertory, $file, $name, $hashRef,$ps) = @_;
-#  my $namefirst = $out_repertory.'/'.$name.'-first.bed'; push(@garbage, $namefirst);
-#  my $namesecond = $out_repertory.'/'.$name.'-second.bed'; push(@garbage, $namesecond);
-#  `samtools view -Sb -f66 $file | bedtools bamtobed -i /dev/stdin > $namefirst`;
-#  `samtools view -Sb -f130 $file | bedtools bamtobed -i /dev/stdin > $namesecond`;
-#  open( my $in, $out_repertory.'/'.$name.'-first.bed') || die "cannot open first read bed\n";
-#  while (<$in>)
-#  {
-#    my @line = split /\t/, $_;
-#    $line[3] =~ /(.*?)\/1/;
-#    ${$hashRef}{$1}= $ps;
-#  }
-#}
 ############################################################
 ##Function blast: blast nucleotide sequences on ref      ###
 ############################################################
 sub extract_blast
 {
 my $file = shift;
 my %hash = ();
-open (my $f, $file) || die "cannot open $file\n";
+open (my $f, $file) || die "Cannot open $file\n";
 while (<$f>)
 {
 chomp $_;
 my ($seq,$id) = split /\t/,$_;
 $seq = $1 if ($seq =~ /(\d+)-(.*?)-(\d+)-(\d+)/);
 File::Copy::Recursive::dircopy "$Bin/js/", "$out/js" or die "Copy failed: $!";
 File::Copy::Recursive::dircopy "$Bin/static/", "$out/static" or die "Copy failed: $!";
 my $chimOut = $html;
-open(my $tab, ">".$chimOut) || die "cannot open $chimOut";
+open(my $tab, ">".$chimOut) || die "Cannot open $chimOut";
 print_header($tab,"Chimerae");
 print $tab "\t\t<tr>\n\t\t\t<th>L1 chromosome</th>\n\t\t\t<th>L1 start</th>\n\t\t\t<th>L1 end</th>\n\t\t\t<th>L1 strand</th>\n";
 for my $i (0..$#fastq1)
 {
 print $tab "\t\t\t<th>$name[$i] read #</th>\n";
 my $out3= $out.'/final_result_chimerae.txt';
 # save result in csv file ##
 my $filed = $out1;
-open(my $tab, ">".$filed) || die "cannot open $filed";
+open(my $tab, ">".$filed) || die "Cannot open $filed";
 print $tab "L1 chromosome \t L1 start \t L1 end \t L1 strand";;
 for my $i (0..$#fastq1)
 {
 print $tab "\t $name[$i] read #";
 }

Mercurial > repos > clifinder > clifinder

comparison script/CLIFinder.pl @ 15:6d7caeea1e74 draft