# HG changeset patch # User clifinder # Date 1581503558 18000 # Node ID 6d7caeea1e74c3cb9c388b07307d9f023a60ed81 # Parent feecd33c83901f9bb1910d0ed56df615c4df8914 "planemo upload for repository https://github.com/GReD-Clermont/CLIFinder/ commit 2a3a263fc5a2123e6988872d23057f8797231a76" diff -r feecd33c8390 -r 6d7caeea1e74 script/CLIFinder.pl --- a/script/CLIFinder.pl Tue Feb 11 09:28:56 2020 -0500 +++ b/script/CLIFinder.pl Wed Feb 12 05:32:38 2020 -0500 @@ -45,8 +45,8 @@ "min_L1:i" => \(my $min_L1 = 50), "mis_L1:i" => \(my $mis_L1 = 2), "threads:i" => \(my $threads = 1), - 'help' => sub { HelpMessage(0); }, - 'version' => sub { VersionMessage(0); }, + "help" => sub { HelpMessage(0); }, + "version" => sub { VersionMessage(0); }, ) or HelpMessage(1); HelpMessage(1) unless @fastq1 && @fastq2 && @name && defined($TE) && defined($ref) && defined($rmsk_source) && defined($refseq) && defined($html) && defined($html_repertory); @@ -95,10 +95,10 @@ # Paired end mapping against L1 promoter sequences# ################################################### - print STDOUT "Alignement of $name[$tabR] to L1\n"; + print STDOUT "Alignment of $name[$tabR] to L1\n"; my $sam = $html_repertory.'/'.$name[$tabR]."_L1.sam"; push(@garbage, $sam); align_paired( $TE, $fastq1[$tabR], $fastq2[$tabR], $sam, $threads, $mis_auth); - print STDOUT "Alignement done\n"; + print STDOUT "Alignment done\n"; ################################################## # Creation of two fastq for paired halfed mapped:# @@ -107,18 +107,15 @@ ################################################## print STDOUT "Getting pairs with one mate matched to L1 and the other mate undetected by repeatmasker as a repeat sequence\n"; - my $out_ASP_1 = $html_repertory.'/'.$name[$tabR]."_1.fastq"; push(@garbage, $out_ASP_1); my $out_ASP_2 = $html_repertory.'/'.$name[$tabR]."_2.fastq"; push(@garbage, $out_ASP_2); ##split mate that matched to L1 and others## my ($ASP_readsHashR, $half_num_out) = get_half($sam, $mis_L1, $min_L1, $Bdir); - # $ASP_reads{$line[0]}[0] mapped - $ASP_reads{$line[0]}[1] unmapped + print STDOUT "Number of half mapped pairs: $half_num_out\n"; ##pairs obtained after repeatmasker on the other mate## my $left = sort_out($threads, $out_ASP_1, $out_ASP_2, $dprct, $eprct, $ASP_readsHashR, $html_repertory); - - print STDOUT "Number of half mapped pairs : $half_num_out\n"; print STDOUT "Number of pairs after repeatmasker: $left\n"; ################################################## @@ -168,9 +165,9 @@ my $merge_target = $html_repertory.'/target_merged.bed'; push(@garbage, $merge_target); my $merge = $html_repertory.'/merged.bed'; push(@garbage, $merge); -open (my $mT, ">".$merge_target) || die "cannot open $merge_target\n"; -open (my $m, ">".$merge) || die "cannot open $merge\n"; -open (my $in, $repMsecond) || die "cannot open secondM\n"; +open (my $mT, ">".$merge_target) || die "Cannot open $merge_target\n"; +open (my $m, ">".$merge) || die "Cannot open $merge\n"; +open (my $in, $repMsecond) || die "Cannot open $repMsecond\n"; my $cmp = 0; while (<$in>) { @@ -185,7 +182,7 @@ } $cmp = 0; -open ($in, $repMfirst) || die "cannot open firstM\n"; +open ($in, $repMfirst) || die "Cannot open $repMfirst\n"; while (<$in>) { chomp $_; @@ -313,9 +310,9 @@ sub filter_convert_rmsk { my ($source, $bed, $line_only) = @_; - open(my $input, $source) || die "cannot open rmsk file! $!\n"; ## Open source file - open(my $bedfile, ">".$bed) || die "cannot open output bed file for rmsk! $!\n"; ## Open bed file - open(my $linefile, ">".$line_only) || die "cannot open output LINE-only file for rmsk! $!\n"; ## Open line_only file + open(my $input, $source) || die "Cannot open rmsk file! $!\n"; ## Open source file + open(my $bedfile, ">".$bed) || die "Cannot open output bed file for rmsk! $!\n"; ## Open bed file + open(my $linefile, ">".$line_only) || die "Cannot open output LINE-only file for rmsk! $!\n"; ## Open line_only file my @headers; my %indices; @@ -491,7 +488,7 @@ my $mis_L1 = shift; my $min_L1 = shift; my $Bdir = shift; - open(my $fic, $sam) || die "cannot open sam file! $!\n"; ## Open file + open(my $fic, $sam) || die "Cannot open sam file! $!\n"; ## Open file my (%ASP_reads); my $cmp = 0; ## Declare variables for my $sequence = ''; my $score = ''; @@ -594,7 +591,7 @@ ##Write on file containing of readssHashTabR - open(my $tmp, ">".$second) || die "cannot open temp file $second\n"; + open(my $tmp, ">".$second) || die "Cannot open temp file $second\n"; while ( my ($k,$v) = each %{$readsHashTabR} ) { print $tmp ${$v}[1] if defined(${$v}[1]); @@ -609,7 +606,7 @@ `RepeatMasker -s -pa $threads -dir $repout -engine hmmer -species human $fa`; my $repfile = $repout.$name.".fa.out"; - open (my $rep, $repfile) || die "cannot open $repfile $!\n"; + open (my $rep, $repfile) || die "Cannot open $repfile $!\n"; while(<$rep>) { chomp; @@ -635,8 +632,8 @@ } ##write resulting reads in both files for paired ## - open(my $accepted_1, ">".$out1 ) || die "cannot open $out1 file $!\n"; - open(my $accepted_2, ">".$out2 ) || die "cannot open $out2 file $!\n"; + open(my $accepted_1, ">".$out1 ) || die "Cannot open $out1 file $!\n"; + open(my $accepted_2, ">".$out2 ) || die "Cannot open $out2 file $!\n"; while ( my ($k,$v) = each %{$readsHashTabR} ) { if ( defined (${$v}[0]) && defined (${$v}[1]) ) @@ -752,8 +749,8 @@ } ## get only first mate that have less tant $iprct repeats ## - open (my $tmp_fi, 'temp_name_first') || die "cannot open $namefirst!\n"; - open (my $nam_fi, ">".$namefirst) || die "cannot open $namefirst!\n"; + open (my $tmp_fi, 'temp_name_first') || die "Cannot open $namefirst!\n"; + open (my $nam_fi, ">".$namefirst) || die "Cannot open $namefirst!\n"; while (<$tmp_fi>) { my @line = split /\t/, $_; @@ -771,8 +768,8 @@ ## get only second mate that have less than $iprct repeats ## - open (my $tmp_sec, 'temp_name_second') || die "cannot open $namesecond!\n"; - open (my $nam_sec, ">".$namesecond) || die "cannot open $namesecond!\n"; + open (my $tmp_sec, 'temp_name_second') || die "Cannot open $namesecond!\n"; + open (my $nam_sec, ">".$namesecond) || die "Cannot open $namesecond!\n"; while (<$tmp_sec>) { my @line = split /\t/, $_; @@ -785,22 +782,6 @@ close $tmp_sec; close $nam_sec; } -#sub results -#{ -# my ($out_repertory, $file, $name, $hashRef,$ps) = @_; -# my $namefirst = $out_repertory.'/'.$name.'-first.bed'; push(@garbage, $namefirst); -# my $namesecond = $out_repertory.'/'.$name.'-second.bed'; push(@garbage, $namesecond); -# `samtools view -Sb -f66 $file | bedtools bamtobed -i /dev/stdin > $namefirst`; -# `samtools view -Sb -f130 $file | bedtools bamtobed -i /dev/stdin > $namesecond`; -# open( my $in, $out_repertory.'/'.$name.'-first.bed') || die "cannot open first read bed\n"; -# while (<$in>) -# { -# my @line = split /\t/, $_; -# $line[3] =~ /(.*?)\/1/; -# ${$hashRef}{$1}= $ps; -# } -#} - ############################################################ ##Function blast: blast nucleotide sequences on ref ### @@ -833,7 +814,7 @@ { my $file = shift; my %hash = (); - open (my $f, $file) || die "cannot open $file\n"; + open (my $f, $file) || die "Cannot open $file\n"; while (<$f>) { chomp $_; @@ -910,7 +891,7 @@ my $chimOut = $html; - open(my $tab, ">".$chimOut) || die "cannot open $chimOut"; + open(my $tab, ">".$chimOut) || die "Cannot open $chimOut"; print_header($tab,"Chimerae"); print $tab "\t\t\n\t\t\tL1 chromosome\n\t\t\tL1 start\n\t\t\tL1 end\n\t\t\tL1 strand\n"; for my $i (0..$#fastq1) @@ -1022,7 +1003,7 @@ # save result in csv file ## my $filed = $out1; - open(my $tab, ">".$filed) || die "cannot open $filed"; + open(my $tab, ">".$filed) || die "Cannot open $filed"; print $tab "L1 chromosome \t L1 start \t L1 end \t L1 strand";; for my $i (0..$#fastq1) {