Mercurial > repos > petr-novak > repeatrxplorer
view bin/align_parsing.pl @ 0:1d1b9e1b2e2f draft
Uploaded
author | petr-novak |
---|---|
date | Thu, 19 Dec 2019 10:24:45 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env perl # Parses align file, calculates read depth (RD) and genome representation (GR) # of individual contigs; outputs contig sequences in fasta format with this # information in the fasta header: # >CLXConitgY (length-average_RD-GR) # # RD profiles along the contig sequences can also be calculated and saved # in a separate file. # # (renamed from align_parsing_2.pl, 2010-05-10) use Getopt::Std; use warnings; getopt('iop'); if ($opt_i) { $align_file = $opt_i; # } else { die "Missing parameter: -i align_file\n"; } if ($opt_o) { $out_file = $opt_o; # } else { $out_file = $align_file.".info"; print "Output file not set, using \"$out_file\"\n"; } if ($opt_p) { $profile_file = $opt_p; # RD profiles will be recorded } else { $profile_file = 0; # will be used below to switch profile output off print "Parameter -p not set, RD profiles will not be saved\n"; } open (ALIGN,$align_file) or die "Could not read from $align_file\n"; open (OUT,">$out_file") or die "Could not write to $out_file\n"; if ($profile_file) { open (PROF,">$profile_file") or die "Could not write to $profile_file\n"; } while ($radek = <ALIGN>) { if ($radek =~/^DETAILED DISPLAY OF CONTIGS/) { # start of alignment section # print $radek; $radek = <ALIGN>; while ($radek =~/\*\*\* (CL\d+) {0,1}Contig (\d+) \*\*\*/) { # parsing individual contigs # print $radek; $contig_id = $1."Contig".$2; @pokryti = (); # array ve ktere bude pro kazdou pozici v sekvenci pokryti $seq = ""; # consensus sekvence contigu # print "$contig_id\n"; # print $radek; $radek = <ALIGN>; while ($radek =~/\: \. \:/) { # -> nasleduje dalsi blok @blok = (); # radky z aktualniho bloku alignmentu do { $radek = <ALIGN>; push(@blok,$radek); } while (not $radek =~/^consensus/); $cons_line = $radek; $radek = <ALIGN>; $radek = <ALIGN>; # toto posledni nacteni radku slouzi v podmince tohoto i nadrazeneho cyklu while if (not $radek) { $radek = " "; }; # aby to nehlasilo chyby na konci souboru kde ty radky muzou chybet pop(@blok); pop(@blok); # odstrani posledni dva radky (mezera a consensus) for ($f=10;$f<=length($cons_line);$f++) { $suma_pozice = 0; if (substr($cons_line,$f,1) =~/([A,T,G,C,N])/) { $seq .= $1; foreach $cteni (@blok) { if (substr($cteni,$f,1) =~/[A,T,G,C,N]/) { $suma_pozice++; } } push(@pokryti,$suma_pozice); } } } $delka_cons = @pokryti; $soucet = 0; $prumer = 0; foreach $suma_pozice (@pokryti) { $soucet += $suma_pozice; } $prumer = sprintf("%0.1f",$soucet/$delka_cons); print OUT ">$contig_id ($delka_cons-$prumer-$soucet)\n"; while ($seq_line = substr($seq,0,60,"")) { print OUT "$seq_line\n"; } if ($profile_file) { print PROF ">$contig_id ($delka_cons-$prumer-$soucet)\n"; foreach $suma_pozice (@pokryti) { print PROF "$suma_pozice "; } print PROF "\n"; } } } } close ALIGN; close OUT; if ($profile_file) { close PROF; }