comparison bin/align_parsing.pl @ 0:1d1b9e1b2e2f draft

Uploaded
author petr-novak
date Thu, 19 Dec 2019 10:24:45 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1d1b9e1b2e2f
1 #!/usr/bin/env perl
2
3 # Parses align file, calculates read depth (RD) and genome representation (GR)
4 # of individual contigs; outputs contig sequences in fasta format with this
5 # information in the fasta header:
6 # >CLXConitgY (length-average_RD-GR)
7 #
8 # RD profiles along the contig sequences can also be calculated and saved
9 # in a separate file.
10 #
11 # (renamed from align_parsing_2.pl, 2010-05-10)
12
13
14 use Getopt::Std;
15 use warnings;
16
17 getopt('iop');
18 if ($opt_i) {
19 $align_file = $opt_i; #
20 } else {
21 die "Missing parameter: -i align_file\n";
22 }
23 if ($opt_o) {
24 $out_file = $opt_o; #
25 } else {
26 $out_file = $align_file.".info";
27 print "Output file not set, using \"$out_file\"\n";
28 }
29 if ($opt_p) {
30 $profile_file = $opt_p; # RD profiles will be recorded
31 } else {
32 $profile_file = 0; # will be used below to switch profile output off
33 print "Parameter -p not set, RD profiles will not be saved\n";
34 }
35
36 open (ALIGN,$align_file) or die "Could not read from $align_file\n";
37 open (OUT,">$out_file") or die "Could not write to $out_file\n";
38 if ($profile_file) {
39 open (PROF,">$profile_file") or die "Could not write to $profile_file\n";
40 }
41
42 while ($radek = <ALIGN>) {
43 if ($radek =~/^DETAILED DISPLAY OF CONTIGS/) { # start of alignment section
44 # print $radek;
45 $radek = <ALIGN>;
46 while ($radek =~/\*\*\* (CL\d+) {0,1}Contig (\d+) \*\*\*/) { # parsing individual contigs
47 # print $radek;
48 $contig_id = $1."Contig".$2;
49 @pokryti = (); # array ve ktere bude pro kazdou pozici v sekvenci pokryti
50 $seq = ""; # consensus sekvence contigu
51 # print "$contig_id\n";
52 # print $radek;
53 $radek = <ALIGN>;
54 while ($radek =~/\: \. \:/) { # -> nasleduje dalsi blok
55 @blok = (); # radky z aktualniho bloku alignmentu
56 do {
57 $radek = <ALIGN>;
58 push(@blok,$radek);
59 } while (not $radek =~/^consensus/);
60 $cons_line = $radek;
61 $radek = <ALIGN>;
62 $radek = <ALIGN>; # toto posledni nacteni radku slouzi v podmince tohoto i nadrazeneho cyklu while
63 if (not $radek) { $radek = " "; }; # aby to nehlasilo chyby na konci souboru kde ty radky muzou chybet
64
65 pop(@blok); pop(@blok); # odstrani posledni dva radky (mezera a consensus)
66
67 for ($f=10;$f<=length($cons_line);$f++) {
68 $suma_pozice = 0;
69 if (substr($cons_line,$f,1) =~/([A,T,G,C,N])/) {
70 $seq .= $1;
71 foreach $cteni (@blok) {
72 if (substr($cteni,$f,1) =~/[A,T,G,C,N]/) {
73 $suma_pozice++;
74 }
75 }
76 push(@pokryti,$suma_pozice);
77 }
78 }
79
80 }
81
82 $delka_cons = @pokryti;
83 $soucet = 0;
84 $prumer = 0;
85 foreach $suma_pozice (@pokryti) {
86 $soucet += $suma_pozice;
87 }
88 $prumer = sprintf("%0.1f",$soucet/$delka_cons);
89
90 print OUT ">$contig_id ($delka_cons-$prumer-$soucet)\n";
91 while ($seq_line = substr($seq,0,60,"")) {
92 print OUT "$seq_line\n";
93 }
94 if ($profile_file) {
95 print PROF ">$contig_id ($delka_cons-$prumer-$soucet)\n";
96 foreach $suma_pozice (@pokryti) {
97 print PROF "$suma_pozice ";
98 }
99 print PROF "\n";
100 }
101
102 }
103 }
104 }
105
106 close ALIGN;
107 close OUT;
108 if ($profile_file) {
109 close PROF;
110 }
111