annotate bin/align_parsing.pl @ 0:1d1b9e1b2e2f draft

Uploaded
author petr-novak
date Thu, 19 Dec 2019 10:24:45 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1 #!/usr/bin/env perl
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
2
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
3 # Parses align file, calculates read depth (RD) and genome representation (GR)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
4 # of individual contigs; outputs contig sequences in fasta format with this
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
5 # information in the fasta header:
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
6 # >CLXConitgY (length-average_RD-GR)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
7 #
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
8 # RD profiles along the contig sequences can also be calculated and saved
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
9 # in a separate file.
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
10 #
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
11 # (renamed from align_parsing_2.pl, 2010-05-10)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
12
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
13
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
14 use Getopt::Std;
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
15 use warnings;
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
16
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
17 getopt('iop');
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
18 if ($opt_i) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
19 $align_file = $opt_i; #
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
20 } else {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
21 die "Missing parameter: -i align_file\n";
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
22 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
23 if ($opt_o) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
24 $out_file = $opt_o; #
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
25 } else {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
26 $out_file = $align_file.".info";
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
27 print "Output file not set, using \"$out_file\"\n";
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
28 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
29 if ($opt_p) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
30 $profile_file = $opt_p; # RD profiles will be recorded
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
31 } else {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
32 $profile_file = 0; # will be used below to switch profile output off
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
33 print "Parameter -p not set, RD profiles will not be saved\n";
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
34 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
35
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
36 open (ALIGN,$align_file) or die "Could not read from $align_file\n";
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
37 open (OUT,">$out_file") or die "Could not write to $out_file\n";
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
38 if ($profile_file) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
39 open (PROF,">$profile_file") or die "Could not write to $profile_file\n";
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
40 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
41
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
42 while ($radek = <ALIGN>) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
43 if ($radek =~/^DETAILED DISPLAY OF CONTIGS/) { # start of alignment section
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
44 # print $radek;
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
45 $radek = <ALIGN>;
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
46 while ($radek =~/\*\*\* (CL\d+) {0,1}Contig (\d+) \*\*\*/) { # parsing individual contigs
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
47 # print $radek;
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
48 $contig_id = $1."Contig".$2;
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
49 @pokryti = (); # array ve ktere bude pro kazdou pozici v sekvenci pokryti
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
50 $seq = ""; # consensus sekvence contigu
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
51 # print "$contig_id\n";
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
52 # print $radek;
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
53 $radek = <ALIGN>;
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
54 while ($radek =~/\: \. \:/) { # -> nasleduje dalsi blok
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
55 @blok = (); # radky z aktualniho bloku alignmentu
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
56 do {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
57 $radek = <ALIGN>;
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
58 push(@blok,$radek);
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
59 } while (not $radek =~/^consensus/);
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
60 $cons_line = $radek;
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
61 $radek = <ALIGN>;
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
62 $radek = <ALIGN>; # toto posledni nacteni radku slouzi v podmince tohoto i nadrazeneho cyklu while
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
63 if (not $radek) { $radek = " "; }; # aby to nehlasilo chyby na konci souboru kde ty radky muzou chybet
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
64
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
65 pop(@blok); pop(@blok); # odstrani posledni dva radky (mezera a consensus)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
66
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
67 for ($f=10;$f<=length($cons_line);$f++) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
68 $suma_pozice = 0;
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
69 if (substr($cons_line,$f,1) =~/([A,T,G,C,N])/) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
70 $seq .= $1;
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
71 foreach $cteni (@blok) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
72 if (substr($cteni,$f,1) =~/[A,T,G,C,N]/) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
73 $suma_pozice++;
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
74 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
75 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
76 push(@pokryti,$suma_pozice);
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
77 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
78 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
79
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
80 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
81
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
82 $delka_cons = @pokryti;
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
83 $soucet = 0;
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
84 $prumer = 0;
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
85 foreach $suma_pozice (@pokryti) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
86 $soucet += $suma_pozice;
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
87 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
88 $prumer = sprintf("%0.1f",$soucet/$delka_cons);
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
89
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
90 print OUT ">$contig_id ($delka_cons-$prumer-$soucet)\n";
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
91 while ($seq_line = substr($seq,0,60,"")) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
92 print OUT "$seq_line\n";
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
93 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
94 if ($profile_file) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
95 print PROF ">$contig_id ($delka_cons-$prumer-$soucet)\n";
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
96 foreach $suma_pozice (@pokryti) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
97 print PROF "$suma_pozice ";
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
98 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
99 print PROF "\n";
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
100 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
101
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
102 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
103 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
104 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
105
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
106 close ALIGN;
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
107 close OUT;
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
108 if ($profile_file) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
109 close PROF;
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
110 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
111