Mercurial > repos > petr-novak > repeatrxplorer
comparison bin/align_parsing.pl @ 0:1d1b9e1b2e2f draft
Uploaded
author | petr-novak |
---|---|
date | Thu, 19 Dec 2019 10:24:45 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1d1b9e1b2e2f |
---|---|
1 #!/usr/bin/env perl | |
2 | |
3 # Parses align file, calculates read depth (RD) and genome representation (GR) | |
4 # of individual contigs; outputs contig sequences in fasta format with this | |
5 # information in the fasta header: | |
6 # >CLXConitgY (length-average_RD-GR) | |
7 # | |
8 # RD profiles along the contig sequences can also be calculated and saved | |
9 # in a separate file. | |
10 # | |
11 # (renamed from align_parsing_2.pl, 2010-05-10) | |
12 | |
13 | |
14 use Getopt::Std; | |
15 use warnings; | |
16 | |
17 getopt('iop'); | |
18 if ($opt_i) { | |
19 $align_file = $opt_i; # | |
20 } else { | |
21 die "Missing parameter: -i align_file\n"; | |
22 } | |
23 if ($opt_o) { | |
24 $out_file = $opt_o; # | |
25 } else { | |
26 $out_file = $align_file.".info"; | |
27 print "Output file not set, using \"$out_file\"\n"; | |
28 } | |
29 if ($opt_p) { | |
30 $profile_file = $opt_p; # RD profiles will be recorded | |
31 } else { | |
32 $profile_file = 0; # will be used below to switch profile output off | |
33 print "Parameter -p not set, RD profiles will not be saved\n"; | |
34 } | |
35 | |
36 open (ALIGN,$align_file) or die "Could not read from $align_file\n"; | |
37 open (OUT,">$out_file") or die "Could not write to $out_file\n"; | |
38 if ($profile_file) { | |
39 open (PROF,">$profile_file") or die "Could not write to $profile_file\n"; | |
40 } | |
41 | |
42 while ($radek = <ALIGN>) { | |
43 if ($radek =~/^DETAILED DISPLAY OF CONTIGS/) { # start of alignment section | |
44 # print $radek; | |
45 $radek = <ALIGN>; | |
46 while ($radek =~/\*\*\* (CL\d+) {0,1}Contig (\d+) \*\*\*/) { # parsing individual contigs | |
47 # print $radek; | |
48 $contig_id = $1."Contig".$2; | |
49 @pokryti = (); # array ve ktere bude pro kazdou pozici v sekvenci pokryti | |
50 $seq = ""; # consensus sekvence contigu | |
51 # print "$contig_id\n"; | |
52 # print $radek; | |
53 $radek = <ALIGN>; | |
54 while ($radek =~/\: \. \:/) { # -> nasleduje dalsi blok | |
55 @blok = (); # radky z aktualniho bloku alignmentu | |
56 do { | |
57 $radek = <ALIGN>; | |
58 push(@blok,$radek); | |
59 } while (not $radek =~/^consensus/); | |
60 $cons_line = $radek; | |
61 $radek = <ALIGN>; | |
62 $radek = <ALIGN>; # toto posledni nacteni radku slouzi v podmince tohoto i nadrazeneho cyklu while | |
63 if (not $radek) { $radek = " "; }; # aby to nehlasilo chyby na konci souboru kde ty radky muzou chybet | |
64 | |
65 pop(@blok); pop(@blok); # odstrani posledni dva radky (mezera a consensus) | |
66 | |
67 for ($f=10;$f<=length($cons_line);$f++) { | |
68 $suma_pozice = 0; | |
69 if (substr($cons_line,$f,1) =~/([A,T,G,C,N])/) { | |
70 $seq .= $1; | |
71 foreach $cteni (@blok) { | |
72 if (substr($cteni,$f,1) =~/[A,T,G,C,N]/) { | |
73 $suma_pozice++; | |
74 } | |
75 } | |
76 push(@pokryti,$suma_pozice); | |
77 } | |
78 } | |
79 | |
80 } | |
81 | |
82 $delka_cons = @pokryti; | |
83 $soucet = 0; | |
84 $prumer = 0; | |
85 foreach $suma_pozice (@pokryti) { | |
86 $soucet += $suma_pozice; | |
87 } | |
88 $prumer = sprintf("%0.1f",$soucet/$delka_cons); | |
89 | |
90 print OUT ">$contig_id ($delka_cons-$prumer-$soucet)\n"; | |
91 while ($seq_line = substr($seq,0,60,"")) { | |
92 print OUT "$seq_line\n"; | |
93 } | |
94 if ($profile_file) { | |
95 print PROF ">$contig_id ($delka_cons-$prumer-$soucet)\n"; | |
96 foreach $suma_pozice (@pokryti) { | |
97 print PROF "$suma_pozice "; | |
98 } | |
99 print PROF "\n"; | |
100 } | |
101 | |
102 } | |
103 } | |
104 } | |
105 | |
106 close ALIGN; | |
107 close OUT; | |
108 if ($profile_file) { | |
109 close PROF; | |
110 } | |
111 |