annotate cpt_psm_recombine/lib/CPT/BioData.pm @ 0:b18e8268bf4e draft

Uploaded
author cpt
date Tue, 05 Jul 2022 05:05:13 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
1 package CPT::BioData;
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
2 use Moose;
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
3 use strict;
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
4 use warnings;
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
5 use autodie;
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
6
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
# Read-only string attribute.  No method visible in this file reads it.
has dummy_var => (
    is  => 'ro',
    isa => 'Str',
);
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
8
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
# Names recognised as GenBank feature tags.  Each name maps to 1 so that
# isValidTag can answer a membership query with a single hash lookup.
my %genbank_feature_tags = map { $_ => 1 } qw(
    locus_tag
    gene
    product
    allele
    anticodon
    artificial_location
    bio_material
    bound_moiety
    cell_line
    cell_type
    chromosome
    citation
    clone
    clone_lib
    codon_start
    collected_by
    collection_date
    compare
    country
    cultivar
    culture_collection
    db_xref
    dev_stage
    direction
    EC_number
    ecotype
    environmental_sample
    estimated_length
    exception
    experiment
    focus
    frequency
    function
    gap_type
    gene_synonym
    germline
    haplogroup
    haplotype
    host
    identified_by
    inference
    isolate
    isolation_source
    lab_host
    lat_lon
    linkage_evidence
    macronuclear
    map
    mating_type
    mobile_element_type
    mod_base
    mol_type
    ncRNA_class
    note
    number
    old_locus_tag
    operon
    organelle
    organism
    partial
    PCR_conditions
    PCR_primers
    phenotype
    plasmid
    pop_variant
    protein_id
    proviral
    pseudo
    rearranged
    replace
    ribosomal_slippage
    rpt_family
    rpt_type
    rpt_unit_range
    rpt_unit_seq
    satellite
    segment
    serotype
    serovar
    sex
    specimen_voucher
    standard_name
    strain
    sub_clone
    sub_species
    sub_strain
    tag_peptide
    tissue_lib
    tissue_type
    transgenic
    translation
    transl_except
    transl_table
    trans_splicing
    variety
);
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
# Colour table keyed by integer index (0 through 17); every value is an
# rgb(r,g,b) string.  The palette is written positionally and the keys are
# derived from each entry's position in the list.
my %artemis_colours = do {
    my @palette = (
        'rgb(255,255,255)',    #  0
        'rgb(100,100,100)',    #  1
        'rgb(255,0,0)',        #  2
        'rgb(0,255,0)',        #  3
        'rgb(0,0,255)',        #  4
        'rgb(0,255,255)',      #  5
        'rgb(255,0,255)',      #  6
        'rgb(255,255,0)',      #  7
        'rgb(152,251,152)',    #  8
        'rgb(135,206,250)',    #  9
        'rgb(255,165,0)',      # 10
        'rgb(200,150,100)',    # 11
        'rgb(255,200,200)',    # 12
        'rgb(170,170,170)',    # 13
        'rgb(0,0,0)',          # 14
        'rgb(255,63,63)',      # 15
        'rgb(255,127,127)',    # 16
        'rgb(255,191,191)',    # 17
    );
    map { ( $_ => $palette[$_] ) } 0 .. $#palette;
};

# artemis_colour_decode($idx)
#
# Returns the rgb() string stored for colour index $idx, or undef when the
# index is not present in the table.
sub artemis_colour_decode {
    my $self = shift;
    my $idx  = shift;
    return $artemis_colours{$idx};
}
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
131
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
# Three-letter amino-acid abbreviations mapped to their one-letter codes.
# 'XXX' maps to 'X', and both 'End' and 'Stop' map to '*'.  Keys are
# case-sensitive.
my %table321 = (
    'Gly'  => 'G',
    'Pro'  => 'P',
    'Ala'  => 'A',
    'Val'  => 'V',
    'Leu'  => 'L',
    'Ile'  => 'I',
    'Met'  => 'M',
    'Cys'  => 'C',
    'Phe'  => 'F',
    'Tyr'  => 'Y',
    'Trp'  => 'W',
    'His'  => 'H',
    'Lys'  => 'K',
    'Arg'  => 'R',
    'Gln'  => 'Q',
    'Asn'  => 'N',
    'Glu'  => 'E',
    'Asp'  => 'D',
    'Ser'  => 'S',
    'Thr'  => 'T',
    'XXX'  => 'X',
    'End'  => '*',
    'Stop' => '*',
);

# decode321($three)
#
# Returns the one-letter code stored for the three-letter key $three, or
# undef when the key is not in the table.
sub decode321 {
    my ( $self, $three ) = @_;
    return $table321{$three};
}

# get321Table()
#
# Returns a hash reference holding the full three-letter => one-letter table.
sub get321Table {
    my ($self) = @_;

    # Hand back a shallow copy rather than a reference to the module-private
    # hash: a caller that adds or deletes keys must not be able to change
    # what decode321 (or any later get321Table call) sees.
    return {%table321};
}
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
167
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
168
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
# getTranslationTable($table_id)
#
# Asks Bio::Tools::CodonTable to translate each of the 64 codons that can be
# formed from A, C, T and G, and returns a reference to the resulting
# codon => translation hash.
#
# $table_id selects the codon table and falls back to 1 when undefined.
# When $table_id is 11 the three entries TGA, TAA and TAG are overwritten
# with '*', '#' and '+' respectively.
sub getTranslationTable {
    my ( $self, $table_id ) = @_;

    # Loaded at call time rather than when this module is compiled.
    require Bio::Tools::CodonTable;

    my $effective_id = defined $table_id ? $table_id : 1;
    my $codon_table  = Bio::Tools::CodonTable->new( -id => $effective_id );

    # Enumerate every three-base combination, first base varying slowest.
    my @bases  = qw(A C T G);
    my @codons = map {
        my $first = $_;
        map {
            my $second = $_;
            map { $first . $second . $_ } @bases;
        } @bases;
    } @bases;

    my %translation_of;
    for my $codon (@codons) {
        $translation_of{$codon} = $codon_table->translate($codon);
    }

    # Table 11 only: replace whatever CodonTable reported for these three
    # codons with a distinct symbol each.
    if ( defined $table_id and $table_id == 11 ) {
        @translation_of{ 'TGA', 'TAA', 'TAG' } = ( '*', '#', '+' );
    }

    return \%translation_of;
}
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
189
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
190
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
191
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
# isValidTag($tag)
#
# Looks $tag up in %genbank_feature_tags.  Returns 1 when the tag is listed
# there and undef otherwise; the comparison is case-sensitive.
sub isValidTag {
    my $self = shift;
    my ($tag) = @_;
    return $genbank_feature_tags{$tag};
}
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
196
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
# Remove the Moose keywords (has, extends, ...) from this package's namespace
# so they cannot be called as methods on CPT::BioData objects.
no Moose;

# Freeze the metaclass.  Nothing in this file alters the class after this
# point, and an immutable class lets Moose inline the constructor instead of
# resolving it through the metaclass on every new().
__PACKAGE__->meta->make_immutable;

# A module file must end by returning a true value.
1;
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
199
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
200 __END__
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
201
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
202 =pod
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
203
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
204 =encoding UTF-8
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
205
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
206 =head1 NAME
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
207
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
208 CPT::BioData
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
209
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
210 =head1 VERSION
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
211
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
212 version 1.99.4
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
213
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
214 =head2 get321Table
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
215
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
216 $bio->get321Table();
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
217
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
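Convenience function which returns a hash reference mapping three-letter amino acid abbreviations to their one-letter codes (for example C<Gly> to C<G>). The keys C<End> and C<Stop> both map to C<*>.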
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
219
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
220 =head2 getTranslationTable
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
221
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
222 $bio->getTranslationTable();
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
223
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
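Convenience function which returns a hash reference mapping each of the 64 codons over A, C, G and T to its translation as reported by Bio::Tools::CodonTable. An optional codon table ID may be passed as the only argument; it defaults to 1. For table 11 the stop codons are given distinct symbols: TGA is C<*>, TAA is C<#> and TAG is C<+>.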
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
225
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
The whole table is built up front for speed: Bio::Tools::CodonTable is very slow, and we require better performance than translating through it one codon at a time.
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
227
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
228 =head2 isValidTag
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
229
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
230 if($cptbio->isValidTag('locus_tag')) { ... }
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
231
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
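Checks whether the given name is a recognised GenBank (GBK) feature tag. Returns 1 if it is and undef otherwise; the comparison is case-sensitive.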
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
233
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
234 =head1 AUTHOR
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
235
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
236 Eric Rasche <rasche.eric@yandex.ru>
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
237
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
238 =head1 COPYRIGHT AND LICENSE
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
239
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
240 This software is Copyright (c) 2014 by Eric Rasche.
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
241
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
242 This is free software, licensed under:
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
243
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
244 The GNU General Public License, Version 3, June 2007
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
245
b18e8268bf4e Uploaded
cpt
parents:
diff changeset
246 =cut