% Jinja-style LaTeX template for the GenomeA compliance report.
% Template expressions and control tags are resolved when the template is
% rendered; the rendered output is what LaTeX compiles.
\documentclass[]{article}

% --- Fonts, math and encoding ---------------------------------------------
\usepackage{lmodern}
\usepackage{amssymb,amsmath}
\usepackage{ifxetex,ifluatex}
\usepackage{fixltx2e} % provides \textsubscript (only needed on pre-2015 LaTeX kernels)
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}

% --- Page geometry --------------------------------------------------------
% Widen the text block by 1.75in and pull both side margins in by half of
% that, so the block stays horizontally centred.
\addtolength{\oddsidemargin}{-.875in}
\addtolength{\evensidemargin}{-.875in}
\addtolength{\textwidth}{1.75in}

% Same adjustment vertically: 1.75in taller, top margin raised by .875in.
\addtolength{\topmargin}{-.875in}
\addtolength{\textheight}{1.75in}

% --- Running header and footer -------------------------------------------
\usepackage{fancyhdr}
\pagestyle{fancy}
\lhead{GenomeA Compliance Report}
\chead{}
% The spaces inside the braces keep the LaTeX group braces apart from the
% template expression delimiters.
\rhead{ {{record_name | texify}} }
\lfoot{}
\cfoot{\thepage}
\rfoot{}

% --- Typography, links and tables ----------------------------------------
\usepackage{microtype}
\usepackage{hyperref}
\hypersetup{unicode=true,
            pdfborder={0 0 0},
            breaklinks=true}
\urlstyle{same} % don't use monospace font for urls
\usepackage{longtable,booktabs}

% --- Title block -----------------------------------------------------------
\date{Compiled \today}
\title{GenomeA Compliance Report for {{record_nice_name | texify}}}

\begin{document}
%\pagestyle{plain}
\maketitle
This report details possible issues with your submitted genome annotations.

% First top-level part of the report: problems that block acceptance.
\section{Required Changes}\label{required-changes}

The changes detailed in this section are required for acceptance of your
submission.

\subsection{Missing Gene Features}\label{missing-gene-features}

% The introduction states the requirement itself, so it reads correctly
% whether or not any feature is listed below.
Every coding sequence (``CDS'' in your GenBank file) must have an
associated gene feature (``gene''). This is required for validation by NCBI's
rules, which are encoded in the sequin and tbl2asn programs.
{% if missing_genes_bad > 0 %}

{{ missing_genes_bad }} out of {{ missing_genes_good + missing_genes_bad
}} features are lacking their associated gene feature.

% One row per entry of missing_genes. The quoted-brace expressions emit the
% literal braces of \texttt so they cannot merge with the delimiters of the
% neighbouring template expression.
\begin{longtable}{ll}
\hline
Feature ID & Location\\
\hline
\endhead
{% for row in missing_genes %}
{{ row.id | texify }} & \texttt{{'{'}}{{row.location}}{{'}'}}\tabularnewline
{% endfor %}
\end{longtable}
{% else %}
You are not missing any gene features.
{% endif %}

\subsection{Missing Product Tags}\label{missing-product-tags}

% The summary sentence is always printed; the table only appears when at
% least one feature lacks a product tag.
{{missing_tags_good}} out of {{missing_tags_good + missing_tags_bad}} features have product tags (\texttt{/product="..."}).
{% if missing_tags_bad > 0 %}
The following features are missing product tags:
\begin{longtable}{ll}
\hline
Feature & Location\\
\hline
\endhead
{% for row in missing_tags %}
{{ row.id | texify }} & \texttt{{'{'}}{{row.location}}{{'}'}}\tabularnewline
{% endfor %}
\end{longtable}
{% endif %}

\subsection{Missing Locus Tags}\label{missing-locus-tags}

{{gene_model_correction_good}} out of {{gene_model_correction_good + gene_model_correction_bad}} features have valid locus tags (\texttt{/locus\_tag="..."}).
{% if gene_model_correction_bad > 0 %}
The following features have issues with their locus tags:
% Each row is indexed as row[0], row[1], row[2]; the column headers suggest
% these are the gene feature, the CDS feature and the issue text
% (NOTE(review): confirm against the report generator).
% A feature without a locus_tag qualifier is shown as "(none)" instead of
% aborting the render on the missing key.
\begin{longtable}{lllll}
\hline
ID & Location & Gene Locus Tag & CDS Locus Tag & Issue \\
\hline
\endhead
{% for row in gene_model_correction %}
{{ row[0].id | texify }} & \texttt{{'{'}}{{row[1].location}}{{'}'}} & {{
    row[0].qualifiers['locus_tag'] | default([]) | first | default('(none)') | texify
}} & {{
    row[1].qualifiers['locus_tag'] | default([]) | first | default('(none)') | texify
}} & {{ row[2] | texify }}\tabularnewline
{% endfor %}
\end{longtable}
{% endif %}

% Second top-level part of the report: recommendations.
\section{Suggested Changes}\label{suggested-changes}

These changes are not required, but are strongly encouraged in order to
provide a uniform genome annotation within the phage community.

\subsection{Start Codons}\label{start-codons}
Nearly all phage genes use ATG, GTG or TTG as start codons. The start codon distribution is as
follows:

% Two-column table: one row per key of weird_starts_overall, in the order
% given by weird_starts_overall_sorted_keys.
\begin{longtable}{ll}
\hline
Start Codon & Count\\
\hline
\endhead
{% for codon_key in weird_starts_overall_sorted_keys %}
{{ codon_key }} & {{ weird_starts_overall[codon_key] }} \\
{% endfor %}
\end{longtable}

% The per-feature listing is only shown when unusual starts were counted.
{% if weird_starts_bad != 0 %}
There are {{weird_starts_bad }} unusual start codons in the genome; these
should be carefully justified. If there is evidence for these starts, the
GenomeA text should note this.

\begin{longtable}{lll}
\hline
Feature ID & Location & Start Codon\\
\hline
\endhead
{% for row in weird_starts %}
{{ row.id | texify }} & \texttt{{'{'}}{{row.location}}{{'}'}} & {{row.__start}} \\
{% endfor %}
\end{longtable}

{% endif %}

\subsection{Unannotated RBSs}\label{unannotated-rbss}

The following CDSs either do not have a detectable ribosome binding site (RBS;
Shine-Dalgarno sequence), in which case there is a strong possibility that
this is not the correct start, or there is one but it is not annotated.
Annotating the RBS as part of the gene feature is the best practice.

% Four columns. missing_rbs is walked twice so that rows whose message does
% not contain "Unannotated" are listed first, followed by the rows whose
% message does; within each group the original order is kept.
\begin{longtable}{llll}
\hline
ID & Location & Error & Upstream (-{{upstream_max}} .. -{{upstream_min}})\\
\hline
\endhead
{% for row in missing_rbs if 'Unannotated' not in row.__message %}
{{ row.id | texify }} & \texttt{{'{'}}{{row.location}}{{'}'}} & {{row.__message | texify}} & \texttt{{'{'}}{{row.__upstream}}{{'}'}} \\
{% endfor %}
{% for row in missing_rbs if 'Unannotated' in row.__message %}
{{ row.id | texify }} & \texttt{{'{'}}{{row.location}}{{'}'}} & {{row.__message | texify}} & \texttt{{'{'}}{{row.__upstream}}{{'}'}} \\
{% endfor %}
\end{longtable}

% Third top-level part of the report: informational findings.
\section{Areas for Further Examination}\label{notes}

These areas may be indicative of a problem, or may simply be
informational. You should examine the areas mentioned in detail to ensure
that the annotations are valid and that no genes are missed.

\subsection{Unusual Gaps}\label{excessive-gaps}

% Both thresholds are read from params and reported in nucleotides.
{% if excessive_gap | length == 0 %}
No gaps over {{ params['excessive_gap_dist'] }} nt (for genes on the same
strand) or {{ params['excessive_gap_divergent_dist'] }} nt (for genes on
opposite strands) were found.
{% else %}
Gaps over {{ params['excessive_gap_dist'] }} nt (for genes on the same
strand) or {{ params['excessive_gap_divergent_dist'] }} nt (for genes on
opposite strands) were found:

% row[0]..row[1] is printed as the region and row[1] - row[0] as its size;
% row[2] and row[3] go through nice_strand_tex, and row[4] is reported as an
% ORF count when it is non-zero.
\begin{longtable}{llll}
\hline
Region & Size & Surroundings & Messages\\
\hline
\endhead
{% for row in excessive_gap %}
\texttt{{'{'}}{{row[0]}}..{{row[1]}}{{'}'}} & {{row[1] - row[0]}} & {{row[2] | nice_strand_tex}} {{row[3] | nice_strand_tex}} & {% if row[4] != 0 %}{{row[4]}} ORFs found in this region{% endif %} \\
{% endfor %}
\end{longtable}
{% endif %}

\subsection{Unusual Overlaps}\label{excessive-overlaps}

% Both thresholds are read from params and reported in nucleotides.
{% if excessive_overlap | length == 0 %}
No overlaps over {{ params['excessive_overlap_dist'] }} nt (for genes on the same
strand) or {{ params['excessive_overlap_divergent_dist'] }} nt (for genes on
opposite strands) were found.
{% else %}
Overlaps over {{ params['excessive_overlap_dist'] }} nt (for genes on the same
strand) or {{ params['excessive_overlap_divergent_dist'] }} nt (for genes on
opposite strands) were found:
% Six columns: ID and location for each of the two features (row[0] and
% row[1]), then the overlapping region row[2]..row[3] and its length
% row[3] - row[2].
\begin{longtable}{llllll}
\hline
\multicolumn{2}{l}{Feature A} & \multicolumn{2}{l}{Feature B} & & \\
ID & Location & ID & Location & Region & Length\\
\hline
\endhead
{% for row in excessive_overlap %}
{{row[0].id | texify}} & \texttt{{'{'}}{{row[0].location}}{{'}'}} & {{row[1].id | texify}} & \texttt{{'{'}}{{row[1].location}}{{'}'}} & {{row[2]}}..{{row[3]}} & {{row[3] - row[2]}} \\
{% endfor %}
\end{longtable}
{% endif %}

\subsection{Coding Density}\label{coding-density}

% coding_density_real is printed as a percentage; coding_density is the
% score derived from it, printed out of 100.
You have a coding density of {{ coding_density_real }}\% which scores
{{ coding_density }} / 100 on our scale. Most genomes should be in the 90\% to 100\%
coding density range.

\end{document}