Mercurial > repos > cpt > cpt_phageqc_annotations
comparison cpt_phageqc_annotation/phageqc_report_genomea.tex @ 0:c3140b08d703 draft default tip
Uploaded
author | cpt |
---|---|
date | Fri, 17 Jun 2022 13:00:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c3140b08d703 |
---|---|
1 \documentclass[]{article} | |
2 \usepackage{lmodern} | |
3 \usepackage{amssymb,amsmath} | |
4 \usepackage{ifxetex,ifluatex} % NOTE(review): no \ifxetex or \ifluatex test is visible in this file -- confirm these are still needed | |
5 \usepackage{fixltx2e} % provides \textsubscript; NOTE(review): obsolete on LaTeX releases after 2015 -- confirm the target TeX version before dropping it | |
6 \usepackage[T1]{fontenc} | |
7 \usepackage[utf8]{inputenc} | |
8 % Page width: reduce both side-margin offsets by 0.875in and add 1.75in to the text width. | |
9 \addtolength{\oddsidemargin}{-.875in} | |
10 \addtolength{\evensidemargin}{-.875in} | |
11 \addtolength{\textwidth}{1.75in} | |
12 % Page height: reduce the top margin by 0.875in and add 1.75in to the text height. | |
13 \addtolength{\topmargin}{-.875in} | |
14 \addtolength{\textheight}{1.75in} | |
15 % Page header: fixed report title on the left, templated record name on the right; footer: centred page number. | |
16 \usepackage{fancyhdr} | |
17 \pagestyle{fancy} | |
18 \lhead{GenomeA Compliance Report} | |
19 \chead{} | |
20 \rhead{ {{record_name | texify}} } | |
21 \lfoot{} | |
22 \cfoot{\thepage} | |
23 \rfoot{} | |
24 % Typography, PDF link handling and multi-page table support. | |
25 | |
26 | |
27 \usepackage{microtype} | |
28 \usepackage{hyperref} | |
29 \hypersetup{unicode=true, | |
30 pdfborder={0 0 0}, | |
31 breaklinks=true} | |
32 \urlstyle{same} % don't use monospace font for urls | |
33 \usepackage{longtable,booktabs} % NOTE(review): the tables in this file rule with \hline; no booktabs rule command is visible here | |
34 \date{Compiled \today} | |
35 \title{GenomeA Compliance Report for {{record_nice_name | texify}}} | |
36 | |
37 \begin{document} | |
38 %\pagestyle{plain} | |
39 \maketitle | |
40 This report details possible issues with your submitted genome annotations. | |
41 | |
42 \section{Required Changes}\label{required-changes} | |
43 | |
44 The changes detailed in this section are required for acceptance of your | |
45 submission. | |
46 | |
47 \subsection{Missing Gene Features}\label{missing-gene-features} % CDS features reported as lacking a gene feature | |
48 | |
49 These coding sequences (``CDS'' in your GenBank file) are missing the | |
50 associated gene feature (``gene''). This is required for validation by NCBI's | |
51 rules which are encoded in the sequin and tbl2asn programs. | |
52 {% if missing_genes_bad > 0 %} | |
53 | |
54 {{ missing_genes_bad }} out of {{ missing_genes_good + missing_genes_bad | |
55 }} features are lacking their associated gene feature. | |
56 | |
57 \begin{longtable}{ll} % one row per entry of missing_genes; only emitted when missing_genes_bad is above 0 | |
58 \hline | |
59 Feature ID & Location\\ | |
60 \hline | |
61 \endhead | |
62 {% for row in missing_genes %} | |
63 {{ row.id | texify }} & \texttt{{'{'}}{{row.location}}{{'}'}}\tabularnewline | |
64 {% endfor %} | |
65 \end{longtable} | |
66 {% else %} | |
67 You are not missing any gene features. | |
68 {% endif %} | |
69 | |
70 \subsection{Missing Product Tags}\label{missing-product-tags} % summary count always shown; table only when missing_tags_bad is above 0 | |
71 | |
72 {{missing_tags_good}} out of {{missing_tags_good + missing_tags_bad}} features have product tags (\texttt{/product="..."}). | |
73 {% if missing_tags_bad > 0 %} | |
74 The following features are missing product tags: | |
75 \begin{longtable}{ll} % one row per entry of missing_tags | |
76 \hline | |
77 Feature & Location\\ | |
78 \hline | |
79 \endhead | |
80 {% for row in missing_tags %} | |
81 {{ row.id | texify }} & \texttt{{'{'}}{{row.location}}{{'}'}}\tabularnewline | |
82 {% endfor %} | |
83 \end{longtable} | |
84 {% endif %} | |
85 | |
86 \subsection{Missing Locus Tags}\label{missing-locus-tags} % summary count always shown; table only when gene_model_correction_bad is above 0 | |
87 | |
88 {{gene_model_correction_good}} out of {{gene_model_correction_good + gene_model_correction_bad}} features have valid locus tags (\texttt{/locus\_tag="..."}). | |
89 {% if gene_model_correction_bad > 0 %} | |
90 The following features have issues with their locus tags: | |
91 \begin{longtable}{lllll} % row[0] and row[1] supply the gene and CDS locus-tag columns, row[2] the issue text; an absent or empty locus_tag renders as an empty cell instead of aborting the render (assumes qualifiers is dict-like) | |
92 \hline | |
93 ID & Location & Gene Locus Tag & CDS Locus Tag & Issue \\ | |
94 \hline | |
95 \endhead | |
96 {% for row in gene_model_correction %} | |
97 {{ row[0].id | texify }} & \texttt{{'{'}}{{row[1].location}}{{'}'}} & {{ row[0].qualifiers.get('locus_tag', []) | first | default('') | texify }} & {{ row[1].qualifiers.get('locus_tag', []) | first | default('') | texify }} & {{ row[2] | texify }}\tabularnewline | |
98 {% endfor %} | |
99 \end{longtable} | |
100 {% endif %} | |
101 | |
102 | |
103 \section{Suggested Changes}\label{suggested-changes} | |
104 | |
105 These changes are not required, but are strongly encouraged in order to | |
106 provide a uniform genome annotation within the phage community. | |
107 | |
108 \subsection{Start Codons}\label{start-codons} | |
109 Nearly all phage genes use ATG, GTG or TTG as start codons. The start codon distribution is as | |
110 follows: | |
111 | |
112 | |
113 \begin{longtable}{ll} % two columns: codon and its count, in the order given by weird_starts_overall_sorted_keys | |
114 \hline | |
115 Start Codon & Count\\ | |
116 \hline | |
117 \endhead | |
118 {% for codon_key in weird_starts_overall_sorted_keys %} | |
119 {{ codon_key }} & {{ weird_starts_overall[codon_key] }} \\ | |
120 {% endfor %} | |
121 \end{longtable} | |
122 | |
123 {% if weird_starts_bad != 0 %} | |
124 There are {{weird_starts_bad }} unusual start codons in the genome; these | |
125 should be carefully justified. If there is evidence for these starts, the | |
126 GenomeA text should note this. | |
127 | |
128 \begin{longtable}{lll} % one row per entry of weird_starts; only emitted when weird_starts_bad is non-zero | |
129 \hline | |
130 Feature ID & Location & Start Codon\\ | |
131 \hline | |
132 \endhead | |
133 {% for row in weird_starts %} | |
134 {{ row.id | texify }} & \texttt{{'{'}}{{row.location}}{{'}'}} & {{row.__start}} \\ | |
135 {% endfor %} | |
136 \end{longtable} | |
137 | |
138 {% endif %} | |
139 | |
140 \subsection{Unannotated RBSs}\label{unannotated-rbss} | |
141 | |
142 The following CDSs either do not have a detectable ribosome binding site (RBS; | |
143 Shine-Dalgarno sequence), in which case there is a strong possibility that | |
144 this is not the correct start, or there is one but it is not annotated. | |
145 Annotating the RBS as part of the gene feature is the best practice. | |
146 | |
147 \begin{longtable}{llll} % four columns; rows whose message lacks the word Unannotated are listed first, then the Unannotated ones | |
148 \hline | |
149 ID & Location & Error & Upstream (-{{upstream_max}} .. -{{upstream_min}})\\ | |
150 \hline | |
151 \endhead | |
152 {% for row in missing_rbs %} | |
153 {% if 'Unannotated' not in row.__message %} | |
154 {{ row.id | texify }} & \texttt{{'{'}}{{row.location}}{{'}'}} & {{row.__message | texify}} & \texttt{{'{'}}{{row.__upstream}}{{'}'}} \\ | |
155 {% endif %} | |
156 {% endfor %} | |
157 {% for row in missing_rbs %} | |
158 {% if 'Unannotated' in row.__message %} | |
159 {{ row.id | texify }} & \texttt{{'{'}}{{row.location}}{{'}'}} & {{row.__message | texify}} & \texttt{{'{'}}{{row.__upstream}}{{'}'}} \\ | |
160 {% endif %} | |
161 {% endfor %} | |
162 \end{longtable} | |
163 | |
164 \section{Areas for Further Examination}\label{notes} | |
165 | |
166 These areas may be indicative of a problem, or may simply be | |
167 informational. You should examine the areas mentioned in detail to ensure | |
168 that the annotations are valid and that no genes are missed. | |
169 | |
170 | |
171 | |
172 | |
173 | |
174 \subsection{Unusual Gaps}\label{excessive-gaps} % both thresholds are read from params | |
175 | |
176 {% if excessive_gap | length == 0 %} | |
177 No gaps over {{ params['excessive_gap_dist'] }} nt (for genes on the same | |
178 strand) or {{ params['excessive_gap_divergent_dist'] }} nt (for genes on | |
179 opposite strands) were found. | |
180 {% else %} | |
181 Gaps over {{ params['excessive_gap_dist'] }} nt (for genes on the same | |
182 strand) or {{ params['excessive_gap_divergent_dist'] }} nt (for genes on | |
183 opposite strands) were found. | |
184 | |
185 \begin{longtable}{llll} % Size is row[1] - row[0]; the ORF note is left out when row[4] is 0 | |
186 \hline | |
187 Region & Size & Surroundings & Messages\\ | |
188 \hline | |
189 \endhead | |
190 {% for row in excessive_gap %} | |
191 \texttt{{'{'}}{{row[0]}}..{{row[1]}}{{'}'}} & {{row[1] - row[0]}} & {{row[2] | nice_strand_tex}} {{row[3] | nice_strand_tex}} & {% if row[4] != 0 %}{{row[4]}} ORFs found in this region{% endif %} \\ | |
192 | |
193 {% endfor %} | |
194 \end{longtable} | |
195 {% endif %} | |
196 | |
197 | |
198 | |
199 | |
200 \subsection{Unusual Overlaps}\label{excessive-overlaps} % both thresholds are read from params | |
201 | |
202 {% if excessive_overlap | length == 0 %} | |
203 No overlaps over {{ params['excessive_overlap_dist'] }} nt (for genes on the same | |
204 strand) or {{ params['excessive_overlap_divergent_dist'] }} nt (for genes on | |
205 opposite strands) were found. | |
206 {% else %} | |
207 Overlaps over {{ params['excessive_overlap_dist'] }} nt (for genes on the same | |
208 strand) or {{ params['excessive_overlap_divergent_dist'] }} nt (for genes on | |
209 opposite strands) were found. | |
210 \begin{longtable}{llllll} % row[0] and row[1] are the two overlapping features; Length is row[3] - row[2] | |
211 \hline | |
212 \multicolumn{2}{l}{Feature A} & \multicolumn{2}{l}{Feature B} & & \\ | |
213 ID & Location & ID & Location & Region & Length\\ | |
214 \hline | |
215 \endhead | |
216 {% for row in excessive_overlap %} | |
217 {{row[0].id | texify}} & \texttt{{'{'}}{{row[0].location}}{{'}'}} & {{row[1].id | texify}} & \texttt{{'{'}}{{row[1].location}}{{'}'}} & {{row[2]}}..{{row[3]}} & {{row[3] - row[2]}} \\ | |
218 {% endfor %} | |
219 \end{longtable} | |
220 {% endif %} | |
221 | |
222 \subsection{Coding Density}\label{coding-density} % coding_density_real is printed as the percentage, coding_density as the score out of 100 | |
223 | |
224 You have a coding density of {{ coding_density_real }}\% which scores | |
225 {{ coding_density }} / 100 on our scale. Most genomes should be in the 90\% to 100\% | |
226 coding density range. | |
227 | |
228 | |
229 | |
230 | |
231 | |
232 | |
233 | |
234 | |
235 \end{document} | |