annotate pyPRADA_1.2/tools/samtools-0.1.16/bcftools/bcf.tex @ 0:acc2ca1a3ba4

Uploaded
author siyuan
date Thu, 20 Feb 2014 00:44:58 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
1 \documentclass[10pt,pdftex]{article}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
2 \usepackage{color}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
3 \definecolor{gray}{rgb}{0.7,0.7,0.7}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
4
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
5 \setlength{\topmargin}{0.0cm}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
6 \setlength{\textheight}{21.5cm}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
7 \setlength{\oddsidemargin}{0cm}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
8 \setlength{\textwidth}{16.5cm}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
9 \setlength{\columnsep}{0.6cm}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
10
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
11 \begin{document}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
12
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
13 \begin{center}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
14 \begin{tabular}{|l|l|l|l|l|}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
15 \hline
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
16 \multicolumn{2}{|c|}{\bf Field} & \multicolumn{1}{c|}{\bf Description} & \multicolumn{1}{c|}{\bf Type} & \multicolumn{1}{c|}{\bf Value} \\\hline\hline
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
17 \multicolumn{2}{|l|}{\sf magic} & Magic string & {\tt char[4]} & {\tt BCF\char92 4} \\\hline
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
18 \multicolumn{2}{|l|}{\sf l\_seqnm} & Length of concatenated sequence names & {\tt int32\_t} & \\\hline
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
19 \multicolumn{2}{|l|}{\sf seqnm} & Concatenated names, {\tt NULL} padded & {\tt char[{\sf l\_seqnm}]} & \\\hline
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
20 \multicolumn{2}{|l|}{\sf l\_smpl} & Length of concatenated sample names & {\tt int32\_t} & \\\hline
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
21 \multicolumn{2}{|l|}{\sf smpl} & Concatenated sample names & {\tt char[{\sf l\_smpl}]} & \\\hline
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
22 \multicolumn{2}{|l|}{\sf l\_meta} & Length of the meta text (double-hash lines)& {\tt int32\_t} & \\\hline
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
23 \multicolumn{2}{|l|}{\sf meta} & Meta text, {\tt NULL} terminated & {\tt char[{\sf l\_meta}]} & \\\hline
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
24 \multicolumn{5}{|c|}{\it \color{gray}{List of records until the end of the file}}\\\cline{2-5}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
25 & {\sf seq\_id} & Reference sequence ID & {\tt int32\_t} & \\\cline{2-5}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
26 & {\sf pos} & Position & {\tt int32\_t} & \\\cline{2-5}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
27 & {\sf qual} & Variant quality & {\tt float} & \\\cline{2-5}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
28 & {\sf l\_str} & Length of {\sf str} & {\tt int32\_t} & \\\cline{2-5}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
29 & {\sf str} & {\tt ID+REF+ALT+FILTER+INFO+FORMAT}, {\tt NULL} padded & {\tt char[{\sf l\_str}]} &\\\cline{2-5}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
30 & \multicolumn{4}{c|}{Blocks of data; \#blocks and formats defined by {\tt FORMAT} (table below)}\\
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
31 \hline
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
32 \end{tabular}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
33 \end{center}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
34
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
35 \begin{center}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
36 \begin{tabular}{clp{9cm}}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
37 \hline
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
38 \multicolumn{1}{l}{\bf Field} & \multicolumn{1}{l}{\bf Type} & \multicolumn{1}{l}{\bf Description} \\\hline
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
39 {\tt DP} & {\tt uint16\_t[n]} & Read depth \\
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
40 {\tt GL} & {\tt float[n*G]} & Log10 likelihood of data; $G=\frac{A(A+1)}{2}$, $A=\#\{\mathit{alleles}\}$\\
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
41 {\tt GT} & {\tt uint8\_t[n]} & {\tt missing\char60\char60 7 | phased\char60\char60 6 | allele1\char60\char60 3 | allele2} \\
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
42 {\tt \_GT} & {\tt uint8\_t+uint8\_t[n*P]} & {Generic GT; the first int equals the max ploidy $P$. If the highest bit is set,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
43 the allele is not present (e.g.\ due to different ploidy between samples).} \\
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
44 {\tt GQ} & {\tt uint8\_t[n]} & {Genotype quality}\\
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
45 {\tt HQ} & {\tt uint8\_t[n*2]} & {Haplotype quality}\\
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
46 {\tt \_HQ} & {\tt uint8\_t+uint8\_t[n*P]} & {Generic HQ}\\
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
47 {\tt IBD} & {\tt uint32\_t[n*2]} & {IBD}\\
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
48 {\tt \_IBD} & {\tt uint8\_t+uint32\_t[n*P]} & {Generic IBD}\\
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
49 {\tt PL} & {\tt uint8\_t[n*G]} & {Phred-scaled likelihood of data}\\
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
50 {\tt PS} & {\tt uint32\_t[n]} & {Phase set}\\
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
51 %{\tt SP} & {\tt uint8\_t[n]} & {Strand bias P-value (bcftools only)}\\
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
52 \emph{Integer} & {\tt int32\_t[n*X]} & {Fixed-size custom Integer; $X$ defined in the header}\\
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
53 \emph{Numeric} & {\tt double[n*X]} & {Fixed-size custom Numeric}\\
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
54 \emph{String} & {\tt uint32\_t+char*} & {\tt NULL} padded concatenated strings (int equals the length) \\
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
55 \hline
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
56 \end{tabular}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
57 \end{center}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
58
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
59 \begin{itemize}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
60 \item A BCF file is in the {\tt BGZF} format.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
61 \item All multi-byte numbers are little-endian.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
62 \item In a string, a missing value `.' is an empty C string ``{\tt
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
63 \char92 0}'' (not ``{\tt .\char92 0}'').
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
64 \item For {\tt GL} and {\tt PL}, likelihoods of genotypes appear in the
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
65 order of alleles in {\tt REF} and then {\tt ALT}. For example, if {\tt
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
66 REF=C}, {\tt ALT=T,A}, likelihoods appear in the order of {\tt
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
67 CC,CT,TT,CA,TA,AA} (NB: the ordering is different from the one in the original
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
68 BCF proposal).
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
69 \item Predefined {\tt FORMAT} fields can be missing from VCF headers, but custom {\tt FORMAT} fields
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
70 are required to be explicitly defined in the headers.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
71 \item A {\tt FORMAT} field with its name starting with `{\tt \_}' is specific to BCF only.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
72 It gives an alternative binary representation of the corresponding VCF field, in case
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
73 the default representation is unable to keep the genotype information,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
74 for example, when the ploidy is not 2 or there are more than 8 alleles.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
75 \end{itemize}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
76
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
77 \end{document}