Mercurial > repos > iuc > gemini_inheritance
changeset 0:3123ce7acd0e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 62ed732cba355e695181924a8ed4cce49ca21c59
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gemini_inheritance.xml Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,486 @@ +<tool id="gemini_inheritance" name="GEMINI inheritance pattern" version="@VERSION@"> + <description>based identification of candidate genes</description> + <macros> + <import>gemini_macros.xml</import> + <xml name="name_X"> + <param name="X" type="text" value="" + label="Alias to use for X chromosome" + help="The tool expects the X chromosome to be named 'X' or 'chrX'. If the reference genome used for variant calling had a different name for it, you will have to specify it here." /> + </xml> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version_command" /> + <command> +<![CDATA[ + gemini ${inheritance.pattern_type} + + #for $cond in $inheritance.constraint: + #set $filter = str($cond.filter).strip() + #if str($filter): + #if str($inheritance.pattern_type) == "comp_hets" and $cond.overwrite_default_filter: + --gene-where '$filter' + #else: + --filter '$filter' + #end if + #end if + #end for + + #if str($inheritance.pattern_type) in ("comp_hets", "mendel_errors"): + ${inheritance.by_pattern_only} + #end if + + ${inheritance.lenient} + ${inheritance.allow_unaffected} + + #if str($inheritance.pattern_type).startswith('x_linked_') and str($inheritance.X).strip(): + -X "${inheritance.X}" + #end if + + #if int($family_wise.min_kindreds) > 0: + --min-kindreds ${family_wise.min_kindreds} + #end if + + #if str($family_wise.families).strip(): + #set $families = ','.join([f.strip() for f in $family_wise.families.split(',')]) + --families "$families" + #end if + + #if int($family_wise.per_variant_selection.min_dp) > 0: + -d ${family_wise.per_variant_selection.min_dp} + #end if + + #if int($family_wise.per_variant_selection.min_gq) > 0: + --min-gq ${family_wise.per_variant_selection.min_gq} + #end if + + #if int($family_wise.per_variant_selection.max_pl) > -1: + --gt-pl-max 
${family_wise.per_variant_selection.max_pl} + #end if + + #set $report = $oformat.report + @COLUMN_SELECT@ + + "${ infile }" + > "${ outfile }" +]]> + </command> + <inputs> + <expand macro="infile" /> + <conditional name="inheritance"> + <param name="pattern_type" type="select" + label="Your assumption about the inheritance pattern of the phenotype of interest"> + <option value="autosomal_recessive">Autosomal recessive</option> + <option value="autosomal_dominant">Autosomal dominant</option> + <option value="x_linked_recessive">X-linked recessive</option> + <option value="x_linked_dominant">X-linked dominant</option> + <option value="de_novo">Autosomal de-novo</option> + <option value="x_linked_de_novo">X-linked de-novo</option> + <option value="comp_hets">Compound heterozygous</option> + <option value="mendel_errors">Violation of mendelian laws (LOH, plausible and implausible de-novo, uniparental disomy)</option> + </param> + <when value="comp_hets"> + <expand macro="insert_constraint"> + <expand macro="overwritable_where_default" default_where="exonic and high-impact variants (SQL clause: is_exonic = 1 or impact_severity != 'LOW')" /> + </expand> + <param argument="--pattern-only" name="by_pattern_only" type="boolean" truevalue="--pattern-only" falsevalue="" checked="false" + label="Ignore sample phenotypes during variant identification" + help="When turned on, the identification of compound heterozygous variant pairs gets based on the family tree only, i.e., the tool looks for heterozygous allele pairs in any kid that weren't occuring together in the parents (see the tool help below for the exact criteria used to detect compound heterozygosity)." /> + <expand macro="lenient" argument="--max-priority" truevalue="--max-priority 3" + help="When turned on, runs the tool with --max-priority 3 instead of the default value 1. This leads to inclusion of more ambiguous cases of compound heterozygosity." 
/> + <expand macro="unaffected" /> + </when> + <when value="mendel_errors"> + <expand macro="insert_constraint" /> + <param argument="--only-affected" name="by_pattern_only" type="boolean" truevalue="" falsevalue="--only-affected" checked="false" + label="Ignore sample phenotypes during variant identification" + help="When turned on, the identification of candidate variants gets based on the observed inheritance pattern only. The default is to report candidates only if there is evidence for them being phenotypically relevant, i.e., if they are observed in an affected sample." /> + <expand macro="lenient" /> + <param name="allow_unaffected" type="hidden" value="" /> + </when> + <when value="autosomal_recessive"> + <expand macro="insert_constraint" /> + <expand macro="lenient" /> + <expand macro="unaffected" /> + </when> + <when value="autosomal_dominant"> + <expand macro="insert_constraint" /> + <expand macro="lenient" /> + <expand macro="unaffected" /> + </when> + <when value="x_linked_recessive"> + <expand macro="insert_constraint" /> + <param name="lenient" type="hidden" value="" /> + <expand macro="unaffected" /> + <expand macro="name_X" /> + </when> + <when value="x_linked_dominant"> + <expand macro="insert_constraint" /> + <param name="lenient" type="hidden" value="" /> + <expand macro="unaffected" /> + <expand macro="name_X" /> + </when> + <when value="de_novo"> + <expand macro="insert_constraint" /> + <expand macro="lenient" /> + <expand macro="unaffected" /> + </when> + <when value="x_linked_de_novo"> + <expand macro="insert_constraint" /> + <param name="lenient" type="hidden" value="" /> + <expand macro="unaffected" /> + <expand macro="name_X" /> + </when> + </conditional> + <section name="family_wise" title="Family-wise criteria for variant selection" expanded="true"> + <expand macro="min_kindreds" /> + <param argument="--families" name="families" type="text" value="" + label="List of families to restrict the analysis to (comma-separated)" + help="Leave 
empty for an analysis including all families"/> + <conditional name="per_variant_selection"> + <param name="enabled" type="select" + label="Specify additional criteria to exclude families on a per-variant basis"> + <option value="no">No, analyze all variants from all included families</option> + <option value="yes">Yes, filter variants within families</option> + </param> + <when value="no"> + <param name="min_dp" type="hidden" value="0" /> + <param name="min_gq" type="hidden" value="0" /> + <param name="max_pl" type="hidden" value="-1" /> + </when> + <when value="yes"> + <param argument="-d" name="min_dp" type="integer" value="0" min="0" + label="Per-variant DP threshold for including a family" + help="All samples from a family must have a sequencing depth of at least this value at a given variant site in order for the family to be included in the analysis of this particular variant. default: 0 (do not apply this filter)" /> + <param argument="--min-gq" name="min_gq" type="integer" value="0" min="0" + label="per-variant GQ threshold for including a family" + help="The genotypes of all samples from a family must be called with at least this quality at a given variant site in order for the family to be included in the analysis of this particular variant. default: 0 (do not apply this filter)"> + </param> + <param argument="--gt-pl-max" name="max_pl" type="integer" value="-1" min="-1" + label="per-variant PL threshold for including a family" + help="The genotypes at a given variant site of all samples from a family must not have a higher (phred-scaled) likelihood to be wrong than this value in order for the family to be included in the analysis of this particular variant. 
default: -1 (do not apply this filter); if used the GEMINI documentation suggests 10 as a reasonable value" /> + </when> + </conditional> + </section> + <section name="oformat" title="Output - included information" expanded="true"> + <expand macro="column_filter" help="The tool reports key information about the inheritance pattern detection for each candidate variant found. It can precede each such row with additional columns, listing information about the variant taken from the variants table of the GEMINI database. Here, you can control which subset of the variants table columns should be added to the output."/> + </section> + </inputs> + <outputs> + <data name="outfile" format="tabular" + label="GEMINI ${inheritance.pattern_type} pattern on ${on_string}" /> + </outputs> + <tests> + <test> + <param name="infile" value="gemini_auto_dom_input.db" ftype="gemini.sqlite" /> + <conditional name="inheritance"> + <param name="pattern_type" value="autosomal_dominant" /> + <param name="lenient" value="true" /> + </conditional> + <conditional name="report"> + <param name="report_selector" value="minimal" /> + </conditional> + <output name="outfile"> + <assert_contents> + <has_line_matching expression="variant_id	gene	.*" /> + </assert_contents> + </output> + </test> + <test> + <param name="infile" value="gemini_auto_dom_input.db" ftype="gemini.sqlite" /> + <conditional name="inheritance"> + <param name="pattern_type" value="autosomal_dominant" /> + <param name="lenient" value="true" /> + </conditional> + <section name="oformat"> + <conditional name="report"> + <param name="report_selector" value="custom" /> + <param name="columns" value="gene,chrom,impact" /> + </conditional> + </section> + <output name="outfile"> + <assert_contents> + <has_line_matching expression="gene	chrom	impact.*" /> + </assert_contents> + </output> + </test> + <test> + <param name="infile" value="gemini_auto_dom_input.db" ftype="gemini.sqlite" /> + <conditional name="inheritance"> + <param 
name="pattern_type" value="autosomal_dominant" /> + <param name="lenient" value="true" /> + </conditional> + <section name="oformat"> + <conditional name="report"> + <param name="report_selector" value="custom" /> + <!-- test with empty multiselect list and columns specified + via text field instead --> + <param name="extra_cols" value="gene,chrom,impact" /> + </conditional> + </section> + <output name="outfile"> + <assert_contents> + <has_line_matching expression="gene	chrom	impact.*" /> + </assert_contents> + </output> + </test> + <test> + <param name="infile" value="gemini_auto_rec_input.db" ftype="gemini.sqlite" /> + <conditional name="inheritance"> + <param name="pattern_type" value="autosomal_recessive" /> + <param name="lenient" value="true" /> + </conditional> + <section name="oformat"> + <conditional name="report"> + <param name="report_selector" value="custom" /> + <param name="columns" value="gene,chrom,impact" /> + </conditional> + </section> + <output name="outfile"> + <assert_contents> + <has_line_matching expression="gene	chrom	impact.*" /> + </assert_contents> + </output> + </test> + <test> + <param name="infile" value="gemini_de_novo_input.db" ftype="gemini.sqlite" /> + <conditional name="inheritance"> + <param name="pattern_type" value="de_novo" /> + </conditional> + <section name="oformat"> + <conditional name="report"> + <param name="report_selector" value="custom" /> + <param name="columns" value="gene,ref,alt,impact" /> + </conditional> + </section> + <output name="outfile"> + <assert_contents> + <has_line_matching expression="gene	ref	alt	impact.*" /> + </assert_contents> + </output> + </test> + <test> + <param name="infile" value="gemini_comphets_input.db" ftype="gemini.sqlite" /> + <conditional name="inheritance"> + <param name="pattern_type" value="comp_hets" /> + <param name="lenient" value="true" /> + <param name="allow_unaffected" value="true" /> + </conditional> + <section name="oformat"> + <conditional name="report"> + <param 
name="report_selector" value="custom" /> + <param name="columns" value="chrom,start,end,ref,alt,gene,impact" /> + </conditional> + </section> + <output name="outfile"> + <assert_contents> + <has_line_matching expression="chrom	start	end	.*gene.*" /> + </assert_contents> + </output> + </test> + <test> + <param name="infile" value="gemini_de_novo_input.db" ftype="gemini.sqlite" /> + <conditional name="inheritance"> + <param name="pattern_type" value="mendel_errors" /> + </conditional> + <section name="oformat"> + <conditional name="report"> + <param name="report_selector" value="custom" /> + <param name="columns" value="gene,ref,alt,impact" /> + </conditional> + </section> + <output name="outfile"> + <assert_contents> + <has_line_matching expression="gene	ref	alt	impact	.*violation.*" /> + </assert_contents> + </output> + </test> + </tests> + <help> +<![CDATA[ + +**What it does** + +Assuming you have defined the familial relationships between samples when +loading your VCF into GEMINI, you can use this tool to identify candidate genes +and variants that explain the inheritance pattern of a phenotype of interest. + +**Inheritance pattern detection rules** + +*Autosomal recessive* + +Criteria: + +- all affected must be hom_alt +- [affected] no unaffected can be hom_alt (can be unknown) +- [default] if parents exist they must be unaffected and het for all affected kids +- [default] if there are no affecteds that have a parent, a warning is issued. + +If ``--lenient`` is specified, the 2 criteria prefixed with “[default]” are not +applied. + +If ``--allow-unaffected`` is specified, the criterion prefixed with +“[affected]” is not enforced. + +---- + +*Autosomal dominant* + +Criteria: + +- All affecteds must be het +- [affected] No unaffected can be het or homalt (can be unknown) +- de_novo mutations are not auto_dom (at least not in the first generation) +- At least 1 affected must have 1 affected parent (or have no parents). 
+- If no affected has a parent, a warning is issued. +- [default] All affecteds must have parents with known phenotype. +- [default] All affected kids must have at least 1 affected parent + +If ``--lenient`` is specified, the criteria prefixed with “[default]” are not +enforced. + +If ``--allow-unaffected`` is specified, the criterion prefixed with +“[affected]” is not enforced. + +Note that, for autosomal dominant, ``--lenient`` allows singleton affecteds to +be used to meet the ``--min-kindreds`` requirement if they are HET. + +If there is incomplete penetrance in the kindred (unaffected obligate carriers), +these individuals currently must be coded as having unknown phenotype or as +being affected. + +---- + +*X-linked recessive* + +Criteria: + +- Affected females must be HOM_ALT +- Unaffected females are HET or HOM_REF +- Affected males are not HOM_REF +- Unaffected males are HOM_REF + +Note: Pseudo-autosomal regions are not accounted for by the tool. + +---- + +*X-linked dominant* + +Criteria: + +- Affected males are HET or HOM_ALT +- Affected females must be HET +- Unaffecteds must be HOM_REF +- girls of affected dad must be affected +- boys of affected dad must be unaffected +- mothers of affected males must be het (and affected) +- at least 1 parent of affected females must be het (and affected). + +Note: Pseudo-autosomal regions are not accounted for by the tool. + +---- + +*De-novo mutations* + +Criteria: + +- all affected must be het +- [affected] all unaffected must be homref or homalt +- at least 1 affected kid must have unaffected parents +- [default] if an affected has affected parents, it’s not de_novo +- [default] all affected kids must have unaffected (or no) parents +- [default] warning if none of the affected samples have parents. + +The last 3 items, prefixed with [default] can be turned off with ``--lenient``. + +If ``--allow-unaffected`` is specified, then the criterion prefixed [affected] +is not enforced. 
+ +---- + +*X-linked de-novo mutations* + +Criteria: + +- affected female child must be het +- affected male child must be hom_alt (or het) +- parents should be unaffected and hom_ref + +Note: Pseudo-autosomal regions are not accounted for by the tool. + +---- + +*Compound heterozygosity* + +Unlike canonical recessive sites where the same recessive allele is inherited +from both parents at the *same* site in the gene, compound heterozygosity +occurs when the individual’s phenotype is caused by two heterozygous recessive +alleles at *different* sites in a particular gene. + +To detect compound heterozygosity, the tool looks for two heterozygous variants +impacting the same gene at different loci. The complicating factor is that this +is a case of *recessive* inheritance and as such, we must also require that the +consequential alleles at each heterozygous site were inherited on different +chromosomes (one from each parent). Hence, where possible, the tool will phase +by transmission. + +Criteria (default): + +- All affected individuals must be heterozygous at both sites. +- No unaffected can be homozygous alternate at either site. +- Neither parent of an affected sample can be homozygous reference at both + sites. +- If any unphased-unaffected is het at both sites, the site will be given lower + priority. +- No phased-unaffected can be heterozygous at both sites. + + a. ``--allow-unaffected`` keeps sites where a phased unaffected shares the + het-pair + b. unphased, unaffected that share the het pair are counted and reported for + each candidate pair. +- Candidates where an affected from the same family does NOT share the same het + pair are removed. +- Sites are automatically phased by transmission when parents are present in + order to remove false positive candidates. 
+ + If data from one or both parents are unavailable and the child’s data was not + phased prior to loading into GEMINI, all comp_het variant pairs will + automatically be given at most priority == 2. If there’s only a single parent + and both the parent and the affected are HET at both sites, the candidate + will have priority 3. + +Criteria (``--pattern-only``): + +- Kid must be HET at both sites. +- Kid must have alts on different chromosomes. +- Neither parent can be HOM_ALT at either site. +- If either parent is phased at both sites and matches the kid, it’s excluded. +- When the above criteria are met, and both parents and kid are phased or + parents are HET at different sites, the priority is 1. +- If either parent is HET at both sites, priority is reduced. +- If both parents are not phased, the priority is 2. +- For every parent that’s a het at both sites, the priority is incremented by 1. +- The priority in a family is the minimum found among all kids. + +---- + +*Violation of Mendelian laws* + +The tool can be used to detect the following kinds of non-Mendelian patterns: + +- loss of heterozygosity (LOH) events +- de-novo mutations +- implausible de-novo mutations +- potential cases of uniparental disomy + +Criteria: + +- LOH: child and one parent are opposite homozygotes; other parent is HET +- plausible de novo: kid is het. parents are same homozygotes +- implausible de novo: kid is homozygote. parents are same homozygotes and opposite to kid. +- uniparental disomy: parents are opposite homozygotes; kid is homozygote + +]]> + </help> + <expand macro="citations"/> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gemini_macros.xml Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,254 @@ +<macros> + <!-- gemini version to be used --> + <token name="@VERSION@">0.20.1</token> + <!-- minimal annotation files version required by this version of gemini --> + <token name="@DB_VERSION@">200</token> + + <xml name="requirements"> + <requirements> + <requirement type="package" version="@VERSION@">gemini</requirement> + <yield /> + </requirements> + </xml> + + <xml name="version_command"> + <version_command>gemini --version</version_command> + </xml> + + <xml name="stdio"> + <stdio> + <exit_code range="1:" /> + <exit_code range=":-1" /> + <regex match="Error:" /> + <regex match="Exception:" /> + <yield /> + </stdio> + </xml> + + <xml name="citations"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1003153</citation> + <yield /> + </citations> + </xml> + + <xml name="annotation_dir"> + <param name="annotation_databases" type="select" label="Choose a gemini annotation source"> + <options from_data_table="gemini_versioned_databases"> + <filter type="sort_by" column="0" /> + <filter type="static_value" column="2" value="@DB_VERSION@" /> + </options> + </param> + </xml> + + <xml name="infile"> + <param name="infile" type="data" format="gemini.sqlite" label="GEMINI database" help="Only files with version @VERSION@ are accepted." 
> + <options options_filter_attribute="metadata.gemini_version" > + <filter type="add_value" value="@VERSION@" /> + </options> + </param> + </xml> + + <xml name="add_header_column"> + <param argument="--header" name="header" type="boolean" truevalue="--header" falsevalue="" checked="True" + label="Add a header of column names to the output" /> + </xml> + + <xml name="column_filter" token_help="" token_minimalset="variant_id, gene"> + <conditional name="report"> + <param name="report_selector" type="select" + label="Set of columns to include in the variant report table" + help="@HELP@"> + <option value="minimal">Minimal (report only a preconfigured minimal set of columns)</option> + <option value="full">Full (report all columns defined in the GEMINI database variants table)</option> + <option value="custom">Custom (report user-specified columns)</option> + </param> + <when value="full" /> + <when value="minimal"> + <param name="columns" type="hidden" value="@MINIMALSET@" /> + <param name="extra_cols" type="hidden" value="" /> + </when> + <when value="custom"> + <param name="columns" type="select" display="checkboxes" multiple="true" optional="true" + label="Choose columns to include in the report" help="(--columns)"> + <option value="gene">gene</option> + <option value="chrom">chrom</option> + <option value="start">start</option> + <option value="end">end</option> + <option value="ref">ref</option> + <option value="alt">alt</option> + <option value="impact">impact</option> + <option value="impact_severity">impact_severity</option> + <option value="max_aaf_all">alternative allele frequency (max_aaf_all)</option> + </param> + <param name="extra_cols" type="text" + label="Additional columns (comma-separated)" + help="Column must be specified by the exact name they have in the GEMINI database, e.g., is_exonic or num_hom_alt, but, for genotype columns, GEMINI wildcard syntax is supported. 
The order of columns in the list is maintained in the output."> + <expand macro="sanitize_query" /> + </param> + </when> + </conditional> + </xml> + + <xml name="filter" token_argument="--filter"> + <param argument="@ARGUMENT@" name="filter" type="text" + label="Additional constraints expressed in SQL syntax" + help="Constraints defined here will become the WHERE clause of the SQL query issued to the GEMINI database. E.g. alt='G' or impact_severity = 'HIGH'."> + <expand macro="sanitize_query" /> + </param> + </xml> + + <xml name="sanitize_query"> + <sanitizer invalid_char=""> + <valid initial="string.printable"> + <remove value="'" /> + </valid> + <mapping initial="none"> + <add source="'" target="'"'"'" /> + </mapping> + </sanitizer> + </xml> + + <xml name="lenient" token_argument="--lenient" token_truevalue="--lenient" token_help="The exact consequence of this setting depends on the type of inheritance pattern you are looking for (see the tool help below)."> + <param argument="@ARGUMENT@" name="lenient" type="boolean" truevalue="@TRUEVALUE@" falsevalue="" checked="False" + label="Include hits with less convincing inheritance patterns" + help= "@HELP@" /> + </xml> + + <xml name="unaffected"> + <param argument="--allow-unaffected" name="allow_unaffected" type="boolean" truevalue="--allow-unaffected" falsevalue="" checked="False" + label="Report candidates shared by unaffected samples" + help="Activating this option will enable the reporting of variants as candidate causative even if they are shared by unaffected samples in the family tree. The default will only report variants that are unique to affected samples."/> + </xml> + + <xml name="min_kindreds" token_label="Minimum number of families with a candidate variant for a gene to be reported" token_help="This is the number of families required to have a variant fitting the inheritance model in the same gene in order for the gene and its variants to be reported. 
For example, we may only be interested in candidates where at least 4 families have a variant (with a fitting inheritance pattern) in that gene."> + <param argument="--min-kindreds" name="min_kindreds" type="integer" value="1" min="1" + label="@LABEL@" + help="@HELP@" /> + </xml> + + <xml name="insert_constraint" token_max_repeat="1"> + <repeat name="constraint" title="Additional constraints on variants" default="0" max="@MAX_REPEAT@"> + <expand macro="filter" /> + <yield /> + </repeat> + </xml> + + <xml name="overwritable_where_default" token_default_where=""> + <param name="overwrite_default_filter" type="boolean" checked="false" + label="Overwrite the default constraint of this tool" + help="By default, this tool restricts its analysis to @DEFAULT_WHERE@ and this constraint is applied on top of any constraint expressed above. With this option here selected, your custom constraint, if given, will overwrite the default instead." /> + </xml> + + <xml name="gt_filter" token_default_repeat="0" token_min_repeat="0" token_max_repeat="1"> + <repeat name="filter_by_genotype" title="Genotype filter expression" default="@DEFAULT_REPEAT@" min="@MIN_REPEAT@" max="@MAX_REPEAT@"> + <param argument="--gt-filter" name="gt_filter" type="text" value="" area="True" size="5x50" + label="Restrictions to apply to genotype values" help=""> + <expand macro="sanitize_query" /> + <validator type="expression" message="Genotype filter expression cannot be empty">value.strip()</validator> + </param> + <yield /> + </repeat> + </xml> + + <xml name="sample_filter"> + <repeat name="filter_by_sample" title="Sample filter expression" default="0" max="1"> + <param argument="--sample-filter" name="sample_filter" type="text" area="True" size="5x50" + label="SQL filter to use to filter the sample table" help=""> + <expand macro="sanitize_query" /> + <validator type="expression" message="Sample filter expression cannot be empty">value.strip()</validator> + </param> + <param argument="--in" name="in" 
type="select" + label="A variant must be in either all, none or any samples passing the sample-query filter" + help=""> + <option value="">Return a variant if it is found in any sample passing the sample filter. (default) </option> + <option value="--in all">Return a variant if it is found in ALL samples passing the sample filter. (all)</option> + <option value="--in none">Return a variant if it is found in NO sample passing the sample filter. (none)</option> + <option value="--in only">Return a variant if it is found in any sample passing the sample filter, and in NO sample NOT passing it. (only)</option> + <option value="--in only all">Return a variant if is found in ALL samples passing the sample filter, and in NO sample NOT passing it. (only all)</option> + </param> + <expand macro="min_kindreds" + label="Minimum number of families in which a variant must pass the sample filter" help=""/> + <param argument="--family-wise" name="family_wise" type="boolean" truevalue="--family-wise" falsevalue="" checked="False" + label="Apply the sample-filter on a family-wise basis" help="If a variant passes the sample filter in at least the minimum number of families specified above it is retained." /> + </repeat> + </xml> + + <xml name="region_filter"> + <repeat name="regions" title="Region Filter" default="0" min="0" + help="Filter variant sites by their position in the genome. 
If multiple Region Filters are specified, all variants that fall in ONE of the regions are reported."> + <param name="chrom" type="text" label="Chromosome"> + <validator type="expression" message="A chromosome identifier is required when specifying a region filter">value.strip()</validator> + </param> + <param name="start" type="text" label="Region Start"> + <validator type="expression" message="an integer number is required">not value or value.isdigit()</validator> + </param> + <param name="stop" type="text" label="Region End"> + <validator type="expression" message="an integer number is required">not value or value.isdigit()</validator> + </param> + </repeat> + </xml> + + <token name="@PROVIDE_ANNO_DATA@"><![CDATA[ + mkdir gemini && + ln -s '${annotation_databases.fields.path}/gemini/data' gemini/data && + export GEMINI_CONFIG='${annotation_databases.fields.path}' && + ]]></token> + + <token name="@MULTILN_SQL_EXPR_TO_CMDLN@"> + #set $sql_expr = str($multiline_sql_expr).strip() + #if str($sql_expr): + #set $sql_expr = $sql_expr.replace('\r\n', '\n') + #set $sql_expr = $sql_expr.replace('\r', '\n') + #set $sql_expr = $sql_expr.replace('\\\n', ' ') + $cmdln_param '$sql_expr' + #end if + </token> + + <token name="@SET_COLS@"> + #if str($report.report_selector) == 'full': + #set cols = "*" + #else: + #if $report.columns and str($report.columns) != '': + #set $cols = str($report.columns) + #else + #set $cols = '' + #end if + #if str($report.extra_cols).strip(): + #if $cols: + #set $cols = $cols + ', ' + str($report.extra_cols) + #else: + #set $cols = str($report.extra_cols) + #end if + #end if + #if not $cols: + #set $cols = "variant_id, gene" + #end if + #end if + </token> + + <token name="@COLUMN_SELECT@"> + @SET_COLS@ + #if $cols != "*" + --columns '$cols' + #end if + </token> + + <token name="@PARSE_REGION_ELEMENTS@"><![CDATA[ + #set $region_elements = [] + #for $r in $regions: + ## The actual chromosome name needs to be single-quoted + ## in SQL, so we need to 
quote the single quotes like the + ## sanitize_query macro would if the whole was a parameter. + #set $r_elements = ["chrom = '\"'\"'%s'\"'\"'" % str($r.chrom).strip()] + #if str($r.start).strip(): + #silent $r_elements.append("start >= %d" % int($r.start)) + #end if + #if str($r.stop).strip(): + #silent $r_elements.append("end <= %d" % int($r.stop)) + #end if + #silent $region_elements.append("(%s)" % " AND ".join($r_elements)) + #end for + ]]> + </token> +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme.rst Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,65 @@ +========================= +Galaxy wrapper for GEMINI +========================= + + +GEMINI: a flexible framework for exploring genome variation + +GEMINI (GEnome MINIng) is designed to be a flexible framework for exploring genetic variation in the context of +the wealth of genome annotations available for the human genome. By placing genetic variants, sample genotypes, +and useful genome annotations into an integrated database framework, GEMINI provides a simple, flexible, yet very +powerful system for exploring genetic variation for disease and population genetics. + +Using the GEMINI framework begins by loading a VCF file into a database. Each variant is automatically +annotated by comparing it to several genome annotations from sources such as ENCODE tracks, UCSC tracks, +OMIM, dbSNP, KEGG, and HPRD. All of this information is stored in a portable SQLite database that allows +one to explore and interpret both coding and non-coding variation using “off-the-shelf” tools or an +enhanced SQL engine. + +Please also see the original `manuscript <http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1003153>`_. + + +============ +Installation +============ + +It is recommended to install this wrapper via the `Galaxy Tool Shed`_. + +.. 
_`Galaxy Tool Shed`: https://testtoolshed.g2.bx.psu.edu/view/iuc/gemini + + +======= +History +======= +- 0.9.1: Initial public release + + +==================== +Detailed description +==================== + +View the original GEMINI documentation: http://gemini.readthedocs.org/en/latest/index.html + + +=============================== +Wrapper Licence (MIT/BSD style) +=============================== + +Permission to use, copy, modify, and distribute this software and its +documentation with or without modifications and for any purpose and +without fee is hereby granted, provided that any copyright notices +appear in all copies and that both those copyright notices and this +permission notice appear in supporting documentation, and that the +names of the contributors or copyright holders not be used in +advertising or publicity pertaining to distribution of the software +without specific prior permission. + +THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL +WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT +OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +OR PERFORMANCE OF THIS SOFTWARE. +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,4 @@ +<?xml version="1.0" ?> +<repositories description="This requires the GEMINI data manager definition to install all required annotation databases."> + <repository changeset_revision="f57426daa04d" name="data_manager_gemini_database_downloader" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu"/> +</repositories> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/anno.bed Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,3 @@ +chr3 187000000 187150000 +chr3 187150000 187300000 +chr3 187300000 187450000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_actionable_mutations_result.tabular Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,1 @@ +tum_name chrom start end ref alt gene impact is_somatic in_cosmic_census dgidb_info
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_amend.ped Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,10 @@ +#family_id sample_id paternal_id maternal_id sex phenotype +1 1_dad 0 0 -1 1 +1 1_mom 0 0 -1 1 +1 1_kid 1_dad 1_mom -1 2 +2 2_dad 0 0 -1 1 +2 2_mom 0 0 -1 1 +2 2_kid 2_dad 2_mom -1 2 +3 3_dad 0 0 -1 1 +3 3_mom 0 0 -1 1 +3 3_kid 3_dad 3_mom -1 2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_amend.vcf Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,127 @@ +##fileformat=VCFv4.1 +##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)"> +##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. 
Ref base qualities"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered"> +##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?"> +##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions"> +##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias"> +##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction"> +##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes"> +##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation"> +##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality"> +##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads"> +##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities"> +##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth"> +##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. 
Ref read position bias"> +##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[bam/M10478.conc.on.pos.realigned.bam, bam/M10475.conc.on.pos.realigned.bam, bam/M10500.conc.on.pos.realigned.bam, bam/M128215.conc.on.pos.realigned.bam] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=10 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) min_base_quality_score=17 max_deletion_fraction=0.05 multiallelic=false max_alternate_alleles=5 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 bandedIndel=false indelDebug=false ignoreSNPAlleles=false dbsnp=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_mismatching_base_and_quals=false" +##contig=<ID=chr1,length=249250621,assembly=hg19> 
+##contig=<ID=chr10,length=135534747,assembly=hg19> +##contig=<ID=chr11,length=135006516,assembly=hg19> +##contig=<ID=chr11_gl000202_random,length=40103,assembly=hg19> +##contig=<ID=chr12,length=133851895,assembly=hg19> +##contig=<ID=chr13,length=115169878,assembly=hg19> +##contig=<ID=chr14,length=107349540,assembly=hg19> +##contig=<ID=chr15,length=102531392,assembly=hg19> +##contig=<ID=chr16,length=90354753,assembly=hg19> +##contig=<ID=chr17,length=81195210,assembly=hg19> +##contig=<ID=chr17_ctg5_hap1,length=1680828,assembly=hg19> +##contig=<ID=chr17_gl000203_random,length=37498,assembly=hg19> +##contig=<ID=chr17_gl000204_random,length=81310,assembly=hg19> +##contig=<ID=chr17_gl000205_random,length=174588,assembly=hg19> +##contig=<ID=chr17_gl000206_random,length=41001,assembly=hg19> +##contig=<ID=chr18,length=78077248,assembly=hg19> +##contig=<ID=chr18_gl000207_random,length=4262,assembly=hg19> +##contig=<ID=chr19,length=59128983,assembly=hg19> +##contig=<ID=chr19_gl000208_random,length=92689,assembly=hg19> +##contig=<ID=chr19_gl000209_random,length=159169,assembly=hg19> +##contig=<ID=chr1_gl000191_random,length=106433,assembly=hg19> +##contig=<ID=chr1_gl000192_random,length=547496,assembly=hg19> +##contig=<ID=chr2,length=243199373,assembly=hg19> +##contig=<ID=chr20,length=63025520,assembly=hg19> +##contig=<ID=chr21,length=48129895,assembly=hg19> +##contig=<ID=chr21_gl000210_random,length=27682,assembly=hg19> +##contig=<ID=chr22,length=51304566,assembly=hg19> +##contig=<ID=chr3,length=198022430,assembly=hg19> +##contig=<ID=chr4,length=191154276,assembly=hg19> +##contig=<ID=chr4_ctg9_hap1,length=590426,assembly=hg19> +##contig=<ID=chr4_gl000193_random,length=189789,assembly=hg19> +##contig=<ID=chr4_gl000194_random,length=191469,assembly=hg19> +##contig=<ID=chr5,length=180915260,assembly=hg19> +##contig=<ID=chr6,length=171115067,assembly=hg19> +##contig=<ID=chr6_apd_hap1,length=4622290,assembly=hg19> +##contig=<ID=chr6_cox_hap2,length=4795371,assembly=hg19> 
+##contig=<ID=chr6_dbb_hap3,length=4610396,assembly=hg19> +##contig=<ID=chr6_mann_hap4,length=4683263,assembly=hg19> +##contig=<ID=chr6_mcf_hap5,length=4833398,assembly=hg19> +##contig=<ID=chr6_qbl_hap6,length=4611984,assembly=hg19> +##contig=<ID=chr6_ssto_hap7,length=4928567,assembly=hg19> +##contig=<ID=chr7,length=159138663,assembly=hg19> +##contig=<ID=chr7_gl000195_random,length=182896,assembly=hg19> +##contig=<ID=chr8,length=146364022,assembly=hg19> +##contig=<ID=chr8_gl000196_random,length=38914,assembly=hg19> +##contig=<ID=chr8_gl000197_random,length=37175,assembly=hg19> +##contig=<ID=chr9,length=141213431,assembly=hg19> +##contig=<ID=chr9_gl000198_random,length=90085,assembly=hg19> +##contig=<ID=chr9_gl000199_random,length=169874,assembly=hg19> +##contig=<ID=chr9_gl000200_random,length=187035,assembly=hg19> +##contig=<ID=chr9_gl000201_random,length=36148,assembly=hg19> +##contig=<ID=chrM,length=16571,assembly=hg19> +##contig=<ID=chrUn_gl000211,length=166566,assembly=hg19> +##contig=<ID=chrUn_gl000212,length=186858,assembly=hg19> +##contig=<ID=chrUn_gl000213,length=164239,assembly=hg19> +##contig=<ID=chrUn_gl000214,length=137718,assembly=hg19> +##contig=<ID=chrUn_gl000215,length=172545,assembly=hg19> +##contig=<ID=chrUn_gl000216,length=172294,assembly=hg19> +##contig=<ID=chrUn_gl000217,length=172149,assembly=hg19> +##contig=<ID=chrUn_gl000218,length=161147,assembly=hg19> +##contig=<ID=chrUn_gl000219,length=179198,assembly=hg19> +##contig=<ID=chrUn_gl000220,length=161802,assembly=hg19> +##contig=<ID=chrUn_gl000221,length=155397,assembly=hg19> +##contig=<ID=chrUn_gl000222,length=186861,assembly=hg19> +##contig=<ID=chrUn_gl000223,length=180455,assembly=hg19> +##contig=<ID=chrUn_gl000224,length=179693,assembly=hg19> +##contig=<ID=chrUn_gl000225,length=211173,assembly=hg19> +##contig=<ID=chrUn_gl000226,length=15008,assembly=hg19> +##contig=<ID=chrUn_gl000227,length=128374,assembly=hg19> +##contig=<ID=chrUn_gl000228,length=129120,assembly=hg19> 
+##contig=<ID=chrUn_gl000229,length=19913,assembly=hg19> +##contig=<ID=chrUn_gl000230,length=43691,assembly=hg19> +##contig=<ID=chrUn_gl000231,length=27386,assembly=hg19> +##contig=<ID=chrUn_gl000232,length=40652,assembly=hg19> +##contig=<ID=chrUn_gl000233,length=45941,assembly=hg19> +##contig=<ID=chrUn_gl000234,length=40531,assembly=hg19> +##contig=<ID=chrUn_gl000235,length=34474,assembly=hg19> +##contig=<ID=chrUn_gl000236,length=41934,assembly=hg19> +##contig=<ID=chrUn_gl000237,length=45867,assembly=hg19> +##contig=<ID=chrUn_gl000238,length=39939,assembly=hg19> +##contig=<ID=chrUn_gl000239,length=33824,assembly=hg19> +##contig=<ID=chrUn_gl000240,length=41933,assembly=hg19> +##contig=<ID=chrUn_gl000241,length=42152,assembly=hg19> +##contig=<ID=chrUn_gl000242,length=43523,assembly=hg19> +##contig=<ID=chrUn_gl000243,length=43341,assembly=hg19> +##contig=<ID=chrUn_gl000244,length=39929,assembly=hg19> +##contig=<ID=chrUn_gl000245,length=36651,assembly=hg19> +##contig=<ID=chrUn_gl000246,length=38154,assembly=hg19> +##contig=<ID=chrUn_gl000247,length=36422,assembly=hg19> +##contig=<ID=chrUn_gl000248,length=39786,assembly=hg19> +##contig=<ID=chrUn_gl000249,length=38502,assembly=hg19> +##contig=<ID=chrX,length=155270560,assembly=hg19> +##contig=<ID=chrY,length=59373566,assembly=hg19> +##reference=file:///m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa +##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. 
Format: Consequence|Codons|Amino_acids|Gene|HGNC|Feature|EXON|PolyPhen|SIFT"> +##SnpEffVersion="SnpEff 3.0g (build 2012-08-31), by Pablo Cingolani" +##SnpEffCmd="SnpEff -i vcf -o vcf GRCh37.66 test4.vep.vcf " +##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )' "> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1_dad 1_mom 1_kid 2_dad 2_mom 2_kid 3_dad 3_mom 3_kid +chr10 1142208 . T C 3404.3 . AC=8;AF=1.00;AN=8;DP=122;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=2.6747;MQ=36.00;MQ0=0;QD=27.90;CSQ=intron_variant|||ENSG00000047056|WDR37|ENST00000263150|||,downstream_gene_variant|||ENSG00000047056|WDR37|ENST00000436154|||,intron_variant|||ENSG00000047056|WDR37|ENST00000358220|||,stop_lost|Tga/Cga|*/R|ENSG00000047056|WDR37|ENST00000381329|9/9||;EFF=DOWNSTREAM(MODIFIER||||208|WDR37|protein_coding|CODING|ENST00000436154|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000263150|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000358220|),STOP_LOST(HIGH|MISSENSE|Tga/Cga|*250R|249|WDR37|protein_coding|CODING|ENST00000381329|exon_10_1142110_1142566) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,24:24:66.14:729,66,0 0/1:1,37:59:87.16:940,87,0 0/1:0,29:49:78.20:899,78,0 1/1:0,24:64:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 48003992 . C T 1047.87 . 
AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 48004992 . C T 1047.87 . AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 135336656 . G A 38.34 . 
AC=4;AF=1.00;AN=4;DP=2;Dels=0.00;FS=0.000;HRun=4;HaplotypeScore=0.0000;MQ=37.00;MQ0=0;QD=19.17;CSQ=upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000418356|||,intron_variant&nc_transcript_variant|||ENSG00000214279||ENST00000488261|||,intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541261|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000421586|||,intron_variant|||ENSG00000130649|CYP2E1|ENST00000463117|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000252945|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541080|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000477500|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000480558|||,intron_variant|||ENSG00000214279||ENST00000356567|||;EFF=INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),INTRON(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000463117|),INTRON(MODIFIER||||693|RP11-108K14.4.1|protein_coding|CODING|ENST00000356567|),INTRON(MODIFIER|||||RP11-108K14.4.1|retained_intron|CODING|ENST00000488261|),UPSTREAM(MODIFIER||||305|CYP2E1|protein_coding|CODING|ENST00000418356|),UPSTREAM(MODIFIER||||355|CYP2E1|protein_coding|CODING|ENST00000421586|),UPSTREAM(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000252945|),UPSTREAM(MODIFIER||||85|CYP2E1|protein_coding|CODING|ENST00000541261|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000477500|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000480558|) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 135369532 . T C 122.62 . 
AC=2;AF=0.25;AN=8;BaseQRankSum=2.118;DP=239;Dels=0.00;FS=5.194;HRun=2;HaplotypeScore=5.7141;MQ=36.02;MQ0=0;MQRankSum=0.082;QD=2.31;ReadPosRankSum=-0.695;CSQ=missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000303903|9/13|benign(0.001)|tolerated(1),missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000432597|10/14|benign(0)|tolerated(1),downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000460441|||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000343131|9/13|benign(0.001)|tolerated(1),intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000171772|SYCE1|ENST00000479535|6/10||,downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000482127|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000130649|CYP2E1|ENST00000368520|6/6||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000368517|9/13|benign(0)|tolerated(1);EFF=DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000460441|),DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000482127|),EXON(MODIFIER|||||CYP2E1|retained_intron|CODING|ENST00000368520|),EXON(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000479535|),INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000368517|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000432597|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|318|SYCE1|protein_coding|CODING|ENST00000303903|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|351|SYCE1|protein_coding|CODING|ENST00000343131|exon_10_135369485_135369551) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 
0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,24:24:66.14:729,66,0 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_burden_calpha_template.tabular Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,4 @@ +gene T c Z p_value +SYCE1 .+ .+ .+ .+ +WDR37 .+ .+ .+ .+ +ASAH2C .+ .+ .+ .+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_burden_count_highimpact_result.tabular Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,2 @@ +gene 1_kid 3_kid +WDR37 1 2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_burden_count_nonsynonymous_result.tabular Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,5 @@ +gene 1_dad 1_kid 1_mom 2_dad 2_kid 2_mom 3_dad 3_kid 3_mom +SYCE1 0 1 0 0 1 0 0 1 0 +SPRN 0 1 0 0 1 0 1 1 1 +WDR37 0 1 0 0 0 0 0 2 0 +ASAH2C 2 3 2 1 3 1 1 2 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_load_input.vcf Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,70 @@ +##fileformat=VCFv4.1 +##INFO=<ID=LDAF,Number=1,Type=Float,Description="MLE Allele Frequency Accounting for LD"> +##INFO=<ID=AVGPOST,Number=1,Type=Float,Description="Average posterior probability from MaCH/Thunder"> +##INFO=<ID=RSQ,Number=1,Type=Float,Description="Genotype imputation quality from MaCH/Thunder"> +##INFO=<ID=ERATE,Number=1,Type=Float,Description="Per-marker Mutation rate from MaCH/Thunder"> +##INFO=<ID=THETA,Number=1,Type=Float,Description="Per-marker Transition rate from MaCH/Thunder"> +##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants"> +##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants"> +##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record"> +##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints"> +##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints"> +##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles"> +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=AC,Number=.,Type=Integer,Description="Alternate Allele Count"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Allele Count"> +##ALT=<ID=DEL,Description="Deletion"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=DS,Number=1,Type=Float,Description="Genotype dosage from MaCH/Thunder"> +##FORMAT=<ID=GL,Number=.,Type=Float,Description="Genotype Likelihoods"> +##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele, ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/pilot_data/technical/reference/ancestral_alignments/README"> 
+##INFO=<ID=AF,Number=1,Type=Float,Description="Global Allele Frequency based on AC/AN"> +##INFO=<ID=AMR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AMR based on AC/AN"> +##INFO=<ID=ASN_AF,Number=1,Type=Float,Description="Allele Frequency for samples from ASN based on AC/AN"> +##INFO=<ID=AFR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AFR based on AC/AN"> +##INFO=<ID=EUR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from EUR based on AC/AN"> +##INFO=<ID=VT,Number=1,Type=String,Description="indicates what type of variant the line represents"> +##INFO=<ID=SNPSOURCE,Number=.,Type=String,Description="indicates if a snp was called when analysing the low coverage or exome alignment data"> +##reference=GRCh37 +##SnpEffVersion="SnpEff 3.0f (build 2012-08-23), by Pablo Cingolani" +##SnpEffCmd="SnpEff GRCh37.66 -i vcf -o vcf -c /Users/arq5x/src/other/snpEff_3_0/snpEff.config ALL.wgs.integrated_phase1_v3.20101123.snps_indels_sv.sites.vcf.gz " +##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )' "> +#CHROM POS ID REF ALT QUAL FILTER INFO +1 10583 rs58108140 G A 100.0 PASS 
AVGPOST=0.7707;RSQ=0.4319;LDAF=0.2327;ERATE=0.0161;AN=2184;VT=SNP;AA=.;THETA=0.0046;AC=314;SNPSOURCE=LOWCOV;AF=0.14;ASN_AF=0.13;AMR_AF=0.17;AFR_AF=0.04;EUR_AF=0.21;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|) +1 10611 rs189107123 C G 100.0 PASS AN=2184;THETA=0.0077;VT=SNP;AA=.;AC=41;ERATE=0.0048;SNPSOURCE=LOWCOV;AVGPOST=0.9330;LDAF=0.0479;RSQ=0.3475;AF=0.02;ASN_AF=0.01;AMR_AF=0.03;AFR_AF=0.01;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|) +1 13302 rs180734498 C T 100.0 PASS 
THETA=0.0048;AN=2184;AC=249;VT=SNP;AA=.;RSQ=0.6281;LDAF=0.1573;SNPSOURCE=LOWCOV;AVGPOST=0.8895;ERATE=0.0058;AF=0.11;ASN_AF=0.02;AMR_AF=0.08;AFR_AF=0.21;EUR_AF=0.14;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||209|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),INTRON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|) +1 13327 rs144762171 G C 100.0 PASS 
AVGPOST=0.9698;AN=2184;VT=SNP;AA=.;RSQ=0.6482;AC=59;SNPSOURCE=LOWCOV;ERATE=0.0012;LDAF=0.0359;THETA=0.0204;AF=0.03;ASN_AF=0.02;AMR_AF=0.03;AFR_AF=0.02;EUR_AF=0.04;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||209|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),INTRON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|) +1 13957 . 
TC T 28.0 PASS AA=TC;AC=35;AN=2184;VT=INDEL;AVGPOST=0.8711;RSQ=0.2501;LDAF=0.0788;THETA=0.0100;ERATE=0.0065;AF=0.02;ASN_AF=0.01;AMR_AF=0.02;AFR_AF=0.02;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|) +1 13980 rs151276478 T C 100.0 PASS 
AN=2184;AC=45;ERATE=0.0034;THETA=0.0139;RSQ=0.3603;LDAF=0.0525;VT=SNP;AA=.;AVGPOST=0.9221;SNPSOURCE=LOWCOV;AF=0.02;ASN_AF=0.02;AMR_AF=0.02;AFR_AF=0.01;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|) +1 30923 rs140337953 G T 100.0 PASS 
AC=1584;AA=T;AN=2184;RSQ=0.5481;VT=SNP;THETA=0.0162;SNPSOURCE=LOWCOV;ERATE=0.0183;LDAF=0.6576;AVGPOST=0.7335;AF=0.73;ASN_AF=0.89;AMR_AF=0.80;AFR_AF=0.48;EUR_AF=0.73;EFF=DOWNSTREAM(MODIFIER|||||FAM138A|processed_transcript|CODING|ENST00000461467|),DOWNSTREAM(MODIFIER|||||FAM138A|protein_coding|CODING|ENST00000417324|),DOWNSTREAM(MODIFIER|||||MIR1302-10|miRNA|NON_CODING|ENST00000408384|),INTRON(MODIFIER||||177|MIR1302-10|antisense|NON_CODING|ENST00000469289|),INTRON(MODIFIER||||236|MIR1302-10|antisense|NON_CODING|ENST00000473358|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|) +1 46402 . C CTGT 31.0 PASS AA=.;RSQ=0.0960;AN=2184;AC=8;VT=INDEL;AVGPOST=0.8325;THETA=0.0121;ERATE=0.0072;LDAF=0.0903;AF=0.0037;ASN_AF=0.0017;AFR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 47190 . 
G GA 192.0 PASS AA=G;AVGPOST=0.9041;AN=2184;AC=29;VT=INDEL;LDAF=0.0628;THETA=0.0153;RSQ=0.2883;ERATE=0.0041;AF=0.01;AMR_AF=0.0028;AFR_AF=0.06;EFF=INTERGENIC(MODIFIER|||||||||) +1 51476 rs187298206 T C 100.0 PASS ERATE=0.0021;AA=C;AC=18;AN=2184;VT=SNP;THETA=0.0103;LDAF=0.0157;SNPSOURCE=LOWCOV;AVGPOST=0.9819;RSQ=0.5258;AF=0.01;ASN_AF=0.01;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 51479 rs116400033 T A 100.0 PASS RSQ=0.7414;AVGPOST=0.9085;AA=T;AN=2184;THETA=0.0131;AC=235;VT=SNP;LDAF=0.1404;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.11;ASN_AF=0.0035;AMR_AF=0.16;AFR_AF=0.03;EUR_AF=0.22;EFF=INTERGENIC(MODIFIER|||||||||) +1 51914 rs190452223 T G 100.0 PASS ERATE=0.0004;AVGPOST=0.9985;THETA=0.0159;AA=T;AN=2184;VT=SNP;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4089;LDAF=0.0012;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||) +1 51935 rs181754315 C T 100.0 PASS THETA=0.0126;AA=C;AN=2184;RSQ=0.1888;AVGPOST=0.9972;LDAF=0.0015;VT=SNP;AC=0;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0;EFF=INTERGENIC(MODIFIER|||||||||) +1 51954 rs185832753 G C 100.0 PASS LDAF=0.0021;AA=G;AN=2184;RSQ=0.4692;AVGPOST=0.9975;VT=SNP;SNPSOURCE=LOWCOV;THETA=0.0029;ERATE=0.0006;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 52058 rs62637813 G C 100.0 PASS AA=C;ERATE=0.0057;AN=2184;AVGPOST=0.9264;VT=SNP;RSQ=0.4882;AC=64;SNPSOURCE=LOWCOV;LDAF=0.0620;THETA=0.0069;AF=0.03;ASN_AF=0.0017;AMR_AF=0.04;AFR_AF=0.02;EUR_AF=0.05;EFF=INTERGENIC(MODIFIER|||||||||) +1 52144 rs190291950 T A 100.0 PASS THETA=0.0093;ERATE=0.0013;LDAF=0.0156;AA=T;AN=2184;VT=SNP;RSQ=0.5220;AVGPOST=0.9811;SNPSOURCE=LOWCOV;AC=21;AF=0.01;ASN_AF=0.0035;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 52185 . 
TTAA T 244.0 PASS AA=.;AN=2184;LDAF=0.0124;VT=INDEL;AC=10;THETA=0.0232;RSQ=0.4271;AVGPOST=0.9840;ERATE=0.0037;AF=0.0046;ASN_AF=0.0035;AMR_AF=0.02;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||) +1 52238 rs150021059 T G 100.0 PASS THETA=0.0132;AA=G;AN=2184;RSQ=0.6256;VT=SNP;ERATE=0.0026;AVGPOST=0.8617;SNPSOURCE=LOWCOV;AC=1941;LDAF=0.8423;AF=0.89;ASN_AF=0.99;AMR_AF=0.93;AFR_AF=0.64;EUR_AF=0.95;EFF=INTERGENIC(MODIFIER|||||||||) +1 53234 . CAT C 227.0 PASS AA=CAT;AVGPOST=0.9936;AN=2184;VT=INDEL;THETA=0.0119;AC=10;LDAF=0.0074;RSQ=0.6237;ERATE=0.0007;AF=0.0046;AMR_AF=0.0028;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 54353 rs140052487 C A 100.0 PASS THETA=0.0026;AA=C;AN=2184;AC=16;VT=SNP;RSQ=0.5074;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0146;ERATE=0.0058;AF=0.01;ASN_AF=0.01;AMR_AF=0.0028;AFR_AF=0.02;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||) +1 54421 rs146477069 A G 100.0 PASS ERATE=0.0013;AN=2184;AC=220;VT=SNP;RSQ=0.7869;AVGPOST=0.9461;AA=A;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.1190;AF=0.10;ASN_AF=0.25;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 54490 rs141149254 G A 100.0 PASS ERATE=0.0004;THETA=0.0074;AA=G;AN=2184;VT=SNP;RSQ=0.8366;AVGPOST=0.9646;AC=175;SNPSOURCE=LOWCOV;LDAF=0.0929;AF=0.08;ASN_AF=0.0035;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.15;EFF=INTERGENIC(MODIFIER|||||||||) +1 54676 rs2462492 C T 100.0 PASS LDAF=0.1528;RSQ=0.6989;AA=T;AN=2184;AC=267;VT=SNP;AVGPOST=0.8998;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0037;AF=0.12;ASN_AF=0.02;AMR_AF=0.20;AFR_AF=0.09;EUR_AF=0.18;EFF=INTERGENIC(MODIFIER|||||||||) +1 54753 rs143174675 T G 100.0 PASS AA=T;AN=2184;RSQ=0.6820;AC=65;VT=SNP;THETA=0.0080;ERATE=0.0016;SNPSOURCE=LOWCOV;AVGPOST=0.9697;LDAF=0.0399;AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.03;EFF=INTERGENIC(MODIFIER|||||||||) +1 55164 rs3091274 C A 100.0 PASS 
AN=2184;VT=SNP;ERATE=0.0045;AA=A;THETA=0.0162;SNPSOURCE=LOWCOV;AC=1955;RSQ=0.6373;AVGPOST=0.8686;LDAF=0.8489;AF=0.90;ASN_AF=0.99;AMR_AF=0.94;AFR_AF=0.65;EUR_AF=0.96;EFF=INTERGENIC(MODIFIER|||||||||) +1 55249 . C CTATGG 443.0 PASS AA=C;AVGPOST=0.9073;ERATE=0.0063;RSQ=0.5891;AN=2184;THETA=0.0038;VT=INDEL;AC=151;LDAF=0.0968;AF=0.07;ASN_AF=0.16;AMR_AF=0.08;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 55299 rs10399749 C T 100.0 PASS RSQ=0.7602;LDAF=0.2954;AN=2184;VT=SNP;ERATE=0.0051;AA=c;AC=554;SNPSOURCE=LOWCOV;AVGPOST=0.8845;THETA=0.0070;AF=0.25;ASN_AF=0.33;AMR_AF=0.21;AFR_AF=0.39;EUR_AF=0.13;EFF=INTERGENIC(MODIFIER|||||||||) +1 55313 rs182462964 A T 100.0 PASS ERATE=0.0004;RSQ=0.6112;AVGPOST=0.9994;AN=2184;VT=SNP;THETA=0.0057;AA=A;SNPSOURCE=LOWCOV;AC=1;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||) +1 55326 rs3107975 T C 100.0 PASS AA=C;ERATE=0.0074;AN=2184;THETA=0.0085;VT=SNP;SNPSOURCE=LOWCOV;AVGPOST=0.9622;AC=90;RSQ=0.6901;LDAF=0.0562;AF=0.04;ASN_AF=0.07;AMR_AF=0.02;AFR_AF=0.07;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 55330 rs185215913 G A 100.0 PASS ERATE=0.0005;AA=G;AN=2184;VT=SNP;THETA=0.0086;AVGPOST=0.9988;LDAF=0.0011;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4701;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||) +1 55367 rs190850374 G A 100.0 PASS ERATE=0.0004;THETA=0.0044;AA=G;AN=2184;VT=SNP;LDAF=0.0029;RSQ=0.3860;SNPSOURCE=LOWCOV;AVGPOST=0.9961;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 55388 rs182711216 C T 100.0 PASS THETA=0.0102;ERATE=0.0005;AA=C;AVGPOST=0.9983;AN=2184;LDAF=0.0010;VT=SNP;RSQ=0.2348;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||) +1 55394 rs2949420 T A 100.0 PASS AC=18;AN=2184;VT=SNP;AA=A;RSQ=0.4995;AVGPOST=0.9784;LDAF=0.0171;SNPSOURCE=LOWCOV;ERATE=0.0012;THETA=0.0063;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 55416 rs193242050 G A 100.0 PASS 
AA=G;AN=2184;AVGPOST=0.9944;VT=SNP;LDAF=0.0064;AC=9;THETA=0.0019;RSQ=0.6553;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0041;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 55427 rs183189405 T C 100.0 PASS THETA=0.0054;AA=T;AN=2184;VT=SNP;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;RSQ=0.2759;ERATE=0.0007;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||) +1 55816 rs187434873 G A 100.0 PASS AN=2184;THETA=0.0119;VT=SNP;AC=10;RSQ=0.4578;AA=A;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0108;ERATE=0.0007;AF=0.0046;AMR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 55850 rs191890754 C G 100.0 PASS AVGPOST=0.9921;AA=G;AN=2184;VT=SNP;RSQ=0.4083;THETA=0.0045;LDAF=0.0056;AC=5;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0023;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 55852 rs184233019 G C 100.0 PASS THETA=0.0137;AA=G;AN=2184;RSQ=0.5433;ERATE=0.0009;LDAF=0.0046;VT=SNP;AVGPOST=0.9953;AC=5;SNPSOURCE=LOWCOV;AF=0.0023;AMR_AF=0.01;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_versioned_databases.loc Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,3 @@ +## GEMINI versioned databases +#DownloadDate dbkey DBversion Description Path +1999-01-01 hg19 200 GEMINI annotations (test snapshot) ${__HERE__}/test-cache
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_windower_template.tabular Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,142 @@ +chr1 0 50000 . +chr1 50000000 50050000 . +chr1 100000000 100050000 . +chr1 150000000 150050000 . +chr1 200000000 200050000 . +chr10 0 50000 . +chr10 50000000 50050000 . +chr10 100000000 100050000 . +chr11 0 50000 . +chr11 50000000 50050000 . +chr11 100000000 100050000 . +chr11_gl000202_random 0 40103 . +chr12 0 50000 . +chr12 50000000 50050000 . +chr12 100000000 100050000 . +chr13 0 50000 . +chr13 50000000 50050000 . +chr13 100000000 100050000 . +chr14 0 50000 . +chr14 50000000 50050000 . +chr14 100000000 100050000 . +chr15 0 50000 . +chr15 50000000 50050000 . +chr15 100000000 100050000 . +chr16 0 50000 . +chr16 50000000 50050000 . +chr17 0 50000 . +chr17 50000000 50050000 . +chr17_ctg5_hap1 0 50000 . +chr17_gl000203_random 0 37498 . +chr17_gl000204_random 0 50000 . +chr17_gl000205_random 0 50000 . +chr17_gl000206_random 0 41001 . +chr18 0 50000 . +chr18 50000000 50050000 . +chr18_gl000207_random 0 4262 . +chr19 0 50000 . +chr19 50000000 50050000 . +chr19_gl000208_random 0 50000 . +chr19_gl000209_random 0 50000 . +chr1_gl000191_random 0 50000 . +chr1_gl000192_random 0 50000 . +chr2 0 50000 . +chr2 50000000 50050000 . +chr2 100000000 100050000 . +chr2 150000000 150050000 . +chr2 200000000 200050000 . +chr20 0 50000 . +chr20 50000000 50050000 . +chr21 0 50000 . +chr21_gl000210_random 0 27682 . +chr22 0 50000 . +chr22 50000000 50050000 . +chr3 0 50000 . +chr3 50000000 50050000 . +chr3 100000000 100050000 . +chr3 150000000 150050000 . +chr4 0 50000 . +chr4 50000000 50050000 . +chr4 100000000 100050000 . +chr4 150000000 150050000 . +chr4_ctg9_hap1 0 50000 . +chr4_gl000193_random 0 50000 . +chr4_gl000194_random 0 50000 . +chr5 0 50000 . +chr5 50000000 50050000 . +chr5 100000000 100050000 . +chr5 150000000 150050000 . +chr6 0 50000 . +chr6 50000000 50050000 . +chr6 100000000 100050000 . +chr6 150000000 150050000 . 
+chr6_apd_hap1 0 50000 . +chr6_cox_hap2 0 50000 . +chr6_dbb_hap3 0 50000 . +chr6_mann_hap4 0 50000 . +chr6_mcf_hap5 0 50000 . +chr6_qbl_hap6 0 50000 . +chr6_ssto_hap7 0 50000 . +chr7 0 50000 . +chr7 50000000 50050000 . +chr7 100000000 100050000 . +chr7 150000000 150050000 . +chr7_gl000195_random 0 50000 . +chr8 0 50000 . +chr8 50000000 50050000 . +chr8 100000000 100050000 . +chr8_gl000196_random 0 38914 . +chr8_gl000197_random 0 37175 . +chr9 0 50000 . +chr9 50000000 50050000 . +chr9 100000000 100050000 . +chr9_gl000198_random 0 50000 . +chr9_gl000199_random 0 50000 . +chr9_gl000200_random 0 50000 . +chr9_gl000201_random 0 36148 . +chrM 0 16571 . +chrUn_gl000211 0 50000 . +chrUn_gl000212 0 50000 . +chrUn_gl000213 0 50000 . +chrUn_gl000214 0 50000 . +chrUn_gl000215 0 50000 . +chrUn_gl000216 0 50000 . +chrUn_gl000217 0 50000 . +chrUn_gl000218 0 50000 . +chrUn_gl000219 0 50000 . +chrUn_gl000220 0 50000 . +chrUn_gl000221 0 50000 . +chrUn_gl000222 0 50000 . +chrUn_gl000223 0 50000 . +chrUn_gl000224 0 50000 . +chrUn_gl000225 0 50000 . +chrUn_gl000226 0 15008 . +chrUn_gl000227 0 50000 . +chrUn_gl000228 0 50000 . +chrUn_gl000229 0 19913 . +chrUn_gl000230 0 43691 . +chrUn_gl000231 0 27386 . +chrUn_gl000232 0 40652 . +chrUn_gl000233 0 45941 . +chrUn_gl000234 0 40531 . +chrUn_gl000235 0 34474 . +chrUn_gl000236 0 41934 . +chrUn_gl000237 0 45867 . +chrUn_gl000238 0 39939 . +chrUn_gl000239 0 33824 . +chrUn_gl000240 0 41933 . +chrUn_gl000241 0 42152 . +chrUn_gl000242 0 43523 . +chrUn_gl000243 0 43341 . +chrUn_gl000244 0 39929 . +chrUn_gl000245 0 36651 . +chrUn_gl000246 0 38154 . +chrUn_gl000247 0 36422 . +chrUn_gl000248 0 39786 . +chrUn_gl000249 0 38502 . +chrX 0 50000 . +chrX 50000000 50050000 . +chrX 100000000 100050000 . +chrX 150000000 150050000 . +chrY 0 50000 . +chrY 50000000 50050000 .
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/gemini-config.yaml Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,15 @@ +annotation_dir: gemini/data +versions: + ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz: 4 + ESP6500SI.all.snps_indels.tidy.v2.vcf.gz: 2 + ExAC.r0.3.sites.vep.tidy.vcf.gz: 4 + GRCh37-gms-mappability.vcf.gz: 2 + clinvar_20170130.tidy.vcf.gz: 5 + cosmic-v68-GRCh37.tidy.vcf.gz: 3 + dbsnp.b147.20160601.tidy.vcf.gz: 1 + detailed_gene_table_v75: 2 + geno2mp.variants.tidy.vcf.gz: 1 + gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz: 2 + hg19.rmsk.bed.gz: 2 + summary_gene_table_v75: 2 + whole_genome_SNVs.tsv.compressed.gz: 2
Binary file test-data/test-cache/gemini/data/29way_pi_lods_elements_12mers.chr_specific.fdr_0.1_with_scores.txt.hg19.merged.bed.gz has changed
Binary file test-data/test-cache/gemini/data/29way_pi_lods_elements_12mers.chr_specific.fdr_0.1_with_scores.txt.hg19.merged.bed.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz has changed
Binary file test-data/test-cache/gemini/data/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/ESP6500SI.all.snps_indels.tidy.v2.vcf.gz.tbi has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/gemini/data/cancer_gene_census.20140120.tsv Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,19 @@ +ARHH "RAS homolog gene family, member H (TTF)" 399 4 4p13 yes NHL L Dom T BCL6 +BCL5 B-cell CLL/lymphoma 5 603 17 17q22 yes CLL L Dom T MYC +BCL6 B-cell CLL/lymphoma 6 604 3 3q27 yes "NHL, CLL" L Dom "T, Mis" "IG loci, ZNFN1A1, LCP1, PIM1, TFRC, CIITA, NACA, HSPCB, HSPCA, HIST1H4I, IL21R, POU2AF1, ARHH, EIF4A2, SFRS3" +BCOR BCL6 corepressor 54880 X Xp11.4 yes "retinoblastoma, AML, APL (translocation)" Rec "F, N, S, T" RARA yes oculo-facio-cardio-dental genetic +CIITA "class II, major histocompatibility complex, transactivator" 4261 16 16p13 yes "PMBL, Hodgkin lymphoma" L Dom T "FLJ27352, CD274, CD273, RALGDS, RUNDC2A, C16orf75, BCL6" +EIF4A2 "eukaryotic translation initiation factor 4A, isoform 2" 1974 3 3q27.3 yes NHL L Dom T BCL6 +HIST1H4I "histone 1, H4i (H4FM)" 8294 6 6p21.3 yes NHL L Dom T BCL6 +HSPCA "heat shock 90kDa protein 1, alpha" 3320 14 14q32.31 yes NHL L Dom T BCL6 +HSPCB "heat shock 90kDa protein 1, beta" 3326 6 6p12 yes NHL L Dom T BCL6 +IGH@ immunoglobulin heavy locus 3492 14 14q32.33 yes "MM, Burkitt lymphoma, NHL, CLL, B-ALL, MALT, MLCLS" L Dom T "MYC, FGFR3,PAX5, IRTA1, IRF4, CCND1, BCL9, BCL8, BCL6, BCL2, BCL3, BCL10, BCL11A. 
LHX4, DDX6, NFKB2, PAFAH1B2, PCSK7, CRLF2" +IKZF1 IKAROS family zinc finger 1 10320 7 7p12.2 yes "ALL, DLBCL" L "Rec,Dom" "D,T" BCL6 +IL21R interleukin 21 receptor 50615 16 16p11 yes NHL L Dom T BCL6 +LCP1 lymphocyte cytosolic protein 1 (L-plastin) 3936 13 13q14.1-q14.3 yes NHL L Dom T BCL6 +MYC v-myc myelocytomatosis viral oncogene homolog (avian) 4609 8 8q24.12-q24.13 yes "Burkitt lymphoma, amplified in other cancers, B-CLL" "L, E" Dom "A, T" "IGK@, BCL5, BCL7A , BTG1, TRA@, IGH@" +NACA nascent-polypeptide-associated complex alpha polypeptide 4666 12 12q23-q24.1 yes NHL L Dom T BCL6 +PIM1 pim-1 oncogene 5292 6 6p21.2 yes NHL L Dom T BCL6 +POU2AF1 "POU domain, class 2, associating factor 1 (OBF1)" 5450 11 11q23.1 yes NHL L Dom T BCL6 +SFRS3 "splicing factor, arginine/serine-rich 3" 6428 6 6p21 yes follicular lymphoma L Dom T BCL6 +TFRC "transferrin receptor (p90, CD71)" 7037 3 3q29 yes NHL L Dom T BCL6
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/gemini/data/detailed_gene_table_v75 Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,114 @@ +Chromosome Gene_name Is_hgnc Ensembl_gene_id Ensembl_transcript_id Biotype Transcript_status CCDS_id HGNC_id CDS_length Protein_length Transcript_start Transcript_end strand Synonyms Rvis_pct entrez_gene_id mammalian_phenotype_id +chr3 None 0 ENSG00000239093 ENST00000459452 snoRNA KNOWN None None None None 187141103 187141207 1 None None None None +chr3 None 0 ENSG00000228952 ENST00000440726 lincRNA KNOWN None None None None 187166633 187167238 1 None None None None +chr3 None 0 ENSG00000223401 ENST00000450760 lincRNA KNOWN None None None None 187461474 187463208 1 None None None None +chr3 MASP 0 ENSG00000127241 ENST00000337774 protein_coding KNOWN CCDS33907 None 2100 699 186935942 187009810 -1 PRSS5,MASP1,CRARF 16.8141071 5648 None +chr3 PRSS5 0 ENSG00000127241 ENST00000337774 protein_coding KNOWN CCDS33907 None 2100 699 186935942 187009810 -1 MASP1,CRARF,MASP 16.8141071 5648 None +chr3 MASP1 1 ENSG00000127241 ENST00000337774 protein_coding KNOWN CCDS33907 6901 2100 699 186935942 187009810 -1 PRSS5,CRARF,MASP 16.8141071 5648 None +chr3 CRARF 0 ENSG00000127241 ENST00000337774 protein_coding KNOWN CCDS33907 None 2100 699 186935942 187009810 -1 PRSS5,MASP1,MASP 16.8141071 5648 None +chr3 MASP 0 ENSG00000127241 ENST00000296280 protein_coding KNOWN CCDS33908 None 2187 728 186951870 187009646 -1 PRSS5,MASP1,CRARF 16.8141071 5648 None +chr3 PRSS5 0 ENSG00000127241 ENST00000296280 protein_coding KNOWN CCDS33908 None 2187 728 186951870 187009646 -1 MASP1,CRARF,MASP 16.8141071 5648 None +chr3 MASP1 1 ENSG00000127241 ENST00000296280 protein_coding KNOWN CCDS33908 6901 2187 728 186951870 187009646 -1 PRSS5,CRARF,MASP 16.8141071 5648 None +chr3 CRARF 0 ENSG00000127241 ENST00000296280 protein_coding KNOWN CCDS33908 None 2187 728 186951870 187009646 -1 PRSS5,MASP1,MASP 16.8141071 5648 None +chr3 MASP 0 ENSG00000127241 
ENST00000392472 protein_coding PUTATIVE None None 1848 615 186951872 187009765 -1 PRSS5,MASP1,CRARF 16.8141071 5648 None +chr3 PRSS5 0 ENSG00000127241 ENST00000392472 protein_coding PUTATIVE None None 1848 615 186951872 187009765 -1 MASP1,CRARF,MASP 16.8141071 5648 None +chr3 MASP1 1 ENSG00000127241 ENST00000392472 protein_coding PUTATIVE None 6901 1848 615 186951872 187009765 -1 PRSS5,CRARF,MASP 16.8141071 5648 None +chr3 CRARF 0 ENSG00000127241 ENST00000392472 protein_coding PUTATIVE None None 1848 615 186951872 187009765 -1 PRSS5,MASP1,MASP 16.8141071 5648 None +chr3 MASP 0 ENSG00000127241 ENST00000495249 processed_transcript PUTATIVE None None None None 186953655 187009542 -1 PRSS5,MASP1,CRARF 16.8141071 5648 None +chr3 PRSS5 0 ENSG00000127241 ENST00000495249 processed_transcript PUTATIVE None None None None 186953655 187009542 -1 MASP1,CRARF,MASP 16.8141071 5648 None +chr3 MASP1 1 ENSG00000127241 ENST00000495249 processed_transcript PUTATIVE None 6901 None None 186953655 187009542 -1 PRSS5,CRARF,MASP 16.8141071 5648 None +chr3 CRARF 0 ENSG00000127241 ENST00000495249 processed_transcript PUTATIVE None None None None 186953655 187009542 -1 PRSS5,MASP1,MASP 16.8141071 5648 None +chr3 MASP 0 ENSG00000127241 ENST00000169293 protein_coding KNOWN CCDS33909 None 1143 380 186964149 187009745 -1 PRSS5,MASP1,CRARF 16.8141071 5648 None +chr3 PRSS5 0 ENSG00000127241 ENST00000169293 protein_coding KNOWN CCDS33909 None 1143 380 186964149 187009745 -1 MASP1,CRARF,MASP 16.8141071 5648 None +chr3 MASP1 1 ENSG00000127241 ENST00000169293 protein_coding KNOWN CCDS33909 6901 1143 380 186964149 187009745 -1 PRSS5,CRARF,MASP 16.8141071 5648 None +chr3 CRARF 0 ENSG00000127241 ENST00000169293 protein_coding KNOWN CCDS33909 None 1143 380 186964149 187009745 -1 PRSS5,MASP1,MASP 16.8141071 5648 None +chr3 MASP 0 ENSG00000127241 ENST00000392470 protein_coding PUTATIVE None None 1065 354 186964947 187009670 -1 PRSS5,MASP1,CRARF 16.8141071 5648 None +chr3 PRSS5 0 ENSG00000127241 
ENST00000392470 protein_coding PUTATIVE None None 1065 354 186964947 187009670 -1 MASP1,CRARF,MASP 16.8141071 5648 None +chr3 MASP1 1 ENSG00000127241 ENST00000392470 protein_coding PUTATIVE None 6901 1065 354 186964947 187009670 -1 PRSS5,CRARF,MASP 16.8141071 5648 None +chr3 CRARF 0 ENSG00000127241 ENST00000392470 protein_coding PUTATIVE None None 1065 354 186964947 187009670 -1 PRSS5,MASP1,MASP 16.8141071 5648 None +chr3 MASP 0 ENSG00000127241 ENST00000460839 retained_intron KNOWN None None None None 186974373 187003796 -1 PRSS5,MASP1,CRARF 16.8141071 5648 None +chr3 PRSS5 0 ENSG00000127241 ENST00000460839 retained_intron KNOWN None None None None 186974373 187003796 -1 MASP1,CRARF,MASP 16.8141071 5648 None +chr3 MASP1 1 ENSG00000127241 ENST00000460839 retained_intron KNOWN None 6901 None None 186974373 187003796 -1 PRSS5,CRARF,MASP 16.8141071 5648 None +chr3 CRARF 0 ENSG00000127241 ENST00000460839 retained_intron KNOWN None None None None 186974373 187003796 -1 PRSS5,MASP1,MASP 16.8141071 5648 None +chr3 MASP 0 ENSG00000127241 ENST00000392475 protein_coding NOVEL None None 614 203 186974603 187009768 -1 PRSS5,MASP1,CRARF 16.8141071 5648 None +chr3 PRSS5 0 ENSG00000127241 ENST00000392475 protein_coding NOVEL None None 614 203 186974603 187009768 -1 MASP1,CRARF,MASP 16.8141071 5648 None +chr3 MASP1 1 ENSG00000127241 ENST00000392475 protein_coding NOVEL None 6901 614 203 186974603 187009768 -1 PRSS5,CRARF,MASP 16.8141071 5648 None +chr3 CRARF 0 ENSG00000127241 ENST00000392475 protein_coding NOVEL None None 614 203 186974603 187009768 -1 PRSS5,MASP1,MASP 16.8141071 5648 None +chr3 MASP 0 ENSG00000127241 ENST00000439271 protein_coding PUTATIVE None None 355 117 186980469 187009746 -1 PRSS5,MASP1,CRARF 16.8141071 5648 None +chr3 PRSS5 0 ENSG00000127241 ENST00000439271 protein_coding PUTATIVE None None 355 117 186980469 187009746 -1 MASP1,CRARF,MASP 16.8141071 5648 None +chr3 MASP1 1 ENSG00000127241 ENST00000439271 protein_coding PUTATIVE None 6901 355 117 186980469 
187009746 -1 PRSS5,CRARF,MASP 16.8141071 5648 None +chr3 CRARF 0 ENSG00000127241 ENST00000439271 protein_coding PUTATIVE None None 355 117 186980469 187009746 -1 PRSS5,MASP1,MASP 16.8141071 5648 None +chr3 MASP 0 ENSG00000127241 ENST00000425937 protein_coding PUTATIVE None None 166 54 186980502 187009485 -1 PRSS5,MASP1,CRARF 16.8141071 5648 None +chr3 PRSS5 0 ENSG00000127241 ENST00000425937 protein_coding PUTATIVE None None 166 54 186980502 187009485 -1 MASP1,CRARF,MASP 16.8141071 5648 None +chr3 MASP1 1 ENSG00000127241 ENST00000425937 protein_coding PUTATIVE None 6901 166 54 186980502 187009485 -1 PRSS5,CRARF,MASP 16.8141071 5648 None +chr3 CRARF 0 ENSG00000127241 ENST00000425937 protein_coding PUTATIVE None None 166 54 186980502 187009485 -1 PRSS5,MASP1,MASP 16.8141071 5648 None +chr3 IFRG28 0 ENSG00000136514 ENST00000259030 protein_coding KNOWN CCDS33910 None 741 246 187086120 187089864 1 RTP4,Z3CXXC4 94.35008257 64108 None +chr3 RTP4 1 ENSG00000136514 ENST00000259030 protein_coding KNOWN CCDS33910 23992 741 246 187086120 187089864 1 IFRG28,Z3CXXC4 94.35008257 64108 None +chr3 Z3CXXC4 0 ENSG00000136514 ENST00000259030 protein_coding KNOWN CCDS33910 None 741 246 187086120 187089864 1 IFRG28,RTP4 94.35008257 64108 None +chr3 SST 1 ENSG00000157005 ENST00000287641 protein_coding KNOWN CCDS3288 11329 351 116 187386694 187388187 -1 SMST 78.16112291 6750 MP:0002873,MP:0005386,MP:0005376,MP:0005381,MP:0003631,MP:0005378 +chr3 SMST 0 ENSG00000157005 ENST00000287641 protein_coding KNOWN CCDS3288 None 351 116 187386694 187388187 -1 SST 78.16112291 6750 MP:0002873,MP:0005386,MP:0005376,MP:0005381,MP:0003631,MP:0005378 +chr3 Z3CXXC2 0 ENSG00000198471 ENST00000358241 protein_coding KNOWN CCDS33911 None 678 225 187416047 187420345 -1 RTP2,MGC78665 69.20853975 344892 MP:0005389 +chr3 RTP2 1 ENSG00000198471 ENST00000358241 protein_coding KNOWN CCDS33911 32486 678 225 187416047 187420345 -1 Z3CXXC2,MGC78665 69.20853975 344892 MP:0005389 +chr3 MGC78665 0 ENSG00000198471 
ENST00000358241 protein_coding KNOWN CCDS33911 None 678 225 187416047 187420345 -1 Z3CXXC2,RTP2 69.20853975 344892 MP:0005389 +chr3 None 0 ENSG00000228804 ENST00000449623 protein_coding PUTATIVE None None 390 129 187420101 187451637 1 None None None None +chr3 None 0 ENSG00000228804 ENST00000437407 protein_coding PUTATIVE None None 153 50 187420154 187450203 1 None None None None +chr3 ZNF51 0 ENSG00000113916 ENST00000406870 protein_coding KNOWN CCDS3289 None 2121 706 187439165 187463515 -1 LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 ENST00000406870 protein_coding KNOWN CCDS3289 None 2121 706 187439165 187463515 -1 BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 ENSG00000113916 ENST00000406870 protein_coding KNOWN CCDS3289 None 2121 706 187439165 187463515 -1 LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 ENST00000406870 protein_coding KNOWN CCDS3289 None 2121 706 187439165 187463515 -1 LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 ENST00000406870 protein_coding KNOWN CCDS3289 1001 2121 706 187439165 187463515 -1 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 ENST00000406870 protein_coding KNOWN CCDS3289 None 2121 706 187439165 187463515 -1 LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 604 
MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZNF51 0 ENSG00000113916 ENST00000419510 nonsense_mediated_decay KNOWN None None 168 55 187439175 187454876 -1 LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 ENST00000419510 nonsense_mediated_decay KNOWN None None 168 55 187439175 187454876 -1 BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 ENSG00000113916 ENST00000419510 nonsense_mediated_decay KNOWN None None 168 55 187439175 187454876 -1 LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 ENST00000419510 nonsense_mediated_decay KNOWN None None 168 55 187439175 187454876 -1 LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 ENST00000419510 nonsense_mediated_decay KNOWN None 1001 168 55 187439175 187454876 -1 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 ENST00000419510 nonsense_mediated_decay KNOWN None None 168 55 187439175 187454876 -1 LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZNF51 0 ENSG00000113916 ENST00000232014 protein_coding KNOWN CCDS3289 None 2121 706 187440186 187454357 -1 
LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 ENST00000232014 protein_coding KNOWN CCDS3289 None 2121 706 187440186 187454357 -1 BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 ENSG00000113916 ENST00000232014 protein_coding KNOWN CCDS3289 None 2121 706 187440186 187454357 -1 LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 ENST00000232014 protein_coding KNOWN CCDS3289 None 2121 706 187440186 187454357 -1 LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 ENST00000232014 protein_coding KNOWN CCDS3289 1001 2121 706 187440186 187454357 -1 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 ENST00000232014 protein_coding KNOWN CCDS3289 None 2121 706 187440186 187454357 -1 LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZNF51 0 ENSG00000113916 ENST00000450123 protein_coding NOVEL CCDS46975 None 1953 650 187440220 187452670 -1 LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 ENST00000450123 protein_coding NOVEL CCDS46975 None 1953 650 
187440220 187452670 -1 BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 ENSG00000113916 ENST00000450123 protein_coding NOVEL CCDS46975 None 1953 650 187440220 187452670 -1 LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 ENST00000450123 protein_coding NOVEL CCDS46975 None 1953 650 187440220 187452670 -1 LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 ENST00000450123 protein_coding NOVEL CCDS46975 1001 1953 650 187440220 187452670 -1 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 ENST00000450123 protein_coding NOVEL CCDS46975 None 1953 650 187440220 187452670 -1 LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZNF51 0 ENSG00000113916 ENST00000479110 retained_intron KNOWN None None None None 187442357 187443411 -1 LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 ENST00000479110 retained_intron KNOWN None None None None 187442357 187443411 -1 BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 ENSG00000113916 ENST00000479110 retained_intron KNOWN None 
None None None 187442357 187443411 -1 LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 ENST00000479110 retained_intron KNOWN None None None None 187442357 187443411 -1 LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 ENST00000479110 retained_intron KNOWN None 1001 None None 187442357 187443411 -1 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 ENST00000479110 retained_intron KNOWN None None None None 187442357 187443411 -1 LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZNF51 0 ENSG00000113916 ENST00000430339 protein_coding KNOWN None None 365 120 187449515 187452735 -1 LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 ENST00000430339 protein_coding KNOWN None None 365 120 187449515 187452735 -1 BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 ENSG00000113916 ENST00000430339 protein_coding KNOWN None None 365 120 187449515 187452735 -1 LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 ENST00000430339 protein_coding KNOWN None None 
365 120 187449515 187452735 -1 LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 ENST00000430339 protein_coding KNOWN None 1001 365 120 187449515 187452735 -1 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 ENST00000430339 protein_coding KNOWN None None 365 120 187449515 187452735 -1 LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZNF51 0 ENSG00000113916 ENST00000480458 processed_transcript KNOWN None None None None 187449553 187463225 -1 LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 ENST00000480458 processed_transcript KNOWN None None None None 187449553 187463225 -1 BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 ENSG00000113916 ENST00000480458 processed_transcript KNOWN None None None None 187449553 187463225 -1 LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 ENST00000480458 processed_transcript KNOWN None None None None 187449553 187463225 -1 LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 ENST00000480458 
processed_transcript KNOWN None 1001 None None 187449553 187463225 -1 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 ENST00000480458 processed_transcript KNOWN None None None None 187449553 187463225 -1 LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZNF51 0 ENSG00000113916 ENST00000438077 protein_coding KNOWN None None 312 103 187449568 187455732 -1 LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 ENST00000438077 protein_coding KNOWN None None 312 103 187449568 187455732 -1 BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 ENSG00000113916 ENST00000438077 protein_coding KNOWN None None 312 103 187449568 187455732 -1 LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 ENST00000438077 protein_coding KNOWN None None 312 103 187449568 187455732 -1 LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 ENST00000438077 protein_coding KNOWN None 1001 312 103 187449568 187455732 -1 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 ENST00000438077 
protein_coding KNOWN None None 312 103 187449568 187455732 -1 LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZNF51 0 ENSG00000113916 ENST00000470319 retained_intron KNOWN None None None None 187452233 187463260 -1 LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 ENST00000470319 retained_intron KNOWN None None None None 187452233 187463260 -1 BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 ENSG00000113916 ENST00000470319 retained_intron KNOWN None None None None 187452233 187463260 -1 LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 ENST00000470319 retained_intron KNOWN None None None None 187452233 187463260 -1 LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 ENST00000470319 retained_intron KNOWN None 1001 None None 187452233 187463260 -1 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 ENST00000470319 retained_intron KNOWN None None None None 187452233 187463260 -1 LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZNF51 0 ENSG00000113916 ENST00000496823 
processed_transcript PUTATIVE None None None None 187453975 187463247 -1 LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 ENST00000496823 processed_transcript PUTATIVE None None None None 187453975 187463247 -1 BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 ENSG00000113916 ENST00000496823 processed_transcript PUTATIVE None None None None 187453975 187463247 -1 LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 ENST00000496823 processed_transcript PUTATIVE None None None None 187453975 187463247 -1 LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 ENST00000496823 processed_transcript PUTATIVE None 1001 None None 187453975 187463247 -1 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 ENST00000496823 processed_transcript PUTATIVE None None None None 187453975 187463247 -1 LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
Binary file test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz has changed
Binary file test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz.tbi has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl66 Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,30 @@ +B8PSA7 BCL6 BCL6 ENSG00000113916 ENST00000450123 None None +C9J1C7 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JCS5 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JCS5 BCL6 BCL6 ENSG00000113916 ENST00000450123 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000438077 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000450123 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000169293 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000296280 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000425937 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JMA2 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +F5H2J0 MASP1 MASP1 ENSG00000127241 ENST00000541896 None None +F8W876 MASP1 MASP1 ENSG00000127241 ENST00000169293 None None +F8W876 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +P41182 BCL6 BCL6 ENSG00000113916 ENST00000232014 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P41182 BCL6 BCL6 ENSG00000113916 ENST00000406870 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04080;Neuroactive_ligand_receptor_interaction +P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04971;Gastric_acid_secretion +Q5HYM1 MASP1 MASP1 ENSG00000127241 ENST00000541896 None None +Q5QGT7 RTP2 RTP2 ENSG00000198471 ENST00000358241 hsa:344892 None +Q96DX8 RTP4 RTP4 
ENSG00000136514 ENST00000259030 hsa:64108 None +Q9NSY8 MASP1 MASP1 ENSG00000127241 ENST00000296280 None None +Q9NSY8 MASP1 MASP1 ENSG00000127241 ENST00000392472 None None +Q9NSY8 MASP1 MASP1 ENSG00000127241 ENST00000541811 None None +none BCL6 BCL6 ENSG00000113916 ENST00000419510 None None
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl67 Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,30 @@ +B8PSA7 BCL6 BCL6 ENSG00000113916 ENST00000450123 None None +C9J1C7 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JCS5 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JCS5 BCL6 BCL6 ENSG00000113916 ENST00000450123 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000438077 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000450123 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000169293 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000296280 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000425937 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JMA2 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +F5H2J0 MASP1 MASP1 ENSG00000127241 ENST00000541896 None None +F8W876 MASP1 MASP1 ENSG00000127241 ENST00000169293 None None +F8W876 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +P41182 BCL6 BCL6 ENSG00000113916 ENST00000232014 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P41182 BCL6 BCL6 ENSG00000113916 ENST00000406870 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04080;Neuroactive_ligand_receptor_interaction +P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04971;Gastric_acid_secretion +Q5HYM1 MASP1 MASP1 ENSG00000127241 ENST00000541896 None None +Q5QGT7 RTP2 RTP2 ENSG00000198471 ENST00000358241 hsa:344892 None +Q96DX8 RTP4 RTP4 
ENSG00000136514 ENST00000259030 hsa:64108 None +Q9NSY8 MASP1 MASP1 ENSG00000127241 ENST00000296280 None None +Q9NSY8 MASP1 MASP1 ENSG00000127241 ENST00000392472 None None +Q9NSY8 MASP1 MASP1 ENSG00000127241 ENST00000541811 None None +none BCL6 BCL6 ENSG00000113916 ENST00000419510 None None
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl68 Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,26 @@ +C9J1C7 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JCS5 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000438077 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000425937 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JMA2 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +F8W876 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +P41182 BCL6 BCL6 ENSG00000113916 ENST00000232014 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P41182 BCL6 BCL6 ENSG00000113916 ENST00000406870 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P41182 BCL6 BCL6 ENSG00000113916 ENST00000450123 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P61278 SST SST 
ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04080;Neuroactive_ligand_receptor_interaction +P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04971;Gastric_acid_secretion +Q5QGT7 RTP2 RTP2 ENSG00000198471 ENST00000358241 hsa:344892 None +Q96DX8 RTP4 RTP4 ENSG00000136514 ENST00000259030 hsa:64108 None +none BCL6 BCL6 ENSG00000113916 ENST00000419510 None None
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl69 Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,26 @@ +C9J1C7 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JCS5 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000438077 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000425937 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JMA2 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +F8W876 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +P41182 BCL6 BCL6 ENSG00000113916 ENST00000232014 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P41182 BCL6 BCL6 ENSG00000113916 ENST00000406870 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P41182 BCL6 BCL6 ENSG00000113916 ENST00000450123 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P61278 SST SST 
ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04080;Neuroactive_ligand_receptor_interaction +P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04971;Gastric_acid_secretion +Q5QGT7 RTP2 RTP2 ENSG00000198471 ENST00000358241 hsa:344892 None +Q96DX8 RTP4 RTP4 ENSG00000136514 ENST00000259030 hsa:64108 None +none BCL6 BCL6 ENSG00000113916 ENST00000419510 None None
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl70 Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,25 @@ +C9J1C7 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000438077 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000425937 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JMA2 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +F8W876 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +P41182 BCL6 BCL6 ENSG00000113916 ENST00000232014 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P41182 BCL6 BCL6 ENSG00000113916 ENST00000406870 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P41182 BCL6 BCL6 ENSG00000113916 ENST00000450123 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 
path:hsa04080;Neuroactive_ligand_receptor_interaction +P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04971;Gastric_acid_secretion +Q5QGT7 RTP2 RTP2 ENSG00000198471 ENST00000358241 hsa:344892 None +Q96DX8 RTP4 RTP4 ENSG00000136514 ENST00000259030 hsa:64108 None +none BCL6 BCL6 ENSG00000113916 ENST00000419510 None None
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl71 Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,26 @@ +C9J1C7 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JCS5 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000438077 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000425937 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JMA2 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +F8W876 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +P41182 BCL6 BCL6 ENSG00000113916 ENST00000232014 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P41182 BCL6 BCL6 ENSG00000113916 ENST00000406870 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P41182 BCL6 BCL6 ENSG00000113916 ENST00000450123 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P61278 SST SST 
ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04080;Neuroactive_ligand_receptor_interaction +P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04971;Gastric_acid_secretion +Q5QGT7 RTP2 RTP2 ENSG00000198471 ENST00000358241 hsa:344892 None +Q96DX8 RTP4 RTP4 ENSG00000136514 ENST00000259030 hsa:64108 None +none BCL6 BCL6 ENSG00000113916 ENST00000419510 None None
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/gemini/data/summary_gene_table_v75 Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,23 @@ +Chromosome Gene_name Is_hgnc Ensembl_gene_id HGNC_id Synonyms Rvis_pct Strand Transcript_min_start Transcript_max_end Mammalian_phenotype_id +chr3 None 0 ENSG00000239093 None None None 1 187141103 187141207 None +chr3 None 0 ENSG00000228952 None None None 1 187166633 187167238 None +chr3 None 0 ENSG00000223401 None None None 1 187461474 187463208 None +chr3 MASP 0 ENSG00000127241 None PRSS5,MASP1,CRARF 16.8141071 -1 186935942 187009810 None +chr3 PRSS5 0 ENSG00000127241 None MASP1,CRARF,MASP 16.8141071 -1 186935942 187009810 None +chr3 MASP1 1 ENSG00000127241 6901 PRSS5,CRARF,MASP 16.8141071 -1 186935942 187009810 None +chr3 CRARF 0 ENSG00000127241 None PRSS5,MASP1,MASP 16.8141071 -1 186935942 187009810 None +chr3 IFRG28 0 ENSG00000136514 None RTP4,Z3CXXC4 94.35008257 1 187086120 187089864 None +chr3 RTP4 1 ENSG00000136514 23992 IFRG28,Z3CXXC4 94.35008257 1 187086120 187089864 None +chr3 Z3CXXC4 0 ENSG00000136514 None IFRG28,RTP4 94.35008257 1 187086120 187089864 None +chr3 SST 1 ENSG00000157005 11329 SMST 78.16112291 -1 187386694 187388187 MP:0002873,MP:0005386,MP:0005376,MP:0005381,MP:0003631,MP:0005378 +chr3 SMST 0 ENSG00000157005 None SST 78.16112291 -1 187386694 187388187 MP:0002873,MP:0005386,MP:0005376,MP:0005381,MP:0003631,MP:0005378 +chr3 Z3CXXC2 0 ENSG00000198471 None RTP2,MGC78665 69.20853975 -1 187416047 187420345 MP:0005389 +chr3 RTP2 1 ENSG00000198471 32486 Z3CXXC2,MGC78665 69.20853975 -1 187416047 187420345 MP:0005389 +chr3 MGC78665 0 ENSG00000198471 None Z3CXXC2,RTP2 69.20853975 -1 187416047 187420345 MP:0005389 +chr3 None 0 ENSG00000228804 None None None 1 187420101 187451637 None +chr3 ZNF51 0 ENSG00000113916 None LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 -1 187439165 187463515 
MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 None BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 -1 187439165 187463515 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 ENSG00000113916 None LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 -1 187439165 187463515 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 None LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 -1 187439165 187463515 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 1001 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 -1 187439165 187463515 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 None LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 -1 187439165 187463515 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
Binary file test-data/test-cache/gemini/data/wgEncodeRegTfbsClusteredV2.cell_count.20130213.bed.gz has changed
Binary file test-data/test-cache/gemini/data/wgEncodeRegTfbsClusteredV2.cell_count.20130213.bed.gz.tbi has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/README.rst Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,25 @@ +Prepare Gemini annotation files and test databases for tool tests +================================================================= + +Each version of GEMINI is tied to a particular set of annotation files and +database version. + +The ``build-gemini-testdata.sh`` script in this folder should be used to +regenerate the annotation files and the test databases whenever the GEMINI +version required by the tool wrappers gets upgraded. + +The script requires a working GEMINI installation at the targeted version and +a folder with GEMINI's original annotation files, and can be executed with:: + + sh build-gemini-testdata.sh path/to/gemini/annotation/files + +It will regenerate the annotation files inside test-data/test-cache/gemini/data +and rebuild the *.db files in test-data. + +.. Note:: + + If the version of GEMINI that you are upgrading to uses a gemini-config.yaml + file that is different from the one found in test-data/test-cache you will + have to upgrade this file manually (make sure you leave the line + ``annotation_dir: gemini/data`` unchanged in the process). +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/build-data/anno.bed Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,3 @@ +chr3 187000000 187150000 +chr3 187150000 187300000 +chr3 187300000 187450000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/build-data/gemini_load_input.vcf Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,70 @@ +##fileformat=VCFv4.1 +##INFO=<ID=LDAF,Number=1,Type=Float,Description="MLE Allele Frequency Accounting for LD"> +##INFO=<ID=AVGPOST,Number=1,Type=Float,Description="Average posterior probability from MaCH/Thunder"> +##INFO=<ID=RSQ,Number=1,Type=Float,Description="Genotype imputation quality from MaCH/Thunder"> +##INFO=<ID=ERATE,Number=1,Type=Float,Description="Per-marker Mutation rate from MaCH/Thunder"> +##INFO=<ID=THETA,Number=1,Type=Float,Description="Per-marker Transition rate from MaCH/Thunder"> +##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants"> +##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants"> +##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record"> +##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints"> +##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints"> +##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles"> +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=AC,Number=.,Type=Integer,Description="Alternate Allele Count"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Allele Count"> +##ALT=<ID=DEL,Description="Deletion"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=DS,Number=1,Type=Float,Description="Genotype dosage from MaCH/Thunder"> +##FORMAT=<ID=GL,Number=.,Type=Float,Description="Genotype Likelihoods"> +##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele, ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/pilot_data/technical/reference/ancestral_alignments/README"> 
+##INFO=<ID=AF,Number=1,Type=Float,Description="Global Allele Frequency based on AC/AN"> +##INFO=<ID=AMR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AMR based on AC/AN"> +##INFO=<ID=ASN_AF,Number=1,Type=Float,Description="Allele Frequency for samples from ASN based on AC/AN"> +##INFO=<ID=AFR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AFR based on AC/AN"> +##INFO=<ID=EUR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from EUR based on AC/AN"> +##INFO=<ID=VT,Number=1,Type=String,Description="indicates what type of variant the line represents"> +##INFO=<ID=SNPSOURCE,Number=.,Type=String,Description="indicates if a snp was called when analysing the low coverage or exome alignment data"> +##reference=GRCh37 +##SnpEffVersion="SnpEff 3.0f (build 2012-08-23), by Pablo Cingolani" +##SnpEffCmd="SnpEff GRCh37.66 -i vcf -o vcf -c /Users/arq5x/src/other/snpEff_3_0/snpEff.config ALL.wgs.integrated_phase1_v3.20101123.snps_indels_sv.sites.vcf.gz " +##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )' "> +#CHROM POS ID REF ALT QUAL FILTER INFO +1 10583 rs58108140 G A 100.0 PASS 
AVGPOST=0.7707;RSQ=0.4319;LDAF=0.2327;ERATE=0.0161;AN=2184;VT=SNP;AA=.;THETA=0.0046;AC=314;SNPSOURCE=LOWCOV;AF=0.14;ASN_AF=0.13;AMR_AF=0.17;AFR_AF=0.04;EUR_AF=0.21;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|) +1 10611 rs189107123 C G 100.0 PASS AN=2184;THETA=0.0077;VT=SNP;AA=.;AC=41;ERATE=0.0048;SNPSOURCE=LOWCOV;AVGPOST=0.9330;LDAF=0.0479;RSQ=0.3475;AF=0.02;ASN_AF=0.01;AMR_AF=0.03;AFR_AF=0.01;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|) +1 13302 rs180734498 C T 100.0 PASS 
THETA=0.0048;AN=2184;AC=249;VT=SNP;AA=.;RSQ=0.6281;LDAF=0.1573;SNPSOURCE=LOWCOV;AVGPOST=0.8895;ERATE=0.0058;AF=0.11;ASN_AF=0.02;AMR_AF=0.08;AFR_AF=0.21;EUR_AF=0.14;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||209|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),INTRON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|) +1 13327 rs144762171 G C 100.0 PASS 
AVGPOST=0.9698;AN=2184;VT=SNP;AA=.;RSQ=0.6482;AC=59;SNPSOURCE=LOWCOV;ERATE=0.0012;LDAF=0.0359;THETA=0.0204;AF=0.03;ASN_AF=0.02;AMR_AF=0.03;AFR_AF=0.02;EUR_AF=0.04;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||209|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),INTRON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|) +1 13957 . 
TC T 28.0 PASS AA=TC;AC=35;AN=2184;VT=INDEL;AVGPOST=0.8711;RSQ=0.2501;LDAF=0.0788;THETA=0.0100;ERATE=0.0065;AF=0.02;ASN_AF=0.01;AMR_AF=0.02;AFR_AF=0.02;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|) +1 13980 rs151276478 T C 100.0 PASS 
AN=2184;AC=45;ERATE=0.0034;THETA=0.0139;RSQ=0.3603;LDAF=0.0525;VT=SNP;AA=.;AVGPOST=0.9221;SNPSOURCE=LOWCOV;AF=0.02;ASN_AF=0.02;AMR_AF=0.02;AFR_AF=0.01;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|) +1 30923 rs140337953 G T 100.0 PASS 
AC=1584;AA=T;AN=2184;RSQ=0.5481;VT=SNP;THETA=0.0162;SNPSOURCE=LOWCOV;ERATE=0.0183;LDAF=0.6576;AVGPOST=0.7335;AF=0.73;ASN_AF=0.89;AMR_AF=0.80;AFR_AF=0.48;EUR_AF=0.73;EFF=DOWNSTREAM(MODIFIER|||||FAM138A|processed_transcript|CODING|ENST00000461467|),DOWNSTREAM(MODIFIER|||||FAM138A|protein_coding|CODING|ENST00000417324|),DOWNSTREAM(MODIFIER|||||MIR1302-10|miRNA|NON_CODING|ENST00000408384|),INTRON(MODIFIER||||177|MIR1302-10|antisense|NON_CODING|ENST00000469289|),INTRON(MODIFIER||||236|MIR1302-10|antisense|NON_CODING|ENST00000473358|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|) +1 46402 . C CTGT 31.0 PASS AA=.;RSQ=0.0960;AN=2184;AC=8;VT=INDEL;AVGPOST=0.8325;THETA=0.0121;ERATE=0.0072;LDAF=0.0903;AF=0.0037;ASN_AF=0.0017;AFR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 47190 . 
G GA 192.0 PASS AA=G;AVGPOST=0.9041;AN=2184;AC=29;VT=INDEL;LDAF=0.0628;THETA=0.0153;RSQ=0.2883;ERATE=0.0041;AF=0.01;AMR_AF=0.0028;AFR_AF=0.06;EFF=INTERGENIC(MODIFIER|||||||||) +1 51476 rs187298206 T C 100.0 PASS ERATE=0.0021;AA=C;AC=18;AN=2184;VT=SNP;THETA=0.0103;LDAF=0.0157;SNPSOURCE=LOWCOV;AVGPOST=0.9819;RSQ=0.5258;AF=0.01;ASN_AF=0.01;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 51479 rs116400033 T A 100.0 PASS RSQ=0.7414;AVGPOST=0.9085;AA=T;AN=2184;THETA=0.0131;AC=235;VT=SNP;LDAF=0.1404;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.11;ASN_AF=0.0035;AMR_AF=0.16;AFR_AF=0.03;EUR_AF=0.22;EFF=INTERGENIC(MODIFIER|||||||||) +1 51914 rs190452223 T G 100.0 PASS ERATE=0.0004;AVGPOST=0.9985;THETA=0.0159;AA=T;AN=2184;VT=SNP;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4089;LDAF=0.0012;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||) +1 51935 rs181754315 C T 100.0 PASS THETA=0.0126;AA=C;AN=2184;RSQ=0.1888;AVGPOST=0.9972;LDAF=0.0015;VT=SNP;AC=0;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0;EFF=INTERGENIC(MODIFIER|||||||||) +1 51954 rs185832753 G C 100.0 PASS LDAF=0.0021;AA=G;AN=2184;RSQ=0.4692;AVGPOST=0.9975;VT=SNP;SNPSOURCE=LOWCOV;THETA=0.0029;ERATE=0.0006;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 52058 rs62637813 G C 100.0 PASS AA=C;ERATE=0.0057;AN=2184;AVGPOST=0.9264;VT=SNP;RSQ=0.4882;AC=64;SNPSOURCE=LOWCOV;LDAF=0.0620;THETA=0.0069;AF=0.03;ASN_AF=0.0017;AMR_AF=0.04;AFR_AF=0.02;EUR_AF=0.05;EFF=INTERGENIC(MODIFIER|||||||||) +1 52144 rs190291950 T A 100.0 PASS THETA=0.0093;ERATE=0.0013;LDAF=0.0156;AA=T;AN=2184;VT=SNP;RSQ=0.5220;AVGPOST=0.9811;SNPSOURCE=LOWCOV;AC=21;AF=0.01;ASN_AF=0.0035;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 52185 . 
TTAA T 244.0 PASS AA=.;AN=2184;LDAF=0.0124;VT=INDEL;AC=10;THETA=0.0232;RSQ=0.4271;AVGPOST=0.9840;ERATE=0.0037;AF=0.0046;ASN_AF=0.0035;AMR_AF=0.02;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||) +1 52238 rs150021059 T G 100.0 PASS THETA=0.0132;AA=G;AN=2184;RSQ=0.6256;VT=SNP;ERATE=0.0026;AVGPOST=0.8617;SNPSOURCE=LOWCOV;AC=1941;LDAF=0.8423;AF=0.89;ASN_AF=0.99;AMR_AF=0.93;AFR_AF=0.64;EUR_AF=0.95;EFF=INTERGENIC(MODIFIER|||||||||) +1 53234 . CAT C 227.0 PASS AA=CAT;AVGPOST=0.9936;AN=2184;VT=INDEL;THETA=0.0119;AC=10;LDAF=0.0074;RSQ=0.6237;ERATE=0.0007;AF=0.0046;AMR_AF=0.0028;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 54353 rs140052487 C A 100.0 PASS THETA=0.0026;AA=C;AN=2184;AC=16;VT=SNP;RSQ=0.5074;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0146;ERATE=0.0058;AF=0.01;ASN_AF=0.01;AMR_AF=0.0028;AFR_AF=0.02;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||) +1 54421 rs146477069 A G 100.0 PASS ERATE=0.0013;AN=2184;AC=220;VT=SNP;RSQ=0.7869;AVGPOST=0.9461;AA=A;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.1190;AF=0.10;ASN_AF=0.25;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 54490 rs141149254 G A 100.0 PASS ERATE=0.0004;THETA=0.0074;AA=G;AN=2184;VT=SNP;RSQ=0.8366;AVGPOST=0.9646;AC=175;SNPSOURCE=LOWCOV;LDAF=0.0929;AF=0.08;ASN_AF=0.0035;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.15;EFF=INTERGENIC(MODIFIER|||||||||) +1 54676 rs2462492 C T 100.0 PASS LDAF=0.1528;RSQ=0.6989;AA=T;AN=2184;AC=267;VT=SNP;AVGPOST=0.8998;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0037;AF=0.12;ASN_AF=0.02;AMR_AF=0.20;AFR_AF=0.09;EUR_AF=0.18;EFF=INTERGENIC(MODIFIER|||||||||) +1 54753 rs143174675 T G 100.0 PASS AA=T;AN=2184;RSQ=0.6820;AC=65;VT=SNP;THETA=0.0080;ERATE=0.0016;SNPSOURCE=LOWCOV;AVGPOST=0.9697;LDAF=0.0399;AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.03;EFF=INTERGENIC(MODIFIER|||||||||) +1 55164 rs3091274 C A 100.0 PASS 
AN=2184;VT=SNP;ERATE=0.0045;AA=A;THETA=0.0162;SNPSOURCE=LOWCOV;AC=1955;RSQ=0.6373;AVGPOST=0.8686;LDAF=0.8489;AF=0.90;ASN_AF=0.99;AMR_AF=0.94;AFR_AF=0.65;EUR_AF=0.96;EFF=INTERGENIC(MODIFIER|||||||||) +1 55249 . C CTATGG 443.0 PASS AA=C;AVGPOST=0.9073;ERATE=0.0063;RSQ=0.5891;AN=2184;THETA=0.0038;VT=INDEL;AC=151;LDAF=0.0968;AF=0.07;ASN_AF=0.16;AMR_AF=0.08;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 55299 rs10399749 C T 100.0 PASS RSQ=0.7602;LDAF=0.2954;AN=2184;VT=SNP;ERATE=0.0051;AA=c;AC=554;SNPSOURCE=LOWCOV;AVGPOST=0.8845;THETA=0.0070;AF=0.25;ASN_AF=0.33;AMR_AF=0.21;AFR_AF=0.39;EUR_AF=0.13;EFF=INTERGENIC(MODIFIER|||||||||) +1 55313 rs182462964 A T 100.0 PASS ERATE=0.0004;RSQ=0.6112;AVGPOST=0.9994;AN=2184;VT=SNP;THETA=0.0057;AA=A;SNPSOURCE=LOWCOV;AC=1;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||) +1 55326 rs3107975 T C 100.0 PASS AA=C;ERATE=0.0074;AN=2184;THETA=0.0085;VT=SNP;SNPSOURCE=LOWCOV;AVGPOST=0.9622;AC=90;RSQ=0.6901;LDAF=0.0562;AF=0.04;ASN_AF=0.07;AMR_AF=0.02;AFR_AF=0.07;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 55330 rs185215913 G A 100.0 PASS ERATE=0.0005;AA=G;AN=2184;VT=SNP;THETA=0.0086;AVGPOST=0.9988;LDAF=0.0011;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4701;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||) +1 55367 rs190850374 G A 100.0 PASS ERATE=0.0004;THETA=0.0044;AA=G;AN=2184;VT=SNP;LDAF=0.0029;RSQ=0.3860;SNPSOURCE=LOWCOV;AVGPOST=0.9961;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 55388 rs182711216 C T 100.0 PASS THETA=0.0102;ERATE=0.0005;AA=C;AVGPOST=0.9983;AN=2184;LDAF=0.0010;VT=SNP;RSQ=0.2348;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||) +1 55394 rs2949420 T A 100.0 PASS AC=18;AN=2184;VT=SNP;AA=A;RSQ=0.4995;AVGPOST=0.9784;LDAF=0.0171;SNPSOURCE=LOWCOV;ERATE=0.0012;THETA=0.0063;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 55416 rs193242050 G A 100.0 PASS 
AA=G;AN=2184;AVGPOST=0.9944;VT=SNP;LDAF=0.0064;AC=9;THETA=0.0019;RSQ=0.6553;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0041;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 55427 rs183189405 T C 100.0 PASS THETA=0.0054;AA=T;AN=2184;VT=SNP;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;RSQ=0.2759;ERATE=0.0007;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||) +1 55816 rs187434873 G A 100.0 PASS AN=2184;THETA=0.0119;VT=SNP;AC=10;RSQ=0.4578;AA=A;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0108;ERATE=0.0007;AF=0.0046;AMR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 55850 rs191890754 C G 100.0 PASS AVGPOST=0.9921;AA=G;AN=2184;VT=SNP;RSQ=0.4083;THETA=0.0045;LDAF=0.0056;AC=5;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0023;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 55852 rs184233019 G C 100.0 PASS THETA=0.0137;AA=G;AN=2184;RSQ=0.5433;ERATE=0.0009;LDAF=0.0046;VT=SNP;AVGPOST=0.9953;AC=5;SNPSOURCE=LOWCOV;AF=0.0023;AMR_AF=0.01;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/build-data/test.auto_dom.ped Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,9 @@ +1 1_dad 0 0 -1 1 +1 1_mom 0 0 -1 1 +1 1_kid 1_dad 1_mom -1 2 +2 2_dad 0 0 -1 1 +2 2_mom 0 0 -1 2 +2 2_kid 2_dad 2_mom -1 2 +3 3_dad 0 0 -1 2 +3 3_mom 0 0 -1 -9 +3 3_kid 3_dad 3_mom -1 2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/build-data/test.auto_dom.vcf Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,127 @@ +##fileformat=VCFv4.1 +##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)"> +##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. 
Ref base qualities"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered"> +##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?"> +##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions"> +##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias"> +##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction"> +##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes"> +##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation"> +##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality"> +##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads"> +##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities"> +##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth"> +##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. 
Ref read position bias"> +##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[bam/M10478.conc.on.pos.realigned.bam, bam/M10475.conc.on.pos.realigned.bam, bam/M10500.conc.on.pos.realigned.bam, bam/M128215.conc.on.pos.realigned.bam] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=10 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) min_base_quality_score=17 max_deletion_fraction=0.05 multiallelic=false max_alternate_alleles=5 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 bandedIndel=false indelDebug=false ignoreSNPAlleles=false dbsnp=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_mismatching_base_and_quals=false" +##contig=<ID=chr1,length=249250621,assembly=hg19> 
+##contig=<ID=chr10,length=135534747,assembly=hg19> +##contig=<ID=chr11,length=135006516,assembly=hg19> +##contig=<ID=chr11_gl000202_random,length=40103,assembly=hg19> +##contig=<ID=chr12,length=133851895,assembly=hg19> +##contig=<ID=chr13,length=115169878,assembly=hg19> +##contig=<ID=chr14,length=107349540,assembly=hg19> +##contig=<ID=chr15,length=102531392,assembly=hg19> +##contig=<ID=chr16,length=90354753,assembly=hg19> +##contig=<ID=chr17,length=81195210,assembly=hg19> +##contig=<ID=chr17_ctg5_hap1,length=1680828,assembly=hg19> +##contig=<ID=chr17_gl000203_random,length=37498,assembly=hg19> +##contig=<ID=chr17_gl000204_random,length=81310,assembly=hg19> +##contig=<ID=chr17_gl000205_random,length=174588,assembly=hg19> +##contig=<ID=chr17_gl000206_random,length=41001,assembly=hg19> +##contig=<ID=chr18,length=78077248,assembly=hg19> +##contig=<ID=chr18_gl000207_random,length=4262,assembly=hg19> +##contig=<ID=chr19,length=59128983,assembly=hg19> +##contig=<ID=chr19_gl000208_random,length=92689,assembly=hg19> +##contig=<ID=chr19_gl000209_random,length=159169,assembly=hg19> +##contig=<ID=chr1_gl000191_random,length=106433,assembly=hg19> +##contig=<ID=chr1_gl000192_random,length=547496,assembly=hg19> +##contig=<ID=chr2,length=243199373,assembly=hg19> +##contig=<ID=chr20,length=63025520,assembly=hg19> +##contig=<ID=chr21,length=48129895,assembly=hg19> +##contig=<ID=chr21_gl000210_random,length=27682,assembly=hg19> +##contig=<ID=chr22,length=51304566,assembly=hg19> +##contig=<ID=chr3,length=198022430,assembly=hg19> +##contig=<ID=chr4,length=191154276,assembly=hg19> +##contig=<ID=chr4_ctg9_hap1,length=590426,assembly=hg19> +##contig=<ID=chr4_gl000193_random,length=189789,assembly=hg19> +##contig=<ID=chr4_gl000194_random,length=191469,assembly=hg19> +##contig=<ID=chr5,length=180915260,assembly=hg19> +##contig=<ID=chr6,length=171115067,assembly=hg19> +##contig=<ID=chr6_apd_hap1,length=4622290,assembly=hg19> +##contig=<ID=chr6_cox_hap2,length=4795371,assembly=hg19> 
+##contig=<ID=chr6_dbb_hap3,length=4610396,assembly=hg19> +##contig=<ID=chr6_mann_hap4,length=4683263,assembly=hg19> +##contig=<ID=chr6_mcf_hap5,length=4833398,assembly=hg19> +##contig=<ID=chr6_qbl_hap6,length=4611984,assembly=hg19> +##contig=<ID=chr6_ssto_hap7,length=4928567,assembly=hg19> +##contig=<ID=chr7,length=159138663,assembly=hg19> +##contig=<ID=chr7_gl000195_random,length=182896,assembly=hg19> +##contig=<ID=chr8,length=146364022,assembly=hg19> +##contig=<ID=chr8_gl000196_random,length=38914,assembly=hg19> +##contig=<ID=chr8_gl000197_random,length=37175,assembly=hg19> +##contig=<ID=chr9,length=141213431,assembly=hg19> +##contig=<ID=chr9_gl000198_random,length=90085,assembly=hg19> +##contig=<ID=chr9_gl000199_random,length=169874,assembly=hg19> +##contig=<ID=chr9_gl000200_random,length=187035,assembly=hg19> +##contig=<ID=chr9_gl000201_random,length=36148,assembly=hg19> +##contig=<ID=chrM,length=16571,assembly=hg19> +##contig=<ID=chrUn_gl000211,length=166566,assembly=hg19> +##contig=<ID=chrUn_gl000212,length=186858,assembly=hg19> +##contig=<ID=chrUn_gl000213,length=164239,assembly=hg19> +##contig=<ID=chrUn_gl000214,length=137718,assembly=hg19> +##contig=<ID=chrUn_gl000215,length=172545,assembly=hg19> +##contig=<ID=chrUn_gl000216,length=172294,assembly=hg19> +##contig=<ID=chrUn_gl000217,length=172149,assembly=hg19> +##contig=<ID=chrUn_gl000218,length=161147,assembly=hg19> +##contig=<ID=chrUn_gl000219,length=179198,assembly=hg19> +##contig=<ID=chrUn_gl000220,length=161802,assembly=hg19> +##contig=<ID=chrUn_gl000221,length=155397,assembly=hg19> +##contig=<ID=chrUn_gl000222,length=186861,assembly=hg19> +##contig=<ID=chrUn_gl000223,length=180455,assembly=hg19> +##contig=<ID=chrUn_gl000224,length=179693,assembly=hg19> +##contig=<ID=chrUn_gl000225,length=211173,assembly=hg19> +##contig=<ID=chrUn_gl000226,length=15008,assembly=hg19> +##contig=<ID=chrUn_gl000227,length=128374,assembly=hg19> +##contig=<ID=chrUn_gl000228,length=129120,assembly=hg19> 
+##contig=<ID=chrUn_gl000229,length=19913,assembly=hg19> +##contig=<ID=chrUn_gl000230,length=43691,assembly=hg19> +##contig=<ID=chrUn_gl000231,length=27386,assembly=hg19> +##contig=<ID=chrUn_gl000232,length=40652,assembly=hg19> +##contig=<ID=chrUn_gl000233,length=45941,assembly=hg19> +##contig=<ID=chrUn_gl000234,length=40531,assembly=hg19> +##contig=<ID=chrUn_gl000235,length=34474,assembly=hg19> +##contig=<ID=chrUn_gl000236,length=41934,assembly=hg19> +##contig=<ID=chrUn_gl000237,length=45867,assembly=hg19> +##contig=<ID=chrUn_gl000238,length=39939,assembly=hg19> +##contig=<ID=chrUn_gl000239,length=33824,assembly=hg19> +##contig=<ID=chrUn_gl000240,length=41933,assembly=hg19> +##contig=<ID=chrUn_gl000241,length=42152,assembly=hg19> +##contig=<ID=chrUn_gl000242,length=43523,assembly=hg19> +##contig=<ID=chrUn_gl000243,length=43341,assembly=hg19> +##contig=<ID=chrUn_gl000244,length=39929,assembly=hg19> +##contig=<ID=chrUn_gl000245,length=36651,assembly=hg19> +##contig=<ID=chrUn_gl000246,length=38154,assembly=hg19> +##contig=<ID=chrUn_gl000247,length=36422,assembly=hg19> +##contig=<ID=chrUn_gl000248,length=39786,assembly=hg19> +##contig=<ID=chrUn_gl000249,length=38502,assembly=hg19> +##contig=<ID=chrX,length=155270560,assembly=hg19> +##contig=<ID=chrY,length=59373566,assembly=hg19> +##reference=file:///m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa +##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. 
Format: Consequence|Codons|Amino_acids|Gene|HGNC|Feature|EXON|PolyPhen|SIFT"> +##SnpEffVersion="SnpEff 3.0g (build 2012-08-31), by Pablo Cingolani" +##SnpEffCmd="SnpEff -i vcf -o vcf GRCh37.66 test4.vep.vcf " +##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )' "> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1_dad 1_mom 1_kid 2_dad 2_mom 2_kid 3_dad 3_mom 3_kid +chr10 1142208 . T C 3404.3 . AC=8;AF=1.00;AN=8;DP=122;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=2.6747;MQ=36.00;MQ0=0;QD=27.90;CSQ=intron_variant|||ENSG00000047056|WDR37|ENST00000263150|||,downstream_gene_variant|||ENSG00000047056|WDR37|ENST00000436154|||,intron_variant|||ENSG00000047056|WDR37|ENST00000358220|||,stop_lost|Tga/Cga|*/R|ENSG00000047056|WDR37|ENST00000381329|9/9||;EFF=DOWNSTREAM(MODIFIER||||208|WDR37|protein_coding|CODING|ENST00000436154|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000263150|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000358220|),STOP_LOST(HIGH|MISSENSE|Tga/Cga|*250R|249|WDR37|protein_coding|CODING|ENST00000381329|exon_10_1142110_1142566) GT:AD:DP:GQ:PL 0/0:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/0:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 1142209 . T C 3404.3 . 
AC=8;AF=1.00;AN=8;DP=122;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=2.6747;MQ=36.00;MQ0=0;QD=27.90;CSQ=intron_variant|||ENSG00000047056|WDR37|ENST00000263150|||,downstream_gene_variant|||ENSG00000047056|WDR37|ENST00000436154|||,intron_variant|||ENSG00000047056|WDR37|ENST00000358220|||,stop_lost|Tga/Cga|*/R|ENSG00000047056|WDR37|ENST00000381329|9/9||;EFF=DOWNSTREAM(MODIFIER||||208|WDR37|protein_coding|CODING|ENST00000436154|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000263150|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000358220|),STOP_LOST(HIGH|MISSENSE|Tga/Cga|*250R|249|WDR37|protein_coding|CODING|ENST00000381329|exon_10_1142110_1142566) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,24:24:66.14:729,66,0 0/1:1,37:59:87.16:940,87,0 0/1:0,29:49:78.20:899,78,0 1/1:0,24:64:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 48003992 . C T 1047.87 . AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056) GT:AD:DP:GQ:PL 0/0:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/0:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:97.16:940,87,0 0/0:0,29:29:98.20:899,78,0 0/1:0,24:24:96.14:729,66,0 +chr10 48004992 . C T 1047.87 . 
AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056) GT:AD:DP:GQ:PL 0/0:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/0:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 135336656 . G A 38.34 . AC=4;AF=1.00;AN=4;DP=2;Dels=0.00;FS=0.000;HRun=4;HaplotypeScore=0.0000;MQ=37.00;MQ0=0;QD=19.17;CSQ=upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000418356|||,intron_variant&nc_transcript_variant|||ENSG00000214279||ENST00000488261|||,intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541261|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000421586|||,intron_variant|||ENSG00000130649|CYP2E1|ENST00000463117|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000252945|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541080|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000477500|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000480558|||,intron_variant|||ENSG00000214279||ENST00000356567|||;EFF=INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),INTRON(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000463117|),INTRON(MODIFIER||||693|RP11-108K14.4.1|protein_coding|CODING|ENST00000356567|),INTRON(MODIFIER|||||RP11-108K14.4.1|retained_intron|CODING|ENST000004
88261|),UPSTREAM(MODIFIER||||305|CYP2E1|protein_coding|CODING|ENST00000418356|),UPSTREAM(MODIFIER||||355|CYP2E1|protein_coding|CODING|ENST00000421586|),UPSTREAM(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000252945|),UPSTREAM(MODIFIER||||85|CYP2E1|protein_coding|CODING|ENST00000541261|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000477500|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000480558|) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/build-data/test.auto_rec.ped Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,10 @@ +#family_id sample_id paternal_id maternal_id sex phenotype +1 1_dad 0 0 -1 1 +1 1_mom 0 0 -1 1 +1 1_kid 1_dad 1_mom -1 2 +2 2_dad 0 0 -1 1 +2 2_mom 0 0 -1 1 +2 2_kid 2_dad 2_mom -1 2 +3 3_dad 0 0 -1 1 +3 3_mom 0 0 -1 1 +3 3_kid 3_dad 3_mom -1 2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/build-data/test.auto_rec.vcf Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,127 @@ +##fileformat=VCFv4.1 +##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)"> +##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. 
Ref base qualities"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered"> +##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?"> +##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions"> +##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias"> +##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction"> +##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes"> +##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation"> +##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality"> +##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads"> +##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities"> +##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth"> +##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. 
Ref read position bias"> +##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[bam/M10478.conc.on.pos.realigned.bam, bam/M10475.conc.on.pos.realigned.bam, bam/M10500.conc.on.pos.realigned.bam, bam/M128215.conc.on.pos.realigned.bam] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=10 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) min_base_quality_score=17 max_deletion_fraction=0.05 multiallelic=false max_alternate_alleles=5 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 bandedIndel=false indelDebug=false ignoreSNPAlleles=false dbsnp=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_mismatching_base_and_quals=false" +##contig=<ID=chr1,length=249250621,assembly=hg19> 
+##contig=<ID=chr10,length=135534747,assembly=hg19> +##contig=<ID=chr11,length=135006516,assembly=hg19> +##contig=<ID=chr11_gl000202_random,length=40103,assembly=hg19> +##contig=<ID=chr12,length=133851895,assembly=hg19> +##contig=<ID=chr13,length=115169878,assembly=hg19> +##contig=<ID=chr14,length=107349540,assembly=hg19> +##contig=<ID=chr15,length=102531392,assembly=hg19> +##contig=<ID=chr16,length=90354753,assembly=hg19> +##contig=<ID=chr17,length=81195210,assembly=hg19> +##contig=<ID=chr17_ctg5_hap1,length=1680828,assembly=hg19> +##contig=<ID=chr17_gl000203_random,length=37498,assembly=hg19> +##contig=<ID=chr17_gl000204_random,length=81310,assembly=hg19> +##contig=<ID=chr17_gl000205_random,length=174588,assembly=hg19> +##contig=<ID=chr17_gl000206_random,length=41001,assembly=hg19> +##contig=<ID=chr18,length=78077248,assembly=hg19> +##contig=<ID=chr18_gl000207_random,length=4262,assembly=hg19> +##contig=<ID=chr19,length=59128983,assembly=hg19> +##contig=<ID=chr19_gl000208_random,length=92689,assembly=hg19> +##contig=<ID=chr19_gl000209_random,length=159169,assembly=hg19> +##contig=<ID=chr1_gl000191_random,length=106433,assembly=hg19> +##contig=<ID=chr1_gl000192_random,length=547496,assembly=hg19> +##contig=<ID=chr2,length=243199373,assembly=hg19> +##contig=<ID=chr20,length=63025520,assembly=hg19> +##contig=<ID=chr21,length=48129895,assembly=hg19> +##contig=<ID=chr21_gl000210_random,length=27682,assembly=hg19> +##contig=<ID=chr22,length=51304566,assembly=hg19> +##contig=<ID=chr3,length=198022430,assembly=hg19> +##contig=<ID=chr4,length=191154276,assembly=hg19> +##contig=<ID=chr4_ctg9_hap1,length=590426,assembly=hg19> +##contig=<ID=chr4_gl000193_random,length=189789,assembly=hg19> +##contig=<ID=chr4_gl000194_random,length=191469,assembly=hg19> +##contig=<ID=chr5,length=180915260,assembly=hg19> +##contig=<ID=chr6,length=171115067,assembly=hg19> +##contig=<ID=chr6_apd_hap1,length=4622290,assembly=hg19> +##contig=<ID=chr6_cox_hap2,length=4795371,assembly=hg19> 
+##contig=<ID=chr6_dbb_hap3,length=4610396,assembly=hg19> +##contig=<ID=chr6_mann_hap4,length=4683263,assembly=hg19> +##contig=<ID=chr6_mcf_hap5,length=4833398,assembly=hg19> +##contig=<ID=chr6_qbl_hap6,length=4611984,assembly=hg19> +##contig=<ID=chr6_ssto_hap7,length=4928567,assembly=hg19> +##contig=<ID=chr7,length=159138663,assembly=hg19> +##contig=<ID=chr7_gl000195_random,length=182896,assembly=hg19> +##contig=<ID=chr8,length=146364022,assembly=hg19> +##contig=<ID=chr8_gl000196_random,length=38914,assembly=hg19> +##contig=<ID=chr8_gl000197_random,length=37175,assembly=hg19> +##contig=<ID=chr9,length=141213431,assembly=hg19> +##contig=<ID=chr9_gl000198_random,length=90085,assembly=hg19> +##contig=<ID=chr9_gl000199_random,length=169874,assembly=hg19> +##contig=<ID=chr9_gl000200_random,length=187035,assembly=hg19> +##contig=<ID=chr9_gl000201_random,length=36148,assembly=hg19> +##contig=<ID=chrM,length=16571,assembly=hg19> +##contig=<ID=chrUn_gl000211,length=166566,assembly=hg19> +##contig=<ID=chrUn_gl000212,length=186858,assembly=hg19> +##contig=<ID=chrUn_gl000213,length=164239,assembly=hg19> +##contig=<ID=chrUn_gl000214,length=137718,assembly=hg19> +##contig=<ID=chrUn_gl000215,length=172545,assembly=hg19> +##contig=<ID=chrUn_gl000216,length=172294,assembly=hg19> +##contig=<ID=chrUn_gl000217,length=172149,assembly=hg19> +##contig=<ID=chrUn_gl000218,length=161147,assembly=hg19> +##contig=<ID=chrUn_gl000219,length=179198,assembly=hg19> +##contig=<ID=chrUn_gl000220,length=161802,assembly=hg19> +##contig=<ID=chrUn_gl000221,length=155397,assembly=hg19> +##contig=<ID=chrUn_gl000222,length=186861,assembly=hg19> +##contig=<ID=chrUn_gl000223,length=180455,assembly=hg19> +##contig=<ID=chrUn_gl000224,length=179693,assembly=hg19> +##contig=<ID=chrUn_gl000225,length=211173,assembly=hg19> +##contig=<ID=chrUn_gl000226,length=15008,assembly=hg19> +##contig=<ID=chrUn_gl000227,length=128374,assembly=hg19> +##contig=<ID=chrUn_gl000228,length=129120,assembly=hg19> 
+##contig=<ID=chrUn_gl000229,length=19913,assembly=hg19> +##contig=<ID=chrUn_gl000230,length=43691,assembly=hg19> +##contig=<ID=chrUn_gl000231,length=27386,assembly=hg19> +##contig=<ID=chrUn_gl000232,length=40652,assembly=hg19> +##contig=<ID=chrUn_gl000233,length=45941,assembly=hg19> +##contig=<ID=chrUn_gl000234,length=40531,assembly=hg19> +##contig=<ID=chrUn_gl000235,length=34474,assembly=hg19> +##contig=<ID=chrUn_gl000236,length=41934,assembly=hg19> +##contig=<ID=chrUn_gl000237,length=45867,assembly=hg19> +##contig=<ID=chrUn_gl000238,length=39939,assembly=hg19> +##contig=<ID=chrUn_gl000239,length=33824,assembly=hg19> +##contig=<ID=chrUn_gl000240,length=41933,assembly=hg19> +##contig=<ID=chrUn_gl000241,length=42152,assembly=hg19> +##contig=<ID=chrUn_gl000242,length=43523,assembly=hg19> +##contig=<ID=chrUn_gl000243,length=43341,assembly=hg19> +##contig=<ID=chrUn_gl000244,length=39929,assembly=hg19> +##contig=<ID=chrUn_gl000245,length=36651,assembly=hg19> +##contig=<ID=chrUn_gl000246,length=38154,assembly=hg19> +##contig=<ID=chrUn_gl000247,length=36422,assembly=hg19> +##contig=<ID=chrUn_gl000248,length=39786,assembly=hg19> +##contig=<ID=chrUn_gl000249,length=38502,assembly=hg19> +##contig=<ID=chrX,length=155270560,assembly=hg19> +##contig=<ID=chrY,length=59373566,assembly=hg19> +##reference=file:///m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa +##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. 
Format: Consequence|Codons|Amino_acids|Gene|HGNC|Feature|EXON|PolyPhen|SIFT"> +##SnpEffVersion="SnpEff 3.0g (build 2012-08-31), by Pablo Cingolani" +##SnpEffCmd="SnpEff -i vcf -o vcf GRCh37.66 test4.vep.vcf " +##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )' "> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1_dad 1_mom 1_kid 2_dad 2_mom 2_kid 3_dad 3_mom 3_kid +chr10 1142208 . T C 3404.3 . AC=8;AF=1.00;AN=8;DP=122;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=2.6747;MQ=36.00;MQ0=0;QD=27.90;CSQ=intron_variant|||ENSG00000047056|WDR37|ENST00000263150|||,downstream_gene_variant|||ENSG00000047056|WDR37|ENST00000436154|||,intron_variant|||ENSG00000047056|WDR37|ENST00000358220|||,stop_lost|Tga/Cga|*/R|ENSG00000047056|WDR37|ENST00000381329|9/9||;EFF=DOWNSTREAM(MODIFIER||||208|WDR37|protein_coding|CODING|ENST00000436154|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000263150|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000358220|),STOP_LOST(HIGH|MISSENSE|Tga/Cga|*250R|249|WDR37|protein_coding|CODING|ENST00000381329|exon_10_1142110_1142566) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,24:24:66.14:729,66,0 0/1:1,37:59:87.16:940,87,0 0/1:0,29:49:78.20:899,78,0 1/1:0,24:64:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 48003992 . C T 1047.87 . 
AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 48004992 . C T 1047.87 . AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 135336656 . G A 38.34 . 
AC=4;AF=1.00;AN=4;DP=2;Dels=0.00;FS=0.000;HRun=4;HaplotypeScore=0.0000;MQ=37.00;MQ0=0;QD=19.17;CSQ=upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000418356|||,intron_variant&nc_transcript_variant|||ENSG00000214279||ENST00000488261|||,intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541261|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000421586|||,intron_variant|||ENSG00000130649|CYP2E1|ENST00000463117|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000252945|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541080|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000477500|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000480558|||,intron_variant|||ENSG00000214279||ENST00000356567|||;EFF=INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),INTRON(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000463117|),INTRON(MODIFIER||||693|RP11-108K14.4.1|protein_coding|CODING|ENST00000356567|),INTRON(MODIFIER|||||RP11-108K14.4.1|retained_intron|CODING|ENST00000488261|),UPSTREAM(MODIFIER||||305|CYP2E1|protein_coding|CODING|ENST00000418356|),UPSTREAM(MODIFIER||||355|CYP2E1|protein_coding|CODING|ENST00000421586|),UPSTREAM(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000252945|),UPSTREAM(MODIFIER||||85|CYP2E1|protein_coding|CODING|ENST00000541261|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000477500|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000480558|) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 135369532 . T C 122.62 . 
AC=2;AF=0.25;AN=8;BaseQRankSum=2.118;DP=239;Dels=0.00;FS=5.194;HRun=2;HaplotypeScore=5.7141;MQ=36.02;MQ0=0;MQRankSum=0.082;QD=2.31;ReadPosRankSum=-0.695;CSQ=missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000303903|9/13|benign(0.001)|tolerated(1),missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000432597|10/14|benign(0)|tolerated(1),downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000460441|||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000343131|9/13|benign(0.001)|tolerated(1),intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000171772|SYCE1|ENST00000479535|6/10||,downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000482127|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000130649|CYP2E1|ENST00000368520|6/6||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000368517|9/13|benign(0)|tolerated(1);EFF=DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000460441|),DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000482127|),EXON(MODIFIER|||||CYP2E1|retained_intron|CODING|ENST00000368520|),EXON(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000479535|),INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000368517|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000432597|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|318|SYCE1|protein_coding|CODING|ENST00000303903|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|351|SYCE1|protein_coding|CODING|ENST00000343131|exon_10_135369485_135369551) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 
0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,24:24:66.14:729,66,0 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/build-data/test.comp_het.ped Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,13 @@ +#Family_ID Individual_ID Paternal_ID Maternal_ID Sex Phenotype ethnicity +1 child_1 dad_1 mom_1 1 2 caucasian +2 child_2 dad_2 mom_2 1 2 caucasian +2 dad_2 -9 -9 1 1 caucasian +2 mom_2 -9 -9 2 1 caucasian +1 dad_1 -9 -9 1 1 caucasian +1 mom_1 -9 -9 2 1 caucasian +3 child_3 dad_3 mom_3 1 2 caucasian +3 dad_3 -9 -9 1 1 caucasian +3 mom_3 -9 -9 2 1 caucasian +4 child_4 dad_4 mom_4 1 2 caucasianNEuropean +4 dad_4 -9 -9 1 1 caucasianNEuropean +4 mom_4 -9 -9 2 1 caucasianNEuropean
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/build-data/test.comp_het.vcf Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,120 @@ +##fileformat=VCFv4.1 +##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)"> +##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. 
Ref base qualities"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered"> +##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?"> +##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions"> +##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias"> +##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction"> +##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes"> +##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation"> +##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality"> +##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads"> +##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities"> +##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth"> +##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. 
Ref read position bias"> +##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[bam/all.bam] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/human_g1k_v37.fasta rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=20 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) min_base_quality_score=17 max_deletion_fraction=0.05 multiallelic=false max_alternate_alleles=5 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 bandedIndel=false indelDebug=false ignoreSNPAlleles=false dbsnp=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_mismatching_base_and_quals=false" +##contig=<ID=1,length=249250621,assembly=b37> +##contig=<ID=10,length=135534747,assembly=b37> +##contig=<ID=11,length=135006516,assembly=b37> +##contig=<ID=12,length=133851895,assembly=b37> 
+##contig=<ID=13,length=115169878,assembly=b37> +##contig=<ID=14,length=107349540,assembly=b37> +##contig=<ID=15,length=102531392,assembly=b37> +##contig=<ID=16,length=90354753,assembly=b37> +##contig=<ID=17,length=81195210,assembly=b37> +##contig=<ID=18,length=78077248,assembly=b37> +##contig=<ID=19,length=59128983,assembly=b37> +##contig=<ID=2,length=243199373,assembly=b37> +##contig=<ID=20,length=63025520,assembly=b37> +##contig=<ID=21,length=48129895,assembly=b37> +##contig=<ID=22,length=51304566,assembly=b37> +##contig=<ID=3,length=198022430,assembly=b37> +##contig=<ID=4,length=191154276,assembly=b37> +##contig=<ID=5,length=180915260,assembly=b37> +##contig=<ID=6,length=171115067,assembly=b37> +##contig=<ID=7,length=159138663,assembly=b37> +##contig=<ID=8,length=146364022,assembly=b37> +##contig=<ID=9,length=141213431,assembly=b37> +##contig=<ID=GL000191.1,length=106433,assembly=b37> +##contig=<ID=GL000192.1,length=547496,assembly=b37> +##contig=<ID=GL000193.1,length=189789,assembly=b37> +##contig=<ID=GL000194.1,length=191469,assembly=b37> +##contig=<ID=GL000195.1,length=182896,assembly=b37> +##contig=<ID=GL000196.1,length=38914,assembly=b37> +##contig=<ID=GL000197.1,length=37175,assembly=b37> +##contig=<ID=GL000198.1,length=90085,assembly=b37> +##contig=<ID=GL000199.1,length=169874,assembly=b37> +##contig=<ID=GL000200.1,length=187035,assembly=b37> +##contig=<ID=GL000201.1,length=36148,assembly=b37> +##contig=<ID=GL000202.1,length=40103,assembly=b37> +##contig=<ID=GL000203.1,length=37498,assembly=b37> +##contig=<ID=GL000204.1,length=81310,assembly=b37> +##contig=<ID=GL000205.1,length=174588,assembly=b37> +##contig=<ID=GL000206.1,length=41001,assembly=b37> +##contig=<ID=GL000207.1,length=4262,assembly=b37> +##contig=<ID=GL000208.1,length=92689,assembly=b37> +##contig=<ID=GL000209.1,length=159169,assembly=b37> +##contig=<ID=GL000210.1,length=27682,assembly=b37> +##contig=<ID=GL000211.1,length=166566,assembly=b37> 
+##contig=<ID=GL000212.1,length=186858,assembly=b37> +##contig=<ID=GL000213.1,length=164239,assembly=b37> +##contig=<ID=GL000214.1,length=137718,assembly=b37> +##contig=<ID=GL000215.1,length=172545,assembly=b37> +##contig=<ID=GL000216.1,length=172294,assembly=b37> +##contig=<ID=GL000217.1,length=172149,assembly=b37> +##contig=<ID=GL000218.1,length=161147,assembly=b37> +##contig=<ID=GL000219.1,length=179198,assembly=b37> +##contig=<ID=GL000220.1,length=161802,assembly=b37> +##contig=<ID=GL000221.1,length=155397,assembly=b37> +##contig=<ID=GL000222.1,length=186861,assembly=b37> +##contig=<ID=GL000223.1,length=180455,assembly=b37> +##contig=<ID=GL000224.1,length=179693,assembly=b37> +##contig=<ID=GL000225.1,length=211173,assembly=b37> +##contig=<ID=GL000226.1,length=15008,assembly=b37> +##contig=<ID=GL000227.1,length=128374,assembly=b37> +##contig=<ID=GL000228.1,length=129120,assembly=b37> +##contig=<ID=GL000229.1,length=19913,assembly=b37> +##contig=<ID=GL000230.1,length=43691,assembly=b37> +##contig=<ID=GL000231.1,length=27386,assembly=b37> +##contig=<ID=GL000232.1,length=40652,assembly=b37> +##contig=<ID=GL000233.1,length=45941,assembly=b37> +##contig=<ID=GL000234.1,length=40531,assembly=b37> +##contig=<ID=GL000235.1,length=34474,assembly=b37> +##contig=<ID=GL000236.1,length=41934,assembly=b37> +##contig=<ID=GL000237.1,length=45867,assembly=b37> +##contig=<ID=GL000238.1,length=39939,assembly=b37> +##contig=<ID=GL000239.1,length=33824,assembly=b37> +##contig=<ID=GL000240.1,length=41933,assembly=b37> +##contig=<ID=GL000241.1,length=42152,assembly=b37> +##contig=<ID=GL000242.1,length=43523,assembly=b37> +##contig=<ID=GL000243.1,length=43341,assembly=b37> +##contig=<ID=GL000244.1,length=39929,assembly=b37> +##contig=<ID=GL000245.1,length=36651,assembly=b37> +##contig=<ID=GL000246.1,length=38154,assembly=b37> +##contig=<ID=GL000247.1,length=36422,assembly=b37> +##contig=<ID=GL000248.1,length=39786,assembly=b37> +##contig=<ID=GL000249.1,length=38502,assembly=b37> 
+##contig=<ID=MT,length=16569,assembly=b37> +##contig=<ID=X,length=155270560,assembly=b37> +##contig=<ID=Y,length=59373566,assembly=b37> +##reference=file:///home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/human_g1k_v37.fasta +##SnpEffVersion="SnpEff 3.2 (build 2013-03-14), by Pablo Cingolani" +##SnpEffCmd="SnpEff -i vcf -o vcf GRCh37.66 /if2/arq5x/cphg-quinlan/projects/sms-elsea/varCalling/all.vcf " +##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon | GenotypeNum [ | ERRORS | WARNINGS ] )' "> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT child_1 child_2 dad_2 mom_2 dad_1 mom_1 child_3 dad_3 mom_3 child_4 dad_4 mom_4 +1 16977 . G A 2022.88 . AC=9;AF=0.375;AN=24;BaseQRankSum=-25.424;DP=2999;DS;Dels=0.00;FS=4.077;HRun=0;HaplotypeScore=1.6017;InbreedingCoeff=-0.5953;MQ=10.20;MQ0=124;MQRankSum=0.084;QD=0.90;ReadPosRankSum=0.727;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492||1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|6|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|7|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|7|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|6|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|7|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|7|1) GT:AD:DP:GQ:PL 
0/1:181,69:250:4.24:4,0,401 0/0:190,60:250:5.21:0,5,708 0/1:167,83:250:72.66:73,0,721 0/0:187,63:250:63.12:0,63,633 0/0:221,29:250:96.57:0,97,1729 0/1:160,90:250:99:175,0,537 0/1:177,73:250:99:412,0,723 0/1:183,67:250:99:209,0,838 0/1:174,76:250:99:284,0,844 0/1:194,53:248:26.15:26,0,756 0/1:149,101:250:99:681,0,496 0/1:184,66:250:99:229,0,561 +1 17222 . A G 225.47 . AC=4;AF=0.167;AN=24;BaseQRankSum=-1.725;DP=2985;DS;Dels=0.00;FS=6.073;HRun=0;HaplotypeScore=1.1157;InbreedingCoeff=-0.2212;MQ=14.57;MQ0=605;MQRankSum=-3.790;QD=0.23;ReadPosRankSum=1.860;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|8|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|6|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|6|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|6|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|6|1) GT:AD:DP:GQ:PL 0/0:224,25:250:44.94:0,45,987 0/1:190,44:241:55.80:56,0,776 0/0:230,17:250:90.16:0,90,940 0/1:212,34:250:36.11:36,0,754 0/0:191,55:248:18.04:0,18,1324 0/0:232,17:250:84.90:0,85,1270 0/0:223,27:250:99:0,129,1563 0/0:237,13:250:99:0,107,1537 0/1:207,42:249:99:106,0,1096 0/1:204,43:247:91.42:91,0,1280 0/0:229,20:249:99:0,113,1404 0/0:221,29:250:3.04:0,3,1183 +1 17363 . TTCT T 628.85 . 
AC=2;AF=0.083;AN=24;BaseQRankSum=4.577;DP=2951;DS;FS=10.112;HRun=0;HaplotypeScore=316.7300;InbreedingCoeff=-0.0909;MQ=22.73;MQ0=27;MQRankSum=1.681;QD=1.33;ReadPosRankSum=0.329;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|8|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|6|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|6|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|6|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|6|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|5|1),SPLICE_SITE_ACCEPTOR(HIGH|||||WASH7P|protein_coding|NON_CODING|ENST00000438504|6|1),SPLICE_SITE_ACCEPTOR(HIGH|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|6|1),SPLICE_SITE_ACCEPTOR(HIGH|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|6|1) GT:AD:DP:GQ:PL 0/0:250,0:70:99:0,169,3371 0/0:250,0:78:99:0,199,4105 0/0:250,0:70:99:0,187,3984 0/0:250,0:63:99:0,166,3406 0/0:243,0:89:99:0,262,5364 0/0:250,0:62:99:0,172,3575 0/0:250,0:79:99:0,223,4726 0/0:249,1:80:99:0,181,4749 0/0:234,1:85:99:0,241,5015 0/1:205,27:73:99:371,0,3406 0/1:225,16:74:99:318,0,3528 0/0:250,0:64:99:0,193,3988 +1 17563 . G A 79.88 . 
AC=1;AF=0.042;AN=24;BaseQRankSum=3.375;DP=3000;DS;Dels=0.00;FS=9.603;HRun=0;HaplotypeScore=0.9909;InbreedingCoeff=-0.0518;MQ=30.15;MQ0=97;MQRankSum=-1.386;QD=0.32;ReadPosRankSum=-0.446;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|4|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|7|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|4|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|4|1) GT:AD:DP:GQ:PL 0/0:243,7:250:99:0,202,2559 0/0:246,4:250:99:0,187,2479 0/0:250,0:250:99:0,256,3232 0/0:249,1:250:99:0,239,3309 0/0:231,18:250:21.61:0,22,2802 0/0:248,2:250:99:0,214,2657 0/1:232,18:250:99:121,0,1801 0/0:249,1:250:99:0,263,3184 0/0:238,12:250:6.72:0,7,2591 0/0:250,0:250:99:0,343,3875 0/0:247,3:250:99:0,280,3428 0/0:250,0:250:99:0,301,3416 +1 17697 . G C 255.3 . 
AC=4;AF=0.167;AN=24;BaseQRankSum=-1.815;DP=2999;DS;Dels=0.00;FS=0.000;HRun=1;HaplotypeScore=1.7379;InbreedingCoeff=-0.2256;MQ=15.66;MQ0=86;MQRankSum=2.999;QD=0.26;ReadPosRankSum=-2.160;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|7|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|4|1) GT:AD:DP:GQ:PL 0/1:214,36:250:99:132,0,595 0/0:249,1:250:78.22:0,78,967 0/0:248,1:250:81.24:0,81,1022 0/1:216,34:250:91.94:92,0,404 0/0:226,24:250:56.86:0,57,1099 0/0:245,4:249:23.87:0,24,704 0/0:225,25:250:2.97:0,3,746 0/1:216,33:250:80.80:81,0,810 0/0:245,4:250:73.97:0,74,1201 0/1:222,28:250:12.96:13,0,920 0/0:249,1:250:87.24:0,87,1000 0/0:182,66:250:8.40:0,8,703 +1 17722 . A G 32.03 . 
AC=3;AF=0.125;AN=24;BaseQRankSum=0.923;DP=2937;DS;Dels=0.00;FS=0.000;HRun=2;HaplotypeScore=1.9343;InbreedingCoeff=-0.1033;MQ=14.33;MQ0=62;MQRankSum=-4.474;QD=0.04;ReadPosRankSum=0.750;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|7|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|4|1) GT:AD:DP:GQ:PL 0/0:247,3:250:51.15:0,51,628 0/0:250,0:250:60.18:0,60,746 0/0:250,0:250:54.17:0,54,685 0/0:249,1:250:54.15:0,54,644 0/0:233,0:233:81.25:0,81,1014 0/0:235,4:239:45.13:0,45,549 0/0:249,0:250:45.13:0,45,514 0/0:249,1:250:78.20:0,78,853 0/0:247,0:247:90.24:0,90,1012 0/1:227,17:244:0.06:0,0,670 0/1:214,11:225:4.62:5,0,542 0/1:236,13:249:71.41:71,0,448 +1 17730 . C A 102.87 . 
AC=5;AF=0.208;AN=24;BaseQRankSum=-11.508;DP=2968;DS;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=1.0610;InbreedingCoeff=-0.2498;MQ=13.12;MQ0=24;MQRankSum=-4.433;QD=0.08;ReadPosRankSum=1.952;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|4|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|6|1),SPLICE_SITE_ACCEPTOR(HIGH|||||WASH7P|protein_coding|NON_CODING|ENST00000430492|7|1) GT:AD:DP:GQ:PL 0/0:246,4:250:36.11:0,36,464 0/0:244,5:249:57.17:0,57,727 0/0:244,6:250:48.15:0,48,628 0/0:247,3:250:51.13:0,51,621 0/1:242,8:250:26.26:26,0,830 0/0:246,4:250:48.14:0,48,601 0/1:238,11:250:48.54:49,0,386 0/0:244,5:249:42.12:0,42,491 0/0:239,3:243:23.74:0,24,609 0/1:221,13:234:13.85:14,0,482 0/1:232,11:243:7:7,0,501 0/1:238,12:250:73.18:73,0,368 +1 17746 . A G 607.7 . 
AC=8;AF=0.333;AN=24;BaseQRankSum=13.191;DP=2993;DS;Dels=0.00;FS=0.000;HRun=1;HaplotypeScore=0.4155;InbreedingCoeff=-0.5280;MQ=11.87;MQ0=5;MQRankSum=-4.672;QD=0.30;ReadPosRankSum=3.574;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|3|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|4|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|4|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|3|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|4|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|3|1) GT:AD:DP:GQ:PL 0/0:206,44:250:7.45:0,7,373 0/0:221,29:250:5.12:0,5,468 0/0:224,26:250:3.61:0,4,382 0/0:216,34:250:9.49:0,9,405 0/1:243,7:250:43.06:43,0,781 0/1:195,55:250:99:113,0,232 0/1:218,32:250:44.30:44,0,212 0/1:219,31:250:0.03:0,0,330 0/1:211,39:250:74.42:74,0,236 0/1:209,34:243:68.40:68,0,298 0/1:172,77:249:99:193,0,137 0/1:219,30:250:99:137,0,197
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/build-data/test.de_novo.ped Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,9 @@ +1 1_dad 0 0 -1 1 +1 1_mom 0 0 -1 1 +1 1_kid 1_dad 1_mom -1 2 +2 2_dad 0 0 -1 1 +2 2_mom 0 0 -1 1 +2 2_kid 2_dad 2_mom -1 2 +3 3_dad 0 0 -1 1 +3 3_mom 0 0 -1 1 +3 3_kid 3_dad 3_mom -1 2 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/build-data/test.de_novo.vcf Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,127 @@ +##fileformat=VCFv4.1 +##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)"> +##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. 
Ref base qualities"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered"> +##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?"> +##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions"> +##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias"> +##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction"> +##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes"> +##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation"> +##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality"> +##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads"> +##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities"> +##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth"> +##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. 
Ref read position bias"> +##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[bam/M10478.conc.on.pos.realigned.bam, bam/M10475.conc.on.pos.realigned.bam, bam/M10500.conc.on.pos.realigned.bam, bam/M128215.conc.on.pos.realigned.bam] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=10 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) min_base_quality_score=17 max_deletion_fraction=0.05 multiallelic=false max_alternate_alleles=5 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 bandedIndel=false indelDebug=false ignoreSNPAlleles=false dbsnp=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_mismatching_base_and_quals=false" +##contig=<ID=chr1,length=249250621,assembly=hg19> 
+##contig=<ID=chr10,length=135534747,assembly=hg19> +##contig=<ID=chr11,length=135006516,assembly=hg19> +##contig=<ID=chr11_gl000202_random,length=40103,assembly=hg19> +##contig=<ID=chr12,length=133851895,assembly=hg19> +##contig=<ID=chr13,length=115169878,assembly=hg19> +##contig=<ID=chr14,length=107349540,assembly=hg19> +##contig=<ID=chr15,length=102531392,assembly=hg19> +##contig=<ID=chr16,length=90354753,assembly=hg19> +##contig=<ID=chr17,length=81195210,assembly=hg19> +##contig=<ID=chr17_ctg5_hap1,length=1680828,assembly=hg19> +##contig=<ID=chr17_gl000203_random,length=37498,assembly=hg19> +##contig=<ID=chr17_gl000204_random,length=81310,assembly=hg19> +##contig=<ID=chr17_gl000205_random,length=174588,assembly=hg19> +##contig=<ID=chr17_gl000206_random,length=41001,assembly=hg19> +##contig=<ID=chr18,length=78077248,assembly=hg19> +##contig=<ID=chr18_gl000207_random,length=4262,assembly=hg19> +##contig=<ID=chr19,length=59128983,assembly=hg19> +##contig=<ID=chr19_gl000208_random,length=92689,assembly=hg19> +##contig=<ID=chr19_gl000209_random,length=159169,assembly=hg19> +##contig=<ID=chr1_gl000191_random,length=106433,assembly=hg19> +##contig=<ID=chr1_gl000192_random,length=547496,assembly=hg19> +##contig=<ID=chr2,length=243199373,assembly=hg19> +##contig=<ID=chr20,length=63025520,assembly=hg19> +##contig=<ID=chr21,length=48129895,assembly=hg19> +##contig=<ID=chr21_gl000210_random,length=27682,assembly=hg19> +##contig=<ID=chr22,length=51304566,assembly=hg19> +##contig=<ID=chr3,length=198022430,assembly=hg19> +##contig=<ID=chr4,length=191154276,assembly=hg19> +##contig=<ID=chr4_ctg9_hap1,length=590426,assembly=hg19> +##contig=<ID=chr4_gl000193_random,length=189789,assembly=hg19> +##contig=<ID=chr4_gl000194_random,length=191469,assembly=hg19> +##contig=<ID=chr5,length=180915260,assembly=hg19> +##contig=<ID=chr6,length=171115067,assembly=hg19> +##contig=<ID=chr6_apd_hap1,length=4622290,assembly=hg19> +##contig=<ID=chr6_cox_hap2,length=4795371,assembly=hg19> 
+##contig=<ID=chr6_dbb_hap3,length=4610396,assembly=hg19> +##contig=<ID=chr6_mann_hap4,length=4683263,assembly=hg19> +##contig=<ID=chr6_mcf_hap5,length=4833398,assembly=hg19> +##contig=<ID=chr6_qbl_hap6,length=4611984,assembly=hg19> +##contig=<ID=chr6_ssto_hap7,length=4928567,assembly=hg19> +##contig=<ID=chr7,length=159138663,assembly=hg19> +##contig=<ID=chr7_gl000195_random,length=182896,assembly=hg19> +##contig=<ID=chr8,length=146364022,assembly=hg19> +##contig=<ID=chr8_gl000196_random,length=38914,assembly=hg19> +##contig=<ID=chr8_gl000197_random,length=37175,assembly=hg19> +##contig=<ID=chr9,length=141213431,assembly=hg19> +##contig=<ID=chr9_gl000198_random,length=90085,assembly=hg19> +##contig=<ID=chr9_gl000199_random,length=169874,assembly=hg19> +##contig=<ID=chr9_gl000200_random,length=187035,assembly=hg19> +##contig=<ID=chr9_gl000201_random,length=36148,assembly=hg19> +##contig=<ID=chrM,length=16571,assembly=hg19> +##contig=<ID=chrUn_gl000211,length=166566,assembly=hg19> +##contig=<ID=chrUn_gl000212,length=186858,assembly=hg19> +##contig=<ID=chrUn_gl000213,length=164239,assembly=hg19> +##contig=<ID=chrUn_gl000214,length=137718,assembly=hg19> +##contig=<ID=chrUn_gl000215,length=172545,assembly=hg19> +##contig=<ID=chrUn_gl000216,length=172294,assembly=hg19> +##contig=<ID=chrUn_gl000217,length=172149,assembly=hg19> +##contig=<ID=chrUn_gl000218,length=161147,assembly=hg19> +##contig=<ID=chrUn_gl000219,length=179198,assembly=hg19> +##contig=<ID=chrUn_gl000220,length=161802,assembly=hg19> +##contig=<ID=chrUn_gl000221,length=155397,assembly=hg19> +##contig=<ID=chrUn_gl000222,length=186861,assembly=hg19> +##contig=<ID=chrUn_gl000223,length=180455,assembly=hg19> +##contig=<ID=chrUn_gl000224,length=179693,assembly=hg19> +##contig=<ID=chrUn_gl000225,length=211173,assembly=hg19> +##contig=<ID=chrUn_gl000226,length=15008,assembly=hg19> +##contig=<ID=chrUn_gl000227,length=128374,assembly=hg19> +##contig=<ID=chrUn_gl000228,length=129120,assembly=hg19> 
+##contig=<ID=chrUn_gl000229,length=19913,assembly=hg19> +##contig=<ID=chrUn_gl000230,length=43691,assembly=hg19> +##contig=<ID=chrUn_gl000231,length=27386,assembly=hg19> +##contig=<ID=chrUn_gl000232,length=40652,assembly=hg19> +##contig=<ID=chrUn_gl000233,length=45941,assembly=hg19> +##contig=<ID=chrUn_gl000234,length=40531,assembly=hg19> +##contig=<ID=chrUn_gl000235,length=34474,assembly=hg19> +##contig=<ID=chrUn_gl000236,length=41934,assembly=hg19> +##contig=<ID=chrUn_gl000237,length=45867,assembly=hg19> +##contig=<ID=chrUn_gl000238,length=39939,assembly=hg19> +##contig=<ID=chrUn_gl000239,length=33824,assembly=hg19> +##contig=<ID=chrUn_gl000240,length=41933,assembly=hg19> +##contig=<ID=chrUn_gl000241,length=42152,assembly=hg19> +##contig=<ID=chrUn_gl000242,length=43523,assembly=hg19> +##contig=<ID=chrUn_gl000243,length=43341,assembly=hg19> +##contig=<ID=chrUn_gl000244,length=39929,assembly=hg19> +##contig=<ID=chrUn_gl000245,length=36651,assembly=hg19> +##contig=<ID=chrUn_gl000246,length=38154,assembly=hg19> +##contig=<ID=chrUn_gl000247,length=36422,assembly=hg19> +##contig=<ID=chrUn_gl000248,length=39786,assembly=hg19> +##contig=<ID=chrUn_gl000249,length=38502,assembly=hg19> +##contig=<ID=chrX,length=155270560,assembly=hg19> +##contig=<ID=chrY,length=59373566,assembly=hg19> +##reference=file:///m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa +##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. 
Format: Consequence|Codons|Amino_acids|Gene|HGNC|Feature|EXON|PolyPhen|SIFT"> +##SnpEffVersion="SnpEff 3.0g (build 2012-08-31), by Pablo Cingolani" +##SnpEffCmd="SnpEff -i vcf -o vcf GRCh37.66 test4.vep.vcf " +##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )' "> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1_dad 1_mom 1_kid 2_dad 2_mom 2_kid 3_dad 3_mom 3_kid +chr10 1142208 . T C 3404.3 . AC=8;AF=1.00;AN=8;DP=122;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=2.6747;MQ=36.00;MQ0=0;QD=27.90;CSQ=intron_variant|||ENSG00000047056|WDR37|ENST00000263150|||,downstream_gene_variant|||ENSG00000047056|WDR37|ENST00000436154|||,intron_variant|||ENSG00000047056|WDR37|ENST00000358220|||,stop_lost|Tga/Cga|*/R|ENSG00000047056|WDR37|ENST00000381329|9/9||;EFF=DOWNSTREAM(MODIFIER||||208|WDR37|protein_coding|CODING|ENST00000436154|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000263150|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000358220|),STOP_LOST(HIGH|MISSENSE|Tga/Cga|*250R|249|WDR37|protein_coding|CODING|ENST00000381329|exon_10_1142110_1142566) GT:AD:DP:GQ:PL 0/0:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,23:24:66.14:729,66,0 0/0:1,37:59:87.16:940,87,0 0/0:0,29:49:78.20:899,78,0 0/0:0,22:64:66.14:729,66,0 0/0:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 1/1:0,24:24:66.14:729,66,0 +chr10 48003992 . C T 1047.87 . 
AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,23:24:66.14:729,66,0 0/0:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,23:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,23:24:66.14:729,66,0 +chr10 48004992 . C T 1047.87 . AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,23:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,23:24:66.14:729,66,0 0/0:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,23:24:66.14:729,66,0 +chr10 135336656 . G A 38.34 . 
AC=4;AF=1.00;AN=4;DP=2;Dels=0.00;FS=0.000;HRun=4;HaplotypeScore=0.0000;MQ=37.00;MQ0=0;QD=19.17;CSQ=upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000418356|||,intron_variant&nc_transcript_variant|||ENSG00000214279||ENST00000488261|||,intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541261|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000421586|||,intron_variant|||ENSG00000130649|CYP2E1|ENST00000463117|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000252945|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541080|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000477500|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000480558|||,intron_variant|||ENSG00000214279||ENST00000356567|||;EFF=INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),INTRON(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000463117|),INTRON(MODIFIER||||693|RP11-108K14.4.1|protein_coding|CODING|ENST00000356567|),INTRON(MODIFIER|||||RP11-108K14.4.1|retained_intron|CODING|ENST00000488261|),UPSTREAM(MODIFIER||||305|CYP2E1|protein_coding|CODING|ENST00000418356|),UPSTREAM(MODIFIER||||355|CYP2E1|protein_coding|CODING|ENST00000421586|),UPSTREAM(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000252945|),UPSTREAM(MODIFIER||||85|CYP2E1|protein_coding|CODING|ENST00000541261|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000477500|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000480558|) GT:AD:DP:GQ:PL 0/0:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/0:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 135369532 . T C 122.62 . 
AC=2;AF=0.25;AN=8;BaseQRankSum=2.118;DP=239;Dels=0.00;FS=5.194;HRun=2;HaplotypeScore=5.7141;MQ=36.02;MQ0=0;MQRankSum=0.082;QD=2.31;ReadPosRankSum=-0.695;CSQ=missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000303903|9/13|benign(0.001)|tolerated(1),missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000432597|10/14|benign(0)|tolerated(1),downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000460441|||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000343131|9/13|benign(0.001)|tolerated(1),intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000171772|SYCE1|ENST00000479535|6/10||,downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000482127|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000130649|CYP2E1|ENST00000368520|6/6||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000368517|9/13|benign(0)|tolerated(1);EFF=DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000460441|),DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000482127|),EXON(MODIFIER|||||CYP2E1|retained_intron|CODING|ENST00000368520|),EXON(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000479535|),INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000368517|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000432597|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|318|SYCE1|protein_coding|CODING|ENST00000303903|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|351|SYCE1|protein_coding|CODING|ENST00000343131|exon_10_135369485_135369551) GT:AD:DP:GQ:PL 0/0:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,22:24:66.14:729,66,0 0/0:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,21:24:66.14:729,66,0 
0/0:1,37:50:87.16:940,87,0 0/0:0,29:50:78.20:899,78,0 0/1:0,24:50:66.14:729,66,0
# test-data/util/build-gemini-testdata.sh (file added by this changeset)
#
# Usage: build-gemini-testdata.sh [PATH_TO_FULL_GEMINI_ANNOTATION_FILES]
#
# With a path argument, the annotation files found there are first shrunk to
# GENOMIC_REGION and written to $OUT_PTH. With or without the argument, the
# gemini test databases are then (re)built from the inputs in build-data/.
#
# Errors of individual commands are deliberately not fatal (no `set -e`):
# the grep filters below exit non-zero whenever nothing matches.

# All relative paths below are relative to the directory of this script.
cd "$(dirname "$0")" || exit 1

export GEMINI_CONFIG=../test-cache
OUT_PTH=$GEMINI_CONFIG/gemini/data
GENOMIC_REGION=3:187000000-187500000


if [ -n "$1" ]; then

    IN_PTH="$1"
    # downsample all vcf and bed annotation files to the region of interest and reindex
    for vcf in "$IN_PTH"/*.gz
    do
        # skip the unexpanded pattern if nothing matched
        [ -e "$vcf" ] || continue
        # the hprd file is not tabix-indexed data; it is filtered separately below
        case "$vcf" in
            *hprd_interaction_edges.gz*) continue ;;
        esac
        python ./shrink_tabix.py "$vcf" -r "$GENOMIC_REGION" -o "$OUT_PTH/$(basename "$vcf")"
    done

    # downsample gene_table files to the region of interest
    echo "$IN_PTH/summary_gene_table_v75 -> $OUT_PTH/summary_gene_table_v75"
    python ./shrink_simple_tab.py "$IN_PTH/summary_gene_table_v75" -r "chr$GENOMIC_REGION" -c 0 8 9 -n 1 -o "$OUT_PTH/summary_gene_table_v75"

    echo "$IN_PTH/detailed_gene_table_v75 -> $OUT_PTH/detailed_gene_table_v75"
    python ./shrink_simple_tab.py "$IN_PTH/detailed_gene_table_v75" -r "chr$GENOMIC_REGION" -c 0 11 12 -n 1 -o "$OUT_PTH/detailed_gene_table_v75"

    # filter kegg_pathway files to retain only records of the genes listed
    # in the downsampled summary_gene_table
    for kegg in "$IN_PTH"/kegg_pathways_*
    do
        [ -e "$kegg" ] || continue
        kegg_out="$OUT_PTH/$(basename "$kegg")"
        echo "$kegg -> $kegg_out"
        cut -f2 "$OUT_PTH/summary_gene_table_v75" | grep -Fv None | grep -Fwf - "$kegg" > "$kegg_out"
    done

    # filter hprd_interaction file to retain only records of the genes listed
    # in the downsampled summary_gene_table
    echo "$IN_PTH/hprd_interaction_edges.gz -> $OUT_PTH/hprd_interaction_edges.gz"
    bgzip -dc "$IN_PTH/hprd_interaction_edges.gz" > "$OUT_PTH/hprd_interaction_edges"
    cut -f2 "$OUT_PTH/summary_gene_table_v75" | grep -Fv None | grep -Ff - "$OUT_PTH/hprd_interaction_edges" | bgzip > "$OUT_PTH/hprd_interaction_edges.gz"
    rm "$OUT_PTH/hprd_interaction_edges"

    # filter cancer_gene_census file to retain only records of the genes listed
    # in the downsampled summary_gene_table;
    # TO DO: make the filter stricter by looking for matches only in the first
    # column of the cancer_gene_census file (but the file is relatively small anyway)
    echo "$IN_PTH/cancer_gene_census.20140120.tsv -> $OUT_PTH/cancer_gene_census.20140120.tsv"
    cut -f2 "$OUT_PTH/summary_gene_table_v75" | grep -Fv None | grep -Fwf - "$IN_PTH/cancer_gene_census.20140120.tsv" > "$OUT_PTH/cancer_gene_census.20140120.tsv"

else
    echo "no path to gemini annotation files provided - only building test databases"
fi


# now use gemini load to build the test databases
echo "Building gemini test databases"
echo "Test databases for gemini_load"
gemini --annotation-dir "$OUT_PTH" load --skip-cadd --skip-gerp-bp -v build-data/gemini_load_input.vcf -t snpEff ../gemini_load_result1.db
gemini --annotation-dir "$OUT_PTH" load --skip-cadd --skip-gerp-bp -v build-data/gemini_load_input.vcf -t snpEff --skip-gene-tables --no-load-genotypes ../gemini_load_result2.db
echo "Test database for gemini_amend"
gemini --annotation-dir "$OUT_PTH" load --skip-cadd --skip-gerp-bp -v build-data/test.auto_rec.vcf -t snpEff ../gemini_amend_input.db
echo "Test database for gemini_annotate"
# compress and index the custom annotation file before annotating with it
# (fixed: the input path was written as "build-data anno.bed")
bgzip -c build-data/anno.bed > build-data/anno.bed.gz
tabix --force -p bed build-data/anno.bed.gz
cp ../gemini_load_result1.db ../gemini_annotate_result.db
gemini --annotation-dir "$OUT_PTH" annotate -f build-data/anno.bed.gz -c anno5 -a count ../gemini_annotate_result.db
echo "Test database for gemini_set_somatic"
cp ../gemini_load_result1.db ../gemini_is_somatic_result.db
gemini set_somatic --min-somatic-score 5.65 ../gemini_is_somatic_result.db
echo "Test database for gemini_de_novo and gemini_mendel_errors"
gemini --annotation-dir "$OUT_PTH" load --skip-cadd --skip-gerp-bp -v build-data/test.de_novo.vcf -p build-data/test.de_novo.ped -t snpEff ../gemini_de_novo_input.db
echo "Test database for gemini_comp_hets"
gemini --annotation-dir "$OUT_PTH" load --skip-cadd --skip-gerp-bp -v build-data/test.comp_het.vcf -p build-data/test.comp_het.ped -t snpEff ../gemini_comphets_input.db
echo "Test databases for gemini_autosomal"
gemini --annotation-dir "$OUT_PTH" load --skip-cadd --skip-gerp-bp -v build-data/test.auto_rec.vcf -p build-data/test.auto_rec.ped -t snpEff ../gemini_auto_rec_input.db
gemini --annotation-dir "$OUT_PTH" load --skip-cadd --skip-gerp-bp -v build-data/test.auto_dom.vcf -p build-data/test.auto_dom.ped -t snpEff ../gemini_auto_dom_input.db
# test-data/util/shrink_simple_tab.py (file added by this changeset)
#
# Copy a tab-separated table to a new file, keeping its header lines and only
# those records that have a start or stop coordinate inside a given region.
from __future__ import print_function

import argparse
from functools import partial
from itertools import islice


def keep_line(line, pos_cols, region):
    """Tell whether the record on ``line`` should be kept for ``region``.

    Args:
        line: one raw record as bytes, fields separated by tabs.
        pos_cols: indices of the chromosome, start and stop columns (in that
            order) within the record.
        region: ``[chrom, lower, upper]`` with ``chrom`` as bytes and the two
            bounds as ints.

    Returns:
        True if the record is on ``chrom`` and its start or its stop lies
        strictly between ``lower`` and ``upper``; False otherwise.

    Raises:
        IndexError: if the record has fewer fields than ``pos_cols`` requires.
        ValueError: if an inspected coordinate field is not an integer.
    """
    fields = line.rstrip().split(b'\t')
    if fields[pos_cols[0]] != region[0]:
        # different chromosome
        return False
    lower, upper = region[1], region[2]
    # NOTE: only the two end points are tested, so a record that starts before
    # the region and ends after it is not kept.
    return (
        lower < int(fields[pos_cols[1]]) < upper
        or lower < int(fields[pos_cols[2]]) < upper
    )


def main(infile, ofile, num_header_lines, line_filter=None):
    """Write the header and the selected records of ``infile`` to ``ofile``.

    Args:
        infile: path of the table to read.
        ofile: path of the table to write (overwritten if it exists).
        num_header_lines: number of leading lines copied unconditionally; if
            the input is shorter, everything it contains is copied.
        line_filter: callable taking one raw line (bytes) and returning a
            truthy value for lines to keep. Defaults to the module-level
            ``keep_line``, which then must have been bound to its ``pos_cols``
            and ``region`` arguments by the caller.
    """
    if line_filter is None:
        # Backward compatibility: earlier callers rebound the global name.
        line_filter = keep_line
    print(infile, '->', ofile)
    with open(infile, 'rb') as i, open(ofile, 'wb') as o:
        # islice stops quietly at end of file instead of raising StopIteration
        o.writelines(islice(i, num_header_lines))
        o.writelines(line for line in i if line_filter(line))


if __name__ == '__main__':
    p = argparse.ArgumentParser()
    p.add_argument('infile')
    p.add_argument(
        '-r', '--region',
        required=True,
        help='the region of the input file to rewrite'
    )
    p.add_argument(
        '-o', '--ofile',
        required=True,
        help="the name of the output file"
    )
    p.add_argument(
        '-c', '--cols',
        nargs=3, type=int, required=True,
        help="the columns of the input file specifying chrom, start and stop, "
             "respectively"
    )
    p.add_argument(
        '-n', '--num-header-lines',
        type=int, default=0,
        help='the number of header lines present in the input; These will '
             'always be copied over to the new file.'
    )
    args = vars(p.parse_args())

    # region is given as CHROM:START-END; the file is read as bytes, so the
    # chromosome name is encoded for comparison
    chrom, reg = args['region'].split(':')
    region = [chrom.encode()] + [int(x) for x in reg.split('-')]
    line_filter = partial(keep_line, pos_cols=args['cols'], region=region)

    main(args['infile'], args['ofile'], args['num_header_lines'], line_filter)
# test-data/util/shrink_tabix.py (file added by this changeset)
#
# Extract the records of one region from a tabix-indexed file into a new file
# and build a tabix index for the result.
from __future__ import print_function

import argparse

import pysam


def main(infile, ofile, region):
    """Write the header and the ``region`` records of ``infile`` to ``ofile``.

    Args:
        infile: path of the tabix-indexed input file.
        ofile: path of the output; a trailing ``.gz`` is removed before the
            plain-text file is written and handed to ``pysam.tabix_index``.
            NOTE(review): tabix_index presumably compresses the file back to
            ``<ofile>.gz`` - confirm against the pysam documentation.
        region: region string passed to ``fetch``; if that raises ValueError
            the lookup is retried with ``'chr'`` prepended.
    """
    print(infile, '->', ofile)
    with pysam.Tabixfile(infile) as tabix_in:
        # the index preset is derived from the input's format; 'sam' is
        # indexed with the 'bed' preset instead
        preset = tabix_in.format.lower()
        if preset == 'sam':
            preset = 'bed'
        if ofile.endswith('.gz'):
            ofile = ofile[:-3]
        with open(ofile, 'w') as plain_out:
            try:
                records = tabix_in.fetch(region=region)
            except ValueError:
                # retry with a 'chr'-prefixed chromosome name
                records = tabix_in.fetch(region='chr' + region)
            plain_out.writelines(
                header_line + '\n' for header_line in tabix_in.header
            )
            plain_out.writelines(str(record) + '\n' for record in records)
    pysam.tabix_index(ofile, preset=preset, force=True)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('infile')
    parser.add_argument(
        '-r', '--region',
        required=True,
        help='the region of the input file to rewrite'
    )
    parser.add_argument(
        '-o', '--ofile',
        required=True,
        help="the name of the output file"
    )
    options = parser.parse_args()
    main(infile=options.infile, ofile=options.ofile, region=options.region)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/gemini_versioned_databases.loc.sample Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,3 @@ +## GEMINI versioned databases +#DownloadDate dbkey DBversion Description Path +#2018-07-08 hg19 181 GEMINI annotations (2018-07-08 snapshot) /path/to/data
<!-- tool_data_table_conf.xml.sample (file added by this changeset) -->
<tables>
    <!-- Data table of GEMINI annotation database versions.
         Lines of the .loc file that start with '#' are treated as comments. -->
    <table name="gemini_versioned_databases" comment_char="#" allow_duplicate_entries="False">
        <columns>value, dbkey, version, name, path</columns>
        <file path="tool-data/gemini_versioned_databases.loc" />
    </table>
</tables>
<!-- tool_data_table_conf.xml.test (file added by this changeset) -->
<tables>
    <!-- Location of gemini annotation files for testing -->
    <!-- Same table definition as the sample config, but the .loc file is
         resolved relative to this file's directory via ${__HERE__}. -->
    <table name="gemini_versioned_databases" comment_char="#" allow_duplicate_entries="False">
        <columns>value, dbkey, version, name, path</columns>
        <file path="${__HERE__}/test-data/gemini_versioned_databases.loc" />
    </table>
</tables>