# HG changeset patch # User iuc # Date 1547246919 18000 # Node ID 4b26f6c99227a6c181fbb878c84444b99b65d058 # Parent a26f0a30df65eda1862c99c9c7d577a215970116 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 62ed732cba355e695181924a8ed4cce49ca21c59 diff -r a26f0a30df65 -r 4b26f6c99227 gemini_gene_wise.xml --- a/gemini_gene_wise.xml Fri Dec 14 12:55:02 2018 -0500 +++ b/gemini_gene_wise.xml Fri Jan 11 17:48:39 2019 -0500 @@ -1,5 +1,5 @@ - - Custom genotype filtering by gene + + Discover per-gene variant patterns across families gemini_macros.xml gene_wise @@ -11,34 +11,62 @@ 0: - --min_filters $min_filters + --min-filters $min_filters #end if - #set $multiline_sql_expr = $gt_filter - #set $cmdln_param = "--gt-filter" - @MULTILN_SQL_EXPR_TO_CMDLN@ + #for $filter in $filter_by_genotype: + #set $multiline_sql_expr = str($filter.gt_filter) + #if $filter.is_required: + #set $cmdln_param = "--gt-filter-required" + #else: + #set $cmdln_param = "--gt-filter" + #end if + @MULTILN_SQL_EXPR_TO_CMDLN@ + #end for + #set $report = $oformat.report @COLUMN_SELECT@ - @CMDLN_SQL_FILTER_FILTER_OPTION@ + #set $where_clause_elements = [] + #set $filter_cmdln_param = '--filter' + #for $cond in $constraint: + #if str($cond.filter).strip(): + #silent $where_clause_elements.append(str($cond.filter).strip()) + #if $cond.overwrite_default_filter: + #set $filter_cmdln_param = '--where' + #end if + #end if + #end for - "${ infile }" - > "${ outfile }" + @PARSE_REGION_ELEMENTS@ + #if $region_elements: + #silent $where_clause_elements.append(" OR ".join($region_elements)) + #end if + #set $filter = " AND ".join($where_clause_elements) + #if str($filter): + $filter_cmdln_param '$filter' + #end if + + '$infile' + > '$outfile' ]]> - - - - - - - - - + + + + + + + + +
+ +
@@ -46,10 +74,12 @@ - + + + - + @@ -58,7 +88,81 @@ `__ + +----- + +*Genotype filters* + +The syntax for specifying a genotype filter (``--gt-filter`` command line +option) is the same as for the *GEMINI query* tool and is described `here +`__. + +The difference with the *gene_wise* tool is that it lets you specify multiple +such filters and, if you do, every filter can be met by a **different variant** +as long as all of them are in the **same gene**. + +This is useful if your analysis includes several families that you suspect +(based on a shared phenotype) to have the same gene affected, but not +necessarily through the same variant. In this case, you can formulate one filter +per family like, for example:: + + gt_types.fam1_kid == HET and gt_types.fam1_mom == HOM_REF and gt_types.fam1_dad == HOM_REF + + gt_types.fam2_kid == HET + + gt_types.fam3_kid == HET + +, which would allow you to find a causal gene that's affected by different +(dominant) variants in children from three different families. Note that the +first filter combines three conditions applied to family 1, which, thus, must +be met by the same variant site. + +*Regular and required filters* (``--gt-filter`` *vs* ``--gt-filter-required``) +and the *Minimum number of filters* + +For every single genotype filter you define you can specify whether it should +be applied as a regular or as a required filter. The difference is that, if a +variant doesn't pass a required filter it is excluded from further analysis. +Of the regular filters, a gene and its variants only have to pass a threshold +number defined by *Minimum number of filters* (``--min-filters``). Imagine that, +with the above filters, you had specified ``--min-filters`` as ``2``: then a +gene for which the child in family 2 carries one copy of a variant allele and +the child in family 3 carries a copy of a different allele would be reported +no matter whether any other allele in that gene passes the first filter, *etc.* 
+ +----- + +*Region filters* + +They let you restrict your analysis to parts of the genome, which can be useful +if you have prior knowledge of the approximate location of the causative gene. + +If you specify more than one region filter, they get combined with a logical +*OR*, meaning variants and genes falling in *any* of the regions are reported. + +----- + +*Additional constraints on variants* + +These get translated directly into the WHERE clause of an SQL query and, thus, +have to be expressed in valid SQL syntax. Of particular interest, here, is the +fact that, by default, the *gene_wise* tool applies the WHERE clause: +``is_exonic = 1 and impact_severity != 'LOW'``, which means the tool only +considers variants in exons that are not of *LOW* impact severity (*i.e.*, not +silent mutations). While this can be a good and biologically justifiable +setting, you can overwrite it if you need to. + +Note that in SQL syntax tests for equality use a single ``=``, while genotype +filters (discussed above) follow Python syntax and use ``==`` for the +same purpose. Also note that non-numerical values need to be enclosed in +single-quotes, *e.g.* ``'LOW'``, but numerical values must *NOT* be. 
+ ]]> diff -r a26f0a30df65 -r 4b26f6c99227 gemini_macros.xml --- a/gemini_macros.xml Fri Dec 14 12:55:02 2018 -0500 +++ b/gemini_macros.xml Fri Jan 11 17:48:39 2019 -0500 @@ -1,15 +1,12 @@ - 0.18.1 + 0.20.1 - 181 + 200 gemini - tabix - - @@ -24,9 +21,17 @@ + + + + 10.1371/journal.pcbi.1003153 + + + + @@ -36,31 +41,36 @@ - - - - - - - + + + + + - - + + + - + - - - + + + + - - - + + + + + + + @@ -69,27 +79,23 @@ - + - + + + - - - - - - - - - - - - - + + + + @@ -103,10 +109,90 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + value.strip() + + + + + + + + + + value.strip() + + + + + + + + + + + + + + + + + value.strip() + + + not value or value.isdigit() + + + not value or value.isdigit() + + + + @@ -119,67 +205,50 @@ #end if - - #if str($filter.filter_selector) == 'yes' and $filter.filter: - --filter '${ str( $filter.filter ) }' + + #if str($report.report_selector) == 'full': + #set cols = "*" + #else: + #if $report.columns and str($report.columns) != '': + #set $cols = str($report.columns) + #else + #set $cols = '' + #end if + #if str($report.extra_cols).strip(): + #if $cols: + #set $cols = $cols + ', ' + str($report.extra_cols) + #else: + #set $cols = str($report.extra_cols) + #end if + #end if + #if not $cols: + #set $cols = "variant_id, gene" + #end if #end if - #if $report.report_selector != 'all': - --columns "${report.columns} - #if str($report.extra_cols).strip() - #echo ','+','.join(str($report.extra_cols).split()) - #end if - " + @SET_COLS@ + #if $cols != "*" + --columns '$cols' #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 10.1371/journal.pcbi.1003153 - - - - - - - - - - value is not None and value.metadata.gemini_version == "@VERSION@" - - - + = %d" % int($r.start)) + #end if + #if str($r.stop).strip(): + #silent $r_elements.append("end <= %d" % int($r.stop)) + #end if + #silent $region_elements.append("(%s)" % " AND ".join($r_elements)) + #end for + ]]> + diff -r a26f0a30df65 -r 4b26f6c99227 
repository_dependencies.xml --- a/repository_dependencies.xml Fri Dec 14 12:55:02 2018 -0500 +++ b/repository_dependencies.xml Fri Jan 11 17:48:39 2019 -0500 @@ -1,4 +1,4 @@ - + \ No newline at end of file diff -r a26f0a30df65 -r 4b26f6c99227 test-data/gemini_amend_input.db Binary file test-data/gemini_amend_input.db has changed diff -r a26f0a30df65 -r 4b26f6c99227 test-data/gemini_annotate_result.db Binary file test-data/gemini_annotate_result.db has changed diff -r a26f0a30df65 -r 4b26f6c99227 test-data/gemini_auto_dom_input.db Binary file test-data/gemini_auto_dom_input.db has changed diff -r a26f0a30df65 -r 4b26f6c99227 test-data/gemini_auto_rec_input.db Binary file test-data/gemini_auto_rec_input.db has changed diff -r a26f0a30df65 -r 4b26f6c99227 test-data/gemini_comphets_input.db Binary file test-data/gemini_comphets_input.db has changed diff -r a26f0a30df65 -r 4b26f6c99227 test-data/gemini_de_novo_input.db Binary file test-data/gemini_de_novo_input.db has changed diff -r a26f0a30df65 -r 4b26f6c99227 test-data/gemini_is_somatic_result.db Binary file test-data/gemini_is_somatic_result.db has changed diff -r a26f0a30df65 -r 4b26f6c99227 test-data/gemini_load_result1.db Binary file test-data/gemini_load_result1.db has changed diff -r a26f0a30df65 -r 4b26f6c99227 test-data/gemini_load_result2.db Binary file test-data/gemini_load_result2.db has changed diff -r a26f0a30df65 -r 4b26f6c99227 test-data/gemini_versioned_databases.loc --- a/test-data/gemini_versioned_databases.loc Fri Dec 14 12:55:02 2018 -0500 +++ b/test-data/gemini_versioned_databases.loc Fri Jan 11 17:48:39 2019 -0500 @@ -1,3 +1,3 @@ ## GEMINI versioned databases #DownloadDate dbkey DBversion Description Path -1999-01-01 hg19 181 GEMINI annotations (test snapshot) ${__HERE__}/test-cache +1999-01-01 hg19 200 GEMINI annotations (test snapshot) ${__HERE__}/test-cache diff -r a26f0a30df65 -r 4b26f6c99227 test-data/test-cache/gemini-config.yaml --- a/test-data/test-cache/gemini-config.yaml Fri Dec 14 
12:55:02 2018 -0500 +++ b/test-data/test-cache/gemini-config.yaml Fri Jan 11 17:48:39 2019 -0500 @@ -2,12 +2,14 @@ versions: ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz: 4 ESP6500SI.all.snps_indels.tidy.v2.vcf.gz: 2 - ExAC.r0.3.sites.vep.tidy.vcf.gz: 3 + ExAC.r0.3.sites.vep.tidy.vcf.gz: 4 GRCh37-gms-mappability.vcf.gz: 2 - clinvar_20160203.tidy.vcf.gz: 5 + clinvar_20170130.tidy.vcf.gz: 5 cosmic-v68-GRCh37.tidy.vcf.gz: 3 - dbsnp.b141.20140813.hg19.tidy.vcf.gz: 4 + dbsnp.b147.20160601.tidy.vcf.gz: 1 detailed_gene_table_v75: 2 geno2mp.variants.tidy.vcf.gz: 1 + gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz: 2 hg19.rmsk.bed.gz: 2 summary_gene_table_v75: 2 + whole_genome_SNVs.tsv.compressed.gz: 2 diff -r a26f0a30df65 -r 4b26f6c99227 test-data/test-cache/gemini/data/clinvar_20160203.tidy.vcf.gz Binary file test-data/test-cache/gemini/data/clinvar_20160203.tidy.vcf.gz has changed diff -r a26f0a30df65 -r 4b26f6c99227 test-data/test-cache/gemini/data/clinvar_20160203.tidy.vcf.gz.tbi Binary file test-data/test-cache/gemini/data/clinvar_20160203.tidy.vcf.gz.tbi has changed diff -r a26f0a30df65 -r 4b26f6c99227 test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz Binary file test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz has changed diff -r a26f0a30df65 -r 4b26f6c99227 test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz.tbi Binary file test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz.tbi has changed diff -r a26f0a30df65 -r 4b26f6c99227 test-data/test-cache/gemini/data/dbsnp.b141.20140813.hg19.tidy.vcf.gz Binary file test-data/test-cache/gemini/data/dbsnp.b141.20140813.hg19.tidy.vcf.gz has changed diff -r a26f0a30df65 -r 4b26f6c99227 test-data/test-cache/gemini/data/dbsnp.b141.20140813.hg19.tidy.vcf.gz.tbi Binary file test-data/test-cache/gemini/data/dbsnp.b141.20140813.hg19.tidy.vcf.gz.tbi has changed diff -r a26f0a30df65 -r 4b26f6c99227 
test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz Binary file test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz has changed diff -r a26f0a30df65 -r 4b26f6c99227 test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz.tbi Binary file test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz.tbi has changed diff -r a26f0a30df65 -r 4b26f6c99227 test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz Binary file test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz has changed diff -r a26f0a30df65 -r 4b26f6c99227 test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz.tbi Binary file test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz.tbi has changed