# HG changeset patch # User iuc # Date 1547246822 18000 # Node ID cd00221d67cbaeb403efb0556eb9439ffefb73d3 # Parent 7ca6716748c2672eab802beb6410035f4620478d planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 62ed732cba355e695181924a8ed4cce49ca21c59 diff -r 7ca6716748c2 -r cd00221d67cb gemini_macros.xml --- a/gemini_macros.xml Fri Dec 14 12:51:59 2018 -0500 +++ b/gemini_macros.xml Fri Jan 11 17:47:02 2019 -0500 @@ -1,15 +1,12 @@ - 0.18.1 + 0.20.1 - 181 + 200 gemini - tabix - - @@ -24,9 +21,17 @@ + + + + 10.1371/journal.pcbi.1003153 + + + + @@ -36,31 +41,36 @@ - - - - - - - + + + + + - - + + + - + - - - + + + + - - - + + + + + + + @@ -69,27 +79,23 @@ - + - + + + - - - - - - - - - - - - - + + + + @@ -103,10 +109,90 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + value.strip() + + + + + + + + + + value.strip() + + + + + + + + + + + + + + + + + value.strip() + + + not value or value.isdigit() + + + not value or value.isdigit() + + + + @@ -119,67 +205,50 @@ #end if - - #if str($filter.filter_selector) == 'yes' and $filter.filter: - --filter '${ str( $filter.filter ) }' + + #if str($report.report_selector) == 'full': + #set cols = "*" + #else: + #if $report.columns and str($report.columns) != '': + #set $cols = str($report.columns) + #else + #set $cols = '' + #end if + #if str($report.extra_cols).strip(): + #if $cols: + #set $cols = $cols + ', ' + str($report.extra_cols) + #else: + #set $cols = str($report.extra_cols) + #end if + #end if + #if not $cols: + #set $cols = "variant_id, gene" + #end if #end if - #if $report.report_selector != 'all': - --columns "${report.columns} - #if str($report.extra_cols).strip() - #echo ','+','.join(str($report.extra_cols).split()) - #end if - " + @SET_COLS@ + #if $cols != "*" + --columns '$cols' #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 10.1371/journal.pcbi.1003153 - - - - - - - - - - value is not None and value.metadata.gemini_version == 
"@VERSION@" - - - + = %d" % int($r.start)) + #end if + #if str($r.stop).strip(): + #silent $r_elements.append("end <= %d" % int($r.stop)) + #end if + #silent $region_elements.append("(%s)" % " AND ".join($r_elements)) + #end for + ]]> + diff -r 7ca6716748c2 -r cd00221d67cb gemini_query.xml --- a/gemini_query.xml Fri Dec 14 12:51:59 2018 -0500 +++ b/gemini_query.xml Fri Jan 11 17:47:02 2019 -0500 @@ -1,8 +1,32 @@ - + Querying the GEMINI database gemini_macros.xml query + + + + + + + + + + + + + + + + + @@ -10,91 +34,251 @@ 0: + --min-kindreds ${i.min_kindreds} + #end if + ${i.in} + #set $multiline_sql_expr = str($i.sample_filter) + #set $cmdln_param = "--sample-filter" + @MULTILN_SQL_EXPR_TO_CMDLN@ + #end for - $show_samples - $show_families - $family_wise - $header - $dgidb - #if $region.strip(): - --region "${region}" + #if str($query.oformat.report.format) == 'with_samples': + #set $sample_delim = str($query.oformat.report.sample_delim) or ',' + --show-samples --sample-delim '$sample_delim' + #elif str($query.oformat.report.format) == 'with_samples_flattened': + --show-samples --format sampledetail + #elif str($query.oformat.report.format) == 'with_families': + #set $sample_delim = str($query.oformat.report.sample_delim) or ',' + --show-families --sample-delim '$sample_delim' + #elif str($query.oformat.report.format) == 'carrier_summary': + --carrier-summary-by-phenotype + #if str($query.oformat.report.phenotype).strip(): + '${query.oformat.report.phenotype}' + #else: + affected + #end if + #else: + --format ${query.oformat.report.format} #end if - #if int($min_kindreds) > 0: - --min-kindreds $min_kindreds + + #if str($query.interface) == 'basic': + ## build the SQL query string from its components + #if str($query.oformat.report.format) in ('vcf', 'tped'): + #set $cols = "*" + #else: + #set $report = $query.oformat.report.report + @SET_COLS@ + #end if + #set $q = "SELECT %s FROM variants" % $cols + #set $where_clause_elements = [] + #if 
str($query.filter).strip(): + #silent $where_clause_elements.append(str($query.filter).strip()) + #end if + + #set $regions = $query.regions + @PARSE_REGION_ELEMENTS@ + #if $region_elements: + #silent $where_clause_elements.append(" OR ".join($region_elements)) + #end if + #if $where_clause_elements: + #set $q = $q + " WHERE " + " AND ".join($where_clause_elements) + #end if + #if str($query.oformat.report.order_by).strip(): + #set $q = $q + " ORDER BY " + str($query.oformat.report.order_by).strip() + str($query.oformat.report.sort_order) + #end if + #else + ## The user entered the SQL query string directly. + #set $q = str($query.q) #end if - ##--format FORMAT Format of output (JSON, TPED or default) # we will take default for the time being - ## --sample-delim STRING The delimiter to be used with the --show-samples option. #set $multiline_sql_expr = $q #set $cmdln_param = "-q" @MULTILN_SQL_EXPR_TO_CMDLN@ - "${ infile }" - > "${ outfile }" + '$infile' + > '$outfile' ]]> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + + value.strip() + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
- + + + + + + - - + + + + @@ -106,10 +290,116 @@ `__ +in the GEMINI documentation. + +The tool supports regular genotype filters like:: + + gt.sample1 == HET and gt_depths.sample1 >= 15 + +, which would keep only variants for which sample1 is a heterozygous carrier +and for which the genomic position in sample1 is covered by at least 15 sequencing +reads, as well as GEMINI wildcard filters of the general form +*(COLUMN).(SAMPLE_FILTER).(RULE).(RULE_ENFORCEMENT)* like:: + + (gt_types).(phenotype==2).(!=HOM_REF).(all) + +, which keeps only variants for which no affected sample is homozygous reference. + +*Sample filters* + +Sample filters have the same format as the second component of the genotype +wildcard filters above, so:: + + phenotype == 2 + +would filter for phenotypically affected samples. In this case, however, the +filter determines from which samples variants should be reported, i.e., here, +only variants found in phenotypically affected samples get reported. You can +use the ``--in`` filter to adjust the exact meaning of the sample filter. + +*Region filters* + +They let you restrict your analysis to parts of the genome, which can be useful +if you have prior knowledge of the approximate location of a variant of +interest. + +If you specify more than one region filter, they get combined with a logical +*OR*, meaning variants and genes falling in *any* of the regions are reported. -http://gemini.readthedocs.org/en/latest/content/querying.html +*Additional constraints on variants* + +These get translated directly into the WHERE clause of an SQL query and, thus, +have to be expressed in valid SQL syntax. As an example you could use:: + + is_exonic = 1 and impact_severity != 'LOW' + +to indicate that you are only interested in exonic variants that are not of +*LOW* impact severity, *i.e.*, not silent mutations. 
+ +Note that in SQL syntax tests for equality use a single ``=``, while genotype +filters (discussed above) follow Python syntax and use ``==`` for the +same purpose. Also note that non-numerical values need to be enclosed in +single-quotes, *e.g.* ``'LOW'``, but numerical values must *NOT* be. + +----- + +*Building your query with the Advanced query constructor* + +For the sake of simplicity, the basic mode of the tool limits your queries to +the variants table of the underlying database. While this still allows many +useful queries to be formulated, it prevents you from joining information from +other tables (in particular, the gene_detailed table) or from querying a different +table directly. + +In advanced mode, you take responsibility for formulating the complete SQL +query in correct syntax, which allows you to do anything you could do with the +command line tool. Beyond querying other tables, this includes changing output +column names, deriving simple statistics on columns using the SQL Min, Max, +Count, Avg and Sum functions, and more. + +The price you pay for this extra flexibility is that you will have to make sure +that any other tool options you set are compatible with the result of your +particular query. For example, most output formats except the tabular default +output of GEMINI are incompatible with non-standard queries. Choosing +incompatible options can result in them getting ignored silently, but also +in tool errors, or in problems with downstream tools. + +The chapter `Querying the GEMINI database +`__ of the +GEMINI documentation can get you started with formulating your own queries. + +Note that genotype filters and sample filters cannot be expressed as genuine +SQL queries, so even the Advanced query constructor offers them. 
Region +filters and sort order of rows and columns on the other hand can be controlled +through SQL queries, like in this example:: + + SELECT gene, chrom, start, end, ref, alt FROM variants WHERE chrom = 'chr1' + AND start >= 10000000 and end <= 20000000 and is_lof = 1 ORDER BY chrom, + start + +, which would report all loss-of-function variants between 10,000,000 and +20,000,000 on chr1 and report the selected columns sorted on chromosome, then +position. + ]]> diff -r 7ca6716748c2 -r cd00221d67cb repository_dependencies.xml --- a/repository_dependencies.xml Fri Dec 14 12:51:59 2018 -0500 +++ b/repository_dependencies.xml Fri Jan 11 17:47:02 2019 -0500 @@ -1,4 +1,4 @@ - + \ No newline at end of file diff -r 7ca6716748c2 -r cd00221d67cb test-data/gemini_amend_input.db Binary file test-data/gemini_amend_input.db has changed diff -r 7ca6716748c2 -r cd00221d67cb test-data/gemini_annotate_result.db Binary file test-data/gemini_annotate_result.db has changed diff -r 7ca6716748c2 -r cd00221d67cb test-data/gemini_auto_dom_input.db Binary file test-data/gemini_auto_dom_input.db has changed diff -r 7ca6716748c2 -r cd00221d67cb test-data/gemini_auto_rec_input.db Binary file test-data/gemini_auto_rec_input.db has changed diff -r 7ca6716748c2 -r cd00221d67cb test-data/gemini_comphets_input.db Binary file test-data/gemini_comphets_input.db has changed diff -r 7ca6716748c2 -r cd00221d67cb test-data/gemini_de_novo_input.db Binary file test-data/gemini_de_novo_input.db has changed diff -r 7ca6716748c2 -r cd00221d67cb test-data/gemini_is_somatic_result.db Binary file test-data/gemini_is_somatic_result.db has changed diff -r 7ca6716748c2 -r cd00221d67cb test-data/gemini_load_result1.db Binary file test-data/gemini_load_result1.db has changed diff -r 7ca6716748c2 -r cd00221d67cb test-data/gemini_load_result2.db Binary file test-data/gemini_load_result2.db has changed diff -r 7ca6716748c2 -r cd00221d67cb test-data/gemini_versioned_databases.loc --- 
a/test-data/gemini_versioned_databases.loc Fri Dec 14 12:51:59 2018 -0500 +++ b/test-data/gemini_versioned_databases.loc Fri Jan 11 17:47:02 2019 -0500 @@ -1,3 +1,3 @@ ## GEMINI versioned databases #DownloadDate dbkey DBversion Description Path -1999-01-01 hg19 181 GEMINI annotations (test snapshot) ${__HERE__}/test-cache +1999-01-01 hg19 200 GEMINI annotations (test snapshot) ${__HERE__}/test-cache diff -r 7ca6716748c2 -r cd00221d67cb test-data/test-cache/gemini-config.yaml --- a/test-data/test-cache/gemini-config.yaml Fri Dec 14 12:51:59 2018 -0500 +++ b/test-data/test-cache/gemini-config.yaml Fri Jan 11 17:47:02 2019 -0500 @@ -2,12 +2,14 @@ versions: ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz: 4 ESP6500SI.all.snps_indels.tidy.v2.vcf.gz: 2 - ExAC.r0.3.sites.vep.tidy.vcf.gz: 3 + ExAC.r0.3.sites.vep.tidy.vcf.gz: 4 GRCh37-gms-mappability.vcf.gz: 2 - clinvar_20160203.tidy.vcf.gz: 5 + clinvar_20170130.tidy.vcf.gz: 5 cosmic-v68-GRCh37.tidy.vcf.gz: 3 - dbsnp.b141.20140813.hg19.tidy.vcf.gz: 4 + dbsnp.b147.20160601.tidy.vcf.gz: 1 detailed_gene_table_v75: 2 geno2mp.variants.tidy.vcf.gz: 1 + gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz: 2 hg19.rmsk.bed.gz: 2 summary_gene_table_v75: 2 + whole_genome_SNVs.tsv.compressed.gz: 2 diff -r 7ca6716748c2 -r cd00221d67cb test-data/test-cache/gemini/data/clinvar_20160203.tidy.vcf.gz Binary file test-data/test-cache/gemini/data/clinvar_20160203.tidy.vcf.gz has changed diff -r 7ca6716748c2 -r cd00221d67cb test-data/test-cache/gemini/data/clinvar_20160203.tidy.vcf.gz.tbi Binary file test-data/test-cache/gemini/data/clinvar_20160203.tidy.vcf.gz.tbi has changed diff -r 7ca6716748c2 -r cd00221d67cb test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz Binary file test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz has changed diff -r 7ca6716748c2 -r cd00221d67cb test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz.tbi Binary file 
test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz.tbi has changed diff -r 7ca6716748c2 -r cd00221d67cb test-data/test-cache/gemini/data/dbsnp.b141.20140813.hg19.tidy.vcf.gz Binary file test-data/test-cache/gemini/data/dbsnp.b141.20140813.hg19.tidy.vcf.gz has changed diff -r 7ca6716748c2 -r cd00221d67cb test-data/test-cache/gemini/data/dbsnp.b141.20140813.hg19.tidy.vcf.gz.tbi Binary file test-data/test-cache/gemini/data/dbsnp.b141.20140813.hg19.tidy.vcf.gz.tbi has changed diff -r 7ca6716748c2 -r cd00221d67cb test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz Binary file test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz has changed diff -r 7ca6716748c2 -r cd00221d67cb test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz.tbi Binary file test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz.tbi has changed diff -r 7ca6716748c2 -r cd00221d67cb test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz Binary file test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz has changed diff -r 7ca6716748c2 -r cd00221d67cb test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz.tbi Binary file test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz.tbi has changed