Mercurial > repos > iuc > ncbi_datasets
changeset 17:da8260eba74b draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit 1c7abf3293422fb432b5acd2ef178e7536d65f0b
author | iuc |
---|---|
date | Fri, 21 Feb 2025 18:44:07 +0000 |
parents | a6a475ed58cb |
children | |
files | datasets_gene.xml datasets_genome.xml macros.xml |
diffstat | 3 files changed, 127 insertions(+), 38 deletions(-) [+] |
line wrap: on
line diff
--- a/datasets_gene.xml Fri Jun 07 14:19:58 2024 +0000 +++ b/datasets_gene.xml Fri Feb 21 18:44:07 2025 +0000 @@ -75,7 +75,7 @@ ## unzip and rehydrate if any data is to be downloaded (include is not None) #if $file_choices.kingdom_cond.include ## unzip - && 7z x -y ncbi_dataset.zip > 7z.log + && unzip ncbi_dataset.zip #end if ]]></command> <inputs> @@ -114,7 +114,7 @@ </valid> </sanitizer> </param> - <param argument="--include-flanks-bp" type="integer" optional="true" min="0" label="Length of flanking nucleotides" help="WP accessions only"/> + <param argument="--include-flanks-bp" type="integer" optional="true" min="0" value="" label="Length of flanking nucleotides" help="WP accessions only"/> </when> <when value="taxon"> <expand macro="taxon_positional"/> @@ -196,6 +196,9 @@ <data name="gene_fasta" label="NCBI Gene Datasets: Gene fasta" format="fasta" from_work_dir="ncbi_dataset/data/gene.fna"> <filter>file_choices['kingdom_cond']['include'] and "gene" in file_choices['kingdom_cond']['include']</filter> </data> + <data name="gene_flanks" label="NCBI Gene Datasets: Flanking faste" format="fasta" from_work_dir="ncbi_dataset/data/gene_flank.fna"> + <filter>query['subcommand']['download_by'] == "accession" and query['subcommand']['include_flanks_bp'] != ""</filter> + </data> <data name="rna_fasta" label="NCBI Gene Datasets: RNA fasta" format="fasta" from_work_dir="ncbi_dataset/data/rna.fna"> <filter>file_choices['kingdom_cond']['include'] and "rna" in file_choices['kingdom_cond']['include']</filter> </data> @@ -353,7 +356,7 @@ <has_text text="house mouse"/> <has_text text="XR_004936704.1"/> <has_n_lines min="130"/> - <has_n_columns n="38"/> + <has_n_columns n="39"/> </assert_contents> </output> <output name="threep_utr_fasta"> @@ -437,15 +440,13 @@ </output> </test> - <!-- 9: datasets download gene accession WP_004675351.1 + include_flanks_bp - test broken at the moment https://github.com/ncbi/datasets/issues/328 - --> - <test expect_failure="true"> <!-- expect_num_outputs="3" --> + <!-- 9: datasets download gene accession WP_003249567.1 + include_flanks_bp --> + <test expect_num_outputs="4"> <conditional name="query|subcommand"> <param name="download_by" value="accession"/> <conditional name="text_or_file"> <param name="text_or_file" value="text"/> - <param name="accession" value="WP_004675351.1"/> + <param name="accession" value="WP_003249567.1"/> </conditional> <param name="include_flanks_bp" value="100"/> </conditional> @@ -455,9 +456,10 @@ <param name="include" value="gene,protein"/> </conditional> </section> - <!-- <output name="gene_data_report"> + <output name="gene_data_report"> <assert_contents> - <has_text text="glcE"/> + <has_text text="WP_003249567.1"/> + <has_text text="menG"/> <has_n_lines n="2"/> <has_n_columns n="7"/> </assert_contents> @@ -467,18 +469,23 @@ <has_text text=">"/> </assert_contents> </output> + <output name="gene_flanks"> + <assert_contents> + <has_text text=">"/> + </assert_contents> + </output> <output name="protein_fasta"> <assert_contents> <has_text text=">"/> </assert_contents> - </output> --> + </output> <assert_command> <has_text text="include-flanks-bp 100"/> </assert_command> </test> <!-- 10: datasets download gene taxon human --> - <test expect_num_outputs="1"> + <!-- <test expect_num_outputs="1"> <conditional name="query|subcommand"> <param name="download_by" value="taxon"/> <param name="taxon_positional" value="human"/> @@ -495,9 +502,9 @@ <has_n_columns n="8"/> </assert_contents> </output> - </test> + </test> --> <!-- 11: datasets download gene taxon human + \-\-fasta-filter --> - <test expect_num_outputs="2"> + <!-- <test expect_num_outputs="2"> <conditional name="query|subcommand"> <param name="download_by" value="taxon"/> <param name="taxon_positional" value="human"/> @@ -524,7 +531,8 @@ <assert_contents> <has_text text=">" n="1" /> </assert_contents> - </output></test> + </output> + </test> --> </tests> <help> <![CDATA[
--- a/datasets_genome.xml Fri Jun 07 14:19:58 2024 +0000 +++ b/datasets_genome.xml Fri Feb 21 18:44:07 2025 +0000 @@ -55,7 +55,7 @@ ## unzip and rehydrate if any data is to be downloaded (include is not None) #if $file_choices.include ## unzip - && 7z x -y ncbi_dataset.zip > 7z.log + && unzip ncbi_dataset.zip ## rehydrate && datasets rehydrate @@ -463,8 +463,8 @@ </section> <output_collection name="sequence_report" type="list" count="2"/> <output_collection name="genome_fasta" type="list:list" count="2"> - <expand macro="genome_fasta_assert" el1="GCF_000002945.1" el2="GCF_000002945.1_ASM294v2" expression=">NC_[0-9]+\.[0-9]+ Schizosaccharomyces pombe (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="4"/> - <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\.[0-9]+ Saccharomyces cerevisiae S288[Cc] (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="17"/> + <expand macro="genome_fasta_assert" el1="GCF_000002945.2" el2="GCF_000002945.2_ASM294v3" expression=">NC_[0-9]+\.[0-9]+ Schizosaccharomyces pombe.*" expression_n="4"/> + <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\.[0-9]+ Saccharomyces cerevisiae S288[Cc].*" expression_n="17"/> </output_collection> </test> <!-- tax_exact_match should filter out strains
--- a/macros.xml Fri Jun 07 14:19:58 2024 +0000 +++ b/macros.xml Fri Feb 21 18:44:07 2025 +0000 @@ -1,5 +1,5 @@ <macros> - <token name="@TOOL_VERSION@">16.20.0</token> + <token name="@TOOL_VERSION@">16.41.0</token> <token name="@VERSION_SUFFIX@">0</token> <token name="@PROFILE@">23.0</token> <token name="@LICENSE@">MIT</token> @@ -11,8 +11,8 @@ <xml name="requirements"> <requirements> <requirement type="package" version="@TOOL_VERSION@">ncbi-datasets-cli</requirement> - <requirement type="package" version="2024.2.2">ca-certificates</requirement> - <requirement type="package" version="16.02">p7zip</requirement> + <requirement type="package" version="2025.1.31">ca-certificates</requirement> + <requirement type="package" version="6.0">unzip</requirement> </requirements> </xml> <xml name="bio_tools"> @@ -116,9 +116,28 @@ none #end if ]]></token> + <xml name="tsv_report_columns"> <param name="report_columns" type="select" multiple="true" optional="false" label="Columns in the report"> <option value="accession">Assembly Accession</option> + <option value="ani-best-ani-match-ani">ANI Best ANI match ANI</option> + <option value="ani-best-ani-match-assembly">ANI Best ANI match Assembly</option> + <option value="ani-best-ani-match-assembly_coverage">ANI Best ANI match Assembly Coverage</option> + <option value="ani-best-ani-match-category">ANI Best ANI match Type Category</option> + <option value="ani-best-ani-match-organism">ANI Best ANI match Organism</option> + <option value="ani-best-ani-match-type_assembly_coverage">ANI Best ANI match Type Assembly Coverage</option> + <option value="ani-best-match-status">ANI Best match status</option> + <option value="ani-category">ANI Category</option> + <option value="ani-check-status">ANI Check status</option> + <option value="ani-comment">ANI Comment</option> + <option value="ani-submitted-ani-match-ani">ANI Declared ANI match ANI</option> + <option value="ani-submitted-ani-match-assembly">ANI Declared ANI match Assembly</option> + <option value="ani-submitted-ani-match-assembly_coverage">ANI Declared ANI match Assembly Coverage</option> + <option value="ani-submitted-ani-match-category">ANI Declared ANI match Type Category</option> + <option value="ani-submitted-ani-match-organism">ANI Declared ANI match Organism</option> + <option value="ani-submitted-ani-match-type_assembly_coverage">ANI Declared ANI match Type Assembly Coverage</option> + <option value="ani-submitted-organism">ANI Submitted organism</option> + <option value="ani-submitted-species">ANI Submitted species</option> <option value="annotinfo-busco-complete">Annotation BUSCO Complete</option> <option value="annotinfo-busco-duplicated">Annotation BUSCO Duplicated</option> <option value="annotinfo-busco-fragmented">Annotation BUSCO Fragmented</option> @@ -137,23 +156,29 @@ <option value="annotinfo-pipeline">Annotation Pipeline</option> <option value="annotinfo-provider">Annotation Provider</option> <option value="annotinfo-release-date">Annotation Release Date</option> - <option value="annotinfo-release-version">Annotation Release Version</option> <option value="annotinfo-report-url">Annotation Report URL</option> <option value="annotinfo-software-version">Annotation Software Version</option> <option value="annotinfo-status">Annotation Status</option> + <option value="assminfo-assembly-method">Assembly Assembly Method</option> <option value="assminfo-atypicalis-atypical">Assembly Atypical Is Atypical</option> <option value="assminfo-atypicalwarnings">Assembly Atypical Warnings</option> + <option value="assminfo-bioproject">Assembly BioProject Accession</option> <option value="assminfo-bioproject-lineage-accession">Assembly BioProject Lineage Accession</option> - <option value="assminfo-bioproject-lineage-parent-accession">Assembly BioProject Lineage Parent Accession</option> <option value="assminfo-bioproject-lineage-parent-accessions">Assembly BioProject Lineage Parent Accessions</option> <option value="assminfo-bioproject-lineage-title">Assembly BioProject Lineage Title</option> <option value="assminfo-biosample-accession">Assembly BioSample Accession</option> + <option value="assminfo-biosample-age">Assembly BioSample Age</option> <option value="assminfo-biosample-attribute-name">Assembly BioSample Attribute Name</option> <option value="assminfo-biosample-attribute-value">Assembly BioSample Attribute Value</option> + <!-- https://github.com/ncbi/datasets/issues/452 --> + <option value="assminfo-biosample-biomaterial-provider-">Assembly BioSample Biomaterial provider</option> <option value="assminfo-biosample-bioproject-accession">Assembly BioSample BioProject Accession</option> - <option value="assminfo-biosample-bioproject-parent-accession">Assembly BioSample BioProject Parent Accession</option> <option value="assminfo-biosample-bioproject-parent-accessions">Assembly BioSample BioProject Parent Accessions</option> <option value="assminfo-biosample-bioproject-title">Assembly BioSample BioProject Title</option> + <option value="assminfo-biosample-breed">Assembly BioSample Breed</option> + <option value="assminfo-biosample-collected-by">Assembly BioSample Collected by</option> + <option value="assminfo-biosample-collection-date">Assembly BioSample Collection date</option> + <option value="assminfo-biosample-cultivar">Assembly BioSample Cultivar</option> <option value="assminfo-biosample-description-comment">Assembly BioSample Description Comment</option> <option value="assminfo-biosample-description-organism-common-name">Assembly BioSample Description Organism Common Name</option> <option value="assminfo-biosample-description-organism-infraspecific-breed">Assembly BioSample Description Organism Infraspecific Names Breed</option> @@ -166,49 +191,84 @@ <option value="assminfo-biosample-description-organism-pangolin">Assembly BioSample Description Organism Pangolin Classification</option> <option value="assminfo-biosample-description-organism-tax-id">Assembly BioSample Description Organism Taxonomic ID</option> <option value="assminfo-biosample-description-title">Assembly BioSample Description Title</option> + <option value="assminfo-biosample-development-stage">Assembly BioSample Development stage</option> + <option value="assminfo-biosample-ecotype">Assembly BioSample Ecotype</option> + <option value="assminfo-biosample-geo-loc-name">Assembly BioSample Geographic location</option> + <option value="assminfo-biosample-host">Assembly BioSample Host</option> + <option value="assminfo-biosample-identified-by">Assembly BioSample Identified by</option> <option value="assminfo-biosample-ids-db">Assembly BioSample Sample Identifiers Database</option> <option value="assminfo-biosample-ids-label">Assembly BioSample Sample Identifiers Label</option> <option value="assminfo-biosample-ids-value">Assembly BioSample Sample Identifiers Value</option> + <option value="assminfo-biosample-ifsac-category">Assembly BioSample IFSAC category</option> + <option value="assminfo-biosample-isolate">Assembly BioSample Isolate</option> + <option value="assminfo-biosample-isolate-name-alias">Assembly BioSample Isolate name alias</option> + <option value="assminfo-biosample-isolation-source">Assembly BioSample Isolation source</option> <option value="assminfo-biosample-last-updated">Assembly BioSample Last updated</option> + <option value="assminfo-biosample-lat-lon">Assembly BioSample Latitude / Longitude</option> <option value="assminfo-biosample-models">Assembly BioSample Models</option> <option value="assminfo-biosample-owner-contact-lab">Assembly BioSample Owner Contact Lab</option> <option value="assminfo-biosample-owner-name">Assembly BioSample Owner Name</option> <option value="assminfo-biosample-package">Assembly BioSample Package</option> + <option value="assminfo-biosample-project-name">Assembly BioSample Project name</option> <option value="assminfo-biosample-publication-date">Assembly BioSample Publication date</option> + <option value="assminfo-biosample-sample-name">Assembly BioSample Sample name</option> + <option value="assminfo-biosample-serotype">Assembly BioSample Serotype</option> + <option value="assminfo-biosample-serovar">Assembly BioSample Serovar</option> + <option value="assminfo-biosample-sex">Assembly BioSample Sex</option> + <option value="assminfo-biosample-source-type">Assembly BioSample Source type</option> <option value="assminfo-biosample-status-status">Assembly BioSample Status Status</option> <option value="assminfo-biosample-status-when">Assembly BioSample Status When</option> + <option value="assminfo-biosample-strain">Assembly BioSample Strain</option> + <option value="assminfo-biosample-sub-species">Assembly BioSample Sub-species</option> <option value="assminfo-biosample-submission-date">Assembly BioSample Submission date</option> + <option value="assminfo-biosample-tissue">Assembly BioSample Tissue</option> <option value="assminfo-blast-url">Assembly Blast URL</option> <option value="assminfo-description">Assembly Description</option> + <option value="assminfo-grouping-method">Assembly Grouping Method</option> <option value="assminfo-level">Assembly Level</option> <option value="assminfo-linked-assmaccession">Assembly Linked Assembly Accession</option> <option value="assminfo-linked-assmtype">Assembly Linked Assembly Type</option> + <option value="assminfo-long-name">Assembly LongName</option> <option value="assminfo-name">Assembly Name</option> - <option value="assminfo-paired-assmaccession">Assembly Paired Assembly Accession</option> - <option value="assminfo-paired-assmname">Assembly Paired Assembly Name</option> - <option value="assminfo-paired-assmstatus">Assembly Paired Assembly Status</option> + <option value="assminfo-notes">Assembly Notes</option> + <option value="assminfo-paired-assm-accession">Assembly Paired Assembly Accession</option> + <option value="assminfo-paired-assm-changed">Assembly Paired Assembly Changed</option> + <option value="assminfo-paired-assm-manual-diff">Assembly Paired Assembly Manual Diff</option> + <option value="assminfo-paired-assm-name">Assembly Paired Assembly Name</option> + <option value="assminfo-paired-assm-only-genbank">Assembly Paired Assembly Only Genbank</option> + <option value="assminfo-paired-assm-only-refseq">Assembly Paired Assembly Only RefSeq</option> + <option value="assminfo-paired-assm-refseq-genbank-are-different">Assembly Paired Assembly RefSeq GenBank Are Different</option> + <option value="assminfo-paired-assm-status">Assembly Paired Assembly Status</option> <option value="assminfo-refseq-category">Assembly Refseq Category</option> + <option value="assminfo-release-date">Assembly Release Date</option> <option value="assminfo-sequencing-tech">Assembly Sequencing Tech</option> <option value="assminfo-status">Assembly Status</option> - <option value="assminfo-submission-date">Assembly Submission Date</option> <option value="assminfo-submitter">Assembly Submitter</option> + <option value="assminfo-suppression-reason">Assembly Suppression Reason</option> <option value="assminfo-synonym">Assembly Synonym</option> <option value="assminfo-type">Assembly Type</option> <option value="assmstats-contig-l50">Assembly Stats Contig L50</option> <option value="assmstats-contig-n50">Assembly Stats Contig N50</option> <option value="assmstats-gaps-between-scaffolds-count">Assembly Stats Gaps Between Scaffolds Count</option> - <option value="assmstats-gc-count">Assembly Stats GC Count</option> <option value="assmstats-gc-percent">Assembly Stats GC Percent</option> + <option value="assmstats-genome-coverage">Assembly Stats Genome Coverage</option> <option value="assmstats-number-of-component-sequences">Assembly Stats Number of Component Sequences</option> <option value="assmstats-number-of-contigs">Assembly Stats Number of Contigs</option> + <option value="assmstats-number-of-organelles">Assembly Stats Number of Organelles</option> <option value="assmstats-number-of-scaffolds">Assembly Stats Number of Scaffolds</option> <option value="assmstats-scaffold-l50">Assembly Stats Scaffold L50</option> <option value="assmstats-scaffold-n50">Assembly Stats Scaffold N50</option> <option value="assmstats-total-number-of-chromosomes">Assembly Stats Total Number of Chromosomes</option> <option value="assmstats-total-sequence-len">Assembly Stats Total Sequence Length</option> <option value="assmstats-total-ungapped-len">Assembly Stats Total Ungapped Length</option> + <option value="checkm-completeness">CheckM completeness</option> + <option value="checkm-completeness-percentile">CheckM completeness percentile</option> + <option value="checkm-contamination">CheckM contamination</option> + <option value="checkm-marker-set">CheckM marker set</option> + <option value="checkm-marker-set-rank">CheckM marker set rank</option> + <option value="checkm-species-tax-id">CheckM species tax id</option> + <option value="checkm-version">CheckM version</option> <option value="current-accession">Current Accession</option> - <option value="organelle-assembly-name">Organelle Assembly Name</option> <option value="organelle-bioproject-accessions">Organelle BioProject Accessions</option> <option value="organelle-description">Organelle Description</option> <option value="organelle-infraspecific-name">Organelle Infraspecific Name</option> @@ -225,6 +285,8 @@ <option value="organism-pangolin">Organism Pangolin Classification</option> <option value="organism-tax-id">Organism Taxonomic ID</option> <option value="source_database">Source Database</option> + <option value="type_material-display_text">Type Material Display Text</option> + <option value="type_material-label">Type Material Label</option> <option value="wgs-contigs-url">WGS contigs URL</option> <option value="wgs-project-accession">WGS project accession</option> <option value="wgs-url">WGS URL</option> @@ -237,14 +299,10 @@ <option value="annotation-assembly-accession">Annotation Assembly Accession</option> <option value="annotation-assembly-name">Annotation Assembly Name</option> <option value="annotation-genomic-range-accession">Annotation Genomic Range Accession</option> - <option value="annotation-genomic-range-exon-order">Annotation Genomic Range Exons Order</option> - <option value="annotation-genomic-range-exon-orientation">Annotation Genomic Range Exons Orientation</option> - <option value="annotation-genomic-range-exon-start">Annotation Genomic Range Exons Start</option> - <option value="annotation-genomic-range-exon-stop">Annotation Genomic Range Exons Stop</option> - <option value="annotation-genomic-range-range-order">Annotation Genomic Range Order</option> - <option value="annotation-genomic-range-range-orientation">Annotation Genomic Range Orientation</option> - <option value="annotation-genomic-range-range-start">Annotation Genomic Range Start</option> - <option value="annotation-genomic-range-range-stop">Annotation Genomic Range Stop</option> + <option value="annotation-genomic-range-order">Annotation Genomic Range Exons Order</option> + <option value="annotation-genomic-range-orientation">Annotation Genomic Range Exons Orientation</option> + <option value="annotation-genomic-range-start">Annotation Genomic Range Exons Start</option> + <option value="annotation-genomic-range-stop">Annotation Genomic Range Exons Stop</option> <option value="annotation-genomic-range-seq-name">Annotation Genomic Range Seq Name</option> <option value="annotation-release-date">Annotation Release Date</option> <option value="annotation-release-name">Annotation Release Name</option> @@ -260,12 +318,30 @@ <option value="genomic-region-gene-range-range-start">Genomic Region Gene Range Start</option> <option value="genomic-region-gene-range-range-stop">Genomic Region Gene Range Stop</option> <option value="genomic-region-genomic-region-type">Genomic Region Genomic Region Type</option> + <option value="go-assigned-by">Gene Ontology Assigned By</option> + <option value="go-bp-evidence-code">Gene Ontology Biological Process Evidence Code</option> + <option value="go-bp-id">Gene Ontology Biological Process Go ID</option> + <option value="go-bp-name">Gene Ontology Biological Process Name</option> + <option value="go-bp-qualifier">Gene Ontology Biological Process Qualifier</option> + <option value="go-bp-reference-pmid">Gene Ontology Biological Process Reference PMID</option> + <option value="go-cc-evidence-code">Gene Ontology Cellular Component Evidence Code</option> + <option value="go-cc-id">Gene Ontology Cellular Component Go ID</option> + <option value="go-cc-name">Gene Ontology Cellular Component Name</option> + <option value="go-cc-qualifier">Gene Ontology Cellular Component Qualifier</option> + <option value="go-cc-reference-pmid">Gene Ontology Cellular Component Reference PMID</option> + <option value="go-mf-evidence-code">Gene Ontology Molecular Function Evidence Code</option> + <option value="go-mf-id">Gene Ontology Molecular Function Go ID</option> + <option value="go-mf-name">Gene Ontology Molecular Function Name</option> + <option value="go-mf-qualifier">Gene Ontology Molecular Function Qualifier</option> + <option value="go-mf-reference-pmid">Gene Ontology Molecular Function Reference PMID</option> <option value="group-id">Gene Group Identifier</option> <option value="group-method">Gene Group Method</option> + <option value="locus-tag">Locus Tag</option> <option value="name-authority">Nomenclature Authority</option> <option value="name-id">Nomenclature ID</option> <option value="omim-ids">OMIM IDs</option> <option value="orientation">Orientation</option> + <option value="protein-count">Proteins</option> <option value="ref-standard-gene-range-accession">Reference Standard Gene Range Sequence Accession</option> <option value="ref-standard-gene-range-range-order">Reference Standard Gene Range Order</option> <option value="ref-standard-gene-range-range-orientation">Reference Standard Gene Range Orientation</option> @@ -274,14 +350,19 @@ <option value="ref-standard-genomic-region-type">Reference Standard Genomic Region Type</option> <option value="replaced-gene-id">Replaced NCBI GeneID</option> <option value="rna-type">RNA Type</option> + <option value="summary-date">Summary Date</option> + <option value="summary-description">Summary Description</option> + <option value="summary-source">Summary Source</option> <option value="swissprot-accessions">SwissProt Accessions</option> <option value="symbol">Symbol</option> <option value="synonyms">Synonyms</option> <option value="tax-id">Taxonomic ID</option> <option value="tax-name">Taxonomic Name</option> + <option value="transcript-count">Transcripts</option> <yield/> </param> </xml> + <xml name="prok_gene_tsv_report_columns"> <param name="report_columns" type="select" multiple="true" optional="false" label="Columns in the report"> <option value="accession">Accession</option>