view sarscov2formatter.xml @ 1:e3a7995dce75 draft

"planemo upload for repository commit 1bd215457a39296c069caa6be30939f4f1bdadeb"
author iuc
date Fri, 20 Nov 2020 18:21:33 +0000
parents 1c664ff29354
children 2e993ff8e7dc
line wrap: on
line source

<tool id="sarscov2formatter" name="sarscov2formatter" version="0.5.3+galaxy1" profile="18.01">
        <requirement type="package" version="0.5.3">sarscov2formatter</requirement>
## -a: alignment input; -m: metadata source, either the literal 'ncbi' or the
## user-supplied tabular dataset (which only exists when source_choice is 'other').
-a '$align'
#if $source.source_choice == 'ncbi':
    -m ncbi
#else
    -m '$source.meta'
#end if
        <!-- FASTA alignment dataset; referenced on the command line as -a '$align'. -->
        <param name="align"
               type="data"
               format="fasta"
               label="Multiple Sequence Alignment (MSA)"
               help="MSA to be used in HyPhy analysis"/>
        <conditional name="source" >
            <param name="source_choice" type="select" label="NCBI datasource or other?" >
                <option value="ncbi">NCBI</option>
                <option value="other">Other</option>
            <when value="ncbi" />
            <when value="other" >
                 <param name="meta" type="data" format="tabular" label="Metadata source" help="Tabular file with metadata with the correct columns (see below)" />
        <!-- Output datasets picked up from the job working directory (from_work_dir).
             Labels are prefixed with the tool name and a description of the inputs. -->
        <data name="msa" format="fasta" from_work_dir="msa.fasta" label="${tool.name} on ${on_string}: MSA" />
        <data name="dups" format="json" from_work_dir="duplicates.json" label="${tool.name} on ${on_string}: Duplicates" />
        <data name="outmeta" format="json" from_work_dir="meta.json" label="${tool.name} on ${on_string}: Metadata" />
        <!-- note: the test with ncbi yields unstable results for meta, hence assert contents.
             It also needs a surprising amount of memory (1.7G), which might
             become impractical in the future -->
            <param name="align" ftype="fasta" value="align.fasta" />
            <param name="source_choice" value="ncbi" />
            <output name="msa" ftype="fasta" compare="diff" value="msa.fasta" />
            <output name="dups" ftype="json" compare="diff" value="dups.json" />
            <output name="outmeta" ftype="json">
                    <has_line_matching expression="\{"/>
                    <has_line_matching expression="\}"/>
                    <has_text_matching expression='"LR757995": \{'/>
                    <has_text_matching expression='"collected": '/>
                    <has_text_matching expression='"collected": '/>
                    <has_text_matching expression='"location": '/>
                    <has_text_matching expression='"country": '/>
                    <has_text_matching expression='"locality": '/>
                    <has_text_matching expression='"state": '/>
                    <has_text_matching expression='"subregion": '/>
        <!-- TODO test with tabular input, does not work yet
             test.tsv has been generated with
<!--        <test>-->
<!--            <param name="align" ftype="fasta" value="align.fasta" />-->
<!--            <conditional name="source" >-->
<!--                <param name="source_choice" value="other" />-->
<!--                <param name="meta" ftype="tabular" value="test.tsv" />-->
<!--            </conditional>-->
<!--            <output name="msa" ftype="fasta" compare="diff" value="msa-other.fasta" />-->
<!--            <output name="dups" ftype="json" compare="diff" value="dups-other.json" />-->
<!--            <output name="outmeta" ftype="json" compare="diff" value="meta-other.json" />-->
<!--        </test>-->


Custom script that performs the necessary formatting operations for the SARS-CoV2 Selection Analysis workflow.

If using non-NCBI data, the metadata input file must be tabular with the following columns: ID, collection_date, country, state (optional), and locality (optional). Optional columns should still be created even if they are not used.

Dates should be in the format YYYYMMDD (example: May 1 2020 = 20200501).

        <citation type="bibtex">
            author = {Nicholas Keener},
            year = {2020},
            title = {sarscov2formatter},
            publisher = {Github},
            journal = {Github repository},
            url = {},