view VCFCarto_wrapper.xml @ 7:a6b557df86db draft

Uploaded
author urgi-team
date Tue, 15 Dec 2015 05:35:36 -0500
parents
children
line wrap: on
line source

<tool id="VCFCarto" name="VCFCarto" version="0.01">
    <!-- One-line summary of the tool. -->
    <description>VCFcarto can convert a tabulated marker file into a file with only the markers from 2 parents </description>
    <!-- Dependency declaration: version 1.0 of the VCF_Gandalf_Tools package. -->
    <requirements>
        <requirement type="package" version="1.0">VCF_Gandalf_Tools</requirement>
    </requirements>
    <!-- Command used to report the version of the underlying program.
         NOTE(review): this invokes VCFCarto.py, whereas the command section runs
         VCFCarto_wrapper.py; confirm that VCFCarto.py is reachable at runtime. -->
    <version_command>
  VCFCarto.py --version
    </version_command>
    <!-- Command line template (Cheetah). Every substituted value is double-quoted
         so that a path or a parent name containing whitespace reaches the script
         as a single argument. The graph options are added for the "carto" and
         "MergedCarto" output types; the merge options only for "MergedCarto". -->
    <command interpreter="python">
    VCFCarto_wrapper.py -f "$inputTabular" -o "$outputVCFCarto" -A "$parentA" -H "$parentH"
    #if str($outputType) == "carto"
     -p -g --graphHTML "$output_html" --dirGraphs "$output_html.files_path"
    #end if
    #if str($outputType) == "MergedCarto"
     -p -g --graphHTML "$output_html" --dirGraphs "$output_html.files_path" -m --mergeFile "$output_bed"
    #end if
    </command>
    <!-- User-facing parameters: the input marker table, the names of the two
         parents, and the kind of output to produce. -->
    <inputs>
        <param name="inputTabular" type="data" format="tabular" label="indicate your tabulated marker file"/>
        <param name="parentA" size="20" type="text" value="V1" label="indicate parent 1 name (A)"/>
        <param name="parentH" size="20" type="text" value="V2" label="indicate parent 2 name (H)"/>
        <!-- The option values (not the labels) are what the command template and
             the output filters compare against; only the first label's spelling
             was corrected. -->
        <param name="outputType" type="select" display="radio" label="select type of output" multiple="False">
            <option value="raw" >7 character code</option>
            <option value="carto" >A - H code</option>
            <option value="MergedCarto" >A - H code and merge</option>
        </param>
    </inputs>
    <!-- Datasets produced by the tool. The tabular result is always declared;
         the HTML graphs are kept for every output type except "raw"; the BED
         file is kept only for "MergedCarto". -->
    <outputs>
        <data name="outputVCFCarto" format="tabular" label="${tool.name} on ${on_string} (tabular)"/>
        <data name="output_html" format="html" label="${tool.name} graphs on ${on_string} (html)">
            <filter>outputType != "raw"</filter>
        </data>
        <data name="output_bed" format="bed" label="${tool.name} markers on ${on_string} (bed)">
            <filter>outputType == "MergedCarto"</filter>
        </data>
    </outputs>
    <!-- Functional tests. Both use the same input table with parents REF1 and
         REF2. NOTE(review): the "carto" output type and the HTML output are not
         asserted by any test here. -->
    <tests>
        <!-- "raw" output type: only the tabular output is compared. -->
        <test>
            <param name="inputTabular" value="VCFCarto_input.tab"/>
            <param name="parentA" value="REF1"/>
            <param name="parentH" value="REF2"/>
            <param name="outputType" value="raw"/>
            <output name="outputVCFCarto" file="VCFCarto_output.tab" ftype="tabular"/>
        </test>
        <!-- "MergedCarto" output type: the merged tabular output and the BED file are compared. -->
        <test>
            <param name="inputTabular" value="VCFCarto_input.tab"/>
            <param name="parentA" value="REF1"/>
            <param name="parentH" value="REF2"/>
            <param name="outputType" value="MergedCarto"/>
            <output name="outputVCFCarto" file="VCFCarto_output_merged.tab" ftype="tabular"/>
            <output name="output_bed" file="VCFCarto_output_merged.bed" ftype="bed"/>
        </test>
    </tests>
    <help><![CDATA[
      
**VCFcarto converts a tabulated marker file into a file with only the markers from 2 parents**

.. class:: infomark

expected input format is the output from VCFStorage.

-----

**what it does :**

VCFcarto converts a tabulated marker file into a file with only the markers from 2 parents, refA and refH. 

Two output formats are possible: either the input format is kept, or the codes are converted into a 3-symbol format ("A", "H", "-").

-----

**input format :**

.. class:: infomark

expected input format is the output from VCFStorage.

the expected format is a tab delimited format file where all genomic positions are in rows, and all strains are in columns

For each position and each genome, a code is attributed : 

- for the reference : ::

    A,T,G,C for the corresponding nucleotide

- for the genomes : ::

    U if the position was not reported in the VCF file
    R if the base is the same as the reference
    F if the base has been filtered out
    A,T,G,C if the genome has a validated SNP at the position

-----

**output format :**

for the main output, 2 formats are possible : 

- The first format is similar to the input format (same columns and code), but only the lines where the 2 parents have different alleles are kept.

- The second format (A - H format) will have a much simpler code ::

    "A" when the strain allele is the same as parent A
    "H" when the strain allele is the same as parent H
    "-" in any other case (base filtered out, different base, base unmapped etc...)

the second format may be used as an input for a cartographic tool. 

If you choose the A - H format, you can also merge consecutive markers that carry the same information (all strains are identical between the two markers). If you do so, new markers will be generated and a bed file will link the input markers to the output markers.

Finally, graphical output will be displayed to visualise the result. 

-----

**example :**

input : ::

	CHROM	POS	reference	REF1	G01	REF2	G02	G03	G04	G05	G06	G07	G08	G09	G10	G11	G12
	Chr1	1	A	R	R	R	R	U	R	R	R	R	R	R	R	R	R
	Chr1	2	T	R	R	R	R	R	U	R	R	R	R	R	R	R	R
	Chr1	3	G	R	R	R	R	R	R	R	R	R	R	R	R	R	R
	Chr1	4	G	R	R	R	R	R	R	R	R	R	R	R	R	F	R
	Chr1	5	G	R	R	R	R	R	R	U	F	R	R	R	R	R	R
	Chr1	6	C	R	R	R	R	R	R	R	R	R	R	R	R	R	U
	Chr1	7	A	G	C	C	C	F	C	C	C	C	C	G	C	G	G
	Chr1	8	G	R	R	R	R	R	R	R	R	R	R	R	R	R	R
	Chr1	9	C	R	T	T	R	T	T	T	U	R	T	R	T	T	T
	Chr1	10	T	R	R	R	R	R	R	R	R	R	R	R	R	R	U
	Chr1	11	T	R	R	R	R	R	R	R	R	R	R	R	F	R	R
	Chr1	12	A	R	R	R	R	U	R	R	R	R	F	R	R	R	R
	Chr1	13	A	R	R	G	G	R	F	R	F	G	R	G	R	R	F
	Chr1	14	A	R	R	R	R	R	R	R	R	F	R	R	R	R	R
	Chr1	15	G	R	R	R	U	R	F	R	R	R	R	R	R	U	U
	Chr1	16	G	A	R	R	A	R	R	U	F	R	R	A	A	R	A
	Chr1	17	A	R	G	G	R	U	R	R	G	G	R	G	U	R	G
	Chr1	18	C	R	R	R	R	R	U	R	R	R	R	R	R	R	R
	Chr1	19	G	C	U	R	C	R	C	U	R	R	C	C	C	R	C
	Chr1	20	G	A	U	R	A	R	A	U	R	R	A	A	A	R	A
	Chr1	21	G	T	U	R	T	R	T	U	R	R	T	T	T	R	T
	Chr1	22	A	T	U	R	T	R	T	U	R	R	T	T	T	R	T
	Chr1	23	C	T	T	R	T	R	R	R	T	R	U	T	R	T	T
	Chr1	24	T	R	R	R	R	R	U	R	R	R	R	R	R	R	F
	Chr1	25	G	R	F	R	R	R	R	R	U	R	F	R	R	R	R
	Chr1	26	T	R	R	C	C	C	C	C	R	R	C	R	C	R	U
	Chr1	27	C	R	R	G	G	G	G	R	G	R	G	R	G	R	R
	Chr1	28	C	G	T	T	T	G	G	T	T	F	T	G	T	T	G
	Chr1	29	G	T	R	R	R	R	T	R	T	R	T	T	R	T	R
	Chr1	30	T	R	R	R	R	R	R	R	R	R	R	R	R	R	R
	Chr1	31	A	R	R	R	R	F	R	R	R	R	F	R	R	R	R
	Chr1	32	A	G	G	R	G	G	G	R	R	G	G	G	G	G	R
	Chr1	33	G	R	R	R	R	R	R	R	R	R	R	R	R	R	R
	Chr1	34	C	R	R	R	R	R	R	R	R	R	R	R	R	R	R
	Chr1	35	C	R	R	R	R	R	F	R	R	R	R	R	R	R	U
	Chr2	1	T	R	R	R	F	R	R	R	R	R	R	R	R	R	R
	Chr2	2	A	C	R	R	C	C	U	R	R	R	R	C	C	C	U
	Chr2	3	C	R	R	R	R	R	R	U	R	R	R	R	R	R	R
	Chr2	4	C	R	R	R	R	R	R	R	U	R	R	R	R	F	R
	Chr2	5	T	R	R	R	R	R	R	R	R	R	R	R	R	R	R
	Chr2	6	C	R	R	R	R	R	R	R	R	R	R	R	R	R	R
	Chr2	7	A	T	F	R	U	R	T	T	T	R	T	T	F	T	T
	Chr2	8	T	R	R	R	R	R	R	R	R	R	R	R	R	R	R
	Chr2	9	C	R	R	R	R	R	R	R	R	R	R	R	R	R	R
	Chr2	10	G	R	T	T	T	T	R	T	R	R	R	R	R	U	R
	Chr2	11	C	R	A	A	A	A	R	A	R	R	R	R	R	U	R
	Chr2	12	A	R	T	T	T	T	R	T	R	R	R	R	R	U	R
	Chr2	13	T	R	C	C	C	C	R	C	R	R	R	R	R	U	R
	Chr2	14	C	T	A	A	T	A	T	A	T	A	T	T	A	A	A
	Chr2	15	T	R	R	R	F	R	R	R	R	R	R	R	R	R	R
	Chr2	16	A	R	R	R	R	R	R	R	U	R	R	R	R	R	R
	Chr2	17	A	R	U	R	R	R	R	R	R	R	R	R	R	R	F
	Chr2	18	G	R	R	R	R	R	R	R	R	R	R	R	R	R	R
	Chr2	19	A	R	R	R	R	R	R	F	R	R	R	R	R	R	R
	Chr2	20	C	R	R	R	R	R	R	R	F	R	R	R	R	R	R
	Chr2	21	G	A	R	R	A	A	A	R	R	R	A	A	R	R	R
	Chr2	22	A	R	R	R	R	R	R	F	R	R	R	R	R	R	R
	Chr2	23	A	R	R	T	T	R	R	T	T	T	T	T	R	R	R
	Chr2	24	T	R	R	R	R	R	R	U	R	R	R	R	R	R	F
	Chr2	25	T	R	A	A	R	R	A	R	A	R	R	A	R	R	A
	Chr2	26	G	R	R	R	R	R	R	R	R	R	R	R	R	R	R
	Chr2	27	A	R	R	R	R	R	R	R	R	R	R	R	R	U	R
	Chr2	28	C	R	U	R	R	F	F	R	R	F	R	F	U	R	R
	Chr2	29	G	R	R	R	R	R	R	F	R	R	R	R	R	R	R
	Chr2	30	T	A	A	G	A	G	G	A	A	G	F	G	G	G	U
	Chr2	31	A	R	R	R	R	R	R	R	R	U	U	R	R	R	R
	Chr2	32	G	R	R	R	R	R	R	U	U	R	R	R	R	R	R
	Chr2	33	G	R	U	R	R	R	R	U	R	R	R	R	R	R	R
	Chr2	34	A	R	R	R	U	R	R	R	R	R	R	R	R	R	R
	Chr2	35	G	R	R	R	R	R	R	R	R	R	R	R	R	R	R
	Chr2	36	T	R	R	R	R	R	R	U	R	R	R	R	R	R	R
	Chr3	1	T	U	R	R	R	R	R	U	R	R	R	R	R	R	R
	Chr3	2	T	R	R	U	R	R	R	U	R	R	R	R	R	R	R
	Chr3	3	T	F	R	R	R	R	R	U	R	R	R	R	R	R	R
	Chr3	4	T	R	R	F	R	R	R	U	R	R	R	R	R	R	R


output :

- without A - H code : ::

	CHROM	POS	reference	REF1	G01	REF2	G02	G03	G04	G05	G06	G07	G08	G09	G10	G11	G12
	Chr1	7	A	G	C	C	C	F	C	C	C	C	C	G	C	G	G
	Chr1	9	C	R	T	T	R	T	T	T	U	R	T	R	T	T	T
	Chr1	13	A	R	R	G	G	R	F	R	F	G	R	G	R	R	F
	Chr1	16	G	A	R	R	A	R	R	U	F	R	R	A	A	R	A
	Chr1	17	A	R	G	G	R	U	R	R	G	G	R	G	U	R	G
	Chr1	19	G	C	U	R	C	R	C	U	R	R	C	C	C	R	C
	Chr1	20	G	A	U	R	A	R	A	U	R	R	A	A	A	R	A
	Chr1	21	G	T	U	R	T	R	T	U	R	R	T	T	T	R	T
	Chr1	22	A	T	U	R	T	R	T	U	R	R	T	T	T	R	T
	Chr1	23	C	T	T	R	T	R	R	R	T	R	U	T	R	T	T
	Chr1	26	T	R	R	C	C	C	C	C	R	R	C	R	C	R	U
	Chr1	27	C	R	R	G	G	G	G	R	G	R	G	R	G	R	R
	Chr1	28	C	G	T	T	T	G	G	T	T	F	T	G	T	T	G
	Chr1	29	G	T	R	R	R	R	T	R	T	R	T	T	R	T	R
	Chr1	32	A	G	G	R	G	G	G	R	R	G	G	G	G	G	R
	Chr2	2	A	C	R	R	C	C	U	R	R	R	R	C	C	C	U
	Chr2	7	A	T	F	R	U	R	T	T	T	R	T	T	F	T	T
	Chr2	10	G	R	T	T	T	T	R	T	R	R	R	R	R	U	R
	Chr2	11	C	R	A	A	A	A	R	A	R	R	R	R	R	U	R
	Chr2	12	A	R	T	T	T	T	R	T	R	R	R	R	R	U	R
	Chr2	13	T	R	C	C	C	C	R	C	R	R	R	R	R	U	R
	Chr2	14	C	T	A	A	T	A	T	A	T	A	T	T	A	A	A
	Chr2	21	G	A	R	R	A	A	A	R	R	R	A	A	R	R	R
	Chr2	23	A	R	R	T	T	R	R	T	T	T	T	T	R	R	R
	Chr2	25	T	R	A	A	R	R	A	R	A	R	R	A	R	R	A
	Chr2	30	T	A	A	G	A	G	G	A	A	G	F	G	G	G	U

- with A - H code but no merge : ::

	CHROM	POS	reference	REF1	G01	REF2	G02	G03	G04	G05	G06	G07	G08	G09	G10	G11	G12
	Chr1	7	-	A	H	H	H	-	H	H	H	H	H	A	H	A	A
	Chr1	9	-	A	H	H	A	H	H	H	-	A	H	A	H	H	H
	Chr1	13	-	A	A	H	H	A	-	A	-	H	A	H	A	A	-
	Chr1	16	-	A	H	H	A	H	H	-	-	H	H	A	A	H	A
	Chr1	17	-	A	H	H	A	-	A	A	H	H	A	H	-	A	H
	Chr1	19	-	A	-	H	A	H	A	-	H	H	A	A	A	H	A
	Chr1	20	-	A	-	H	A	H	A	-	H	H	A	A	A	H	A
	Chr1	21	-	A	-	H	A	H	A	-	H	H	A	A	A	H	A
	Chr1	22	-	A	-	H	A	H	A	-	H	H	A	A	A	H	A
	Chr1	23	-	A	A	H	A	H	H	H	A	H	-	A	H	A	A
	Chr1	26	-	A	A	H	H	H	H	H	A	A	H	A	H	A	-
	Chr1	27	-	A	A	H	H	H	H	A	H	A	H	A	H	A	A
	Chr1	28	-	A	H	H	H	A	A	H	H	-	H	A	H	H	A
	Chr1	29	-	A	H	H	H	H	A	H	A	H	A	A	H	A	H
	Chr1	32	-	A	A	H	A	A	A	H	H	A	A	A	A	A	H
	Chr2	2	-	A	H	H	A	A	-	H	H	H	H	A	A	A	-
	Chr2	7	-	A	-	H	-	H	A	A	A	H	A	A	-	A	A
	Chr2	10	-	A	H	H	H	H	A	H	A	A	A	A	A	-	A
	Chr2	11	-	A	H	H	H	H	A	H	A	A	A	A	A	-	A
	Chr2	12	-	A	H	H	H	H	A	H	A	A	A	A	A	-	A
	Chr2	13	-	A	H	H	H	H	A	H	A	A	A	A	A	-	A
	Chr2	14	-	A	H	H	A	H	A	H	A	H	A	A	H	H	H
	Chr2	21	-	A	H	H	A	A	A	H	H	H	A	A	H	H	H
	Chr2	23	-	A	A	H	H	A	A	H	H	H	H	H	A	A	A
	Chr2	25	-	A	H	H	A	A	H	A	H	A	A	H	A	A	H
	Chr2	30	-	A	A	H	A	H	H	A	A	H	-	H	H	H	-

- with A - H code and merge  : 
 
 - tab file : ::
 
	CHROM	POS	reference	REF1	G01	REF2	G02	G03	G04	G05	G06	G07	G08	G09	G10	G11	G12
	Chr1	*M_00001	-	A	H	H	H	-	H	H	H	H	H	A	H	A	A
	Chr1	*M_00002	-	A	H	H	A	H	H	H	-	A	H	A	H	H	H
	Chr1	*M_00003	-	A	A	H	H	A	-	A	-	H	A	H	A	A	-
	Chr1	*M_00004	-	A	H	H	A	H	H	-	-	H	H	A	A	H	A
	Chr1	*M_00005	-	A	H	H	A	-	A	A	H	H	A	H	-	A	H
	Chr1	*M_00006	-	A	-	H	A	H	A	-	H	H	A	A	A	H	A
	Chr1	*M_00007	-	A	A	H	A	H	H	H	A	H	-	A	H	A	A
	Chr1	*M_00008	-	A	A	H	H	H	H	H	A	A	H	A	H	A	-
	Chr1	*M_00009	-	A	A	H	H	H	H	A	H	A	H	A	H	A	A
	Chr1	*M_00010	-	A	H	H	H	A	A	H	H	-	H	A	H	H	A
	Chr1	*M_00011	-	A	H	H	H	H	A	H	A	H	A	A	H	A	H
	Chr1	*M_00012	-	A	A	H	A	A	A	H	H	A	A	A	A	A	H
	Chr2	*M_00013	-	A	H	H	A	A	-	H	H	H	H	A	A	A	-
	Chr2	*M_00014	-	A	-	H	-	H	A	A	A	H	A	A	-	A	A
	Chr2	*M_00015	-	A	H	H	H	H	A	H	A	A	A	A	A	-	A
	Chr2	*M_00016	-	A	H	H	A	H	A	H	A	H	A	A	H	H	H
	Chr2	*M_00017	-	A	H	H	A	A	A	H	H	H	A	A	H	H	H
	Chr2	*M_00018	-	A	A	H	H	A	A	H	H	H	H	H	A	A	A
	Chr2	*M_00019	-	A	H	H	A	A	H	A	H	A	A	H	A	A	H
	Chr2	*M_00020	-	A	A	H	A	H	H	A	A	H	-	H	H	H	-

 - bed file : :: 
    
	Chr1	7	7	*M_00001
	Chr1	9	9	*M_00002
	Chr1	13	13	*M_00003
	Chr1	16	16	*M_00004
	Chr1	17	17	*M_00005
	Chr1	19	22	*M_00006
	Chr1	23	23	*M_00007
	Chr1	26	26	*M_00008
	Chr1	27	27	*M_00009
	Chr1	28	28	*M_00010
	Chr1	29	29	*M_00011
	Chr1	32	32	*M_00012
	Chr2	2	2	*M_00013
	Chr2	7	7	*M_00014
	Chr2	10	13	*M_00015
	Chr2	14	14	*M_00016
	Chr2	21	21	*M_00017
	Chr2	23	23	*M_00018
	Chr2	25	25	*M_00019
	Chr2	30	30	*M_00020


-----

**reference :**

]]>
    </help>
</tool>