view fsd_regions.xml @ 4:b202c97deabe draft

planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
author mheinzl
date Mon, 08 Oct 2018 05:53:50 -0400
parents 85d870b8ae92
children eabfdc012d7b
line wrap: on
line source

<?xml version="1.0" encoding="UTF-8"?>
<tool id="fsd_regions" name="Duplex Sequencing Analysis: fsd_regions" version="1.0.0">
    <description>Family size distribution (FSD) of user-specified regions in the reference genome</description>
    <!-- Packages this tool declares as dependencies: the Python interpreter
         and the plotting library. -->
    <requirements>
        <requirement version="2.7" type="package">python</requirement>
        <requirement version="1.4.0" type="package">matplotlib</requirement>
    </requirements>
    <!-- Invokes fsd_regions.py with both input datasets, the display name of
         the first dataset, and the two output paths.
         Every substituted path is single-quoted so the shell passes it as one
         argument; the original left the two output paths unquoted.
         detect_errors="exit_code" makes the job state follow the exit status of
         the script rather than the presence of text on stderr.
         The template is wrapped in CDATA so shell or Cheetah metacharacters
         never need XML escaping. -->
    <command detect_errors="exit_code"><![CDATA[
        python2 '$__tool_directory__/fsd_regions.py'
            --inputFile '$file1'
            --inputName1 '$file1.name'
            --ref_genome '$file2'
            --output_pdf '$output_pdf'
            --output_tabular '$output_tabular'
    ]]></command>
    <!-- Two input datasets: file1 is the tabular list of all tag families,
         file2 is the text file that maps reference regions to aligned tags. -->
    <inputs>
        <param name="file1"
               type="data"
               format="tabular"
               label="Dataset 1: input tags of whole dataset"
               optional="false"
               help="Input in tabular format with the family size, tags and the direction of the strand ('ab' or 'ba') for each family."/>
        <param name="file2"
               type="data"
               format="txt"
               label="Dataset 2: input tags aligned to the reference genome"
               help="Input in txt format with the regions in the reference genome and the tags, which were aligned to the reference genome."/>
    </inputs>
    <!-- Output datasets written by the command: the plot as PDF and the data
         behind the plot as a tabular file. Each output carries an explicit
         label so the two history items can be told apart by name; without a
         label both would receive the same default name. -->
    <outputs>
        <data name="output_pdf" format="pdf" label="${tool.name} on ${on_string}: PDF"/>
        <data name="output_tabular" format="tabular" label="${tool.name} on ${on_string}: tabular"/>
    </outputs>
    <!-- One functional test: run the tool on the bundled test datasets and
         compare both outputs with the stored reference files. The PDF
         comparison is given a tolerance through lines_diff; the tabular file
         is compared without one. -->
    <tests>
        <test>
            <param value="Test_data.tabular" name="file1"/>
            <param value="Test_data_regions.txt" name="file2"/>
            <output file="output_file.pdf" name="output_pdf" lines_diff="136"/>
            <output file="output_file.tabular" name="output_tabular"/>
        </test>
    </tests>
    <help> <![CDATA[

**What it does**
        
    This tool creates a distribution of the family sizes of all tags that were aligned to the reference genome. The distribution is broken down by the regions of the reference genome.
               
        
**Input**
        
    This tool expects a tabular file with the tags of all families, their sizes and information about forward (ab) and reverse (ba) strands.
    
    +-----+----------------------------+----+
    | 1   | AAAAAAAAAAAATGTTGGAATCTT   | ba |
    +-----+----------------------------+----+
    | 10  | AAAAAAAAAAAGGCGGTCCACCCC   | ab |
    +-----+----------------------------+----+
    | 28  | AAAAAAAAAAATGGTATGGACCGA   | ab |
    +-----+----------------------------+----+
    
    
    In addition, a TXT file with the regions and all tags that were aligned to the reference genome is required. This file can be obtained from a different tool.
    
    +-----------+------------------------------+
    | 87_636    | AAATCAAAGTATGAATGAAGTTGCCT   |
    +-----------+------------------------------+
    | 87_636    | AAATTCATAGCATTAATTTCAACGGG   |
    +-----------+------------------------------+
    | 656_1143  | GGGGCAGCCATATTGGCAATTATCAT   |
    +-----------+------------------------------+
    
**Output**
        
    The output is a PDF file with the plot and a tabular file with the data of the plot.
        
        
**About Author**
        
    Author: Monika Heinzl
    
    Department: Institute of Bioinformatics, Johannes Kepler University Linz, Austria
    
    Contact: monika.heinzl@edumail.at
        
        ]]> 

    </help>
    <!-- Reference to cite for this tool, given as a single BibTeX entry
         (a @misc record with author, year and title). -->
    <citations>
        <citation type="bibtex">
            @misc{duplex,
            author = {Heinzl, Monika},
            year = {2018},
            title = {Development of algorithms for the analysis of duplex sequencing data}
         }
        </citation>
    </citations>
</tool>