view syndiva.xml @ 0:0254731f047b draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/SynDivA commit 90c5ec603e2c6b8c49d2dc7ec1b1e97f9d8fb92c
author iuc
date Thu, 23 Jun 2022 22:32:13 +0000
parents
children
line wrap: on
line source

<tool id="syndiva" name="syndiva" version="1.0" profile="21.05">
    <!-- Short description of the tool. -->
    <description>diversity analysis of synthetic libraries of a Fibronectin domain</description>
    <!-- The macros expanded in this file are not defined here; macros.xml is the only import. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- The matplotlib and biopython requirement macros are passed as children of the "requirements" macro. -->
    <expand macro="requirements">
        <expand macro="matplotlib_requirement"/>
        <expand macro="biopython_requirement"/>
    </expand>
    <!--
        Creates the report's extra-files directory, runs syndiva.py from the
        tool directory with that directory as its output location, then copies
        the HTML file found there to output.html in the working directory.
        A non-zero exit code marks the job as failed.
    -->
    <command detect_errors="exit_code"><![CDATA[
        mkdir -p '${report.extra_files_path}' &&
        python '${__tool_directory__}/syndiva.py'
            -i '${fasta_file}'
            -p '${pattern}'
            -5 '${restriction_site_5}'
            -3 '${restriction_site_3}'
            -o '${report.extra_files_path}' &&
        cp '${report.extra_files_path}'/*.html output.html
    ]]></command>
    <inputs>
        <param name="fasta_file" type="data" format="fasta" label="DNA FASTA file"/>
        <!-- Pattern: must be non-empty and contain only the 20 amino-acid letters, digits, hyphen, colon and asterisk. -->
        <param name="pattern" type="text" area="true" label="Sequence pattern">
            <validator type="empty_field"/>
            <validator type="regex" message="An expression is required and is allowed to contain only protein alphabet letters (ACDEFGHIKLMNPQRSTVWY), numbers and the characters -:*">^[ACDEFGHIKLMNPQRSTVWY\d\-:*]+$</validator>
        </param>
        <!-- Restriction sites: each is a single sequence of 4 to 10 uppercase nucleotides, as enforced by the regex. -->
        <param name="restriction_site_5" type="text" label="5' restriction site" help="Sequence of the 5' restriction site: 4 to 10 nucleotides, uppercase A, T, G or C only.">
            <validator type="empty_field"/>
            <validator type="regex" message="A restriction site is required and must be 4 to 10 nucleotides long, using only uppercase DNA alphabet letters (ATGC)">^[ATGC]{4,10}$</validator>
        </param>
        <param name="restriction_site_3" type="text" label="3' restriction site" help="Sequence of the 3' restriction site: 4 to 10 nucleotides, uppercase A, T, G or C only.">
            <validator type="empty_field"/>
            <validator type="regex" message="A restriction site is required and must be 4 to 10 nucleotides long, using only uppercase DNA alphabet letters (ATGC)">^[ATGC]{4,10}$</validator>
        </param>
    </inputs>
    <outputs>
        <!-- HTML report, collected from output.html in the job working directory. -->
        <data name="report" format="html" from_work_dir="output.html"/>
    </outputs>
    <tests>
        <!-- Runs the tool on syndiva_datatest.fasta and compares the report with syndiva_report.html, tolerating up to 2 differing lines. -->
        <test>
            <param name="fasta_file" value="syndiva_datatest.fasta"/>
            <param name="pattern"
                   value="AAAGSSVSSVPTKLEVVAATPTSLLISWDA:4-6:V:1:YYRITYGETGGNSPVQEFTVPG:3:TATISGLSPGVDYTITVYA:11-12:PISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAETVESCLAKSHTENSFTNVWKDDKTLDRYANYE"/>
            <param name="restriction_site_5" value="GCGGCCGC"/>
            <param name="restriction_site_3" value="GGTACC"/>
            <output name="report" ftype="html" file="syndiva_report.html" lines_diff="2"/>
        </test>
    </tests>
    <help>
    <![CDATA[
- **Scientific context**

- *SynDivA* was developed to analyze the diversity of synthetic libraries of a Fibronectin domain.
- This diversity is generated as part of a directed-evolution project that uses a phage display approach to obtain ligands with high affinity and specificity for biological targets. It is introduced in three loops of the domain and consists of variation in both the amino acids and the lengths of the loops.
- *SynDivA* is used before the libraries of interest are selected: it checks the quality - and therefore the diversity - of a library by determining, by projection, the number of unique and functional sequences it contains.

- **Description**

- *SynDivA* is implemented in Python.

- The computations are divided into three steps:

    1. Pre-processing of the input data (determining the orientation and the reading frame of the sequences, translating the nucleotide sequences into protein sequences, and discarding "waste" sequences that contain stop codons in variable regions)
    2. Pairwise alignment of the sequences
    3. Statistics calculations on the alignments (distances)

- The results are presented as an HTML report.

- **Example**

- Pattern : AAAGSSVSSVPTKLEVVAATPTSLLISWDA:4-6:V:1:YYRITYGETGGNSPVQEFTVPG:3:TATISGLSPGVDYTITVYA:11-12:PISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAETVESCLAKSHTENSFTNVWKDDKTLDRYANYE
- 5' restriction site : GCGGCCGC
- 3' restriction site : GGTACC

 ]]></help>
    <!-- Citations come from the "citations" macro, which is not defined in this file (macros.xml is the only import). -->
    <expand macro="citations" />
</tool>