Mercurial > repos > petr-novak > various_galaxy_tools
diff extract_GFF_Features.xml @ 0:696e702ebf74 draft
"planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
author | petr-novak |
---|---|
date | Mon, 09 May 2022 08:26:30 +0000 |
parents | |
children | 639c0edb7e64 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extract_GFF_Features.xml Mon May 09 08:26:30 2022 +0000 @@ -0,0 +1,114 @@ +<tool id="Extract_features1" name="Extract features" version="1.0.0"> + <description>from GFF data</description> + <command interpreter="python">extract_GFF_Features.py $input1 $out_file1 ${column_choice.col} ${column_choice.feature}</command> + <inputs> + <param format="gff" name="input1" type="data" label="Select GFF data"/> + <conditional name="column_choice"> + <param name="col" type="select" label="From"> + <option value="0" selected="true">Column 1 / Sequence name</option> + <option value="1">Column 2 / Source</option> + <option value="2">Column 3 / Feature</option> + <option value="6">Column 7 / Strand</option> + <option value="7">Column 8 / Frame</option> + </param> + <when value="0"> + <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple columns"> + <options from_dataset="input1"> + <column name="name" index="0"/> + <column name="value" index="0"/> + <filter type="unique_value" name="unique" column="0"/> + </options> + </param> + </when> + <when value="1"> + <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple columns"> + <options from_dataset="input1"> + <column name="name" index="1"/> + <column name="value" index="1"/> + <filter type="unique_value" name="unique" column="1"/> + </options> + </param> + </when> + <when value="2"> + <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple columns"> + <options from_dataset="input1"> + <column name="name" index="2"/> + <column name="value" index="2"/> + <filter type="unique_value" name="unique" column="2"/> + </options> + </param> + </when> + <when value="6"> + <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple columns"> + <options from_dataset="input1"> + <column name="name" index="6"/> + <column name="value" index="6"/> + <filter type="unique_value" name="unique" column="6"/> + </options> + </param> + </when> + <when value="7"> + <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple columns"> + <options from_dataset="input1"> + <column name="name" index="7"/> + <column name="value" index="7"/> + <filter type="unique_value" name="unique" column="7"/> + </options> + </param> + </when> + </conditional> + </inputs> + <outputs> + <data format="input" name="out_file1" metadata_source="input1"/> + </outputs> + <tests> + <test> + <param name="input1" value="5.gff"/> + <param name="col" value="0" /> + <param name="feature" value="chr5,chr6,chr7,chr8" /> + <output name="out_file1" file="Extract_features1_out.gff"/> + </test> + </tests> + <help> + +**What it does** + +This tool extracts selected features from GFF data. + +----- + +**Example** + +Selecting **promoter** from the following GFF data:: + + chr22 GeneA enhancer 10000000 10001000 500 + . TGA + chr22 GeneA promoter 10010000 10010100 900 + . TGA + chr22 GeneB promoter 10020000 10025000 400 - . TGB + chr22 GeneB CCDS2220 10030000 10065000 800 - . TGB + +will produce the following output:: + + chr22 GeneA promoter 10010000 10010100 900 + . TGA + chr22 GeneB promoter 10020000 10025000 400 - . TGB + +---- + +.. class:: infomark + +**About formats** + +**GFF format** General Feature Format is a format for describing genes and other features associated with DNA, RNA and Protein sequences. GFF lines have nine tab-separated fields:: + + 1. seqname - Must be a chromosome or scaffold. + 2. source - The program that generated this feature. + 3. feature - The name of this type of feature. Some examples of standard feature types are "CDS", "start_codon", "stop_codon", and "exon". + 4. start - The starting position of the feature in the sequence. The first base is numbered 1. + 5. end - The ending position of the feature (inclusive). + 6. score - A score between 0 and 1000. If there is no score value, enter ".". + 7. strand - Valid entries include '+', '-', or '.' (for don't know/care). + 8. frame - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'. + 9. group - All lines with the same group are linked together into a single item. + + + </help> +</tool>