annotate gff2bed.xml @ 0:696e702ebf74 draft

"planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
author petr-novak
date Mon, 09 May 2022 08:26:30 +0000
parents
children 639c0edb7e64
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
1 <tool id="gff2bed1" name="GFF-to-BED" version="1.0.1">
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
2 <description>converter</description>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
3 <edam_operations>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
4 <edam_operation>operation_3434</edam_operation>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
5 </edam_operations>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
6 <command interpreter="python">gff_to_bed_converter.py $input $out_file1</command>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
7 <inputs>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
8 <param format="gff" name="input" type="data" label="Convert this dataset"/>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
9 </inputs>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
10 <outputs>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
11 <data format="bed" name="out_file1" />
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
12 </outputs>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
13 <tests>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
14 <test>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
15 <param name="input" value="5.gff" ftype="gff"/>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
16 <output name="out_file1" file="gff2bed_out.bed"/>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
17 </test>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
18 <test>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
19 <param name="input" value="gff2bed_in2.gff" ftype="gff"/>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
20 <output name="out_file1" file="gff2bed_out2.bed"/>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
21 </test>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
22 <test>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
23 <!-- Test conversion of gff3 file. -->
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
24 <param name="input" value="5.gff3" ftype="gff"/>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
25 <output name="out_file1" file="gff2bed_out3.bed"/>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
26 </test>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
27 </tests>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
28 <help>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
29
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
30 **What it does**
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
31
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
32 This tool converts data from GFF format to BED format (scroll down for format description).
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
33
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
34 --------
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
35
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
36 **Example**
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
37
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
38 The following data in GFF format::
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
39
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
40 chr22 GeneA enhancer 10000000 10001000 500 + . TGA
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
41 chr22 GeneA promoter 10010000 10010100 900 + . TGA
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
42
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
43 Will be converted to BED (**note** that 1 is subtracted from the start coordinate)::
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
44
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
45 chr22 9999999 10001000 enhancer 0 +
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
46 chr22 10009999 10010100 promoter 0 +
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
47
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
48 ------
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
49
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
50 .. class:: infomark
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
51
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
52 **About formats**
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
53
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
54 **BED format** Browser Extensible Data format was designed at UCSC for displaying data tracks in the Genome Browser. It has three required fields and several additional optional ones.
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
55
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
56 The first three BED fields (required) are::
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
57
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
58 1. chrom - The name of the chromosome (e.g. chr1, chrY_random).
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
59 2. chromStart - The starting position in the chromosome. (The first base in a chromosome is numbered 0.)
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
60 3. chromEnd - The ending position in the chromosome, plus 1 (i.e., a half-open interval).
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
61
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
62 The additional BED fields (optional) are::
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
63
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
64 4. name - The name of the BED line.
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
65 5. score - A score between 0 and 1000.
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
66 6. strand - Defines the strand - either '+' or '-'.
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
67 7. thickStart - The starting position where the feature is drawn thickly in the Genome Browser.
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
68 8. thickEnd - The ending position where the feature is drawn thickly in the Genome Browser.
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
69 9. reserved - This should always be set to zero.
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
70 10. blockCount - The number of blocks (exons) in the BED line.
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
71 11. blockSizes - A comma-separated list of the block sizes. The number of items in this list should correspond to blockCount.
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
72 12. blockStarts - A comma-separated list of block starts. All of the blockStart positions should be calculated relative to chromStart. The number of items in this list should correspond to blockCount.
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
73 13. expCount - The number of experiments.
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
74 14. expIds - A comma-separated list of experiment ids. The number of items in this list should correspond to expCount.
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
75 15. expScores - A comma-separated list of experiment scores. All of the expScores should be relative to expIds. The number of items in this list should correspond to expCount.
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
76
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
77 **GFF format** General Feature Format is a format for describing genes and other features associated with DNA, RNA and Protein sequences. GFF lines have nine tab-separated fields::
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
78
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
79 1. seqname - Must be a chromosome or scaffold.
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
80 2. source - The program that generated this feature.
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
81 3. feature - The name of this type of feature. Some examples of standard feature types are "CDS", "start_codon", "stop_codon", and "exon".
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
82 4. start - The starting position of the feature in the sequence. The first base is numbered 1.
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
83 5. end - The ending position of the feature (inclusive).
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
84 6. score - A score between 0 and 1000. If there is no score value, enter ".".
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
85 7. strand - Valid entries include '+', '-', or '.' (for don't know/care).
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
86 8. frame - If the feature is a coding exon, frame should be a number between 0 and 2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'.
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
87 9. group - All lines with the same group are linked together into a single item.
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
88
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
89 </help>
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
90 </tool>