annotate trips_create_new_organism/create_annotation_sqlite.xml @ 7:2227d8ce6f5b draft

Uploaded
author triasteran
date Tue, 29 Mar 2022 12:42:11 +0000
parents dd580d6ff10d
children 1a1620b2a7bd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
2227d8ce6f5b Uploaded
triasteran
parents: 6
diff changeset
1 <tool id="create_annotation_sqlite" name="create annotation in sqlite for trips-viz" version="0.1.5conda">
6
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
2 <requirements>
7
2227d8ce6f5b Uploaded
triasteran
parents: 6
diff changeset
3 <requirement type="package" version="3.1.0">python-intervaltree</requirement>
2227d8ce6f5b Uploaded
triasteran
parents: 6
diff changeset
4 <requirement type="package" version="0.4.6">pysqlite3</requirement>
6
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
5 </requirements>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
6 <command detect_errors="exit_code"><![CDATA[
7
2227d8ce6f5b Uploaded
triasteran
parents: 6
diff changeset
7 python3 '$__tool_directory__/create_annotation_sqlite.py' '$annotation' '$fasta' '$pseudo_utr_len' '$transcript' '$gene' '$output'
6
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
8 ]]></command>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
9 <inputs>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
10 <param format="gtf,gff" name="annotation" type="data" label="GTF/GFF3 File"/>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
11 <param format="fasta" name="fasta" type="data" label="Transcriptome FASTA file"/>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
12 <param name="pseudo_utr_len" type="integer" value="0" min="0" label="Pseudo UTR length"/>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
13 <param name="transcript" type="text" label="Example transcript"/>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
14 <param name="gene" type="text" label="Example gene"/>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
15 </inputs>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
16 <outputs>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
17 <data format="sqlite" name="output" />
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
18 </outputs>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
19 <tests>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
20 <test>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
21 <param name="fasta" value="sacCer3_transcripts.fasta"/>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
22 <param name="annotation" value="saccharomyces_cerevisiae.R64-1-1.84.gtf"/>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
23 <param name="pseudo_utr_len" value="300"/>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
24 <param name="transcript" value="YDL248W"/>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
25 <param name="gene" value="COS7"/>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
26 <output name="output" file="output"/>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
27 </test>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
28 </tests>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
29 <help><![CDATA[
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
30 **GTF/GFF3 File**
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
31
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
32 GFF lines have nine required fields that must be tab-separated.
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
33 The GFF3 format addresses the most common extensions to GFF, while preserving backward compatibility with previous formats.
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
34
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
35 Both transcript ids and gene names should be listed in the file.
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
36
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
37 -----
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
38
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
39 **Transcriptome FASTA file**
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
40
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
41 A FASTA file with an entry for every transcript. The headers should be the transcript IDs as they appear in the GTF/GFF3 file.
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
42
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
43 -----
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
44
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
45 **Pseudo UTR length**
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
46 An integer representing the length (in nucleotides) to be added to the 5' end and 3' end of every transcript with an annotated
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
47 CDS. Useful when an organism does not have any annotated UTRs; if it does, use 0. If not 0, the extra nucleotides should
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
48 already be present in the FASTA file.
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
49
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
50 -----
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
51
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
52 **Example transcript**
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
53
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
54 An example of a transcript id that appears in the FASTA/GTF/GFF3 file, e.g. ENST00000123456
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
55
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
56 -----
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
57
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
58 **Example gene**
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
59
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
60 An example of a gene name as it appears in the GTF/GFF3 file, e.g. BRCA1
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
61
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
62 -----
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
63
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
64 **Output**
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
65
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
66 The output of the script can be downloaded and uploaded to Trips-viz_ by signing in and going to the uploads page, then selecting
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
67 "Upload new transcriptome". When uploaded the new organism will appear on the home page of Trips-viz, or under the transcriptomes
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
68 page if the organism name used is already present on Trips-viz.
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
69
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
70
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
71
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
72 .. _Trips-viz: http://trips.ucc.ie
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
73
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
74 ]]></help>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
75 <citations>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
76 <citation type="bibtex">
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
77 @misc{githubTrips-Viz,
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
78 author = {LastTODO, FirstTODO},
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
79 year = {TODO},
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
80 title = {Trips-Viz},
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
81 publisher = {GitHub},
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
82 journal = {GitHub repository},
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
83 url = {https://github.com/skiniry/Trips-Viz},
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
84 }</citation>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
85 </citations>
dd580d6ff10d Uploaded
triasteran
parents:
diff changeset
86 </tool>