annotate cpt_related_genome_nuc/nuc_relate.xml @ 0:5a5fe0a6f78d draft default tip

Uploaded
author cpt
date Fri, 10 Jun 2022 08:45:13 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
1 <?xml version="1.0"?>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
2 <tool id="edu.tamu.cpt.blast.relatedness.nuc" name="Related Genomes" version="19.1.0.0">
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
3 <description>based on nucleotide blast results</description>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
4 <macros>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
5 <import>macros.xml</import>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
6 <import>cpt-macros.xml</import>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
7 </macros>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
8 <expand macro="requirements"/>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
9 <command detect_errors="aggressive">
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
10 $__tool_directory__/relatedness.py
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
11 ${blastIn.blast}
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
12 $__tool_directory__/TaxID_List.txt
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
13 $access
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
14 $filter
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
15 --hits $hits
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
16 #if $blastIn.blastType == "XML":
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
17 --xmlMode
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
18 #end if
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
19 > $accession_list
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
20 </command>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
21 <inputs>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
22 <conditional name="blastIn">
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
23 <param name="blastType" type="select" label="Blastn Input Type">
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
24 <option value="XML" selected="true">Blast XML</option>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
25 <option value="TSV">Blast Tabular</option>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
26 </param>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
27 <when value="XML">
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
28 <param label="Blastn Results (Blast XML)" name="blast" type="data" format="blastxml"/>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
29 </when>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
30 <when value="TSV">
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
31 <param label="Blastn Results" name="blast" type="data" format="tsv,tabular"/>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
32 </when>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
33 </conditional>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
34 <param label = 'Number of results to return' name="hits" type="integer" size="15" value="5"/>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
35 <param name="access" type="boolean" truevalue="--access" falsevalue="" label="Return Accession Numbers"/>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
36 <param name="filter" type="boolean" truevalue="" falsevalue="--noFilter" checked="true" label="Automatically filter by phage Taxonomy IDs"/>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
37 </inputs>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
38 <outputs>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
39 <data format="tabular" name="accession_list" label="Top BlastN Hits" />
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
40 </outputs>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
41 <tests>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
42 <test>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
43 <conditional name="blastIn">
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
44 <param name="blastType" value="TSV"/>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
45 <param name="blast" value="nuc_relate_in.tab"/>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
46 </conditional>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
47 <param name="hits" value="10"/>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
48 <param name="access" value="--access"/>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
49 <output name="accession_list" file="nuc_relate_out.tab" />
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
50 </test>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
51 <test>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
52 <conditional name="blastIn">
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
53 <param name="blastType" value="TSV"/>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
54 <param name="blast" value="nuc_relate_in.tab"/>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
55 </conditional>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
56 <param name="hits" value="10"/>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
57 <param name="access" value=""/>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
58 <output name="accession_list" file="nuc_relate_out_noaccess.tab" />
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
59 </test>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
60 </tests>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
61 <help>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
62 **What it does**
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
63
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
64 This tool filters a set of BLASTn results and returns the top related genomes based on the total aligned length as determined by BLASTn. The default mode is to only consider phage hits (based on TaxID), but this can be toggled off. Total aligned length here is the sum of the lengths of all high-scoring segment pairs (HSPs) between the query DNA sequence and each matched subject sequence in the BLASTn results; the subjects with the greatest total aligned length are presented in descending order and the number of returned sequences is specified by the user at runtime.
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
65
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
66 The input for this tool must be either a BLAST XML file or a tabular file from BLASTn; tabular input must have the output columns in the following order: qseqID, length, nident, qlen, slen, salltitles, sallacc, and staxIDs. The tool also expects a BLASTn analysis run with a single query DNA sequence against the NCBI nt database. Multiple query sequences or runs against other databases may result in unpredictable behavior. This tool is most commonly run as part of a workflow in which run parameters are properly set.
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
67
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
68 The tool determines the total aligned length of all HSPs between the query and each subject in the BLASTn results, and presents the organisms with the greatest aligned lengths in descending order. Note that the tool does not take alignment quality into account when determining the top related organisms. The tool produces a tabular output with the following columns:
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
69
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
70 TaxID: The subject NCBI TaxID, as found in the BLASTn results.
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
71
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
72 Name: The subject organism name, as found in the BLASTn results.
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
73
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
74 Accessions: The subject NCBI nucleotide accession, as found in the BLASTn results. If there are multiple accessions associated with a single TaxID, each will be listed on a new line.
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
75
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
76 Subject Length: The length of the subject nucleotide sequence, in bp.
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
77
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
78 Number of HSPs: The number of HSPs identified by BLASTn between the query and subject. Note that the HSPs could represent BLASTn alignments that range from the entire query length to only a few dozen bp.
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
79
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
80 Total Aligned Length: The sum of HSP lengths between the query and subject. Note that if the query and/or subject contain repetitive sequence elements that can align multiple times, the total HSP length can exceed that of the query or subject.
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
81
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
82 Dice Score: A simple Dice coefficient calculation, equal to (2 * total aligned length)/(query length + subject length). Note that this value can be greater than 1 if the query and/or subject contain repetitive sequence elements that can align multiple times.
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
83 </help>
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
84 <expand macro="citations-2020" />
5a5fe0a6f78d Uploaded
cpt
parents:
diff changeset
85 </tool>