comparison psiclass.xml @ 0:1b690aa060f6 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/psiclass commit 2dc6baaeb2d595cbd8d8ffe3bf22b199a0ba23e4
author iuc
date Tue, 26 Sep 2023 16:51:18 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1b690aa060f6
1 <tool id="psiclass" name="PsiCLASS" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
2 <description>reference-based transcriptome assembler</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <xrefs>
7 <xref type="bio.tools">psiclass</xref>
8 </xrefs>
9 <expand macro="requirements" />
<command detect_errors='exit_code'><![CDATA[
    ## Directory scanned by the "annotation_collection" output (discover_datasets).
    mkdir -p "annotation_files" &&

    ## Symlink each input BAM to bam_<index>.bam in the working directory and
    ## remember the link names; they are joined with commas for the -b option.
    #set $bam_filenames = []
    #for $i, $file in enumerate($bam_files)
        #set $name = 'bam_' + str($i) + '.bam'
        ln -s '${file}' '${name}' &&
        ## append() is called for its side effect only; its return value is discarded.
        #silent $bam_filenames.append($name)
    #end for
    #set $allFiles = ','.join($bam_filenames)

    psiclass
        -b '${allFiles}'
        -p \${GALAXY_SLOTS:-8}
        ## splice_file is an optional dataset: pass -s only when one was actually
        ## selected, so that an unset parameter is never rendered as -s 'None'.
        #if $splice_conditional.selector == 'true' and $splice_conditional.splice_file
            -s '${splice_conditional.splice_file}'
        #end if
        -c $subexonClassifier
        --sa $intronCoverage
        --vd $transcriptCoverage
        ## The strand option is an optional select; omit the flag when nothing is chosen.
        #if $stranded
            --stranded $stranded
        #end if
        --maxDpConstraintSize $maxDpConstraintSize
        ## Boolean parameter: expands to the flag itself or to an empty string.
        $primaryParalog
        --tssTesQuantile $tssTesQuantile

    ## Move the files matching *sample* into the directory harvested as the
    ## per-sample annotation collection.
    && mv *sample* "./annotation_files"

    ## When trusted splice sites are disabled, also publish the trusted splice
    ## file written by this run as the "splice_sites" output.
    #if $splice_conditional.selector == 'false'
        && cat ./splice/psiclass_bam.trusted_splice > '${splice_sites}'
    #end if
]]></command>
<inputs>
    <!-- Alignments to assemble; one or more BAM datasets can be selected. -->
    <param argument="-b" name="bam_files" type="data" format="bam" multiple="true" label="BAM file(s)"
        help="PsiCLASS has been tuned to run on alignments generated with the tools HISAT and STAR" />

    <!-- Optional trusted splice sites. When disabled, the tool emits the splice sites output instead. -->
    <conditional name="splice_conditional">
        <param name="selector" type="select" label="Provide trusted splice sites coordinates"
            help="It is possible to supply a set of trusted introns, for instance generated by RNASTAR or extracted from the GENCODE gene annotations">
            <option value="true">Enabled</option>
            <option value="false" selected="true">Disabled</option>
        </param>
        <when value="true">
            <param argument="-s" name="splice_file" type="data" format="interval" optional="true" label="Splice junction sites file"
                help="High confidence collapsed splice junction file" />
        </when>
        <when value="false"/>
    </conditional>

    <!-- Filtering thresholds. -->
    <param argument="-c" type="float" name="subexonClassifier" min="0" max="1" value="0.05" label="Subexon classifier score threshold"
        help="Only use the subexons with classifier score less than or equal to the given number" />
    <param argument="--sa" name="intronCoverage" type="float" min="0" value="0.5" label="Minimum retained intron coverage"
        help="Minimum average number of supported read for retained introns" />
    <param argument="--vd" name="transcriptCoverage" type="float" min="0" value="1" label="Minimum transcript coverage"
        help="Minimum average coverage depth of a transcript to be reported" />

    <!-- Library protocol; leaving it unset omits the option from the command line. -->
    <param argument="--stranded" type="select" optional="true" label="Library strand information"
        help="Stranded data shows advantages over non-stranded RNA-Seq data such as higher assembly and differential expression accuracy">
        <option value="un">unstranded</option>
        <option value="rf">fr-firststrand (rf): first read from the opposite strand.</option>
        <option value="fr">fr-secondstrand (fr): first read from the transcript strand</option>
    </param>

    <!-- Assembly tuning. -->
    <param argument="--maxDpConstraintSize" type="integer" min="-1" value="7"
        label="Maximum number of subexons a constraint can cover in dynamic programming (DP)" help="-1 for infinite"/>
    <param argument="--primaryParalog" type="boolean" truevalue="--primaryParalog" falsevalue="" checked="false"
        label="Use primary alignment to retain paralog genes" help="Default: use unique alignments" />
    <param argument="--tssTesQuantile" type="float" min="0" max="1" value="0.5" label="Quantile for transcription start/end sites in subexon graph"/>
</inputs>
<outputs>
    <!-- Unified annotation, picked up from psiclass_vote.gtf in the job working directory. -->
    <data
        name="meta_anotation"
        format="gtf"
        from_work_dir="psiclass_vote.gtf"
        label="${tool.name} on ${on_string}: meta-annotation" />

    <!-- One GTF element per file found in annotation_files, the directory the command fills. -->
    <collection
        name="annotation_collection"
        type="list"
        label="${tool.name} on ${on_string}: per-sample annotations">
        <discover_datasets
            pattern="__designation_and_ext__"
            format="gtf"
            directory="annotation_files" />
    </collection>

    <!-- Only produced when the user did not enable trusted splice sites. -->
    <data
        name="splice_sites"
        format="interval"
        label="${tool.name} on ${on_string}: splice sites">
        <filter>splice_conditional["selector"] == "false"</filter>
    </data>
</outputs>
<tests>
    <!-- Test 1: two BAMs with a user-supplied trusted splice file; no splice_sites output is expected. -->
    <test expect_num_outputs="2">
        <param name="bam_files" value="reads1.bam,reads2.bam"/>
        <conditional name="splice_conditional">
            <param name="selector" value="true"/>
            <param name="splice_file" value="splice_sites.interval"/>
        </conditional>
        <param name="subexonClassifier" value="0.05"/>
        <param name="intronCoverage" value="0.5"/>
        <param name="transcriptCoverage" value="1"/>
        <param name="stranded" value="un"/>
        <param name="maxDpConstraintSize" value="7"/>
        <param name="primaryParalog" value="false"/>
        <param name="tssTesQuantile" value="0.5"/>
        <output name="meta_anotation" ftype="gtf" file="test01_meta_annotation.gtf" lines_diff="2"/>
        <output_collection name="annotation_collection" type="list" count="2">
            <element name="psiclass_sample_0" ftype="gtf" file="test01_annotation_sample0.gtf" lines_diff="2"/>
            <element name="psiclass_sample_1" ftype="gtf" file="test01_annotation_sample1.gtf" lines_diff="2"/>
        </output_collection>
    </test>

    <!-- Test 2: two BAMs, trusted splice sites disabled, non-default values for the tuning options. -->
    <test expect_num_outputs="3">
        <param name="bam_files" value="reads1.bam,reads2.bam"/>
        <conditional name="splice_conditional">
            <param name="selector" value="false"/>
        </conditional>
        <param name="subexonClassifier" value="0.05"/>
        <param name="intronCoverage" value="0.3"/>
        <param name="transcriptCoverage" value="0.5"/>
        <param name="stranded" value="rf"/>
        <param name="maxDpConstraintSize" value="6"/>
        <param name="primaryParalog" value="true"/>
        <param name="tssTesQuantile" value="0.4"/>
        <output name="meta_anotation" ftype="gtf" file="test02_meta_annotation.gtf"/>
        <output_collection name="annotation_collection" type="list" count="2">
            <element name="psiclass_sample_0" ftype="gtf" file="test02_annotation_sample0.gtf" lines_diff="2"/>
            <element name="psiclass_sample_1" ftype="gtf" file="test02_annotation_sample1.gtf" lines_diff="2"/>
        </output_collection>
        <output name="splice_sites" ftype="interval" file="test02_splice_sites.txt"/>
    </test>

    <!-- Test 3: a single BAM with every other parameter left at its default. -->
    <test expect_num_outputs="3">
        <param name="bam_files" value="reads1.bam"/>
        <output name="meta_anotation" ftype="gtf" file="test03_meta_annotation.gtf"/>
        <output_collection name="annotation_collection" type="list" count="1">
            <element name="psiclass_sample_0" ftype="gtf" file="test03_annotation_sample0.gtf" lines_diff="2"/>
        </output_collection>
        <output name="splice_sites" file="test03_splice_sites.txt" ftype="interval"/>
    </test>
</tests>
<help><![CDATA[

.. class:: infomark

**What is PsiCLASS?**

PsiCLASS is a reference-based transcriptome assembler for single or multiple RNA-seq samples.

Unlike conventional methods that analyze each sample separately and then merge the outcomes to create a unified set of meta-annotations,
PsiCLASS takes a multi-sample approach, simultaneously analyzing all RNA-seq data sets in an experiment.

PsiCLASS is both a transcript assembler and a meta-assembler, producing separate transcript sets for the individual samples and a unified
set of meta-annotations. The algorithmic underpinnings of PsiCLASS include using a global subexon splice graph, statistical cross-sample
feature (intron, subexon) selection methods, and an efficient dynamic programming algorithm to select a subset of transcripts from among
those encoded in the graph, based on the read support in each sample.

Lastly, the set of meta-annotations is selected from among the transcripts generated for individual samples by voting. While PsiCLASS is
highly accurate and efficient for medium-to-large collections of RNA-seq data, its accuracy is equally high for small RNA-seq data sets
(2-10 samples) and is competitive with reference methods for single samples. Additionally, its performance is robust to the aggregation
method used, including the built-in voting and assembly-based approaches such as StringTie-merge and TACO. Therefore, it can be effectively
used as a multi-sample and as a single-sample assembler, as well as in conventional assemble-and-merge protocols.

-----

**Inputs**

- One or more BAM alignment files.
- Optionally, a file with trusted splice junction sites in interval format.

**Outputs**

- A meta-annotation in GTF format.
- A collection with one GTF annotation per sample.
- A splice sites file in interval format; it is only produced when the trusted splice sites option is disabled.

]]></help>
150 <expand macro="citations" />
151 </tool>