Mercurial > repos > iuc > psiclass
diff psiclass.xml @ 0:1b690aa060f6 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/psiclass commit 2dc6baaeb2d595cbd8d8ffe3bf22b199a0ba23e4
| author | iuc |
|---|---|
| date | Tue, 26 Sep 2023 16:51:18 +0000 |
| parents | |
| children |
line wrap: on
line diff
<!--
    Galaxy wrapper for PsiCLASS.
    Takes one or more BAM alignments and produces:
      * a meta-annotation GTF (psiclass_vote.gtf),
      * one GTF per input sample (collected from annotation_files/),
      * the trusted splice sites file, only when the user did not supply one.
    The tool version tokens and the "requirements" / "citations" macros are defined in macros.xml.
-->
<tool id="psiclass" name="PsiCLASS" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
    <description>reference-based transcriptome assembler</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <xrefs>
        <xref type="bio.tools">psiclass</xref>
    </xrefs>
    <expand macro="requirements" />
    <command detect_errors='exit_code'><![CDATA[
        ## Directory scanned by the discover_datasets rule of the annotation_collection output.
        mkdir -p "annotation_files" &&

        ## Link every input BAM to a predictable local name (bam_0.bam, bam_1.bam, ...)
        ## and remember those names so they can be passed as a comma-separated list.
        #set $bam_filenames = []
        #for $i,$file in enumerate($bam_files)
            #set $name = 'bam_' + str($i) + '.bam'
            ln -s '${file}' $name &&
            ## silent: evaluate append() without writing its return value into the command line.
            #silent $bam_filenames.append($name)
        #end for
        #set $allFiles = ','.join( [ str( $file ) for $file in $bam_filenames ] )

        psiclass
            -b $allFiles
            -p \${GALAXY_SLOTS:-8}
            #if $splice_conditional.selector == 'true'
                -s '${splice_conditional.splice_file}'
            #end if
            -c $subexonClassifier
            --sa $intronCoverage
            --vd $transcriptCoverage
            ## The strand option is only passed when the user selected a value.
            #if $stranded
                --stranded $stranded
            #end if
            --maxDpConstraintSize $maxDpConstraintSize
            $primaryParalog
            --tssTesQuantile $tssTesQuantile

        ## Move the per-sample files (psiclass_sample_N.gtf) into the collection directory.
        && mv *sample* "./annotation_files"

        ## Without a user-supplied splice file, export the trusted splice sites found under ./splice.
        #if $splice_conditional.selector == 'false'
            && cat ./splice/psiclass_bam.trusted_splice > '${splice_sites}'
        #end if
    ]]></command>
    <inputs>
        <param argument="-b" name="bam_files" type="data" format="bam" multiple="true" label="BAM file(s)"
            help="PsiCLASS has been tuned to run on alignments generated with the tools HISAT and STAR" />
        <conditional name="splice_conditional">
            <param name="selector" type="select" label="Provide trusted splice sites coordinates"
                help="It is possible to supply a set of trusted introns, for instance generated by RNASTAR or extracted from the GENCODE gene annotations">
                <option value="true">Enabled</option>
                <option value="false" selected="true">Disabled</option>
            </param>
            <when value="true">
                <!-- Required: the command always passes this dataset to -s in this branch,
                     so an empty selection would otherwise render as the literal string 'None'. -->
                <param argument="-s" name="splice_file" type="data" format="interval"
                    label="Splice junction sites file" help="High confidence collapsed splice junction file" />
            </when>
            <when value="false"/>
        </conditional>
        <param argument="-c" type="float" name="subexonClassifier" min="0" max="1" value="0.05" label="Subexon classifier score threshold"
            help="Only use the subexons with classifier score less than or equal to the given number" />
        <param argument="--sa" name="intronCoverage" type="float" min="0" value="0.5" label="Minimum retained intron coverage"
            help="Minimum average number of supported read for retained introns" />
        <param argument="--vd" name="transcriptCoverage" type="float" min="0" value="1" label="Minimum transcript coverage"
            help="Minimum average coverage depth of a transcript to be reported" />
        <param argument="--stranded" type="select" optional="true" label="Library strand information"
            help="Stranded data shows advantages over non-stranded RNA-Seq data such as higher assembly and differential expression accuracy">
            <option value="un">unstranded</option>
            <option value="rf">fr-firststrand (rf): first read from the opposite strand.</option>
            <option value="fr">fr-secondstrand (fr): first read from the transcript strand</option>
        </param>
        <param argument="--maxDpConstraintSize" type="integer" min="-1" value="7"
            label="Maximum number of subexons a constraint can cover in dynamic programming (DP)" help="-1 for infinite"/>
        <param argument="--primaryParalog" type="boolean" truevalue="--primaryParalog" falsevalue="" checked="false" optional="true"
            label="Use primary alignment to retain paralog genes" help="Default: use unique alignments" />
        <param argument="--tssTesQuantile" type="float" min="0" max="1" value="0.5" label="Quantile for transcription start/end sites in subexon graph"/>
    </inputs>
    <outputs>
        <!-- NOTE: the name "meta_anotation" is misspelled; it is kept unchanged because
             tests and saved workflows refer to outputs by name. -->
        <data name="meta_anotation" format="gtf" from_work_dir="psiclass_vote.gtf" label="${tool.name} on ${on_string}: meta-annotation" />
        <collection name="annotation_collection" type="list" label="${tool.name} on ${on_string}: per-sample annotations">
            <discover_datasets pattern="__designation_and_ext__" format="gtf" directory="annotation_files"/>
        </collection>
        <!-- Only produced when no trusted splice file was supplied (mirrors the final step of the command). -->
        <data name="splice_sites" format="interval" label="${tool.name} on ${on_string}: splice sites">
            <filter>splice_conditional["selector"] == "false"</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test 1: two BAMs with a user-supplied splice file; the splice_sites output is filtered out (2 outputs). -->
        <test expect_num_outputs="2">
            <param name="bam_files" value="reads1.bam,reads2.bam"/>
            <param name="subexonClassifier" value="0.05"/>
            <param name="intronCoverage" value="0.5"/>
            <param name="transcriptCoverage" value="1"/>
            <param name="stranded" value="un"/>
            <param name="maxDpConstraintSize" value="7"/>
            <param name="primaryParalog" value="false"/>
            <param name="tssTesQuantile" value="0.5"/>
            <conditional name="splice_conditional">
                <param name="selector" value="true"/>
                <param name="splice_file" value="splice_sites.interval"/>
            </conditional>
            <output name="meta_anotation" ftype="gtf" file="test01_meta_annotation.gtf" lines_diff="2"/>
            <output_collection name="annotation_collection" type="list" count="2">
                <element name="psiclass_sample_0" ftype="gtf" file="test01_annotation_sample0.gtf" lines_diff="2"/>
                <element name="psiclass_sample_1" ftype="gtf" file="test01_annotation_sample1.gtf" lines_diff="2"/>
            </output_collection>
        </test>
        <!-- Test 2: two BAMs, non-default parameters, no splice file; splice_sites is emitted (3 outputs). -->
        <test expect_num_outputs="3">
            <param name="bam_files" value="reads1.bam,reads2.bam"/>
            <param name="subexonClassifier" value="0.05"/>
            <param name="intronCoverage" value="0.3"/>
            <param name="transcriptCoverage" value="0.5"/>
            <param name="stranded" value="rf"/>
            <param name="maxDpConstraintSize" value="6"/>
            <param name="primaryParalog" value="true"/>
            <param name="tssTesQuantile" value="0.4"/>
            <conditional name="splice_conditional">
                <param name="selector" value="false"/>
            </conditional>
            <output name="meta_anotation" ftype="gtf" file="test02_meta_annotation.gtf"/>
            <output_collection name="annotation_collection" type="list" count="2">
                <element name="psiclass_sample_0" ftype="gtf" file="test02_annotation_sample0.gtf" lines_diff="2"/>
                <element name="psiclass_sample_1" ftype="gtf" file="test02_annotation_sample1.gtf" lines_diff="2"/>
            </output_collection>
            <output name="splice_sites" ftype="interval" file="test02_splice_sites.txt"/>
        </test>
        <!-- Test 3: single BAM with every parameter left at its default (selector defaults to "false", so 3 outputs). -->
        <test expect_num_outputs="3">
            <param name="bam_files" value="reads1.bam"/>
            <output name="meta_anotation" ftype="gtf" file="test03_meta_annotation.gtf"/>
            <output_collection name="annotation_collection" type="list" count="1">
                <element name="psiclass_sample_0" ftype="gtf" file="test03_annotation_sample0.gtf" lines_diff="2"/>
            </output_collection>
            <output name="splice_sites" file="test03_splice_sites.txt" ftype="interval"/>
        </test>
    </tests>
    <help><![CDATA[

.. class:: infomark

**What is PsiCLASS?**

PsiCLASS is a reference-based transcriptome assembler for single or multiple RNA-seq samples.

Unlike conventional methods that analyze each sample separately and then merge the outcomes to create a unified set of meta-annotations,
PsiCLASS takes a multi-sample approach, simultaneously analyzing all RNA-seq data sets in an experiment.

PsiCLASS is both a transcript assembler and a meta-assembler, producing separate transcript sets for the individual samples and a unified
set of meta-annotations. The algorithmic underpinnings of PsiCLASS include using a global subexon splice graph, statistical cross-sample
feature (intron, subexon) selection methods, and an efficient dynamic programming algorithm to select a subset of transcripts from among
those encoded in the graph, based on the read support in each sample.

Lastly, the set of meta-annotations is selected from among the transcripts generated for individual samples by voting. While PsiCLASS is
highly accurate and efficient for medium-to-large collections of RNA-seq data, its accuracy is equally high for small RNA-seq data sets
(2-10 samples) and is competitive to reference methods for single samples. Additionally, its performance is robust with the aggregation
method used, including the built-in voting and assembly-based approaches such as StringTie-merge and TACO. Therefore, it can be effectively
used as a multi-sample and as a single-sample assembler, as well as in conventional assemble-and-merge protocols.

    ]]></help>
    <expand macro="citations" />
</tool>
