view psortb.xml @ 0:b97ecda36fc4 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/psortb commit 9275472791234aa6df4ed6e0e533bd9d74d83bdd
author bgruening
date Thu, 18 Dec 2025 16:15:09 +0000
parents
children
line wrap: on
line source

<tool id="psortb" name="PSORTb" version="3.0.6" profile="24.0">
	<description>Protein subcellular localization prediction for prokaryotes</description>
	<!-- Cross-reference to the tool's bio.tools registry entry. -->
	<xrefs>
		<xref type="bio.tools">psortb</xref>
	</xrefs>
	<!-- The tool is executed from a Docker image; the image tag carries the PSORTb version. -->
	<requirements>
		<container type="docker">quay.io/galaxy/psortb-cli:3.0.6</container>
	</requirements>
	<command detect_errors="exit_code">
		<![CDATA[
		## Create the directory that the final mv step collects the report from.
		## -p keeps this step from failing when the directory already exists.
		mkdir -p "\$TMPDIR/results" &&
		/usr/local/psortb/bin/psort
		## The select value is the literal organism flag (positive, negative or archaea).
		$gram.gram_choice
		## Optional floats are compared as strings: a plain truthiness test would
		## treat an explicit 0 like an unset value and silently drop the option.
		#if str($cutoff) != ''
			-c $cutoff
		#end if
		#if str($divergent) != ''
			-d $divergent
		#end if
		-f fasta
		## Expands to the exact flag when the checkbox is ticked, otherwise to nothing.
		$exact
		-o $output_format
		## Dataset paths are quoted so unusual characters cannot split the argument.
		-i '$input_fasta'
		&&
		## The glob has to stay outside the quotes so the shell can expand it.
		mv "\$TMPDIR"/results/*_psortb_*.txt '$output'
		]]>
	</command>

	<inputs>
		<!-- Protein FASTA dataset; passed to psort with -i. -->
		<param name="input_fasta" type="data" format="fasta"
			label="Protein sequences (FASTA)"
			help="Submit protein sequences in FASTA format."/>

		<!-- Each option value is the literal psort flag placed on the command line. -->
		<section name="gram" title="Organism classification" expanded="true">
			<param name="gram_choice" type="select" label="Organism type">
				<option value="--positive" selected="true">Gram-positive (Bacteria)</option>
				<option value="--negative">Gram-negative (Bacteria)</option>
				<option value="--archaea">Archaea</option>
			</param>
		</section>

		<!-- Report layout; the chosen value is passed to psort with -o. -->
		<param name="output_format" type="select"
			label="Output format"
			help="Choose PSORTb output format.">
			<option value="normal">Normal (human-readable)</option>
			<option value="terse">3-column (terse)</option>
			<option value="long" selected="true">30-column (long)</option>
		</param>

		<!-- Checkbox: emits "exact" as a flag when ticked and an empty string otherwise. -->
		<param argument="--exact" type="boolean"
			truevalue="--exact" falsevalue="" checked="false"
			label="Skip SCLBLASTe"
			help="Useful for batch runs of data against itself in SCLBLAST"/>

		<!-- Optional thresholds; the command only adds -c / -d when a value is supplied. -->
		<param argument="--cutoff" type="float" optional="true"
			label="Prediction cutoff"
			help="Sets a cutoff value for reported results (default: 7.5 used internally)."/>
		<param argument="--divergent" type="float" optional="true"
			label="Multiple localization cutoff"
			help="Sets a cutoff for flagging potential multiple localization sites."/>
	</inputs>

	<outputs>
		<!-- The normal report is free text and stays txt. The terse and long
		     reports are tab-delimited, so they are typed as tabular to make them
		     directly usable by table-oriented tools. -->
		<data name="output" format="txt" label="PSORTb results on ${on_string}">
			<change_format>
				<when input="output_format" value="terse" format="tabular"/>
				<when input="output_format" value="long" format="tabular"/>
			</change_format>
		</data>
	</outputs>

	<tests>
		<!-- Gram-positive input, normal report: compared against a stored expected file. -->
		<test expect_num_outputs="1">
			<param name="input_fasta" value="psortb_pos.fa"/>
			<param name="gram|gram_choice" value="--positive"/>
			<param name="output_format" value="normal"/>
			<output name="output" value="psortb_pos_output.txt"/>
		</test>
		<!-- Gram-negative input, terse report: checks the header and one result row. -->
		<test expect_num_outputs="1">
			<param name="input_fasta" value="psortb_neg.fa"/>
			<param name="gram|gram_choice" value="--negative"/>
			<param name="output_format" value="terse"/>
			<output name="output">
				<assert_contents>
					<has_text_matching expression="SeqID\tLocalization\tScore" />
					<has_text_matching expression="NP_949347\.1 \tUnknown\t7\.0" />
				</assert_contents>
			</output>
		</test>
		<!-- Archaea input, long report: checks the full header and one result row.
		     Decimal points in the scores are escaped so "." cannot match an arbitrary character. -->
		<test expect_num_outputs="1">
			<param name="input_fasta" value="psortb_arch.fa"/>
			<param name="gram|gram_choice" value="--archaea"/>
			<param name="output_format" value="long"/>
			<output name="output">
				<assert_contents>
					<has_text_matching expression="SeqID\s+CMSVM_a_Localization\s+CMSVM_a_Details\s+CWSVM_a_Localization\s+CWSVM_a_Details\s+CytoSVM_a_Localization\s+CytoSVM_a_Details\s+ECSVM_a_Localization\s+ECSVM_a_Details\s+ModHMM_a_Localization\s+ModHMM_a_Details\s+Motif_a_Localization\s+Motif_a_Details\s+Profile_a_Localization\s+Profile_a_Details\s+SCL-BLAST_a_Localization\s+SCL-BLAST_a_Details\s+SCL-BLASTe_a_Localization\s+SCL-BLASTe_a_Details\s+Signal_a_Localization\s+Signal_a_Details\s+Cytoplasmic_Score\s+CytoplasmicMembrane_Score\s+Cellwall_Score\s+Extracellular_Score\s+Final_Localization\s+Final_Localization_Details\s+Final_Score\s+Secondary_Localization\s+PSortb_Version" />
					<has_text_matching expression="YP_001689002\.1\s+Unknown\s+Unknown\s+Unknown\s+Extracellular\s+Unknown\s+1 internal helix found\s+Unknown\s+No motifs found\s+Unknown\s+No matches to profiles found\s+Extracellular\s+matched 47117675: Flagellin B1 precursor\s+Unknown\s+No matches against database\s+Unknown\s+No signal peptide detected\s+0\.01\s+0\.00\s+0\.02\s+9\.97\s+Extracellular\s+9\.97\s+Flagellar\s+PSORTb version" />
				</assert_contents>
			</output>
		</test>
	</tests>

	<!-- Help text is rendered as reStructuredText: keep it flush left, use bold
	     lines as section headings and double backticks for literal option names. -->
	<help>
		<![CDATA[
PSORTb predicts the subcellular localization of bacterial and archaeal proteins.

**Input requirements**

- Protein sequences in FASTA format. All sequences in one run should belong to the same organism class.

**Options**

- Organism type: select Gram-positive (``--positive``), Gram-negative (``--negative``), or Archaea (``--archaea``).
- Output format: ``normal`` (human-readable), ``terse`` (tab-delimited), or ``long`` (tab-delimited with module details).
- Cutoff (``-c``): threshold for final localization assignment (documentation suggests ~7.5).
- Multiple localization cutoff (``-d``): threshold to flag possible multiple localization sites.
- Exact (``--exact``): skip SCL-BLASTe step.

**Notes**

- PSORTb emphasizes precision; proteins with ambiguous signals may be reported as Unknown.
- Long format includes module outputs and localization scores; terse/long are suitable for bulk processing.

**Reference**

https://psort.org/documentation/index.html
		]]>
	</help>

	<!-- Publication to cite when results from this tool are used, referenced by DOI. -->
	<citations>
		<citation type="doi">10.1093/bioinformatics/btq249</citation>
	</citations>
</tool>