view NSPDK_candidateClusters.xml @ 0:165fe96228be draft

planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/NSPDK commit 21aaee40723b5341b4236edeb0e72995c2054053
author rnateam
date Fri, 16 Dec 2016 07:36:07 -0500
parents
children 90a4a2e7d876
line wrap: on
line source

<tool id="NSPDK_candidateClust" name="NSPDK_candidateClusters" version="9.2">
	<requirements>
		<!-- Packages resolved for the job environment: the GraphClust wrapper
		     scripts, a Perl helper module and the NSPDK binary itself. -->
		<requirement version="0.1" type="package">graphclust-wrappers</requirement>
		<requirement version="0.5" type="package">perl-array-utils</requirement>
		<requirement version="9.2" type="package">nspdk</requirement>
	</requirements>
	<stdio>
		<!-- Any exit code of 1 or greater marks the job as failed. -->
		<exit_code range="1:" level="fatal" />
	</stdio>
	<!-- Command template (Cheetah).
	     1. Copies the svector input to the fixed path SVECTOR/data.svector.
	     2. Runs NSPDK_candidateClusters.pl with positional arguments only, so the
	        argument order below must not be changed.
	     3. In multi-iteration mode, appends the current iteration number, the three
	        datasets from the previous round and the blacklist overlap threshold;
	        otherwise appends the literal iteration number 1.
	     Dataset paths are single-quoted so that paths containing spaces or shell
	     metacharacters are passed through intact. Parameters declared inside the
	     "iteration_num" conditional are referenced through that conditional. -->
	<command>
		<![CDATA[
        mkdir -p SVECTOR &&
        cp '$data_svector' SVECTOR/data.svector &&

        'NSPDK_candidateClusters.pl'
            '$data_fasta'
            '$data_names'
            $noCache
            $ensf
            $oc
            $usn
            $knn
            $nhf
            $nspdk_nhf_max
            $nspdk_nhf_step
            $GLOBAL_num_clusters
            $max_rad
            $max_dist_relations

        #if $iteration_num.iteration_num_selector:
            $iteration_num.CI
            '$iteration_num.blacklist'
            '$iteration_num.final_partition_soft'
            '$iteration_num.fast_cluster_last_round'
            $iteration_num.GLOBAL_hit_blacklist_overlap
        #else:
            1
        #end if
]]>
	</command>
	<inputs>
		<!-- Primary datasets. data_svector is copied to SVECTOR/data.svector by the
		     command; the other two are passed to the script as file paths. -->
		<param name="data_svector" type="data" format="zip" label="Svector archive (data.svector)" />
		<param name="data_fasta" type="data" format="fasta" label="FASTA file (data.fasta)" />
		<param name="data_names" type="data" format="txt" label="Names file (data.names)" />

		<!-- Single vs. multiple iterations. The extra datasets and the overlap
		     threshold exist only in the multi-iteration branch; the single-iteration
		     branch pins the iteration number to 1. -->
		<conditional name="iteration_num">
			<param name="iteration_num_selector" type="boolean" checked="false" label="Multiple iterations" help="for single iteration- NO, for multiple-YES"/>
			<when value="true">
				<param name="CI" type="integer" value="2" size="5" label="Number of current iteration"/>
				<param name="blacklist" type="data" format="txt" label="Blacklist"/>
				<param name="final_partition_soft" type="data" format="txt" label="Final partition (soft)"/>
				<param name="fast_cluster_last_round" type="data" format="txt" label="Fast cluster of the last round"/>
				<param name="GLOBAL_hit_blacklist_overlap" type="float" value="0.2" size="5" label="Blacklist hit overlap"/>
			</when>
			<when value="false">
				<param name="CI" type="hidden" value="1" size="5" label="Number of current iteration"/>
			</when>
		</conditional>

		<!-- NSPDK options. Boolean parameters expand to their command-line flag
		     when checked and to an empty string otherwise. -->
		<param name="max_rad" type="integer" value="3" size="5" label="Maximum radius" help="-R"/>
		<param name="max_dist_relations" type="integer" value="3" size="5" label="Maximum distance relations" help="-D"/>
		<param name="noCache" type="boolean" truevalue="-no-cache" falsevalue="" checked="true"
			label="Deactivate caching of kernel value computation (-no-cache)" help="to minimize memory usage"/>
		<param name="ensf" type="integer" value="5" size="5" label="Excess neighbour size factor" help="-ensf"/>
		<param name="usn" type="boolean" truevalue="-usn" falsevalue="" checked="true"
			label="Use shared neighbourhood to weight center density (-usn)" help="by default true"/>
		<param name="oc" type="boolean" truevalue="-oc" falsevalue="" checked="true"
			label="Flag to output clusters (-oc)" help="by default true"/>
		<param name="knn" type="integer" value="20" size="5" label="Number of nearest neighbors" help="-knn num"/>
		<param name="nhf" type="integer" value="500" size="5" label="Number of hash functions" help="-nhf num"/>
		<param name="nspdk_nhf_max" type="integer" value="1000" size="5" label="Maximal number of hash functions"/>
		<param name="nspdk_nhf_step" type="integer" value="25" size="5" label="Size of step for increasing hash functions" help="The number of hash functions is increased by this value after each iteration."/>
		<param name="GLOBAL_num_clusters" type="integer" value="100" size="5" label="Maximum number of clusters"/>
	</inputs>
	<outputs>
		<!-- Single-iteration outputs: collected from fixed file names for
		     iteration 1 and only created when "Multiple iterations" is off. -->
		<data name="fast_cluster" format="txt" label="fast_cluster.1"
			from_work_dir="SVECTOR/data.svector.1.fast_cluster">
			<filter>iteration_num['iteration_num_selector'] is False</filter>
		</data>
		<data name="fast_cluster_sim" format="txt" label="fast_cluster_sim.1"
			from_work_dir="SVECTOR/data.svector.1.fast_cluster_sim">
			<filter>iteration_num['iteration_num_selector'] is False</filter>
		</data>
		<data name="black_list" format="txt" label="blacklist.1"
			from_work_dir="SVECTOR/data.svector.blacklist.1">
			<filter>iteration_num['iteration_num_selector'] is False</filter>
		</data>

		<!-- Multi-iteration outputs: labelled with the current iteration number and
		     only created when "Multiple iterations" is on.
		     NOTE(review): these from_work_dir values contain "*". Galaxy appears to
		     treat from_work_dir as a literal relative path rather than a glob, so
		     these outputs may never be collected. Confirm against the file names
		     the script actually writes before changing them. -->
		<data name="fast_cluster_m" format="txt" label="fast_cluster.$iteration_num.CI"
			from_work_dir="SVECTOR/data.svector.*.fast_cluster">
			<filter>iteration_num['iteration_num_selector'] is True</filter>
		</data>
		<data name="fast_cluster_sim_m" format="txt" label="fast_cluster_sim.$iteration_num.CI"
			from_work_dir="SVECTOR/data.svector.*.fast_cluster_sim">
			<filter>iteration_num['iteration_num_selector'] is True</filter>
		</data>
		<data name="black_list_m" format="txt" label="blacklist.$iteration_num.CI"
			from_work_dir="SVECTOR/data.svector.blacklist.*">
			<filter>iteration_num['iteration_num_selector'] is True</filter>
		</data>
	</outputs>
	<tests>
		<!-- Single-iteration run with every option at its default value; the three
		     iteration-1 outputs are compared against the stored expected files. -->
		<test>
			<!-- input datasets -->
			<param name="data_svector" value="data.svector.1" ftype="zip"/>
			<param name="data_fasta" value="data.fasta"/>
			<param name="data_names" value="data.names"/>
			<!-- iteration mode -->
			<conditional name="iteration_num">
				<param name="iteration_num_selector" value="false"/>
			</conditional>
			<!-- NSPDK options, listed in the same order as in the inputs section -->
			<param name="max_rad" value="3"/>
			<param name="max_dist_relations" value="3"/>
			<param name="noCache" value="-no-cache"/>
			<param name="ensf" value="5"/>
			<param name="usn" value="-usn"/>
			<param name="oc" value="-oc"/>
			<param name="knn" value="20"/>
			<param name="nhf" value="500"/>
			<param name="nspdk_nhf_max" value="1000"/>
			<param name="nspdk_nhf_step" value="25"/>
			<param name="GLOBAL_num_clusters" value="100"/>
			<!-- expected results -->
			<output name="fast_cluster" file="SVECTOR/data.svector.1.fast_cluster"/>
			<output name="fast_cluster_sim" file="SVECTOR/data.svector.1.fast_cluster_sim"/>
			<output name="black_list" file="SVECTOR/data.svector.blacklist.1"/>
		</test>
	</tests>
	<help>
		<![CDATA[

**What it does**

Computes global feature index and returns top dense sets.
The candidate clusters are chosen as the top ranking neighborhoods provided that the size of their overlap
is below a specified threshold.
For more information see *Fast neighborhood subgraph pairwise distance kernel* paper.


**Parameters**

+ **-knn** : <num nearest neighbors> (default: 10)

+ **-otknn** : flag to output true (i.e. implies full kernel matrix evaluation) k-nearest neighbours (default: 0)

+ **-oc** : flag to output clusters (default: 0)

+ **-nhf** : <num hash functions> for the Locality Sensitive Hashing function (default: 250)

+ **-ensf** : <excess neighbour size factor> (default: 10) (0 to avoid trimming)

+ **-usn** : use shared neighbourhood to weight center density (default: 0)


    ]]>
	</help>
	<citations>
		<!-- Two references: one given by DOI, and one BibTeX entry for the
		     "Fast neighborhood subgraph pairwise distance kernel" paper that the
		     help text points to. -->
		<citation type="doi">10.1093/bioinformatics/bts224</citation>
		<citation type="bibtex">@inproceedings{costa2010fast,
        title={Fast neighborhood subgraph pairwise distance kernel},
        author={Costa, Fabrizio and De Grave, Kurt},
        booktitle={Proceedings of the 26th International Conference on Machine Learning},
        pages={255--262},
        year={2010},
        organization={Omnipress}
      }
      </citation>
	</citations>
</tool>