diff NSPDK_candidateClusters.xml @ 0:165fe96228be draft

planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/NSPDK commit 21aaee40723b5341b4236edeb0e72995c2054053
author rnateam
date Fri, 16 Dec 2016 07:36:07 -0500
parents
children 90a4a2e7d876
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NSPDK_candidateClusters.xml	Fri Dec 16 07:36:07 2016 -0500
@@ -0,0 +1,149 @@
+<tool id="NSPDK_candidateClust" name="NSPDK_candidateClusters" version="9.2">
+	<requirements>
+		<requirement type="package" version="0.1">graphclust-wrappers</requirement>
+		<requirement type="package" version="0.5">perl-array-utils</requirement>
+		<requirement type="package" version="9.2">nspdk</requirement>
+	</requirements>
+	<stdio>
+		<exit_code range="1:" />
+	</stdio>
+	<command>
+		<![CDATA[
+
+        mkdir -p SVECTOR &&
+
+        cp $data_svector SVECTOR/data.svector &&
+
+        'NSPDK_candidateClusters.pl'  '$data_fasta' '$data_names' $noCache $ensf $oc $usn $knn $nhf $nspdk_nhf_max $nspdk_nhf_step $GLOBAL_num_clusters $max_rad $max_dist_relations
+
+        #if  $iteration_num.iteration_num_selector:
+          $iteration_num.CI
+          '$blacklist'
+          '$final_partition_soft'
+          '$fast_cluster_last_round'
+          $iteration_num.GLOBAL_hit_blacklist_overlap
+
+        #else:
+          1
+
+        #end if
+
+]]>
+	</command>
+	<inputs>
+		<param type="data" name="data_svector" format="zip"  />
+		<param type="data" name="data_fasta" format="fasta" />
+		<param type="data" name="data_names" format="txt" />
+		<conditional name="iteration_num">
+			<param name="iteration_num_selector" type="boolean"  checked="no" label="Multiple iterations"  help="for single iteration- NO, for multiple-YES"/>
+			<when value="true">
+				<param name="CI" type="integer" value="2" size="5" label="Number of current iteration "/>
+				<param type="data" name="blacklist" format="txt" />
+				<param type="data" name="final_partition_soft" format="txt" />
+				<param type="data" name="fast_cluster_last_round" format="txt" />
+				<param name="GLOBAL_hit_blacklist_overlap" type="float" value="0.2" size="5" label="Blacklist hit overlap" />
+			</when>
+			<when value="false" >
+				<param name="CI" type="hidden" value="1" size="5" label="Number of current iteration "></param>
+			</when>
+		</conditional>
+		<param name="max_rad" type="integer" value="3" size="5" label="maximum radius " help="-R"/>
+		<param name="max_dist_relations" type="integer" value="3" size="5" label="maximum distance relations" help="-D"/>
+		<param name="noCache" truevalue="-no-cache" falsevalue="" checked="True" type="boolean"
+                label="Deactivate caching of kernel value computation  (-no-cache)" help="to minimize memory usage"/>
+		<param name="ensf" type="integer" value="5" size="5" label="eccess neighbour size factor" help="-ensf"/>
+		<param name="usn" truevalue="-usn" falsevalue="" checked="True" type="boolean"
+                label="Use shared neighbourhood to weight center density (-usn)" help="by default true"/>
+		<param name="oc" truevalue="-oc" falsevalue="" checked="True" type="boolean"
+                label=" flag to output clusters (-oc)" help="by default true"/>
+		<param name="knn" type="integer" value="20" size="5" label="Number of nearest neighbors" help="-knn num"/>
+		<param name="nhf" type="integer" value="500" size="5" label="Number of hash functions " help="-nhf num"/>
+		<param name="nspdk_nhf_max" type="integer" value="1000" size="5" label="Maximal number of hash functions " />
+		<param name="nspdk_nhf_step" type="integer" value="25" size="5" label="Size of step for increasing hash functions " help="The number of hash functions is increased by this value after each iteration."/>
+		<param name="GLOBAL_num_clusters" type="integer" value="100" size="5" label="Maxinum number of clusters " />
+	</inputs>
+	<outputs>
+		<data name="fast_cluster" format="txt" from_work_dir="SVECTOR/data.svector.1.fast_cluster" label="fast_cluster.1"  >
+			<filter> iteration_num['iteration_num_selector'] is False</filter>
+		</data>
+		<data name="fast_cluster_sim" format="txt" from_work_dir="SVECTOR/data.svector.1.fast_cluster_sim" label="fast_cluster_sim.1" >
+			<filter> iteration_num['iteration_num_selector'] is False </filter>
+		</data>
+		<data name="black_list" format="txt" from_work_dir="SVECTOR/data.svector.blacklist.1" label="blacklist.1" >
+			<filter> iteration_num['iteration_num_selector'] is False </filter>
+		</data>
+		<data name="fast_cluster_m" format="txt" from_work_dir="SVECTOR/data.svector.*.fast_cluster" label="fast_cluster.$iteration_num.CI"  >
+			<filter> iteration_num['iteration_num_selector'] is True</filter>
+		</data>
+		<data name="fast_cluster_sim_m" format="txt" from_work_dir="SVECTOR/data.svector.*.fast_cluster_sim" label="fast_cluster_sim.$iteration_num.CI" >
+			<filter> iteration_num['iteration_num_selector'] is True</filter>
+		</data>
+		<data name="black_list_m" format="txt" from_work_dir="SVECTOR/data.svector.blacklist.*" label="blacklist.$iteration_num.CI" >
+			<filter> iteration_num['iteration_num_selector'] is True</filter>
+		</data>
+	</outputs>
+	<tests>
+		<test>
+			<param name="data_fasta" value="data.fasta"/>
+			<param name="data_names" value="data.names"/>
+			<param name="data_svector" value="data.svector.1" ftype="zip"  />
+			<conditional name="iteration_num">
+				<param name="iteration_num_selector" value="false"/>
+			</conditional>
+			<param name="noCache" value="-no-cache"/>
+			<param name="ensf" value="5"/>
+			<param name="oc" value="-oc"/>
+			<param name="max_rad" value="3"/>
+			<param name="max_dist_relations" value="3"/>
+			<param name="usn" value="-usn"/>
+			<param name="knn" value="20"/>
+			<param name="nhf" value="500"/>
+			<param name="nspdk_nhf_max" value="1000"/>
+			<param name="nspdk_nhf_step" value="25"/>
+			<param name="GLOBAL_num_clusters" value="100"/>
+			<output name="fast_cluster" file="SVECTOR/data.svector.1.fast_cluster" />
+			<output name="fast_cluster_sim" file="SVECTOR/data.svector.1.fast_cluster_sim" />
+			<output name="black_list" file="SVECTOR/data.svector.blacklist.1" />
+		</test>
+	</tests>
+	<help>
+		<![CDATA[
+
+**What it does**
+
+Copmutes global feature index and returns top dense sets.
+The candidate clusters are chosen as the top ranking neighborhoods provided that the size of their overlap
+is below a specified threshold.
+For more information see *Fast neighborhood subgraph pairwise distance kernel* paper.
+
+
+**Parameters**
+
++ **-knn** : <num nearest neighbors> (default: 10)
+
++ **-otknn** : flag to output true (i.e. implies full kernel matrix evaluation) k-nearest neighburs (default: 0)
+
++ **-oc** : flag to output clusters (default: 0)
+
++ **-nhf** : <num hash functions> for the Locality Sensitive Hashing function (default: 250)
+
++ **-ensf** : <eccess neighbour size factor> (default: 10) (0 to avoid trimming)
+
++ **-usn** : use shared neighbourhood to weight center density (default: 0)
+
+
+    ]]>
+	</help>
+	<citations>
+		<citation type="doi">10.1093/bioinformatics/bts224</citation>
+		<citation type="bibtex">@inproceedings{costa2010fast,
+        title={Fast neighborhood subgraph pairwise distance kernel},
+        author={Costa, Fabrizio and De Grave, Kurt},
+        booktitle={Proceedings of the 26th International Conference on Machine Learning},
+        pages={255--262},
+        year={2010},
+        organization={Omnipress}
+      }
+      </citation>
+	</citations>
+</tool>