changeset 2:5ce7e50f159b draft

Uploaded
author bitlab
date Tue, 18 Dec 2018 06:59:41 -0500
parents fc3f2aefe244
children 0589a1c5f33a
files chromeister/chromeister.xml
diffstat 1 files changed, 10 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/chromeister/chromeister.xml	Mon Dec 17 12:24:59 2018 -0500
+++ b/chromeister/chromeister.xml	Tue Dec 18 06:59:41 2018 -0500
@@ -4,8 +4,10 @@
       <param name="query" type="data" format="fasta" label="Query sequence" help="Query sequence file in fasta format" />
       <param name="db" type="data" format="fasta" label="Reference sequence" help="Reference sequence file in fasta format" />
       <param name="dimension" type="integer" value="1000" label="Dimension" help="Compression size" />
+      <param name="kmer" type="integer" value="32" label="Kmer" help="Seed size used to find unique hits" />
+      <param name="diffuse" type="integer" value="4" label="Difusse value" help="Level of the heuristic subsampling employed" />
    </inputs>
-   <command>echo "\$PWD"; cp $query ${query.name}; cp $db ${db.name}; (/home/galaxy-bitlab/galaxy/tools/chromeister/bin/CHROMEISTER -query ${query.name} -db ${db.name} -dimension $dimension -out ${query.name}-${db.name}.mat) &amp;>/dev/null ; rm ${query.name}; rm ${db.name}; Rscript /home/galaxy-bitlab/galaxy/tools/chromeister/bin/compute_score.R  ${query.name}-${db.name}.mat $dimension; mv ${query.name}-${db.name}.mat $output; mv ${query.name}-${db.name}.mat.filt.png $outputIMAGEN ; mv ${query.name}-${db.name}.mat.events.txt $outputEVENTS; rm hits-XY-${query.name}-${db.name}.mat.hits</command>
+   <command>cp $query "${query.name}"; cp $db "${db.name}"; (/home/galaxy-bitlab/galaxy/tools/chromeister/bin/CHROMEISTER -query "${query.name}" -db "${db.name}" -dimension $dimension -kmer $kmer -diffuse $diffuse -out "${query.name}"-"${db.name}".mat) &amp;>/dev/null ; rm "${query.name}"; rm "${db.name}"; Rscript /home/galaxy-bitlab/galaxy/tools/chromeister/bin/compute_score.R  "${query.name}"-"${db.name}".mat $dimension; mv "${query.name}"-"${db.name}".mat $output; mv "${query.name}"-"${db.name}".mat.filt.png $outputIMAGEN ; mv "${query.name}"-"${db.name}".mat.events.txt $outputEVENTS; rm hits-XY-"${query.name}"-"${db.name}".mat.hits</command>
   <outputs>
       <data name="output" format="txt" label="Comparison matrix"/>
       <data name="outputIMAGEN" format="png" label="Comparison dotplot"/>
@@ -14,18 +16,20 @@
 
   <help>
 
-Chromeister is a heuristic approach for ultra fast previsualization of pairwise genome comparisons. It is able to compare enormous genomes (up to 30 thousand million base pairs, or 10 times the size of the human genome) much faster than other methods while yielding significant, reusable and exploitable information.
+Chromeister is a heuristic approach for the ultra fast previsualization of pairwise genome comparisons. It is able to compare enormous genomes (up to 30 thousand million base pairs, or 10 times the size of the human genome) much faster than other methods while yielding significant, reusable and exploitable information such as synteny blocks, evolutionary events or pairwise genome similarity metrics.
 
 -----
 
 **Manual**
 
-To use Chromeister, simply upload two metagenomes in the fasta format and select these as Query and Reference metagenome. Once so, choose the dimension (compression size) that suit best your comparison.
+To use Chromeister, upload two FASTA files and select them as the Query sequence and the Reference sequence. Then choose the parameters that best suit your comparison:
+
+- Dimension: This parameter sets the resolution of the comparison. A higher resolution is recommended for large genomes (e.g. use 2000 for sequences longer than 3 Gbp), and a lower resolution should be used for comparisons involving chromosomes or partial genomes.
+- Kmer size: This parameter is the seed size used to find unique hits. The recommended value is 32 for all sequences, except for small experiments such as comparisons of bacterial genomes.
+- Diffuse value: This parameter determines the level of heuristic subsampling employed. A level of 1 uses perfect indexing (no subsampling). The recommended level is 4, which represents a good trade-off between exact and inexact hits.
 
   </help>
 
   <citations>
   </citations>
-
-
-</tool>
+</tool>
\ No newline at end of file