changeset 0:19b20927172d draft

Uploaded
author swebb
date Tue, 18 Jun 2013 09:11:00 -0400
parents
children 7c9574213c0a
files pyCRAC/pyAlignment2Tab.xml pyCRAC/pyBarcodeFilter.pl pyCRAC/pyBarcodeFilter.xml pyCRAC/pyBinCollector.pl pyCRAC/pyBinCollector.xml pyCRAC/pyCalculateChromosomeLengths.xml pyCRAC/pyCalculateFDRs.xml pyCRAC/pyCalculateMutationFrequencies.xml pyCRAC/pyCheckGTFfile.xml pyCRAC/pyClusterReads.pl pyCRAC/pyClusterReads.xml pyCRAC/pyExtractLinesFromGTF.xml pyCRAC/pyFasta2tab.xml pyCRAC/pyFastqDuplicateRemover.pl pyCRAC/pyFastqDuplicateRemover.xml pyCRAC/pyFastqJoiner.xml pyCRAC/pyFastqSplitter.pl pyCRAC/pyFastqSplitter.xml pyCRAC/pyGTF2bed.xml pyCRAC/pyGTF2bedGraph.pl pyCRAC/pyGTF2bedGraph.xml pyCRAC/pyGetGTFSources.xml pyCRAC/pyGetGeneNamesFromGTF.xml pyCRAC/pyMotif.pl pyCRAC/pyMotif.xml pyCRAC/pyPileup.xml pyCRAC/pyReadAligner.xml pyCRAC/pyReadCounters.pl pyCRAC/pyReadCounters.xml pyCRAC/pySelectMotifsFromGTF.xml pyCRAC/pycrac.chr.loc.sample pyCRAC/pycrac.fasta.loc.sample pyCRAC/pycrac.gtf.loc.sample pyCRAC/pycrac.tab.loc.sample pyCRAC/tool_data_table_conf.xml.sample
diffstat 35 files changed, 3953 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyAlignment2Tab.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,139 @@
+<tool id="pyAlignment2Tab" name="pyAlignment2Tab">
+	<description>converter</description>
+	<requirements>
+		<requirement type="package">pyCRAC</requirement>
+	</requirements>
+	<command interpreter="python">/usr/local/bin/pyAlignment2Tab.py -f $input --limit $limit -o $output --singlefile
+	</command>
+	<version_command>/usr/local/bin/pyAlignment2Tab.py --version</version_command>
+	<inputs>
+		<param name="input" type="data" format="fasta" label="pyReadAligner output file -f" help="Fasta file"/>
+		<param name="limit" type="integer" format="integer" value="90" size="4" label="Set the column width of alignment" help="Enter a value of 50 or more">
+			<validator type="in_range" min="50" message="Please enter a value of 50 or more"/>
+		</param>
+		<param name="label" type="text" format="txt" size="30" value="pyAlignment2Tab" label="Enter output file label -o" />
+	</inputs>
+	<outputs>
+		<data name="output" format="txt" label="${label.value}.tab"/>
+	</outputs>
+	  <help>
+
+.. class:: infomark
+
+**pyAlignment2Tab**
+
+pyAlignment2Tab is part of the pyCRAC_ package. Converts pyReadAligner fasta output to a tabular alignment output.
+
+Example::
+
+    The tool expects a standard pyReadAligner fasta-formatted output file:
+    
+        >GeneX
+        ATGTCTCGTACTAACATGGATACAAGACACGCACATTCTGCTTTACTGGCAGCACCACAGAGTGCTACTGCAAATAGCAGGAGCAGCAACAGCAGCAGCGAGAGTAGTAGTAACAAAAACAATATCAATGTCGGCGTCGGTGACGATAGCGGTAA
+        >257930-10
+        ---TCTCGTACcAACATGGATACAAGACACGCACATTCTGCTT----------------------------------------------------------------------------------------------------------------
+        >3664964-1
+        ---TCTCGcACcAACATGGATACAAGACACGCACATTtTGCTT----------------------------------------------------------------------------------------------------------------
+        >4033560-1
+        ---TCTCGTACcAACATGGATACAAGACACGCACATTCTGtTT----------------------------------------------------------------------------------------------------------------
+        >8571880-1
+        ---TCTCGTACcAACATGGATACAAGACACGCAgATTCTGCTT----------------------------------------------------------------------------------------------------------------
+        >9617396-1
+        ---TCTCGTACcAACATGGATACAAGACACGCcCATTCTGCTT----------------------------------------------------------------------------------------------------------------
+        >843368-5
+        ------------AACAcGGATACAAGACACGCACATTCTG-------------------------------------------------------------------------------------------------------------------
+        >854553-5
+        ------------AACATGGATACAAGACACGCAC--TCTG-------------------------------------------------------------------------------------------------------------------
+        >1522401-2
+        --------------CATGGATACAAGACACGCACATTCTGCTTTACTGGCAGgA-----------------------------------------------------------------------------------------------------
+        >5981234-1
+        --------------CATGGATACAAGACACGCACAcTCTGCTTTACTGGCAGCA-----------------------------------------------------------------------------------------------------
+        >997684-4
+        --------------CATGGATACAAGACACGCACATTCTGCTTTACTGGCAGCA-----------------------------------------------------------------------------------------------------
+        >1046653-4
+        ---------------ATGGATACAAGACACGCACATTCTGCTTTACTGGCAGgAC----------------------------------------------------------------------------------------------------
+        >1103730-4
+        ---------------ATGGATACAAGACACGCACAcTCTG-------------------------------------------------------------------------------------------------------------------
+        >1603913-2
+        ---------------ATGGATACAAGAaACGCACAcTCTG-------------------------------------------------------------------------------------------------------------------
+        >180349-12
+        ---------------ATGGATACAAGACACGCACATTCcGCTTTACTG-----------------------------------------------------------------------------------------------------------
+        >1985106-1
+        ---------------ATGGATACAAGACACGCACATTCgGCTTTACTGGCAGCcC----------------------------------------------------------------------------------------------------
+        >1987775-1
+        ---------------ATGGATACccGACACGCACATTCTGCTTTACTGcCAGCAC----------------------------------------------------------------------------------------------------
+        >2258725-1
+        ---------------ATGGATACAAGACACGCACATTCTGCTTTgCTGGCAGCAC----------------------------------------------------------------------------------------------------
+        >2631987-1
+        ---------------ATGGATACAAGACACGCACATTCTGCTTTACcGGCAGgAC----------------------------------------------------------------------------------------------------
+
+    This will be converted into:
+    
+                1	.........|.........|.........|.........|.........|.........|.........|.........|.........| 90
+        >GeneX          ATGTCTCGTACTAACATGGATACAAGACACGCACATTCTGCTTTACTGGCAGCACCACAGAGTGCTACTGCAAATAGCAGGAGCAGCAAC
+        >257930-10	---TCTCGTACcAACATGGATACAAGACACGCACATTCTGCTT-----------------------------------------------
+        >3664964-1	---TCTCGcACcAACATGGATACAAGACACGCACATTtTGCTT-----------------------------------------------
+        >4033560-1	---TCTCGTACcAACATGGATACAAGACACGCACATTCTGtTT-----------------------------------------------
+        >8571880-1	---TCTCGTACcAACATGGATACAAGACACGCAgATTCTGCTT-----------------------------------------------
+        >9617396-1	---TCTCGTACcAACATGGATACAAGACACGCcCATTCTGCTT-----------------------------------------------
+        >843368-5	------------AACAcGGATACAAGACACGCACATTCTG--------------------------------------------------
+        >854553-5	------------AACATGGATACAAGACACGCAC--TCTG--------------------------------------------------
+        >1522401-2	--------------CATGGATACAAGACACGCACATTCTGCTTTACTGGCAGgA------------------------------------
+        >5981234-1	--------------CATGGATACAAGACACGCACAcTCTGCTTTACTGGCAGCA------------------------------------
+        >997684-4	--------------CATGGATACAAGACACGCACATTCTGCTTTACTGGCAGCA------------------------------------
+        >1046653-4	---------------ATGGATACAAGACACGCACATTCTGCTTTACTGGCAGgAC-----------------------------------
+        >1103730-4	---------------ATGGATACAAGACACGCACAcTCTG--------------------------------------------------
+        >1603913-2	---------------ATGGATACAAGAaACGCACAcTCTG--------------------------------------------------
+        >180349-12	---------------ATGGATACAAGACACGCACATTCcGCTTTACTG------------------------------------------
+        >1985106-1	---------------ATGGATACAAGACACGCACATTCgGCTTTACTGGCAGCcC-----------------------------------
+        >1987775-1	---------------ATGGATACccGACACGCACATTCTGCTTTACTGcCAGCAC-----------------------------------
+        >2258725-1	---------------ATGGATACAAGACACGCACATTCTGCTTTgCTGGCAGCAC-----------------------------------
+        >2631987-1	---------------ATGGATACAAGACACGCACATTCTGCTTTACcGGCAGgAC-----------------------------------
+        >337206-9	---------------ATGGATACAAGACACGCACATTCTGCTTTACTGGCAGCAC-----------------------------------
+        >4616761-1	---------------ATGGATAgAAGACACGCACATTCTGCTTTACTGGtAGCAC-----------------------------------
+        >4756312-1	---------------ATGGATACAAcACACGCACAcTCTG--------------------------------------------------
+        >4763682-1	---------------ATGGATACAAGACACGCACATTCcGCTTTcCTG------------------------------------------
+        >5971268-1	---------------ATGGATACAAGACACGCACATTCcGCTcTACTc------------------------------------------
+        >6644790-1	---------------ATGGATACAAGACACGCACATTCTGCTTTACTcGCAGCAC-----------------------------------
+        >7112423-1	---------------ATGGATACAAGACACGCACATTCTGCTTTACTGtCAGCAC-----------------------------------
+        >7559990-1	---------------ATGGATACAAGACACGCACATTCTGCTTTACTGGCcGgAC-----------------------------------
+        >8007281-1	---------------ATGGATAtAAGACACGCACAcTCTG--------------------------------------------------
+        >9150255-1	---------------ATGGATACAcGACACGCACATTCcGCTTTcCTG------------------------------------------
+        >9180814-1	---------------ATGGATACAAGACACGCACATTCTGCTTTACTGGCAGgcC-----------------------------------
+        >963117-4	---------------ATGGATACAAGACACGCACATTCTGCTTTACcGGCAGCAC-----------------------------------
+        >9672073-1	---------------ATGGATACAAGACACGCACATTCTGCTTTACTGGCAGCcC-----------------------------------
+        >971218-4	---------------ATGGATACAAGACACGCACATcCTGCTTTACTGG-AGCACC----------------------------------
+        >10040274-1	-------------------ATACAAGACACGCACATTCTGCTTTACTGGCAGgACCACA-------------------------------
+        >1063072-4	-------------------ATACAAGACACGCACATTCTGCTTcACTGGCAGCACCACA-------------------------------
+        >1430188-2	-------------------ATACAAGACACGCACATTCTGCTTTACTGGCAGCACCACA-------------------------------
+        >5196741-1	-------------------ATACAAGACACGCACATTCTGCTTcACTGGCcGCACCACA-------------------------------
+        >6017337-1	-------------------ATACAAGACACGCACATTCTGCTTcACTGtCAGaACCcCA-------------------------------
+        >7159053-1	-------------------ATACAAGACACGCACATTCTGCTTTACTGGCAGCACCcaA-------------------------------
+        >7528336-1	-------------------ATACAAGACACGCACATTCTGCTTcACTGGCAGCAaCACA-------------------------------
+        >735584-6	--------------------------------------------------------ACAGAGTGCTACTGCAAAcAGCAGGAGCAGCAAC
+        >8551047-1	--------------------------------------------------------ACAGAGTGCTAtTGCAAAcAGCAGGAGtAGtAAC
+        >3000121-1	------------------------------------------------------------AGTcCTACcGCAAATAGCAGcAGCAGCAAC
+        >928481-5	------------------------------------------------------------AGTGCTACcGCAAATAGCAGGAGCAGCAAC
+        >126987-15	----------------------------------------------------------------------CAAATAGCAGGAGCAGCAAC
+        >3122797-1	----------------------------------------------------------------------CAAATAGCAGGcGCAGCAAC
+        >6684686-1	----------------------------------------------------------------------CAAATAGCAGGAGCAGCAAC
+    
+        Note that the column width here was set to 90 characters
+   
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+Options::
+
+  	-f data.fasta   
+                        Type the path to the fasta file that you want to use.
+  	--limit=90     
+                        Allows the user to set the column width of the alignment. Default=90 characters
+	-o output.fasta 
+                        Provide the name of your output file
+		
+ 	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyBarcodeFilter.pl	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,71 @@
+#!/usr/bin/perl -w
+# Galaxy wrapper around pyBarcodeFilter.py: runs the filter, then moves the
+# statistics file and the per-barcode read files to the paths Galaxy passed in.
+use strict;
+use Getopt::Long;
+
+# Defaults follow the tool's documented ones (0 mismatches, fastq) so that an
+# omitted option can never leave a value-less flag on the command line.
+my %opt = (m => 0, file_type => "fastq");
+
+GetOptions(\%opt, "f=s", "b=s", "out=s", "output_path=s","id=s","m=i", "file_type=s", "both", "r=s", "version", "i");
+
+my @py = ("python", "/usr/local/bin/pyBarcodeFilter.py");
+
+# --version only prints the version; there are no output files to post-process.
+exit(system(@py, "--version") == 0 ? 0 : 1) if exists $opt{version};
+
+foreach my $required (qw(f b out output_path id)){
+	die "Missing required option --$required\n" unless defined $opt{$required};
+}
+
+# Build the command as a list so that no argument is re-parsed by a shell.
+my @cmnd = (@py, "-f", $opt{f}, "-b", $opt{b}, "-m", $opt{m}, "--file_type", $opt{file_type});
+if(defined $opt{r}){
+	push @cmnd, "-r", $opt{r};
+	push @cmnd, "--both" if exists $opt{both};
+}
+push @cmnd, "-i" if exists $opt{i};
+
+system(@cmnd) == 0 or die "pyBarcodeFilter.py failed (status $?)\n";
+
+# Barcode table: column 1 is the barcode sequence, column 2 the sample name.
+my %bc;
+open(my $bc_fh, "<", $opt{b}) or die "Cannot open barcode file $opt{b}: $!\n";
+while(my $line = <$bc_fh>){
+	chomp($line);
+	my ($barcode,$sample) = (split(/\t/,$line))[0,1];
+	# Blank or single-column lines cannot be mapped to an output file name.
+	next unless defined $sample && length($barcode) && length($sample);
+	$bc{$barcode}=$sample;
+}
+close($bc_fh);
+
+move_file("barcode_statistics.txt", $opt{out});
+
+my $ft = lc($opt{file_type});
+
+# Output files are expected in the working directory as
+# <input basename>_<barcode>_<sample>.<type>; the basenames are loop-invariant.
+my $fwd_base = input_basename($opt{f});
+my $rev_base = defined $opt{r} ? input_basename($opt{r}) : undef;
+
+foreach my $key (keys %bc){
+	# "-1"/"-2" distinguish the forward and reverse file of one sample.
+	my $target = "$opt{output_path}/primary_$opt{id}_$bc{$key}";
+	move_file("${fwd_base}_${key}_$bc{$key}.$ft", "${target}-1_visible_$ft");
+	move_file("${rev_base}_${key}_$bc{$key}.$ft", "${target}-2_visible_$ft") if defined $rev_base;
+}
+
+# Last component of a path, cut at its first dot.
+sub input_basename{
+	my ($path) = @_;
+	my $name = (split(/\//,$path))[-1];
+	return (split(/\./,$name))[0];
+}
+
+# Move a file with mv, bypassing the shell; mv reports failures on stderr itself.
+sub move_file{
+	my ($from, $to) = @_;
+	system("mv", "--", $from, $to);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyBarcodeFilter.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,125 @@
+ <tool id ="pyBarcodeFilter" name="pyBarcodeFilter" force_history_refresh="True">
+        <requirements>
+            <requirement type="package">pyCRAC</requirement>
+        </requirements>
+	<command interpreter="perl">
+	/usr/local/bin/pyBarcodeFilter.pl
+	--file_type $ftype.type
+	-f $ftype.f
+	-b $barcode
+	-m $mismatch
+	$index
+	--out $out
+	--id $out.id
+	--output_path $__new_file_path__
+	#if $ftype.reverse.rev == "yes":
+		-r $ftype.reverse.r
+		$ftype.reverse.both
+	#end if#
+	</command>
+	<version_command>/usr/local/bin/pyBarcodeFilter.py --version</version_command>
+	<inputs>
+		<conditional name="ftype">
+			<param name="type" type="select" label="File type">
+				<option value="fastq" selected="true">FASTQ</option>
+				<option value="fasta">FASTA</option>
+			</param>
+			<when value="fastq">
+				<param format="fastq" name="f" type="data" label="FastQ File -f" help="FastQ format" />
+				<conditional name="reverse">
+					<param name="rev" type="select" label="Add a reverse or paired FastQ file">
+						<option value="no" selected="true">NO</option>
+						<option value="yes">YES</option>
+					</param>
+					<when value="yes">
+						<param format="fastq" name="r" type="data" label="Reverse FastQ File -r" help="FastQ format" />
+						<param name="both" type="select" label="Search for barcode in both reads">
+							<option value="" selected="true">NO</option>
+							<option value="--both">YES</option>
+						</param>
+					</when>
+					<when value="no">
+					</when>
+				</conditional>
+			</when>
+			<when value="fasta">
+				<param format="fasta" name="f" type="data" label="FastA File -f" help="FastA format" />
+				<conditional name="reverse">
+					<param name="rev" type="select" label="Add a reverse or paired FastA file">
+						<option value="no" selected="true">NO</option>
+						<option value="yes">YES</option>
+					</param>
+					<when value="yes">
+						<param format="fasta" name="r" type="data" label="Reverse FastA File -r" help="FastA format" />
+						<param name="both" type="select" label="Search for barcode in both reads">
+							<option value="" selected="true">NO</option>
+							<option value="--both">YES</option>
+						</param>
+					</when>
+					<when value="no">
+					</when>
+				</conditional>
+			</when>
+		</conditional>
+		<param format="tabular" name="barcode" type="data" label="Barcode File -b" help="Tab delimited file with barcodes and barcode names" />
+		<param format="integer" name="mismatch" type="integer" label="Mismatches -m" value="0" size="3" help="Set the number of allowed mismatches in a barcode">
+			<validator type="in_range" min="0" max="100" message="Please enter a value between 0 and 100"/>
+		</param>
+		<param name="index" type="select" label="Split data using Illumina indexing barcode information -i">
+			<option value="" selected="true">NO</option>
+			<option value="-i">YES</option>
+		</param>
+	</inputs>
+	<outputs>
+		<data format="txt" name="out" label="pyBarcodeFilter"/>
+	</outputs>
+	<help>
+
+.. class:: infomark
+
+**pyBarcodeFilter**
+
+pyBarcodeFilter is part of the pyCRAC_ package. Filters sequence files by barcodes.
+
+This tool requires FASTA or FASTQ input files containing the raw data and a text file containing barcode information.
+To process paired end data, use -f and the -r flags to indicate the path to the forward and reverse sequencing reactions, respectively. 
+The barcodes file should contain two columns separated by a tab (see the table below). The first column should contain the barcode nucleotide sequences. 
+The second column should contain an identifier, for example, the name of the barcode or the name of the experiment.
+The 'N' in the barcode sequence indicates a random nucleotide. Make sure to use a simple text editor like TextEdit (MacOS X), gedit (Linux/Unix) or use a text editor in the terminal. 
+The program is case sensitive: all the nucleotide sequences should be upper case. 
+You can freely combine different barcodes, but take care if you are mixing samples containing random nucleotide barcodes and normal barcodes.
+**NOTE!** make sure to place the regular barcode sequence below the sequence with random nucleotides and make sure the shortest sequence is ALWAYS at the bottom in the column (see below)
+
+Example of a barcode text file::
+
+    NNNCGCTTAGC mutant2
+    NNNGCGCAGC  mutant1
+    NNNATTAG    control
+    NNNTAAGC    myfavprotein
+    AGC         oldcontrol
+    AC          veryfirstbarcodedsample 
+   
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+Options::
+
+  -f FILE, --input_file=FILE		
+                            name of the FASTQ or FASTA input file
+  -r FILE, --reverse_input_file=FILE	
+                            name of the paired (or reverse) FASTQ or FASTA input file
+  --file_type=FASTQ     		
+                            type of file, uncompressed (fasta or fastq) or compressed (fasta.gz or fastq.gz, gzip/gunzip 
+					        compressed). Default is fastq
+  -b FILE, --barcode_list=FILE		
+                            name of tab-delimited file containing barcodes and barcode names
+  -m 1, --mismatches=1  		
+                            to set the number of allowed mismatches in a barcode. A maximum of one mismatch is allowed. Default = 0
+  -i, --index           		
+                            use this option if you want to split the data using the Illumina indexing barcode information
+
+	</help>
+</tool>	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyBinCollector.pl	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,47 @@
+#!/usr/bin/perl -w
+# Galaxy wrapper around pyBinCollector.py: runs the tool, then moves its output
+# file(s) from the working directory to the dataset paths Galaxy passed in.
+use strict;
+use Getopt::Long;
+
+my %opt=(s=>"genomic",numberofbins=>20);
+
+GetOptions(\%opt, "f=s","version","gtf=s","range=i","annotation=s", "numberofbins=i","min_length=i","max_length=i","s=s","o=s","ignorestrand","outputall","sd=s","ssub=s","sdel=s","asd=s","assub=s","asdel=s","out=s","options","bins1=i","bins2=i","id=s");
+
+my @py = ("python", "/usr/local/bin/pyBinCollector.py");
+
+# --version only prints the version; there are no output files to move afterwards.
+exit(system(@py, "--version") == 0 ? 0 : 1) if exists $opt{version};
+
+foreach my $required (qw(f gtf annotation id)){
+	die "Missing required option --$required\n" unless defined $opt{$required};
+}
+
+# Output prefix handed to pyBinCollector.py via -o; the moves below rely on it.
+my $prefix = "bc_$opt{id}";
+
+# Build the command as a list so that no argument is re-parsed by a shell.
+my @cmnd = (@py, "-f", $opt{f}, "--gtf", $opt{gtf}, "--annotation", $opt{annotation}, "-o", $prefix);
+push @cmnd, "--outputall" if exists $opt{outputall};
+if(exists $opt{options}){
+	foreach my $name (qw(range numberofbins min_length max_length)){
+		push @cmnd, "--$name", $opt{$name} if defined $opt{$name};
+	}
+	push @cmnd, "-s", $opt{s};
+	push @cmnd, "--ignorestrand" if exists $opt{ignorestrand};
+	push @cmnd, "--binselect", $opt{bins1}, $opt{bins2} if defined $opt{bins1} && defined $opt{bins2};
+}
+system(@cmnd) == 0 or die "pyBinCollector.py failed (status $?)\n";
+
+if(exists $opt{outputall}){
+	# Each pair is (output file stem, name of the option holding its destination).
+	foreach my $pair (["sense_data","sd"], ["sense_subs","ssub"], ["sense_dels","sdel"], ["anti_sense_data","asd"], ["anti_sense_subs","assub"], ["anti_sense_dels","asdel"]){
+		move_file("$pair->[0]_$prefix.txt", $opt{$pair->[1]});
+	}
+}
+else{
+	move_file("${prefix}_cumulative_densities_$opt{annotation}_$opt{s}_$opt{numberofbins}_bins.pileup", $opt{out});
+}
+
+# Move a file with mv, bypassing the shell; mv reports failures on stderr itself.
+sub move_file{ my ($from, $to) = @_; system("mv", "--", $from, $to); }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyBinCollector.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,290 @@
+ <tool id ="pyBinCollector" name="pyBinCollector">
+	<requirements>
+        	<requirement type="package">pyCRAC</requirement>
+    	</requirements>
+	<command interpreter="perl">
+	pyBinCollector.pl
+	-f $input
+	--gtf $addGTF.gtf
+	#if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto":
+		--annotation $addGTF.annotate.scan.annotation
+	#else:
+		--annotation $addGTF.annotate.annotation
+	#end if#
+	#if $addOpt.options == "edit":
+		--options
+		--range $addOpt.range
+		--min_length $addOpt.min_length
+		--max_length $addOpt.max_length
+		--numberofbins $addOpt.numberofbins
+		-s $addOpt.sequence
+		#if $addOpt.limitBins.binselect == "yes":
+			--bins1 $addOpt.limitBins.bs_first
+			--bins2 $addOpt.limitBins.bs_last
+		#end if#
+		$addOpt.ignore
+		$addOpt.oall.outputall
+	#end if#
+	## No -o argument: pyBinCollector.pl builds its output prefix from --id and never reads -o, so the user-editable dataset name is kept out of the shell command.
+	#if $addOpt.options == "edit" and $addOpt.oall.outputall == "--outputall":
+		--id $sd.id
+		--sd $sd
+		--ssub $ssub
+		--sdel $sdel
+		--asd $asd
+		--assub $assub
+		--asdel $asdel
+	#else:
+		--out $out
+		--id $out.id
+	#end if#
+	</command>
+	<version_command>/usr/local/bin/pyBinCollector.py --version</version_command>
+	<inputs>
+		<param format="gtf" name="input" type="data" label="Input File -f" help="pyReadCounters or pyMotif gtf output files" />
+
+		<conditional name="addGTF">
+			<param name="gtfFile" type="select" label="Choose GTF File from">
+				<option value="default" selected="true">Defaults</option>
+				<option value="other">History</option>
+			</param>
+			<when value="default">
+				<param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+					<options from_data_table="pycrac_gtf"/>
+				</param>
+
+				<conditional name="annotate">
+					<param name="annotations" type="select" label="Select annotation">
+						<option value="all" selected="true">All</option>
+						<option value="manual">Enter in text box</option>
+						<option value="auto">Scan pyGetGTFSources file</option>
+					</param>
+					<when value="all">
+						<param name="annotation" type="hidden" format="txt" size="10" value="all"/>
+					</when>
+					<when value="manual">
+						<param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+							<validator type="empty_field" message="Please enter a value"/>
+						</param>
+					</when>
+					<when value="auto">
+						<param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
+						<conditional name="scan">
+							<param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
+								<option value="wait" selected="true">Waiting</option>
+								<option value="scanning">Go</option>
+							</param>
+							<when value="wait">
+							</when>
+							<when value="scanning">
+								<param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+									<options from_dataset="gtf_annotation">
+										<column name="name" index="0"/>
+										<column name="value" index="0"/>
+									</options>
+								</param>
+							</when>
+						</conditional>
+					</when>
+				</conditional>
+
+			</when>
+			<when value="other">
+				<param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+				<conditional name="annotate">
+					<param name="annotations" type="select" label="Select annotation">
+						<option value="all" selected="true">All</option>
+						<option value="manual">Enter in text box</option>
+						<option value="auto">Scan selected file</option>
+					</param>
+					<when value="all">
+						<param name="annotation" type="hidden" format="txt" size="10" value="all"/>
+					</when>
+					<when value="manual">
+						<param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+							<validator type="empty_field" message="Please enter a value"/>
+						</param>
+					</when>
+					<when value="auto">
+						<param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+							<options from_dataset="gtf">
+								<column name="name" index="1"/>
+								<column name="value" index="1"/>
+								<filter type="unique_value" name="unique" column="1"/>
+							</options>
+						</param>
+					</when>
+				</conditional>
+			</when>
+		</conditional>
+
+
+		<conditional name="addOpt">
+			<param name="options" type="select" label="Options">
+				<option value="default" selected="true">Default</option>
+				<option value="edit">Edit</option>
+			</param>
+			<when value="edit">
+				<param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs (0-50000)">
+					<validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
+				</param>
+				<param format="integer" name="numberofbins" type="integer" label="Set the number of bins --numberofbins" value="20" size="7" help="Set the number of bins you want to divide the genes into">
+					<validator type="in_range" min="20" max="1000" message="Please enter a value between 20 and 1000"/>
+				</param>
+				<param format="integer" name="min_length" type="integer" label="Set the minimum gene length (nt) --min_length" value="50" size="7" help="To filter the data for gene length (nucleotides)" >
+					<validator type="in_range" min="20" message="Please enter a value of 20 or more"/>
+				</param>
+				<param format="integer" name="max_length" type="integer" label="Set the maximum gene length (nt) --max_length" help="Default = 100000000" value="100000000" size="10" >
+					<validator type="in_range" min="50" max="100000000" message="Please enter a value between 50 and 100000000"/>
+				</param>
+				<param name="sequence" type="select" label="What sequences do you want to run pyBinCollector on? --sequence">
+					<option value="genomic" selected="true">Genomic Sequence</option>
+					<option value="coding">Coding Sequence</option>
+					<option value="intron">Introns</option>
+					<option value="exon">Exons</option>
+					<option value="CDS">CDS</option>
+					<option value="5UTR">5UTR</option>
+					<option value="3UTR">3UTR</option>
+				</param>
+				<conditional name="limitBins">
+					<param name="binselect" type="select" label="Select sequences that map to specific bins --binselect">
+						<option value="no" selected="true">No</option>
+						<option value="yes">Yes</option>
+					</param>
+					<when value="yes">
+						<param format="integer" name="bs_first" type="integer" label="Select First Bin" value="1" size="7">
+							<validator type="in_range" min="1" message="Please enter a value greater than 0"/>
+						</param>
+						<param format="integer" name="bs_last" type="integer" label="Select Last Bin" value="2" size="7">
+							<validator type="in_range" min="2" message="Please enter a value greater than 1"/>
+						</param>
+					</when>
+					<when value="no">
+					</when>
+				</conditional>
+				<param name="ignore" type="select" label="Ignore strand information? --ignorestrand">
+					<option value="" selected="true">No</option>
+					<option value="--ignorestrand">Yes</option>
+				</param>
+				<conditional name="oall">
+					<param name="outputall" type="select" label="Output all genes --outputall" help="output the normalized distribution for each individual gene, rather than making a cumulative coverage plot">
+						<option value="" selected="true">No</option>
+						<option value="--outputall">Yes</option>
+					</param>
+					<when value="--outputall"/>
+					<when value=""/>
+				</conditional>
+			</when>
+			<when value="default">
+			</when>
+		</conditional>
+		<param name="label" type="text" format="txt" size="30" value="pyBinCollector" label="Enter output file label -o" />
+	</inputs>
+
+	<outputs>
+		<data format="gtf" name="out" label="${label.value}.gtf">
+			<filter>addOpt['options'] == "default" or addOpt['oall']['outputall'] == ""</filter>
+		</data>
+		<data format="txt" name="sd" label="sense_data_${label.value}.txt">
+			<filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
+		</data>
+		<data format="txt" name="ssub" label="sense_subs_${label.value}.txt">
+			<filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
+		</data>
+		<data format="txt" name="sdel" label="sense_dels_${label.value}.txt">
+			<filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
+		</data>
+		<data format="txt" name="asd" label="anti_sense_data_${label.value}.txt">
+			<filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
+		</data>
+		<data format="txt" name="assub" label="anti_sense_subs_${label.value}.txt">
+			<filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
+		</data>
+		<data format="txt" name="asdel" label="anti_sense_dels_${label.value}.txt">
+			<filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
+		</data>
+	</outputs>
+	<help>
+
+
+.. class:: infomark
+
+**pyBinCollector**
+
+pyBinCollector is part of the pyCRAC_ package. Allows the user to generate genome-wide coverage plots. Normalises gene lengths by dividing genes into a 
+fixed number of bins and then calculates the hit density in each bin. The program also allows the user to input specific bin numbers to extract 
+blocks/clusters present in these bins.
+
+   
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+File input options::
+
+    -f FILE, --input_file=FILE
+                        Provide the path and name of the pyReadCounters.py or
+                        pyMotif.py GTF file. By default the program expects
+                        data from the standard input.
+    -o OUTPUT_FILE, --output_file=OUTPUT_FILE
+                        To set an output file name. Do not add a file
+                        extension. By default, if the --outputall flag is not
+                        used, the program writes to the standard output.
+    --gtf=yeast.gtf     
+                        type the path to the gtf annotation file that you want
+                        to use. Default is /usr/local/pyCRAC/db/Saccharomyces_
+                        cerevisiae.EF2.59.1.2.gtf
+
+pyBinCollector.py specific options::
+
+    -a protein_coding, --annotation=protein_coding
+                        select which annotation (i.e. protein_coding, ncRNA,
+                        sRNA, rRNA, tRNA, snoRNA, all) you would like to focus
+                        your search on. Default = all
+    --min_length=20     
+                        to set a minimum length threshold for genes. Genes
+                        shorter than the minimal length will be discarded.
+                        Default = 1
+    --max_length=10000  
+                        to set a maximum length threshold for genes. Genes
+                        larger than the maximum length will be discarded.
+                        Default = 100000000
+    -n 20, --numberofbins=20
+                        select the number of bins you want to generate.
+                        Default=20
+    --binselect=2 4     
+                        allows selection of sequences that were mapped to
+                        specific bins. This option expects two numbers, one
+                        for each bin, separated by a space. For example:
+                        --binselect 20 30.
+    --outputall         
+                        use this flag to output the normalized distribution
+                        for each individual gene, rather than making a
+                        cumulative coverage plot. Useful for making box plots
+                        or for making heat maps.
+
+Common options::
+
+    -r 100, --range=100
+                        allows you to set the length of the UTR regions. If
+                        you set '-r 50' or '--range=50', then the program will
+                        set a fixed length (50 bp) regardless of whether the
+                        GTF file has genes with annotated UTRs.
+    -s intron, --sequence=intron
+                        with this option you can select whether you want to
+                        generate bins from the coding or genomic sequence or
+                        introns,exon,CDS, or UTR coordinates. Default =
+                        genomic
+    --ignorestrand      
+                        To ignore strand information and all reads overlapping
+                        with genomic features will be considered sense reads.
+                        Useful for analysing ChIP or RIP data
+
+
+
+ 
+
+	</help>
+</tool>	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyCalculateChromosomeLengths.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,53 @@
+<tool id="pyCalculateChromosomeLengths" name="pyCalculateChromosomeLengths">
+	  <requirements> <!-- the wrapped script is provided by the pyCRAC package -->
+	    <requirement type="package">pyCRAC</requirement>
+	  </requirements>
+	  <command interpreter="python">
+	  /usr/local/bin/pyCalculateChromosomeLengths.py
+	  -f $ftype.input
+	  --file_type $ftype.filetype
+	  -o $output </command>
+	  <version_command>/usr/local/bin/pyCalculateChromosomeLengths.py --version</version_command>
+	  <inputs> <!-- every param, including the output label, has to be declared inside inputs -->
+	    <conditional name="ftype"> <!-- the selected file type decides which datatype the input accepts -->
+	      <param name="filetype" type="select" label="File type">
+	        <option value="fasta" selected="true">Fasta</option>
+	        <option value="tab">Tab</option>
+	      </param>
+	      <when value="fasta">
+	        <param name="input" type="data" format="fasta" label="Input file" help="Fasta or Tab file"/>
+	      </when>
+	      <when value="tab">
+	        <param name="input" type="data" format="tabular" label="Input file" help="Fasta or Tab file"/>
+	      </when>
+	    </conditional>
+	    <param name="label" type="text" format="txt" size="30" value="pyCalculateChromosomeLengths" label="Enter output file label -o" />
+	  </inputs>
+	  <outputs> <!-- tabular, so the result is accepted by the tabular "Chromosome length file" inputs of the sibling tools -->
+	    <data name="output" format="tabular" label="${label.value}.len"/>
+	  </outputs>
+	  <help>
+
+.. class:: infomark
+
+**pyCalculateChromosomeLengths**
+
+pyCalculateChromosomeLengths is part of the pyCRAC_ package. Takes a genome sequence in fasta or tab format and generates a tab-delimited file showing chromosome name and chromosome length.
+   
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+
+-------
+
+**Parameter list**                                                                     
+
+Options::
+
+  -f chromosomes.fasta, --input_file=chromosomes.fasta
+                        provide the name and path of your fasta or tab genomic
+                        sequence file. Default is standard input.
+  --file_type=fasta     
+                        provide the file type (fasta or tab). Default is fasta
+
+ 	  </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyCalculateFDRs.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,247 @@
+ <tool id ="pyCalculateFDRs" name="pyCalculateFDRs">
+	<requirements> <!-- pyCalculateFDRs.py is provided by the pyCRAC package -->
+		<requirement type="package">pyCRAC</requirement>
+	</requirements>
+	<command interpreter="python">
+	/usr/local/bin/pyCalculateFDRs.py
+	-f $ftype.input
+	--file_type $ftype.file_type
+	--gtf=$addGTF.gtf
+	#if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto":
+		## scan.annotation only exists after the scan select was switched from "wait" to "scanning"
+		#if $addGTF.annotate.scan.annotations == "scanning":
+			--annotation $addGTF.annotate.scan.annotation
+		#end if#
+	#elif $addGTF.annotate.annotations != "all":
+		--annotation $addGTF.annotate.annotation
+	#end if#
+	--chromfile=$addChr.chr
+	## the optional flags are only passed when the user chose to edit them
+	#if $addOpt.options == "edit":
+		-s $addOpt.sequence
+		--min $addOpt.min
+		--minfdr $addOpt.minfdr
+		--iterations=$addOpt.iterations
+		--range $addOpt.range
+	#end if#
+	-o $output
+	</command>
+	<version_command>/usr/local/bin/pyCalculateFDRs.py --version</version_command>
+	<inputs> <!-- read intervals, chromosome lengths, GTF annotation, optional flags and the output label -->
+		<conditional name="ftype">
+			<param name="file_type" type="select" label="Input File Type --file_type" help="Use bed6, gff or gtf input files containing read/cDNA co-ordinates">
+				<option value="gff" selected="true">GFF</option>
+				<option value="bed">Bed6</option>
+				<option value="gtf">GTF</option>
+			</param>
+			<when value="gff">
+				<param name="input" type="data" format="gff" label="Input File --readdatafile" help="GFF format containing read/cDNA co-ordinates"/>
+			</when>
+			<when value="gtf">
+				<param name="input" type="data" format="gtf" label="Input File --readdatafile" help="GTF format containing read/cDNA co-ordinates"/>
+			</when>
+			<when value="bed">
+				<param name="input" type="data" format="bed6" label="Input File --readdatafile" help="Bed 6 column format containing read/cDNA co-ordinates"/>
+			</when>
+		</conditional>
+		<!-- Chromosome lengths come from the pycrac_chr data table or from a tabular history dataset -->
+		<conditional name="addChr">
+			<param name="chrfile" type="select" label="Choose Chromosome length file from">
+				<option value="default" selected="true">Defaults</option>
+				<option value="other">History</option>
+			</param>
+			<when value="default">
+				<param name="chr" type="select" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes">
+					<options from_data_table="pycrac_chr"/>
+				</param>
+			</when>
+			<when value="other">
+				<param name="chr" type="data" format="tabular" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes. Use pyCrac utility pyCalculateChromosomeLengths to create."/>
+			</when>
+		</conditional>
+		<!-- GTF annotation source, plus the optional restriction to one annotation -->
+		<conditional name="addGTF">
+			<param name="gtfFile" type="select" label="Choose GTF File from">
+				<option value="default" selected="true">Defaults</option>
+				<option value="other">History</option>
+			</param>
+			<when value="default">
+				<param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+					<options from_data_table="pycrac_gtf"/>
+				</param>
+				<conditional name="annotate">
+					<param name="annotations" type="select" label="Select annotation">
+						<option value="all" selected="true">All</option>
+						<option value="manual">Enter in text box</option>
+						<option value="auto">Scan pyGetGTFSources file</option>
+					</param>
+					<when value="all">
+						<param name="annotation" type="hidden" format="txt" size="10" value="all"/>
+					</when>
+					<when value="manual">
+						<param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+							<validator type="empty_field" message="Please enter a value"/>
+						</param>
+					</when>
+					<when value="auto">
+						<param name="gtf_annotation" type="data" format="tabular" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
+						<conditional name="scan">
+							<param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
+								<option value="wait" selected="true">Waiting</option>
+								<option value="scanning">Go</option>
+							</param>
+							<!-- no annotation select is offered while the scan is still "Waiting" -->
+							<when value="wait"/>
+							<when value="scanning">
+								<param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+									<options from_dataset="gtf_annotation">
+										<column name="name" index="0"/>
+										<column name="value" index="0"/>
+									</options>
+								</param>
+							</when>
+						</conditional>
+					</when>
+				</conditional>
+			</when>
+			<when value="other">
+				<param name="gtf" type="data" format="gtf" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+				<conditional name="annotate">
+					<param name="annotations" type="select" label="Select annotation">
+						<option value="all" selected="true">All</option>
+						<option value="manual">Enter in text box</option>
+						<option value="auto">Scan selected file</option>
+					</param>
+					<when value="all">
+						<param name="annotation" type="hidden" format="txt" size="10" value="all"/>
+					</when>
+					<when value="manual">
+						<param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+							<validator type="empty_field" message="Please enter a value"/>
+						</param>
+					</when>
+					<when value="auto">
+						<param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+							<options from_dataset="gtf">
+								<column name="name" index="1"/>
+								<column name="value" index="1"/>
+								<filter type="unique_value" name="unique" column="1"/>
+							</options>
+						</param>
+					</when>
+				</conditional>
+			</when>
+		</conditional>
+		<conditional name="addOpt">
+			<param name="options" type="select" label="Standard options">
+				<option value="default" selected="true">Default</option>
+				<option value="edit">Edit</option>
+			</param>
+			<when value="edit">
+				<param name="sequence" type="select" label="Align reads to --sequence">
+					<option value="genomic" selected="true">Genomic Sequence</option>
+					<option value="coding">Coding Sequence</option>
+				</param>
+				<param name="min" type="integer" format="integer" value="1" size="10" label="Minimum read coverage --min " help="Set the minimal read coverage for a region">
+					<validator type="in_range" min="1" message="Please enter a value >= 1"/>
+				</param>
+				<param name="minfdr" type="float" value="0.05" size="6" label="Minimum FDR threshold --minfdr" help="Set a minimal FDR threshold for filtering interval data">
+					<validator type="in_range" min="0" max="1" message="Please enter a value between 0 and 1"/>
+				</param>
+				<param name="iterations" type="integer" format="integer" value="100" size="6" label="Number of iterations --iterations" help="The number of iterations for randomization of read coordinates">
+					<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+				</param>
+				<param name="range" type="integer" format="integer" value="0" size="5" label="Range --range" help="Manually set the length of the 5' and 3' UTRs 0>50000">
+					<validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
+				</param>
+			</when>
+			<!-- "Default" adds no further parameters -->
+			<when value="default"/>
+		</conditional>
+		<param name="label" type="text" format="txt" size="30" value="pyCalculateFDRs" label="Enter output file label -o" />
+	</inputs>
+	<outputs>
+		<data name="output" format="gtf" label="${label.value}.gtf"/>
+	</outputs>
+	<help>
+
+.. class:: infomark
+
+**pyCalculateFDRs**
+
+By default the FDR value is set to 0.05, meaning that there is a 5% chance that the interval is not significantly enriched.
+The tool reports significant intervals in the GTF format and reports overlapping genomic features.
+Mutation frequencies are not included but these can be added using the pyCalculateMutationFrequencies tool
+
+**NOTE!** By default it calls each significant interval an "exon" but this has no meaning! It may overlap with an intron.
+Use bedtools to extract those intervals that overlap with introns or other features
+
+Example of an output file::
+
+    ##gff-version 2
+    # generated by pyCalculateFDRs version 0.0.3, Sat Jun  1 21:16:23 2013
+    # pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05
+    # chromosome	feature	source	start	end	minimal_coverage	strand	.	attributes
+    chrI	protein_coding	exon	140846	140860	5	-	.	gene_id "YAL005C"; gene_name "SSA1"; 
+    chrI	intergenic_region	exon	223118	223164	4	-	.	gene_id "INT_0_179"; gene_name "INT_0_179"; 
+    chrI	intergenic_region	exon	71889	71922	3	+	.	gene_id "INT_0_94"; gene_name "INT_0_94"; 
+    chrII	intergenic_region	exon	296127	296158	3	-	.	gene_id "INT_0_365"; gene_name "INT_0_365"; 
+    chrII	intergenic_region	exon	680697	680722	4	-	.	gene_id "INT_0_626"; gene_name "INT_0_626"; 
+    chrII	intergenic_region	exon	680827	680846	4	-	.	gene_id "INT_0_626"; gene_name "INT_0_626"; 
+    chrII	snRNA	exon	680827	680838	5	-	.	gene_id "LSR1"; gene_name "LSR1"; 
+    chrII	snRNA	exon	680951	681001	5	-	.	gene_id "LSR1"; gene_name "LSR1"; 
+    chrII	intergenic_region	exon	577985	577996	3	-	.	gene_id "INT_0_556"; gene_name "INT_0_556"; 
+    chrII	protein_coding	exon	203838	203887	3	+	.	gene_id "YBL011W"; gene_name "SCT1"; 
+    chrII	protein_coding	exon	296127	296158	3	-	.	gene_id "YBR028C"; gene_name "YBR028C"; 
+
+ 
+pyCalculateFDRs is part of the pyCRAC_ package. Takes interval information in GTF, GFF or bed format and calculates False Discovery Rates (FDRs).
+
+   
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+Options::
+
+  -f read_file, --readdatafile=read_file
+                        Name of the bed/gff/gtf file containing the read/cDNA
+                        coordinates
+  --file_type=FILE_TYPE
+                        this tool supports bed6, gtf and gff input files.
+                        Please select from 'bed','gtf' or 'gff'. Default=gtf
+  -o outfile.gtf, --outfile=outfile.gtf
+                        Optional. Provide the name of the output file. Default
+                        is 'selected_intervals.gtf'
+  -r 100, --range=100   
+                        allows you to set the length of the UTR regions. If
+                        you set '-r 50' or '--range=50', then the program will
+                        set a fixed length (50 bp) regardless of whether the
+                        GTF file has genes with annotated UTRs.
+  -a protein_coding, --annotation=protein_coding
+                        select which annotation (i.e. protein_coding, ncRNA,
+                        sRNA, rRNA,snoRNA,snRNA, depending on the source of
+                        your GTF file) you would like to focus your analysis
+                        on. Default = all annotations
+  -c yeast.txt, --chromfile=yeast.txt
+                        Location of the chromosome info file. This file should
+                        have two columns: first column is the names of the
+                        chromosomes, second column is length of the
+                        chromosomes. Default is yeast
+  --gtf=yeast.gtf       
+                        Name of the annotation file. Default is /usr/local/pyC
+                        RAC/db/Saccharomyces_cerevisiae.EF2.59.1.2.gtf
+  -m MINFDR, --minfdr=MINFDR
+                        To set a minimal FDR threshold for filtering interval
+                        data. Default is 0.05
+  --min=MIN             
+                        to set a minimal read coverage for a region. Regions
+                        with coverage less than the minimum will be
+                        ignored.
+  --iterations=ITERATIONS
+                        to set the number of iterations for randomization of
+                        read coordinates. Default=100
+	</help>
+</tool>	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyCalculateMutationFrequencies.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,126 @@
+<tool id ="pyCalculateMutationFrequencies" name="pyCalculateMutationFrequencies">
+	<requirements> <!-- pyCalculateMutationFrequencies.py is provided by the pyCRAC package -->
+		<requirement type="package">pyCRAC</requirement>
+	</requirements>
+	<command interpreter="python">
+	/usr/local/bin/pyCalculateMutationFrequencies.py
+	-r $readdatafile
+	-i $intervaldatafile
+	-c $addChr.chr
+	-o $output
+	--mutsfreq $mutsfreq
+	</command>
+	<version_command>/usr/local/bin/pyCalculateMutationFrequencies.py --version</version_command>
+	<inputs>
+		<param name="readdatafile" type="data" format="gff" label="GFF Reads File --readdatafile" help="GFF file containing read data"/>
+		<param name="intervaldatafile" type="data" format="gtf" label="GTF Interval File --intervaldatafile" help="GTF file containing interval co-ordinates"/>
+		<conditional name="addChr"> <!-- chromosome lengths: pycrac_chr data table entry or a tabular history dataset -->
+			<param name="chrfile" type="select" label="Choose Chromosome length file from">
+				<option value="default" selected="true">Defaults</option>
+				<option value="other">History</option>
+			</param>
+			<when value="default">
+				<param name="chr" type="select" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes">
+					<options from_data_table="pycrac_chr"/>
+				</param>
+			</when>
+			<when value="other">
+				<param name="chr" type="data" format="tabular" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes. Use pyCrac utility pyCalculateChromosomeLengths to create."/>
+			</when>
+		</conditional>
+		<!-- mutations below this frequency are left out of the output; 0 keeps everything -->
+		<param name="mutsfreq" type="integer" format="integer" value="0" size="10" label="Minimum mutation frequency --mutsfreq " help="sets the minimal mutations frequency for an interval that you want to have written to your output file">
+			<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+		</param>
+		<param name="label" type="text" format="txt" size="30" value="pyCalculateMutationFrequencies" label="Enter output file label -o" />
+	</inputs>
+	<outputs>
+		<data name="output" format="gtf" label="${label.value}.gtf"/>
+	</outputs>
+	<help>
+
+.. class:: infomark
+
+**pyCalculateMutationFrequencies**
+
+pyCalculateMutationFrequencies is part of the pyCRAC_ package. Takes an interval file and a pyReadCounters GTF file and calculates (cross-linking induced) mutation frequencies for each interval.
+This tool can be used to calculate mutation frequencies for significant intervals (pyCalculateFDRs output file) or over-represented motifs (pyMotif GTF output file).
+It expects a pyCRAC GTF count_output_reads.gtf file and a GTF file with the intervals.
+
+For example::
+
+    This pyCalculateFDRs GTF output file::
+
+        ##gff-version 2
+        # generated by pyCalculateFDRs version 0.0.3, Sat Jun  1 21:16:23 2013
+        # pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05
+        # chromosome	feature	source	start	end	minimal_coverage	strand	.	attributes
+        chrII	protein_coding	exon	203838	203887	3	+	.	gene_id "YBL011W"; gene_name "SCT1"; 
+        chrII	intergenic_region	exon	407669	407708	3	+	.	gene_id "INT_0_445"; gene_name "INT_0_445"; 
+        chrII	intergenic_region	exon	585158	585195	2	+	.	gene_id "INT_0_562"; gene_name "INT_0_562"; 
+        chrII	protein_coding	exon	372390	372433	4	-	.	gene_id "YBR067C"; gene_name "TIP1"; 
+        chrII	intergenic_region	exon	380754	380815	6	-	.	gene_id "INT_0_431"; gene_name "INT_0_431"; 
+        chrIII	protein_coding	exon	138001	138044	5	+	.	gene_id "YCR012W"; gene_name "PGK1"; 
+        chrIII	intergenic_region	exon	227997	228036	5	+	.	gene_id "INT_0_885"; gene_name "INT_0_885";
+        chrIII	intergenic_region	exon	227997	228037	4	+	.	gene_id "INT_0_887"; gene_name "INT_0_887";
+        chrIII	tRNA	exon	227997	228037	4	+	.	gene_id "tS(CGA)C"; gene_name "SUP61";
+    
+    Will be converted into::
+        
+        ##gff-version 2
+        # generated by pyCalculateFDRs version 0.0.3, Sat Jun  1 21:16:23 2013
+        # /Library/Frameworks/EPD64.framework/Versions/Current/bin/pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05
+        # chromosome	feature	source	start	end	minimal_coverage	strand	.	attributes
+        chrII	protein_coding	exon	203838	203887	3	+	.	gene_id "YBL011W"; gene_name "SCT1"; # 203882D33.3,203883D33.3,203884D33.3;
+        chrII	intergenic_region	exon	407669	407708	3	+	.	gene_id "INT_0_445"; gene_name "INT_0_445"; # 407680D33.3,407681D33.3;
+        chrII	intergenic_region	exon	585158	585195	2	+	.	gene_id "INT_0_562"; gene_name "INT_0_562"; # 585171D100.0,585172D100.0,585173D100.0;
+        chrII	protein_coding	exon	372390	372433	4	-	.	gene_id "YBR067C"; gene_name "TIP1"; # 372412D50.0,372413D50.0;
+        chrII	intergenic_region	exon	380754	380815	6	-	.	gene_id "INT_0_431"; gene_name "INT_0_431"; # 380786D90.2,380787D90.2;
+        chrIII	protein_coding	exon	138001	138044	5	+	.	gene_id "YCR012W"; gene_name "PGK1"; # 138025D40.0,138026D30.0,138027D40.0;
+        chrIII	intergenic_region	exon	227997	228036	5	+	.	gene_id "INT_0_885"; gene_name "INT_0_885"; # 228006D85.7,228007D100.0;
+        chrIII	intergenic_region	exon	227997	228037	4	+	.	gene_id "INT_0_887"; gene_name "INT_0_887"; # 228006D85.7,228007D100.0;
+        chrIII	tRNA	exon	227997	228037	4	+	.	gene_id "tS(CGA)C"; gene_name "SUP61"; # 228006D85.7,228007D100.0;
+        
+
+The hash character at the end of each line (#) shows chromosomal coordinates of mutated nucleotides within the cluster interval and their mutation frequencies. 
+
+For example::
+    
+    # 228007D100.0
+
+indicates that 100% of the nucleotides in position 228007 were deleted in the interval.
+
+By setting the --mutsfreq flag you can set a limit for the lowest mutation frequency that you want to have reported. 
+This makes it relatively easy to select those significant regions that have nucleotides with high mutation frequencies.
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+Options::
+
+  -i intervals.gtf, --intervaldatafile=intervals.gtf
+                        provide the path to your GTF interval data file.
+  -r reads.gtf, --readdatafile=reads.gtf
+                        provide the path to your GTF read data file.
+  -c yeast.txt, --chromfile=yeast.txt
+                        Location of the chromosome info file. This file should
+                        have two columns: first column is the names of the
+                        chromosomes, second column is length of the
+                        chromosomes. Default is yeast
+  -o intervals_with_muts.gtf, --output_file=intervals_with_muts.gtf
+                        provide a name for an output file. By default it
+                        writes to the standard output
+  --mutsfreq=10, --mutationfrequency=10
+                        sets the minimal mutation frequency for an interval
+                        that you want to have written to your output file.
+                        Default = 0%. Example: if the mutsfrequency is set at
+                        10 and an interval position is mutated in less than
+                        10% of the reads, then the mutation will not be
+                        reported.
+
+
+	</help>
+ </tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyCheckGTFfile.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,53 @@
+ <tool id ="pyCheckGTFfile" name="pyCheckGTFfile">
+	<requirements> <!-- pyCheckGTFfile.py is provided by the pyCRAC package -->
+		<requirement type="package">pyCRAC</requirement>
+	</requirements>
+	<command interpreter="python">
+	/usr/local/bin/pyCheckGTFfile.py --gtf $addGTF.gtf -o $out
+	</command>
+	<version_command>/usr/local/bin/pyCheckGTFfile.py --version</version_command>
+	<inputs>
+		<conditional name="addGTF"> <!-- GTF comes from the pycrac_gtf data table or from the history -->
+			<param name="gtfFile" type="select" label="Choose GTF File from">
+				<option value="default" selected="true">Defaults</option>
+				<option value="other">History</option>
+			</param>
+			<when value="default">
+				<param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+					<options from_data_table="pycrac_gtf"/>
+				</param>
+			</when>
+			<when value="other"> <!-- lowercase datatype extension, as on the other gtf params in this changeset -->
+				<param name="gtf" type="data" format="gtf" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+			</when>
+		</conditional>
+		<param name="label" type="text" format="txt" size="30" value="pyCheckGTFfile" label="Enter output file label -o" />
+	</inputs>
+
+	<outputs>
+		<data name="out" format="gtf" label="${label.value}.gtf"/>
+	</outputs>
+	<help>
+.. class:: infomark
+
+**pyCheckGTFfile**
+
+pyCheckGTFfile is part of the pyCRAC_ package. Renames duplicated gene names in your GTF annotation file.
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+Options::
+
+  --gtf=gtf input file
+                        type the path to the gtf file that you want to use.
+  -o FILE, --output=FILE
+                        Optional. Specify the name of the output file. Default
+                        is standard output. Make sure it has the .gtf
+                        extension!
+
+	</help>
+</tool>	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyClusterReads.pl	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,10 @@
+#!/usr/bin/perl -w
+use strict;
+use Getopt::Long;
+
+# Echo the received arguments, each followed by a single space, then a newline.
+print "$_ " foreach @ARGV;
+print "\n";
+    
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyClusterReads.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,230 @@
+<tool id="pyClusterReads" name="pyClusterReads" force_history_refresh="True">
+	<requirements>
+        	<requirement type="package">pyCRAC</requirement>
+    	</requirements>
+	<command interpreter="python">
+	/usr/local/bin/pyClusterReads.py
+	-f $input
+	--gtf=$addGTF.gtf
+	## The scan annotation only exists once the scan selector is switched to Go;
+	## while it is still Waiting no annotation filter is passed (same as All).
+	#if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto" and $addGTF.annotate.scan.annotations == "scanning":
+		--annotation=$addGTF.annotate.scan.annotation
+	#elif $addGTF.annotate.annotations == "manual" or ($addGTF.gtfFile == "other" and $addGTF.annotate.annotations == "auto"):
+		--annotation=$addGTF.annotate.annotation
+	#end if#
+	-o $output
+	#if $addOpt.options == "edit":
+		--range=$addOpt.range
+		--cic=$addOpt.cic
+		--co=$addOpt.co
+		--ch=$addOpt.ch
+		--cl=$addOpt.cl
+		--mutsfreq=$addOpt.mutsfreq
+	#end if#
+	</command>
+	<version_command>/usr/local/bin/pyClusterReads.py --version</version_command>
+	<inputs>
+	    <param format="gtf" name="input" type="data" label="Input Read Data File -f" help="GTF format sorted by position i.e. pyReadCounters output file."/>         
+	    <conditional name="addGTF">
+	        <!-- Annotation GTF: either an entry of the pycrac_gtf data table or a history dataset.
+	             Each branch nests its own "annotate" chooser (all / typed in / picked from a file). -->
+	        <param name="gtfFile" type="select" label="Choose GTF File from">
+	            <option value="default" selected="true">Defaults</option>
+	            <option value="other">History</option>
+	        </param>
+	        <when value="default">
+	            <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+	                <options from_data_table="pycrac_gtf"/>
+	            </param>
+	            <conditional name="annotate">
+	                <param name="annotations" type="select" label="Select annotation">
+	                    <option value="all" selected="true">All</option>
+	                    <option value="manual">Enter in text box</option>
+	                    <option value="auto">Scan pyGetGTFSources file</option>
+	                </param>
+	                <when value="all">
+	                    <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
+	                </when>
+	                <when value="manual">
+	                    <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+	                        <validator type="empty_field" message="Please enter a value"/>
+	                    </param>
+	                </when>
+	                <when value="auto">
+	                    <param name="gtf_annotation" type="data" format="tabular" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
+	                    <conditional name="scan">
+	                        <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
+	                            <option value="wait" selected="true">Waiting</option>
+	                            <option value="scanning">Go</option>
+	                        </param>
+	                        <when value="wait"/>
+	                        <when value="scanning">
+	                            <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+	                                <options from_dataset="gtf_annotation">
+	                                    <column name="name" index="0"/>
+	                                    <column name="value" index="0"/>
+	                                </options>
+	                            </param>
+	                        </when>
+	                    </conditional>
+	                </when>
+	            </conditional>
+	        </when>
+	        <when value="other">
+	            <param name="gtf" type="data" format="GTF" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+	            <conditional name="annotate">
+	                <param name="annotations" type="select" label="Select annotation">
+	                    <option value="all" selected="true">All</option>
+	                    <option value="manual">Enter in text box</option>
+	                    <option value="auto">Scan selected file</option>
+	                </param>
+	                <when value="all">
+	                    <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
+	                </when>
+	                <when value="manual">
+	                    <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+	                        <validator type="empty_field" message="Please enter a value"/>
+	                    </param>
+	                </when>
+	                <when value="auto">
+	                    <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+	                        <options from_dataset="gtf">
+	                            <column name="name" index="1"/>
+	                            <column name="value" index="1"/>
+	                            <filter type="unique_value" name="unique" column="1"/>
+	                        </options>
+	                    </param>
+	                </when>
+	            </conditional>
+	        </when>
+	    </conditional>
+
+	  <conditional name="addOpt">
+	      <!-- "edit" exposes the clustering thresholds; every validator message states the bound it enforces. -->
+	      <param name="options" type="select" label="Standard Options">
+	          <option value="default" selected="true">Default</option>
+	          <option value="edit">Edit</option>
+	      </param>
+	      <when value="edit">
+	          <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs 0>50000">
+	              <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
+	          </param>
+	          <param format="integer" name="ch" type="integer" label="Cluster height --ch" value="2" size="10" help="Minimal height of a cluster">
+	              <validator type="in_range" min="1" message="Please enter a value >= 1"/>
+	          </param>
+	          <param format="integer" name="cl" type="integer" label="Cluster length --cl" value="1" size="10" help="Maximum length of a cluster">
+	              <validator type="in_range" min="1" message="Please enter a value >= 1"/>
+	          </param>
+	          <param format="integer" name="cic" type="integer" label="cDNAs in clusters --cic" value="2" size="10" >
+	              <validator type="in_range" min="2" message="Please enter a value >= 2"/>
+	          </param>
+	          <param format="integer" name="co" type="integer" label="cDNA-cluster nucleotide overlap --co" value="1" size="10" >
+	              <validator type="in_range" min="1" message="Please enter a value >= 1"/>
+	          </param>
+	          <param format="integer" name="mutsfreq" type="integer" label="Minimum mutation frequency for a cluster position --mutsfreq" value="0" size="3" >
+	              <validator type="in_range" min="0" max="100" message="Please enter a value between 0 and 100"/>
+	          </param>
+	      </when>
+	      <when value="default"/>
+	  </conditional>
+	<param name="label" type="text" format="txt" size="30" value="pyClusterReads" label="Enter output file label -o" />
+	</inputs>
+	<outputs>
+		<data format="gtf" name="output" label="${label.value}_clusters.gtf"/>
+	</outputs>
+	<help>
+
+.. class:: infomark
+
+**pyClusterReads**
+
+pyClusterReads is part of the pyCRAC_ package. Takes a reads_count_output GTF file from pyReadCounters and generates clusters from the interval coordinates. 
+Produces a GTF output file with cluster intervals and overlapping genomic features.
+It also includes mutation frequencies (after the # character) for nucleotides in intervals using chromosomal coordinates.
+The pyClusterReads GTF output file essentially has the same layout as other pyCRAC GTF output files.
+
+**NOTE!** By default it calls each cluster an "exon" but this has no meaning. It may overlap with an intron.
+Use bedtools to extract those intervals that overlap with introns or other features
+
+The maximum height of the cluster is indicated in column 8. 
+The hash character at the end of each line (#) shows chromosomal coordinates of mutated nucleotides within the cluster interval and their mutation frequencies. 
+
+For example::
+    
+    # 114099S100.0 
+
+indicates that 100% of the nucleotides in position 114099 were substituted in the cluster.
+
+An example of a pyClusterReads output file::
+
+    ##gff-version 2
+    # generated by pyClusterReads.py version 0.0.1, Fri Jan 18 11:59:42 2013
+    # pyClusterReads.py -f count_output_reads.gtf -o count_output_clusters.gtf -v
+    # chromosome    feature source  start   end     cDNAs   strand  height  attributes
+    chrI    cluster exon    112583  112643  6       -       5   gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 112612S75.0;
+    chrI    cluster exon    113176  113232  3       -       3   gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 113184S100.0;
+    chrI    cluster exon    113334  113386  2       -       2   gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 113349S50.0,113379S100.0;
+    chrI    cluster exon    113534  113564  3       -       3   gene_id "INT_0_119,INT_0_114"; gene_name "INT_0_119,INT_0_114"; # 113554S33.3,113556S33.3,113557S33.3;
+    chrI    cluster exon    113644  113691  5       -       4   gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113649S50.0,113657S33.3,113679S25.0
+    chrI    cluster exon    113912  113958  2       -       2   gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113932S50.0,113946S50.0;
+    chrI    cluster exon    113966  114066  5       -       3   gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113987S50.0,114033S33.3,114039S33.3;
+    chrI    cluster exon    114067  114130  3       -       3   gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 114099S100.0;
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html                                                                          
+
+------
+
+**Parameter list**  
+
+
+File input options::
+
+    -f reads.gtf, --input_file=reads.gtf
+                                  provide the path to your GTF read data file. NOTE the
+				  file has to be correctly sorted! If you used
+				  pyReadCounters to generate the file you should be
+				  fine. If you modified it, use the sort command
+				  described in the manual to sort your file first by
+				  chromosome, then by strand and then by start position.
+    -o clusters.gtf, --output_file=clusters.gtf
+                                  provide a name for an output file. By default it
+				  writes to the standard output
+    --gtf=Yourfavoritegtf.gtf
+                                  type the path to the gtf annotation file that you want
+				  to use
+
+Common pyCRAC options::
+
+    -r 100, --range=100
+                                  allows you to set the length of the UTR regions. If
+				  you set '-r 50' or '--range=50', then the program will
+				  set a fixed length (50 bp) regardless of whether the
+				  GTF annotation file has genes with annotated UTRs.
+    -a protein_coding, --annotation=protein_coding
+                                  select which annotation (i.e. protein_coding, ncRNA,
+				  sRNA, rRNA,snoRNA,snRNA, depending on the source of
+				  your GTF file) you would like to focus your analysis
+				  on. Default = all annotations
+
+Options for cluster analysis::
+
+    --cic=2, --cdnasinclusters=2
+                                  sets the minimal number of overlapping cDNAs in each
+				  cluster. Default = 2
+    --co=5, --clusteroverlap=5
+                                  sets the number of nucleotides cDNA sequences have to
+				  overlap to form a cluster. Default = 1 nucleotide
+    --ch=5, --clusterheight=5
+                                  sets the minimal height of the cluster. Default = 2
+				  nucleotides
+    --cl=100, --clusterlength=100
+                                  to set the maximum cluster sequence length
+    --mutsfreq=10, --mutationfrequency=10
+                                  sets the minimal mutations frequency for a cluster
+				  position in the GTF output file. Default = 0%.
+				  Example: if the mutsfrequency is set at 10 and a
+				  cluster position is mutated in less than 10% of the
+				  reads, then the mutation will not be reported.
+	</help>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyExtractLinesFromGTF.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,77 @@
+ <tool id ="pyExtractLinesFromGTF" name="pyExtractLinesFromGTF">
+	<requirements>
+        	<requirement type="package">pyCRAC</requirement>
+    	</requirements>
+	<command interpreter="python"> 
+	/usr/local/bin/pyExtractLinesFromGTF.py --gtf $addGTF.gtf --genes_file $g --attribute $attribute $v -o $out
+	</command>
+	<version_command>/usr/local/bin/pyExtractLinesFromGTF.py --version</version_command>
+	<inputs>
+		<conditional name="addGTF">
+			<param name="gtfFile" type="select"  label="Choose GTF File from">
+				<option value="default" selected="true">Defaults</option>
+				<option value="other">History</option>
+			</param>	
+			<when value="default">
+				<param name="gtf" type="select"  label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+					<options from_data_table="pycrac_gtf"/>
+				</param>
+			</when>
+			<when value="other">
+				<param format="GTF" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+			</when>
+		</conditional>
+		<param format="txt" name="g" type="data" label="File containing gene list --genes_file" help="Tabular file with 1 column of gene or annotation names"/>
+		<param name="attribute" type="select"  label="Select the attribute to extract names from --attribute">
+                                <option value="gene_name" selected="true">gene_name</option>
+                                <option value="gene_id">gene_id</option>
+                                <option value="transcript_name">transcript_name</option>
+                                <option value="transcript_id">transcript_id</option>
+		</param>
+                <param name="v" type="select"  label="Extract lines from GTF that -v"><!-- the selected value is inserted verbatim into the command line -->
+                                <option value="" selected="true">Match the gene file</option>
+                                <option value="-v">Do not match in gene file</option>
+                </param>
+		<param name="label" type="text" format="txt" size="30" value="pyExtractLinesFromGTF" label="Enter output file label -o" />
+	</inputs>
+
+	<outputs>
+		<data format="gtf" name="out" label="${label.value}.gtf"/>
+	</outputs>
+	<help>
+.. class:: infomark
+
+**pyExtractLinesFromGTF**
+
+pyExtractLinesFromGTF is part of the pyCRAC_ package. Extracts lines from a GTF file that contain gene names of interest.
+
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+Options::
+
+  --gtf=Yourfavoritegtf.gtf
+                        type the path to the gtf file that you want to use. By
+                        default it expects data from the standard input.
+  -g FILE, --genes_file=FILE
+                        name of your gene list or annotations list file (1
+                        column)
+  -o OUTFILE, --outfile=OUTFILE
+                        type the name and path of the file you want to write
+                        the output to. Default is standard output
+  -a ATTRIBUTE, --attribute=ATTRIBUTE
+                        from which attribute do you want to extract names?
+                        Choices: gene_name, gene_id, transcript_name,
+                        transcript_id
+  -v                    
+                        similar to grep -v option. Remove the genes from the
+                        GTF that are in the gene list
+
+
+
+	</help>
+</tool>	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyFasta2tab.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,51 @@
+<tool id="pyFasta2Tab" name="pyFasta2Tab">
+	  <description>converter</description>
+	  <requirements>
+        	<requirement type="package">pyCRAC</requirement>
+    	  </requirements>
+ 	  <command interpreter="python">/usr/local/bin/pyFasta2tab.py -f $input -o $output
+ 	  </command>
+	  <version_command>/usr/local/bin/pyFasta2tab.py --version</version_command>
+ 	  <inputs>
+	      <param name="input" type="data" format="fasta" label="Fasta file -f"/>
+	      <param name="label" type="text" format="txt" size="30" value="pyFasta2Tab" label="Enter output file label -o" /><!-- must be inside inputs so the output label can read ${label.value} -->
+ 	  </inputs>
+ 	  <outputs>
+	    <data name="output" format="tabular" label="${label.value}.tab"/>
+ 	  </outputs>
+	  <help>
+
+.. class:: infomark
+
+**pyFasta2Tab**
+
+pyFasta2Tab is part of the pyCRAC_ package. Converts fasta to tabular format. Is used to convert your reference sequences in fasta format to the tabular format that pyCRAC uses for almost all tools.
+
+Example::
+    
+    >sequence1
+    ATAGGATACATAACCATATTATGAGACC
+    
+Is converted into::
+
+    sequence1   ATAGGATACATAACCATATTATGAGACC
+    
+The pyCRAC package lo
+   
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+
+-------
+
+**Parameter list**                                                                                                                                         
+
+Options::
+
+  -f fasta_file, --input_file=fasta_file
+                        provide the name and path of your fasta input file.
+                        Default is standard input.
+
+        
+
+ 	  </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyFastqDuplicateRemover.pl	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,34 @@
+#!/usr/bin/perl -w
+# Galaxy wrapper around pyFastqDuplicateRemover.py: runs the tool with the
+# --id value as output name, then moves the result(s) onto -o / --out2.
+use strict;
+use Getopt::Long;
+
+my %opt;
+GetOptions(\%opt, "f=s", "r=s", "o=s", "out2=s", "version", "id=s");
+
+my @tool = ("python", "/usr/local/bin/pyFastqDuplicateRemover.py");
+
+# --version only reports the tool version; no output files exist to move.
+exit(run(@tool, "--version")) if exists $opt{version};
+
+my @cmnd = (@tool, "-f", $opt{f}, "-o", $opt{id});
+push @cmnd, "-r", $opt{r} if defined $opt{r};
+my $status = run(@cmnd);
+exit($status) if $status;    # do not move files the tool never produced
+
+# Paired runs write <id>_1.fasta and <id>_2.fasta; single runs write <id>.
+if (defined $opt{r}) {
+	$status = run("mv", "$opt{id}_1.fasta", $opt{o}) || run("mv", "$opt{id}_2.fasta", $opt{out2});
+}
+else {
+	$status = run("mv", $opt{id}, $opt{o});
+}
+exit($status);
+
+# Run a command without a shell (arguments are never re-parsed) and return
+# its exit code, or 1 when it could not be started or died from a signal.
+sub run {
+	my $rc = system(@_);
+	return $rc == -1 || ($rc & 127) ? 1 : $rc >> 8;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyFastqDuplicateRemover.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,117 @@
+ <tool id ="pyFastqDuplicateRemover" name="pyFastqDuplicateRemover">
+    <requirements>
+        <requirement type="package">pyCRAC</requirement>
+    </requirements>
+	<command interpreter="perl"> 
+	pyFastqDuplicateRemover.pl
+	-f $ftype.f
+	#if $ftype.reverse.rev == "yes":
+        -r=$ftype.reverse.r
+		--out2 $out2
+    #end if#
+	-o $out
+	--id $out.id
+	</command>
+	<version_command>/usr/local/bin/pyFastqDuplicateRemover.py --version</version_command><!-- same absolute path the Perl wrapper invokes -->
+	<inputs>
+		<conditional name="ftype">
+			<!-- Both branches offer the same optional mate file; only the accepted
+			     datatype differs. The command line passes the mate file as -r. -->
+			<param name="type" type="select" label="File type">
+				<option value="fastq" selected="true">FASTQ</option>
+				<option value="fasta">FASTA</option>
+			</param>
+			<when value="fastq">
+				<param format="fastq" name="f" type="data" label="FastQ File -f" help="FastQ format" />
+				<conditional name="reverse">
+					<param name="rev" type="select" label="Add a reverse or paired FastQ file">
+						<option value="no" selected="true">NO</option>
+						<option value="yes">YES</option>
+					</param>
+					<when value="yes">
+						<param format="fastq" name="r" type="data" label="Reverse FastQ File -r" help="FastQ format" />
+					</when>
+					<when value="no"/>
+				</conditional>
+			</when>
+			<when value="fasta">
+				<param format="fasta" name="f" type="data" label="FastA File -f" help="FastA format" />
+				<conditional name="reverse">
+					<param name="rev" type="select" label="Add a reverse or paired FastA file">
+						<option value="no" selected="true">NO</option>
+						<option value="yes">YES</option>
+					</param>
+					<when value="yes">
+						<param format="fasta" name="r" type="data" label="Reverse FastA File -r" help="FastA format" />
+					</when>
+					<when value="no"/>
+				</conditional>
+			</when>
+		</conditional>
+		<param name="label" type="text" format="txt" size="30" value="pyFastqDuplicateRemover" label="Enter output file label -o" />
+	</inputs>
+	<outputs>
+		<data format="fasta" name="out" label="${label.value}.fasta"/>
+		<data format="fasta" name="out2" label="${label.value}_reverse.fasta">
+			<filter>ftype['reverse']['rev'] == "yes"</filter>
+		</data>
+	</outputs>
+	<help>
+
+.. class:: infomark
+
+**pyFastqDuplicateRemover**
+
+pyFastqDuplicateRemover is part of the pyCRAC_ package. Removes identical sequences from fastq and fasta files and returns a fasta file with collapsed data.
+
+Can also process paired-end data.
+
+**Examples**
+
+Unprocessed fastq data with six random nucleotides at 5' end of the read::
+    
+    @FCC102EACXX:3:1101:3231:2110#TGACCAAT/1
+    GCGCCTGCCAATTCCATCGTAATGATTAATAGGGACGGTCGGGGGCATC
+    +
+    bb_ceeeegggggiiiiiifghiihiihiiiiiiiiiifggfhiecccc
+    
+After pyBarcodeFilter::
+
+    @FCC102EACXX:3:1101:3231:2110#TGACCAAT/1##GCGCCT
+    TCCATCGTAATGATTAATAGGGACGGTCGGGGGCATC
+    +
+    giiiiiifghiihiihiiiiiiiiiifggfhiecccc
+    
+    This entry is printed to the NNNNNNGCCAAT barcode file.
+
+After pyFastqDuplicateRemover::
+
+    >1_GCGCCT_5/1
+    TCCATCGTAATGATTAATAGGGACGGTCGGGGGCATC
+    
+    The '1' indicates that this is the first unique cDNA in the data
+    GCGCCT is the random barcode sequence
+    the '5' indicates that 5 reads were found with identical read and random barcode sequences
+    the '/1' indicates that the sequence originates from the forward sequencing reaction
+   
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+Options::
+
+  -f FILE, --input_file=FILE		
+                                        name of the FASTQ or FASTA input file
+
+  -r FILE, --reverse_input_file=FILE	
+                                        name of the paired (or reverse) FASTQ or FASTA input file
+
+  -o FILE, --output_file=FILE		
+                                        Provide the path and name of the fastq or fasta output file. Default is standard output. 
+					For paired-end data just provide a file name without file extension (!)
+	</help>
+</tool>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyFastqJoiner.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,139 @@
+ <tool id ="pyFastqJoiner" name="pyFastqJoiner">
+    <requirements>
+        <requirement type="package">pyCRAC</requirement>
+    </requirements>
+	<command interpreter="python"> 
+	/usr/local/bin/pyFastqJoiner.py
+	-f $ftype.f1 $ftype.f2
+	-o $out
+	--file_type=$ftype.type
+	#if $joinc.ch == "-c":
+        -c $joinc.c
+    #end if#
+	</command>
+	<version_command>/usr/local/bin/pyFastqJoiner.py --version</version_command>
+	<inputs>
+		<conditional name="ftype">
+            <param name="type" type="select" label="File type">
+                <option value="fastq" selected="true">FASTQ</option>
+                <option value="fasta">FASTA</option>
+            </param>
+            <when value="fastq">
+                <param format="fastq" name="f1" type="data" label="First FastQ File -f" help="FastQ format" />
+                <param format="fastq" name="f2" type="data" label="Second FastQ File -f" help="FastQ format" />
+            </when>
+            <when value="fasta">
+                <param format="fasta" name="f1" type="data" label="First FastA File -f" help="FastA format" />
+                <param format="fasta" name="f2" type="data" label="Second FastA File -f" help="FastA format" />
+            </when>
+		</conditional>
+		<conditional name="joinc">
+            <param name="ch" type="select"  label="Insert a character at join">
+                <option value="" selected="true">NO</option>
+                <option value="-c">YES</option>
+            </param>
+            <when value="-c">
+                <param type="text" name="c" label="Add this character -c" value=":" >
+                    <validator type="empty_field" message="enter a character or turn this option off" />
+                </param>
+            </when>
+            <when value="">
+            </when>
+		</conditional>
+	    <param name="label" type="text" format="txt" size="30" value="pyFastqJoiner" label="Enter output file label -o" />
+	</inputs>
+	<outputs>
+		<data format="input" name="out" label="${label.value}.${ftype.type}">
+			<!-- change_format is only honoured as a child of the data element it modifies -->
+			<change_format><when input="ftype.type" value="fasta" format="fasta" /></change_format>
+		</data>
+	</outputs>
+	<help>
+
+.. class:: infomark
+
+**pyFastqJoiner**
+
+pyFastqJoiner is part of the pyCRAC_ package. Merges paired sequences from two fastq or fasta formatted files.
+
+Example::
+
+    Forward reaction:
+    
+    @FCC102EACXX:3:1101:1343:2181#ATCACGAT/1##CAATAG
+    CAAATTAGAGTGTTCAAAGCAGGCGTATTGCTCGAAT
+    +
+    `efhYb][bdQQ`eeaeaYbeY^ceU__IXa[^ZYae
+    @FCC102EACXX:3:1101:1424:2248#ATCACGAT/1##CCAGGA
+    CTAACCATAAACTATGCCTACTAGGGATCCAGAGGTG
+    +
+    ^_adddhJbaehbedd`dIb_^cXaRI^BBBBBBBBB
+    @FCC102EACXX:3:1101:1623:2036#ATCACGAN/1##CTCAGC
+    CAAAGTTAGGGGATCGAAGATGATCAGATACCGTCGT
+    +
+    bghfc^YbgbeadggfdffeaS^ac_X^cegaGZ_ef
+    @FCC102EACXX:3:1101:1574:2214#ATCACGAT/1##CGTTTT
+    CTAATGACCCACTCGGCACCTTACGAAATCAAAGTCT
+    +
+    cdfgYY`cefhhZef\eaggXaceeghfQaeghWNW\
+    
+    Reverse reaction:
+    
+    @FCC102EACXX:3:1101:1343:2181#ATCACGAT/2
+    AGCCTTTAAGTTTCAGCCTTGCGACCATACTCCCCCCAGAACCCAAAGA
+    +
+    YJaSJ`Z`K`YbSb[[daeJRR[YeWd_I^I^ecgc]OV\bdeaegbXb
+    @FCC102EACXX:3:1101:1424:2248#ATCACGAT/2
+    AAGTCCTTTAAGTTACAGCCTTGCGACCATACTACACCCAGAACCCAAA
+    +
+    YJJ\`JQY\`KJ`gY[[QRYY[[`H[_ceI^e[PYO^IWOHW^eaefhh
+    @FCC102EACXX:3:1101:1623:2036#ATCACGAN/2
+    GGCCAATCCTTATTGTGTCTGGACCTGGTGAGTTTCCCCGTGTTGAGTC
+    +
+    PP\`ccQ`eY[bQQ[d`ghehaghfgdg[`gb^bd[ePbH^c_c\a_eg
+    
+    Here the ":" character is used to split the two sequences. This character tells pyFastqSplitter where to split the sequences.
+    This character is ignored by pyFastqDuplicateRemover
+    
+    Result:
+    
+    @FCC102EACXX:3:1101:1343:2181#ATCACGAT/1##CAATAG@FCC102EACXX:3:1101:1343:2181#ATCACGAT/2
+    CAAATTAGAGTGTTCAAAGCAGGCGTATTGCTCGAAT:AGCCTTTAAGTTTCAGCCTTGCGACCATACTCCCCCCAGAACCCAAAGA
+    +
+    `efhYb][bdQQ`eeaeaYbeY^ceU__IXa[^ZYaeYJaSJ`Z`K`YbSb[[daeJRR[YeWd_I^I^ecgc]OV\bdeaegbXb
+    @FCC102EACXX:3:1101:1424:2248#ATCACGAT/1##CCAGGA@FCC102EACXX:3:1101:1424:2248#ATCACGAT/2
+    CTAACCATAAACTATGCCTACTAGGGATCCAGAGGTG:AAGTCCTTTAAGTTACAGCCTTGCGACCATACTACACCCAGAACCCAAA
+    +
+    ^_adddhJbaehbedd`dIb_^cXaRI^BBBBBBBBBYJJ\`JQY\`KJ`gY[[QRYY[[`H[_ceI^e[PYO^IWOHW^eaefhh
+    @FCC102EACXX:3:1101:1623:2036#ATCACGAN/1##CTCAGC@FCC102EACXX:3:1101:1623:2036#ATCACGAN/2
+    CAAAGTTAGGGGATCGAAGATGATCAGATACCGTCGT:GGCCAATCCTTATTGTGTCTGGACCTGGTGAGTTTCCCCGTGTTGAGTC
+    +
+    bghfc^YbgbeadggfdffeaS^ac_X^cegaGZ_efPP\`ccQ`eY[bQQ[d`ghehaghfgdg[`gb^bd[ePbH^c_c\a_eg
+       
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+Options::
+
+    -f fastq_file1 fastq_file2	
+                        Provide the names of two raw data files separated by a single space.
+                        Make sure the first file is the data file of the forward (/1) sequencing reaction.
+
+    --file_type=FASTQ     
+                        Can join fasta and fastq files. Fastq is default
+    
+    -o mergedfastq.fastq, --outfile=mergedfastq.fastq
+                        provide the name of the output file. By default it
+                        will be printed to the standard output
+    
+    -c :  
+                        This option adds the given character (':' in the
+                        example above) between the DNA sequences so that it
+                        is much easier to split the data again later on
+
+
+	</help>
+</tool>	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyFastqSplitter.pl	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,27 @@
+#!/usr/bin/perl -w
+# Galaxy wrapper around pyFastqSplitter.py: runs the tool with the --id value
+# as output prefix, then moves both result files onto --o1 / --o2.
+use strict;
+use Getopt::Long;
+
+my %opt;
+GetOptions(\%opt, "f=s", "c=s", "o1=s", "o2=s", "file_type=s", "version", "id=s");
+
+my @tool = ("python", "/usr/local/bin/pyFastqSplitter.py");
+# --version only reports the tool version; no output files exist to move.
+exit(run(@tool, "--version")) if exists $opt{version};
+
+my @cmnd = (@tool, "-f", $opt{f}, "-o", $opt{id}, "--file_type=$opt{file_type}");
+push @cmnd, "-c", $opt{c} if defined $opt{c};
+my $status = run(@cmnd);
+
+# The tool writes <id>_1.<file_type> and <id>_2.<file_type>; skip the moves if it failed.
+$status ||= run("mv", "$opt{id}_1.$opt{file_type}", $opt{o1});
+$status ||= run("mv", "$opt{id}_2.$opt{file_type}", $opt{o2});
+exit($status);
+
+# Run a command without a shell; return its exit code (1 if it could not start or was signalled).
+sub run {
+	my $rc = system(@_);
+	return $rc == -1 || ($rc & 127) ? 1 : $rc >> 8;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyFastqSplitter.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,140 @@
+ <tool id ="pyFastqSplitter" name="pyFastqSplitter" force_history_refresh="True">
+        <requirements>
+                <requirement type="package">pyCRAC</requirement>
+        </requirements>
+	<!-- Parameters declared inside a conditional are addressed through it (ftype.f, joinc.c); the split character is quoted so the shell passes it on unchanged. -->
+	<command interpreter="perl">
+	pyFastqSplitter.pl
+	-f $ftype.f
+	--o1 $out1
+	--id $label.value
+	--o2 $out2
+	--file_type $ftype.type
+	#if $joinc.ch == "-c":
+	-c '$joinc.c'
+	#end if#
+	</command>
+	<version_command>/usr/local/bin/pyFastqSplitter.py --version</version_command>
+	<inputs>
+		<!-- ftype selects the input format; joinc optionally supplies the character the reads are split on. -->
+		<conditional name="ftype">
+			<param name="type" type="select" label="File type">
+				<option value="fastq" selected="true">FASTQ</option>
+				<option value="fasta">FASTA</option>
+			</param>
+			<when value="fastq">
+				<param name="f" type="data" format="fastq" label="FastQ File -f" help="FastQ format"/>
+			</when>
+			<when value="fasta">
+				<param name="f" type="data" format="fasta" label="FastA File -f" help="FastA format"/>
+			</when>
+		</conditional>
+		<conditional name="joinc">
+			<param name="ch" type="select" label="Insert a character at join">
+				<option value="" selected="true">NO</option>
+				<option value="-c">YES</option>
+			</param>
+			<when value="-c">
+				<param name="c" type="text" value=":" label="Split the reads on the -c character">
+					<validator type="empty_field" message="enter a character or turn this option off"/>
+				</param>
+			</when>
+			<when value=""/>
+		</conditional>
+		<param name="label" type="text" format="txt" size="30" value="pyFastqSplitter" label="Enter output file label -o"/>
+	</inputs>
+	<outputs>
+		<!-- change_format is only read as a child of <data>, so each output carries its own copy of the fasta switch. -->
+		<data format="input" name="out1" label="${label.value}_1.${ftype.type}">
+			<change_format>
+				<when input="ftype.type" value="fasta" format="fasta" />
+			</change_format>
+		</data>
+		<data format="input" name="out2" label="${label.value}_2.${ftype.type}">
+			<change_format>
+				<when input="ftype.type" value="fasta" format="fasta" />
+			</change_format>
+		</data>
+	</outputs>
+	<help>
+
+.. class:: infomark
+
+**pyFastqSplitter**
+
+pyFastqSplitter is part of the pyCRAC_ package. Splits a merged fastq file (pyFastqJoiner output) into two files.
+
+Example::
+    
+    Here the ":" character was used to separate the two sequences. By using the -c flag you can tell pyFastqSplitter where to split the sequences.
+    This character is ignored by pyFastqDuplicateRemover
+    
+    
+    @FCC102EACXX:3:1101:1343:2181#ATCACGAT/1##CAATAG@FCC102EACXX:3:1101:1343:2181#ATCACGAT/2
+    CAAATTAGAGTGTTCAAAGCAGGCGTATTGCTCGAAT:AGCCTTTAAGTTTCAGCCTTGCGACCATACTCCCCCCAGAACCCAAAGA
+    +
+    `efhYb][bdQQ`eeaeaYbeY^ceU__IXa[^ZYaeYJaSJ`Z`K`YbSb[[daeJRR[YeWd_I^I^ecgc]OV\bdeaegbXb
+    @FCC102EACXX:3:1101:1424:2248#ATCACGAT/1##CCAGGA@FCC102EACXX:3:1101:1424:2248#ATCACGAT/2
+    CTAACCATAAACTATGCCTACTAGGGATCCAGAGGTG:AAGTCCTTTAAGTTACAGCCTTGCGACCATACTACACCCAGAACCCAAA
+    +
+    ^_adddhJbaehbedd`dIb_^cXaRI^BBBBBBBBBYJJ\`JQY\`KJ`gY[[QRYY[[`H[_ceI^e[PYO^IWOHW^eaefhh
+    @FCC102EACXX:3:1101:1623:2036#ATCACGAN/1##CTCAGC@FCC102EACXX:3:1101:1623:2036#ATCACGAN/2
+    CAAAGTTAGGGGATCGAAGATGATCAGATACCGTCGT:GGCCAATCCTTATTGTGTCTGGACCTGGTGAGTTTCCCCGTGTTGAGTC
+    +
+    bghfc^YbgbeadggfdffeaS^ac_X^cegaGZ_efPP\`ccQ`eY[bQQ[d`ghehaghfgdg[`gb^bd[ePbH^c_c\a_eg
+    
+    Result:
+    
+    Forward reaction:
+    
+    @FCC102EACXX:3:1101:1343:2181#ATCACGAT/1##CAATAG
+    CAAATTAGAGTGTTCAAAGCAGGCGTATTGCTCGAAT
+    +
+    `efhYb][bdQQ`eeaeaYbeY^ceU__IXa[^ZYae
+    @FCC102EACXX:3:1101:1424:2248#ATCACGAT/1##CCAGGA
+    CTAACCATAAACTATGCCTACTAGGGATCCAGAGGTG
+    +
+    ^_adddhJbaehbedd`dIb_^cXaRI^BBBBBBBBB
+    @FCC102EACXX:3:1101:1623:2036#ATCACGAN/1##CTCAGC
+    CAAAGTTAGGGGATCGAAGATGATCAGATACCGTCGT
+    +
+    bghfc^YbgbeadggfdffeaS^ac_X^cegaGZ_ef
+    @FCC102EACXX:3:1101:1574:2214#ATCACGAT/1##CGTTTT
+    CTAATGACCCACTCGGCACCTTACGAAATCAAAGTCT
+    +
+    cdfgYY`cefhhZef\eaggXaceeghfQaeghWNW\
+    
+    Reverse reaction:
+    
+    @FCC102EACXX:3:1101:1343:2181#ATCACGAT/2
+    AGCCTTTAAGTTTCAGCCTTGCGACCATACTCCCCCCAGAACCCAAAGA
+    +
+    YJaSJ`Z`K`YbSb[[daeJRR[YeWd_I^I^ecgc]OV\bdeaegbXb
+    @FCC102EACXX:3:1101:1424:2248#ATCACGAT/2
+    AAGTCCTTTAAGTTACAGCCTTGCGACCATACTACACCCAGAACCCAAA
+    +
+    YJJ\`JQY\`KJ`gY[[QRYY[[`H[_ceI^e[PYO^IWOHW^eaefhh
+    @FCC102EACXX:3:1101:1623:2036#ATCACGAN/2
+    GGCCAATCCTTATTGTGTCTGGACCTGGTGAGTTTCCCCGTGTTGAGTC
+    +
+    PP\`ccQ`eY[bQQ[d`ghehaghfgdg[`gb^bd[ePbH^c_c\a_eg
+   
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+Options::
+
+    -f fastq_file, --filename=fastq_file
+                        Provide the name of the joined data file that needs
+                        to be split. Default = standard input
+    --file_type=FASTQ     
+                        Can split joined fasta and fastq files. Fastq is default
+                        If there isn't a specific character splitting the two reads
+                        the tool assumes that the two reads were of equal length
+    -o splitfastq, --outfile=splitfastq
+                        Provide the name of the output files (WITHOUT file
+                        extension). By default the data will be printed to the
+                        standard output
+    -c :, --character=:   
+                        If the joined sequences were separated by a specific
+                        character then the program can divide the sequences by
+                        looking for that character
+
+	</help>
+</tool>	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyGTF2bed.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,107 @@
+<tool id="pyGTF2bed" name="pyGTF2bed">
+	  <description>converter</description>
+	  <requirements>
+        	<requirement type="package">pyCRAC</requirement>
+    	  </requirements>
+ 	  <!-- Track name and description are free text; they are quoted so values containing spaces reach the script as single arguments. -->
+ 	  <command interpreter="python">/usr/local/bin/pyGTF2bed.py --gtf $input -o $output
+	  #if $addtrack.track == "--track":
+	   --track
+	   --name "$addtrack.name"
+	   --description "$addtrack.description"
+	   #if $addtrack.colorscheme.colorsel == "default":
+	    -c $addtrack.colorscheme.color
+	   #else:
+	    -s '$addtrack.colorscheme.plus,$addtrack.colorscheme.minus'
+	   #end if#
+	  #end if#
+ 	  </command>
+	  <version_command>/usr/local/bin/pyGTF2bed.py --version</version_command>
+ 	  <inputs>
+ 	    <param name="input" type="data" format="gtf" label="GTF file --gtf"/>
+ 	    <!-- Optional UCSC track line; the nested colour scheme offers either one colour (-c) or a forward/minus pair (-s). -->
+ 	    <conditional name="addtrack">
+ 	      <param name="track" type="select" label="Add UCSC track line to output --track">
+ 	        <option value="" selected="true">NO</option>
+ 	        <option value="--track">YES</option>
+ 	      </param>
+ 	      <when value=""/>
+ 	      <when value="--track">
+ 	        <param name="name" type="text" format="txt" value="User_supplied_track" size="80" label="Track name -n"/>
+ 	        <param name="description" type="text" format="txt" value="User_supplied_track" size="80" label="Track description -d"/>
+ 	        <conditional name="colorscheme">
+ 	          <param name="colorsel" type="select" label="Colouring scheme">
+ 	            <option value="default" selected="true">One Colour</option>
+ 	            <option value="strand">By Strand</option>
+ 	          </param>
+ 	          <when value="default">
+ 	            <param name="color" type="select" label="Choose track colour -c">
+ 	              <option value="black" selected="true">Black</option>
+ 	              <option value="red">Red</option>
+ 	              <option value="blue">Blue</option>
+ 	              <option value="green">Green</option>
+ 	              <option value="purple">Purple</option>
+ 	            </param>
+ 	          </when>
+ 	          <when value="strand">
+ 	            <param name="plus" type="select" label="Choose forward strand track colour -s">
+ 	              <option value="black" selected="true">Black</option>
+ 	              <option value="red">Red</option>
+ 	              <option value="blue">Blue</option>
+ 	              <option value="green">Green</option>
+ 	              <option value="purple">Purple</option>
+ 	            </param>
+ 	            <param name="minus" type="select" label="Choose minus strand track colour -s">
+ 	              <option value="black" selected="true">Black</option>
+ 	              <option value="red">Red</option>
+ 	              <option value="blue">Blue</option>
+ 	              <option value="green">Green</option>
+ 	              <option value="purple">Purple</option>
+ 	            </param>
+ 	          </when>
+ 	        </conditional>
+ 	      </when>
+ 	    </conditional>
+ 	    <param name="label" type="text" format="txt" size="30" value="pyGTF2bed" label="Enter output file label -o"/>
+ 	  </inputs>
+ 	  <outputs>
+ 	    <!-- Single bed6 dataset, named after the user-supplied label. -->
+ 	    <data format="bed6" name="output" label="${label.value}.bed"/>
+ 	  </outputs>
+	  <help>
+
+.. class:: infomark
+
+**pyGTF2bed**
+
+pyGTF2bed is part of the pyCRAC_ package. Converts GTF files to the bed 6 format. Gene names present in the GTF file will be included in the bed file.
+
+   
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+These options can be used to add and modify a track line for the UCSC genome browser::
+
+    --track             
+                        Use this flag to add a UCSC genome browser track line
+                        to the beginning of your file
+    -n NAME, --name=NAME
+                        For the UCSC track line: provide a track name. Default
+                        = 'User_supplied_track'
+    -d DESCRIPTION, --description=DESCRIPTION
+                        For the UCSC track line: provide a track description.
+                        Default = 'User_supplied_track'
+    -c COLOR, --color=COLOR
+                        select the track color. Default = black
+    -s STRANDS, --colorstrands=STRANDS
+                        select the colors for each strand. Default =
+                        'red,blue'
+
+File input options::
+
+    --gtf=Yourfavoritegtf.gtf
+                        type the path to the gtf file that you want to
+                        convert. Default is standard input
+ 	  </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyGTF2bedGraph.pl	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,38 @@
+#!/usr/bin/perl -w
+# Galaxy wrapper: runs pyGTF2bedGraph.py, then moves the per-strand bedgraph files to the paths given by --po / --mo.
+use strict;
+use Getopt::Long;
+
+my %opt;
+GetOptions(\%opt, "gtf=s","po=s","version","mo=s","count=i","chromfile=s","t=s","iCLIP","track","name=s","description=s","color=s","s=s","id=s");
+
+my $script = "/usr/local/bin/pyGTF2bedGraph.py";
+
+# --version only reports the tool version; no output files exist, so nothing is moved afterwards.
+if (exists $opt{version}){
+	exit(system("python", $script, "--version") == 0 ? 0 : 1);
+}
+
+# Output prefix handed to -o; any whitespace in the id is replaced by underscores.
+my $prefix = "gb_$opt{id}";
+$prefix =~ s/\s/_/g;
+
+# List-form system() bypasses the shell, so free-text values (track name, description) cannot be re-interpreted.
+my @cmnd = ("python", $script, "--gtf", $opt{gtf}, "--chromfile", $opt{chromfile}, "-t", $opt{t}, "--count", $opt{count}, "-o", $prefix);
+push @cmnd, "--iCLIP" if exists $opt{iCLIP};
+
+# Track-line options are forwarded only together with --track.
+if (exists $opt{track}){
+	push @cmnd, "--track", "--name", $opt{name}, "--description", $opt{description};
+	push @cmnd, "--color", $opt{color} if exists $opt{color};
+	push @cmnd, "-s", $opt{s} if exists $opt{s};
+}
+system(@cmnd) == 0 or die "pyGTF2bedGraph.py failed (status $?)\n";
+
+# Move <prefix>_plus_strand.bedgraph / <prefix>_minus_strand.bedgraph; both moves are attempted, mv reports its own errors.
+my $failed = 0;
+foreach my $pair (["plus", $opt{po}], ["minus", $opt{mo}]){
+	system("mv", "${prefix}_$pair->[0]_strand.bedgraph", $pair->[1]) == 0 or $failed = 1;
+}
+exit $failed;
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyGTF2bedGraph.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,162 @@
+<tool id="pyGTF2bedGraph" name="pyGTF2bedGraph">
+	  <description>converter</description>
+	  <requirements>
+        	<requirement type="package">pyCRAC</requirement>
+    	  </requirements>
+ 	  <!-- Track name and description are free text; they are quoted so values containing spaces reach the wrapper as single arguments. -->
+ 	  <command interpreter="perl">pyGTF2bedGraph.pl --gtf $input --po $po --mo $mo
+	  --chromfile $addchr.chr
+	  -t $type
+	  --count $count
+	  $iclip
+	  #if $addtrack.track == "--track":
+	   --track
+	   --name "$addtrack.name"
+	   --description "$addtrack.description"
+	   #if $addtrack.colorscheme.colorsel == "default":
+	    --color $addtrack.colorscheme.color
+	   #else:
+	    -s '$addtrack.colorscheme.plus,$addtrack.colorscheme.minus'
+	   #end if#
+	  #end if#
+	  --id $po.id
+ 	  </command>
+	  <version_command>/usr/local/bin/pyGTF2bedGraph.py --version</version_command>
+ 	  <inputs>
+ 	    <param name="input" type="data" format="gtf" label="GTF file --gtf"/>
+ 	    <!-- Chromosome length file: an entry of the pycrac_chr data table or a tabular dataset from the history. -->
+ 	    <conditional name="addchr">
+ 	      <param name="chrfile" type="select" label="Choose Chromosome length file from">
+ 	        <option value="default" selected="true">Defaults</option>
+ 	        <option value="other">History</option>
+ 	      </param>
+ 	      <when value="default">
+ 	        <param name="chr" type="select" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes. Use pyCrac utility pyCalculateChromosomeLengths to create.">
+ 	          <options from_data_table="pycrac_chr"/>
+ 	        </param>
+ 	      </when>
+ 	      <when value="other">
+ 	        <param name="chr" type="data" format="tabular" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes"/>
+ 	      </when>
+ 	    </conditional>
+ 	    <param name="type" type="select" label="Choose type of data -t">
+ 	      <option value="reads" selected="true">Reads</option>
+ 	      <option value="substitutions">Substitutions</option>
+ 	      <option value="deletions">Deletions</option>
+ 	    </param>
+ 	    <param name="count" type="integer" format="integer" label="Count per feature --count " value="1" size="5" help="Takes the numbers in the 'score' column of the GTF file as the total number of reads for each position">
+ 	      <validator type="in_range" min="1" message="Please enter a value >= 1"/>
+ 	    </param>
+ 	    <param name="iclip" type="select" label="iCLIP mode --iCLIP">
+ 	      <option value="" selected="true">OFF</option>
+ 	      <option value="--iCLIP">ON</option>
+ 	    </param>
+ 	    <!-- Optional UCSC track line; the nested colour scheme offers either one colour or a forward/minus pair. -->
+ 	    <conditional name="addtrack">
+ 	      <param name="track" type="select" label="Add UCSC track line to output">
+ 	        <option value="" selected="true">NO</option>
+ 	        <option value="--track">YES</option>
+ 	      </param>
+ 	      <when value=""/>
+ 	      <when value="--track">
+ 	        <param name="name" type="text" format="txt" value="User_supplied_track" size="80" label="Track name"/>
+ 	        <param name="description" type="text" format="txt" value="User_supplied_track" size="80" label="Track description"/>
+ 	        <conditional name="colorscheme">
+ 	          <param name="colorsel" type="select" label="Colouring scheme">
+ 	            <option value="default" selected="true">One Colour</option>
+ 	            <option value="strand">By Strand</option>
+ 	          </param>
+ 	          <when value="default">
+ 	            <param name="color" type="select" label="Choose track colour">
+ 	              <option value="black" selected="true">Black</option>
+ 	              <option value="red">Red</option>
+ 	              <option value="blue">Blue</option>
+ 	              <option value="green">Green</option>
+ 	              <option value="purple">Purple</option>
+ 	            </param>
+ 	          </when>
+ 	          <when value="strand">
+ 	            <param name="plus" type="select" label="Choose forward strand track colour">
+ 	              <option value="black" selected="true">Black</option>
+ 	              <option value="red">Red</option>
+ 	              <option value="blue">Blue</option>
+ 	              <option value="green">Green</option>
+ 	              <option value="purple">Purple</option>
+ 	            </param>
+ 	            <param name="minus" type="select" label="Choose minus strand track colour">
+ 	              <option value="black" selected="true">Black</option>
+ 	              <option value="red">Red</option>
+ 	              <option value="blue">Blue</option>
+ 	              <option value="green">Green</option>
+ 	              <option value="purple">Purple</option>
+ 	            </param>
+ 	          </when>
+ 	        </conditional>
+ 	      </when>
+ 	    </conditional>
+ 	    <param name="label" type="text" format="txt" size="30" value="pyGTF2bedGraph" label="Enter output file label -o"/>
+ 	  </inputs>
+ 	  <outputs>
+ 	    <!-- One bedgraph dataset per strand, both named after the user-supplied label. -->
+ 	    <data format="bedgraph" name="po" label="${label.value}_plus_strand.bg"/>
+ 	    <data format="bedgraph" name="mo" label="${label.value}_minus_strand.bg"/>
+ 	  </outputs>
+	  <help>
+
+.. class:: infomark
+
+**pyGTF2bedGraph**
+
+pyGTF2bedGraph is part of the pyCRAC_ package. Generates bedgraph files for each chromosome. An homage to bedtools genomecoverage. Takes a pyReadCounters GTF file as input file. Can also output bedGraph files for substitutions and deletions.
+   
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+File input options::
+
+    --gtf=readdata.gtf  
+                        type the path to the gtf file data file. By default it
+                        expects data from the standard input
+    -o converted        
+                        provide a name for an output file. A file extension or
+                        strand information is not necessary.
+    -c yeast.txt, --chromfile=yeast.txt
+                        Location of the chromosome info file. This file should
+                        have two columns: first column is the names of the
+                        chromosomes, second column is length of the
+                        chromosomes. Default is yeast
+    -t TYPE, --type=TYPE
+                        this tool can generate bedGraph files for reads,
+                        substitutions or deletions. Please use
+                        'reads','substitutions' or 'deletions' to indicate the
+                        type of data. Default='reads'
+    --count             
+                        Takes the numbers in the 'score' column of the GTF
+                        file as the total number of reads for each position.
+                        Default is 1 for each interval.
+    --iCLIP             
+                        This turns on the iCLIP mode and the sgr reads or cDNA
+                        files will report cross-linking site frequencies in
+                        iCLIP data
+    -v, --verbose       
+                        to print status messages to a log file
+
+These options can be used to add a track line for the UCSC genome browser::
+
+    --track             
+                        Use this flag to add a UCSC genome browser track line
+                        to the beginning of your file
+    -n NAME, --name=NAME
+                        For the UCSC track line: provide a track name. Default
+                        = 'User_supplied_track'
+    -d DESCRIPTION, --description=DESCRIPTION
+                        For the UCSC track line: provide a track description.
+                        Default = 'User_supplied_track'
+    --color=COLOR       
+                        select the track color. Default = black
+    -s STRANDS, --colorstrands=STRANDS
+                        select the colors for each strand. Default =
+                        'red,blue'
+
+ 	  </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyGetGTFSources.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,63 @@
+ <tool id ="pyGetGTFSources" name="pyGetGTFSources">
+	<requirements>
+        	<requirement type="package">pyCRAC</requirement>
+    	</requirements>
+	<!-- $count expands to "--count" or to an empty string, as chosen in the "count" select of this tool. -->
+	<command interpreter="python">
+	/usr/local/bin/pyGetGTFSources.py --gtf $addGTF.gtf $count -o $out
+	</command>
+	<version_command>/usr/local/bin/pyGetGTFSources.py --version</version_command>
+	<inputs>
+		<!-- GTF source: an entry of the pycrac_gtf data table or a GTF dataset from the history. -->
+		<conditional name="addGTF">
+			<param name="gtfFile" type="select" label="Choose GTF File from">
+				<option value="default" selected="true">Defaults</option>
+				<option value="other">History</option>
+			</param>
+			<when value="default">
+				<param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+					<options from_data_table="pycrac_gtf"/>
+				</param>
+			</when>
+			<when value="other">
+				<param name="gtf" type="data" format="gtf" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+			</when>
+		</conditional>
+		<!-- The option values are the literal command-line fragment: empty, or the --count flag. -->
+		<param name="count" type="select" label="Count occurrences of each annotation --count">
+			<option value="" selected="true">No</option>
+			<option value="--count">Yes</option>
+		</param>
+		<param name="label" type="text" format="txt" size="30" value="GTF sources list" label="Enter output file label -o"/>
+	</inputs>
+
+	<outputs>
+		<!-- Single tabular dataset, named after the user-supplied label. -->
+		<data name="out" format="tabular" label="${label.value}.txt"/>
+	</outputs>
+	<help>
+.. class:: infomark
+
+**pyGetGTFSources**
+
+pyGetGTFSources is part of the pyCRAC_ package. Extracts source names from the second column in a GTF file.
+
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+Options::
+
+  --gtf=Yourfavoritegtf.gtf
+                        type the path to the gtf file that you want to use. By
+                        default it expects data from the standard input
+  -o OUTFILE, --outfile=OUTFILE
+                        type the name and path of the file you want to write
+                        the output to. Default is standard output
+  --count               with this flag the program will count the
+                        occurrences of each source/annotation in the gtf file
+
+
+
+
+	</help>
+</tool>	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyGetGeneNamesFromGTF.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,71 @@
+ <tool id ="pyGetGeneNamesFromGTF" name="pyGetGeneNamesFromGTF">
+	<requirements>
+        	<requirement type="package">pyCRAC</requirement>
+    	</requirements>
+	<!-- $count expands to "--count" or to an empty string, as chosen in the "count" select of this tool. -->
+	<command interpreter="python">/usr/local/bin/pyGetGeneNamesFromGTF.py --gtf $addGTF.gtf --attribute $attribute $count -o $out</command>
+	<version_command>/usr/local/bin/pyGetGeneNamesFromGTF.py --version</version_command>
+	<inputs>
+		<!-- GTF source: an entry of the pycrac_gtf data table or a GTF dataset from the history. -->
+		<conditional name="addGTF">
+			<param name="gtfFile" type="select" label="Choose GTF File from">
+				<option value="default" selected="true">Defaults</option>
+				<option value="other">History</option>
+			</param>
+			<when value="default">
+				<param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+					<options from_data_table="pycrac_gtf"/>
+				</param>
+			</when>
+			<when value="other">
+				<param name="gtf" type="data" format="gtf" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+			</when>
+		</conditional>
+		<param name="attribute" type="select" label="Select the attribute to extract names from --attribute">
+			<option value="gene_name" selected="true">gene_name</option>
+			<option value="gene_id">gene_id</option>
+			<option value="transcript_name">transcript_name</option>
+			<option value="transcript_id">transcript_id</option>
+		</param>
+		<!-- The option values are the literal command-line fragment: empty, or the --count flag. -->
+		<param name="count" type="select" label="Count occurrences of each annotation --count">
+			<option value="" selected="true">No</option>
+			<option value="--count">Yes</option>
+		</param>
+		<param name="label" type="text" format="txt" size="30" value="GTF gene list" label="Enter output file label -o"/>
+	</inputs>
+
+	<outputs>
+		<!-- Single tabular dataset, named after the user-supplied label. -->
+		<data name="out" format="tabular" label="${label.value}.txt"/>
+	</outputs>
+	<help>
+.. class:: infomark
+
+**pyGetGeneNamesFromGTF**
+
+pyGetGeneNamesFromGTF is part of the pyCRAC_ package. Extracts and counts all gene names from a GTF file.
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+Options::
+
+  --gtf=Yourfavoritegtf.gtf
+                        type the path to the gtf file that you want to use. By
+                        default it expects data from the standard input.
+  -o OUTFILE, --outfile=OUTFILE
+                        type the name and path of the file you want to write
+                        the output to. Default is standard output
+  -a ATTRIBUTE, --attribute=ATTRIBUTE
+                        from which attribute do you want to extract names?
+                        Choices: gene_name, gene_id, transcript_name,
+                        transcript_id
+  --count               
+                        with this flag the program will count the
+                        occurrences of each source/annotation in the gtf file
+
+
+	</help>
+</tool>	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyMotif.pl	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,41 @@
+#!/usr/bin/perl -w
+# Galaxy wrapper: runs pyMotif.py, then moves its four result files to the paths given by --count/--features/--zscores/--random.
+use strict;
+use Getopt::Long;
+
+my %opt;
+GetOptions(\%opt, "f=s","version","gtf=s","range=i","overlap=i","--annotation=s", "--tab=s","--k_min=i","--k_max=i","--numberofkmers=i","--count=s","--features=s","--zscores=s","--random=s","options","o=s","id=s");
+
+my $script = "/usr/local/bin/pyMotif.py";
+
+# --version only reports the tool version; no result files exist, so nothing is moved afterwards.
+if (exists $opt{version}){
+	exit(system("python", $script, "--version") == 0 ? 0 : 1);
+}
+
+# Output prefix handed to -o; the result files are looked up as <prefix>_<annotation>_<suffix>.
+my $prefix = "m_$opt{id}";
+
+# List-form system() bypasses the shell, so the free-text annotation cannot be re-interpreted.
+my @cmnd = ("python", $script, "-f", $opt{f}, "--gtf", $opt{gtf}, "--tab", $opt{tab}, "--annotation", $opt{annotation}, "-o", $prefix);
+if (exists $opt{options}){
+	push @cmnd, "--range=$opt{range}", "--overlap=$opt{overlap}", "--k_min=$opt{k_min}", "--k_max=$opt{k_max}", "--numberofkmers=$opt{numberofkmers}";
+}
+system(@cmnd) == 0 or die "pyMotif.py failed (status $?)\n";
+
+# Result-file suffix => destination path; every move is attempted, mv reports its own errors.
+my %result = (
+	"data_k-mers_count.txt"      => $opt{count},
+	"top_k-mers_in_features.gtf" => $opt{features},
+	"k-mer_Z_scores.txt"         => $opt{zscores},
+	"random_k-mers_count.txt"    => $opt{random},
+);
+my $failed = 0;
+foreach my $suffix (sort keys %result){
+	my $from = "${prefix}_$opt{annotation}_$suffix";
+	system("mv", $from, $result{$suffix}) == 0 or $failed = 1;
+}
+exit $failed;
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyMotif.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,222 @@
+ <tool id ="pyMotif" name="pyMotif">
+	<requirements>
+        	<requirement type="package">pyCRAC</requirement>
+    	</requirements>
+	<!-- The annotation comes from the nested "scan" select only for a default GTF combined with the "auto" choice; otherwise from annotate.annotation. -->
+	<command interpreter="perl">
+	pyMotif.pl -f $input --gtf=$addGTF.gtf
+	#if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto":
+		--annotation $addGTF.annotate.scan.annotation
+	#else:
+		--annotation $addGTF.annotate.annotation
+	#end if#
+	--tab=$addTab.tab
+	#if $addOpt.options == "edit":
+		--options
+		--k_min $addOpt.kmin --k_max $addOpt.kmax
+		--numberofkmers=$addOpt.numberofkmers
+		--overlap $addOpt.overlap --range $addOpt.range
+	#end if#
+	-o "$input.name"
+	--id $count.id
+	--count $count --random $random
+	--features $features --zscores $zscores
+	</command>
+	<version_command>/usr/local/bin/pyMotif.py --version</version_command>
+	<inputs>
+		<param format="gtf" name="input" type="data" label="Input File --input_file" help="File of type .gtf" />
+                <!-- Genomic reference: an entry of the pycrac_tab data table or a tabular dataset from the history. -->
+                <conditional name="addTab">
+                    <param name="tabFile" type="select" label="Choose Genomic Reference Sequence from">
+                        <option value="default" selected="true">Defaults</option>
+                        <option value="other">History</option>
+                    </param>
+                    <when value="default">
+                        <param name="tab" type="select" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence">
+                            <options from_data_table="pycrac_tab"/>
+                        </param>
+                    </when>
+                    <when value="other">
+                        <param name="tab" type="data" format="tabular" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence"/>
+                    </when>
+                </conditional>
+		<!-- GTF source plus annotation choice: "all", free text, or a value picked from a dataset (a pyGetGTFSources output for default GTFs, column 2 of the GTF itself for history GTFs). -->
+		<conditional name="addGTF">
+			<param name="gtfFile" type="select" label="Choose GTF File from">
+				<option value="default" selected="true">Defaults</option>
+				<option value="other">History</option>
+			</param>
+			<when value="default">
+				<param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+					<options from_data_table="pycrac_gtf"/>
+				</param>
+				<conditional name="annotate">
+					<param name="annotations" type="select" label="Select annotation">
+						<option value="all" selected="true">All</option>
+						<option value="manual">Enter in text box</option>
+						<option value="auto">Scan pyGetGTFSources file</option>
+					</param>
+					<when value="all">
+						<param name="annotation" type="hidden" format="txt" size="10" value="all"/>
+					</when>
+					<when value="manual">
+						<param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+							<validator type="empty_field" message="Please enter a value"/>
+						</param>
+					</when>
+					<when value="auto">
+						<param name="gtf_annotation" type="data" format="tabular" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
+						<conditional name="scan">
+							<param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
+								<option value="wait" selected="true">Waiting</option>
+								<option value="scanning">Go</option>
+							</param>
+							<when value="wait"/>
+							<when value="scanning">
+								<param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+									<options from_dataset="gtf_annotation">
+										<column name="name" index="0"/>
+										<column name="value" index="0"/>
+									</options>
+								</param>
+							</when>
+						</conditional>
+					</when>
+				</conditional>
+			</when>
+			<when value="other">
+				<param name="gtf" type="data" format="gtf" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+				<conditional name="annotate">
+					<param name="annotations" type="select" label="Select annotation">
+						<option value="all" selected="true">All</option>
+						<option value="manual">Enter in text box</option>
+						<option value="auto">Scan selected file</option>
+					</param>
+					<when value="all">
+						<param name="annotation" type="hidden" format="txt" size="10" value="all"/>
+					</when>
+					<when value="manual">
+						<param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+							<validator type="empty_field" message="Please enter a value"/>
+						</param>
+					</when>
+					<when value="auto">
+						<param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+							<options from_dataset="gtf">
+								<column name="name" index="1"/>
+								<column name="value" index="1"/>
+								<filter type="unique_value" name="unique" column="1"/>
+							</options>
+						</param>
+					</when>
+				</conditional>
+			</when>
+		</conditional>
+		<!-- Tuning options; they are only offered (and only forwarded by the command template) when "Edit" is selected. -->
+		<conditional name="addOpt">
+			<param name="options" type="select" label="Standard options">
+				<option value="default" selected="true">Default</option>
+				<option value="edit">Edit</option>
+			</param>
+			<when value="edit">
+				<param format="integer" name="kmin" type="integer" label="Minimum k-mer Length --k_min " value="4" size="6" help="Set the minimal k-mer length">
+					<validator type="in_range" min="1" message="Please enter a value >= 1"/>
+				</param>
+				<!-- Label and help previously repeated the --k_min wording; this parameter is passed as --k_max. -->
+				<param format="integer" name="kmax" type="integer" label="Maximum k-mer Length --k_max " value="8" size="6" help="Set the maximal k-mer length">
+					<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+				</param>
+				<param format="integer" name="numberofkmers" type="integer" label="Maximum number of k-mers in output file --numberofkmers" value="1000" size="6" help="Set the maximum number of k-mers in output">
+					<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+				</param>
+				<param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs 0>50000">
+					<validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
+				</param>
+				<param format="integer" name="overlap" type="integer" label="Overlap --overlap" value="1" size="5" help="Sets the number of nucleotides a read has to overlap with a gene before it is considered a hit. ">
+					<validator type="in_range" min="1" message="Please enter a positive integer"/>
+				</param>
+			</when>
+			<when value="default"/>
+		</conditional>
+                <param name="label" type="text" format="txt" size="30" value="pyMotif" label="Enter output file label -o" />
+	</inputs>
+
+	<!-- Four result datasets; each history label starts with the user-entered output label. -->
+	<outputs>
+		<data name="zscores" format="tabular" label="${label.value}_k-mer_Z_scores.txt"/>
+		<data name="count" format="tabular" label="${label.value}_data_k-mers_count.txt"/>
+		<data name="features" format="gtf" label="${label.value}_top_k-mers_in_features.gtf"/>
+		<data name="random" format="tabular" label="${label.value}_random_k-mers_count.txt"/>
+	</outputs>
+	<help>
+
+.. class:: infomark
+
+**pyMotif**
+
+pyMotif is part of the pyCRAC_ package. Looks for enriched sequence motifs in high-throughput sequencing data. Produces a GTF type output file 
+with coordinates and Z-scores for enriched motifs. The GTF file can be visualised in genome browsers.
+   
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+File input options::
+
+    -f intervals.gtf, --input_file=intervals.gtf
+                        Provide the path to an interval gtf file. By default
+                        it expects data from the standard input.
+    -o OUTPUT_FILE, --output_file=OUTPUT_FILE
+                        Use this flag to override the standard file names. Do
+                        NOT add an extension.
+    --gtf=annotation_file.gtf
+                        type the path to the gtf annotation file that you want
+                        to use
+    --tab=tab_file.tab  
+                        type the path to the tab file that contains the
+                        genomic reference sequence
+
+pyMotif specific options::
+
+    --k_min=4           
+                        this option allows you to set the shortest k-mer
+                        length. Default = 4.
+    --k_max=6           
+                        this option allows you to set the longest k-mer
+                        length. Default = 8.
+    -n 100, --numberofkmers=100
+                        choose the maximum number of enriched k-mer sequences
+                        you want to have reported in output files. Default =
+                        1000
+
+pyCRAC common options::
+
+    -a protein_coding, --annotation=protein_coding
+                        select which annotation (i.e. protein_coding, ncRNA,
+                        sRNA, rRNA,snoRNA,snRNA, depending on the source of
+                        your GTF file) you would like to focus your search on.
+                        Default = all annotations
+    -r 100, --range=100
+                        allows you to add regions flanking the genomic
+                        feature. If you set '-r 50' or '--range=50', then the
+                        program will add 50 nucleotides to each feature on
+                        each side regardless of whether the GTF file has genes
+                        with annotated UTRs.
+    --overlap=1         
+                        sets the number of nucleotides a motif has to overlap
+                        with a genomic feature before it is considered a hit.
+                        Default =  1 nucleotide
+
+
+
+
+	</help>
+</tool>	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyPileup.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,384 @@
+<?xml version="1.0" encoding="utf-8"?>
+ <tool id ="pyPileup" name="pyPileup">
+	<requirements>
+		<requirement type="package">pyCRAC</requirement>
+	</requirements>
+	<!-- Cheetah template that assembles the pyPileup.py command line from the tool parameters.
+	     The discarded-reads output is requested exactly once (novo or sam input with the
+	     discard option ON); the alignment-options section does not repeat it. -->
+	<command interpreter="python">
+	/usr/local/bin/pyPileup.py
+	-f $ftype.input
+	--file_type $ftype.file_type
+	#if $geneOpt.alignGene == "gene":
+		-g $geneOpt.genes
+	#end if#
+	#if $geneOpt.alignGene == "chr":
+		--chr $geneOpt.chr
+	#end if#
+	#if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.disc.discard == "discard":
+		--discarded $discarded
+	#end if#
+	--gtf=$addGTF.gtf
+	--tab=$addTab.tab
+	#if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.addAlignOpt.alignoptions == "edit":
+		--align_quality=$ftype.addAlignOpt.align_quality
+		--align_score=$ftype.addAlignOpt.align_score
+		--distance=$ftype.addAlignOpt.d
+		--length=$ftype.addAlignOpt.length
+		#if int($ftype.addAlignOpt.max) > 0:
+			--max=$ftype.addAlignOpt.max
+		#end if#
+		$ftype.addAlignOpt.unique
+		$ftype.addAlignOpt.blocks
+		$ftype.addAlignOpt.mutations
+	#end if#
+	#if $addOpt.options == "edit":
+		--range=$addOpt.range
+		--overlap=$addOpt.overlap
+		$addOpt.iclip
+		$addOpt.ignore
+		-s $addOpt.sequence
+		#if int($addOpt.limit) > 0:
+			--limit=$addOpt.limit
+		#end if#
+	#end if#
+	-o $output
+	</command>
+	<version_command>/usr/local/bin/pyPileup.py --version</version_command>
+	<inputs>
+
+
+	        <!-- Target selection: either a gene list or a chromosome co-ordinate file. -->
+	        <conditional name="geneOpt">
+	                <param name="alignGene" type="select" label="Do you want to align reads to genes or chromosome co-ordinates?">
+	                        <option value="gene" selected="true">Genes</option>
+	                        <option value="chr">Chromosome Co-ordinates</option>
+	                </param>
+	                <when value="gene">
+	                        <param name="genes" type="data" format="txt" label="Choose a Gene List -g" help="Single column gene ID file"/>
+	                </when>
+	                <when value="chr">
+	                        <param name="chr" type="data" format="interval" label="Choose a Chromosome Coordinate File" help="Tab delimited text file containing an identifier, chromosome name, start position, end position and strand ('-' or '+')"/>
+	                </when>
+	        </conditional>
+                <!-- GTF annotation source: an entry of the pycrac_gtf data table or a dataset chosen by the user. -->
+                <conditional name="addGTF">
+                        <param name="gtfFile" type="select"  label="Choose GTF File from">
+                                <option value="default" selected="true">Defaults</option>
+                                <option value="other">History</option>
+                        </param>
+                        <when value="default">
+                                <param name="gtf" type="select"  label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+                                        <options from_data_table="pycrac_gtf"/>
+                                </param>
+                        </when>
+                        <when value="other">
+                                <!-- Datatype extension written in lowercase, matching the other gtf inputs of these wrappers. -->
+                                <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+                        </when>
+                </conditional>
+               <!-- Genomic reference sequence: an entry of the pycrac_tab data table or a tabular dataset chosen by the user. -->
+               <conditional name="addTab">
+                       <param name="tabFile" type="select" label="Choose Genomic Reference Sequence from">
+                               <option value="default" selected="true">Defaults</option>
+                               <option value="other">History</option>
+                       </param>
+                       <when value="default">
+                               <param name="tab" type="select" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence">
+                                       <options from_data_table="pycrac_tab"/>
+                               </param>
+                       </when>
+                       <when value="other">
+                               <param name="tab" type="data" format="tabular" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence"/>
+                       </when>
+               </conditional>
+
+
+		<!-- Input alignment file and the options that depend on its type. -->
+		<!-- The sam and novo branches expose the same parameters and should be kept in sync; gtf input has no alignment options. -->
+		<conditional name="ftype">
+			<param name="file_type" type="select"  label="Input File Type --file_type">
+				<option value="novo" selected="true">Novo</option>
+				<option value="sam">Sam/BAM</option>
+				<option value="gtf">GTF</option>
+			</param>
+			<when value="sam">
+				<param format="sam,bam" name="input" type="data" label="Input File -f" help="Alignment file of type .sam or .bam" />
+				<!-- ON/OFF switch only: both cases are empty, the value is read by the command template and the output filter. -->
+				<conditional name="disc">
+					<param name="discard" type="select"  label="Print discarded reads to a separate file">
+						<option value="" selected="true">OFF</option>
+						<option value="discard">ON</option>
+					</param>
+					<when value="discard">
+					</when>
+					<when value="">
+					</when>
+				</conditional>
+				<conditional name="addAlignOpt">
+					<param name="alignoptions" type="select"  label="Alignment Options">
+						<option value="default" selected="true">Default</option>
+						<option value="edit">Edit</option>
+					</param>
+					<when value="edit">
+						<!-- Option values are complete command-line flags; the empty value adds nothing. -->
+						<param name="mutations" type="select"  label="Filter reads by mutations --mutations" help="cross-linking sites are often highlighted by deletions and/or substitutions in the reads. You can use this option to filter reads based on whether they have mutations or not.">
+							<option value="" selected="true">Off</option>
+							<option value="--mutations=delsonly">deletions</option>
+							<option value="--mutations=subsonly">substitutions</option>
+							<option value="--mutations=TC">T->C mutations</option>
+							<option value="--mutations=allmuts">all mutations</option>
+							<option value="--mutations=nomuts">no mutations</option>
+						</param>
+						<param format="integer" name="align_quality" type="integer" label="Align Quality --align_quality " value="0" size="5" >
+							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+						</param>
+						<param format="integer" name="align_score" type="integer" label="Align Score --align_score " value="0" size="5" >
+							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+						</param>
+						<!-- 0 means "all reads": the command template only passes the max option for values above 0. -->
+						<param format="integer" name="max" type="integer" label="Mapped reads to read from input file --max" help="Set to 0 to align all reads." value="0" size="10" >
+							<validator type="in_range" min="0" max="100000000" message="Please enter a value between 1 and 100000000 or 0 to align all reads"/>
+						</param>
+						<!-- The validator rejects 0, so the message states the real lower bound of 1. -->
+						<param format="integer" name="d" type="integer" label="Distance --distance " value="1000" size="6" help="Set the maximum number of bp allowed between two non-overlapping paired reads">
+							<validator type="in_range" min="1" message="Please enter a value >= 1"/>
+						</param>
+						<param format="integer" name="length" type="integer" label="Set the maximum length of reads --length" value="1000" size="7" help="Set the read length threshold between 15 and 1000">
+							<validator type="in_range" min="15" max="1000" message="Please enter a value between 15 and 1000"/>
+						</param>
+						<param name="unique" type="select"  label="Remove reads with multiple alignment locations --unique">
+							<option value="" selected="true">OFF</option>
+							<option value="--unique">ON</option>
+						</param>
+						<param name="blocks" type="select"  label="Only count reads with same start and end coords once --blocks">
+							<option value="" selected="true">OFF</option>
+							<option value="--blocks">ON</option>
+						</param>
+					</when>
+					<when value="default">
+					</when>
+				</conditional>
+			</when>
+			<when value="novo">
+				<param format="tabular" name="input" type="data" label="Input File -f" help="Alignment file of type .novo" />
+				<!-- ON/OFF switch only: both cases are empty, the value is read by the command template and the output filter. -->
+				<conditional name="disc">
+					<param name="discard" type="select"  label="Print discarded reads to a separate file">
+						<option value="" selected="true">OFF</option>
+						<option value="discard">ON</option>
+					</param>
+					<when value="discard">
+					</when>
+					<when value="">
+					</when>
+				</conditional>
+				<conditional name="addAlignOpt">
+					<param name="alignoptions" type="select"  label="Alignment Options">
+						<option value="default" selected="true">Default</option>
+						<option value="edit">Edit</option>
+					</param>
+					<when value="edit">
+						<!-- Option values are complete command-line flags; the empty value adds nothing. -->
+						<!-- The help attribute is kept on one physical line so no stray whitespace ends up in the rendered text. -->
+						<param name="mutations" type="select"  label="Filter reads by mutations --mutations" help="cross-linking sites are often highlighted by deletions and/or substitutions in the reads. You can use this option to filter reads based on whether they have mutations or not.">
+							<option value="" selected="true">Off</option>
+							<option value="--mutations=delsonly">deletions</option>
+							<option value="--mutations=subsonly">substitutions</option>
+							<option value="--mutations=TC">T->C mutations</option>
+							<option value="--mutations=allmuts">all mutations</option>
+							<option value="--mutations=nomuts">no mutations</option>
+						</param>
+						<param format="integer" name="align_quality" type="integer" label="Align Quality --align_quality " value="0" size="5" >
+							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+						</param>
+						<param format="integer" name="align_score" type="integer" label="Align Score --align_score " value="0" size="5" >
+							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+						</param>
+						<!-- 0 means "all reads": the command template only passes the max option for values above 0. -->
+						<param format="integer" name="max" type="integer" label="Mapped reads to read from input file --max" help="Set to 0 to align all reads." value="0" size="10" >
+							<validator type="in_range" min="0" max="100000000" message="Please enter a value between 1 and 100000000 or 0 to align all reads"/>
+						</param>
+						<!-- The validator rejects 0, so the message states the real lower bound of 1. -->
+						<param format="integer" name="d" type="integer" label="Distance --distance " value="1000" size="6" help="Set the maximum number of bp allowed between two non-overlapping paired reads">
+							<validator type="in_range" min="1" message="Please enter a value >= 1"/>
+						</param>
+						<param format="integer" name="length" type="integer" label="Set the maximum length of reads --length" value="1000" size="7" help="Set the read length threshold between 15 and 1000">
+							<validator type="in_range" min="15" max="1000" message="Please enter a value between 15 and 1000"/>
+						</param>
+						<param name="unique" type="select"  label="Remove reads with multiple alignment locations --unique">
+							<option value="" selected="true">OFF</option>
+							<option value="--unique">ON</option>
+						</param>
+						<param name="blocks" type="select"  label="Only count reads with same start and end coords once --blocks">
+							<option value="" selected="true">OFF</option>
+							<option value="--blocks">ON</option>
+						</param>
+					</when>
+					<when value="default">
+					</when>
+				</conditional>
+			</when>
+			<when value="gtf">
+				<param format="gtf" name="input" type="data" label="Input File -f" help="File of type .gtf" />
+			</when>
+		</conditional>
+		      
+		      <!-- Standard pyPileup options (fields appear only under "Edit"), followed by the label used to name the output datasets. -->
+		      <conditional name="addOpt">
+			<param name="options" type="select"  label="Standard Options">
+			  <option value="default" selected="true">Default</option>
+			  <option value="edit">Edit</option>
+			</param>
+			<when value="edit">
+			  <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs 0>50000">
+			    <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
+			  </param>
+			  <!-- Option values here and for iclip are complete command-line flags; the empty value adds nothing. -->
+			  <param name="ignore" type="select" label="Ignore strand information? --ignorestrand">
+			    <option value="" selected="true">No</option>
+			    <option value="--ignorestrand">Yes</option>
+			  </param>
+			  <param format="integer" name="overlap" type="integer" label="Overlap --overlap" value="1" size="5" help="Sets the number of nucleotides a read has to overlap with a gene before it is considered a hit. ">
+			    <validator type="in_range" min="1" message="Please enter a positive integer"/>
+			  </param>
+			  <param name="sequence" type="select" label="Align reads to --sequence">
+			    <option value="genomic" selected="true">Genomic Sequence</option>
+			    <option value="coding">Coding Sequence</option>
+			  </param>
+			  <param name="iclip" type="select" label="iCLIP mode --iCLIP">
+			    <option value="" selected="true">OFF</option>
+			    <option value="--iCLIP">ON</option>
+			  </param>
+			  <!-- 0 means unlimited: the command template only passes the limit option for values above 0. Any value from 1 upwards is accepted. -->
+			  <param format="integer" name="limit" type="integer" label="Limit number of reads to count that map to a particular region --limit" value="0" size="15" help="Set to 0 for unlimited reads" >
+			    <validator type="in_range" min="0" message="Please enter a value >= 1 or set to 0 for unlimited reads"/>
+			  </param>
+			</when>
+			<when value="default">
+			</when>
+		      </conditional>
+		      <param name="label" type="text" format="txt" size="30" value="pyPileup" label="Enter output file label -o" />
+	</inputs>
+	<!-- The pileup dataset has no filter; the discarded-reads dataset is kept only for novo or sam input with the discard option ON. -->
+	<outputs>
+		<data name="output" format="tabular" label="${label.value}.pileup"/>
+		<data name="discarded" format="txt" label="${label.value}_discarded.txt">
+			<filter>(ftype['file_type'] == "novo" or ftype['file_type'] == "sam") and ftype['disc']['discard'] ==  "discard"</filter>
+		</data>
+	</outputs>
+	<help>
+
+
+.. class:: infomark
+
+**pyPileup**
+
+pyPileup is part of the pyCRAC_ package. Produces pileups containing the number of hits, substitutions and deletions for each nucleotide covered by 
+reads in specific genes or genomic regions
+   
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+File input options::
+
+    -f FILE, --input_file=FILE
+                        As input files you can use Novoalign native output,
+                        SAM, pyMotif or pyReadCounters GTF files. By default
+                        it expects data from the standard input. Make sure
+                        to specify the file type of the file
+                        you want to have analyzed using the --file_type
+                        option!
+    -o OUTPUT_FILE, --output_file=OUTPUT_FILE
+                        Use this flag to override the standard output file
+                        names. All pileups will be written to one output file.
+    -g FILE, --genes_file=FILE
+                        here you need to type in the name of your gene list
+                        file (1 column) or the hittable file
+    --chr=FILE          
+                        if you simply would like to align reads against a
+                        genomic sequence you should generate a tab delimited
+                        file containing an identifier, chromosome name, start
+                        position, end position and strand
+    --gtf=annotation_file.gtf
+                        type the path to the gtf annotation file that you want
+                        to use
+    --tab=tab_file.tab  
+                        type the path to the tab file that contains the
+                        genomic reference sequence
+    --file_type=FILE_TYPE
+                        use this option to specify the file type (i.e. 'novo',
+                        'sam', 'gtf'). This will tell the program which
+                        parsers to use for processing the files. Default =
+                        'novo'
+
+pyPileup specific options::
+
+    --limit=500         
+                        with this option you can select how many reads mapped
+                        to a particular gene/ORF/region you want to count.
+                        Default = All
+    --iCLIP             
+                        This turns on the iCLIP mode and the pileups will
+                        report cross-linking site frequencies in iCLIP data in
+                        reference sequences
+
+Common options::
+
+    -v, --verbose       
+                        prints all the status messages to a file rather than
+                        the standard output
+    --ignorestrand      
+                        this flag tells the program to ignore strand
+                        information and all overlapping reads will be considered
+                        sense reads. Useful for analysing ChIP or RIP data
+    --zip=FILE          
+                        use this option to compress all the output files in a
+                        single zip file
+    --overlap=1         
+                        sets the number of nucleotides a read has to overlap
+                        with a gene before it is considered a hit. Default =
+                        1 nucleotide
+    -s genomic, --sequence=genomic
+                        with this option you can select whether you want the
+                        reads aligned to the genomic or the coding sequence.
+                        Default = genomic
+    -r 100, --range=100
+                        allows you to set the length of the UTR regions. If
+                        you set '-r 50' or '--range=50', then the program will
+                        set a fixed length (50 bp) regardless of whether the
+                        GTF file has genes with annotated UTRs.
+
+Options for novo, SAM and BAM files::
+
+    --align_quality=100, --mapping_quality=100
+                        with these options you can set the alignment quality
+                        (Novoalign) or mapping quality (SAM) threshold. Reads
+                        with qualities lower than the threshold will be
+                        ignored. Default = 0
+    --align_score=100   
+                        with this option you can set the alignment score
+                        threshold. Reads with alignment scores lower than the
+                        threshold will be ignored. Default = 0
+    -l 100, --length=100
+                        to set read length threshold. Default = 1000
+    -m 100000, --max=100000
+                        maximum number of mapped reads that will be analyzed.
+                        Default = All
+    --unique            
+                        with this option reads with multiple alignment
+                        locations will be removed. Default = Off
+    --blocks            
+                        with this option reads with the same start and end
+                        coordinates on a chromosome will only be counted once.
+                        Default = Off
+    --discarded=FILE    
+                        prints the lines from the alignments file that were
+                        discarded by the parsers. This file contains reads
+                        that were unmapped (NM), of poor quality (i.e. QC) or
+                        paired reads that were mapped to different chromosomal
+                        locations or were too far apart on the same
+                        chromosome. Useful for debugging purposes
+    -d 1000, --distance=1000
+                        this option allows you to set the maximum number of
+                        base-pairs allowed between two non-overlapping paired
+                        reads. Default = 1000
+    --mutations=delsonly
+                        Use this option to only track mutations that are of
+                        interest. For CRAC data this is usually deletions
+                        (--mutations=delsonly). For PAR-CLIP data this is
+                        usually T-C mutations (--mutations=TC). Other options
+                        are: do not report any mutations: --mutations=nomuts.
+                        Only report specific base mutations, for example only
+                        in T's, C's and G's :--mutations=[TCG]. The brackets
+                        are essential. Other nucleotide combinations are also
+                        possible
+
+	</help>
+</tool>	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyReadAligner.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,368 @@
+ <tool id ="pyReadAligner" name="pyReadAligner">
+	<requirements>
+		<requirement type="package">pyCRAC</requirement>
+	</requirements>
+	<!-- The discard and alignment options only exist on the novo/sam branches of "ftype", hence the file_type guards. -->
+	<command interpreter="python">/usr/local/bin/pyReadAligner.py
+		-f $ftype.input
+		--file_type $ftype.file_type
+		#if $geneOpt.alignGene == "gene":
+			-g $geneOpt.genes
+		#end if
+		#if $geneOpt.alignGene == "chr":
+			--chr $geneOpt.chr
+		#end if
+		#if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.disc.discard == "discard":
+			--discarded $discarded
+		#end if
+		--gtf=$addGTF.gtf
+		--tab=$addTab.tab
+		#if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.addAlignOpt.alignoptions == "edit":
+			--align_quality=$ftype.addAlignOpt.align_quality
+			--align_score=$ftype.addAlignOpt.align_score
+			--distance=$ftype.addAlignOpt.d
+			--length=$ftype.addAlignOpt.length
+			#if int($ftype.addAlignOpt.max) > 0:
+				--max=$ftype.addAlignOpt.max
+			#end if
+			$ftype.addAlignOpt.unique
+			$ftype.addAlignOpt.blocks
+			$ftype.addAlignOpt.mutations
+		#end if
+		#if $addOpt.options == "edit":
+			--range=$addOpt.range
+			--overlap=$addOpt.overlap
+			$addOpt.ignore
+			-s $addOpt.sequence
+			#if int($addOpt.limit) > 0:
+				--limit=$addOpt.limit
+			#end if
+		#end if
+		-o $output
+	</command>
+	<version_command>/usr/local/bin/pyReadAligner.py --version</version_command>
+	<inputs>
+		<!-- Regions to align against: a gene ID list (-g) or a chromosome coordinate file. -->
+		<conditional name="geneOpt">
+			<param name="alignGene" type="select" label="Do you want to align reads to genes or chromosome co-ordinates?">
+				<option value="gene" selected="true">Genes</option>
+				<option value="chr">Chromosome Co-ordinates</option>
+			</param>
+			<when value="chr">
+				<param format="interval" name="chr" type="data" label="Choose a Chromosome Coordinate File"
+					help="Tab delimited text file containing an identifier, chromosome name, start position, end position and strand ('-' or '+')"/>
+			</when>
+			<when value="gene">
+				<param format="txt" name="genes" type="data" label="Choose a Gene List -g" help="Single column gene ID file"/>
+			</when>
+		</conditional>
+
+		<!-- GTF annotation: a pre-configured entry of the pycrac_gtf data table or a dataset from the history. -->
+		<conditional name="addGTF">
+			<param name="gtfFile" type="select" label="Choose GTF File from">
+				<option value="default" selected="true">Defaults</option>
+				<option value="other">History</option>
+			</param>
+			<when value="default">
+				<param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+					<options from_data_table="pycrac_gtf"/>
+				</param>
+			</when>
+			<when value="other">
+				<param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+			</when>
+		</conditional>
+
+		<!-- Genomic reference sequence: a pre-configured entry of the pycrac_tab data table or a dataset from the history. -->
+		<conditional name="addTab">
+			<param name="tabFile" type="select" label="Choose Genomic Reference Sequence from">
+				<option value="default" selected="true">Defaults</option>
+				<option value="other">History</option>
+			</param>
+			<when value="default">
+				<param name="tab" type="select" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence">
+					<options from_data_table="pycrac_tab"/>
+				</param>
+			</when>
+			<when value="other">
+				<param format="tabular" name="tab" type="data" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence"/>
+			</when>
+		</conditional>
+
+		<!-- Input alignments. Only the sam and novo branches offer the discard and alignment options; gtf has none. -->
+		<conditional name="ftype">
+			<param name="file_type" type="select" label="Input File Type --file_type">
+				<option value="sam">Sam/BAM</option>
+				<option value="novo">Novo</option>
+				<option value="gtf">GTF</option>
+			</param>
+			<when value="sam">
+				<!-- Same discard and alignment options as the novo branch below; keep the two in sync. -->
+				<param format="sam,bam" name="input" type="data" label="Input File -f" help="Alignment file of type .sam or .bam"/>
+				<conditional name="disc">
+					<param name="discard" type="select" label="Print discarded reads to a separate file">
+						<option value="" selected="true">OFF</option>
+						<option value="discard">ON</option>
+					</param>
+					<when value="discard"/>
+					<when value=""/>
+				</conditional>
+				<conditional name="addAlignOpt">
+					<param name="alignoptions" type="select" label="Alignment Options">
+						<option value="default" selected="true">Default</option>
+						<option value="edit">Edit</option>
+					</param>
+					<when value="edit">
+						<param name="mutations" type="select" label="Filter reads by mutations --mutations" help="cross-linking sites are often highlighted by deletions and/or substitutions in the reads. You can use this option to filter reads based on whether they have mutations or not.">
+							<option value="" selected="true">Off</option>
+							<option value="--mutations=delsonly">deletions</option>
+							<option value="--mutations=subsonly">substitutions</option>
+							<option value="--mutations=TC">T->C mutations</option>
+							<option value="--mutations=allmuts">all mutations</option>
+							<option value="--mutations=nomuts">no mutations</option>
+						</param>
+						<param format="integer" name="align_quality" type="integer" label="Align Quality --align_quality " value="0" size="5">
+							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+						</param>
+						<param format="integer" name="align_score" type="integer" label="Align Score --align_score " value="0" size="5">
+							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+						</param>
+						<param format="integer" name="max" type="integer" label="Mapped reads to read from input file --max" help="Set to 0 to align all reads." value="0" size="10">
+							<validator type="in_range" min="0" max="100000000" message="Please enter a value between 1 and 100000000 or 0 to align all reads"/>
+						</param>
+						<param format="integer" name="d" type="integer" label="Distance --distance " value="1000" size="6" help="Set the maximum number of bp allowed between two non-overlapping paired reads">
+							<validator type="in_range" min="1" message="Please enter a value >= 1"/>
+						</param>
+						<param format="integer" name="length" type="integer" label="Set the maximum length of reads --length" value="1000" size="7" help="Set the read length threshold between 15 and 1000">
+							<validator type="in_range" min="15" max="1000" message="Please enter a value between 15 and 1000"/>
+						</param>
+						<param name="unique" type="select" label="Remove reads with multiple alignment locations --unique">
+							<option value="" selected="true">OFF</option>
+							<option value="--unique">ON</option>
+						</param>
+						<param name="blocks" type="select" label="Only count reads with same start and end coords once --blocks">
+							<option value="" selected="true">OFF</option>
+							<option value="--blocks">ON</option>
+						</param>
+					</when>
+					<when value="default"/>
+				</conditional>
+			</when>
+			<when value="novo">
+				<!-- Same discard and alignment options as the sam branch above; keep the two in sync. -->
+				<param format="tabular" name="input" type="data" label="Input File -f" help="Alignment file of type .novo"/>
+				<conditional name="disc">
+					<param name="discard" type="select" label="Print discarded reads to a separate file">
+						<option value="" selected="true">OFF</option>
+						<option value="discard">ON</option>
+					</param>
+					<when value="discard"/>
+					<when value=""/>
+				</conditional>
+				<conditional name="addAlignOpt">
+					<param name="alignoptions" type="select" label="Alignment Options">
+						<option value="default" selected="true">Default</option>
+						<option value="edit">Edit</option>
+					</param>
+					<when value="edit">
+						<param name="mutations" type="select" label="Filter reads by mutations --mutations"
+							help="cross-linking sites are often highlighted by deletions and/or substitutions in the reads. You can use this option to filter reads based on whether they have mutations or not.">
+							<option value="" selected="true">Off</option>
+							<option value="--mutations=delsonly">deletions</option>
+							<option value="--mutations=subsonly">substitutions</option>
+							<option value="--mutations=TC">T->C mutations</option>
+							<option value="--mutations=allmuts">all mutations</option>
+							<option value="--mutations=nomuts">no mutations</option>
+						</param>
+						<param format="integer" name="align_quality" type="integer" label="Align Quality --align_quality " value="0" size="5">
+							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+						</param>
+						<param format="integer" name="align_score" type="integer" label="Align Score --align_score " value="0" size="5">
+							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+						</param>
+						<param format="integer" name="max" type="integer" label="Mapped reads to read from input file --max" help="Set to 0 to align all reads." value="0" size="10">
+							<validator type="in_range" min="0" max="100000000" message="Please enter a value between 1 and 100000000 or 0 to align all reads"/>
+						</param>
+						<param format="integer" name="d" type="integer" label="Distance --distance " value="1000" size="6" help="Set the maximum number of bp allowed between two non-overlapping paired reads">
+							<validator type="in_range" min="1" message="Please enter a value >= 1"/>
+						</param>
+						<param format="integer" name="length" type="integer" label="Set the maximum length of reads --length" value="1000" size="7" help="Set the read length threshold between 15 and 1000">
+							<validator type="in_range" min="15" max="1000" message="Please enter a value between 15 and 1000"/>
+						</param>
+						<param name="unique" type="select" label="Remove reads with multiple alignment locations --unique">
+							<option value="" selected="true">OFF</option>
+							<option value="--unique">ON</option>
+						</param>
+						<param name="blocks" type="select" label="Only count reads with same start and end coords once --blocks">
+							<option value="" selected="true">OFF</option>
+							<option value="--blocks">ON</option>
+						</param>
+					</when>
+					<when value="default"/>
+				</conditional>
+			</when>
+			<when value="gtf">
+				<param format="gtf" name="input" type="data" label="Input File -f" help="File of type .gtf"/>
+			</when>
+		</conditional>
+
+		<!-- Standard options; the command template only passes them on when Edit is selected. -->
+		<conditional name="addOpt">
+			<param name="options" type="select" label="Standard Options">
+				<option value="default" selected="true">Default</option>
+				<option value="edit">Edit</option>
+			</param>
+			<when value="edit">
+				<param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs 0>50000">
+					<validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
+				</param>
+				<param name="ignore" type="select" label="Ignore strand information? --ignorestrand">
+					<option value="" selected="true">No</option>
+					<option value="--ignorestrand">Yes</option>
+				</param>
+				<param format="integer" name="overlap" type="integer" label="Overlap --overlap" value="1" size="5" help="Sets the number of nucleotides a read has to overlap with a gene before it is considered a hit. ">
+					<validator type="in_range" min="1" message="Please enter a positive integer"/>
+				</param>
+				<param name="sequence" type="select" label="Align reads to --sequence">
+					<option value="genomic" selected="true">Genomic Sequence</option>
+					<option value="coding">Coding Sequence</option>
+				</param>
+				<param format="integer" name="limit" type="integer" label="Limit number of reads to count that map to a particular region --limit" value="0" size="15" help="Set to 0 for unlimited reads">
+					<validator type="in_range" min="0" message="Please enter a value of 1 or more, or set to 0 for unlimited reads"/>
+				</param>
+			</when>
+			<when value="default"/>
+		</conditional>
+		<!-- Free-text label used in the names of the output datasets. -->
+		<param name="label" type="text" format="txt" size="30" value="pyReadAligner" label="Enter output file label -o" />
+	</inputs>
+	<outputs> <!-- "discarded" is only created for novo/sam input with the discard option ON -->
+		<data name="output" format="fasta" label="${label.value}.aligned.fasta"/>
+		<data name="discarded" format="txt" label="${label.value}_discarded.txt">
+			<filter>(ftype['file_type'] == "novo" or ftype['file_type'] == "sam") and ftype['disc']['discard'] ==  "discard"</filter>
+		</data>
+	</outputs>
+	<help>
+
+
+.. class:: infomark
+
+**pyReadAligner**
+
+pyReadAligner is part of the pyCRAC_ package. Generates multiple sequence alignments for reads mapped to individual genes or genomic regions. 
+Produces a fasta output file.
+
+   
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+		
+------
+
+**Parameter list**
+
+File input options::
+
+	-f FILE, --input_file=FILE
+						As input you can use Novoalign native output or
+						SAM files. By default it expects data
+						from the standard input. Make sure to specify the file
+						type of the file you want to have analyzed using the
+						--file_type option!
+	-o OUTPUT_FILE, --output_file=OUTPUT_FILE
+						Use this flag to override the standard output file
+						names. All alignments will be written to one output
+						file.
+	-g FILE, --genes_file=FILE
+						here you need to type in the name of your gene list
+						file (1 column) or the hittable file
+	--chr=FILE			
+                                                if you simply would like to align reads against a
+						genomic sequence you should generate a tab delimited
+						file containing an identifier, chromosome name, start
+						position, end position and strand
+	--gtf=annotation_file.gtf
+						type the path to the gtf annotation file that you want
+						to use
+	--tab=tab_file.tab	
+                                                type the path to the tab file that contains the
+						genomic reference sequence
+	--file_type=FILE_TYPE
+						use this option to specify the file type (i.e. 'novo',
+						'sam', 'gtf'). This will tell the program which
+						parsers to use for processing the files. Default =
+						'novo'
+
+pyReadAligner specific options::
+
+	--limit=500			
+                                                with this option you can select how many reads mapped
+						to a particular gene/ORF/region you want to count.
+						Default = All
+
+Common options::
+
+	--ignorestrand		
+                                                this flag tells the program to ignore strand
+						information and all overlapping reads will be considered
+						sense reads. Useful for analysing ChIP or RIP data
+	--overlap=1			
+                                                sets the number of nucleotides a read has to overlap
+						with a gene before it is considered a hit. Default =
+						1 nucleotide
+	-s genomic, --sequence=genomic
+						with this option you can select whether you want the
+						reads aligned to the genomic or the coding sequence.
+						Default = genomic
+	-r 100, --range=100
+						allows you to set the length of the UTR regions. If
+						you set '-r 50' or '--range=50', then the program will
+						set a fixed length (50 bp) regardless of whether the
+						GTF file has genes with annotated UTRs.
+
+Options for novo, SAM and BAM files::
+
+	--align_quality=100, --mapping_quality=100
+						with these options you can set the alignment quality
+						(Novoalign) or mapping quality (SAM) threshold. Reads
+						with qualities lower than the threshold will be
+						ignored. Default = 0
+	--align_score=100	
+                                                with this option you can set the alignment score
+						threshold. Reads with alignment scores lower than the
+						threshold will be ignored. Default = 0
+	-l 100, --length=100
+						to set read length threshold. Default = 1000
+	-m 100000, --max=100000
+						maximum number of mapped reads that will be analyzed.
+						Default = All
+	--unique			      
+                                                with this option reads with multiple alignment
+						locations will be removed. Default = Off
+	--blocks			
+                                                with this option reads with the same start and end
+						coordinates on a chromosome will only be counted once.
+						Default = Off
+	--discarded=FILE	                
+                                                prints the lines from the alignments file that were
+						discarded by the parsers. This file contains reads
+						that were unmapped (NM), of poor quality (i.e. QC) or
+						paired reads that were mapped to different chromosomal
+						locations or were too far apart on the same
+						chromosome. Useful for debugging purposes
+	-d 1000, --distance=1000
+						this option allows you to set the maximum number of
+						base-pairs allowed between two non-overlapping paired
+						reads. Default = 1000
+	--mutations=delsonly
+						Use this option to only track mutations that are of
+						interest. For CRAC data this is usually deletions
+						(--mutations=delsonly). For PAR-CLIP data this is
+						usually T-C mutations (--mutations=TC). Other options
+						are: do not report any mutations: --mutations=nomuts.
+						Only report specific base mutations, for example only
+						in T's, C's and G's :--mutations=[TCG]. The brackets
+						are essential. Other nucleotide combinations are also
+						possible
+
+
+	</help>
+</tool> 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyReadCounters.pl	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,60 @@
+#!/usr/bin/perl -w
+# Galaxy wrapper around pyReadCounters.py: translates the tool-form options into
+# a pyReadCounters.py call, runs it, then moves the "rc_<id>_*" result files onto
+# the dataset paths Galaxy passed in (--stats, --hittable, --intronUTRoverlap,
+# --countoutput). Exits non-zero if the run or any of the moves fails.
+use strict;
+use File::Copy qw(move);
+use Getopt::Long;
+
+my %opt;
+GetOptions(\%opt, "f=s","file_type=s","version","gtf=s","align_quality=i","align_score=i","range=i","length=i","max=i","distance=i","ignorestrand","overlap=i","unique","blocks","mutations=s","countoutput=s","stats=s","hittable=s","intronUTRoverlap=s","discarded=s","options","alignOpt","id=s")
+    or die "pyReadCounters.pl: could not parse the command line\n";
+
+my @cmnd = ("python", "/usr/local/bin/pyReadCounters.py");
+
+# --version only reports the version: nothing to build and no files to move.
+if (exists $opt{version}){
+    exit(system(@cmnd, "--version") == 0 ? 0 : 1);
+}
+
+for my $required (qw(f file_type gtf stats hittable intronUTRoverlap id)){
+    die "pyReadCounters.pl: missing required option --$required\n" unless defined $opt{$required};
+}
+
+my $prefix = "rc_$opt{id}";
+push @cmnd, "-f", $opt{f}, "--file_type", $opt{file_type}, "--gtf", $opt{gtf}, "-o", $prefix;
+
+if (exists $opt{options}){
+    push @cmnd, "--range=$opt{range}", "--overlap=$opt{overlap}";
+    push @cmnd, "--ignorestrand" if exists $opt{ignorestrand};
+}
+
+# --blocks switches the output file suffix from "reads" to "cDNAs", but only
+# when it is really forwarded, i.e. inside the --alignOpt group.
+my $suffix = "reads";
+if (exists $opt{alignOpt}){
+    push @cmnd, "--align_quality=$opt{align_quality}", "--align_score=$opt{align_score}";
+    push @cmnd, "--length=$opt{length}", "--distance=$opt{distance}";
+    push @cmnd, "--max=$opt{max}" if exists $opt{max};
+    push @cmnd, "--unique" if exists $opt{unique};
+    push @cmnd, "--mutations=$opt{mutations}" if exists $opt{mutations};
+    if (exists $opt{blocks}){ push @cmnd, "--blocks"; $suffix = "cDNAs"; }
+}
+
+# The tool form offers --discarded independently of the alignment options, so it
+# is forwarded whenever it was given (it used to be dropped without --alignOpt).
+push @cmnd, "--discarded=$opt{discarded}" if exists $opt{discarded};
+
+print STDOUT join(" ", @cmnd), "\n";
+system(@cmnd) == 0 or die "pyReadCounters.pl: pyReadCounters.py failed (status $?)\n";
+
+my %result = (hittable => "hittable_$suffix.txt", stats => "file_statistics_$suffix.txt", intronUTRoverlap => "intron_and_UTR_overlap_$suffix.gtf");
+$result{countoutput} = "count_output_$suffix.gtf" if $opt{file_type} ne "gtf" and defined $opt{countoutput};
+
+my $failed = 0;
+for my $key (sort keys %result){
+    my $from = "${prefix}_$result{$key}";
+    move($from, $opt{$key}) or do { warn "pyReadCounters.pl: cannot move $from to --$key path: $!\n"; $failed = 1; };
+}
+exit $failed;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyReadCounters.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,359 @@
+<tool id ="pyReadCounters" name="pyReadCounters" force_history_refresh="True">
+	<requirements>
+		<requirement type="package">pyCRAC</requirement>
+	</requirements>
+	<!-- Calls the pyReadCounters.pl wrapper, which runs pyReadCounters.py and moves its result files onto the output datasets. -->
+	<command interpreter="perl">pyReadCounters.pl
+		-f $ftype.input
+		--file_type $ftype.file_type
+		--gtf $addGTF.gtf
+		#if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.disc.discard == "discard":
+			--discarded $discarded
+		#end if
+		#if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.addAlignOpt.alignoptions == "edit":
+			--alignOpt
+			--align_quality $ftype.addAlignOpt.align_quality
+			--align_score $ftype.addAlignOpt.align_score
+			#if int($ftype.addAlignOpt.max) > 0:
+				--max $ftype.addAlignOpt.max
+			#end if
+			--distance $ftype.addAlignOpt.d
+			--length $ftype.addAlignOpt.length
+			$ftype.addAlignOpt.unique
+			$ftype.addAlignOpt.blocks
+			$ftype.addAlignOpt.mutations
+		#end if
+		#if $addOpt.options == "edit":
+			--options
+			--range $addOpt.range
+			$addOpt.ignore
+			--overlap $addOpt.overlap
+		#end if
+
+		--stats $stats
+		--hittable $hittable
+		--intronUTRoverlap $intronUTRoverlap
+
+		#if $ftype.file_type == "novo" or $ftype.file_type == "sam":
+			--countoutput $countoutput
+		#end if
+
+		--id $stats.id
+	</command>
+	<version_command>/usr/local/bin/pyReadCounters.py --version</version_command>
+	<inputs>
+		<!-- GTF annotation: a pre-configured entry of the pycrac_gtf data table or a dataset from the history. -->
+		<conditional name="addGTF">
+			<param name="gtfFile" type="select" label="Choose GTF File from">
+				<option value="default" selected="true">Defaults</option>
+				<option value="other">History</option>
+			</param>
+			<when value="default">
+				<param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+					<options from_data_table="pycrac_gtf"/>
+				</param>
+			</when>
+			<when value="other">
+				<param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+			</when>
+		</conditional>
+
+		<!-- Input alignments. Only the novo and sam branches offer the discard and alignment options; gtf has none. -->
+		<conditional name="ftype">
+			<param name="file_type" type="select" label="Input File Type --file_type" help="Use .novo or .sam input files">
+				<option value="novo" selected="true">Novo</option>
+				<option value="sam">Sam/Bam</option>
+				<option value="gtf">GTF</option>
+			</param>
+			<when value="novo">
+				<!-- Same discard and alignment options as the sam branch below; keep the two in sync. -->
+				<param format="tabular" name="input" type="data" label="Input File --input_file" help="Alignment file of type .novo"/>
+				<conditional name="disc">
+					<param name="discard" type="select" label="Print discarded reads to a separate file">
+						<option value="" selected="true">OFF</option>
+						<option value="discard">ON</option>
+					</param>
+					<when value="discard"/>
+					<when value=""/>
+				</conditional>
+				<conditional name="addAlignOpt">
+					<param name="alignoptions" type="select" label="Alignment Options">
+						<option value="default" selected="true">Default</option>
+						<option value="edit">Edit</option>
+					</param>
+					<when value="edit">
+						<param name="mutations" type="select" label="Option for selecting type of mutations to report --mutations" help="cross-linking sites are often highlighted by deletions and/or substitutions in the reads. You can use this option to select specific mutations that you want to have reported in the GTF output file.">
+							<option value="" selected="true">Off</option>
+							<option value="--mutations delsonly">deletions</option>
+							<option value="--mutations subsonly">substitutions</option>
+							<option value="--mutations TC">T->C substitutions</option>
+							<option value="--mutations nomuts">no mutations</option>
+						</param>
+						<param format="integer" name="align_quality" type="integer" label="Align Quality --align_quality " value="0" size="5">
+							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+						</param>
+						<param format="integer" name="align_score" type="integer" label="Align Score --align_score " value="0" size="5">
+							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+						</param>
+						<param format="integer" name="max" type="integer" label="Mapped reads to read from input file --max" help="Set to 0 to align all reads." value="0" size="10">
+							<validator type="in_range" min="0" max="100000000" message="Please enter a value between 1 and 100000000 or 0 to align all reads"/>
+						</param>
+						<param format="integer" name="d" type="integer" label="Distance --distance " value="1000" size="6" help="Set the maximum number of bp allowed between two non-overlapping paired reads">
+							<validator type="in_range" min="1" message="Please enter a value >= 1"/>
+						</param>
+						<param format="integer" name="length" type="integer" label="Set the maximum length of reads --length" value="1000" size="7" help="Set the read length threshold between 15 and 1000">
+							<validator type="in_range" min="15" max="1000" message="Please enter a value between 15 and 1000"/>
+						</param>
+						<param name="unique" type="select" label="Remove reads with multiple alignment locations --unique">
+							<option value="" selected="true">OFF</option>
+							<option value="--unique">ON</option>
+						</param>
+						<param name="blocks" type="select" label="Only count reads with same start and end coords once --blocks">
+							<option value="" selected="true">OFF</option>
+							<option value="--blocks">ON</option>
+						</param>
+					</when>
+					<when value="default"/>
+				</conditional>
+			</when>
+			<when value="sam">
+				<!-- Same discard and alignment options as the novo branch above; keep the two in sync. -->
+				<param format="sam,bam" name="input" type="data" label="Input File --input_file" help="Alignment file of type .sam or .bam"/>
+				<conditional name="disc">
+					<param name="discard" type="select" label="Print discarded reads to a separate file">
+						<option value="" selected="true">OFF</option>
+						<option value="discard">ON</option>
+					</param>
+					<when value="discard"/>
+					<when value=""/>
+				</conditional>
+				<conditional name="addAlignOpt">
+					<param name="alignoptions" type="select" label="Alignment Options">
+						<option value="default" selected="true">Default</option>
+						<option value="edit">Edit</option>
+					</param>
+					<when value="edit">
+						<param name="mutations" type="select" label="Option for selecting type of mutations to report --mutations" help="cross-linking sites are often highlighted by deletions and/or substitutions in the reads. You can use this option to select specific mutations that you want to have reported in the GTF output file.">
+							<option value="" selected="true">Off</option>
+							<option value="--mutations delsonly">deletions</option>
+							<option value="--mutations subsonly">substitutions</option>
+							<option value="--mutations TC">T->C mutations</option>
+							<option value="--mutations nomuts">no mutations</option>
+						</param>
+						<param format="integer" name="align_quality" type="integer" label="Align Quality --align_quality " value="0" size="5">
+							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+						</param>
+						<param format="integer" name="align_score" type="integer" label="Align Score --align_score " value="0" size="5">
+							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+						</param>
+						<param format="integer" name="max" type="integer" label="Mapped reads to read from input file --max" help="Set to 0 to align all reads." value="0" size="10">
+							<validator type="in_range" min="0" max="100000000" message="Please enter a value between 1 and 100000000 or 0 to align all reads"/>
+						</param>
+						<param format="integer" name="d" type="integer" label="Distance --distance " value="1000" size="6" help="Set the maximum number of bp allowed between two non-overlapping paired reads">
+							<validator type="in_range" min="1" message="Please enter a value >= 1"/>
+						</param>
+						<param format="integer" name="length" type="integer" label="Set the maximum length of reads --length" value="1000" size="7" help="Set the read length threshold between 15 and 1000">
+							<validator type="in_range" min="15" max="1000" message="Please enter a value between 15 and 1000"/>
+						</param>
+						<param name="unique" type="select" label="Remove reads with multiple alignment locations --unique">
+							<option value="" selected="true">OFF</option>
+							<option value="--unique">ON</option>
+						</param>
+						<param name="blocks" type="select" label="Only count reads with same start and end coords once --blocks">
+							<option value="" selected="true">OFF</option>
+							<option value="--blocks">ON</option>
+						</param>
+					</when>
+					<when value="default"/>
+				</conditional>
+			</when>
+			<when value="gtf">
+				<param format="gtf" name="input" type="data" label="Input File --input_file" help="File of type .gtf"/>
+			</when>
+		</conditional>
+
+		<!-- Standard options; the command template only passes them on when Edit is selected. -->
+		<conditional name="addOpt">
+			<param name="options" type="select" label="Standard Options">
+				<option value="default" selected="true">Default</option>
+				<option value="edit">Edit</option>
+			</param>
+			<when value="edit">
+				<param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs 0>50000">
+					<validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
+				</param>
+				<param name="ignore" type="select" label="Ignore strand information? --ignorestrand">
+					<option value="" selected="true">No</option>
+					<option value="--ignorestrand">Yes</option>
+				</param>
+				<param format="integer" name="overlap" type="integer" label="Overlap --overlap" value="1" size="5" help="Sets the number of nucleotides a read has to overlap with a gene before it is considered a hit. ">
+					<validator type="in_range" min="1" message="Please enter a positive integer"/>
+				</param>
+			</when>
+			<when value="default"/>
+		</conditional>
+		<param name="label" type="text" format="txt" size="30" value="pyReadCounters" label="Enter output file label -o" />
+	</inputs>
+	<outputs> <!-- countoutput needs novo/sam input; discarded additionally needs the discard option ON -->
+		<data format="tabular" name="stats" label="${label.value}_file_statistics.txt"/>
+		<data format="tabular" name="hittable" label="${label.value}_hittable.txt"/>
+		<data format="gtf" name="intronUTRoverlap" label="${label.value}_intron_and_UTR_overlap.gtf"/>
+		<data format="gtf" name="countoutput" label="${label.value}_count_output.gtf">
+			<filter>ftype['file_type'] == "novo" or ftype['file_type'] == "sam"</filter>
+		</data>
+		<data format="txt" name="discarded" label="${label.value}_discarded.txt">
+			<filter>(ftype['file_type'] == "novo" or ftype['file_type'] == "sam") and ftype['disc']['discard'] ==  "discard"</filter>
+		</data>
+	</outputs>
+	<help>
+
+.. class:: infomark
+
+**pyReadCounters**
+
+pyReadCounters is part of the pyCRAC_ package. It produces a gene hittable file and two GTF output files showing which genomic features the reads overlap with.
+Finally, the tool produces a read statistics file that provides information about the complexity of your dataset.
+
+**Output file examples**
+
+A hittable file::
+
+    # generated by pyReadCounters version 1.1.0, Mon Apr 16 20:34:22 2012
+    # /usr/local/bin/pyReadCounters.py -f RNAseq_data.novo -c 1 --unique
+    # total number of reads 12534556
+    # total number of paired reads  10947376
+    # total number of single reads  483095
+    # total number of mapped reads: 11430471
+    # total number of overlapping genomic features  7019550
+    #       sense   5960669
+    #       anti-sense      1058881
+    # feature       sense_overlap anti-sense_overlap  number of reads
+    
+    ## protein_coding       3190701
+    YEF3        49930       3629        24221
+    PMA1        32621       2650        21776
+    COX1        24559       1037        15174
+    TFP1        21539       1689        13506
+    HSC82       21177       1458        12729
+    ADH1        20245       1467        11351
+    AI5_ALPHA   20022       918         13101
+    AI4         19390       886         12638
+    AI3         17823       798         11473
+    AI2         17590       790         11297
+    RPL10       16822       1113        8797
+    ENO2        16336       1125        8913
+    TEF1        15578       1333        5450
+
+An example of a GTF 'count_output' file::
+
+    ##gff-version 2
+    # generated by Counters version 1.2.0, Tue Jan  8 22:47:29 2013
+    # pyReadCounters.py -f PAR_CLIP_unique.novo --mutations=TC -v
+    # total number of reads:	2455251
+    # total number of paired reads:	0
+    # total number of single reads:	2455251
+    # total number of mapped reads:	2455251
+    # total number of overlapping genomic features:	5153943
+    #	sense:	2640600
+    #	anti-sense:	2513343
+    chrXIV	reads	exon	661572	661605	2	+	.   gene_id "INT_0_6716,YNR016C"; gene_name "INT_0_6716,ACC1"; # 661596S;
+    chrXIV	reads	exon	661720	661738	1	+	.   gene_id "INT_0_6716,YNR016C"; gene_name "INT_0_6716,ACC1"; # 661726S;
+    chrXIV	reads	exon	661839	661878	4	+	.   gene_id "INT_0_6716,YNR016C"; gene_name "INT_0_6716,ACC1"; # 661875S;
+    
+This output file also reports whether a read contains a mutation. 
+    
+For example::
+    
+    # 661596S
+    
+Indicates that the read had a nucleotide substitution ("S") at genomic coordinate 661596. The chromosome name can be found in the first column. 
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+		
+------
+
+**Parameter list**
+
+File input options::
+
+	-f FILE, --input_file=FILE
+						provide the path to your novo, SAM/BAM or gtf data
+						file. Default is standard input. Make sure to specify
+						the file type of the file you want to have analyzed
+						using the --file_type option!
+	-o OUTPUT_FILE, --output_file=OUTPUT_FILE
+						Use this flag to override the standard file names. Do
+						NOT add an extension.
+	--file_type=FILE_TYPE
+						use this option to specify the file type (i.e.
+						'novo','sam' or 'gtf'). This will tell the program
+						which parsers to use for processing the files. Default
+						= 'novo'
+	--gtf=annotation_file.gtf
+						type the path to the gtf annotation file that you want
+						to use
+
+Common pyCRAC options::
+
+		--ignorestrand						
+											To ignore strand information and all reads overlapping
+						with genomic features will be considered sense reads.
+						Useful for analysing ChIP or RIP data
+	--overlap=1					
+												sets the number of nucleotides a read has to overlap
+						with a gene before it is considered a hit. Default =
+						1 nucleotide
+	-r 100, --range=100
+						allows you to add regions flanking the genomic
+						feature. If you set '-r 50' or '--range=50', then the
+						program will add 50 nucleotides to each feature on
+						each side regardless of whether the GTF file has genes
+						with annotated UTRs
+
+Options for SAM/BAM and Novo files::
+
+	--mutations=delsonly
+						Use this option to only track mutations that are of
+						interest. For CRAC data this is usually deletions
+						(--mutations=delsonly). For PAR-CLIP data this is
+						usually T-C mutations (--mutations=TC). Other options
+						are: do not report any mutations: --mutations=nomuts.
+						Only report specific base mutations, for example only
+						in T's, C's and G's :--mutations=[TCG]. The brackets
+						are essential. Other nucleotide combinations are also
+						possible
+	--align_quality=100, --mapping_quality=100
+						with these options you can set the alignment quality
+						(Novoalign) or mapping quality (SAM) threshold. Reads
+						with qualities lower than the threshold will be
+						ignored. Default = 0
+	--align_score=100					
+												with this option you can set the alignment score
+						threshold. Reads with alignment scores lower than the
+						threshold will be ignored. Default = 0
+	--unique							
+												with this option reads with multiple alignment
+						locations will be removed. Default = Off
+	--blocks					
+												with this option reads with the same start and end
+						coordinates on a chromosome will be counted as one
+						cDNA. Default = Off
+	-m 100000, --max=100000
+						maximum number of mapped reads that will be analyzed.
+						Default = All
+	-d 1000, --distance=1000
+						this option allows you to set the maximum number of
+						base-pairs allowed between two non-overlapping paired
+						reads. Default = 1000
+	--discarded=FILE					
+												prints the lines from the alignments file that were
+						discarded by the parsers. This file contains reads
+						that were unmapped (NM), of poor quality (i.e. QC) or
+						paired reads that were mapped to different chromosomal
+						locations or were too far apart on the same
+						chromosome. Useful for debugging purposes
+	-l 100, --length=100
+												to set read length threshold. Default = 1000
+
+	</help>
+</tool> 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pySelectMotifsFromGTF.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,75 @@
+ <tool id ="pySelectMotifsFromGTF" name="pySelectMotifsFromGTF">
+	<requirements> <!-- declares the pyCRAC package as a requirement of this tool -->
+		<requirement type="package">pyCRAC</requirement>
+	</requirements>
+	<command interpreter="python">
+	/usr/local/bin/pySelectMotifsFromGTF.py
+	--gtf '$input'
+	-m '$motif'
+	-o '$out'
+	-l $length
+	-z $zscore
+	</command> <!-- the paths and the free-text motif are single-quoted so that a value containing spaces reaches the script as one argument -->
+	<version_command>/usr/local/bin/pySelectMotifsFromGTF.py --version</version_command>
+	<inputs> <!-- labels carry the command-line flag each value is passed to -->
+		<param format="gtf" name="input" type="data" label="Input File --gtf" help="pyMotif gtf output files" />
+		<param format="txt" name="motif" type="text" size="200" value="KBCTTG" label="Motif string --motif" help="Enter motif (all uppercase) you want to extract from the pyMotif gtf output file">
+			<validator type="empty_field" />
+		</param>
+		<param format="integer" type="integer" value="6" size="5" name="length" label="Length --length" help="Set a Kmer Length. Note that the length has to be at least as long as your k-mer sequence, otherwise the program will not run correctly" />
+		<param format="float" type="float" value="0" size="5" name="zscore" label="Z Score --Z_score" help="Set a minimum Kmer Z_score" />
+		<param name="label" type="text" format="txt" size="30" value="pySelectMotifsFromGTF" label="Enter output file label -o" /> <!-- used as the prefix of the output dataset label -->
+	</inputs>
+
+	<outputs> <!-- one GTF dataset, labelled with the output label followed by the motif -->
+		<data name="out" format="gtf" label="${label.value}_${motif.value}.gtf"/>
+	</outputs>
+	<help>
+
+
+.. class:: infomark
+
+**pySelectMotifsFromGTF**
+
+pySelectMotifsFromGTF is part of the pyCRAC_ package. Extracts your favourite k-mer sequence from pyMotif GTF output files.
+Note that you can include degenerate nucleotides in your motif string::
+
+    N = A, G, C or T
+    R = A or G = puRine
+    Y = C or T = pYrimidine
+    M = A or C = aMino
+    S = G or C
+    W = A or T
+    K = G or T = Keto
+    V = A, C or G = Not T (letter after)
+    D = A, G or T = Not C
+    H = A, C or T = Not G
+    B = C, G or T = Not A
+
+So if you enter KBCTTG as search string and length=6, then the program will extract a large number of six-mers from your data.
+If you set length = 8, it will look for this pattern in a stretch of 8 nucleotides.
+   
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+Options::
+
+    --gtf=Yourfavoritegtf.gtf
+                        type the path to the gtf file that you want to use. By
+                        default it expects data from the standard input
+    -o FILE, --output=FILE
+                        Optional. Specify the name of the output file. Default
+                        is standard output. Make sure it has the .gtf
+                        extension!
+    -m KBCTTG, --motif=KBCTTG
+                        Specify the motif you want to extract from the GTF file.
+    -z 15.0, --Z_score=15.0
+                        Set a minimum k-mer Z-score. Default=0
+    -l 4, --length=4      
+                        Set a k-mer length. Default is no length filtering
+
+	</help>
+</tool>	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pycrac.chr.loc.sample	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,1 @@
+s.cerevisiae EF2	/usr/local/pyCRAC/db/Saccharomyces_cerevisiae.EF2.59.1.0_chr_lengths.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pycrac.fasta.loc.sample	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,1 @@
+s.cerevisiae EF2	/usr/local/pyCRAC/db/Saccharomyces_cerevisiae.EF2.59.1.0.fa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pycrac.gtf.loc.sample	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,1 @@
+s.cerevisiae EF2	/usr/local/pyCRAC/db/Saccharomyces_cerevisiae.EF2.59.1.2.gtf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pycrac.tab.loc.sample	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,1 @@
+s.cerevisiae EF2	/usr/local/pyCRAC/db/Saccharomyces_cerevisiae.EF2.59.1.0.fa.tab
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/tool_data_table_conf.xml.sample	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,23 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- pycrac_fasta: default fasta files listed in pycrac.fasta.loc -->
+    <table name="pycrac_fasta">
+        <columns>name, value</columns>
+        <file path="tool-data/pyCRAC/pycrac.fasta.loc"/>
+    </table>
+    <!-- pycrac_gtf: default gtf files listed in pycrac.gtf.loc -->
+    <table name="pycrac_gtf">
+        <columns>name, value</columns>
+        <file path="tool-data/pyCRAC/pycrac.gtf.loc"/>
+    </table>
+    <!-- pycrac_tab: default tab files listed in pycrac.tab.loc -->
+    <table name="pycrac_tab">
+        <columns>name, value</columns>
+        <file path="tool-data/pyCRAC/pycrac.tab.loc"/>
+    </table>
+    <!-- pycrac_chr: default chromosome length files listed in pycrac.chr.loc -->
+    <table name="pycrac_chr">
+        <columns>name, value</columns>
+        <file path="tool-data/pyCRAC/pycrac.chr.loc"/>
+    </table>
+</tables>