changeset 0:5b9519f4b165

Upload first commit
author nikhil-joshi
date Mon, 19 Sep 2011 19:04:41 -0400
parents
children 798fe7ba8b5e
files mirdeep2/bowtie_build.xml mirdeep2/bowtie_build_wrapper.pl mirdeep2/mapper.xml mirdeep2/mapper_wrapper.pl mirdeep2/mirdeep2.xml mirdeep2/mirdeep2_wrapper.pl mirdeep2/target_spy.xml mirdeep2/target_spy_wrapper.pl mirdeep2/target_spy_wrapper.sh mirdeep2/ucsc_browser_species.txt
diffstat 10 files changed, 399 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirdeep2/bowtie_build.xml	Mon Sep 19 19:04:41 2011 -0400
@@ -0,0 +1,32 @@
+<tool id="bowtiebuild" name="Bowtie Indexing">
+    <description>Build index for Bowtie alignment</description>
+
+    <requirements>
+        <requirement type="perl-module">bowtie_build_wrapper.pl</requirement>
+    </requirements>
+
+    <!--
+        Wrapper arguments, in order: reference fasta, extra-files directory of
+        the output dataset, output dataset, display name of the reference.
+        Every argument is quoted so a value containing whitespace stays one word.
+    -->
+    <command interpreter="perl">
+        ./bowtie_build_wrapper.pl "$fastafile" "$output.files_path" "$output" "$fastafile.name"
+    </command>
+
+    <inputs>
+        <param format="fasta" name="fastafile" type="data" optional="false" label="Fasta reference file" help="Reference file in fasta format"/>
+    </inputs>
+
+    <outputs>
+        <data format="bowtie_html_index" name="output"/>
+    </outputs>
+
+    <help>
+Runs bowtie-build on the selected fasta reference.
+
+The output dataset is an HTML page that links to the files stored alongside it: a copy of the reference and the index files written by bowtie-build.
+
+Select this dataset (not the original fasta file) wherever a "Bowtie indexed reference" is requested.
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirdeep2/bowtie_build_wrapper.pl	Mon Sep 19 19:04:41 2011 -0400
@@ -0,0 +1,66 @@
+#!/usr/bin/perl
+#
+# Galaxy wrapper around bowtie-build.
+#
+# Usage:
+#   bowtie_build_wrapper.pl <fasta> <files_path> <output_file> <fasta_name>
+#
+#   fasta        reference sequences to index
+#   files_path   directory that receives the index (created when missing)
+#   output_file  dataset file; ends up as an HTML page linking to the
+#                contents of files_path
+#   fasta_name   display name of the reference, shown in the page heading
+#
+# Both the copy of the reference and the index are stored in files_path
+# under the basename of output_file, so "<files_path>/<basename>" names the
+# reference as well as the index prefix.
+
+use strict;
+use warnings;
+
+use File::Basename qw/ basename /;
+use File::Copy qw/ copy /;
+use File::Path qw/ mkpath /;
+
+# Escape text for use in HTML content and in quoted attribute values.
+sub html_escape {
+    my ($text) = @_;
+    my %entity_for = (
+        '&' => '&amp;',
+        '<' => '&lt;',
+        '>' => '&gt;',
+        '"' => '&quot;',
+        "'" => '&#39;',
+    );
+    $text =~ s/([&<>"'])/$entity_for{$1}/g;
+    return $text;
+}
+
+die "Usage: $0 <fasta> <files_path> <output_file> <fasta_name>\n"
+    if @ARGV < 4;
+my ($fastafile, $file_path, $output_file, $fastafile_text) = @ARGV;
+
+my $index_prefix = "$file_path/" . basename($output_file);
+
+# mkpath croaks when the directory cannot be created.
+mkpath($file_path) unless -d $file_path;
+
+# Keep a copy of the reference next to the index.
+copy($fastafile, $index_prefix)
+    or die "Cannot copy $fastafile to $index_prefix: $!\n";
+
+# List form: no shell is involved, so paths are passed through verbatim.
+system('bowtie-build', $fastafile, $index_prefix) == 0
+    or die "bowtie-build failed (wait status $?)\n";
+
+opendir(my $dir, $file_path) or die "Cannot read directory $file_path: $!\n";
+my @files = sort grep { !/^\./ } readdir($dir);
+closedir($dir);
+
+open(my $out, '>', $output_file) or die "Cannot write $output_file: $!\n";
+print {$out} '<h1>Bowtie index on ', html_escape($fastafile_text), "</h1>\n";
+for my $file (@files) {
+    my $safe = html_escape($file);
+    print {$out} "<a href='$safe'>$safe</a><br/>\n";
+}
+close($out) or die "Cannot write $output_file: $!\n";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirdeep2/mapper.xml	Mon Sep 19 19:04:41 2011 -0400
@@ -0,0 +1,92 @@
+<tool id="mapper" name="Mapping to reference">
+    <description>Mapping using a bowtie index</description>
+
+    <requirements>
+        <requirement type="perl-module">mapper_wrapper.pl</requirement>
+    </requirements>
+
+    <!--
+        The wrapper takes the index dataset, the ARF output and the index
+        directory first; everything from the reads file onwards is handed to
+        mapper.pl. The q flag is sent only when the one-mismatch box is ticked.
+    -->
+    <command interpreter="perl">
+        ./mapper_wrapper.pl "$bowtie_index_name" "$output_arf" "$bowtie_index_name.extra_files_path" "$reads"
+
+        #if $reads.extension.startswith("fastq"):
+        -e
+        #end if
+
+        #if $reads.extension.startswith("fasta"):
+        -c
+        #end if
+
+        #if str($convert_rna) == "convert_rna_true":
+        -i
+        #end if
+
+        #if str($remove_non_atcgun) == "remove_non_atcgun_true":
+        -j
+        #end if
+
+        #if str($adapter_seq) != "":
+        -k $adapter_seq
+        #end if
+
+        #if str($discard_short_reads) != "":
+        -l $discard_short_reads
+        #end if
+
+        #if str($collapse_reads) == "Collapsed Reads Fasta":
+        -m
+        #end if
+
+        #if str($map_mismatch) == "map_mismatch_true":
+        -q
+        #end if
+
+        #if str($map_threshold) != "":
+        -r $map_threshold
+        #end if
+
+        -h -s "$output_fasta" -n
+    </command>
+
+    <inputs>
+        <param format="fastq,fasta" name="reads" type="data" optional="false" label="Reads" help="Reads in fastq or fasta format"/>
+        <param format="bowtie_html_index" name="bowtie_index_name" type="data" optional="false" label="Bowtie indexed reference" help="Select the bowtie-build run, NOT the fasta reference file you indexed."/>
+
+        <param name="convert_rna" type="boolean" truevalue="convert_rna_true" falsevalue="convert_rna_false" checked="false" label="Convert RNA to DNA alphabet (to map against genome)"/>
+
+        <param name="remove_non_atcgun" type="boolean" truevalue="remove_non_atcgun_true" falsevalue="remove_non_atcgun_false" checked="false" label="Remove reads with non-standard nucleotides" help="Remove all entries that have a sequence that contains letters other than a,c,g,t,u,n,A,C,G,T,U,N"/>
+
+        <!-- The pattern accepts n/N, as the help text promises, and the empty string, because the adapter is optional. -->
+        <param name="adapter_seq" value="" type="text" optional="true" label="Clip 3' Adapter Sequence (optional)" help="Adapter Sequence can only contain a,c,g,t,u,n,A,C,G,T,U,N">
+            <validator type="regex" message="Adapter can ONLY contain a,c,g,t,u,n,A,C,G,T,U,N">^[ACGTUNacgtun]*$</validator>
+        </param>
+
+        <param name="discard_short_reads" value="17" type="integer" optional="false" label="Discard reads shorter than this length (0 for keeping all reads)" help="Note that miRDeep2 requires no reads under 17 in length">
+            <validator type="in_range" min="0" message="Minimum value is 0"/>
+        </param>
+
+        <param name="collapse_reads" type="boolean" truevalue="Collapsed Reads Fasta" falsevalue="Fasta" checked="true" label="Collapse identical reads into one read with count information in sequence header (default)"/>
+
+        <param name="map_mismatch" type="boolean" truevalue="map_mismatch_true" falsevalue="map_mismatch_false" checked="false" label="Map with one mismatch in the seed (mapping takes longer)"/>
+
+        <param name="map_threshold" value="5" type="integer" optional="false" label="A read is allowed to map up to this number of positions in the genome">
+            <validator type="in_range" min="1" message="Minimum value is 1"/>
+        </param>
+    </inputs>
+
+    <outputs>
+        <data format="fasta" name="output_fasta" label="$collapse_reads of ${tool.name} on ${on_string}"/>
+        <data format="arf" name="output_arf" label="Mapping output of ${tool.name} on ${on_string} in ARF format"/>
+    </outputs>
+
+    <help>
+Pre-processes the reads and maps them to the selected Bowtie index by running mapper.pl.
+
+Two datasets are produced: the processed reads in fasta format and the mappings in ARF format.
+    </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirdeep2/mapper_wrapper.pl	Mon Sep 19 19:04:41 2011 -0400
@@ -0,0 +1,51 @@
+#!/usr/bin/perl
+#
+# Galaxy wrapper around mapper.pl.
+#
+# Usage:
+#   mapper_wrapper.pl <index_dataset> <output_arf> <index_dir> <mapper args...>
+#
+#   index_dataset  dataset file of the bowtie index; only its basename is used
+#   output_arf     file that receives the mappings (mapper.pl -t)
+#   index_dir      directory holding the index; the prefix handed to
+#                  mapper.pl -p is "<index_dir>/<basename of index_dataset>"
+#   mapper args    passed to mapper.pl unchanged, ahead of -p and -t
+
+use strict;
+use warnings;
+
+use File::Basename qw/ basename /;
+use File::Spec;
+use POSIX ();
+
+# Run a command without a shell and with stderr sent to the null device.
+# Returns the wait status ($?).
+sub run_without_stderr {
+    my @command = @_;
+
+    my $pid = fork();
+    die "Cannot fork: $!\n" unless defined $pid;
+
+    if ($pid == 0) {
+        # _exit leaves the child without running END blocks.
+        open(STDERR, '>', File::Spec->devnull()) or POSIX::_exit(127);
+        exec { $command[0] } @command or POSIX::_exit(127);
+    }
+
+    waitpid($pid, 0);
+    return $?;
+}
+
+die "Usage: $0 <index_dataset> <output_arf> <index_dir> <mapper args...>\n"
+    if @ARGV < 4;
+my ($bowtie_index_name, $output_arf, $bowtie_files_path, @mapper_args) = @ARGV;
+
+my $index_prefix = "$bowtie_files_path/" . basename($bowtie_index_name);
+
+my $status = run_without_stderr(
+    'mapper.pl', @mapper_args, '-p', $index_prefix, '-t', $output_arf
+);
+die "mapper.pl failed (wait status $status)\n" if $status != 0;
+
+# -s is false for a missing file as well as for an empty one.
+die "No reads aligned to the reference." unless -s $output_arf;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirdeep2/mirdeep2.xml	Mon Sep 19 19:04:41 2011 -0400
@@ -0,0 +1,91 @@
+<tool id="mirdeep2" name="miRDeep2">
+    <description>Find miRNA candidates using output from mapping step</description>
+
+    <requirements>
+        <requirement type="perl-module">mirdeep2_wrapper.pl</requirement>
+    </requirements>
+
+    <!--
+        The first thirteen arguments are positional and read by the wrapper;
+        the option flags that follow are handed to miRDeep2.pl.
+    -->
+    <command interpreter="perl">
+        ./mirdeep2_wrapper.pl "$collapsed_reads" "$bowtie_index" "$bowtie_index.extra_files_path" "$arf_file" "$mirna_ref" "$mirna_other" "$precursors" "$html_output.files_path" "$collapsed_reads.name" "$csv_output" "$html_output" "$survey_output" "$mrd_output"
+
+        #if str($min_stack_height) != '':
+        -a $min_stack_height
+        #end if
+
+        #if str($max_num_precursors) != '':
+        -g $max_num_precursors
+        #end if
+
+        #if str($min_score_cutoff) != '':
+        -b $min_score_cutoff
+        #end if
+
+        #if str($disable_randfold) == 'disable_randfold_true':
+        -c
+        #end if
+
+        #if str($ucsc_genome) != 'None':
+        -t $ucsc_genome
+        #end if
+
+        #if str($star_seq) != 'None':
+        -s "$star_seq"
+        #end if
+    </command>
+
+    <inputs>
+        <param format="fasta" type="data" name="collapsed_reads" optional="false" label="Collapsed reads fasta file" help="Collapsed read fasta file outputted from mapping step"/>
+
+        <param format="bowtie_html_index" type="data" name="bowtie_index" optional="false" label="Bowtie indexed reference" help="Select the bowtie-build run, NOT the fasta reference file you indexed"/>
+
+        <param format="arf" type="data" name="arf_file" optional="false" label="ARF file from mapping step"/>
+
+        <param format="fasta" type="data" name="mirna_ref" optional="true" label="Known miRBase miRNA sequences in fasta format (optional)" help="If used, these should be the known mature sequences for the species being analyzed"/>
+
+        <param format="fasta" type="data" name="mirna_other" optional="true" label="Known related miRBase miRNA sequences in fasta format (optional)" help="If used, these should be pooled known mature sequences for 1-5 species closely related to the species being analyzed"/>
+
+        <param format="fasta" type="data" name="precursors" optional="true" label="Known miRBase miRNA precursors in fasta format (optional)" help="If used, these should be the known precursor sequences for the species being analyzed"/>
+
+        <param format="fasta" type="data" name="star_seq" optional="true" label="Known miRBase miRNA star sequences in fasta format (optional)" help="If used, these should be the known star sequences for the species being analyzed"/>
+
+        <param name="min_stack_height" value="" type="integer" optional="true" label="Minimum read stack height that triggers analysis (optional)" help="Using this option disables automatic estimation of the optimal value and all detected precursors are analyzed"/>
+
+        <param name="max_num_precursors" value="50000" type="integer" label="Maximum number of precursors to analyze when automatic excision gearing is used (default 50000)" help="If set to -1 all precursors will be analyzed"/>
+
+        <param name="min_score_cutoff" value="0" type="integer" label="Minimum score cut-off for predicted novel miRNAs to be displayed in the overview table" help="This score cut-off is by default 0"/>
+
+        <param name="disable_randfold" type="boolean" truevalue="disable_randfold_true" falsevalue="disable_randfold_false" checked="false" label="Disable randfold analysis" help="Runs faster but with less accurate results"/>
+
+        <param name="ucsc_genome" type="select" optional="true" label="Species being analyzed - this is used to link to the appropriate UCSC browser entry (optional)">
+            <options from_file="ucsc_browser_species.txt">
+                <column name="value" index="0"/>
+            </options>
+        </param>
+    </inputs>
+
+    <outputs>
+        <data format="csv" name="csv_output" label="miRNA CSV output of ${tool.name} on ${on_string}"/>
+        <data format="html" name="html_output" label="miRNA HTML output of ${tool.name} on ${on_string}"/>
+        <data format="csv" name="survey_output" label="miRNA prediction accuracy spreadsheet of ${tool.name} on ${on_string}"/>
+        <data format="txt" name="mrd_output" label="miRNA hairpins of ${tool.name} on ${on_string}"/>
+    </outputs>
+
+    <help>
+miRDeep2 is a software package for identification of novel and known miRNAs in deep sequencing data. Furthermore, it can be used for miRNA expression profiling across samples.
+
+The output files produced are:
+
+result.html: a html table giving an overview of novel and known miRNAs detected in the data. The table is hyperlinked to pdfs showing the signature and structure of each hairpin.
+
+result.csv: spread-sheet format of result.html
+
+survey.csv: spread-sheet of prediction accuracy for all score-cutoffs between -10 and 10.
+
+output.mrd: text output of the reported hairpins.
+
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirdeep2/mirdeep2_wrapper.pl	Mon Sep 19 19:04:41 2011 -0400
@@ -0,0 +1,132 @@
+#!/usr/bin/perl
+#
+# Galaxy wrapper around miRDeep2.pl.
+#
+# Usage:
+#   mirdeep2_wrapper.pl <reads> <index_dataset> <index_dir> <arf> <mirna_ref>
+#       <mirna_other> <precursors> <files_path> <input_name> <csv_out>
+#       <html_out> <survey_out> <mrd_out> [miRDeep2.pl options...]
+#
+#   reads, arf       collapsed reads and their mappings
+#   index_dataset    only its basename is used; miRDeep2.pl is given
+#                    "<index_dir>/<basename>" as its second argument
+#   mirna_ref, mirna_other, precursors
+#                    fasta files, or "None" when not supplied
+#   files_path       directory that receives the PDFs linked from html_out
+#   input_name       accepted for compatibility; not used
+#   csv_out, html_out, survey_out, mrd_out
+#                    destinations for result*.csv, result*.html, survey.csv
+#                    and output.mrd
+#
+# miRDeep2.pl runs inside a temporary directory that is removed on exit.
+
+use strict;
+use warnings;
+
+use Cwd;
+use File::Basename qw/ basename /;
+use File::Copy qw/ copy /;
+use File::Path qw/ mkpath /;
+use File::Spec;
+use File::Temp qw/ tempfile tempdir /;
+use POSIX ();
+
+# Run a command without a shell, discarding its stdout and stderr.
+# Returns the wait status ($?).
+sub run_silently {
+    my @command = @_;
+
+    my $pid = fork();
+    die "Cannot fork: $!\n" unless defined $pid;
+
+    if ($pid == 0) {
+        # _exit leaves the child without running END blocks.
+        my $null = File::Spec->devnull();
+        open(STDOUT, '>', $null) or POSIX::_exit(127);
+        open(STDERR, '>', $null) or POSIX::_exit(127);
+        exec { $command[0] } @command or POSIX::_exit(127);
+    }
+
+    waitpid($pid, 0);
+    return $?;
+}
+
+# Return the first file matching a glob pattern, or die naming what is missing.
+sub find_result {
+    my ($pattern, $what, $where) = @_;
+    my ($match) = glob($pattern);
+    die "Error: Cannot find $what result file in dir $where\n"
+        unless defined $match;
+    return $match;
+}
+
+# Copy a file, or die naming both paths.
+sub copy_or_die {
+    my ($from, $to) = @_;
+    copy($from, $to) or die "Cannot copy $from to $to: $!\n";
+    return;
+}
+
+die "Usage: $0 <13 positional arguments> [miRDeep2.pl options...]\n"
+    if @ARGV < 13;
+
+my (
+    $collapsed_reads, $bowtie_index_name, $bowtie_files_path, $arf_file,
+    $mirna_ref, $mirna_other, $precursors, $file_path, $input_name,
+    $csv_output, $html_output, $survey_output, $mrd_output, @options
+) = @ARGV;
+
+# Make every path absolute so it stays valid after the chdir below.
+for my $path ($collapsed_reads, $bowtie_files_path, $arf_file, $file_path,
+              $csv_output, $html_output, $survey_output, $mrd_output) {
+    $path = File::Spec->rel2abs($path);
+}
+
+# An unset optional dataset arrives as "None" and is passed on as "none".
+for my $optional ($mirna_ref, $mirna_other, $precursors) {
+    $optional = $optional eq 'None' ? 'none' : File::Spec->rel2abs($optional);
+}
+
+my $reference = "$bowtie_files_path/" . basename($bowtie_index_name);
+
+# Directory for the PDFs; mkpath croaks when it cannot be created.
+mkpath($file_path) unless -d $file_path;
+
+# do all the dirty work in a temp directory
+my $cwd = cwd();
+my $tempdir = tempdir(CLEANUP => 1);
+chdir($tempdir) or die "Cannot chdir to $tempdir: $!\n";
+
+my $status = run_silently(
+    'miRDeep2.pl', $collapsed_reads, $reference, $arf_file,
+    $mirna_ref, $mirna_other, $precursors, '-v', @options
+);
+die "miRDeep2.pl failed (wait status $status)\n" if $status != 0;
+
+# Patterns are relative to the temp directory, which is the current directory.
+for my $pdf (glob('pdfs*/*.pdf')) {
+    copy_or_die($pdf, $file_path);
+}
+
+my $csv_file    = find_result('result*.csv', 'csv', $tempdir);
+my $html_file   = find_result('result*.html', 'html', $tempdir);
+my $survey_file = find_result('mirdeep_runs/run*/survey.csv', 'survey', $tempdir);
+my $mrd_file    = find_result('mirdeep_runs/run*/output.mrd', 'hairpin', $tempdir);
+
+# Replace file:// links to PDFs with the bare PDF name so they resolve
+# relative to the HTML dataset. The character classes stop at quotes, angle
+# brackets and whitespace, so several links on one line are handled separately.
+open(my $in, '<', $html_file) or die "Cannot read $html_file: $!\n";
+open(my $out, '>', $html_output) or die "Cannot write $html_output: $!\n";
+while (my $line = <$in>) {
+    $line =~ s{file://[^"'<>\s]*/([^/"'<>\s]+\.pdf)}{$1}g;
+    print {$out} $line;
+}
+close($in);
+close($out) or die "Cannot write $html_output: $!\n";
+
+copy_or_die($csv_file, $csv_output);
+copy_or_die($survey_file, $survey_output);
+copy_or_die($mrd_file, $mrd_output);
+
+chdir($cwd);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirdeep2/target_spy.xml	Mon Sep 19 19:04:41 2011 -0400
@@ -0,0 +1,27 @@
+<tool id="target_spy" name="Target Spy">
+    <description>Find targets for miRNAs</description>
+
+    <requirements>
+        <requirement type="perl-module">target_spy_wrapper.pl</requirement>
+    </requirements>
+
+    <!-- Wrapper arguments, in order: miRNA fasta, transcript fasta, output file. -->
+    <command interpreter="perl">
+        ./target_spy_wrapper.pl "$mirna_file" "$trans_file" "$targets"
+    </command>
+
+    <inputs>
+        <param format="fasta" name="mirna_file" type="data" optional="false" label="miRNA file in Fasta format" />
+        <param format="fasta" name="trans_file" type="data" optional="false" label="3' UTR transcript file in Fasta format" />
+    </inputs>
+
+    <outputs>
+        <data format="txt" name="targets"/>
+    </outputs>
+
+    <help>
+Runs TargetSpy with the selected miRNA fasta file and 3' UTR transcript fasta file.
+
+The output dataset is the uncompressed TargetSpy result file.
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirdeep2/target_spy_wrapper.pl	Mon Sep 19 19:04:41 2011 -0400
@@ -0,0 +1,53 @@
+#!/usr/bin/perl
+#
+# Galaxy wrapper around target_spy_wrapper.sh.
+#
+# Usage:
+#   target_spy_wrapper.pl <mirna_fasta> <transcript_fasta> <targets_out>
+#
+# The shell script runs inside a temporary directory and leaves its result in
+# a file called "targets", which is then copied to <targets_out>.
+
+use strict;
+use warnings;
+
+use Cwd;
+use File::Copy qw/ copy /;
+use File::Spec;
+use File::Temp qw/ tempfile tempdir /;
+use FindBin;
+
+# Install location that was hard-coded before; kept as a fallback.
+my $legacy_script = '/opt/Bio/galaxy-dist/tools/mirdeep2/target_spy_wrapper.sh';
+
+die "Usage: $0 <mirna_fasta> <transcript_fasta> <targets_out>\n" if @ARGV < 3;
+
+# Absolute paths stay valid after the chdir below.
+my ($mirna_file, $trans_file, $targets) =
+    map { File::Spec->rel2abs($_) } @ARGV[0 .. 2];
+
+# Prefer the script shipped next to this wrapper.
+my $script = File::Spec->catfile($FindBin::Bin, 'target_spy_wrapper.sh');
+$script = $legacy_script unless -e $script;
+-e $script or die "Cannot find target_spy_wrapper.sh\n";
+
+# do all the dirty work in a temp directory
+my $cwd = cwd();
+my $tempdir = tempdir(CLEANUP => 1);
+chdir($tempdir) or die "Cannot chdir to $tempdir: $!\n";
+
+# List-form pipe: the file names are never parsed by a shell.
+open(my $pipe, '-|', 'sh', $script, $mirna_file, $trans_file)
+    or die "Cannot run $script: $!\n";
+my $ret_ts = do { local $/; <$pipe> };
+$ret_ts = '' unless defined $ret_ts;
+close($pipe);
+my $status = $?;
+
+# Anything printed on stdout is treated as an error report.
+die "TargetSpy error: $ret_ts\n" if $ret_ts ne '';
+die "TargetSpy failed (wait status $status)\n" if $status != 0;
+
+copy('targets', $targets) or die "Cannot copy targets to $targets: $!\n";
+
+chdir($cwd);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirdeep2/target_spy_wrapper.sh	Mon Sep 19 19:04:41 2011 -0400
@@ -0,0 +1,4 @@
+export PATH=/share/apps/ViennaRNA-1.6.1/bin:/share/apps/TargetSpy/bin:$PATH
+
+TargetSpy -microRNAs $1 -transcripts $2 -result targets
+gunzip targets.gz
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirdeep2/ucsc_browser_species.txt	Mon Sep 19 19:04:41 2011 -0400
@@ -0,0 +1,48 @@
+None
+A.gambiae
+A.mellifera
+Cat
+C.brenneri
+C.briggsae
+C.elegans
+Chicken
+Chimp
+C.intestinalis
+C.japonica
+Cow
+C.remanei
+D.ananassae
+D.erecta
+D.grimshawi
+D.melanogaster
+D.mojavensis
+Dog
+D.persimilis
+D.pseudoobscura
+D.sechellia
+D.simulans
+D.virilis
+D.yakuba
+Fugu
+GuineaPig
+Horse
+Human
+Lamprey
+Lancelet
+Lizard
+Marmoset
+Medaka
+Mouse
+Opossum
+Orangutan
+Platypus
+P.pacificus
+Rat
+Rhesus
+S.cerevisiae
+S.purpuratus
+Stickleback
+Tetraodon
+X.tropicalis
+Zebrafinch
+Zebrafish