Mercurial > repos > nikhil-joshi > mirdeep2_and_targetspy
changeset 0:5b9519f4b165
Upload first commit
author | nikhil-joshi |
---|---|
date | Mon, 19 Sep 2011 19:04:41 -0400 |
parents | |
children | 798fe7ba8b5e |
files | mirdeep2/bowtie_build.xml mirdeep2/bowtie_build_wrapper.pl mirdeep2/mapper.xml mirdeep2/mapper_wrapper.pl mirdeep2/mirdeep2.xml mirdeep2/mirdeep2_wrapper.pl mirdeep2/target_spy.xml mirdeep2/target_spy_wrapper.pl mirdeep2/target_spy_wrapper.sh mirdeep2/ucsc_browser_species.txt |
diffstat | 10 files changed, 399 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!-- mirdeep2/bowtie_build.xml: Galaxy tool definition that indexes a FASTA reference for Bowtie. -->
<tool id="bowtiebuild" name="Bowtie Indexing">
    <description>Build index for Bowtie alignment</description>

    <requirements>
        <requirement type="perl-module">bowtie_build_wrapper.pl</requirement>
    </requirements>

    <!--
        Wrapper arguments, in order: reference FASTA, directory for the index
        files, HTML output dataset, display name of the reference.
        Every argument is quoted so that each one reaches the wrapper as a
        single word.
    -->
    <command interpreter="perl">
        ./bowtie_build_wrapper.pl "$fastafile" "$output.files_path" "$output" "$fastafile.name"
    </command>

    <inputs>
        <param format="fasta" name="fastafile" type="data" optional="false" label="Fasta reference file" help="Reference file in fasta format"/>
    </inputs>

    <outputs>
        <data format="bowtie_html_index" name="output"/>
    </outputs>

    <help>
Builds a Bowtie index for a reference file in fasta format by running bowtie-build.

The result is an HTML page that lists the files produced for the index. Select this result, not the fasta reference file, wherever a "Bowtie indexed reference" is requested.
    </help>
</tool>
#!/usr/bin/perl
# mirdeep2/bowtie_build_wrapper.pl
#
# Galaxy wrapper around bowtie-build.
#
# Usage:
#   bowtie_build_wrapper.pl <fasta> <files_path> <output_html> <fasta_label>
#
#   fasta        reference sequences to index
#   files_path   directory that receives the index files (created if missing)
#   output_html  file that receives an HTML listing of files_path
#   fasta_label  display name of the reference, used in the HTML heading
#
# A copy of the reference is stored in files_path under the base name of
# output_html, and the index is built using that same path as its prefix.
# Dies if a file operation or bowtie-build itself fails.

use strict;
use warnings;

use File::Basename qw(basename);
use File::Copy qw(copy);
use File::Path qw(make_path);

@ARGV == 4
    or die "Usage: $0 <fasta> <files_path> <output_html> <fasta_label>\n";
my ($fastafile, $file_path, $output_file, $fastafile_text) = @ARGV;

my $output_basename = basename($output_file);
my $index_prefix    = "$file_path/$output_basename";

make_path($file_path);
-d $file_path
    or die "Cannot create directory $file_path\n";

# Keep the reference next to its index, named like the index prefix.
copy($fastafile, $index_prefix)
    or die "Cannot copy $fastafile to $index_prefix: $!\n";

# List form bypasses the shell, so paths containing spaces or shell
# metacharacters are passed through unchanged.
system('bowtie-build', $fastafile, $index_prefix) == 0
    or die "bowtie-build failed (wait status $?)\n";

# Collect the visible directory entries in sorted order for the listing.
opendir(my $dir_handle, $file_path)
    or die "Cannot read directory $file_path: $!\n";
my @index_files = sort grep { !/^\./ } readdir $dir_handle;
closedir $dir_handle;

open(my $html, '>', $output_file)
    or die "Cannot write $output_file: $!\n";
print {$html} '<h1>Bowtie index on ', escape_html($fastafile_text), "</h1>\n";
for my $file (@index_files) {
    my $name = escape_html($file);
    print {$html} "<a href='$name'>$name</a><br/>\n";
}
close($html)
    or die "Cannot finish writing $output_file: $!\n";

# Replace the characters that are special in HTML text and in quoted
# attribute values with their entities; returns the escaped copy.
sub escape_html {
    my ($text) = @_;
    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
        "'" => '&#39;',
    );
    $text =~ s/([&<>"'])/$entity_for{$1}/g;
    return $text;
}
<!-- mirdeep2/mapper.xml: Galaxy tool definition that maps reads against a Bowtie index via mapper_wrapper.pl. -->
<tool id="mapper" name="Mapping to reference">
    <description>Mapping using a bowtie index</description>

    <requirements>
        <requirement type="perl-module">mapper_wrapper.pl</requirement>
    </requirements>

    <!--
        The first three arguments are consumed by the wrapper itself (index
        dataset, ARF output, index files directory); everything from the reads
        file onwards is handed to mapper.pl unchanged.
    -->
    <command interpreter="perl">
        ./mapper_wrapper.pl $bowtie_index_name $output_arf $bowtie_index_name.extra_files_path $reads

        #if $reads.extension.startswith("fastq"):
            -e
        #end if

        #if $reads.extension.startswith("fasta"):
            -c
        #end if

        #if str($convert_rna) == "convert_rna_true":
            -i
        #end if

        #if str($remove_non_atcgun) == "remove_non_atcgun_true":
            -j
        #end if

        #if str($adapter_seq) != "":
            -k $adapter_seq
        #end if

        #if str($discard_short_reads) != "":
            -l $discard_short_reads
        #end if

        #if str($collapse_reads) == "Collapsed Reads Fasta":
            -m
        #end if

        ## -q switches one-mismatch seed mapping ON, so it belongs to the checked state.
        #if str($map_mismatch) == "map_mismatch_true":
            -q
        #end if

        #if str($map_threshold) != "":
            -r $map_threshold
        #end if

        -h -s $output_fasta -n
    </command>

    <inputs>
        <param format="fastq, fasta" name="reads" type="data" optional="false" label="Reads" help="Reads in fastq or fasta format"/>
        <param format="bowtie_html_index" name="bowtie_index_name" type="data" optional="false" label="Bowtie indexed reference" help="Select the bowtie-build run, NOT the fasta reference file you indexed."/>

        <param name="convert_rna" type="boolean" truevalue="convert_rna_true" falsevalue="convert_rna_false" checked="false" label="Convert RNA to DNA alphabet (to map against genome)"/>

        <param name="remove_non_atcgun" type="boolean" truevalue="remove_non_atcgun_true" falsevalue="remove_non_atcgun_false" checked="false" label="Remove reads with non-standard nucleotides" help="Remove all entries that have a sequence that contains letters other than a,c,g,t,u,n,A,C,G,T,U,N"/>

        <!--
            The pattern accepts n/N to agree with the message below, and accepts
            the empty string because the adapter is optional; an empty adapter
            is never passed to mapper.pl (see the -k guard in the command).
        -->
        <param name="adapter_seq" value="" type="text" optional="true" label="Clip 3' Adapter Sequence (optional)" help="Adapter Sequence can only contain a,c,g,t,u,n,A,C,G,T,U,N">
            <validator type="regex" message="Adapter can ONLY contain a,c,g,t,u,n,A,C,G,T,U,N">^[ACGTUNacgtun]*$</validator>
        </param>

        <param name="discard_short_reads" value="17" type="integer" optional="false" label="Discard reads shorter than this length (0 for keeping all reads)" help="Note that miRDeep2 requires no reads under 17 in length">
            <validator type="in_range" min="0" message="Minimum value is 0"/>
        </param>

        <!-- The true/false values double as the first words of the fasta output label. -->
        <param name="collapse_reads" type="boolean" truevalue="Collapsed Reads Fasta" falsevalue="Fasta" checked="true" label="Collapse identical reads into one read with count information in sequence header (default)"/>

        <param name="map_mismatch" type="boolean" truevalue="map_mismatch_true" falsevalue="map_mismatch_false" checked="false" label="Map with one mismatch in the seed (mapping takes longer)"/>

        <param name="map_threshold" value="5" type="integer" optional="false" label="A read is allowed to map up to this number of positions in the genome">
            <validator type="in_range" min="1" message="Minimum value is 1"/>
        </param>
    </inputs>

    <outputs>
        <data format="fasta" name="output_fasta" label="$collapse_reads of ${tool.name} on ${on_string}"/>
        <data format="arf" name="output_arf" label="Mapping output of ${tool.name} on ${on_string} in ARF format"/>
    </outputs>
</tool>
#!/usr/bin/perl
# mirdeep2/mapper_wrapper.pl
#
# Galaxy wrapper around miRDeep2's mapper.pl.
#
# Usage:
#   mapper_wrapper.pl <index_dataset> <output_arf> <index_files_path> <mapper.pl arguments...>
#
#   index_dataset     dataset whose base name is the Bowtie index prefix
#   output_arf        file that receives the mappings (mapper.pl -t)
#   index_files_path  directory holding the Bowtie index files
#   remaining         passed to mapper.pl unchanged, ahead of -p and -t
#
# mapper.pl's standard error is discarded. Dies when mapper.pl cannot be
# started or when the ARF output is missing or empty.

use strict;
use warnings;

use File::Basename qw(basename);
use File::Spec;

@ARGV >= 3
    or die "Usage: $0 <index_dataset> <output_arf> <index_files_path> [mapper.pl arguments]\n";
my ($bowtie_index_name, $output_arf, $bowtie_files_path, @options) = @ARGV;

my $index_prefix = $bowtie_files_path . '/' . basename($bowtie_index_name);

# Silence the child's STDERR without involving a shell: point our own STDERR
# at the null device for the duration of the call, then restore it.
open(my $saved_stderr, '>&', \*STDERR)
    or die "Cannot duplicate STDERR: $!\n";
open(STDERR, '>', File::Spec->devnull)
    or die "Cannot redirect STDERR: $!\n";

# List form: every argument reaches mapper.pl exactly as it was received.
my $status       = system('mapper.pl', @options, '-p', $index_prefix, '-t', $output_arf);
my $launch_error = "$!";

open(STDERR, '>&', $saved_stderr)
    or die "Cannot restore STDERR: $!\n";
close $saved_stderr;

if ($status == -1) {
    die "Cannot run mapper.pl: $launch_error\n";
}

# -s is false for a missing file as well as for an empty one.
if (!-s $output_arf) {
    die "No reads aligned to the reference.";
}
<!-- mirdeep2/mirdeep2.xml: Galaxy tool definition that runs miRDeep2 via mirdeep2_wrapper.pl. -->
<tool id="mirdeep2" name="miRDeep2">
    <description>Find miRNA candidates using output from mapping step</description>

    <requirements>
        <requirement type="perl-module">mirdeep2_wrapper.pl</requirement>
    </requirements>

    <!--
        The wrapper takes thirteen positional arguments (inputs, the directory
        for the linked pdfs, the display name of the reads, then the four
        outputs); anything after them is passed on to miRDeep2.pl as options.
    -->
    <command interpreter="perl">
        ./mirdeep2_wrapper.pl $collapsed_reads $bowtie_index $bowtie_index.extra_files_path $arf_file $mirna_ref $mirna_other $precursors "$html_output.files_path" "$collapsed_reads.name" $csv_output $html_output $survey_output $mrd_output

        #if str($min_stack_height) != '':
            -a $min_stack_height
        #end if

        #if str($max_num_precursors) != '':
            -g $max_num_precursors
        #end if

        #if str($min_score_cutoff) != '':
            -b $min_score_cutoff
        #end if

        #if str($disable_randfold) == 'disable_randfold_true':
            -c
        #end if

        #if str($ucsc_genome) != 'None':
            -t $ucsc_genome
        #end if

        #if str($star_seq) != 'None':
            -s $star_seq
        #end if
    </command>

    <inputs>
        <param format="fasta" type="data" name="collapsed_reads" optional="false" label="Collapsed reads fasta file" help="Collapsed read fasta file outputted from mapping step"/>

        <param format="bowtie_html_index" type="data" name="bowtie_index" optional="false" label="Bowtie indexed reference" help="Select the bowtie-build run, NOT the fasta reference file you indexed"/>

        <param format="arf" type="data" name="arf_file" optional="false" label="ARF file from mapping step"/>

        <param format="fasta" type="data" name="mirna_ref" optional="true" label="Known miRBase miRNA sequences in fasta format (optional)" help="If used, these should be the known mature sequences for the species being analyzed"/>

        <param format="fasta" type="data" name="mirna_other" optional="true" label="Known related miRBase miRNA sequences in fasta format (optional)" help="If used, these should be pooled known mature sequences for 1-5 species closely related to the species being analyzed"/>

        <param format="fasta" type="data" name="precursors" optional="true" label="Known miRBase miRNA precursors in fasta format (optional)" help="If used, these should be the known precursor sequences for the species being analyzed"/>

        <param format="fasta" type="data" name="star_seq" optional="true" label="Known miRBase miRNA star sequences in fasta format (optional)" help="If used, these should be the known star sequences for the species being analyzed"/>

        <param name="min_stack_height" value="" type="integer" optional="true" label="Minimum read stack height that triggers analysis (optional)" help="Using this option disables automatic estimation of the optimal value and all detected precursors are analyzed"/>

        <param name="max_num_precursors" value="50000" type="integer" label="Maximum number of precursors to analyze when automatic excision gearing is used (default 50000)" help="If set to -1 all precursors will be analyzed"/>

        <param name="min_score_cutoff" value="0" type="integer" label="Minimum score cut-off for predicted novel miRNAs to be displayed in the overview table" help="This score cut-off is by default 0"/>

        <param name="disable_randfold" type="boolean" truevalue="disable_randfold_true" falsevalue="disable_randfold_false" checked="false" label="Disable randfold analysis" help="Runs faster but with less accurate results"/>

        <!-- Species names are read from the first column of ucsc_browser_species.txt. -->
        <param name="ucsc_genome" type="select" optional="true" label="Species being analyzed - this is used to link to the appropriate UCSC browser entry (optional)">
            <options from_file="ucsc_browser_species.txt">
                <column name="value" index="0"/>
            </options>
        </param>
    </inputs>

    <outputs>
        <data format="csv" name="csv_output" label="miRNA CSV output of ${tool.name} on ${on_string}"/>
        <data format="html" name="html_output" label="miRNA HTML output of ${tool.name} on ${on_string}"/>
        <data format="csv" name="survey_output" label="miRNA prediction accuracy spreadsheet of ${tool.name} on ${on_string}"/>
        <data format="txt" name="mrd_output" label="miRNA hairpins of ${tool.name} on ${on_string}"/>
    </outputs>

    <help>
miRDeep2 is a software package for identification of novel and known miRNAs in deep sequencing data. Furthermore, it can be used for miRNA expression profiling across samples.

The output files produced are:

result.html: a html table giving an overview of novel and known miRNAs detected in the data. The table is hyperlinked to pdfs showing the signature and structure of each hairpin.

result.csv: spread-sheet format of result.html

survey.csv: spread-sheet of prediction accuracy for all score-cutoffs between -10 and 10.

output.mrd: text output of the reported hairpins.

    </help>
</tool>
#!/usr/bin/perl
# mirdeep2/mirdeep2_wrapper.pl
#
# Galaxy wrapper around miRDeep2.pl.
#
# Usage:
#   mirdeep2_wrapper.pl <collapsed_reads> <index_dataset> <index_files_path>
#       <arf_file> <mirna_ref> <mirna_other> <precursors> <pdf_dir>
#       <input_name> <csv_out> <html_out> <survey_out> <mrd_out>
#       [miRDeep2.pl options...]
#
# miRDeep2.pl is run inside a temporary directory with its standard output
# and standard error discarded. Afterwards its pdfs are copied to pdf_dir,
# the result csv/html, survey.csv and output.mrd are copied to the four
# output files, and "file://" links to pdfs in the html are reduced to the
# bare pdf file name. Dies when an expected result file is missing or a
# copy fails.

use strict;
use warnings;

use Cwd;
use File::Basename qw(basename);
use File::Copy qw(copy);
use File::Path qw(make_path);
use File::Spec;
use File::Temp qw(tempfile tempdir);
use POSIX ();

@ARGV >= 13
    or die "Usage: $0 <13 positional arguments> [miRDeep2.pl options]\n";

# $input_name is accepted for interface compatibility but is not used.
my ($collapsed_reads, $bowtie_index_name, $bowtie_files_path, $arf_file,
    $mirna_ref, $mirna_other, $precursors, $file_path, $input_name,
    $csv_output, $html_output, $survey_output, $mrd_output,
    @options) = @ARGV;

# An omitted optional input arrives as "None"; it is handed on as "none".
for my $optional ($mirna_ref, $mirna_other, $precursors) {
    $optional = 'none' if $optional eq 'None';
}

# The work happens after a chdir, so anchor every path to the starting
# directory first; absolute paths pass through unchanged.
for my $path ($collapsed_reads, $bowtie_files_path, $arf_file, $file_path,
              $csv_output, $html_output, $survey_output, $mrd_output) {
    $path = File::Spec->rel2abs($path);
}
for my $path ($mirna_ref, $mirna_other, $precursors) {
    $path = File::Spec->rel2abs($path) unless $path eq 'none';
}

# point to correct bowtie index path
my $genome = $bowtie_files_path . '/' . basename($bowtie_index_name);

# create the path used to house the pdfs
make_path($file_path);
-d $file_path
    or die "Error: Cannot create directory $file_path\n";

# do all the dirty work in a temp directory
my $cwd     = cwd();
my $tempdir = tempdir(CLEANUP => 1);
chdir($tempdir)
    or die "Error: Cannot change to dir $tempdir: $!\n";

my $status = run_quietly(
    'miRDeep2.pl', $collapsed_reads, $genome, $arf_file,
    $mirna_ref, $mirna_other, $precursors, '-v', @options,
);
if (($status >> 8) == 127) {
    die "Error: Cannot run miRDeep2.pl\n";
}

# Patterns are relative to the temp directory, which is the current directory.
for my $pdf (glob 'pdfs*/*.pdf') {
    copy($pdf, $file_path)
        or die "Error: Cannot copy $pdf to $file_path: $!\n";
}

my $csv_file    = first_match('result*.csv',                  'csv',     $tempdir);
my $html_file   = first_match('result*.html',                 'html',    $tempdir);
my $survey_file = first_match('mirdeep_runs/run*/survey.csv', 'survey',  $tempdir);
my $mrd_file    = first_match('mirdeep_runs/run*/output.mrd', 'hairpin', $tempdir);

copy_or_die($csv_file, $csv_output);

# replacing mirdeep created links with relative links in galaxy
open(my $html_in, '<', $html_file)
    or die "Error: Cannot read $html_file: $!\n";
open(my $html_out, '>', $html_output)
    or die "Error: Cannot write $html_output: $!\n";
while (my $line = <$html_in>) {
    $line =~ s{file://.+/(.+\.pdf)}{$1}g;
    print {$html_out} $line;
}
close($html_in);
close($html_out)
    or die "Error: Cannot finish writing $html_output: $!\n";

copy_or_die($survey_file, $survey_output);
copy_or_die($mrd_file,    $mrd_output);

# Leave the temp directory again before the script ends.
chdir($cwd);

# Run a command without a shell, with STDOUT and STDERR sent to the null
# device. Returns the wait status; exit code 127 means the command could
# not be started.
sub run_quietly {
    my @command = @_;

    my $pid = fork();
    defined $pid
        or die "Error: Cannot fork: $!\n";

    if ($pid == 0) {
        my $devnull = File::Spec->devnull;
        open(STDOUT, '>', $devnull) or POSIX::_exit(127);
        open(STDERR, '>', $devnull) or POSIX::_exit(127);
        # _exit keeps the child from running the parent's cleanup handlers.
        exec { $command[0] } @command or POSIX::_exit(127);
    }

    waitpid($pid, 0);
    return $?;
}

# Return the first existing file matching a glob pattern, or die with the
# "Cannot find ... result file" message naming $what and $dir.
sub first_match {
    my ($pattern, $what, $dir) = @_;

    my ($file) = glob $pattern;
    if (!defined $file || !-e $file) {
        die "Error: Cannot find $what result file in dir $dir\n";
    }
    return $file;
}

# Copy a file and die with both paths when the copy fails.
sub copy_or_die {
    my ($from, $to) = @_;

    copy($from, $to)
        or die "Error: Cannot copy $from to $to: $!\n";
    return;
}
<!-- mirdeep2/target_spy.xml: Galaxy tool definition that runs TargetSpy via target_spy_wrapper.pl. -->
<tool id="target_spy" name="Target Spy">
    <description>Find targets for miRNAs</description>

    <requirements>
        <requirement type="perl-module">target_spy_wrapper.pl</requirement>
    </requirements>

    <!-- Wrapper arguments, in order: miRNA fasta, transcript fasta, output file. -->
    <command interpreter="perl">
        ./target_spy_wrapper.pl "$mirna_file" "$trans_file" "$targets"
    </command>

    <inputs>
        <param format="fasta" name="mirna_file" type="data" optional="false" label="miRNA file in Fasta format" />
        <param format="fasta" name="trans_file" type="data" optional="false" label="3' UTR transcript file in Fasta format" />
    </inputs>

    <outputs>
        <data format="txt" name="targets"/>
    </outputs>

    <help>
Runs TargetSpy on a miRNA file and a 3' UTR transcript file, both in Fasta format.

The output is the TargetSpy result as a text file.
    </help>
</tool>
#!/usr/bin/perl
# mirdeep2/target_spy_wrapper.pl
#
# Galaxy wrapper around TargetSpy.
#
# Usage:
#   target_spy_wrapper.pl <mirna_fasta> <transcript_fasta> <targets_output>
#
# The helper script target_spy_wrapper.sh is copied into a temporary
# directory and run there with the two input files; the file "targets" it
# leaves behind is copied to targets_output. Any output on the helper's
# standard output is treated as a failure.

use strict;
use warnings;

use Cwd;
use File::Copy qw(copy);
use File::Spec;
use File::Temp qw(tempfile tempdir);
use FindBin qw($Bin);

@ARGV == 3
    or die "Usage: $0 <mirna_fasta> <transcript_fasta> <targets_output>\n";

# The helper runs after a chdir, so anchor the paths to the starting
# directory first; absolute paths pass through unchanged.
my ($mirna_file, $trans_file, $targets) = map { File::Spec->rel2abs($_) } @ARGV;

# Prefer the helper that sits next to this script; fall back to the
# original fixed install location when it is not there.
my $helper_name   = 'target_spy_wrapper.sh';
my $helper_source = "$Bin/$helper_name";
if (!-e $helper_source) {
    $helper_source = "/opt/Bio/galaxy-dist/tools/mirdeep2/$helper_name";
}

# do all the dirty work in a temp directory
my $cwd     = cwd();
my $tempdir = tempdir(CLEANUP => 1);

copy($helper_source, $tempdir)
    or die "Cannot copy $helper_source to $tempdir: $!\n";
chdir($tempdir)
    or die "Cannot change to dir $tempdir: $!\n";

# List-form pipe open: the file names never pass through a shell.
open(my $helper_out, '-|', 'sh', $helper_name, $mirna_file, $trans_file)
    or die "Cannot run $helper_name: $!\n";
my $ret_ts = do { local $/; <$helper_out> };
close($helper_out);

if (defined $ret_ts && $ret_ts ne "") {
    die "TargetSpy error";
}

copy("targets", $targets)
    or die "Cannot copy TargetSpy result to $targets: $!\n";

# Leave the temp directory again before the script ends.
chdir($cwd);
#!/bin/sh
# mirdeep2/target_spy_wrapper.sh
#
# Runs TargetSpy in the current directory.
#   $1  miRNA file
#   $2  transcript file
# The result is requested under the name "targets"; the gunzip step expects
# TargetSpy to have written it as targets.gz.

# Quoted so that an existing PATH containing spaces is kept intact.
export PATH="/share/apps/ViennaRNA-1.6.1/bin:/share/apps/TargetSpy/bin:$PATH"

# Quoted so that file names with spaces or glob characters stay one argument.
TargetSpy -microRNAs "$1" -transcripts "$2" -result targets
gunzip targets.gz
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mirdeep2/ucsc_browser_species.txt Mon Sep 19 19:04:41 2011 -0400 @@ -0,0 +1,48 @@ +None +A.gambiae +A.mellifera +Cat +C.brenneri +C.briggsae +C.elegans +Chicken +Chimp +C.intestinalis +C.japonica +Cow +C.remanei +D.ananassae +D.erecta +D.grimshawi +D.melanogaster +D.mojavensis +Dog +D.persimilis +D.pseudoobscura +D.sechellia +D.simulans +D.virilis +D.yakuba +Fugu +GuineaPig +Horse +Human +Lamprey +Lancelet +Lizard +Marmoset +Medaka +Mouse +Opossum +Orangutan +Platypus +P.pacificus +Rat +Rhesus +S.cerevisiae +S.purpuratus +Stickleback +Tetraodon +X.tropicalis +Zebrafinch +Zebrafish