# HG changeset patch
# User Jim Johnson
# Date 1310916611 18000
# Node ID e5c3175506b79b0a682cf718d441c07fd73a9945
Initial tool configs for qiime, most need work.

diff -r 000000000000 -r e5c3175506b7 README
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README	Sun Jul 17 10:30:11 2011 -0500
@@ -0,0 +1,129 @@
+This was a first attempt at providing galaxy tool_wrappers for the Qiime metagenomics package.
+You must first install Qiime: http://qiime.sourceforge.net/install/install.html
+
+
+
+Initial tool wrappers were generated by a script searching the qiime scripts (version 1.2.1) for usage info,
+and then were hand edited afterwards.
+
+NOTE: A few of the tool configs worked on the galaxy-central code in April 2011.
+I haven't taken time to check them with more recent galaxy releases.
+
+
+I executed the qiime scripts via qiime_wrapper.py.
+This was to accommodate moving multiple outputs to history items: http://wiki.g2.bx.psu.edu/Admin/Tools/Multiple%20Output%20Files
+
+
+The datatypes file metagenomics.py has Mothur datatypes, with a start at qiime types added at the end.
+
+
+
+
+The most commonly used qiime scripts are:
+- check_id_map.py
+- split_libraries.py
+- pick_otus_through_otu_table.py
+- beta_diversity_through_3d_plots.py
+- alpha_rarefaction.py
+- jackknifed_beta_diversity.py
+- filter_by_metadata.py
+- filter_otu_table.py
+- merge_otu_tables.py
+- merge_mapping_files.py
+
+
+Tool_config development status:
+A * marks a tool config that at least displayed in galaxy at some point.
+(Since these were initially auto-generated, some may not make sense in a galaxy framework.)
+
+  add_taxa.xml
+  adjust_seq_orientation.xml
+* align_seqs.xml
+* alpha_diversity.xml  metrics - select  input/output  repeat conditional tree
+* alpha_rarefaction.xml
+* assign_taxonomy.xml  assignment_method - select
+* beta_diversity.xml
+* beta_diversity_through_3d_plots.xml  html - plots
+  beta_significance.xml
+  blast_wrapper.xml
+* check_id_map.xml
+  collate_alpha.xml
+* compare_3d_plots.xml
+  consensus_tree.xml
+  convert_otu_table_to_unifrac_sample_mapping.xml
+  convert_unifrac_sample_mapping_to_otu_table.xml
+* denoise.xml
+* dissimilarity_mtx_stats.xml
+  exclude_seqs_by_blast.xml
+  extract_seqs_by_sample_id.xml
+* filter_alignment.xml
+  filter_by_metadata.xml
+  filter_fasta.xml
+  filter_otu_table.xml
+* filter_otus_by_sample.xml
+  fix_arb_fasta.xml
+  identify_chimeric_seqs.xml
+* jackknifed_beta_diversity.xml
+* make_2d_plots.xml
+* make_3d_plots.xml
+  make_bootstrapped_tree.xml
+  make_distance_histograms.xml
+  make_fastq.xml
+  make_library_id_lists.xml
+* make_otu_heatmap_html.xml
+* make_otu_network.xml
+  make_otu_table.xml
+  make_per_library_sff.xml
+  make_phylogeny.xml
+  make_pie_charts.xml
+  make_prefs_file.xml
+  make_qiime_py_file.xml
+* make_qiime_rst_file.xml
+* make_rarefaction_plots.xml
+* make_sra_submission.xml
+* merge_denoiser_output.xml
+  merge_mapping_files.xml
+  merge_otu_maps.xml
+  merge_otu_tables.xml
+  multiple_rarefactions.xml
+  multiple_rarefactions_even_depth.xml
+  otu_category_significance.xml
+* parallel_align_seqs_pynast.xml
+  parallel_alpha_diversity.xml
+* parallel_assign_taxonomy_blast.xml
+* parallel_assign_taxonomy_rdp.xml
+  parallel_beta_diversity.xml
+* parallel_blast.xml
+  parallel_identify_chimeric_seqs.xml
+  parallel_multiple_rarefactions.xml
+* parallel_pick_otus_blast.xml
+* parallel_pick_otus_uclust_ref.xml
+  per_library_stats.xml
+* pick_otus.xml
+* pick_otus_through_otu_table.xml
+  pick_rep_set.xml
+* plot_rank_abundance_graph.xml
+  poller.xml
+  poller_example.xml
+  pool_by_metadata.xml
+  principal_coordinates.xml
+  print_qiime_config.xml
+* process_sff.xml
+* process_sra_submission.xml
+* quality_scores_plot.xml
+  shared_phylotypes.xml
+  single_rarefaction.xml
+  sort_denoiser_output.xml
+* split_libraries.xml
+* split_libraries_illumina.xml
+  sra_spreadsheet_to_map_files.xml
+  start_parallel_jobs.xml
+  summarize_otu_by_cat.xml
+  summarize_taxa.xml
+* supervised_learning.xml
+* transform_coordinate_matrices.xml
+* tree_compare.xml
+  trflp_file_to_otu_table.xml
+  trim_sff_primers.xml
+* truncate_fasta_qual_files.xml
+  upgma_cluster.xml
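(qiime_wrapper.py itself is not included in this changeset. As a rough sketch of the idea, assuming only the --galaxy_* bookkeeping options that the tool configs below actually pass, and with every other name being illustrative, the wrapper separates its own options from the qiime command line and then runs the script:

    #!/usr/bin/env python
    # Hypothetical sketch of a qiime_wrapper.py; the real wrapper is not in this changeset.
    import os
    import subprocess
    import sys

    def main():
        galaxy_opts = {}
        cmd = []
        for arg in sys.argv[1:]:
            # Keep the wrapper's --galaxy_* bookkeeping out of the qiime command line.
            if arg.startswith('--galaxy_'):
                key, _, val = arg.partition('=')
                galaxy_opts[key[len('--galaxy_'):]] = val.strip("'")
            else:
                cmd.append(arg)
        outdir = galaxy_opts.get('outputdir', '.')
        if not os.path.isdir(outdir):
            os.makedirs(outdir)
        # Run the qiime script; its exit code propagates back to Galaxy.
        subprocess.check_call(cmd)

    if __name__ == '__main__':
        main()

How the collected --galaxy_datasets mapping could then be applied is sketched after the alpha_diversity.xml config below.)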
diff -r 000000000000 -r e5c3175506b7 add_taxa.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/add_taxa.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,31 @@ + + Add taxa to OTU table + + add_taxa.py + + + qiime_wrapper.py + add_taxa.py + --otu_file=$otu_file + --taxonomy_file=$taxonomy_file + --output_file=$output_file + --id_map_file=$id_map_file + + + + + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 adjust_seq_orientation.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/adjust_seq_orientation.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,29 @@ + + Get the reverse complement of all sequences + + adjust_seq_orientation.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + adjust_seq_orientation.py + --input_fasta_fp=$input_fasta_fp + --output_fp=$output_fp + $retain_seq_id + + + + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 align_seqs.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/align_seqs.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,127 @@ + + Align sequences using a variety of alignment methods + + align_seqs.py + + + qiime_wrapper.py + --galaxy_outputdir='$log.extra_files_path' + --galaxy_datasets='^\S+_aligned\.\S+$:'$aligned_fasta,'^\S+_log\.txt$:'$log,'^\S+_failures\.fasta$:'$failures + align_seqs.py + --input_fasta_fp=$input_fasta_fp + --alignment_method=$align.alignment_method + #if $align.alignment_method == 'pynast': + --template_fp=$align.alignment.template_fp + --pairwise_alignment_method=$align.pairwise_alignment_method + --min_length=$align.min_length + --min_percent_id=$align.min_percent_id + --blast_db=$align.blast_db + #elif $align.alignment_method == 'infernal': + --template_fp=$align.alignment.template_fp + #elif $align.alignment_method == 'clustalw': + #echo '' + #elif $align.alignment_method == 'muscle': + #echo '' + #elif $align.alignment_method == 'mafft': + #echo '' + #end if + + --output_dir='$log.extra_files_path' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + align['alignment_method'] == 'pynast' + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 alpha_diversity.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alpha_diversity.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,61 @@ + + Calculate alpha diversity on each sample in an otu table, using a variety of alpha diversity metrics + + alpha_diversity.py + + + qiime_wrapper.py + alpha_diversity.py + --input_path=$input_path + --output_path=$output_path + --metrics=$metrics + #if $tree_path.__str__ != "None" and len($tree_path.__str__) > 0: + --tree_path=$tree_path + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
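(Several of the tool configs above, e.g. align_seqs.xml with '^\S+_aligned\.\S+$:'$aligned_fasta, pass --galaxy_datasets as comma-separated 'regex:dataset_path' pairs. A minimal sketch of how such a mapping could be applied to move script outputs onto Galaxy dataset paths; the move_datasets helper below is hypothetical, not code from this changeset:

    # Hypothetical helper: apply a --galaxy_datasets spec to an output directory.
    import os
    import re
    import shutil

    def move_datasets(outdir, spec):
        pairs = []
        for item in spec.split(','):
            # Each item looks like '^regex$:'/path/to/dataset_NN.dat
            pattern, _, path = item.rpartition(':')
            pairs.append((re.compile(pattern.strip("'")), path.strip("'")))
        for fname in os.listdir(outdir):
            for regex, path in pairs:
                if regex.match(fname):
                    # Galaxy expects each declared output at its dataset path.
                    shutil.move(os.path.join(outdir, fname), path)
                    break

    # Illustrative call, with a made-up dataset path:
    # move_datasets('extra_files', "'^\\S+_aligned\\.\\S+$:'/galaxy/files/dataset_101.dat")
)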
diff -r 000000000000 -r e5c3175506b7 alpha_rarefaction.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alpha_rarefaction.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,108 @@ + + A workflow script for performing alpha rarefaction + + alpha_rarefaction.py + + + qiime_wrapper.py + --galaxy_summary_html='$output_html' + --galaxy_outputdir='$output_html.extra_files_path' + --galaxy_summary_template='$output_template' + ## --galaxy_datasets='^rarefaction_plots.html$:'$output_html + alpha_rarefaction.py + --otu_table_fp=$otu_table_fp + --mapping_fp=$mapping_fp + --output_dir=$output_html.extra_files_path + #if $parameter.source == 'hist': + --parameter_fp=$parameter_fp + #else: + --parameter_fp=$parameter_generated + #end if + --num_steps=$num_steps + $force + $print_only + $parallel + #if $tree_fp != None and $tree_fp.__str__ != 'None': + --tree_fp=$tree_fp + #end if + + + + + + + + + + + + + + + + + + + + + + + + +alpha_diversity:metrics chao1,observed_species,PD_whole_tree +multiple_rarefactions_even_depth:num-reps 20 +parallel:jobs_to_start 2 +parallel:retain_temp_files False +parallel:seconds_to_sleep 60 +collate_alpha:example_path +make_rarefaction_plots:imagetype png +make_rarefaction_plots:resolution 75 +make_rarefaction_plots:background_color white +make_rarefaction_plots:prefs_path + + + + +rarefaction_plots.html + + +]]> + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 assign_taxonomy.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/assign_taxonomy.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,93 @@ + + Assign taxonomy to each sequence + + assign_taxonomy.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + assign_taxonomy.py + --input_fasta_fp=$input_fasta_fp + --assignment_method=$assign.assignment_method + #if $assign.assignment_method == 'rdp' + #if $assign.reference_seqs_fp != None + --reference_seqs_fp=$assign.reference_seqs_fp + #end if + #if $assign.training_data_properties_fp != None + --training_data_properties_fp=$assign.training_data_properties_fp + #end if + #if $assign.id_to_taxonomy_fp != None + --id_to_taxonomy_fp=$assign.id_to_taxonomy_fp + #end if + --confidence=$confidence + #elif $assign.assignment_method == 'blast' + #if $assign.blast_ref.source == 'blast_db' + --blast_db=$assign.blast_ref.blast_db + #elif $assign.blast_ref.source == 'reference_seqs' + -- + #end if + --e_value=$assign.e_value + --id_to_taxonomy_fp=$assign.id_to_taxonomy_fp + #end if + --output_dir=$__new_file_path__ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 beta_diversity.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/beta_diversity.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,255 @@ + + Calculate beta diversity (pairwise sample dissimilarity) on one or many otu tables + + beta_diversity.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + #set datasets = [] + #if $binary_chisq.__str__ != "None": + #set datasets = $datasets + ["'binary_chisq_.*$:'" + $binary_chisq.__str__] + #end if + #if $binary_chord.__str__ != "None": + #set datasets = $datasets + ["'binary_chord_.*$:'" + $binary_chord.__str__] + #end if + #if $binary_euclidean.__str__ != "None": + #set datasets = $datasets + ["'binary_euclidean_.*$:'" + $binary_euclidean.__str__] + #end if + #if $binary_hamming.__str__ != "None": + #set datasets = $datasets + ["'binary_hamming_.*$:'" + $binary_hamming.__str__] + #end if + #if $binary_jaccard.__str__ != "None": + #set datasets = $datasets + ["'binary_jaccard_.*$:'" + $binary_jaccard.__str__] + #end if + #if $binary_lennon.__str__ != "None": + #set datasets = $datasets + ["'binary_lennon_.*$:'" + 
$binary_lennon.__str__] + #end if + #if $binary_ochiai.__str__ != "None": + #set datasets = $datasets + ["'binary_ochiai_.*$:'" + $binary_ochiai.__str__] + #end if + #if $binary_pearson.__str__ != "None": + #set datasets = $datasets + ["'binary_pearson_.*$:'" + $binary_pearson.__str__] + #end if + #if $binary_sorensen_dice.__str__ != "None": + #set datasets = $datasets + ["'binary_sorensen_dice_.*$:'" + $binary_sorensen_dice.__str__] + #end if + #if $bray_curtis.__str__ != "None": + #set datasets = $datasets + ["'bray_curtis_.*$:'" + $bray_curtis.__str__] + #end if + #if $canberra.__str__ != "None": + #set datasets = $datasets + ["'canberra_.*$:'" + $canberra.__str__] + #end if + #if $chisq.__str__ != "None": + #set datasets = $datasets + ["'chisq_.*$:'" + $chisq.__str__] + #end if + #if $chord.__str__ != "None": + #set datasets = $datasets + ["'chord_.*$:'" + $chord.__str__] + #end if + #if $euclidean.__str__ != "None": + #set datasets = $datasets + ["'euclidean_.*$:'" + $euclidean.__str__] + #end if + #if $gower.__str__ != "None": + #set datasets = $datasets + ["'gower_.*$:'" + $gower.__str__] + #end if + #if $hellinger.__str__ != "None": + #set datasets = $datasets + ["'hellinger_.*$:'" + $hellinger.__str__] + #end if + #if $kulczynski.__str__ != "None": + #set datasets = $datasets + ["'kulczynski_.*$:'" + $kulczynski.__str__] + #end if + #if $manhattan.__str__ != "None": + #set datasets = $datasets + ["'manhattan_.*$:'" + $manhattan.__str__] + #end if + #if $morisita_horn.__str__ != "None": + #set datasets = $datasets + ["'morisita_horn_.*$:'" + $morisita_horn.__str__] + #end if + #if $pearson.__str__ != "None": + #set datasets = $datasets + ["'pearson_.*$:'" + $pearson.__str__] + #end if + #if $soergel.__str__ != "None": + #set datasets = $datasets + ["'soergel_.*$:'" + $soergel.__str__] + #end if + #if $spearman_approx.__str__ != "None": + #set datasets = $datasets + ["'spearman_approx_.*$:'" + $spearman_approx.__str__] + #end if + #if $specprof.__str__ != "None": + #set datasets = $datasets + ["'specprof_.*$:'" + $specprof.__str__] + #end if + #if $unifrac.__str__ != "None": + #set datasets = $datasets + ["'unifrac_.*$:'" + $unifrac.__str__] + #end if + #if $unifrac_g.__str__ != "None": + #set datasets = $datasets + ["'unifrac_g_.*$:'" + $unifrac_g.__str__] + #end if + #if $unifrac_g_full_tree.__str__ != "None": + #set datasets = $datasets + ["'unifrac_g_full_tree_.*$:'" + $unifrac_g_full_tree.__str__] + #end if + #if $unweighted_unifrac.__str__ != "None": + #set datasets = $datasets + ["'unweighted_unifrac_.*$:'" + $unweighted_unifrac.__str__] + #end if + #if $unweighted_unifrac_full_tree.__str__ != "None": + #set datasets = $datasets + ["'unweighted_unifrac_full_tree_.*$:'" + $unweighted_unifrac_full_tree.__str__] + #end if + #if $weighted_normalized_unifrac.__str__ != "None": + #set datasets = $datasets + ["'weighted_normalized_unifrac_.*$:'" + $weighted_normalized_unifrac.__str__] + #end if + #if $weighted_unifrac.__str__ != "None": + #set datasets = $datasets + ["'weighted_unifrac_.*$:'" + $weighted_unifrac.__str__] + #end if + --galaxy_datasets=#echo ','.join($datasets) + beta_diversity.py + --input_path=$input_path + #if $rows.__str__ != '': + --rows=$rows + #end if + --output_dir=$__new_file_path__ + --metrics=$metrics + #if $tree_path.__str__ != "None" and len($tree_path.__str__) > 0: + --tree_path=$tree_path + #end if + $full_tree + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 'binary_chisq' in (metrics if isinstance(metrics,list) 
else [metrics]) + + + 'binary_chord' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_euclidean' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_hamming' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_jaccard' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_lennon' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_ochiai' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_pearson' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'binary_sorensen_dice' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'bray_curtis' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'canberra' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'chisq' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'chord' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'euclidean' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'gower' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'hellinger' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'kulczynski' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'manhattan' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'morisita_horn' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'pearson' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'soergel' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'spearman_approx' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'specprof' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'unifrac' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'unifrac_g' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'unifrac_g_full_tree' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'unweighted_unifrac' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'unweighted_unifrac_full_tree' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'weighted_normalized_unifrac' in (metrics if isinstance(metrics,list) else [metrics]) + + + 'weighted_unifrac' in (metrics if isinstance(metrics,list) else [metrics]) + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 beta_diversity_through_3d_plots.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/beta_diversity_through_3d_plots.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,131 @@ + + A workflow script for computing beta diversity distance matrices and the corresponding 3D plots + + beta_diversity_through_3d_plots.py + + + qiime_wrapper.py + --galaxy_outputdir='$log.extra_files_path' + --galaxy_datasets='^log_\S+\.txt$:'$log,'^prefs\.txt$:'$prefs + beta_diversity_through_3d_plots.py + --otu_table_fp=$otu_table_fp + --mapping_fp=$mapping_fp + --output_dir='$log.extra_files_path' + --force + #if $parameter.source == 'hist': + --parameter_fp=$parameter_fp + #else: + --parameter_fp=$parameter_generated + #end if + --tree_fp=$tree_fp + #if int($seqs_per_sample) > 0: + --seqs_per_sample=$seqs_per_sample + #end if + $parallel + + + + + + + + + + + + + + + + + + + + + + + +beta_diversity:metrics +alpha_diversity:metrics chao1,observed_species,PD_whole_tree +multiple_rarefactions_even_depth:num-reps 20 +parallel:jobs_to_start 2 +parallel:retain_temp_files False +parallel:seconds_to_sleep 60 +collate_alpha:example_path +make_rarefaction_plots:imagetype png +make_rarefaction_plots:resolution 75 +make_rarefaction_plots:background_color 
white +make_rarefaction_plots:prefs_path + + + + + +rarefaction_plots.html + + +]]> + + + + + + + + {weighted_unifrac}_{dataset_4477.dat} + + + + + + + + +$ beta_diversity_through_3d_plots.py -i wf_da/uclust_picked_otus/rep_set/rdp_assigned_taxonomy/otu_table/seqs_otu_table.txt -m Fasting_Map.txt -o wf_bdiv_even146_test/ -p custom_parameters.tt -t wf_da/uclust_picked_otus/rep_set/pynast_aligned_seqs/fasttree_phylogeny/seqs_rep_set.tre -e 146 --print_only + +single_rarefaction.py -i wf_da/uclust_picked_otus/rep_set/rdp_assigned_taxonomy/otu_table/seqs_otu_table.txt -o wf_bdiv_even146_test//seqs_otu_table_even146.txt -d 146 +make_prefs_file.py -m Fasting_Map.txt -o wf_bdiv_even146_test//prefs.txt --monte_carlo_dists 10 --background_color black --mapping_headers_to_use Treatment,DOB +beta_diversity.py -i wf_bdiv_even146_test//seqs_otu_table_even146.txt -o wf_bdiv_even146_test/ --metrics weighted_unifrac -t wf_da/uclust_picked_otus/rep_set/pynast_aligned_seqs/fasttree_phylogeny/seqs_rep_set.tre +principal_coordinates.py -i wf_bdiv_even146_test//weighted_unifrac_seqs_otu_table_even146.txt -o wf_bdiv_even146_test//weighted_unifrac_pc.txt +make_3d_plots.py -p wf_bdiv_even146_test//prefs.txt -i wf_bdiv_even146_test//weighted_unifrac_pc.txt -o wf_bdiv_even146_test//weighted_unifrac_3d_continuous/ -m Fasting_Map.txt --ellipsoid_smoothness 1 +make_3d_plots.py -b "SampleID,BarcodeSequence,LinkerPrimerSequence,Treatment,DOB,Description" -i wf_bdiv_even146_test//weighted_unifrac_pc.txt -o wf_bdiv_even146_test//weighted_unifrac_3d_discrete/ -m Fasting_Map.txt --ellipsoid_smoothness 1 +beta_diversity.py -i wf_bdiv_even146_test//seqs_otu_table_even146.txt -o wf_bdiv_even146_test/ --metrics unweighted_unifrac -t wf_da/uclust_picked_otus/rep_set/pynast_aligned_seqs/fasttree_phylogeny/seqs_rep_set.tre +principal_coordinates.py -i wf_bdiv_even146_test//unweighted_unifrac_seqs_otu_table_even146.txt -o wf_bdiv_even146_test//unweighted_unifrac_pc.txt +make_3d_plots.py -p wf_bdiv_even146_test//prefs.txt -i wf_bdiv_even146_test//unweighted_unifrac_pc.txt -o wf_bdiv_even146_test//unweighted_unifrac_3d_continuous/ -m Fasting_Map.txt --ellipsoid_smoothness 1 +make_3d_plots.py -b "SampleID,BarcodeSequence,LinkerPrimerSequence,Treatment,DOB,Description" -i wf_bdiv_even146_test//unweighted_unifrac_pc.txt -o wf_bdiv_even146_test//unweighted_unifrac_3d_discrete/ -m Fasting_Map.txt --ellipsoid_smoothness 1 + + + + + diff -r 000000000000 -r e5c3175506b7 beta_significance.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/beta_significance.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,35 @@ + + This script runs any of a set of common tests to determine if a sample is statistically significantly different from another sample + + beta_significance.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + beta_significance.py + --input_path=$input_path + --output_path=$output_path + --significance_test=$significance_test + --tree_path=$tree_path + --num_iters=$num_iters + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 blast_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/blast_wrapper.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,31 @@ + + Blast Interface + + blast_wrapper.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + blast_wrapper.py + --input_fasta_fp=$input_fasta_fp + --refseqs_fp=$refseqs_fp + --num_seqs_per_blast_run=$num_seqs_per_blast_run + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 categorized_dist_scatterplot.xml --- /dev/null 
Thu Jan 01 00:00:00 1970 +0000 +++ b/categorized_dist_scatterplot.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,41 @@ + + makes a figure representing average distances between samples, broken down by categories. I call it a 'categorized distance scatterplot' + + categorized_dist_scatterplot.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + categorized_dist_scatterplot.py + --map=$map + --distance_matrix=$distance_matrix + --primary_state=$primary_state + --axis_category=$axis_category + --output_path=$output_path + --colorby=$colorby + --secondary_state=$secondary_state + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 check_id_map.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/check_id_map.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,43 @@ + + Checks user's metadata mapping file for required data, valid format + + check_id_map.py + + + qiime_wrapper.py + --galaxy_outputdir='$log.extra_files_path' + --galaxy_datasets='^\S+_corrected\.txt$:'$corrected_mapping,'^\S+\.log:'$log + check_id_map.py + --map=$map + --output_dir='$log.extra_files_path' + --char_replace=$char_replace + $not_barcoded + $variable_len_barcodes + $disable_primer_check + $verbose + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 collate_alpha.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/collate_alpha.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,29 @@ + + Collate alpha diversity results + + collate_alpha.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + collate_alpha.py + --input_path=$input_path + --output_path=$output_path + --example_path=$example_path + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 compare_3d_plots.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/compare_3d_plots.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,47 @@ + + Plot several PCoA files on the same 3D plot + + compare_3d_plots.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + compare_3d_plots.py + --coord_fnames=$coord_fnames + --map_fname=$map_fname + --colorby=$colorby + --custom_axes=$custom_axes + --prefs_path=$prefs_path + --background_color=$background_color + --edges_file=$edges_file + $serial + --output_dir=$__new_file_path__ + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 consensus_tree.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/consensus_tree.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,29 @@ + + This script outputs a majority consensus tree given a collection of input trees. 
+ + consensus_tree.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + consensus_tree.py + --input_dir=$input_dir + --output_fname=$output_fname + $strict + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 convert_otu_table_to_unifrac_sample_mapping.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/convert_otu_table_to_unifrac_sample_mapping.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,26 @@ + + Convert a QIIME OTU table to a UniFrac sample mapping file + + convert_otu_table_to_unifrac_sample_mapping.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + convert_otu_table_to_unifrac_sample_mapping.py + --otu_table_fp=$otu_table_fp + --output_fp=$output_fp + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 convert_unifrac_sample_mapping_to_otu_table.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/convert_unifrac_sample_mapping_to_otu_table.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,26 @@ + + Convert a UniFrac sample mapping file to an OTU table + + convert_unifrac_sample_mapping_to_otu_table.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + convert_unifrac_sample_mapping_to_otu_table.py + --sample_mapping_fp=$sample_mapping_fp + --output_fp=$output_fp + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 denoise.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/denoise.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,53 @@ + + Denoise a flowgram file + + denoise.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + denoise.py + --input_file=$input_file + --fasta_file=$fasta_file + --output_dir=$__new_file_path__ + --method=$method + $keep_intermediates + --cut-off=$cut_off + --precision=$precision + --num_cpus=$num_cpus + $force_overwrite + --map_fname=$map_fname + --primer=$primer + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 dissimilarity_mtx_stats.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dissimilarity_mtx_stats.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,26 @@ + + Calculate mean, median and standard deviation from a set of distance matrices + + dissimilarity_mtx_stats.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + dissimilarity_mtx_stats.py + --input_dir=$input_dir + --output_dir=$__new_file_path__ + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 exclude_seqs_by_blast.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exclude_seqs_by_blast.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,53 @@ + + Exclude contaminated sequences using BLAST + + exclude_seqs_by_blast.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + exclude_seqs_by_blast.py + --querydb=$querydb + --subjectdb=$subjectdb + --outputfilename=$outputfilename + --e_value=$e_value + --percent_aligned=$percent_aligned + $no_clean + --blastmatroot=$blastmatroot + --working_dir=$working_dir + --max_hits=$max_hits + --word_size=$word_size + $no_format_db + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 extract_seqs_by_sample_id.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extract_seqs_by_sample_id.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,32 @@ + + Extract sequences based on the SampleID + + extract_seqs_by_sample_id.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + extract_seqs_by_sample_id.py + --input_fasta_fp=$input_fasta_fp + --sample_ids=$sample_ids + --output_fasta_fp=$output_fasta_fp + $negate + + + + + + + + + + + + + + + + diff -r 
000000000000 -r e5c3175506b7 filter_alignment.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter_alignment.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,43 @@ + + Filter sequence alignment by removing highly variable regions + + filter_alignment.py + + + qiime_wrapper.py + ## --galaxy_tmpdir='$__new_file_path__' + --galaxy_outputdir='$pfiltered_fasta.extra_files_path' + --galaxy_datasets='^\S+_pfiltered\.\S+$:'$pfiltered_fasta + filter_alignment.py + --input_fasta_file=$input_fasta_file + --output_dir='$pfiltered_fasta.extra_files_path' + --lane_mask_fp=$lane_mask_fp + $suppress_lane_mask_filter + --allowed_gap_frac=$allowed_gap_frac + $remove_outliers + --threshold=$threshold + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 filter_by_metadata.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter_by_metadata.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,37 @@ + + Filter OTU table by removal of specified metadata + + filter_by_metadata.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + filter_by_metadata.py + --otu_table_fp=$otu_table_fp + --map=$map + --states=$states + --otu_outfile=$otu_outfile + --map_outfile=$map_outfile + --num_seqs_per_otu=$num_seqs_per_otu + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 filter_fasta.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter_fasta.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,41 @@ + + This script can be applied to remove sequences from a fasta file based on input criteria. + + filter_fasta.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + filter_fasta.py + --input_fasta_fp=$input_fasta_fp + --output_fasta_fp=$output_fasta_fp + --otu_map=$otu_map + --seq_id_fp=$seq_id_fp + --subject_fasta_fp=$subject_fasta_fp + --seq_id_prefix=$seq_id_prefix + $negate + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 filter_otu_table.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter_otu_table.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,43 @@ + + Filters OTU table by minimum OTU count and number of samples or by taxonomy + + filter_otu_table.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + filter_otu_table.py + --otu_table_fp=$otu_table_fp + --min_count=$min_count + --min_samples=$min_samples + --include_taxonomy=$include_taxonomy + --exclude_taxonomy=$exclude_taxonomy + --dir_path=$dir_path + --seqs_per_sample=$seqs_per_sample + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 filter_otus_by_sample.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter_otus_by_sample.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,32 @@ + + Filter OTU mapping file and sequences by SampleIDs + + filter_otus_by_sample.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + filter_otus_by_sample.py + --otu_map_fp=$otu_map_fp + --input_fasta_fp=$input_fasta_fp + --samples_to_extract=$samples_to_extract + --output_dir=$__new_file_path__ + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 fix_arb_fasta.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fix_arb_fasta.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,25 @@ + + Reformat ARB FASTA files + + fix_arb_fasta.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + fix_arb_fasta.py + --input_fasta_fp=$input_fasta_fp + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 identify_chimeric_seqs.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/identify_chimeric_seqs.xml Sun 
Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,68 @@ + + Identify chimeric sequences in input FASTA file + + identify_chimeric_seqs.py + + + qiime_wrapper.py + identify_chimeric_seqs.py + --input_fasta_fp=$input_fasta_fp + #if $pick.chimera_detection_method == 'ChimeraSlayer': + --chimera_detection_method=$pick.chimera_detection_method + --aligned_reference_seqs_fp=$pick.aligned_reference_seqs_fp + --min_div_ratio=$pick.min_div_ratio + $pick.keep_intermediates + #elif $pick.chimera_detection_method == 'blast_fragments': + --chimera_detection_method=$pick.chimera_detection_method + --id_to_taxonomy_fp=$pick.id_to_taxonomy_fp + --reference_seqs_fp=$pick.reference_seqs_fp + --blast_db=$pick.blast_db + --num_fragments=$pick.num_fragments + --taxonomy_depth=$pick.taxonomy_depth + --max_e_value=$pick.max_e_value + #end if + --output_fp=$output_fp + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 jackknifed_beta_diversity.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/jackknifed_beta_diversity.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,101 @@ + + A workflow script for performing jackknifed UPGMA clustering and build jackknifed 2d and 3D PCoA plots. + + jackknifed_beta_diversity.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + jackknifed_beta_diversity.py + --otu_table_fp=$otu_table_fp + --output_dir=$__new_file_path__ + $force + #if $parameter.source == 'hist': + --parameter_fp=$parameter_fp + #else: + --parameter_fp=$parameter_generated + #end if + --seqs_per_sample=$seqs_per_sample + --mapping_fp=$mapping_fp + --tree_fp=$tree_fp + --master_tree=$master_tree + $print_only + $parallel + + + + + + + + + + + + + + + + + + + + + + + + + + + + +alpha_diversity:metrics chao1,observed_species,PD_whole_tree +multiple_rarefactions_even_depth:num-reps 20 +parallel:jobs_to_start 2 +parallel:retain_temp_files False +parallel:seconds_to_sleep 60 +collate_alpha:example_path +make_rarefaction_plots:imagetype png +make_rarefaction_plots:resolution 75 +make_rarefaction_plots:background_color white +make_rarefaction_plots:prefs_path + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 lib/galaxy/datatypes/metagenomics.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/datatypes/metagenomics.py Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,1121 @@ +""" +metagenomics datatypes +James E Johnson - University of Minnesota +for Mothur +""" + +import data +import logging, os, sys, time, tempfile, shutil, string, glob, re +import galaxy.model +from galaxy.datatypes import metadata +from galaxy.datatypes import tabular +from galaxy.datatypes import sequence +from galaxy.datatypes.metadata import MetadataElement +from galaxy.datatypes.tabular import Tabular +from galaxy.datatypes.sequence import Fasta +from galaxy import util +from galaxy.datatypes.images import Html +from sniff import * + +log = logging.getLogger(__name__) + + +## Mothur Classes + +class Otu( Tabular ): + file_ext = 'otu' + + def sniff( self, filename ): + """ + Determines whether the file is a otu (operational taxonomic unit) format + """ + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + if line: + if line[0] != '@': + linePieces = line.split('\t') + if len(linePieces) < 2: + return False + try: + check = int(linePieces[1]) + if check + 2 != len(linePieces): + return False + except ValueError: + return False + count += 1 + if count == 5: + return True + fh.close() + if count < 5 and 
count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class OtuList( Otu ): + file_ext = 'list' + +class Sabund( Otu ): + file_ext = 'sabund' + + def sniff( self, filename ): + """ + Determines whether the file is a otu (operational taxonomic unit) format + labelcount[value(1..n)] + + """ + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + if line: + if line[0] != '@': + linePieces = line.split('\t') + if len(linePieces) < 2: + return False + try: + check = int(linePieces[1]) + if check + 2 != len(linePieces): + return False + for i in range( 2, len(linePieces)): + ival = int(linePieces[i]) + except ValueError: + return False + count += 1 + if count >= 5: + return True + fh.close() + if count < 5 and count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class Rabund( Sabund ): + file_ext = 'rabund' + +class GroupAbund( Otu ): + file_ext = 'grpabund' + def init_meta( self, dataset, copy_from=None ): + Otu.init_meta( self, dataset, copy_from=copy_from ) + def set_meta( self, dataset, overwrite = True, skip=1, max_data_lines = 100000, **kwd ): + # See if file starts with header line + if dataset.has_data(): + try: + fh = open( dataset.file_name ) + line = fh.readline() + line = line.strip() + linePieces = line.split('\t') + if linePieces[0] == 'label' and linePieces[1] == 'Group': + skip=1 + else: + skip=0 + finally: + fh.close() + Otu.set_meta( self, dataset, overwrite, skip, max_data_lines, **kwd) + def sniff( self, filename, vals_are_int=False): + """ + Determines whether the file is a otu (operational taxonomic unit) Shared format + labelgroupcount[value(1..n)] + The first line is column headings as of Mothur v 1.20 + """ + log.info( "sniff GroupAbund vals_are_int %s" % vals_are_int) + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + if line: + if line[0] != '@': + linePieces = line.split('\t') + if len(linePieces) < 3: + return False + if count > 0 or linePieces[0] != 'label': + try: + check = int(linePieces[2]) + if check + 3 != len(linePieces): + return False + for i in range( 3, len(linePieces)): + if vals_are_int: + ival = int(linePieces[i]) + else: + fval = float(linePieces[i]) + except ValueError: + return False + count += 1 + if count >= 5: + return True + fh.close() + if count < 5 and count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class SharedRabund( GroupAbund ): + file_ext = 'shared' + + + def sniff( self, filename ): + """ + Determines whether the file is a otu (operational taxonomic unit) Shared format + labelgroupcount[value(1..n)] + The first line is column headings as of Mothur v 1.20 + """ + # return GroupAbund.sniff(self,filename,True) + isme = GroupAbund.sniff(self,filename,True) + log.info( "is SharedRabund %s" % isme) + return isme + + +class RelAbund( GroupAbund ): + file_ext = 'relabund' + + def sniff( self, filename ): + """ + Determines whether the file is a otu (operational taxonomic unit) Relative Abundance format + labelgroupcount[value(1..n)] + The first line is column headings as of Mothur v 1.20 + """ + # return GroupAbund.sniff(self,filename,False) + isme = GroupAbund.sniff(self,filename,False) + log.info( "is RelAbund %s" % isme) + return isme + +class SecondaryStructureMap(Tabular): + file_ext = 'map' + def __init__(self, **kwd): + """Initialize secondary structure map datatype""" + Tabular.__init__( 
self, **kwd ) + self.column_names = ['Map'] + + def sniff( self, filename ): + """ + Determines whether the file is a secondary structure map format + A single column with an integer value which indicates the row that this row maps to. + e.g. if structMap[10] = 380 then structMap[380] must be 10. + """ + try: + fh = open( filename ) + line_num = 0 + rowidxmap = {} + while True: + line = fh.readline() + line_num += 1 + line = line.strip() + if not line: + break #EOF + if line: + try: + pointer = int(line) + if pointer > 0: + if pointer > line_num: + rowidxmap[line_num] = pointer + elif pointer < line_num and rowidxmap.get(pointer) != line_num: + return False + except ValueError: + return False + fh.close() + if line_num > 1: + return True + except: + pass + finally: + fh.close() + return False + +class SequenceAlignment( Fasta ): + file_ext = 'align' + def __init__(self, **kwd): + """Initialize SequenceAlignment datatype""" + Fasta.__init__( self, **kwd ) + + def sniff( self, filename ): + """ + Determines whether the file is in Mothur align fasta format + Each sequence line must be the same length + """ + + try: + fh = open( filename ) + seq_len = -1 + while True: + line = fh.readline() + if not line: + break #EOF + line = line.strip() + if line: #first non-empty line + if line.startswith( '>' ): + #The next line.strip() must not be '', nor startwith '>' + line = fh.readline().strip() + if line == '' or line.startswith( '>' ): + break + if seq_len < 0: + seq_len = len(line) + elif seq_len != len(line): + return False + else: + break #we found a non-empty line, but its not a fasta header + if seq_len > 0: + return True + except: + pass + finally: + fh.close() + return False + +class AlignCheck( Tabular ): + file_ext = 'align.check' + def __init__(self, **kwd): + """Initialize AlignCheck datatype""" + Tabular.__init__( self, **kwd ) + self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total'] + self.column_types = ['str','int','int','int','int','int','int','int'] + self.comment_lines = 1 + + def set_meta( self, dataset, overwrite = True, **kwd ): + # Tabular.set_meta( self, dataset, overwrite = overwrite, first_line_is_header = True, skip = 1 ) + data_lines = 0 + if dataset.has_data(): + dataset_fh = open( dataset.file_name ) + while True: + line = dataset_fh.readline() + if not line: break + data_lines += 1 + dataset_fh.close() + dataset.metadata.comment_lines = 1 + dataset.metadata.data_lines = data_lines - 1 if data_lines > 0 else 0 + dataset.metadata.column_names = self.column_names + dataset.metadata.column_types = self.column_types + +class AlignReport(Tabular): + """ +QueryName QueryLength TemplateName TemplateLength SearchMethod SearchScore AlignmentMethod QueryStart QueryEnd TemplateStart TemplateEnd PairwiseAlignmentLength GapsInQuery GapsInTemplate LongestInsert SimBtwnQuery&Template +AY457915 501 82283 1525 kmer 89.07 needleman 5 501 1 499 499 2 0 0 97.6 + """ + file_ext = 'align.report' + def __init__(self, **kwd): + """Initialize AlignReport datatype""" + Tabular.__init__( self, **kwd ) + self.column_names = ['QueryName','QueryLength','TemplateName','TemplateLength','SearchMethod','SearchScore', + 'AlignmentMethod','QueryStart','QueryEnd','TemplateStart','TemplateEnd', + 'PairwiseAlignmentLength','GapsInQuery','GapsInTemplate','LongestInsert','SimBtwnQuery&Template' + ] + +class BellerophonChimera( Tabular ): + file_ext = 'bellerophon.chimera' + def __init__(self, **kwd): + """Initialize BellerophonChimera datatype""" + Tabular.__init__( self, **kwd ) + self.column_names = 
['Name','Score','Left','Right'] + +class SecondaryStructureMatch(Tabular): + """ + name pound dash plus equal loop tilde total + 9_1_12 42 68 8 28 275 420 872 + 9_1_14 36 68 6 26 266 422 851 + 9_1_15 44 68 8 28 276 418 873 + 9_1_16 34 72 6 30 267 430 860 + 9_1_18 46 80 2 36 261 + """ + def __init__(self, **kwd): + """Initialize SecondaryStructureMatch datatype""" + Tabular.__init__( self, **kwd ) + self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total'] + +class DistanceMatrix(data.Text): + file_ext = 'dist' + """Add metadata elements""" + MetadataElement( name="sequence_count", default=0, desc="Number of sequences", readonly=False, optional=True, no_value=0 ) + + +class LowerTriangleDistanceMatrix(DistanceMatrix): + file_ext = 'lower.dist' + def __init__(self, **kwd): + """Initialize secondary structure map datatype""" + DistanceMatrix.__init__( self, **kwd ) + + def sniff( self, filename ): + """ + Determines whether the file is a lower-triangle distance matrix (phylip) format + The first line has the number of sequences in the matrix. + The remaining lines have the sequence name followed by a list of distances from all preceeding sequences + 5 + U68589 + U68590 0.3371 + U68591 0.3609 0.3782 + U68592 0.4155 0.3197 0.4148 + U68593 0.2872 0.1690 0.3361 0.2842 + """ + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + if line: + if line[0] != '@': + linePieces = line.split('\t') + if len(linePieces) != 3: + return False + try: + check = float(linePieces[2]) + except ValueError: + return False + count += 1 + if count == 5: + return True + fh.close() + if count < 5 and count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class SquareDistanceMatrix(DistanceMatrix,Tabular): + file_ext = 'square.dist' + sequence_count = -1 + + def __init__(self, **kwd): + """Initialize secondary structure map datatype""" + Tabular.__init__( self, **kwd ) + def init_meta( self, dataset, copy_from=None ): + data.Text.init_meta( self, dataset, copy_from=copy_from ) + def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): + dataset.metadata.sequences = 0 + + def sniff( self, filename ): + """ + Determines whether the file is a square distance matrix (Column-formatted distance matrix) format + The first line has the number of sequences in the matrix. + The following lines have the sequence name in the first column plus a column for the distance to each sequence + in the row order in which they appear in the matrix. 
+ 3 + U68589 0.0000 0.3371 0.3610 + U68590 0.3371 0.0000 0.3783 + U68591 0.3371 0.0000 0.3783 + """ + try: + fh = open( filename ) + count = 0 + line = fh.readline() + line = line.strip() + sequence_count = int(line) + col_cnt = sequence_count + 1 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + if line: + if line[0] != '@': + linePieces = line.split('\t') + if len(linePieces) != col_cnt : + return False + try: + for i in range(1, col_cnt): + check = float(linePieces[i]) + except ValueError: + return False + count += 1 + if count == 5: + return True + fh.close() + if count < 5 and count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class PairwiseDistanceMatrix(DistanceMatrix,Tabular): + file_ext = 'pair.dist' + def __init__(self, **kwd): + """Initialize pairwise distance matrix datatype""" + Tabular.__init__( self, **kwd ) + self.column_names = ['Sequence','Sequence','Distance'] + self.column_types = ['str','str','float'] + self.comment_lines = 1 + + def sniff( self, filename ): + """ + Determines whether the file is a pairwise distance matrix (Column-formatted distance matrix) format + The first and second columns have the sequence names and the third column is the distance between those sequences. + """ + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + if line: + if line[0] != '@': + linePieces = line.split('\t') + if len(linePieces) != 3: + return False + try: + check = float(linePieces[2]) + except ValueError: + return False + count += 1 + if count == 5: + return True + fh.close() + if count < 5 and count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class AlignCheck(Tabular): + file_ext = 'align.check' + def __init__(self, **kwd): + """Initialize AlignCheck datatype""" + Tabular.__init__( self, **kwd ) + self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total'] + self.columns = 8 + +class Names(Tabular): + file_ext = 'names' + def __init__(self, **kwd): + """Name file shows the relationship between a representative sequence(col 1) and the sequences(comma-separated) it represents(col 2)""" + Tabular.__init__( self, **kwd ) + self.column_names = ['name','representatives'] + self.columns = 2 + +class Summary(Tabular): + file_ext = 'summary' + def __init__(self, **kwd): + """summarizes the quality of sequences in an unaligned or aligned fasta-formatted sequence file""" + Tabular.__init__( self, **kwd ) + self.column_names = ['seqname','start','end','nbases','ambigs','polymer'] + self.columns = 6 + +class Group(Tabular): + file_ext = 'groups' + def __init__(self, **kwd): + """Group file shows the relationship between a sequence(col 1) and the group it belongs to(col 2)""" + Tabular.__init__( self, **kwd ) + self.column_names = ['name','group'] + self.columns = 2 + +class Design(Tabular): + file_ext = 'design' + def __init__(self, **kwd): + """Design file shows the relationship between a group(col 1) and a grouping (col 2), providing a way to merge groups.""" + Tabular.__init__( self, **kwd ) + self.column_names = ['group','grouping'] + self.columns = 2 + +class AccNos(Tabular): + file_ext = 'accnos' + def __init__(self, **kwd): + """A list of names""" + Tabular.__init__( self, **kwd ) + self.column_names = ['name'] + self.columns = 1 + +class Oligos( data.Text ): + file_ext = 'oligos' + + def sniff( self, filename ): + """ + Determines whether the file is a otu 
(operational taxonomic unit) format + """ + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + else: + if line[0] != '#': + linePieces = line.split('\t') + if len(linePieces) == 2 and re.match('forward|reverse',linePieces[0]): + count += 1 + continue + elif len(linePieces) == 3 and re.match('barcode',linePieces[0]): + count += 1 + continue + else: + return False + if count > 20: + return True + if count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class Frequency(Tabular): + file_ext = 'freq' + def __init__(self, **kwd): + """A list of names""" + Tabular.__init__( self, **kwd ) + self.column_names = ['position','frequency'] + self.column_types = ['int','float'] + + def sniff( self, filename ): + """ + Determines whether the file is a frequency tabular format for chimera analysis + #1.14.0 + 0 0.000 + 1 0.000 + ... + 155 0.975 + """ + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + else: + if line[0] != '#': + try: + linePieces = line.split('\t') + i = int(linePieces[0]) + f = float(linePieces[1]) + count += 1 + continue + except: + return False + if count > 20: + return True + if count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class Quantile(Tabular): + file_ext = 'quan' + MetadataElement( name="filtered", default=False, no_value=False, optional=True , desc="Quantiles calculated using a mask", readonly=True) + MetadataElement( name="masked", default=False, no_value=False, optional=True , desc="Quantiles calculated using a frequency filter", readonly=True) + def __init__(self, **kwd): + """Quantiles for chimera analysis""" + Tabular.__init__( self, **kwd ) + self.column_names = ['num','ten','twentyfive','fifty','seventyfive','ninetyfive','ninetynine'] + self.column_types = ['int','float','float','float','float','float','float'] + def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): + log.info( "Mothur Quantile set_meta %s" % kwd) + def sniff( self, filename ): + """ + Determines whether the file is a quantiles tabular format for chimera analysis + 1 0 0 0 0 0 0 + 2 0.309198 0.309198 0.37161 0.37161 0.37161 0.37161 + 3 0.510982 0.563213 0.693529 0.858939 1.07442 1.20608 + ... 
+ """ + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + else: + if line[0] != '#': + try: + linePieces = line.split('\t') + i = int(linePieces[0]) + f = float(linePieces[1]) + f = float(linePieces[2]) + f = float(linePieces[3]) + f = float(linePieces[4]) + f = float(linePieces[5]) + f = float(linePieces[6]) + count += 1 + continue + except: + return False + if count > 10: + return True + if count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class FilteredQuantile(Quantile): + file_ext = 'filtered.quan' + def __init__(self, **kwd): + """Quantiles for chimera analysis""" + Quantile.__init__( self, **kwd ) + self.filtered = True + +class MaskedQuantile(Quantile): + file_ext = 'masked.quan' + def __init__(self, **kwd): + """Quantiles for chimera analysis""" + Quantile.__init__( self, **kwd ) + self.masked = True + self.filtered = False + +class FilteredMaskedQuantile(Quantile): + file_ext = 'filtered.masked.quan' + def __init__(self, **kwd): + """Quantiles for chimera analysis""" + Quantile.__init__( self, **kwd ) + self.masked = True + self.filtered = True + +class LaneMask(data.Text): + file_ext = 'filter' + + def sniff( self, filename ): + """ + Determines whether the file is a lane mask filter: 1 line consisting of zeros and ones. + """ + try: + fh = open( filename ) + while True: + buff = fh.read(1000) + if not buff: + break #EOF + else: + if not re.match('^[01]+$',line): + return False + return True + except: + pass + finally: + close(fh) + return False + +class SequenceTaxonomy(Tabular): + file_ext = 'seq.taxonomy' + """ + A table with 2 columns: + - SequenceName + - Taxonomy (semicolon-separated taxonomy in descending order) + Example: + X56533.1 Eukaryota;Alveolata;Ciliophora;Intramacronucleata;Oligohymenophorea;Hymenostomatida;Tetrahymenina;Glaucomidae;Glaucoma; + X97975.1 Eukaryota;Parabasalidea;Trichomonada;Trichomonadida;unclassified_Trichomonadida; + AF052717.1 Eukaryota;Parabasalidea; + """ + def __init__(self, **kwd): + Tabular.__init__( self, **kwd ) + self.column_names = ['name','taxonomy'] + + def sniff( self, filename ): + """ + Determines whether the file is a SequenceTaxonomy + """ + try: + pat = '^([^ \t\n\r\f\v;]+([(]\d+[)])?[;])+$' + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + if not line: + break #EOF + line = line.strip() + if line: + fields = line.split('\t') + if len(fields) != 2: + return False + if not re.match(pat,fields[1]): + return False + count += 1 + if count > 10: + break + if count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class RDPSequenceTaxonomy(SequenceTaxonomy): + file_ext = 'rdp.taxonomy' + """ + A table with 2 columns: + - SequenceName + - Taxonomy (semicolon-separated taxonomy in descending order, RDP requires exactly 6 levels deep) + Example: + AB001518.1 Bacteria;Bacteroidetes;Sphingobacteria;Sphingobacteriales;unclassified_Sphingobacteriales; + AB001724.1 Bacteria;Cyanobacteria;Cyanobacteria;Family_II;GpIIa; + AB001774.1 Bacteria;Chlamydiae;Chlamydiae;Chlamydiales;Chlamydiaceae;Chlamydophila; + """ + def sniff( self, filename ): + """ + Determines whether the file is a SequenceTaxonomy + """ + try: + pat = '^([^ \t\n\r\f\v;]+([(]\d+[)])?[;]){6}$' + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + if not line: + break #EOF + line = line.strip() + if line: + fields = line.split('\t') + if len(fields) != 2: + return False + if not 
re.match(pat,fields[1]): + return False + count += 1 + if count > 10: + break + if count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class ConsensusTaxonomy(Tabular): + file_ext = 'cons.taxonomy' + def __init__(self, **kwd): + """A consensus taxonomy for each OTU""" + Tabular.__init__( self, **kwd ) + self.column_names = ['OTU','count','taxonomy'] + +class TaxonomySummary(Tabular): + file_ext = 'tax.summary' + def __init__(self, **kwd): + """A Summary of taxon classification""" + Tabular.__init__( self, **kwd ) + self.column_names = ['taxlevel','rankID','taxon','daughterlevels','total'] + +class Phylip(data.Text): + file_ext = 'phy' + + def sniff( self, filename ): + """ + Determines whether the file is in Phylip format (Interleaved or Sequential) + The first line of the input file contains the number of species and the + number of characters, in free format, separated by blanks (not by + commas). The information for each species follows, starting with a + ten-character species name (which can include punctuation marks and blanks), + and continuing with the characters for that species. + http://evolution.genetics.washington.edu/phylip/doc/main.html#inputfiles + Interleaved Example: + 6 39 + Archaeopt CGATGCTTAC CGCCGATGCT + HesperorniCGTTACTCGT TGTCGTTACT + BaluchitheTAATGTTAAT TGTTAATGTT + B. virginiTAATGTTCGT TGTTAATGTT + BrontosaurCAAAACCCAT CATCAAAACC + B.subtilisGGCAGCCAAT CACGGCAGCC + + TACCGCCGAT GCTTACCGC + CGTTGTCGTT ACTCGTTGT + AATTGTTAAT GTTAATTGT + CGTTGTTAAT GTTCGTTGT + CATCATCAAA ACCCATCAT + AATCACGGCA GCCAATCAC + """ + try: + fh = open( filename ) + # counts line + line = fh.readline().strip() + linePieces = line.split() + count = int(linePieces[0]) + seq_len = int(linePieces[1]) + # data lines + """ + TODO check data lines + while True: + line = fh.readline() + # name is the first 10 characters + name = line[0:10] + seq = line[10:].strip() + # nucleic base or amino acid 1-char designators (spaces allowed) + bases = ''.join(seq.split()) + # float per base (each separated by space) + """ + return True + except: + pass + finally: + fh.close() + return False + + +class Axes(Tabular): + file_ext = 'axes' + + def __init__(self, **kwd): + """Initialize axes datatype""" + Tabular.__init__( self, **kwd ) + def sniff( self, filename ): + """ + Determines whether the file is an axes format + The first line may have column headings. + The following lines have the name in the first column plus float columns for each axis. 
+ ==> 98_sq_phylip_amazon.fn.unique.pca.axes <== + group axis1 axis2 + forest 0.000000 0.145743 + pasture 0.145743 0.000000 + + ==> 98_sq_phylip_amazon.nmds.axes <== + axis1 axis2 + U68589 0.262608 -0.077498 + U68590 0.027118 0.195197 + U68591 0.329854 0.014395 + """ + try: + fh = open( filename ) + count = 0 + line = fh.readline() + line = line.strip() + col_cnt = None + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + if line: + fields = line.split('\t') + if col_cnt == None: # ignore values in first line as they may be column headings + col_cnt = len(fields) + else: + if len(fields) != col_cnt : + return False + try: + for i in range(1, col_cnt): + check = float(fields[i]) + except ValueError: + return False + count += 1 + if count > 10: + return True + if count > 0: + return True + except: + pass + finally: + fh.close() + return False + +## Qiime Classes + +class QiimeMetadataMapping(Tabular): + MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] ) + file_ext = 'qiimemapping' + + def __init__(self, **kwd): + """ + http://qiime.sourceforge.net/documentation/file_formats.html#mapping-file-overview + Information about the samples necessary to perform the data analysis. + # self.column_names = ['#SampleID','BarcodeSequence','LinkerPrimerSequence','Description'] + """ + Tabular.__init__( self, **kwd ) + + def sniff( self, filename ): + """ + Determines whether the file is a qiime mapping file + Just checking for an appropriate header line for now, could be improved + """ + try: + pat = '#SampleID(\t[a-zA-Z][a-zA-Z0-9_]*)*\tDescription' + fh = open( filename ) + while True: + line = fh.readline() + if not line: + break #EOF + if re.match(pat,line): + return True + except: + pass + finally: + fh.close() + return False + + def set_column_names(self, dataset): + if dataset.has_data(): + dataset_fh = open( dataset.file_name ) + line = dataset_fh.readline() + if line.startswith('#SampleID'): + dataset.metadata.column_names = line.strip().split('\t'); + dataset_fh.close() + + def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = None, **kwd ): + Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines) + self.set_column_names(dataset) + +class QiimeOTU(Tabular): + """ + Associates OTUs with sequence IDs + Example: + 0 FLP3FBN01C2MYD FLP3FBN01B2ALM + 1 FLP3FBN01DF6NE FLP3FBN01CKW1J FLP3FBN01CHVM4 + 2 FLP3FBN01AXQ2Z + """ + file_ext = 'qiimeotu' + +class QiimeOTUTable(Tabular): + """ + #Full OTU Counts + #OTU ID PC.354 PC.355 PC.356 Consensus Lineage + 0 0 1 0 Root;Bacteria;Firmicutes;"Clostridia";Clostridiales + 1 1 3 1 Root;Bacteria + 2 0 2 2 Root;Bacteria;Bacteroidetes + """ + MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] ) + file_ext = 'qiimeotutable' + def init_meta( self, dataset, copy_from=None ): + tabular.Tabular.init_meta( self, dataset, copy_from=copy_from ) + def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): + self.set_column_names(dataset) + def set_column_names(self, dataset): + if dataset.has_data(): + dataset_fh = open( dataset.file_name ) + line = dataset_fh.readline() + line = dataset_fh.readline() + if line.startswith('#OTU ID'): + dataset.metadata.column_names = line.strip().split('\t'); + dataset_fh.close() + dataset.metadata.comment_lines = 2 + +class QiimeDistanceMatrix(Tabular): + """ + PC.354 PC.355 PC.356 + PC.354 0.0 3.177 1.955 + PC.355 3.177 0.0 3.444 + PC.356 1.955 3.444 0.0 + 
""" + file_ext = 'qiimedistmat' + def init_meta( self, dataset, copy_from=None ): + tabular.Tabular.init_meta( self, dataset, copy_from=copy_from ) + def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): + self.set_column_names(dataset) + def set_column_names(self, dataset): + if dataset.has_data(): + dataset_fh = open( dataset.file_name ) + line = dataset_fh.readline() + # first line contains the names + dataset.metadata.column_names = line.strip().split('\t'); + dataset_fh.close() + dataset.metadata.comment_lines = 1 + +class QiimePCA(Tabular): + """ + Principal Coordinate Analysis Data + The principal coordinate (PC) axes (columns) for each sample (rows). + Pairs of PCs can then be graphed to view the relationships between samples. + The bottom of the output file contains the eigenvalues and % variation explained for each PC. + Example: + pc vector number 1 2 3 + PC.354 -0.309063936588 0.0398252112257 0.0744672231759 + PC.355 -0.106593922619 0.141125998277 0.0780204374172 + PC.356 -0.219869362955 0.00917241121781 0.0357281314115 + + + eigvals 0.480220500471 0.163567082874 0.125594470811 + % variation explained 51.6955484555 17.6079322939 + """ + file_ext = 'qiimepca' + +class QiimeParams(Tabular): + """ +###pick_otus_through_otu_table.py parameters### + +# OTU picker parameters +pick_otus:otu_picking_method uclust +pick_otus:clustering_algorithm furthest + +# Representative set picker parameters +pick_rep_set:rep_set_picking_method first +pick_rep_set:sort_by otu + """ + file_ext = 'qiimeparams' + +class QiimePrefs(data.Text): + """ + A text file, containing coloring preferences to be used by make_distance_histograms.py, make_2d_plots.py and make_3d_plots.py. + Example: +{ +'background_color':'black', + +'sample_coloring': + { + 'Treatment': + { + 'column':'Treatment', + 'colors':(('red',(0,100,100)),('blue',(240,100,100))) + }, + 'DOB': + { + 'column':'DOB', + 'colors':(('red',(0,100,100)),('blue',(240,100,100))) + } + }, +'MONTE_CARLO_GROUP_DISTANCES': + { + 'Treatment': 10, + 'DOB': 10 + } +} + """ + file_ext = 'qiimeprefs' + +class QiimeTaxaSummary(Tabular): + """ + Taxon PC.354 PC.355 PC.356 + Root;Bacteria;Actinobacteria 0.0 0.177 0.955 + Root;Bacteria;Firmicutes 0.177 0.0 0.444 + Root;Bacteria;Proteobacteria 0.955 0.444 0.0 + """ + MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] ) + file_ext = 'qiimetaxsummary' + + def set_column_names(self, dataset): + if dataset.has_data(): + dataset_fh = open( dataset.file_name ) + line = dataset_fh.readline() + if line.startswith('Taxon'): + dataset.metadata.column_names = line.strip().split('\t'); + dataset_fh.close() + + def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = None, **kwd ): + Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines) + self.set_column_names(dataset) + +if __name__ == '__main__': + import doctest, sys + doctest.testmod(sys.modules[__name__]) + diff -r 000000000000 -r e5c3175506b7 make_2d_plots.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_2d_plots.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,47 @@ + + Make 2D PCoA Plots + + make_2d_plots.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + make_2d_plots.py + --coord_fname=$coord_fname + --map_fname=$map_fname + --colorby=$colorby + --prefs_path=$prefs_path + --background_color=$background_color + --ellipsoid_opacity=$ellipsoid_opacity + --ellipsoid_method=$ellipsoid_method + --master_pcoa=$master_pcoa + 
--output_dir=$__new_file_path__ + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 make_3d_plots.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_3d_plots.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,89 @@ + + Make 3D PCoA plots + + make_3d_plots.py + + + qiime_wrapper.py + --galaxy_summary_html='$output_html' + --galaxy_outputdir='$output_html.extra_files_path' + --galaxy_summary_template='$output_template' + make_3d_plots.py + --coord_fname=$coord_fname + --map_fname=$map_fname + --colorby=$colorby + --custom_axes=$custom_axes + --prefs_path=$prefs_path + --background_color=$background_color + --output_dir=$__new_file_path__ + --ellipsoid_smoothness=$ellipsoid_smoothness + --ellipsoid_opacity=$ellipsoid_opacity + --ellipsoid_method=$ellipsoid_method + --taxa_fname=$taxa_fname + --n_taxa_keep=$n_taxa_keep + --biplot_output_file=$biplot_output_file + --master_pcoa=$master_pcoa + --output_format=$output_format + --interpolation_points=$interpolation_points + --polyhedron_points=$polyhedron_points + --polyhedron_offset=$polyhedron_offset + + + + + + + + + + + + + + + + + + + + + + + + + + + +weighted_unifrac_pc.txt_3D.html + + +]]> + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 make_bootstrapped_tree.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_bootstrapped_tree.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,29 @@ + + Make bootstrapped tree + + make_bootstrapped_tree.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + make_bootstrapped_tree.py + --master_tree=$master_tree + --support=$support + --output_file=$output_file + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 make_distance_histograms.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_distance_histograms.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,49 @@ + + Make distance histograms + + make_distance_histograms.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + make_distance_histograms.py + --distance_matrix_file=$distance_matrix_file + --map_fname=$map_fname + --prefs_path=$prefs_path + --dir_path=$dir_path + --background_color=$background_color + $monte_carlo + $suppress_html_output + --fields=$fields + --monte_carlo_iters=$monte_carlo_iters + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 make_fastq.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_fastq.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,34 @@ + + Make fastq file for ERA submission from paired fasta and qual files + + make_fastq.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + make_fastq.py + --input_fasta_fp=$input_fasta_fp + --qual=$qual + --result_fp=$result_fp + $split + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 make_library_id_lists.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_library_id_lists.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,40 @@ + + Make library id lists + + make_library_id_lists.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + make_library_id_lists.py + --input_fasta=$input_fasta + --screened_rep_seqs=$screened_rep_seqs + --otus=$otus + --outdir=$outdir + --field=$field + $debug + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 make_otu_heatmap_html.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_otu_heatmap_html.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,57 @@ + + Make heatmap of OTU table + + make_otu_heatmap_html.py + + + qiime_wrapper.py + 
--galaxy_outputdir='$otu_heatmp.extra_files_path'
+  --galaxy_datasets='^\S+\.html$:'$otu_heatmp
+  make_otu_heatmap_html.py
+  --otu_table_fp=$otu_table_fp
+  --output_dir='$otu_heatmp.extra_files_path'
+  --num_otu_hits=$num_otu_hits
+  #if $tree != None and $tree.__str__ != 'None':
+    --tree=$tree
+  #end if
+  #if $map_fname != None and $map_fname.__str__ != 'None':
+    --map_fname=$map_fname
+  #end if
+  #if $sample_tree != None and $sample_tree.__str__ != 'None':
+    --sample_tree=$sample_tree
+  #end if
+  $log_transform
+  --log_eps=$log_eps
+ + + + + + + + + + + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 make_otu_network.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/make_otu_network.xml Sun Jul 17 10:30:11 2011 -0500
@@ -0,0 +1,38 @@
+
+  Make an OTU network and calculate statistics
+
+  make_otu_network.py
+
+
+  qiime_wrapper.py
+  --galaxy_tmpdir='$__new_file_path__'
+  make_otu_network.py
+  --mapping_file=$mapping_file
+  --input_file=$input_file
+  --colorby=$colorby
+  --prefs_path=$prefs_path
+  --background_color=$background_color
+  --output_dir=$__new_file_path__
+ + + + + + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 make_otu_table.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/make_otu_table.xml Sun Jul 17 10:30:11 2011 -0500
@@ -0,0 +1,31 @@
+
+  Make OTU table
+
+  make_otu_table.py
+
+
+  qiime_wrapper.py
+  make_otu_table.py
+  --otu_map_fp=$otu_map_fp
+  --taxonomy=$taxonomy
+  --exclude_otus_fp=$exclude_otus_fp
+  --output_fp=$output_fp
+ + + + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 make_per_library_sff.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/make_per_library_sff.xml Sun Jul 17 10:30:11 2011 -0500
@@ -0,0 +1,37 @@
+
+  Make per-library sff files from id lists
+
+  make_per_library_sff.py
+
+
+  qiime_wrapper.py
+  --galaxy_tmpdir='$__new_file_path__'
+  make_per_library_sff.py
+  --input_sff=$input_sff
+  --libdir=$libdir
+  --sfffile_path=$sfffile_path
+  $use_sfftools
+  $debug
+ + + + + + + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 make_phylogeny.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/make_phylogeny.xml Sun Jul 17 10:30:11 2011 -0500
@@ -0,0 +1,43 @@
+
+  Make Phylogeny
+
+  make_phylogeny.py
+
+
+  qiime_wrapper.py
+  make_phylogeny.py
+  --input_fp=$input_fp
+  --tree_method=$tree_method
+  --root_method=$root_method
+  --result_fp=$result_fp
+  --log_fp=$log_fp
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 make_pie_charts.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/make_pie_charts.xml Sun Jul 17 10:30:11 2011 -0500
@@ -0,0 +1,46 @@
+
+  Make pie charts based on taxonomy assignment
+
+  make_pie_charts.py
+
+
+  qiime_wrapper.py
+  --galaxy_tmpdir='$__new_file_path__'
+  make_pie_charts.py
+  --input_files=$input_files
+  --labels=$labels
+  $sample_flag
+  --num=$num
+  --dir-prefix=$dir_prefix
+  --colorby=$colorby
+  --prefs_path=$prefs_path
+  --background_color=$background_color
+ + + + + + + + + + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 make_prefs_file.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/make_prefs_file.xml Sun Jul 17 10:30:11 2011 -0500
@@ -0,0 +1,47 @@
+
+  Generate preferences file
+
+  make_prefs_file.py
+
+
+  qiime_wrapper.py
+  --galaxy_tmpdir='$__new_file_path__'
+  make_prefs_file.py
+  --map_fname=$map_fname
+  --output_fp=$output_fp
+  --mapping_headers_to_use=$mapping_headers_to_use
+  --background_color=$background_color
+  --monte_carlo_dists=$monte_carlo_dists
+  --input_taxa_file=$input_taxa_file
--ball_scale=$ball_scale + --arrow_line_color=$arrow_line_color + --arrow_head_color=$arrow_head_color + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 make_qiime_py_file.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_qiime_py_file.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,38 @@ + + Create python file + + make_qiime_py_file.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + make_qiime_py_file.py + --output_fp=$output_fp + $script + $test + --author_name=$author_name + --author_email=$author_email + --copyright=$copyright + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 make_qiime_rst_file.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_qiime_rst_file.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,26 @@ + + Make Sphinx RST file + + make_qiime_rst_file.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + make_qiime_rst_file.py + --input_script=$input_script + --output_dir=$__new_file_path__ + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 make_rarefaction_plots.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_rarefaction_plots.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,50 @@ + + Generate Rarefaction Plots + + make_rarefaction_plots.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + make_rarefaction_plots.py + --input_dir=$input_dir + --map_fname=$map_fname + --colorby=$colorby + --prefs_path=$prefs_path + --background_color=$background_color + --imagetype=$imagetype + --resolution=$resolution + --ymax=$ymax + $webpage + --output_dir=$__new_file_path__ + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 merge_denoiser_output.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/merge_denoiser_output.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,35 @@ + + Merge the output of denoising step back into QIIME + + merge_denoiser_output.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + merge_denoiser_output.py + --map_file=$map_file + --otu_picker_map_file=$otu_picker_map_file + --fasta_fp=$fasta_fp + --denoised_fasta_fp=$denoised_fasta_fp + --output_dir=$__new_file_path__ + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 merge_mapping_files.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/merge_mapping_files.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,29 @@ + + Merge mapping files + + merge_mapping_files.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + merge_mapping_files.py + --mapping_fps=$mapping_fps + --output_fp=$output_fp + --no_data_value=$no_data_value + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 merge_otu_maps.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/merge_otu_maps.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,29 @@ + + Merge OTU mapping files + + merge_otu_maps.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + merge_otu_maps.py + --otu_map_fps=$otu_map_fps + --output_fp=$output_fp + --failures_fp=$failures_fp + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 merge_otu_tables.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/merge_otu_tables.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,26 @@ + + + + merge_otu_tables.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + merge_otu_tables.py + --input_fps=$input_fps + --output_fp=$output_fp + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 multiple_rarefactions.xml --- /dev/null Thu Jan 01 
00:00:00 1970 +0000 +++ b/multiple_rarefactions.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,44 @@ + + Perform multiple subsamplings/rarefactions on an otu table + + multiple_rarefactions.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + multiple_rarefactions.py + --input_path=$input_path + --output_path=$output_path + --min=$min + --max=$max + --step=$step + --num-reps=$num_reps + $lineages_included + $keep_empty_otus + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 multiple_rarefactions_even_depth.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/multiple_rarefactions_even_depth.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,38 @@ + + Perform multiple rarefactions on a single otu table, at one depth of sequences/sample + + multiple_rarefactions_even_depth.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + multiple_rarefactions_even_depth.py + --input_path=$input_path + --output_path=$output_path + --depth=$depth + --num-reps=$num_reps + $lineages_included + $keep_empty_otus + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 otu_category_significance.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/otu_category_significance.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,44 @@ + + OTU significance and co-occurence analysis + + otu_category_significance.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + otu_category_significance.py + --otu_table_fp=$otu_table_fp + --category_mapping_fp=$category_mapping_fp + --category=$category + --test=$test + --output_fp=$output_fp + --filter=$filter + --threshold=$threshold + --otu_include_fp=$otu_include_fp + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 parallel_align_seqs_pynast.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parallel_align_seqs_pynast.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,81 @@ + + Parallel sequence alignment using PyNAST + + parallel_align_seqs_pynast.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + parallel_align_seqs_pynast.py + --input_fasta_fp=$input_fasta_fp + --output_dir=$__new_file_path__ + --template_fp=$template_fp + --pairwise_alignment_method=$pairwise_alignment_method + --blast_db=$blast_db + --min_length=$min_length + --min_percent_id=$min_percent_id + --align_seqs_fp=$align_seqs_fp + --jobs_to_start=$jobs_to_start + --poller_fp=$poller_fp + $retain_temp_files + $suppress_submit_jobs + $poll_directly + --cluster_jobs_fp=$cluster_jobs_fp + $suppress_polling + --job_prefix=$job_prefix + --python_exe_fp=$python_exe_fp + --seconds_to_sleep=$seconds_to_sleep + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 parallel_alpha_diversity.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parallel_alpha_diversity.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,65 @@ + + Parallel alpha diversity + + parallel_alpha_diversity.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + parallel_alpha_diversity.py + --input_path=$input_path + --output_path=$output_path + --metrics=$metrics + --tree_path=$tree_path + --alpha_diversity_fp=$alpha_diversity_fp + --poller_fp=$poller_fp + $retain_temp_files + $suppress_submit_jobs + $poll_directly + --cluster_jobs_fp=$cluster_jobs_fp + $suppress_polling + --job_prefix=$job_prefix + --python_exe_fp=$python_exe_fp + --seconds_to_sleep=$seconds_to_sleep + --jobs_to_start=$jobs_to_start + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 
000000000000 -r e5c3175506b7 parallel_assign_taxonomy_blast.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parallel_assign_taxonomy_blast.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,74 @@ + + Parallel taxonomy assignment using BLAST + + parallel_assign_taxonomy_blast.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + parallel_assign_taxonomy_blast.py + --input_fasta_fp=$input_fasta_fp + --id_to_taxonomy_fp=$id_to_taxonomy_fp + --output_dir=$__new_file_path__ + --reference_seqs_fp=$reference_seqs_fp + --blast_db=$blast_db + --e_value=$e_value + --blastmat_dir=$blastmat_dir + --assign_taxonomy_fp=$assign_taxonomy_fp + --jobs_to_start=$jobs_to_start + --poller_fp=$poller_fp + $retain_temp_files + $suppress_submit_jobs + $poll_directly + --cluster_jobs_fp=$cluster_jobs_fp + $suppress_polling + --job_prefix=$job_prefix + --python_exe_fp=$python_exe_fp + --seconds_to_sleep=$seconds_to_sleep + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 parallel_assign_taxonomy_rdp.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parallel_assign_taxonomy_rdp.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,65 @@ + + Parallel taxonomy assignment using RDP + + parallel_assign_taxonomy_rdp.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + parallel_assign_taxonomy_rdp.py + --input_fasta_fp=$input_fasta_fp + --output_dir=$__new_file_path__ + --rdp_classifier_fp=$rdp_classifier_fp + --confidence=$confidence + --assign_taxonomy_fp=$assign_taxonomy_fp + --jobs_to_start=$jobs_to_start + --poller_fp=$poller_fp + $retain_temp_files + $suppress_submit_jobs + $poll_directly + --cluster_jobs_fp=$cluster_jobs_fp + $suppress_polling + --job_prefix=$job_prefix + --python_exe_fp=$python_exe_fp + --seconds_to_sleep=$seconds_to_sleep + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 parallel_beta_diversity.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parallel_beta_diversity.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,68 @@ + + Parallel beta diversity + + parallel_beta_diversity.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + parallel_beta_diversity.py + --input_path=$input_path + --output_path=$output_path + --metrics=$metrics + --tree_path=$tree_path + --beta_diversity_fp=$beta_diversity_fp + --poller_fp=$poller_fp + $retain_temp_files + $suppress_submit_jobs + $poll_directly + --cluster_jobs_fp=$cluster_jobs_fp + $suppress_polling + --job_prefix=$job_prefix + --python_exe_fp=$python_exe_fp + --seconds_to_sleep=$seconds_to_sleep + --jobs_to_start=$jobs_to_start + $full_tree + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 parallel_blast.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parallel_blast.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,80 @@ + + Parallel BLAST + + parallel_blast.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + parallel_blast.py + --infile_path=$infile_path + --refseqs_path=$refseqs_path + --output_dir=$__new_file_path__ + $disable_low_complexity_filter + --e_value=$e_value + --num_hits=$num_hits + --word_size=$word_size + $suppress_format_blastdb + --blastmat_dir=$blastmat_dir + --blastall_fp=$blastall_fp + --jobs_to_start=$jobs_to_start + --poller_fp=$poller_fp + $retain_temp_files + $suppress_submit_jobs + $poll_directly + --cluster_jobs_fp=$cluster_jobs_fp + $suppress_polling + --job_prefix=$job_prefix + --python_exe_fp=$python_exe_fp + 
--seconds_to_sleep=$seconds_to_sleep + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 parallel_identify_chimeric_seqs.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parallel_identify_chimeric_seqs.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,89 @@ + + Parallel chimera detection + + parallel_identify_chimeric_seqs.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + parallel_identify_chimeric_seqs.py + --input_fasta_fp=$input_fasta_fp + --aligned_reference_seqs_fp=$aligned_reference_seqs_fp + --id_to_taxonomy_fp=$id_to_taxonomy_fp + --reference_seqs_fp=$reference_seqs_fp + --blast_db=$blast_db + --chimera_detection_method=$chimera_detection_method + --num_fragments=$num_fragments + --taxonomy_depth=$taxonomy_depth + --max_e_value=$max_e_value + --min_div_ratio=$min_div_ratio + --output_fp=$output_fp + --identify_chimeric_seqs_fp=$identify_chimeric_seqs_fp + --jobs_to_start=$jobs_to_start + --poller_fp=$poller_fp + $retain_temp_files + $suppress_submit_jobs + $poll_directly + --cluster_jobs_fp=$cluster_jobs_fp + $suppress_polling + --job_prefix=$job_prefix + --python_exe_fp=$python_exe_fp + --seconds_to_sleep=$seconds_to_sleep + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 parallel_multiple_rarefactions.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parallel_multiple_rarefactions.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,74 @@ + + Parallel multiple file rarefaction + + parallel_multiple_rarefactions.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + parallel_multiple_rarefactions.py + --input_path=$input_path + --output_path=$output_path + --min=$min + --max=$max + --step=$step + --num-reps=$num_reps + $lineages_included + --single_rarefaction_fp=$single_rarefaction_fp + --poller_fp=$poller_fp + $retain_temp_files + $suppress_submit_jobs + $poll_directly + --cluster_jobs_fp=$cluster_jobs_fp + $suppress_polling + --job_prefix=$job_prefix + --python_exe_fp=$python_exe_fp + --seconds_to_sleep=$seconds_to_sleep + --jobs_to_start=$jobs_to_start + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 parallel_pick_otus_blast.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parallel_pick_otus_blast.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,74 @@ + + Parallel pick otus using BLAST + + parallel_pick_otus_blast.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + parallel_pick_otus_blast.py + --input_fasta_fp=$input_fasta_fp + --output_dir=$__new_file_path__ + --max_e_value=$max_e_value + --similarity=$similarity + --refseqs_fp=$refseqs_fp + --blast_db=$blast_db + --min_aligned_percent=$min_aligned_percent + --pick_otus_fp=$pick_otus_fp + --jobs_to_start=$jobs_to_start + --poller_fp=$poller_fp + $retain_temp_files + $suppress_submit_jobs + $poll_directly + --cluster_jobs_fp=$cluster_jobs_fp + $suppress_polling + --job_prefix=$job_prefix + --python_exe_fp=$python_exe_fp + --seconds_to_sleep=$seconds_to_sleep + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 parallel_pick_otus_uclust_ref.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parallel_pick_otus_uclust_ref.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,95 @@ + + Parallel pick otus using uclust_ref + + parallel_pick_otus_uclust_ref.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + parallel_pick_otus_uclust_ref.py + --input_fasta_fp=$input_fasta_fp + 
--output_dir=$__new_file_path__
+  --refseqs_fp=$refseqs_fp
+  --similarity=$similarity
+  $enable_rev_strand_match
+  $optimal_uclust
+  $exact_uclust
+  --max_accepts=$max_accepts
+  --max_rejects=$max_rejects
+  --stepwords=$stepwords
+  --word_length=$word_length
+  $uclust_stable_sort
+  $suppress_uclust_stable_sort
+  $save_uc_files
+  --pick_otus_fp=$pick_otus_fp
+  --jobs_to_start=$jobs_to_start
+  --poller_fp=$poller_fp
+  $retain_temp_files
+  $suppress_submit_jobs
+  $poll_directly
+  --cluster_jobs_fp=$cluster_jobs_fp
+  $suppress_polling
+  --job_prefix=$job_prefix
+  --python_exe_fp=$python_exe_fp
+  --seconds_to_sleep=$seconds_to_sleep
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 per_library_stats.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/per_library_stats.xml Sun Jul 17 10:30:11 2011 -0500
@@ -0,0 +1,25 @@
+
+  Calculate per library statistics
+
+  per_library_stats.py
+
+
+  qiime_wrapper.py
+  --galaxy_tmpdir='$__new_file_path__'
+  per_library_stats.py
+  --otu_table_fp=$otu_table_fp
+ + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 pick_otus.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pick_otus.xml Sun Jul 17 10:30:11 2011 -0500
@@ -0,0 +1,169 @@
+
+  OTU picking
+
+  pick_otus.py
+
+
+  qiime_wrapper.py
+  --galaxy_outputdir='$log.extra_files_path'
+  #if $pick.otu_picking_method == 'uclust' and $pick.refseqs_fp.__str__ != 'None':
+    --galaxy_datasets='^\S+_otus\.txt$:'$otus,'^\S+_otus\.log$:'$log,'^\S+_failures\.txt$:'$failures
+  #else:
+    --galaxy_datasets='^\S+_otus\.txt$:'$otus,'^\S+_otus\.log$:'$log
+  #end if
+  pick_otus.py
+  --input_seqs_filepath=$input_seqs_filepath
+  #if $pick.otu_picking_method == 'uclust':
+    #if $pick.refseqs_fp.__str__ != 'None':
+      --refseqs_fp=$pick.refseqs_fp
+      --otu_picking_method='uclust_ref'
+      $pick.suppress_new_clusters
+    #else:
+      --otu_picking_method=$pick.otu_picking_method
+    #end if
+    --similarity=$pick.similarity
+    $pick.enable_rev_strand_match
+    $pick.optimal_uclust
+    $pick.exact_uclust
+    $pick.user_sort
+    $pick.suppress_presort_by_abundance_uclust
+    --max_accepts=$pick.max_accepts
+    --max_rejects=$pick.max_rejects
+    --stepwords=$pick.stepwords
+    --word_length=$pick.word_length
+    --uclust_otu_id_prefix=$pick.uclust_otu_id_prefix
+    $pick.uclust_stable_sort
+    $pick.save_uc_files
+  #elif $pick.otu_picking_method == 'mothur':
+    --otu_picking_method=$pick.otu_picking_method
+    --clustering_algorithm=$pick.clustering_algorithm
+    --similarity=$pick.similarity
+  #elif $pick.otu_picking_method == 'trie':
+    --otu_picking_method=$pick.otu_picking_method
+    $pick.trie_reverse_seqs
+  #elif $pick.otu_picking_method == 'prefix_suffix':
+    --otu_picking_method=$pick.otu_picking_method
+    --prefix_length=$pick.prefix_length
+    --suffix_length=$pick.suffix_length
+  #elif $pick.otu_picking_method == 'blast':
+    --otu_picking_method=$pick.otu_picking_method
+    #if $pick.refseqs_fp.__str__ != 'None':
+      --refseqs_fp=$pick.refseqs_fp
+    #end if
+    --blast_db=$pick.blast_db
+    --similarity=$pick.similarity
+    --max_e_value=$pick.max_e_value
+    --min_aligned_percent=$pick.min_aligned_percent
+  #elif $pick.otu_picking_method == 'cdhit':
+    --otu_picking_method=$pick.otu_picking_method
+    --similarity=$pick.similarity
+    --max_cdhit_memory=$pick.max_cdhit_memory
+    --prefix_prefilter_length=$pick.prefix_prefilter_length
+    $pick.trie_prefilter
+  #end if
+  --output_dir='$log.extra_files_path'
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + (pick['otu_picking_method'] == 'uclust' and pick['refseqs_fp']) + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 pick_otus_through_otu_table.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pick_otus_through_otu_table.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,203 @@ + + A workflow script for picking OTUs through building OTU tables + + pick_otus_through_otu_table.py + + + qiime_wrapper.py + --galaxy_outputdir='$log.extra_files_path' + --galaxy_datasets='^log_\S+\.txt$:'$log,'^\S+_otus.txt$:'$seqs_otus,'^\S+_rep_set.fasta$:'$seqs_rep_set,'^\S+_rep_set_tax_assignments.txt$:'$seqs_rep_set_tax_assignments,'^\S+_otu_table.txt$:'$seqs_otu_table,'^\S+_rep_set_aligned.fasta$:'$seqs_rep_set_aligned,'^\S+_rep_set_aligned_pfiltered.fasta$:'$seqs_rep_set_aligned_pfiltered,'^\S+_rep_set.tre$:'$seqs_rep_set_tre + pick_otus_through_otu_table.py + --input_fp=$input_fp + #if $parameter.source == 'hist': + --parameter_fp=$parameter.parameter_fp + #else: + --parameter_fp=$parameter_generated + #end if + #if $denoise.choice == 'yes': + --sff_fp=$denoise.sff_fp + --mapping_fp=$denoise.mapping_fp + #end if + --output_dir='$log.extra_files_path' + --force + $parallel + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +##OTU picker parameters +pick_otus:otu_picking_method uclust +pick_otus:clustering_algorithm furthest +pick_otus:max_cdhit_memory 400 +pick_otus:refseqs_fp +pick_otus:blast_db +pick_otus:similarity 0.97 +pick_otus:max_e_value 1e-10 +pick_otus:prefix_prefilter_length +pick_otus:trie_prefilter +pick_otus:prefix_length +pick_otus:suffix_length +pick_otus:optimal_uclust +pick_otus:exact_uclust +pick_otus:user_sort +pick_otus:suppress_presort_by_abundance_uclust +pick_otus:suppress_new_clusters +pick_otus:suppress_uclust_stable_sort +pick_otus:max_accepts +pick_otus:max_rejects +pick_otus:word_length +pick_otus:stepwords +##Representative set picker parameters +pick_rep_set:rep_set_picking_method first +pick_rep_set:sort_by otu +##Multiple sequence alignment parameters +align_seqs:template_fp +align_seqs:alignment_method pynast +align_seqs:pairwise_alignment_method uclust +align_seqs:blast_db +align_seqs:min_length 150 +align_seqs:min_percent_id 75.0 +##Alignment filtering (prior to tree-building) parameters +filter_alignment:lane_mask_fp +filter_alignment:allowed_gap_frac 0.999999 +filter_alignment:remove_outliers False +filter_alignment:threshold 3.0 +##Taxonomy assignment parameters +assign_taxonomy:id_to_taxonomy_fp +assign_taxonomy:reference_seqs_fp +assign_taxonomy:assignment_method rdp +assign_taxonomy:blast_db +assign_taxonomy:confidence 0.8 +#assign_taxonomy:e_value 0.001 +##Phylogenetic tree building parameters +make_phylogeny:tree_method fasttree +make_phylogeny:root_method tree_method_default +##align_seqs:template_fp +##filter_alignment:lane_mask_fp + + + + + + + + + + + + + + + + + + + + The steps performed by this function are: + 0) Optionally denoise the sequences (if sff_input_fp=True); + 1) Pick OTUs; + 2) Pick a representative set; + 3) Align the representative set; + 4) Assign taxonomy; + 5) Filter the alignment prior to tree building - remove positions + which are all gaps, and specified as 0 in the lanemask + 6) Build a phylogenetic tree; + 7) Build an OTU table. 
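+
+The --galaxy_datasets option in the command block above is what routes this
+workflow's many output files into fixed Galaxy history items: qiime_wrapper.py
+walks the QIIME output directory and copies each file whose name matches a
+pattern onto the corresponding dataset path. A minimal standalone sketch of
+that matching step (patterns as in the tool command above; the dataset paths
+and file names are hypothetical):
+
+    import re
+
+    # pattern:dataset pairs, as parsed from a --galaxy_datasets argument
+    dataset_patterns = [
+        (r'^log_\S+\.txt$',       '/galaxy/database/files/dataset_11.dat'),  # $log
+        (r'^\S+_otus\.txt$',      '/galaxy/database/files/dataset_12.dat'),  # $seqs_otus
+        (r'^\S+_rep_set\.fasta$', '/galaxy/database/files/dataset_13.dat'),  # $seqs_rep_set
+    ]
+    # hypothetical file names QIIME might leave in the output directory
+    for fname in ['log_20110717123456.txt', 'seqs_otus.txt', 'seqs_rep_set.fasta']:
+        for pattern, dataset_path in dataset_patterns:
+            if re.match(pattern, fname):
+                # qiime_wrapper.py does shutil.copy2(fpath, dataset_path) here
+                print '%s -> %s' % (fname, dataset_path)
+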
+ + +pick_otus_through_otu_table.py -i split_library_output/seqs.fna -p custom_parameters.txt -o wf_da --print_only + +python /usr/local/bin/pick_otus.py -i split_library_output/seqs.fna -o wf_da/uclust_picked_otus --max_e_value 1e-10 --clustering_algorithm furthest --similarity 0.97 --otu_picking_method uclust --max_cdhit_memory 400 + +python /usr/local/bin/pick_rep_set.py -i wf_da/uclust_picked_otus/seqs_otus.txt -f split_library_output/seqs.fna -l wf_da/uclust_picked_otus/rep_set//seqs_rep_set.log -o wf_da/uclust_picked_otus/rep_set//seqs_rep_set.fasta --rep_set_picking_method first --sort_by otu + +python /usr/local/bin/assign_taxonomy.py -o wf_da/uclust_picked_otus/rep_set//rdp_assigned_taxonomy -i wf_da/uclust_picked_otus/rep_set//seqs_rep_set.fasta --confidence 0.8 --assignment_method rdp + +python /usr/local/bin/make_otu_table.py -i wf_da/uclust_picked_otus/seqs_otus.txt -t wf_da/uclust_picked_otus/rep_set//rdp_assigned_taxonomy/seqs_rep_set_tax_assignments.txt -o wf_da/uclust_picked_otus/rep_set//rdp_assigned_taxonomy/otu_table//seqs_otu_table.txt + +python /usr/local/bin/align_seqs.py -i wf_da/uclust_picked_otus/rep_set//seqs_rep_set.fasta -o wf_da/uclust_picked_otus/rep_set//pynast_aligned_seqs --alignment_method pynast --pairwise_alignment_method uclust --min_percent_id 75.0 --min_length 150 + +python /usr/local/bin/filter_alignment.py -o wf_da/uclust_picked_otus/rep_set//pynast_aligned_seqs -i wf_da/uclust_picked_otus/rep_set//pynast_aligned_seqs/seqs_rep_set_aligned.fasta --allowed_gap_frac 0.999999 --threshold 3.0 + +python /usr/local/bin/make_phylogeny.py -i wf_da/uclust_picked_otus/rep_set//pynast_aligned_seqs/seqs_rep_set_aligned_pfiltered.fasta -o wf_da/uclust_picked_otus/rep_set//pynast_aligned_seqs/fasttree_phylogeny/seqs_rep_set.tre -l wf_da/uclust_picked_otus/rep_set//pynast_aligned_seqs/fasttree_phylogeny/seqs_rep_set_phylogeny.log --root_method tree_method_default --tree_method fasttree + + + + + diff -r 000000000000 -r e5c3175506b7 pick_reference_otus_through_otu_table.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pick_reference_otus_through_otu_table.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,94 @@ + + Reference OTU picking/Shotgun UniFrac workflow. 
+
+  pick_reference_otus_through_otu_table.py
+
+
+  qiime_wrapper.py
+  --galaxy_tmpdir='$__new_file_path__'
+  pick_reference_otus_through_otu_table.py
+  --input_fp=$input_fp
+  --reference_fp=$reference_fp
+  --output_dir=$__new_file_path__
+  #if $parameter.source == 'hist':
+    --parameter_fp=$parameter.parameter_fp
+  #else:
+    --parameter_fp=$parameter_generated
+  #end if
+  --taxonomy_fp=$taxonomy_fp
+  $force
+  $print_only
+  $parallel
+ + + + + + + + + + + + + + + + + + + + + + + + +
+
+alpha_diversity:metrics chao1,observed_species,PD_whole_tree
+multiple_rarefactions_even_depth:num-reps 20
+parallel:jobs_to_start 2
+parallel:retain_temp_files False
+parallel:seconds_to_sleep 60
+collate_alpha:example_path
+make_rarefaction_plots:imagetype png
+make_rarefaction_plots:resolution 75
+make_rarefaction_plots:background_color white
+make_rarefaction_plots:prefs_path
+ + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 pick_rep_set.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pick_rep_set.xml Sun Jul 17 10:30:11 2011 -0500
@@ -0,0 +1,47 @@
+
+  Pick representative set of sequences
+
+  pick_rep_set.py
+
+
+  qiime_wrapper.py
+  pick_rep_set.py
+  --input_file=$input_file
+  --fasta_file=$fasta_file
+  --rep_set_picking_method=$rep_set_picking_method
+  --sort_by=$sort_by
+  --reference_seqs_fp=$reference_seqs_fp
+  --log_fp=$log_fp
+  --result_fp=$result_fp
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 plot_rank_abundance_graph.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/plot_rank_abundance_graph.xml Sun Jul 17 10:30:11 2011 -0500
@@ -0,0 +1,44 @@
+
+  plot rank-abundance curve
+
+  plot_rank_abundance_graph.py
+
+
+  qiime_wrapper.py
+  --galaxy_tmpdir='$__new_file_path__'
+  plot_rank_abundance_graph.py
+  --otu_table_fp=$otu_table_fp
+  --sample_name=$sample_name
+  --output_dir=$__new_file_path__
+  $absolute_counts
+  $no_legend
+  $x_linear_scale
+  $y_linear_scale
+  --file_type=$file_type
+ + + + + + + + + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 plot_taxa_summary.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/plot_taxa_summary.xml Sun Jul 17 10:30:11 2011 -0500
@@ -0,0 +1,69 @@
+
+  Make taxonomy summary charts based on taxonomy assignment
+
+  plot_taxa_summary.py
+
+
+  qiime_wrapper.py
+  --galaxy_tmpdir='$__new_file_path__'
+  plot_taxa_summary.py
+  --input_files=$input_files
+  --labels=$labels
+  --num=$num
+  --dir-prefix=$dir_prefix
+  --colorby=$colorby
+  --prefs_path=$prefs_path
+  --background_color=$background_color
+  --dpi=$dpi
+  --x_width=$x_width
+  --y_height=$y_height
+  --bar_width=$bar_width
+  --type_of_file=$type_of_file
+  --chart_type=$chart_type
+  --resize_nth_label=$resize_nth_label
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 poller.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/poller.xml Sun Jul 17 10:30:11 2011 -0500
@@ -0,0 +1,43 @@
+
+  Poller for parallel QIIME scripts.
+ + poller.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + poller.py + --check_run_complete_file=$check_run_complete_file + --check_run_complete_f=$check_run_complete_f + --process_run_results_f=$process_run_results_f + --process_run_results_file=$process_run_results_file + --clean_up_f=$clean_up_f + --clean_up_file=$clean_up_file + --time_to_sleep=$time_to_sleep + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 poller_example.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/poller_example.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,34 @@ + + Create python file + + poller_example.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + poller_example.py + --polled_dir=$polled_dir + --poller_fp=$poller_fp + --python_exe_fp=$python_exe_fp + $suppress_custom_functions + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 pool_by_metadata.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pool_by_metadata.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,37 @@ + + pool samples in OTU table and mapping file based on sample metadata from mapping file + + pool_by_metadata.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + pool_by_metadata.py + --otu_table_fp=$otu_table_fp + --map=$map + --states=$states + --otu_outfile=$otu_outfile + --map_outfile=$map_outfile + --pooled_sample_name=$pooled_sample_name + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 principal_coordinates.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/principal_coordinates.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,25 @@ + + Principal Coordinates Analysis (PCoA) + + principal_coordinates.py + + + qiime_wrapper.py + principal_coordinates.py + --input_path=$input_path + --output_path=$output_path + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 print_qiime_config.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/print_qiime_config.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,23 @@ + + Print out the qiime config settings. 
+
+  print_qiime_config.py
+
+
+  print_qiime_config.py
+  $test
+ + + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 process_sff.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/process_sff.xml Sun Jul 17 10:30:11 2011 -0500
@@ -0,0 +1,35 @@
+
+  Convert sff to FASTA and QUAL files
+
+  process_sff.py
+
+
+  qiime_wrapper.py
+  --galaxy_tmpdir='$__new_file_path__'
+  process_sff.py
+  --input_dir=$input_dir
+  $make_flowgram
+  $convert_to_FLX
+  $use_sfftools
+  --output_dir=$__new_file_path__
+ + + + + + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 qiime_wrapper.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qiime_wrapper.py Sun Jul 17 10:30:11 2011 -0500
@@ -0,0 +1,210 @@
+#!/usr/bin/env python
+import logging, os, string, sys, tempfile, glob, shutil, types, urllib, optparse, re
+import shlex, subprocess
+
+"""
+sys.argv
+this --galaxy_datasets= --qiime_script
+
+alpha_rarefaction
+    output html
+    wf_arare/alpha_rarefaction_plots/rarefaction_plots.html
+    wf_arare/alpha_rarefaction_plots/html_plots/
+    wf_arare/alpha_div
+    wf_arare/alpha_div/alpha_rarefaction_101_0.txt
+
+    --galaxy_summary_html=$output_html
+    --galaxy_summary_template=$output_template
+    --galaxy_summary_links='label:link,label:link'
+    --galaxy_outputdir=$output_html.extra_files_path
+
+
+"""
+
+def stop_err( msg ):
+    sys.stderr.write( "%s\n" % msg )
+    sys.exit()
+
+def __main__():
+    debug = False
+    tmp_dir = None
+    inputdir = None
+    outputdir = None
+    dataset_patterns = None
+    datasetid = None
+    new_dataset_patterns = None
+    new_files_path = None
+    summary_html=None
+    summary_template=None
+    summary_links=None
+    ## check if there are files to generate
+    cmd_args = []
+    for arg in sys.argv[1:]:
+        if arg.startswith('--galaxy_'):
+            # split on the first '=' only, option values may themselves contain '='
+            (opt,val) = arg.split('=',1) if arg.find('=') > 0 else (arg,None)
+            if opt == '--galaxy_tmpdir':
+                try:
+                    if not os.path.exists(val):
+                        os.makedirs(val)
+                    tmp_dir = val
+                except Exception, ex:
+                    stop_err(ex)
+            if opt == '--galaxy_outputdir':
+                try:
+                    if not os.path.exists(val):
+                        os.makedirs(val)
+                    outputdir = val
+                except Exception, ex:
+                    stop_err(ex)
+            if opt == '--galaxy_datasets':
+                dataset_patterns = val.split(',')
+            if opt == '--galaxy_datasetid':
+                datasetid = val
+            if opt == '--galaxy_new_datasets':
+                new_dataset_patterns = val.split(',')
+            if opt == '--galaxy_new_files_path':
+                new_files_path = val
+            if opt == '--galaxy_summary_html':
+                summary_html=val
+            if opt == '--galaxy_summary_template':
+                summary_template=val
+            if opt == '--galaxy_summary_links':
+                summary_links=val
+            if opt == '--galaxy_debug':
+                debug = True
+        else:
+            cmd_args.append(arg)
+    if debug: print >> sys.stdout, '\n : '.join(cmd_args)
+    try:
+        # initialize here so the except handler below can always reference it
+        stderr = ''
+        cmdline = ' '.join(cmd_args)
+        if debug: print >> sys.stdout, cmdline
+        if tmp_dir == None or not os.path.isdir(tmp_dir):
+            tmp_dir = tempfile.mkdtemp()
+        if outputdir == None or not os.path.isdir(outputdir):
+            outputdir = tmp_dir
+        tmp_stderr_name = tempfile.NamedTemporaryFile( dir=tmp_dir,suffix='.err' ).name
+        tmp_stderr = open( tmp_stderr_name, 'wb' )
+        tmp_stdout_name = tempfile.NamedTemporaryFile( dir=tmp_dir,suffix='.out' ).name
+        tmp_stdout = open( tmp_stdout_name, 'wb' )
+        proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno(), stdout=tmp_stdout.fileno() )
+        returncode = proc.wait()
+        tmp_stderr.close()
+        # get stderr, allowing for case where it's very large
+        tmp_stderr = open( tmp_stderr_name, 'rb' )
+        buffsize = 1048576
+        try:
+            while True:
+                stderr += tmp_stderr.read( buffsize )
+                if not stderr or len( stderr ) % buffsize != 0:
+                    break
+            if debug: print >> sys.stderr, stderr
+        except OverflowError:
+            pass
+        tmp_stderr.close()
+        if returncode != 0:
+            if debug: print >> sys.stderr, "returncode = %d" % returncode
+            raise Exception, stderr
+        # collect results
+        if dataset_patterns != None:
+            for root, dirs, files in os.walk(outputdir):
+                for fname in files:
+                    fpath = os.path.join(root,fname)
+                    if dataset_patterns != None:
+                        for output in dataset_patterns:
+                            (pattern,path) = output.split(':')
+                            if debug: print >> sys.stdout, '%s -> %s' % (pattern,path)
+                            if path == None or path == 'None':
+                                continue
+                            if debug: print >> sys.stdout, 'outdir %s match: %s' % (fname,re.match(pattern,fname))
+                            if re.match(pattern,fname):
+                                found = True
+                                # flist.remove(fname)
+                                try:
+                                    shutil.copy2(fpath, path)
+                                except Exception, ex:
+                                    stop_err('%s' % ex)
+        # move result to outdir
+        # Need to flatten the dir hierarchy in order for galaxy to serve the href links
+        if summary_html != None:
+            """
+            for root, dirs, files in os.walk(outputdir):
+                if root != outputdir:
+                    for fname in files:
+                        fpath = os.path.join(root,fname)
+            """
+            ## move everything up one level
+            dlist = os.listdir(outputdir)
+            for dname in dlist:
+                dpath = os.path.join(outputdir,dname)
+                if os.path.isdir(dpath):
+                    flist = os.listdir(dpath)
+                    for fname in flist:
+                        fpath = os.path.join(dpath,fname)
+                        shutil.move(fpath,outputdir)
+            if summary_template != None:
+                shutil.copy(summary_template,summary_html)
+        """
+        flist = os.listdir(outputdir)
+        if debug: print >> sys.stdout, 'outputdir: %s' % outputdir
+        if debug: print >> sys.stdout, 'files: %s' % ','.join(flist)
+        if dataset_patterns != None:
+            for output in dataset_patterns:
+                (pattern,path) = output.split(':')
+                if debug: print >> sys.stdout, '%s -> %s' % (pattern,path)
+                if path == None or path == 'None':
+                    continue
+                for fname in flist:
+                    if debug: print >> sys.stdout, 'outdir %s match: %s' % (fname,re.match(pattern,fname))
+                    if re.match(pattern,fname):
+                        found = True
+                        flist.remove(fname)
+                        fpath = os.path.join(outputdir,fname)
+                        try:
+                            shutil.copy2(fpath, path)
+                        except Exception, ex:
+                            stop_err('%s' % ex)
+        """
+        # Handle the dynamically generated galaxy datasets
+        # http://bitbucket.org/galaxy/galaxy-central/wiki/ToolsMultipleOutput
+        # --new_datasets= specifies files to copy to the new_file_path
+        # The list items are separated by commas
+        # Each item contains: a regex pattern for matching filenames and a galaxy datatype (separated by :)
+        # The regex match.groups()[0] is used as the id name of the dataset, and must result in unique name for each output
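+        # Example (hypothetical ids): a matched file 'alpha_rarefaction_101_0.txt' with
+        # datasetid=42 and ext='txt' has no regex group, so root becomes
+        # 'alphararefaction1010' (extension stripped, underscores and dots removed),
+        # and the file is linked in as:
+        #     primary_42_alphararefaction1010_visible_txt
+        # which Galaxy then discovers as a new history dataset.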
+        if new_dataset_patterns != None and new_files_path != None and datasetid != None:
+            # listing of the output dir to match the patterns against
+            flist = os.listdir(outputdir)
+            for output in new_dataset_patterns:
+                (pattern,ext) = output.split(':')
+                for fname in flist:
+                    m = re.match(pattern,fname)
+                    if m:
+                        fpath = os.path.join(outputdir,fname)
+                        if len(m.groups()) > 0:
+                            root = m.groups()[0]
+                        else:
+                            # remove the ext from the name if it exists, galaxy will add back later
+                            # remove underscores since galaxy uses that as a field separator for dynamic datasets
+                            root = re.sub('\.?'+ext+'$','',fname).replace('_','').replace('.','')
+                        # filename pattern required by galaxy
+                        fn = "%s_%s_%s_%s_%s" % ( 'primary', datasetid, root, 'visible', ext )
+                        if debug: print >> sys.stdout, '> %s' % fpath
+                        if debug: print >> sys.stdout, '< %s' % os.path.join(new_files_path,fn)
+                        try:
+                            os.link(fpath, os.path.join(new_files_path,fn))
+                        except:
+                            shutil.copy2(fpath, os.path.join(new_files_path,fn))
+
+    except Exception, e:
+        msg = str(e) + stderr
+        stop_err( 'Error running ' + msg)
+    finally:
+        # Only remove temporary directories
+        # Enclose in try block, so we don't report error on stale nfs handles
+        try:
+            if inputdir != None and os.path.exists(inputdir):
+                shutil.rmtree(inputdir)
+        except:
+            pass
+
+if __name__ == "__main__": __main__()
+
diff -r 000000000000 -r e5c3175506b7 quality_scores_plot.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/quality_scores_plot.xml Sun Jul 17 10:30:11 2011 -0500
@@ -0,0 +1,30 @@
+
+  Generates histograms of sequence quality scores and number of nucleotides recorded at a particular index
+
+  quality_scores_plot.py
+
+
+  qiime_wrapper.py
+  --galaxy_tmpdir='$__new_file_path__'
+  quality_scores_plot.py
+  --qual_fp=$qual_fp
+  --output_dir=$__new_file_path__
+  --score_min=$score_min
+  --verbose
+ + + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 shared_phylotypes.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/shared_phylotypes.xml Sun Jul 17 10:30:11 2011 -0500
@@ -0,0 +1,32 @@
+
+  Compute shared OTUs between all pairs of samples
+
+  shared_phylotypes.py
+
+
+  qiime_wrapper.py
+  --galaxy_tmpdir='$__new_file_path__'
+  shared_phylotypes.py
+  --otu_table_fp=$otu_table_fp
+  --output_fp=$output_fp
+  --reference_sample=$reference_sample
+  $force_overwrite
+ + + + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 single_rarefaction.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/single_rarefaction.xml Sun Jul 17 10:30:11 2011 -0500
@@ -0,0 +1,34 @@
+
+  Perform rarefaction on an otu table
+
+  single_rarefaction.py
+
+
+  qiime_wrapper.py
+  single_rarefaction.py
+  --input_path=$input_path
+  --output_path=$output_path
+  --depth=$depth
+  $lineages_included
+  $keep_empty_otus
+ + + + + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 sort_denoiser_output.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sort_denoiser_output.xml Sun Jul 17 10:30:11 2011 -0500
@@ -0,0 +1,26 @@
+
+  Sort denoiser output by cluster size.
+
+  sort_denoiser_output.py
+
+
+  qiime_wrapper.py
+  --galaxy_tmpdir='$__new_file_path__'
+  sort_denoiser_output.py
+  --input_fasta_fp=$input_fasta_fp
+  --output_file=$output_file
+ + + + + + + + + + + + +
diff -r 000000000000 -r e5c3175506b7 sort_otu_table.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sort_otu_table.xml Sun Jul 17 10:30:11 2011 -0500
@@ -0,0 +1,35 @@
+
+  Script for sorting the sample IDs in an OTU table based on a specified value in a mapping file.
+ + sort_otu_table.py + + + qiime_wrapper.py + sort_otu_table.py + --input_otu_table=$input_otu_table + --mapping_fp=$mapping_fp + --sort_field=$sort_field + --output_fp=$output_fp + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 split_libraries.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/split_libraries.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,132 @@ + + Split libraries according to barcodes specified in mapping file + + split_libraries.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + --galaxy_datasets='^seqs.fna$:'$sequences,'histograms.txt:'$histograms,'split_library_log.txt:'$log + split_libraries.py + --map=$map + #set fnas = [] + #for i in $inputs: + #set fnas = $fnas + [$i.fasta.__str__] + #end for + --fasta=#echo ','.join($fnas) + #set quals = [] + #for i in $inputs: + #if $i.qual != None and $i.qual.__str__ != 'None': + #set quals = $quals + [$i.qual.__str__] + #end if + #end for + #if len($quals) > 0: + --qual=#echo ','.join($quals) + #end if + #if len($min_seq_length.__str__) > 0 and $min_seq_length > 0: + --min-seq-length=$min_seq_length + #end if + #if len($max_seq_length.__str__) > 0: + --max-seq-length=$max_seq_length + #end if + $trim_seq_length + #if len($min_qual_score.__str__) > 0: + --min-qual-score=$min_qual_score + #end if + $keep_primer + $keep_barcode + #if len($max_ambig.__str__) > 0: + --max-ambig=$max_ambig + #end if + #if len($max_homopolymer.__str__) > 0: + --max-homopolymer=$max_homopolymer + #end if + #if len($max_primer_mismatch.__str__) > 0: + --max-primer-mismatch=$max_primer_mismatch + #end if + --barcode-type=$barcode_type + ## --dir-prefix=$dir_prefix + #if $max_barcode_errors >= 0.: + --max-barcode-errors=$max_barcode_errors + #end if + #if len($start_numbering_at.__str__) > 0: + --start-numbering-at=$start_numbering_at + #end if + $remove_unassigned + $disable_bc_correction + #if len($qual_score_window.__str__) > 0: + --qual_score_window=$qual_score_window + #end if + $disable_primers + --reverse_primers=$reverse_primers + $record_qual_scores + $discard_bad_windows + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 split_libraries_illumina.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/split_libraries_illumina.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,56 @@ + + Script for processing raw Illumina Genome Analyzer II data. + + split_libraries_illumina.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + split_libraries_illumina.py + --mapping_fp=$mapping_fp + --five_prime_read_fp=$five_prime_read_fp + --three_prime_read_fp=$three_prime_read_fp + --output_dir=$__new_file_path__ + $store_unassigned + --quality_threshold=$quality_threshold + --max_bad_run_length=$max_bad_run_length + --min_per_read_length=$min_per_read_length + --sequence_max_n=$sequence_max_n + --start_seq_id=$start_seq_id + $rev_comp_barcode + $barcode_in_header + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 start_parallel_jobs.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/start_parallel_jobs.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,28 @@ + + Starts multiple jobs in parallel on multicore or multiprocessor systems. 
+ + start_parallel_jobs.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + start_parallel_jobs.py + $make_jobs + $submit_jobs + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 submit_to_mgrast.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/submit_to_mgrast.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,32 @@ + + This script submits a FASTA file to MG-RAST + + submit_to_mgrast.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + submit_to_mgrast.py + --input_fasta_fp=$input_fasta_fp + --web_key_auth=$web_key_auth + --project_id=$project_id + --output_dir=$__new_file_path__ + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 summarize_otu_by_cat.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/summarize_otu_by_cat.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,37 @@ + + Create a summarized OTU table for a specific metadata category + + summarize_otu_by_cat.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + summarize_otu_by_cat.py + --input_map=$input_map + --otu_file=$otu_file + --meta_category=$meta_category + --dir-prefix=$dir_prefix + $normalize_flag + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 summarize_taxa.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/summarize_taxa.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,42 @@ + + Summarize Taxa + + summarize_taxa.py + + + qiime_wrapper.py + summarize_taxa.py + --otu_table_fp=$otu_table_fp + --output_fp=$output_fp + --level=$level + #if $mapping != None and $mapping.__str__ != 'None': + --mapping=$mapping + #end if + #if $delimiter != ';': + --delimiter=$delimiter + #end if + ## --relative_abundance=$relative_abundance + $absolute_abundance + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 supervised_learning.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/supervised_learning.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,62 @@ + + Run supervised classification using OTUs as predictors and a mapping file category as class labels. 
+ + supervised_learning.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + supervised_learning.py + --input_data=$input_data + --mapping_file=$mapping_file + --category=$category + --output_dir=$__new_file_path__ + --method=$method + $force + --param_file=$param_file + $show_params + --filter_type=$filter_type + --filter_min=$filter_min + --filter_max=$filter_max + --filter_step=$filter_step + --filter_reps=$filter_reps + $keepfiles + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 transform_coordinate_matrices.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/transform_coordinate_matrices.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,38 @@ + + Transform 2 coordinate matrices + + transform_coordinate_matrices.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + transform_coordinate_matrices.py + --input_fps=$input_fps + --output_dir=$__new_file_path__ + --random_trials=$random_trials + --num_dimensions=$num_dimensions + --sample_id_map_fp=$sample_id_map_fp + $store_trial_details + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 tree_compare.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tree_compare.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,29 @@ + + Compare jackknifed/bootstrapped trees + + tree_compare.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + tree_compare.py + --master_tree=$master_tree + --support_dir=$support_dir + --output_dir=$__new_file_path__ + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 trflp_file_to_otu_table.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trflp_file_to_otu_table.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,26 @@ + + Convert TRFLP text file to an OTU table + + trflp_file_to_otu_table.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + trflp_file_to_otu_table.py + --input_path=$input_path + --output_path=$output_path + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 trim_sff_primers.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trim_sff_primers.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,40 @@ + + Trim sff primers + + trim_sff_primers.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + trim_sff_primers.py + --libdir=$libdir + --input_map=$input_map + --sfffile_path=$sfffile_path + --sffinfo_path=$sffinfo_path + $use_sfftools + $debug + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 truncate_fasta_qual_files.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/truncate_fasta_qual_files.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,32 @@ + + Generates filtered fasta and quality score files by truncating at the specified base position. + + truncate_fasta_qual_files.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + truncate_fasta_qual_files.py + --fasta_fp=$fasta_fp + --qual_fp=$qual_fp + --base_pos=$base_pos + --output_dir=$__new_file_path__ + + + + + + + + + + + + + + + + diff -r 000000000000 -r e5c3175506b7 upgma_cluster.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/upgma_cluster.xml Sun Jul 17 10:30:11 2011 -0500 @@ -0,0 +1,26 @@ + + Build a UPGMA tree comparing samples + + upgma_cluster.py + + + qiime_wrapper.py + --galaxy_tmpdir='$__new_file_path__' + upgma_cluster.py + --input_path=$input_path + --output_path=$output_path + + + + + + + + + + + + + +
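+
+A note on testing the datatype sniffers: the sniff methods in metagenomics.py
+(e.g. QiimeMetadataMapping above) can be exercised outside a running Galaxy by
+replaying the same header check on sample text. A minimal standalone sketch
+(the mapping file content is hypothetical; inside Galaxy, sniff is called
+automatically on upload):
+
+    import re
+
+    # the same header pattern used by QiimeMetadataMapping.sniff
+    pat = '#SampleID(\t[a-zA-Z][a-zA-Z0-9_]*)*\tDescription'
+
+    # hypothetical minimal qiime mapping file: header plus one sample row
+    sample = ('#SampleID\tBarcodeSequence\tLinkerPrimerSequence\tDescription\n'
+              'PC.354\tAGCACGAGCCTA\tYATGCTGCCTCCCGTAGGAGT\tControl_mouse\n')
+    for line in sample.splitlines():
+        if re.match(pat, line):
+            print 'sniffed as qiimemapping'
+            break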