comparison generate_test_data.sh.orig @ 1:33a751525a8f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime/ commit a9d1e0debcd357d8080a1c6c5f1d206dd45a7a4d
author iuc
date Fri, 19 May 2017 03:53:11 -0400
parents
children
comparison
equal deleted inserted replaced
0:0a7b6f4036dd 1:33a751525a8f
1 #!/usr/bin/env bash
2
3 # validate_mapping_file: run on test-data/validate_mapping_file/map.tsv (passing -c _) into a scratch directory, copy the generated .html, .log and corrected.txt files back next to the input under map.tsv-based names, then delete the scratch directory.
4 validate_mapping_file.py \
5 -m 'test-data/validate_mapping_file/map.tsv' \
6 -o validate_mapping_file_output \
7 -c '_'
8 cp validate_mapping_file_output/*.html 'test-data/validate_mapping_file/map.tsv.html'
9 cp validate_mapping_file_output/*.log 'test-data/validate_mapping_file/map.tsv.log'
10 cp validate_mapping_file_output/*corrected.txt 'test-data/validate_mapping_file/map.tsv_corrected.txt'
11 rm -rf validate_mapping_file_output
12
13 # split_libraries_fastq: run on the forward reads and barcodes under test-data/split_libraries_fastq, copy histograms.txt, seqs.fna, seqs.qual and seqs.fastq from the scratch directory into test-data/split_libraries_fastq under new names, then remove the scratch directory. NOTE(review): the mapping file is read from test-data/map.tsv rather than from test-data/split_libraries_fastq like the other inputs - confirm this path is intended.
14 split_libraries_fastq.py \
15 --sequence_read_fps 'test-data/split_libraries_fastq/forward_reads.fastq' \
16 -o split_libraries \
17 --mapping_fps 'test-data/map.tsv' \
18 --barcode_read_fps 'test-data/split_libraries_fastq/barcodes.fastq' \
19 --store_qual_scores \
20 --store_demultiplexed_fastq \
21 --max_bad_run_length 3 \
22 --min_per_read_length_fraction 0.75 \
23 --sequence_max_n 0 \
24 --start_seq_id 0 \
25 --barcode_type 'golay_12' \
26 --max_barcode_errors 1.5
27 cp split_libraries/histograms.txt 'test-data/split_libraries_fastq/histograms.tabular'
28 cp split_libraries/seqs.fna 'test-data/split_libraries_fastq/sequences.fasta'
29 cp split_libraries/seqs.qual 'test-data/split_libraries_fastq/sequence_qualities.qual'
30 cp split_libraries/seqs.fastq 'test-data/split_libraries_fastq/demultiplexed_sequences.fastq'
31 rm -rf split_libraries
32
33 # split_libraries: run on the two comma-separated fasta/qual pairs under test-data/split_libraries with the mapping file from the same directory, copy seqs.fna, split_library_log.txt (stored without the .txt extension), histograms.txt and seqs_filtered.qual into test-data/split_libraries, then remove the scratch directory.
34 split_libraries.py \
35 --map 'test-data/split_libraries/mapping_file.txt' \
36 -o split_libraries \
37 --fasta 'test-data/split_libraries/reads_1.fna,test-data/split_libraries/reads_2.fna' \
38 --qual 'test-data/split_libraries/reads_1.qual,test-data/split_libraries/reads_2.qual' \
39 --min_qual_score 25 \
40 --qual_score_window 0 \
41 --record_qual_scores \
42 --min_seq_length 200 \
43 --max_seq_length 1000 \
44 --max_ambig 6 \
45 --max_homopolymer 6 \
46 --max_primer_mismatch 0 \
47 --barcode_type 'golay_12' \
48 --max_barcode_errors 1.5 \
49 --start_numbering_at 1
50 cp split_libraries/seqs.fna 'test-data/split_libraries/seqs.fna'
51 cp split_libraries/split_library_log.txt 'test-data/split_libraries/split_library_log'
52 cp split_libraries/histograms.txt 'test-data/split_libraries/histograms.txt'
53 cp split_libraries/seqs_filtered.qual 'test-data/split_libraries/seqs_filtered.qual'
54 rm -rf split_libraries
55
56 # pick_open_reference_otus: three runs on the same sequences.fasta and reference fasta that differ in --min_otu_size (2, 3, 10) and in the suppress flags (run 1: none; run 2: --suppress_taxonomy_assignment and --suppress_align_and_tree; run 3: --suppress_taxonomy_assignment only). Each run copies final_otu_map.txt and the file matching final_otu_map_mc*.txt into test-data/pick_open_reference_otus with the run number as prefix; runs 1 and 3 also copy rep_set.tre, run 2 does not. The mc*.txt glob is copied onto a single file name, so it presumably matches exactly one file. Each scratch directory is removed after copying.
57 pick_open_reference_otus.py \
58 --input_fps 'test-data/pick_open_reference_otus/sequences.fasta' \
59 -o pick_open_reference_otus_1 \
60 --reference_fp 'test-data/gg_13_8_79_otus.fasta' \
61 --otu_picking_method 'uclust' \
62 --new_ref_set_id 'New' \
63 --parallel \
64 --percent_subsample '0.001' \
65 --prefilter_percent_id '0.0' \
66 --minimum_failure_threshold '100000' \
67 --min_otu_size '2'
68 cp pick_open_reference_otus_1/final_otu_map.txt 'test-data/pick_open_reference_otus/1_final_otu_map.txt'
69 cp pick_open_reference_otus_1/final_otu_map_mc*.txt 'test-data/pick_open_reference_otus/1_final_otu_map_mc.txt'
70 cp pick_open_reference_otus_1/rep_set.tre 'test-data/pick_open_reference_otus/1_rep_set_tree.tre'
71 rm -rf pick_open_reference_otus_1
72
73 pick_open_reference_otus.py \
74 --input_fps 'test-data/pick_open_reference_otus/sequences.fasta' \
75 -o pick_open_reference_otus_2 \
76 --reference_fp 'test-data/gg_13_8_79_otus.fasta' \
77 --otu_picking_method 'uclust' \
78 --new_ref_set_id 'New' \
79 --parallel \
80 --percent_subsample '0.001' \
81 --prefilter_percent_id '0.0' \
82 --minimum_failure_threshold '100000' \
83 --min_otu_size '3' \
84 --suppress_taxonomy_assignment \
85 --suppress_align_and_tree
86 cp pick_open_reference_otus_2/final_otu_map.txt 'test-data/pick_open_reference_otus/2_final_otu_map.txt'
87 cp pick_open_reference_otus_2/final_otu_map_mc*.txt 'test-data/pick_open_reference_otus/2_final_otu_map_mc.txt'
88 rm -rf pick_open_reference_otus_2
89
90 pick_open_reference_otus.py \
91 --input_fps 'test-data/pick_open_reference_otus/sequences.fasta' \
92 -o pick_open_reference_otus_3 \
93 --reference_fp 'test-data/gg_13_8_79_otus.fasta' \
94 --otu_picking_method 'uclust' \
95 --new_ref_set_id 'New' \
96 --parallel \
97 --percent_subsample '0.001' \
98 --prefilter_percent_id '0.0' \
99 --minimum_failure_threshold '100000' \
100 --min_otu_size '10' \
101 --suppress_taxonomy_assignment
102 cp pick_open_reference_otus_3/final_otu_map.txt 'test-data/pick_open_reference_otus/3_final_otu_map.txt'
103 cp pick_open_reference_otus_3/final_otu_map_mc*.txt 'test-data/pick_open_reference_otus/3_final_otu_map_mc.txt'
104 cp pick_open_reference_otus_3/rep_set.tre 'test-data/pick_open_reference_otus/3_rep_set_tree.tre'
105 rm -rf pick_open_reference_otus_3
106
107 # core_diversity_analyses: two runs on the same otu_table.biom and map.txt with --sampling_depth 22. Run 1 passes a tree and keeps bdiv_even22/unweighted_unifrac_pc.txt as test data; run 2 passes --nonphylogenetic_diversity plus four --suppress_* flags, and nothing is copied from its output directory before it is removed.
108 # Input data are taken from the test data in https://github.com/biocore/qiime
109 core_diversity_analyses.py \
110 --input_biom_fp 'test-data/core_diversity_analyses/otu_table.biom' \
111 -o core_diversity_analyses_1 \
112 --mapping_fp 'test-data/core_diversity_analyses/map.txt' \
113 --sampling_depth 22 \
114 --tree_fp 'test-data/core_diversity_analyses/rep_set.tre'
115 cp core_diversity_analyses_1/bdiv_even22/unweighted_unifrac_pc.txt 'test-data/core_diversity_analyses/unweighted_unifrac_pc.txt'
116 rm -rf core_diversity_analyses_1
117
118 core_diversity_analyses.py \
119 --input_biom_fp 'test-data/core_diversity_analyses/otu_table.biom' \
120 -o core_diversity_analyses_2 \
121 --mapping_fp 'test-data/core_diversity_analyses/map.txt' \
122 --sampling_depth 22 \
123 --nonphylogenetic_diversity \
124 --suppress_taxa_summary \
125 --suppress_beta_diversity \
126 --suppress_alpha_diversity \
127 --suppress_group_significance
128 rm -rf core_diversity_analyses_2
129
130 # summarize_taxa: first copy otu_table.biom and map.txt from test-data/core_diversity_analyses into test-data/summarize_taxa as inputs. Run 1 passes the mapping file and -L 2,3,4,5,6 and keeps one file per level (1_L2.txt to 1_L6.txt); run 2 passes no mapping file and -L 3,6 and keeps 2_L3.txt and 2_L6.txt. Each scratch directory is removed after copying.
131 cp 'test-data/core_diversity_analyses/otu_table.biom' 'test-data/summarize_taxa/otu_table.biom'
132 cp 'test-data/core_diversity_analyses/map.txt' 'test-data/summarize_taxa/map.txt'
133
134 summarize_taxa.py \
135 -i 'test-data/summarize_taxa/otu_table.biom' \
136 -o summarize_taxa_1 \
137 -L '2,3,4,5,6' \
138 -m 'test-data/summarize_taxa/map.txt' \
139 --md_identifier "taxonomy" \
140 --delimiter ";"
141 cp summarize_taxa_1/*_L2.txt "test-data/summarize_taxa/1_L2.txt"
142 cp summarize_taxa_1/*_L3.txt "test-data/summarize_taxa/1_L3.txt"
143 cp summarize_taxa_1/*_L4.txt "test-data/summarize_taxa/1_L4.txt"
144 cp summarize_taxa_1/*_L5.txt "test-data/summarize_taxa/1_L5.txt"
145 cp summarize_taxa_1/*_L6.txt "test-data/summarize_taxa/1_L6.txt"
146 rm -rf summarize_taxa_1
147
148 summarize_taxa.py \
149 -i 'test-data/summarize_taxa/otu_table.biom' \
150 -o summarize_taxa_2 \
151 -L '3,6' \
152 --md_identifier "taxonomy" \
153 --delimiter ";"
154 cp summarize_taxa_2/*_L3.txt "test-data/summarize_taxa/2_L3.txt"
155 cp summarize_taxa_2/*_L6.txt "test-data/summarize_taxa/2_L6.txt"
156 rm -rf summarize_taxa_2
157
158 # make_emperor: inputs are copied from files produced earlier in this script (unweighted_unifrac_pc.txt and map.txt from test-data/core_diversity_analyses, 2_L3.txt from test-data/summarize_taxa). Both runs use the same coordinates, mapping file, --number_of_axes 10, --add_unique_columns and --number_of_segments 8; run 2 additionally passes --taxa_fp and --n_taxa_to_keep 10. Nothing is copied from either output directory before it is removed.
159 cp 'test-data/core_diversity_analyses/unweighted_unifrac_pc.txt' 'test-data/make_emperor/unweighted_unifrac_pc.txt'
160 cp 'test-data/core_diversity_analyses/map.txt' 'test-data/make_emperor/map.txt'
161 cp 'test-data/summarize_taxa/2_L3.txt' 'test-data/make_emperor/2_L3.txt'
162
163 make_emperor.py \
164 --input_coords 'test-data/make_emperor/unweighted_unifrac_pc.txt' \
165 -o make_emperor_1 \
166 --map_fp 'test-data/make_emperor/map.txt' \
167 --number_of_axes '10' \
168 --add_unique_columns \
169 --number_of_segments 8
170 rm -rf make_emperor_1
171
172 make_emperor.py \
173 --input_coords 'test-data/make_emperor/unweighted_unifrac_pc.txt' \
174 -o make_emperor_2 \
175 --map_fp 'test-data/make_emperor/map.txt' \
176 --number_of_axes '10' \
177 --add_unique_columns \
178 --number_of_segments 8 \
179 --taxa_fp 'test-data/make_emperor/2_L3.txt' \
180 --n_taxa_to_keep 10
181 rm -rf make_emperor_2
182
183 # alpha_rarefaction: run on the OTU table, mapping file and tree under test-data/alpha_rarefaction with --num_steps 2, --min_rare_depth 10, --max_rare_depth 50 and --retain_intermediate_files; the output directory is removed without copying anything into test-data.
184 alpha_rarefaction.py \
185 --otu_table_fp "test-data/alpha_rarefaction/otu_table.biom" \
186 --mapping_fp "test-data/alpha_rarefaction/mapping_file.txt" \
187 -o alpha_rarefaction \
188 --num_steps '2' \
189 --tree_fp "test-data/alpha_rarefaction/rep_set.tre" \
190 --min_rare_depth '10' \
191 --max_rare_depth '50' \
192 --retain_intermediate_files
193 rm -rf alpha_rarefaction
194
195 ##beta_diversity
196 beta_diversity.py \
197 --input_path 'test-data/beta_diversity/otu_table.biom' \
198 -o beta_diversity_1 \
199 --metrics 'unweighted_unifrac,weighted_unifrac' \
200 --tree_path 'test-data/beta_diversity/rep_set.tre'
201 md5 'beta_diversity_1/unweighted_unifrac_otu_table.txt'
202 md5 'beta_diversity_1/weighted_unifrac_otu_table.txt'
203 rm -rf beta_diversity_1
204
205 beta_diversity.py \
206 --input_path 'test-data/beta_diversity/otu_table.biom' \
207 -o beta_diversity_2 \
208 --metrics 'abund_jaccard,binary_chisq,binary_chord,binary_euclidean,binary_hamming,binary_jaccard,binary_lennon,binary_ochiai,binary_pearson,binary_sorensen_dice,bray_curtis,canberra,chisq,chord,euclidean,gower,hellinger,kulczynski,manhattan,morisita_horn,pearson,soergel,spearman_approx,specprof,unifrac_g,unifrac_g_full_tree,unweighted_unifrac,unweighted_unifrac_full_tree,weighted_normalized_unifrac,weighted_unifrac' \
209 --tree_path 'test-data/beta_diversity/rep_set.tre'
210 md5 'beta_diversity_2/canberra_otu_table.txt'
211 md5 'beta_diversity_2/pearson_otu_table.txt'
212 rm -rf beta_diversity_2
213
214 # jackknifed_beta_diversity: run on the OTU table, mapping file and tree under test-data/jackknifed_beta_diversity with --seqs_per_sample 10, --master_tree consensus and --parallel; the output directory is removed without copying anything into test-data.
215 jackknifed_beta_diversity.py \
216 --otu_table_fp 'test-data/jackknifed_beta_diversity/otu_table.biom' \
217 --mapping_fp 'test-data/jackknifed_beta_diversity/map.txt' \
218 -o jackknifed_beta_diversity \
219 --seqs_per_sample '10' \
220 --tree_fp 'test-data/jackknifed_beta_diversity/rep_set.tre' \
221 --master_tree 'consensus' \
222 --parallel
223 rm -rf jackknifed_beta_diversity
224
225 # beta_diversity_through_plots: run on the OTU table, mapping file and tree under test-data/beta_diversity_through_plots with --parallel, copy the unweighted and weighted unifrac _dm.txt and _pc.txt files into that test-data directory under their original names, then remove the scratch directory.
226 beta_diversity_through_plots.py \
227 --otu_table_fp 'test-data/beta_diversity_through_plots/otu_table.biom' \
228 --mapping_fp 'test-data/beta_diversity_through_plots/map.txt' \
229 --output_dir beta_diversity_through_plots \
230 --tree_fp 'test-data/beta_diversity_through_plots/rep_set.tre' \
231 --parallel
232 cp beta_diversity_through_plots/unweighted_unifrac_dm.txt 'test-data/beta_diversity_through_plots/'
233 cp beta_diversity_through_plots/unweighted_unifrac_pc.txt 'test-data/beta_diversity_through_plots/'
234 cp beta_diversity_through_plots/weighted_unifrac_dm.txt 'test-data/beta_diversity_through_plots/'
235 cp beta_diversity_through_plots/weighted_unifrac_pc.txt 'test-data/beta_diversity_through_plots/'
236 rm -rf beta_diversity_through_plots
237
238 # assign_taxonomy: only the uclust run (first) and the sortmerna run (last) are active; the rdp, rtax and mothur invocations in between are commented out. The uclust run reads uclust_input_seqs.fasta and keeps uclust_input_seqs_tax_assignments.txt as uclust_taxonomic_assignation.txt. The sortmerna run reads mothur_ref_seq_set.fna and keeps sortmerna_map.blast plus mothur_ref_seq_set_tax_assignments.txt (as sortmerna_taxonomic_assignation.txt). Each scratch directory is removed after copying.
239 assign_taxonomy.py \
240 --input_fasta_fp 'test-data/assign_taxonomy/uclust_input_seqs.fasta' \
241 --assignment_method 'uclust' \
242 --min_consensus_fraction '0.51' \
243 --similarity '0.9' \
244 --uclust_max_accepts '3' \
245 -o assign_taxonomy_uclust
246 cp assign_taxonomy_uclust/uclust_input_seqs_tax_assignments.txt 'test-data/assign_taxonomy/uclust_taxonomic_assignation.txt'
247 rm -rf assign_taxonomy_uclust
248
249 #assign_taxonomy.py \
250 # --input_fasta_fp 'test-data/assign_taxonomy/rdp_input_seqs.fasta' \
251 # --id_to_taxonomy_fp 'test-data/assign_taxonomy/rdp_id_to_taxonomy.txt' \
252 # --assignment_method 'rdp' \
253 # --confidence '3' \
254 # -o assign_taxonomy_rdp
255
256 #assign_taxonomy.py \
257 # --input_fasta_fp 'test-data/assign_taxonomy/rtax_ref_seq_set.fna' \
258 # --id_to_taxonomy_fp 'test-data/assign_taxonomy/rtax_id_to_taxonomy.txt' \
259 # --assignment_method 'rtax' \
260 # --read_1_seqs_fp 'test-data/assign_taxonomy/read_1.seqs.fna' \
261 # --read_2_seqs_fp 'test-data/assign_taxonomy/read_2.seqs.fna' \
262 # --single_ok \
263 # --no_single_ok_generic \
264 # --read_id_regex "\S+\s+(\S+)" \
265 # --amplicon_id_regex "(\S+)\s+(\S+?)\/" \
266 # --header_id_regex "\S+\s+(\S+?)\/" \
267 # -o assign_taxonomy_rtax
268 #ls assign_taxonomy_rtax
269
270 #assign_taxonomy.py \
271 # --input_fasta_fp 'test-data/assign_taxonomy/mothur_ref_seq_set.fna' \
272 # --id_to_taxonomy_fp 'test-data/assign_taxonomy/mothur_id_to_taxonomy.txt' \
273 # --assignment_method 'mothur' \
274 # --confidence 0.5 \
275 # -o assign_taxonomy_mothur
276 #ls assign_taxonomy_mothur
277
278 assign_taxonomy.py \
279 --input_fasta_fp 'test-data/assign_taxonomy/mothur_ref_seq_set.fna' \
280 --assignment_method 'sortmerna' \
281 --min_consensus_fraction "0.51" \
282 --similarity "0.9" \
283 --sortmerna_e_value "1.0" \
284 --sortmerna_coverage "0.9" \
285 --sortmerna_best_N_alignments "5" \
286 -o assign_taxonomy_sortmerna
287 cp assign_taxonomy_sortmerna/sortmerna_map.blast 'test-data/assign_taxonomy/sortmerna_map.blast'
288 cp assign_taxonomy_sortmerna/mothur_ref_seq_set_tax_assignments.txt 'test-data/assign_taxonomy/sortmerna_taxonomic_assignation.txt'
289 rm -rf assign_taxonomy_sortmerna
290
291 # filter_otus_from_otu_table: three runs on test-data/filter_otus_from_otu_table/otu_table.biom.
292 # Each run writes its result straight into test-data via --output_fp, so there is no scratch directory to copy from or remove.
293 # Run 1 passes --min_count/--max_count/--min_samples/--max_samples bounds; runs 2 and 3 pass chimeric_otus.txt via --otu_ids_to_exclude_fp, run 3 with --negate_ids_to_exclude added.
294 filter_otus_from_otu_table.py \
295 --input_fp 'test-data/filter_otus_from_otu_table/otu_table.biom' \
296 --min_count '2' \
297 --max_count '1000' \
298 --min_samples '5' \
299 --max_samples '350' \
300 --output_fp 'test-data/filter_otus_from_otu_table/filtered_otu_table.biom'
301
302 filter_otus_from_otu_table.py \
303 --input_fp 'test-data/filter_otus_from_otu_table/otu_table.biom' \
304 --otu_ids_to_exclude_fp 'test-data/filter_otus_from_otu_table/chimeric_otus.txt' \
305 --output_fp 'test-data/filter_otus_from_otu_table/chimera_filtered_otu_table.biom'
306
307 filter_otus_from_otu_table.py \
308 --input_fp 'test-data/filter_otus_from_otu_table/otu_table.biom' \
309 --otu_ids_to_exclude_fp 'test-data/filter_otus_from_otu_table/chimeric_otus.txt' \
310 --negate_ids_to_exclude \
311 --output_fp 'test-data/filter_otus_from_otu_table/chimera_picked_otu_table.biom'