Mercurial > repos > brenninc > directory_reader_limited_by_data_table
comparison data_reader.xml @ 0:b737d0ed42be draft default tip
Uploaded
| author | brenninc |
|---|---|
| date | Tue, 21 Jun 2016 03:38:52 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:b737d0ed42be |
|---|---|
| 1 <tool id="directory_table_reader" name="Directory Data Reader" version="0.2"> | |
| 2 <description>Reads data from preconfigured directories table.</description> | |
| 3 <!-- Runs directory_copier.py from the tool directory. Free-text filters, the source path and the listing path are single-quoted so the shell passes them through without expansion. --> <command> | |
| 4 <![CDATA[ | |
| 5 python '$__tool_directory__/directory_copier.py' | |
| 6 --ending .${directory.fields.original_extension} | |
| 7 --new_ending .${directory.fields.galaxy_extension} | |
| 8 #if $results.required=="data" | |
| 10 --decompress ${directory.fields.decompress} | |
| 11 #if $results.start | |
| 12 --start '$results.start' | |
| 13 #end if | |
| 14 #if $results.last | |
| 15 --last '$results.last' | |
| 16 #end if | |
| 17 #if $results.regex | |
| 18 --regex '$results.regex' | |
| 19 #end if | |
| 20 #end if | |
| 21 --path '${directory.fields.path}' | |
| 22 --list '${listing}' | |
| 23 ]]> | |
| 24 </command> | |
| 25 <inputs> | |
| 26 <param name="directory" type="select" label="Directory to import data from"> <!-- choices are read from the directory_data data table --> | |
| 27 <options from_data_table="directory_data"/> | |
| 28 <validator type="no_options" message="No Data Directory Setup"/> | |
| 29 </param> | |
| 30 <param name="list_name" type="text" size="25" label="output name" value="input data"/> | |
| 31 <conditional name="results"> <!-- the file-name filters are only offered when data, not just a listing, is requested --> | |
| 32 <param name="required" type="select" label="Download data or just directory listing" help="Select type of action required."> | |
| 33 <option value="data" selected="true">Data and listing of selected type</option> | |
| 34 <option value="listing">Get listing of selected file types</option> | |
| 35 </param> | |
| 36 <when value="data"> | |
| 37 <param name="start" type="text" value="" label="String which must be at the start of each file name" /> | |
| 38 <param name="last" type="text" value="" label="String which must be at the end of the file name (excluding the file type)" /> | |
| 39 <param name="regex" type="text" value="" label="Regex pattern which must be found somewhere in the file name (excluding the file type)" > | |
| 40 <sanitizer> <!-- accept every printable character so regex metacharacters are kept as typed --> | |
| 41 <valid initial="string.printable"/> | |
| 42 </sanitizer> | |
| 43 </param> | |
| 44 </when> | |
| 45 <when value="listing"/> | |
| 46 </conditional> | |
| 47 </inputs> | |
| 48 <outputs> | |
| 49 <data name="listing" format="txt" label="List of files in $list_name"/> | |
| 51 <!-- Ideally Galaxy would work out the datatype from the file extensions; if it can, just add the type here. --> | |
| 52 <collection name="data_collection" type="list" label="$list_name"> | |
| 53 <filter>(results['required'] == 'data')</filter> | |
| 54 <discover_datasets directory="output" pattern="__designation_and_ext__" visible="true"/> | |
| 55 </collection> | |
| 56 </outputs> | |
| 57 <tests> | |
| 58 <test> <!-- listing only: the collection is filtered out, so only the listing file is checked --> | |
| 59 <param name="directory" value="fastq.gz_files_id" /> | |
| 60 <param name="list_name" value="test_files" /> | |
| 61 <param name="results|required" value="listing"/> | |
| 62 <output name="listing"> | |
| 63 <assert_contents> | |
| 64 <has_line line="sample1.fastqsanger" /> | |
| 65 <has_line line="other.fastqsanger" /> | |
| 66 </assert_contents> | |
| 67 </output> | |
| 68 </test> | |
| 69 <test> <!-- default action (data): checks the listing and both collection elements --> | |
| 70 <param name="directory" value="fastq.gz_files_id" /> | |
| 71 <output name="listing"> | |
| 72 <assert_contents> | |
| 73 <has_line line="sample1.fastqsanger" /> | |
| 74 </assert_contents> | |
| 75 </output> | |
| 76 <output_collection name="data_collection" type="list"> | |
| 77 <element name="sample1" ftype="fastqsanger" file="sample1.fastq" /> | |
| 78 <element name="other" ftype="fastqsanger" file="other.fastq" /> | |
| 79 </output_collection> | |
| 80 </test> | |
| 81 <test> <!-- start filter "sam": sample1 is kept, other is excluded --> | |
| 82 <param name="directory" value="fastq_files_id" /> | |
| 83 <param name="results|start" value="sam" /> | |
| 84 <output name="listing"> | |
| 85 <assert_contents> | |
| 86 <has_line line="sample1.fastq" /> | |
| 87 <not_has_text text="other.fastq" /> | |
| 88 </assert_contents> | |
| 89 </output> | |
| 90 <output_collection name="data_collection" type="list"> | |
| 91 <element name="sample1" ftype="fastq" file="sample1.fastq" /> | |
| 92 </output_collection> | |
| 93 </test> | |
| 94 <test> <!-- regex filter "le.?": sample1 is kept, other is excluded --> | |
| 95 <param name="directory" value="fastq_files_id" /> | |
| 96 <param name="results|regex" value="le.?" /> | |
| 97 <output name="listing"> | |
| 98 <assert_contents> | |
| 99 <has_line line="sample1.fastq" /> | |
| 100 <not_has_text text="other.fastq" /> | |
| 101 </assert_contents> | |
| 102 </output> | |
| 103 <output_collection name="data_collection" type="list"> | |
| 104 <element name="sample1" ftype="fastq" file="sample1.fastq" /> | |
| 105 </output_collection> | |
| 106 </test> | |
| 107 </tests> | |
| 108 | |
| 109 <help> | |
| 110 <![CDATA[ | |
| 111 This tool will look up files on the Galaxy server machine, including mounted directories. | |
| 112 | |
| 113 Only directories and ending combinations set up by a Galaxy admin can be listed or loaded in this way. | |
| 114 These endings are case sensitive. | |
| 115 | |
| 116 ==== | |
| 117 | |
| 118 The data options will look for all files that have a particular ending in the selected directory. | |
| 119 | |
| 120 The tool will return two things. | |
| 121 | |
| 122 1. A Dataset collection of all the detected files. (If data requested) | |
| 123 | |
| 124 2. A file with the names of all the detected files. These will be sorted in the same order as galaxy builds the dataset collection. | |
| 125 | |
| 126 The files can be filtered by setting a specific start string for the file name. | |
| 127 Only files that start with this string (case sensitive) will be included. | |
| 128 | |
| 129 Files can also be filtered by the last part of the file name before the file extension. | |
| 130 | |
| 131 Files can also be filtered by a regex pattern. | |
| 132 Only files that contain the regex string will be included. | |
| 133 This uses the Python search function, so a file is included as long as the regex pattern is found somewhere in the file name (excluding the extension). | |
| 134 | |
| 135 The three filters (start, last and regex), if supplied, work independently, so only files that pass all supplied tests will be included. | |
| 136 | |
| 137 Note: Before applying the last and regex tests, the ending (including the . just before the ending) is removed. | |
| 138 | |
| 139 Assuming the directory has: | |
| 140 C01_R1_001.fasta C01_R2_001.fasta C02_R1_001.fasta C02_R2_002.fasta | |
| 141 | |
| 142 Setting start C01 will return just the C01 files: C01_R1_001.fasta C01_R2_001.fasta | |
| 143 | |
| 144 Setting last R1_001 will return the read1 files: C01_R1_001.fasta C02_R1_001.fasta | |
| 145 | |
| 146 Setting regex R2_00.$ will return the R2 files: C01_R2_001.fasta C02_R2_002.fasta | |
| 147 | |
| 148 As Galaxy detects the file type based on the extension, this tool will change the extension as set up by the admin. | |
| 149 | |
| 150 This tool will unzip gz files if the admin has configured it to do so. | |
| 151 | |
| 152 ]]> | |
| 153 </help> | |
| 154 <citations> | |
| 155 </citations> | |
| 156 | |
| 157 </tool> |
