Mercurial > repos > iuc > ncbi_eutils_efetch
comparison __efetch_build_options.py @ 0:71bcf87a7031 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_entrez_eutils commit 15bcc5104c577b4b9c761f2854fc686c07ffa9db
| author | iuc |
|---|---|
| date | Thu, 07 Jul 2016 02:39:36 -0400 |
| parents | |
| children | 0fc65a60436f |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:71bcf87a7031 |
|---|---|
#!/usr/bin/env python
# Daniel Blankenberg
# Creates the options for tool interface
"""Generate the Galaxy macro XML describing EFetch databases and formats.

The script prints two ``<xml>`` macro fragments to stdout:

* ``db``: a conditional with one ``<when>`` per Entrez database, each holding
  an ``output_format`` select listing the formats valid for that database.
* ``efetch_formats``: a ``<change_format>`` block mapping every option value
  to the datatype of the produced dataset.
"""
import re

# http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi
# The names are extracted with a regex so the blank first line of the literal
# does not turn into an empty database name.
db_list = re.findall(r'<DbName>([^<]+)</DbName>', '''
<DbName>annotinfo</DbName>
<DbName>assembly</DbName>
<DbName>bioproject</DbName>
<DbName>biosample</DbName>
<DbName>biosystems</DbName>
<DbName>blastdbinfo</DbName>
<DbName>books</DbName>
<DbName>cdd</DbName>
<DbName>clinvar</DbName>
<DbName>clone</DbName>
<DbName>dbvar</DbName>
<DbName>gap</DbName>
<DbName>gapplus</DbName>
<DbName>gds</DbName>
<DbName>gencoll</DbName>
<DbName>gene</DbName>
<DbName>genome</DbName>
<DbName>geoprofiles</DbName>
<DbName>grasp</DbName>
<DbName>gtr</DbName>
<DbName>homologene</DbName>
<DbName>medgen</DbName>
<DbName>mesh</DbName>
<DbName>ncbisearch</DbName>
<DbName>nlmcatalog</DbName>
<DbName>nuccore</DbName>
<DbName>nucest</DbName>
<DbName>nucgss</DbName>
<DbName>nucleotide</DbName>
<DbName>omim</DbName>
<DbName>orgtrack</DbName>
<DbName>pcassay</DbName>
<DbName>pccompound</DbName>
<DbName>pcsubstance</DbName>
<DbName>pmc</DbName>
<DbName>popset</DbName>
<DbName>probe</DbName>
<DbName>protein</DbName>
<DbName>proteinclusters</DbName>
<DbName>pubmed</DbName>
<DbName>pubmedhealth</DbName>
<DbName>seqannot</DbName>
<DbName>snp</DbName>
<DbName>sra</DbName>
<DbName>structure</DbName>
<DbName>taxonomy</DbName>
<DbName>unigene</DbName>''')


# Format table. A line with a single column is a section header (a database
# name, or one of the pseudo sections "(all)" / "(sequences)"). Other lines
# are "rettype  [retmode]  description"; columns MUST be separated by at
# least two spaces because descriptions contain single spaces.
# NOTE(review): column boundaries were restored from a copy whose whitespace
# had been collapsed - confirm against the NCBI EFetch rettype/retmode table.
# The name shadows the ``help`` builtin; it is kept because it is the script's
# historical module-level name.
help = '''(all)
    docsum            xml      Document Summary
    docsum            json     Document Summary
    full              text     Full Document
    uilist            xml      Unique Identifier List
    uilist            text     Unique Identifier List
    full              xml      Full Document

bioproject
    native                     BioProject Report
    native            xml      RecordSet

biosample
    native                     BioSample Report
    native            xml      BioSampleSet

biosystems
    native            xml      Sys-set

gds
    native            xml      RecordSet
    summary           text     Summary

gene
    gene_table        xml      Gene Table
    native            text     Gene Report
    native            asn.1    Entrezgene
    native            xml      Entrezgene-Set
    tabular           tabular  Tabular Report

homologene
    alignmentscores   text     Alignment Scores
    fasta             fasta    FASTA
    homologene        text     Homologene Report
    native            text     Homologene List
    native            asn.1    HG-Entry
    native            xml      Entrez-Homologene-Set

mesh
    full              text     Full Record
    native            text     MeSH Report
    native            xml      RecordSet

nlmcatalog
    native            text     Full Record
    native            xml      NLMCatalogRecordSet

pmc
    medline           text     MEDLINE
    native            xml      pmc-articleset

pubmed
    abstract          xml      Abstract
    medline           text     MEDLINE
    native            asn.1    Pubmed-entry
    native            xml      PubmedArticleSet

(sequences)
    acc               text     Accession Number
    est               xml      EST Report
    fasta             fasta    FASTA
    fasta             xml      TinySeq
    fasta_cds_aa      fasta    CDS Products
    fasta_cds_na      fasta    Coding Regions
    ft                text     Feature Table
    gb                text     GenBank Flatfile
    gb                xml      GBSet
    gbc               xml      INSDSet
    gbwithparts       text     GenBank with Contig Sequences
    gene_fasta        fasta    FASTA of Gene
    gp                text     GenPept Flatfile
    gp                xml      GBSet
    gpc               xml      INSDSet
    gss               text     GSS Report
    ipg               text     Identical Protein Report
    ipg               xml      IPGReportSet
    native            text     Seq-entry
    native            xml      Bioseq-set
    seqid             asn.1    Seq-id

snp
    chr               text     Chromosome Report
    docset            text     Summary
    fasta             fasta    FASTA
    flt               text     Flat File
    native            asn.1    Rs
    native            xml      ExchangeSet
    rsr               tabular  RS Cluster Report
    ssexemplar        text     SS Exemplar List

sra
    native            xml      EXPERIMENT_PACKAGE_SET
    runinfo           xml      SraRunInfo

structure
    mmdb              asn.1    Ncbi-mime-asn1 strucseq
    native            text     MMDB Report
    native            xml      RecordSet

taxonomy
    native            text     Taxonomy List
    native            xml      TaxaSet'''.split("\n")

# Two or more whitespace characters separate the columns of a table row.
_COLUMN_SEPARATOR = re.compile(r'\s{2,}')


def _parse_format_table(lines):
    """Parse the format table into ``{section: [(rettype, retmode, desc)]}``.

    ``retmode`` is ``None`` for rows that only carry a rettype and a
    description. Blank lines are skipped.

    Raises:
        ValueError: if a format row appears before any section header, or a
            row has more than three columns.
    """
    sections = {}
    current = None
    for raw_line in lines:
        stripped = raw_line.strip()
        # re.split() on an empty string yields [''], never [], so blank
        # lines have to be detected before splitting.
        if not stripped:
            continue
        columns = _COLUMN_SEPARATOR.split(stripped)
        if len(columns) == 1:
            # Section header: start a fresh list of formats.
            current = sections[columns[0]] = []
            continue
        if current is None:
            raise ValueError('Format row before any section: %r' % raw_line)
        if len(columns) == 2:
            current.append((columns[0], None, columns[1]))
        elif len(columns) == 3:
            current.append(tuple(columns))
        else:
            raise ValueError('Malformed format row: %r' % raw_line)
    return sections


def _format_sort_key(entry):
    """Sort key that orders a missing retmode (``None``) before any string.

    Python 3 cannot compare ``None`` with ``str``; mapping ``None`` to ``''``
    reproduces the Python 2 ordering the script was written against.
    """
    parent_format, format_modifier, description = entry
    return (parent_format, format_modifier or '', description)


# Every known database starts without specific formats; databases that have
# a section in the table get that section's formats.
db = {db_name: [] for db_name in db_list}
db.update(_parse_format_table(help))

# The pseudo sections are not databases: pull them out of the mapping.
all_formats = db.pop('(all)')
sequences_formats = db.pop('(sequences)')

for key in db:
    db[key] += all_formats

for key in ('nuccore', 'nucest', 'nucgss', 'nucleotide'):
    db[key] += sequences_formats

MACRO_TPL = '''

'''

WHEN_TPL = '''      <when value="{format}">
        <param name="output_format" type="select" label="Output Format">
          {format_options}
        </param>
      </when>'''

FORMAT_OPTION_TPL = '''<option value="{name_type}">{name_type_human}</option>'''

# Maps every emitted option value ("rettype-retmode") to its retmode.
format_names = {}

# print() with a single argument behaves identically on Python 2 and 3.
print('''  <xml name="db">
    <conditional name="db">
      <expand macro="dbselect" />''')
for key in sorted(db):
    format_options = []

    for (parent_format, format_modifier, description) in sorted(db[key], key=_format_sort_key):
        name_human = description
        if format_modifier:
            name_human += ' (%s)' % format_modifier
        # A missing retmode intentionally renders as the literal "None"
        # (e.g. "native-None"); the option values are left unchanged.
        format_string = '%s-%s' % (parent_format, format_modifier)

        format_options.append(FORMAT_OPTION_TPL.format(
            name_type=format_string,
            name_type_human=name_human,
        ))

        format_names[format_string] = format_modifier

    print(WHEN_TPL.format(
        format=key,
        format_options='\n          '.join(format_options)
    ))

print('''    </conditional>
  </xml>''')

CHANGE_FORMAT_TPL = '''
  <xml name="efetch_formats">
    <change_format>
      {formats}
    </change_format>
  </xml>
'''

CHANGE_FORMAT_WHEN_TPL = '''<when input="output_format" value="{key}" format="{value}"/>'''
# Format options


whens = []
# Sorted so that the generated XML is stable from run to run.
for (k, v) in sorted(format_names.items()):
    if v is None:
        v = 'text'
    elif v == 'asn.1':
        v = 'asn1'

    whens.append(CHANGE_FORMAT_WHEN_TPL.format(
        key=k, value=v
    ))

print(CHANGE_FORMAT_TPL.format(formats='\n      '.join(whens)))
