Mercurial > repos > peterjc > blastxml_to_top_descr
changeset 13:8dc4ba7eba5d draft default tip
v0.1.2 with Python 3.9 declaration
| author | peterjc | 
|---|---|
| date | Sun, 17 Sep 2023 13:01:56 +0000 | 
| parents | fe1ed74793c9 | 
| children | |
| files | test-data/blastp_four_human_vs_rhodopsin.xml test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular test-data/blastp_four_human_vs_rhodopsin_top3.tabular test-data/blastp_four_human_vs_rhodopsin_top3_positive.tabular tools/blastxml_to_top_descr/README.rst tools/blastxml_to_top_descr/blastxml_to_top_descr.py tools/blastxml_to_top_descr/blastxml_to_top_descr.xml tools/blastxml_to_top_descr/repository_dependencies.xml | 
| diffstat | 8 files changed, 235 insertions(+), 575 deletions(-) [+] | 
line wrap: on
 line diff
--- a/test-data/blastp_four_human_vs_rhodopsin.xml Wed Jul 30 05:36:52 2014 -0400 +++ b/test-data/blastp_four_human_vs_rhodopsin.xml Sun Sep 17 13:01:56 2023 +0000 @@ -2,10 +2,10 @@ <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd"> <BlastOutput> <BlastOutput_program>blastp</BlastOutput_program> - <BlastOutput_version>BLASTP 2.2.29+</BlastOutput_version> + <BlastOutput_version>BLASTP 2.10.1+</BlastOutput_version> <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference> <BlastOutput_db></BlastOutput_db> - <BlastOutput_query-ID>sp|Q9BS26|ERP44_HUMAN</BlastOutput_query-ID> + <BlastOutput_query-ID>Q9BS26</BlastOutput_query-ID> <BlastOutput_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</BlastOutput_query-def> <BlastOutput_query-len>406</BlastOutput_query-len> <BlastOutput_param> @@ -20,7 +20,7 @@ <BlastOutput_iterations> <Iteration> <Iteration_iter-num>1</Iteration_iter-num> - <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID> + <Iteration_query-ID>Q9BS26</Iteration_query-ID> <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def> <Iteration_query-len>406</Iteration_query-len> <Iteration_hits> @@ -29,8 +29,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>30</Statistics_hsp-len> - <Statistics_eff-space>119568</Statistics_eff-space> + <Statistics_hsp-len>42</Statistics_hsp-len> + <Statistics_eff-space>657748</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -40,17 +40,17 @@ 
</Iteration> <Iteration> <Iteration_iter-num>2</Iteration_iter-num> - <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID> - <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def> - <Iteration_query-len>406</Iteration_query-len> + <Iteration_query-ID>Q9NSY1</Iteration_query-ID> + <Iteration_query-def>BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2</Iteration_query-def> + <Iteration_query-len>1161</Iteration_query-len> <Iteration_hits> </Iteration_hits> <Iteration_stat> <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>30</Statistics_hsp-len> - <Statistics_eff-space>119568</Statistics_eff-space> + <Statistics_hsp-len>50</Statistics_hsp-len> + <Statistics_eff-space>1954249</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -60,17 +60,17 @@ </Iteration> <Iteration> <Iteration_iter-num>3</Iteration_iter-num> - <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID> - <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def> - <Iteration_query-len>406</Iteration_query-len> + <Iteration_query-ID>P06213</Iteration_query-ID> + <Iteration_query-def>Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4</Iteration_query-def> + <Iteration_query-len>1382</Iteration_query-len> <Iteration_hits> </Iteration_hits> <Iteration_stat> <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>30</Statistics_hsp-len> - <Statistics_eff-space>119568</Statistics_eff-space> + <Statistics_hsp-len>51</Statistics_hsp-len> + <Statistics_eff-space>2333243</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> 
@@ -80,307 +80,7 @@ </Iteration> <Iteration> <Iteration_iter-num>4</Iteration_iter-num> - <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID> - <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def> - <Iteration_query-len>406</Iteration_query-len> -<Iteration_hits> -</Iteration_hits> - <Iteration_stat> - <Statistics> - <Statistics_db-num>0</Statistics_db-num> - <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>30</Statistics_hsp-len> - <Statistics_eff-space>119568</Statistics_eff-space> - <Statistics_kappa>0.041</Statistics_kappa> - <Statistics_lambda>0.267</Statistics_lambda> - <Statistics_entropy>0.14</Statistics_entropy> - </Statistics> - </Iteration_stat> - <Iteration_message>No hits found</Iteration_message> -</Iteration> -<Iteration> - <Iteration_iter-num>5</Iteration_iter-num> - <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID> - <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def> - <Iteration_query-len>406</Iteration_query-len> -<Iteration_hits> -</Iteration_hits> - <Iteration_stat> - <Statistics> - <Statistics_db-num>0</Statistics_db-num> - <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>30</Statistics_hsp-len> - <Statistics_eff-space>119568</Statistics_eff-space> - <Statistics_kappa>0.041</Statistics_kappa> - <Statistics_lambda>0.267</Statistics_lambda> - <Statistics_entropy>0.14</Statistics_entropy> - </Statistics> - </Iteration_stat> - <Iteration_message>No hits found</Iteration_message> -</Iteration> -<Iteration> - <Iteration_iter-num>6</Iteration_iter-num> - <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID> - <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def> - <Iteration_query-len>406</Iteration_query-len> -<Iteration_hits> -</Iteration_hits> - <Iteration_stat> - <Statistics> - 
<Statistics_db-num>0</Statistics_db-num> - <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>30</Statistics_hsp-len> - <Statistics_eff-space>119568</Statistics_eff-space> - <Statistics_kappa>0.041</Statistics_kappa> - <Statistics_lambda>0.267</Statistics_lambda> - <Statistics_entropy>0.14</Statistics_entropy> - </Statistics> - </Iteration_stat> - <Iteration_message>No hits found</Iteration_message> -</Iteration> -<Iteration> - <Iteration_iter-num>7</Iteration_iter-num> - <Iteration_query-ID>sp|Q9NSY1|BMP2K_HUMAN</Iteration_query-ID> - <Iteration_query-def>BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2</Iteration_query-def> - <Iteration_query-len>1161</Iteration_query-len> -<Iteration_hits> -</Iteration_hits> - <Iteration_stat> - <Statistics> - <Statistics_db-num>0</Statistics_db-num> - <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>38</Statistics_hsp-len> - <Statistics_eff-space>348130</Statistics_eff-space> - <Statistics_kappa>0.041</Statistics_kappa> - <Statistics_lambda>0.267</Statistics_lambda> - <Statistics_entropy>0.14</Statistics_entropy> - </Statistics> - </Iteration_stat> - <Iteration_message>No hits found</Iteration_message> -</Iteration> -<Iteration> - <Iteration_iter-num>8</Iteration_iter-num> - <Iteration_query-ID>sp|Q9NSY1|BMP2K_HUMAN</Iteration_query-ID> - <Iteration_query-def>BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2</Iteration_query-def> - <Iteration_query-len>1161</Iteration_query-len> -<Iteration_hits> -</Iteration_hits> - <Iteration_stat> - <Statistics> - <Statistics_db-num>0</Statistics_db-num> - <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>38</Statistics_hsp-len> - <Statistics_eff-space>348130</Statistics_eff-space> - <Statistics_kappa>0.041</Statistics_kappa> - <Statistics_lambda>0.267</Statistics_lambda> - <Statistics_entropy>0.14</Statistics_entropy> - </Statistics> - </Iteration_stat> - <Iteration_message>No hits found</Iteration_message> 
-</Iteration> -<Iteration> - <Iteration_iter-num>9</Iteration_iter-num> - <Iteration_query-ID>sp|Q9NSY1|BMP2K_HUMAN</Iteration_query-ID> - <Iteration_query-def>BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2</Iteration_query-def> - <Iteration_query-len>1161</Iteration_query-len> -<Iteration_hits> -</Iteration_hits> - <Iteration_stat> - <Statistics> - <Statistics_db-num>0</Statistics_db-num> - <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>38</Statistics_hsp-len> - <Statistics_eff-space>348130</Statistics_eff-space> - <Statistics_kappa>0.041</Statistics_kappa> - <Statistics_lambda>0.267</Statistics_lambda> - <Statistics_entropy>0.14</Statistics_entropy> - </Statistics> - </Iteration_stat> - <Iteration_message>No hits found</Iteration_message> -</Iteration> -<Iteration> - <Iteration_iter-num>10</Iteration_iter-num> - <Iteration_query-ID>sp|Q9NSY1|BMP2K_HUMAN</Iteration_query-ID> - <Iteration_query-def>BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2</Iteration_query-def> - <Iteration_query-len>1161</Iteration_query-len> -<Iteration_hits> -</Iteration_hits> - <Iteration_stat> - <Statistics> - <Statistics_db-num>0</Statistics_db-num> - <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>38</Statistics_hsp-len> - <Statistics_eff-space>348130</Statistics_eff-space> - <Statistics_kappa>0.041</Statistics_kappa> - <Statistics_lambda>0.267</Statistics_lambda> - <Statistics_entropy>0.14</Statistics_entropy> - </Statistics> - </Iteration_stat> - <Iteration_message>No hits found</Iteration_message> -</Iteration> -<Iteration> - <Iteration_iter-num>11</Iteration_iter-num> - <Iteration_query-ID>sp|Q9NSY1|BMP2K_HUMAN</Iteration_query-ID> - <Iteration_query-def>BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2</Iteration_query-def> - <Iteration_query-len>1161</Iteration_query-len> -<Iteration_hits> -</Iteration_hits> - <Iteration_stat> - <Statistics> - <Statistics_db-num>0</Statistics_db-num> - 
<Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>38</Statistics_hsp-len> - <Statistics_eff-space>348130</Statistics_eff-space> - <Statistics_kappa>0.041</Statistics_kappa> - <Statistics_lambda>0.267</Statistics_lambda> - <Statistics_entropy>0.14</Statistics_entropy> - </Statistics> - </Iteration_stat> - <Iteration_message>No hits found</Iteration_message> -</Iteration> -<Iteration> - <Iteration_iter-num>12</Iteration_iter-num> - <Iteration_query-ID>sp|Q9NSY1|BMP2K_HUMAN</Iteration_query-ID> - <Iteration_query-def>BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2</Iteration_query-def> - <Iteration_query-len>1161</Iteration_query-len> -<Iteration_hits> -</Iteration_hits> - <Iteration_stat> - <Statistics> - <Statistics_db-num>0</Statistics_db-num> - <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>38</Statistics_hsp-len> - <Statistics_eff-space>348130</Statistics_eff-space> - <Statistics_kappa>0.041</Statistics_kappa> - <Statistics_lambda>0.267</Statistics_lambda> - <Statistics_entropy>0.14</Statistics_entropy> - </Statistics> - </Iteration_stat> - <Iteration_message>No hits found</Iteration_message> -</Iteration> -<Iteration> - <Iteration_iter-num>13</Iteration_iter-num> - <Iteration_query-ID>sp|P06213|INSR_HUMAN</Iteration_query-ID> - <Iteration_query-def>Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4</Iteration_query-def> - <Iteration_query-len>1382</Iteration_query-len> -<Iteration_hits> -</Iteration_hits> - <Iteration_stat> - <Statistics> - <Statistics_db-num>0</Statistics_db-num> - <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>39</Statistics_hsp-len> - <Statistics_eff-space>414987</Statistics_eff-space> - <Statistics_kappa>0.041</Statistics_kappa> - <Statistics_lambda>0.267</Statistics_lambda> - <Statistics_entropy>0.14</Statistics_entropy> - </Statistics> - </Iteration_stat> - <Iteration_message>No hits found</Iteration_message> -</Iteration> -<Iteration> - 
<Iteration_iter-num>14</Iteration_iter-num> - <Iteration_query-ID>sp|P06213|INSR_HUMAN</Iteration_query-ID> - <Iteration_query-def>Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4</Iteration_query-def> - <Iteration_query-len>1382</Iteration_query-len> -<Iteration_hits> -</Iteration_hits> - <Iteration_stat> - <Statistics> - <Statistics_db-num>0</Statistics_db-num> - <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>39</Statistics_hsp-len> - <Statistics_eff-space>414987</Statistics_eff-space> - <Statistics_kappa>0.041</Statistics_kappa> - <Statistics_lambda>0.267</Statistics_lambda> - <Statistics_entropy>0.14</Statistics_entropy> - </Statistics> - </Iteration_stat> - <Iteration_message>No hits found</Iteration_message> -</Iteration> -<Iteration> - <Iteration_iter-num>15</Iteration_iter-num> - <Iteration_query-ID>sp|P06213|INSR_HUMAN</Iteration_query-ID> - <Iteration_query-def>Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4</Iteration_query-def> - <Iteration_query-len>1382</Iteration_query-len> -<Iteration_hits> -</Iteration_hits> - <Iteration_stat> - <Statistics> - <Statistics_db-num>0</Statistics_db-num> - <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>39</Statistics_hsp-len> - <Statistics_eff-space>414987</Statistics_eff-space> - <Statistics_kappa>0.041</Statistics_kappa> - <Statistics_lambda>0.267</Statistics_lambda> - <Statistics_entropy>0.14</Statistics_entropy> - </Statistics> - </Iteration_stat> - <Iteration_message>No hits found</Iteration_message> -</Iteration> -<Iteration> - <Iteration_iter-num>16</Iteration_iter-num> - <Iteration_query-ID>sp|P06213|INSR_HUMAN</Iteration_query-ID> - <Iteration_query-def>Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4</Iteration_query-def> - <Iteration_query-len>1382</Iteration_query-len> -<Iteration_hits> -</Iteration_hits> - <Iteration_stat> - <Statistics> - <Statistics_db-num>0</Statistics_db-num> - <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>39</Statistics_hsp-len> 
- <Statistics_eff-space>414987</Statistics_eff-space> - <Statistics_kappa>0.041</Statistics_kappa> - <Statistics_lambda>0.267</Statistics_lambda> - <Statistics_entropy>0.14</Statistics_entropy> - </Statistics> - </Iteration_stat> - <Iteration_message>No hits found</Iteration_message> -</Iteration> -<Iteration> - <Iteration_iter-num>17</Iteration_iter-num> - <Iteration_query-ID>sp|P06213|INSR_HUMAN</Iteration_query-ID> - <Iteration_query-def>Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4</Iteration_query-def> - <Iteration_query-len>1382</Iteration_query-len> -<Iteration_hits> -</Iteration_hits> - <Iteration_stat> - <Statistics> - <Statistics_db-num>0</Statistics_db-num> - <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>39</Statistics_hsp-len> - <Statistics_eff-space>414987</Statistics_eff-space> - <Statistics_kappa>0.041</Statistics_kappa> - <Statistics_lambda>0.267</Statistics_lambda> - <Statistics_entropy>0.14</Statistics_entropy> - </Statistics> - </Iteration_stat> - <Iteration_message>No hits found</Iteration_message> -</Iteration> -<Iteration> - <Iteration_iter-num>18</Iteration_iter-num> - <Iteration_query-ID>sp|P06213|INSR_HUMAN</Iteration_query-ID> - <Iteration_query-def>Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4</Iteration_query-def> - <Iteration_query-len>1382</Iteration_query-len> -<Iteration_hits> -</Iteration_hits> - <Iteration_stat> - <Statistics> - <Statistics_db-num>0</Statistics_db-num> - <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>39</Statistics_hsp-len> - <Statistics_eff-space>414987</Statistics_eff-space> - <Statistics_kappa>0.041</Statistics_kappa> - <Statistics_lambda>0.267</Statistics_lambda> - <Statistics_entropy>0.14</Statistics_entropy> - </Statistics> - </Iteration_stat> - <Iteration_message>No hits found</Iteration_message> -</Iteration> -<Iteration> - <Iteration_iter-num>19</Iteration_iter-num> - <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID> + 
<Iteration_query-ID>P08100</Iteration_query-ID> <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def> <Iteration_query-len>348</Iteration_query-len> <Iteration_hits> @@ -412,74 +112,36 @@ </Hsp> </Hit_hsps> </Hit> -</Iteration_hits> - <Iteration_stat> - <Statistics> - <Statistics_db-num>0</Statistics_db-num> - <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>29</Statistics_hsp-len> - <Statistics_eff-space>101761</Statistics_eff-space> - <Statistics_kappa>0.041</Statistics_kappa> - <Statistics_lambda>0.267</Statistics_lambda> - <Statistics_entropy>0.14</Statistics_entropy> - </Statistics> - </Iteration_stat> -</Iteration> -<Iteration> - <Iteration_iter-num>20</Iteration_iter-num> - <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID> - <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def> - <Iteration_query-len>348</Iteration_query-len> -<Iteration_hits> <Hit> - <Hit_num>1</Hit_num> - <Hit_id>gi|3024260|sp|P56514.1|OPSD_BUFBU</Hit_id> - <Hit_def>RecName: Full=Rhodopsin</Hit_def> - <Hit_accession>P56514</Hit_accession> - <Hit_len>354</Hit_len> + <Hit_num>2</Hit_num> + <Hit_id>gi|223523|prf||0811197A</Hit_id> + <Hit_def>rhodopsin [Bos taurus]</Hit_def> + <Hit_accession>0811197A</Hit_accession> + <Hit_len>347</Hit_len> <Hit_hsps> <Hsp> <Hsp_num>1</Hsp_num> - <Hsp_bit-score>619.002</Hsp_bit-score> - <Hsp_score>1595</Hsp_score> + <Hsp_bit-score>673.315</Hsp_bit-score> + <Hsp_score>1736</Hsp_score> <Hsp_evalue>0</Hsp_evalue> <Hsp_query-from>1</Hsp_query-from> - <Hsp_query-to>341</Hsp_query-to> + <Hsp_query-to>348</Hsp_query-to> <Hsp_hit-from>1</Hsp_hit-from> - <Hsp_hit-to>342</Hsp_hit-to> + <Hsp_hit-to>347</Hsp_hit-to> <Hsp_query-frame>0</Hsp_query-frame> <Hsp_hit-frame>0</Hsp_hit-frame> - <Hsp_identity>290</Hsp_identity> - <Hsp_positive>322</Hsp_positive> + <Hsp_identity>324</Hsp_identity> + <Hsp_positive>336</Hsp_positive> <Hsp_gaps>1</Hsp_gaps> - <Hsp_align-len>342</Hsp_align-len> - 
<Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE</Hsp_qseq> - <Hsp_hseq>MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE</Hsp_hseq> - <Hsp_midline>MNGTEGPNFY+P SN TGVVRSPFEYPQYYLAEPWQ+S+L AYMFLLI+LGFPINF+TLYVT+QHKKLRTPLNYILLNLA A+ FMVL GFT T+Y+S++GYF+ G TGC +EGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRF ENHA+MGVAFTW+MAL+CA PPL GWSRYIPEG+QCSCG+DYYTLKPEVNNESFVIYMFVVHFTIP+IIIFFCYG+LV TVKEAAAQQQESATTQKAEKEVTRMVIIMV+ FLICWVPYASVAF+IF++QGS FGPIFMT+PAFFAKS++IYNPVIYIM+NKQFRNCM+TT+CCGKNP G+D+A SA SKTE</Hsp_midline> + <Hsp_align-len>348</Hsp_align-len> + <Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA</Hsp_qseq> + <Hsp_hseq>MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA</Hsp_hseq> + <Hsp_midline>MNGTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV 
GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGID YT E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA</Hsp_midline> </Hsp> </Hit_hsps> </Hit> -</Iteration_hits> - <Iteration_stat> - <Statistics> - <Statistics_db-num>0</Statistics_db-num> - <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>29</Statistics_hsp-len> - <Statistics_eff-space>101761</Statistics_eff-space> - <Statistics_kappa>0.041</Statistics_kappa> - <Statistics_lambda>0.267</Statistics_lambda> - <Statistics_entropy>0.14</Statistics_entropy> - </Statistics> - </Iteration_stat> -</Iteration> -<Iteration> - <Iteration_iter-num>21</Iteration_iter-num> - <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID> - <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def> - <Iteration_query-len>348</Iteration_query-len> -<Iteration_hits> <Hit> - <Hit_num>1</Hit_num> + <Hit_num>3</Hit_num> <Hit_id>gi|283855846|gb|ADB45242.1|</Hit_id> <Hit_def>rhodopsin [Cynopterus brachyotis]</Hit_def> <Hit_accession>ADB45242</Hit_accession> @@ -506,27 +168,8 @@ </Hsp> </Hit_hsps> </Hit> -</Iteration_hits> - <Iteration_stat> - <Statistics> - <Statistics_db-num>0</Statistics_db-num> - <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>29</Statistics_hsp-len> - <Statistics_eff-space>101761</Statistics_eff-space> - <Statistics_kappa>0.041</Statistics_kappa> - <Statistics_lambda>0.267</Statistics_lambda> - <Statistics_entropy>0.14</Statistics_entropy> - </Statistics> - </Iteration_stat> -</Iteration> -<Iteration> - <Iteration_iter-num>22</Iteration_iter-num> - <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID> - <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def> - <Iteration_query-len>348</Iteration_query-len> -<Iteration_hits> <Hit> - <Hit_num>1</Hit_num> + 
<Hit_num>4</Hit_num> <Hit_id>gi|283855823|gb|ADB45229.1|</Hit_id> <Hit_def>rhodopsin [Myotis pilosus]</Hit_def> <Hit_accession>ADB45229</Hit_accession> @@ -553,74 +196,36 @@ </Hsp> </Hit_hsps> </Hit> -</Iteration_hits> - <Iteration_stat> - <Statistics> - <Statistics_db-num>0</Statistics_db-num> - <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>29</Statistics_hsp-len> - <Statistics_eff-space>101761</Statistics_eff-space> - <Statistics_kappa>0.041</Statistics_kappa> - <Statistics_lambda>0.267</Statistics_lambda> - <Statistics_entropy>0.14</Statistics_entropy> - </Statistics> - </Iteration_stat> -</Iteration> -<Iteration> - <Iteration_iter-num>23</Iteration_iter-num> - <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID> - <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def> - <Iteration_query-len>348</Iteration_query-len> -<Iteration_hits> <Hit> - <Hit_num>1</Hit_num> - <Hit_id>gi|223523|prf||0811197A</Hit_id> - <Hit_def>rhodopsin [Bos taurus]</Hit_def> - <Hit_accession>0811197A</Hit_accession> - <Hit_len>347</Hit_len> + <Hit_num>5</Hit_num> + <Hit_id>gi|3024260|sp|P56514.1|OPSD_BUFBU</Hit_id> + <Hit_def>RecName: Full=Rhodopsin</Hit_def> + <Hit_accession>P56514</Hit_accession> + <Hit_len>354</Hit_len> <Hit_hsps> <Hsp> <Hsp_num>1</Hsp_num> - <Hsp_bit-score>673.315</Hsp_bit-score> - <Hsp_score>1736</Hsp_score> + <Hsp_bit-score>619.002</Hsp_bit-score> + <Hsp_score>1595</Hsp_score> <Hsp_evalue>0</Hsp_evalue> <Hsp_query-from>1</Hsp_query-from> - <Hsp_query-to>348</Hsp_query-to> + <Hsp_query-to>341</Hsp_query-to> <Hsp_hit-from>1</Hsp_hit-from> - <Hsp_hit-to>347</Hsp_hit-to> + <Hsp_hit-to>342</Hsp_hit-to> <Hsp_query-frame>0</Hsp_query-frame> <Hsp_hit-frame>0</Hsp_hit-frame> - <Hsp_identity>324</Hsp_identity> - <Hsp_positive>336</Hsp_positive> + <Hsp_identity>290</Hsp_identity> + <Hsp_positive>322</Hsp_positive> <Hsp_gaps>1</Hsp_gaps> - <Hsp_align-len>348</Hsp_align-len> - 
<Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA</Hsp_qseq> - <Hsp_hseq>MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA</Hsp_hseq> - <Hsp_midline>MNGTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGID YT E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA</Hsp_midline> + <Hsp_align-len>342</Hsp_align-len> + <Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE</Hsp_qseq> + <Hsp_hseq>MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE</Hsp_hseq> + <Hsp_midline>MNGTEGPNFY+P SN TGVVRSPFEYPQYYLAEPWQ+S+L AYMFLLI+LGFPINF+TLYVT+QHKKLRTPLNYILLNLA A+ FMVL GFT 
T+Y+S++GYF+ G TGC +EGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRF ENHA+MGVAFTW+MAL+CA PPL GWSRYIPEG+QCSCG+DYYTLKPEVNNESFVIYMFVVHFTIP+IIIFFCYG+LV TVKEAAAQQQESATTQKAEKEVTRMVIIMV+ FLICWVPYASVAF+IF++QGS FGPIFMT+PAFFAKS++IYNPVIYIM+NKQFRNCM+TT+CCGKNP G+D+A SA SKTE</Hsp_midline> </Hsp> </Hit_hsps> </Hit> -</Iteration_hits> - <Iteration_stat> - <Statistics> - <Statistics_db-num>0</Statistics_db-num> - <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>29</Statistics_hsp-len> - <Statistics_eff-space>101761</Statistics_eff-space> - <Statistics_kappa>0.041</Statistics_kappa> - <Statistics_lambda>0.267</Statistics_lambda> - <Statistics_entropy>0.14</Statistics_entropy> - </Statistics> - </Iteration_stat> -</Iteration> -<Iteration> - <Iteration_iter-num>24</Iteration_iter-num> - <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID> - <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def> - <Iteration_query-len>348</Iteration_query-len> -<Iteration_hits> <Hit> - <Hit_num>1</Hit_num> + <Hit_num>6</Hit_num> <Hit_id>gi|12583665|dbj|BAB21486.1|</Hit_id> <Hit_def>fresh water form rod opsin [Conger myriaster]</Hit_def> <Hit_accession>BAB21486</Hit_accession> @@ -652,8 +257,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>29</Statistics_hsp-len> - <Statistics_eff-space>101761</Statistics_eff-space> + <Statistics_hsp-len>41</Statistics_hsp-len> + <Statistics_eff-space>556591</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy>
--- a/test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular Wed Jul 30 05:36:52 2014 -0400 +++ b/test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular Sun Sep 17 13:01:56 2023 +0000 @@ -1,6 +1,6 @@ -sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 96.55 348 12 0 1 348 1 348 0.0 701 gi|57163783|ref|NP_001009242.1| 1808 336 343 0 98.56 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 348 rhodopsin [Felis catus] -sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU 84.80 342 51 1 1 341 1 342 0.0 619 gi|3024260|sp|P56514.1|OPSD_BUFBU 1595 290 322 1 94.15 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 354 RecName: Full=Rhodopsin -sp|P08100|OPSD_HUMAN 
gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0 653 gi|283855846|gb|ADB45242.1| 1684 311 321 0 97.87 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 rhodopsin [Cynopterus brachyotis] -sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0 631 gi|283855823|gb|ADB45229.1| 1627 311 323 0 98.48 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 rhodopsin [Myotis pilosus] -sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0 673 gi|223523|prf||0811197A 1736 324 336 1 96.55 1 1 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 347 rhodopsin [Bos taurus] -sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0 599 gi|12583665|dbj|BAB21486.1| 1544 281 314 1 91.81 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 354 fresh water form rod opsin [Conger myriaster] +P08100 gi|57163783|ref|NP_001009242.1| 96.552 348 12 0 1 348 1 348 0.0 701 gi|57163783|ref|NP_001009242.1| 1808 336 343 0 98.56 1 1 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 348 rhodopsin [Felis catus] +P08100 gi|223523|prf||0811197A 93.103 348 23 1 1 348 1 347 0.0 673 gi|223523|prf||0811197A 1736 324 336 1 96.55 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 347 rhodopsin [Bos taurus] +P08100 gi|283855846|gb|ADB45242.1| 94.817 328 17 0 11 338 1 328 0.0 653 gi|283855846|gb|ADB45242.1| 1684 311 321 0 97.87 1 1 
VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 rhodopsin [Cynopterus brachyotis] +P08100 gi|283855823|gb|ADB45229.1| 94.817 328 17 0 11 338 1 328 0.0 631 gi|283855823|gb|ADB45229.1| 1627 311 323 0 98.48 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 rhodopsin [Myotis pilosus] +P08100 gi|3024260|sp|P56514.1|OPSD_BUFBU 84.795 342 51 1 1 341 1 342 0.0 619 gi|3024260|sp|P56514.1|OPSD_BUFBU 1595 290 322 1 94.15 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE 
MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 354 RecName: Full=Rhodopsin +P08100 gi|12583665|dbj|BAB21486.1| 82.164 342 60 1 1 341 1 342 0.0 599 gi|12583665|dbj|BAB21486.1| 1544 281 314 1 91.81 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 354 fresh water form rod opsin [Conger myriaster]
--- a/test-data/blastp_four_human_vs_rhodopsin_top3.tabular Wed Jul 30 05:36:52 2014 -0400 +++ b/test-data/blastp_four_human_vs_rhodopsin_top3.tabular Sun Sep 17 13:01:56 2023 +0000 @@ -1,5 +1,5 @@ #Query BLAST hit 1 BLAST hit 2 BLAST hit 3 -sp|Q9BS26|ERP44_HUMAN -sp|Q9NSY1|BMP2K_HUMAN -sp|P06213|INSR_HUMAN -sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus] gi|3024260|sp|P56514.1|OPSD_BUFBU RecName: Full=Rhodopsin gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis] +Q9BS26 +Q9NSY1 +P06213 +P08100 gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus] gi|223523|prf||0811197A rhodopsin [Bos taurus] gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis]
--- a/test-data/blastp_four_human_vs_rhodopsin_top3_positive.tabular Wed Jul 30 05:36:52 2014 -0400 +++ b/test-data/blastp_four_human_vs_rhodopsin_top3_positive.tabular Sun Sep 17 13:01:56 2023 +0000 @@ -1,2 +1,2 @@ #Query BLAST hit 1 BLAST hit 2 BLAST hit 3 -sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus] gi|3024260|sp|P56514.1|OPSD_BUFBU RecName: Full=Rhodopsin gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis] +P08100 gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus] gi|223523|prf||0811197A rhodopsin [Bos taurus] gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis]
--- a/tools/blastxml_to_top_descr/README.rst Wed Jul 30 05:36:52 2014 -0400 +++ b/tools/blastxml_to_top_descr/README.rst Sun Sep 17 13:01:56 2023 +0000 @@ -1,7 +1,7 @@ Galaxy tool to extract top BLAST hit descriptions from BLAST XML ================================================================ -This tool is copyright 2012-2013 by Peter Cock, The James Hutton Institute +This tool is copyright 2012-2015 by Peter Cock, The James Hutton Institute (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. See the licence text below. @@ -12,38 +12,39 @@ It is available from the Galaxy Tool Shed at: http://toolshed.g2.bx.psu.edu/view/peterjc/blastxml_to_top_descr -This requires the 'blast_datatypes' repository from the Galaxy Tool Shed -to provide the 'blastxml' file format definition. +This requires the ``blast_datatypes`` repository from the Galaxy Tool Shed +to provide the ``blastxml`` file format definition. Automated Installation ====================== This should be straightforward, Galaxy should automatically install the -'blast_datatypes' dependency. +``blast_datatypes`` dependency. Manual Installation =================== -If you haven't done so before, first install the 'blast_datatypes' repository. +If you haven't done so before, first install the ``blast_datatypes`` repository. There are just two files to install (if doing this manually): -* blastxml_to_top_descr.py (the Python script) -* blastxml_to_top_descr.xml (the Galaxy tool definition) +- ``blastxml_to_top_descr.py`` (the Python script) +- ``blastxml_to_top_descr.xml`` (the Galaxy tool definition) -The suggested location is in the Galaxy folder tools/ncbi_blast_plus next to -the NCBI BLAST+ tool wrappers. +The suggested location is in the Galaxy folder ``tools/ncbi_blast_plus/`` +next to the NCBI BLAST+ tool wrappers. -You will also need to modify the tools_conf.xml file to tell Galaxy to offer +You will also need to modify the ``tools_conf.xml`` file to tell Galaxy to offer the tool. 
e.g. next to the NCBI BLAST+ tools. Simply add the line:: <tool file="ncbi_blast_plus/blastxml_to_top_descr.xml" /> -To run the tool's tests, also add this line to tools_conf.xml.sample then:: +If you wish to run the unit tests, also move/copy the ``test-data/`` files +under Galaxy's ``test-data/`` folder. Then:: - $ sh run_functional_tests.sh -id blastxml_to_top_descr + $ sh run_tests.sh -id blastxml_to_top_descr History @@ -54,13 +55,13 @@ ------- ---------------------------------------------------------------------- v0.0.1 - Initial version. v0.0.2 - Since BLAST+ was moved out of the Galaxy core, now have a dependency - on the 'blast_datatypes' repository in the Tool Shed. + on the ``blast_datatypes`` repository in the Tool Shed. v0.0.3 - Include the test files required to run the unit tests v0.0.4 - Quote filenames in case they contain spaces (internal change) v0.0.5 - Include number of queries with BLAST matches in stdout (peek text) v0.0.6 - Check for errors via the script's return code (internal change) v0.0.7 - Link to Tool Shed added to help text and this documentation. - - Tweak dependency on blast_datatypes to also work on Test Tool Shed + - Tweak dependency on ``blast_datatypes`` to also work on Test Tool Shed - Adopt standard MIT License. v0.0.8 - Development moved to GitHub, https://github.com/peterjc/galaxy_blast v0.0.9 - Updated citation information (Cock et al. 2013). @@ -70,6 +71,11 @@ - Support BLAST XML with multiple ``<Iteration>`` blocks per query. - Support the default 25 column extended tabular BLAST output. v0.1.1 - Embed citation information in the tool XML (new Galaxy feature). +v0.1.2 - Reorder XML elements (internal change only). + - Planemo for Tool Shed upload (``.shed.yml``, internal change only). + - Use ``<command detect_errors="aggressive">`` (internal change only). + - Single quote command line arguments (internal change only). + - Python 3 compatible syntax. 
======= ====================================================================== @@ -89,22 +95,31 @@ As of July 2013, development is continuing on a dedicated GitHub repository: https://github.com/peterjc/galaxy_blast -For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use -the following command from the GitHub repository root folder:: +For pushing a release to the test or main "Galaxy Tool Shed", use the following +Planemo commands (which requires you have set your Tool Shed access details in +``~/.planemo.yml`` and that you have access rights on the Tool Shed):: + + $ planemo shed_update -t testtoolshed --check_diff ~/repositories/galaxy_blast/tools/blastxml_to_top_descr/ + ... + +or:: - $ tar -czf blastxml_to_top_descr.tar.gz tools/blastxml_to_top_descr/README.rst tools/blastxml_to_top_descr/blastxml_to_top_descr.* tools/blastxml_to_top_descr/repository_dependencies.xml test-data/blastp_four_human_vs_rhodopsin.xml test-data/blastp_four_human_vs_rhodopsin_top3.tabular test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular test-data/blastp_four_human_vs_rhodopsin_top3_positive.tabular + $ planemo shed_update -t toolshed --check_diff ~/repositories/galaxy_blast/tools/blastxml_to_top_descr/ + ... + +To just build and check the tar ball, use:: -Check this worked:: - - $ tar -tzf blastxml_to_top_descr.tar.gz + $ planemo shed_upload --tar_only ~/repositories/galaxy_blast/tools/blastxml_to_top_descr/ + ... 
+ $ tar -tzf shed_upload.tar.gz + test-data/blastp_four_human_vs_rhodopsin.xml + test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular + test-data/blastp_four_human_vs_rhodopsin_top3.tabular + test-data/blastp_four_human_vs_rhodopsin_top3_positive.tabular tools/blastxml_to_top_descr/README.rst tools/blastxml_to_top_descr/blastxml_to_top_descr.py tools/blastxml_to_top_descr/blastxml_to_top_descr.xml tools/blastxml_to_top_descr/repository_dependencies.xml - test-data/blastp_four_human_vs_rhodopsin.xml - test-data/blastp_four_human_vs_rhodopsin_top3.tabular - test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular - test-data/blastp_four_human_vs_rhodopsin_top3_positive.tabular Licence (MIT)
--- a/tools/blastxml_to_top_descr/blastxml_to_top_descr.py Wed Jul 30 05:36:52 2014 -0400 +++ b/tools/blastxml_to_top_descr/blastxml_to_top_descr.py Sun Sep 17 13:01:56 2023 +0000 @@ -6,25 +6,26 @@ Assumes the hits are pre-sorted, so "best" 3 hits gives first 3 hits. """ +from __future__ import print_function + import os +import re import sys -import re from optparse import OptionParser if "-v" in sys.argv or "--version" in sys.argv: - print "v0.1.0" + print("v0.1.2") sys.exit(0) -if sys.version_info[:2] >= ( 2, 5 ): +if sys.version_info[:2] >= (2, 5): import xml.etree.cElementTree as ElementTree else: - from galaxy import eggs - import pkg_resources; pkg_resources.require( "elementtree" ) + from galaxy import eggs # noqa - ignore flake8 F401 + import pkg_resources + + pkg_resources.require("elementtree") from elementtree import ElementTree -def stop_err( msg ): - sys.stderr.write("%s\n" % msg) - sys.exit(1) usage = """Use as follows: @@ -39,23 +40,54 @@ """ parser = OptionParser(usage=usage) -parser.add_option("-t", "--topN", dest="topN", default=3, - help="Number of descriptions to collect (in order from file)") -parser.add_option("-o", "--output", dest="out_file", default=None, - help="Output filename for tabular file", - metavar="FILE") -parser.add_option("-f", "--format", dest="format", default="blastxml", - help="Input format (blastxml or tabular)") -parser.add_option("-q", "--qseqid", dest="qseqid", default="1", - help="Column for query 'qseqid' (for tabular input; default 1)") -parser.add_option("-s", "--sseqid", dest="sseqid", default="2", - help="Column for subject 'sseqid' (for tabular input; default 2)") -parser.add_option("-d", "--salltitles", dest="salltitles", default="25", - help="Column for descriptions 'salltitles' (for tabular input; default 25)") +parser.add_option( + "-t", + "--topN", + dest="topN", + default=3, + help="Number of descriptions to collect (in order from file)", +) +parser.add_option( + "-o", + "--output", + dest="out_file", + 
default=None, + help="Output filename for tabular file", + metavar="FILE", +) +parser.add_option( + "-f", + "--format", + dest="format", + default="blastxml", + help="Input format (blastxml or tabular)", +) +parser.add_option( + "-q", + "--qseqid", + dest="qseqid", + default="1", + help="Column for query 'qseqid' (for tabular input; default 1)", +) +parser.add_option( + "-s", + "--sseqid", + dest="sseqid", + default="2", + help="Column for subject 'sseqid' (for tabular input; default 2)", +) +parser.add_option( + "-d", + "--salltitles", + dest="salltitles", + default="25", + help="Column for descriptions 'salltitles' (for tabular input; default 25)", +) (options, args) = parser.parse_args() if len(sys.argv) == 4 and len(args) == 3 and not options.out_file: - stop_err("""The API has changed, replace this: + sys.exit( + """The API has changed, replace this: $ python blastxml_to_top_descr.py input.xml output.tab 3 @@ -64,12 +96,13 @@ $ python blastxml_to_top_descr.py -o output.tab -t 3 input.xml Sorry. 
-""") +""" + ) if not args: - stop_err("Input filename missing, try -h") + sys.exit("Input filename missing, try -h") if len(args) > 1: - stop_err("Expects a single argument, one input filename") + sys.exit("Expects a single argument, one input filename") in_file = args[0] out_file = options.out_file topN = options.topN @@ -77,12 +110,12 @@ try: topN = int(topN) except ValueError: - stop_err("Number of hits argument should be an integer (at least 1)") + sys.exit("Number of hits argument should be an integer (at least 1)") if topN < 1: - stop_err("Number of hits argument should be an integer (at least 1)") + sys.exit("Number of hits argument should be an integer (at least 1)") if not os.path.isfile(in_file): - stop_err("Missing input file: %r" % in_file) + sys.exit("Missing input file: %r" % in_file) def get_column(value): @@ -92,11 +125,12 @@ value = value[1:] try: col = int(value) - except: - stop_err("Expected an integer column number, not %r" % value) + except ValueError: + sys.exit("Expected an integer column number, not %r" % value) if col < 1: - stop_err("Expect column numbers to be at least one, not %r" % value) - return col - 1 # Python counting! + sys.exit("Expect column numbers to be at least one, not %r" % value) + return col - 1 # Python counting! + def tabular_hits(in_file, qseqid, sseqid, salltitles): """Parse key data from tabular BLAST output. @@ -105,8 +139,8 @@ """ current_query = None current_hits = [] - with open(in_file) as input: - for line in input: + with open(in_file) as input_handle: + for line in input_handle: parts = line.rstrip("\n").split("\t") query = parts[qseqid] descr = "%s %s" % (parts[sseqid], parts[salltitles]) @@ -126,6 +160,7 @@ # Final query yield current_query, current_hits + def blastxml_hits(in_file): """Parse key data from BLAST XML output. 
@@ -133,32 +168,35 @@ """ try: context = ElementTree.iterparse(in_file, events=("start", "end")) - except: + except Exception: with open(in_file) as handle: header = handle.read(100) - stop_err("Invalid data format in XML file %r which starts: %r" % (in_file, header)) + sys.exit( + "Invalid data format in XML file %r which starts: %r" % (in_file, header) + ) # turn it into an iterator context = iter(context) # get the root element try: - event, root = context.next() - except: + event, root = next(context) + except Exception: with open(in_file) as handle: header = handle.read(100) - stop_err("Unable to get root element from XML file %r which starts: %r" % (in_file, header)) + sys.exit( + "Unable to get root element from XML file %r which starts: %r" + % (in_file, header) + ) - re_default_query_id = re.compile("^Query_\d+$") - assert re_default_query_id.match("Query_101") - assert not re_default_query_id.match("Query_101a") - assert not re_default_query_id.match("MyQuery_101") - re_default_subject_id = re.compile("^Subject_\d+$") - assert re_default_subject_id.match("Subject_1") - assert not re_default_subject_id.match("Subject_") - assert not re_default_subject_id.match("Subject_12a") - assert not re_default_subject_id.match("TheSubject_1") + re_default_query_id = re.compile(r"^Query_\d+$") + assert re_default_query_id.match(r"Query_101") + assert not re_default_query_id.match(r"Query_101a") + assert not re_default_query_id.match(r"MyQuery_101") + re_default_subject_id = re.compile(r"^Subject_\d+$") + assert re_default_subject_id.match(r"Subject_1") + assert not re_default_subject_id.match(r"Subject_") + assert not re_default_subject_id.match(r"Subject_12a") + assert not re_default_subject_id.match(r"TheSubject_1") - count = 0 - pos_count = 0 current_query = None hit_descrs = [] for event, elem in context: @@ -166,7 +204,8 @@ if event == "end" and elem.tag == "Iteration": # Expecting either this, from BLAST 2.2.25+ using FASTA vs FASTA # 
<Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID> - # <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def> + # <Iteration_query-def>Endoplasmic reticulum resident protein 44 + # OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def> # <Iteration_query-len>406</Iteration_query-len> # <Iteration_hits></Iteration_hits> # @@ -177,10 +216,12 @@ # <Iteration_hits>... qseqid = elem.findtext("Iteration_query-ID") if qseqid is None: - stop_err("Missing <Iteration_query-ID> (could be really old BLAST XML data?)") + sys.exit( + "Missing <Iteration_query-ID> (could be really old BLAST XML data?)" + ) if re_default_query_id.match(qseqid): - #Place holder ID, take the first word of the query definition - qseqid = elem.findtext("Iteration_query-def").split(None,1)[0] + # Place holder ID, take the first word of the query definition + qseqid = elem.findtext("Iteration_query-def").split(None, 1)[0] if current_query is None: # First hit current_query = qseqid @@ -203,17 +244,19 @@ # <Hit_accession>P56514</Hit_accession> # or, # <Hit_id>Subject_1</Hit_id> - # <Hit_def>gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]</Hit_def> + # <Hit_def>gi|57163783|ref|NP_001009242.1| + # rhodopsin [Felis catus]</Hit_def> # <Hit_accession>Subject_1</Hit_accession> # - #apparently depending on the parse_deflines switch - sseqid = hit.findtext("Hit_id").split(None,1)[0] + # apparently depending on the parse_deflines switch + sseqid = hit.findtext("Hit_id").split(None, 1)[0] hit_def = sseqid + " " + hit.findtext("Hit_def") - if re_default_subject_id.match(sseqid) \ - and sseqid == hit.findtext("Hit_accession"): - #Place holder ID, take the first word of the subject definition + if re_default_subject_id.match(sseqid) and sseqid == hit.findtext( + "Hit_accession" + ): + # Place holder ID, take the first word of the subject definition hit_def = hit.findtext("Hit_def") - sseqid = hit_def.split(None,1)[0] + sseqid = 
hit_def.split(None, 1)[0] assert hit_def not in hit_descrs hit_descrs.append(hit_def) # prevents ElementTree from growing large datastructure @@ -223,6 +266,7 @@ # Final query yield current_query, hit_descrs + if options.format == "blastxml": hits = blastxml_hits(in_file) elif options.format == "tabular": @@ -231,21 +275,23 @@ salltitles = get_column(options.salltitles) hits = tabular_hits(in_file, qseqid, sseqid, salltitles) else: - stop_err("Unsupported format: %r" % options.format) + sys.exit("Unsupported format: %r" % options.format) def best_hits(descriptions, topN): + """Truncate given descriptions list to at most N entries.""" if len(descriptions) < topN: - return descriptions + [""] * (topN - len(descriptions)) + return descriptions + [""] * (topN - len(descriptions)) else: return descriptions[:topN] + count = 0 if out_file is None: outfile = sys.stdout else: - outfile = open(out_file, 'w') -outfile.write("#Query\t%s\n" % "\t".join("BLAST hit %i" % (i+1) for i in range(topN))) + outfile = open(out_file, "w") +outfile.write("#Query\t%s\n" % "\t".join("BLAST hit %i" % (i + 1) for i in range(topN))) for query, descrs in hits: count += 1 outfile.write("%s\t%s\n" % (query, "\t".join(best_hits(descrs, topN))))
--- a/tools/blastxml_to_top_descr/blastxml_to_top_descr.xml Wed Jul 30 05:36:52 2014 -0400 +++ b/tools/blastxml_to_top_descr/blastxml_to_top_descr.xml Sun Sep 17 13:01:56 2023 +0000 @@ -1,27 +1,27 @@ -<tool id="blastxml_to_top_descr" name="BLAST top hit descriptions" version="0.1.1"> +<tool id="blastxml_to_top_descr" name="BLAST top hit descriptions" version="0.1.2" profile="16.10"> <description>Make a table from BLAST output</description> - <version_command interpreter="python">blastxml_to_top_descr.py --version</version_command> - <command interpreter="python"> -blastxml_to_top_descr.py --f "$input.in_format" + <requirements> + <requirement type="package" version="3.9">python</requirement> + </requirements> + <version_command> +python $__tool_directory__/blastxml_to_top_descr.py --version + </version_command> + <command detect_errors="aggressive"> +python $__tool_directory__/blastxml_to_top_descr.py +-f '$input.in_format' #if $input.in_format == "tabular": --qseqid $input.qseqid --sseqid $input.sseqid --salltitles $input.salltitles #end if --o "${tabular_file}" +-o '${tabular_file}' -t ${topN} -"${in_file}" +'${in_file}' </command> - <stdio> - <!-- Assume anything other than zero is an error --> - <exit_code range="1:" /> - <exit_code range=":-1" /> - </stdio> <inputs> <conditional name="input"> <param name="in_format" type="select" label="Input format"> - <option value="blastxml" select="True">BLAST XML</option> + <option value="blastxml" selected="true">BLAST XML</option> <option value="tabular">Tabular</option> </param> <when value="blastxml"> @@ -30,17 +30,17 @@ <when value="tabular"> <param name="in_file" type="data" format="tabular" label="BLAST results as tabular"/> <param name="qseqid" type="data_column" data_ref="in_file" - multiple="False" numerical="False" default_value="1" value="1" - label="Column containing query ID (qseqid)" - help="This is column 1 in standard BLAST tabular output" /> - <param name="sseqid" type="data_column" data_ref="in_file" - 
multiple="False" numerical="False" default_value="2" value="2" - label="Column containing match ID (sseqid)" - help="This is column 2 in standard BLAST tabular output"/> + multiple="false" numerical="false" default_value="1" value="1" + label="Column containing query ID (qseqid)" + help="This is column 1 in standard BLAST tabular output" /> + <param name="sseqid" type="data_column" data_ref="in_file" + multiple="false" numerical="false" default_value="2" value="2" + label="Column containing match ID (sseqid)" + help="This is column 2 in standard BLAST tabular output"/> <param name="salltitles" type="data_column" data_ref="in_file" - multiple="False" numerical="False" default_value="25" value="25" + multiple="false" numerical="false" default_value="25" value="25" label="Column containing containing descriptions (salltitles)" - help="This is column 25 in the default extended BLAST tabular output"/> + help="This is column 25 in the default extended BLAST tabular output"/> </when> </conditional> <param name="topN" type="integer" min="1" max="100" optional="false" label="Number of descriptions" value="3"/> @@ -48,8 +48,6 @@ <outputs> <data name="tabular_file" format="tabular" label="Top $topN descriptions from $input.in_file.name" /> </outputs> - <requirements> - </requirements> <tests> <test> <param name="in_format" value="blastxml" /> @@ -102,7 +100,7 @@ Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). Galaxy tools and workflows for sequence analysis with applications in molecular plant pathology. PeerJ 1:e167 -http://dx.doi.org/10.7717/peerj.167 +https://doi.org/10.7717/peerj.167 This wrapper is available to install into other Galaxy Instances via the Galaxy Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/blastxml_to_top_descr
--- a/tools/blastxml_to_top_descr/repository_dependencies.xml Wed Jul 30 05:36:52 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ -<?xml version="1.0"?> -<repositories description="Requires BLAST XML and database datatype definitions."> -<repository changeset_revision="de11e1a921c4" name="blast_datatypes" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> -</repositories>
