# HG changeset patch
# User peterjc
# Date 1694955716 0
# Node ID 8dc4ba7eba5da852729ec1a500d4556cc3d4a436
# Parent fe1ed74793c98527c68e3fec41429d88aedb32d6
v0.1.2 with Python 3.9 declaration
diff -r fe1ed74793c9 -r 8dc4ba7eba5d test-data/blastp_four_human_vs_rhodopsin.xml
--- a/test-data/blastp_four_human_vs_rhodopsin.xml Wed Jul 30 05:36:52 2014 -0400
+++ b/test-data/blastp_four_human_vs_rhodopsin.xml Sun Sep 17 13:01:56 2023 +0000
@@ -2,10 +2,10 @@
<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
<BlastOutput>
<BlastOutput_program>blastp</BlastOutput_program>
- <BlastOutput_version>BLASTP 2.2.29+</BlastOutput_version>
+ <BlastOutput_version>BLASTP 2.10.1+</BlastOutput_version>
<BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
<BlastOutput_db></BlastOutput_db>
- <BlastOutput_query-ID>sp|Q9BS26|ERP44_HUMAN</BlastOutput_query-ID>
+ <BlastOutput_query-ID>Q9BS26</BlastOutput_query-ID>
<BlastOutput_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</BlastOutput_query-def>
<BlastOutput_query-len>406</BlastOutput_query-len>
<BlastOutput_param>
@@ -20,7 +20,7 @@
<BlastOutput_iterations>
<Iteration>
<Iteration_iter-num>1</Iteration_iter-num>
- <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>
+ <Iteration_query-ID>Q9BS26</Iteration_query-ID>
<Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>
<Iteration_query-len>406</Iteration_query-len>
<Iteration_hits>
@@ -29,8 +29,8 @@
<Statistics>
<Statistics_db-num>0</Statistics_db-num>
<Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>30</Statistics_hsp-len>
- <Statistics_eff-space>119568</Statistics_eff-space>
+ <Statistics_hsp-len>42</Statistics_hsp-len>
+ <Statistics_eff-space>657748</Statistics_eff-space>
<Statistics_kappa>0.041</Statistics_kappa>
<Statistics_lambda>0.267</Statistics_lambda>
<Statistics_entropy>0.14</Statistics_entropy>
@@ -40,17 +40,17 @@
</Iteration>
<Iteration>
<Iteration_iter-num>2</Iteration_iter-num>
- <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>
- <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>
- <Iteration_query-len>406</Iteration_query-len>
+ <Iteration_query-ID>Q9NSY1</Iteration_query-ID>
+ <Iteration_query-def>BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2</Iteration_query-def>
+ <Iteration_query-len>1161</Iteration_query-len>
<Iteration_hits>
</Iteration_hits>
<Iteration_stat>
<Statistics>
<Statistics_db-num>0</Statistics_db-num>
<Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>30</Statistics_hsp-len>
- <Statistics_eff-space>119568</Statistics_eff-space>
+ <Statistics_hsp-len>50</Statistics_hsp-len>
+ <Statistics_eff-space>1954249</Statistics_eff-space>
<Statistics_kappa>0.041</Statistics_kappa>
<Statistics_lambda>0.267</Statistics_lambda>
<Statistics_entropy>0.14</Statistics_entropy>
@@ -60,17 +60,17 @@
</Iteration>
<Iteration>
<Iteration_iter-num>3</Iteration_iter-num>
- <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>
- <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>
- <Iteration_query-len>406</Iteration_query-len>
+ <Iteration_query-ID>P06213</Iteration_query-ID>
+ <Iteration_query-def>Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4</Iteration_query-def>
+ <Iteration_query-len>1382</Iteration_query-len>
<Iteration_hits>
</Iteration_hits>
<Iteration_stat>
<Statistics>
<Statistics_db-num>0</Statistics_db-num>
<Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>30</Statistics_hsp-len>
- <Statistics_eff-space>119568</Statistics_eff-space>
+ <Statistics_hsp-len>51</Statistics_hsp-len>
+ <Statistics_eff-space>2333243</Statistics_eff-space>
<Statistics_kappa>0.041</Statistics_kappa>
<Statistics_lambda>0.267</Statistics_lambda>
<Statistics_entropy>0.14</Statistics_entropy>
@@ -80,307 +80,7 @@
</Iteration>
<Iteration>
<Iteration_iter-num>4</Iteration_iter-num>
- <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>
- <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>
- <Iteration_query-len>406</Iteration_query-len>
-<Iteration_hits>
-</Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>30</Statistics_hsp-len>
- <Statistics_eff-space>119568</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- <Iteration_message>No hits found</Iteration_message>
-</Iteration>
-<Iteration>
- <Iteration_iter-num>5</Iteration_iter-num>
- <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>
- <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>
- <Iteration_query-len>406</Iteration_query-len>
-<Iteration_hits>
-</Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>30</Statistics_hsp-len>
- <Statistics_eff-space>119568</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- <Iteration_message>No hits found</Iteration_message>
-</Iteration>
-<Iteration>
- <Iteration_iter-num>6</Iteration_iter-num>
- <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>
- <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>
- <Iteration_query-len>406</Iteration_query-len>
-<Iteration_hits>
-</Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>30</Statistics_hsp-len>
- <Statistics_eff-space>119568</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- <Iteration_message>No hits found</Iteration_message>
-</Iteration>
-<Iteration>
- <Iteration_iter-num>7</Iteration_iter-num>
- <Iteration_query-ID>sp|Q9NSY1|BMP2K_HUMAN</Iteration_query-ID>
- <Iteration_query-def>BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2</Iteration_query-def>
- <Iteration_query-len>1161</Iteration_query-len>
-<Iteration_hits>
-</Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>38</Statistics_hsp-len>
- <Statistics_eff-space>348130</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- <Iteration_message>No hits found</Iteration_message>
-</Iteration>
-<Iteration>
- <Iteration_iter-num>8</Iteration_iter-num>
- <Iteration_query-ID>sp|Q9NSY1|BMP2K_HUMAN</Iteration_query-ID>
- <Iteration_query-def>BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2</Iteration_query-def>
- <Iteration_query-len>1161</Iteration_query-len>
-<Iteration_hits>
-</Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>38</Statistics_hsp-len>
- <Statistics_eff-space>348130</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- <Iteration_message>No hits found</Iteration_message>
-</Iteration>
-<Iteration>
- <Iteration_iter-num>9</Iteration_iter-num>
- <Iteration_query-ID>sp|Q9NSY1|BMP2K_HUMAN</Iteration_query-ID>
- <Iteration_query-def>BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2</Iteration_query-def>
- <Iteration_query-len>1161</Iteration_query-len>
-<Iteration_hits>
-</Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>38</Statistics_hsp-len>
- <Statistics_eff-space>348130</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- <Iteration_message>No hits found</Iteration_message>
-</Iteration>
-<Iteration>
- <Iteration_iter-num>10</Iteration_iter-num>
- <Iteration_query-ID>sp|Q9NSY1|BMP2K_HUMAN</Iteration_query-ID>
- <Iteration_query-def>BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2</Iteration_query-def>
- <Iteration_query-len>1161</Iteration_query-len>
-<Iteration_hits>
-</Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>38</Statistics_hsp-len>
- <Statistics_eff-space>348130</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- <Iteration_message>No hits found</Iteration_message>
-</Iteration>
-<Iteration>
- <Iteration_iter-num>11</Iteration_iter-num>
- <Iteration_query-ID>sp|Q9NSY1|BMP2K_HUMAN</Iteration_query-ID>
- <Iteration_query-def>BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2</Iteration_query-def>
- <Iteration_query-len>1161</Iteration_query-len>
-<Iteration_hits>
-</Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>38</Statistics_hsp-len>
- <Statistics_eff-space>348130</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- <Iteration_message>No hits found</Iteration_message>
-</Iteration>
-<Iteration>
- <Iteration_iter-num>12</Iteration_iter-num>
- <Iteration_query-ID>sp|Q9NSY1|BMP2K_HUMAN</Iteration_query-ID>
- <Iteration_query-def>BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2</Iteration_query-def>
- <Iteration_query-len>1161</Iteration_query-len>
-<Iteration_hits>
-</Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>38</Statistics_hsp-len>
- <Statistics_eff-space>348130</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- <Iteration_message>No hits found</Iteration_message>
-</Iteration>
-<Iteration>
- <Iteration_iter-num>13</Iteration_iter-num>
- <Iteration_query-ID>sp|P06213|INSR_HUMAN</Iteration_query-ID>
- <Iteration_query-def>Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4</Iteration_query-def>
- <Iteration_query-len>1382</Iteration_query-len>
-<Iteration_hits>
-</Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>39</Statistics_hsp-len>
- <Statistics_eff-space>414987</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- <Iteration_message>No hits found</Iteration_message>
-</Iteration>
-<Iteration>
- <Iteration_iter-num>14</Iteration_iter-num>
- <Iteration_query-ID>sp|P06213|INSR_HUMAN</Iteration_query-ID>
- <Iteration_query-def>Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4</Iteration_query-def>
- <Iteration_query-len>1382</Iteration_query-len>
-<Iteration_hits>
-</Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>39</Statistics_hsp-len>
- <Statistics_eff-space>414987</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- <Iteration_message>No hits found</Iteration_message>
-</Iteration>
-<Iteration>
- <Iteration_iter-num>15</Iteration_iter-num>
- <Iteration_query-ID>sp|P06213|INSR_HUMAN</Iteration_query-ID>
- <Iteration_query-def>Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4</Iteration_query-def>
- <Iteration_query-len>1382</Iteration_query-len>
-<Iteration_hits>
-</Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>39</Statistics_hsp-len>
- <Statistics_eff-space>414987</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- <Iteration_message>No hits found</Iteration_message>
-</Iteration>
-<Iteration>
- <Iteration_iter-num>16</Iteration_iter-num>
- <Iteration_query-ID>sp|P06213|INSR_HUMAN</Iteration_query-ID>
- <Iteration_query-def>Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4</Iteration_query-def>
- <Iteration_query-len>1382</Iteration_query-len>
-<Iteration_hits>
-</Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>39</Statistics_hsp-len>
- <Statistics_eff-space>414987</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- <Iteration_message>No hits found</Iteration_message>
-</Iteration>
-<Iteration>
- <Iteration_iter-num>17</Iteration_iter-num>
- <Iteration_query-ID>sp|P06213|INSR_HUMAN</Iteration_query-ID>
- <Iteration_query-def>Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4</Iteration_query-def>
- <Iteration_query-len>1382</Iteration_query-len>
-<Iteration_hits>
-</Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>39</Statistics_hsp-len>
- <Statistics_eff-space>414987</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- <Iteration_message>No hits found</Iteration_message>
-</Iteration>
-<Iteration>
- <Iteration_iter-num>18</Iteration_iter-num>
- <Iteration_query-ID>sp|P06213|INSR_HUMAN</Iteration_query-ID>
- <Iteration_query-def>Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4</Iteration_query-def>
- <Iteration_query-len>1382</Iteration_query-len>
-<Iteration_hits>
-</Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>39</Statistics_hsp-len>
- <Statistics_eff-space>414987</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
- <Iteration_message>No hits found</Iteration_message>
-</Iteration>
-<Iteration>
- <Iteration_iter-num>19</Iteration_iter-num>
- <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID>
+ <Iteration_query-ID>P08100</Iteration_query-ID>
<Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def>
<Iteration_query-len>348</Iteration_query-len>
<Iteration_hits>
@@ -412,74 +112,36 @@
</Hsp>
</Hit_hsps>
</Hit>
-</Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>29</Statistics_hsp-len>
- <Statistics_eff-space>101761</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
-</Iteration>
-<Iteration>
- <Iteration_iter-num>20</Iteration_iter-num>
- <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID>
- <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def>
- <Iteration_query-len>348</Iteration_query-len>
-<Iteration_hits>
<Hit>
- <Hit_num>1</Hit_num>
- <Hit_id>gi|3024260|sp|P56514.1|OPSD_BUFBU</Hit_id>
- <Hit_def>RecName: Full=Rhodopsin</Hit_def>
- <Hit_accession>P56514</Hit_accession>
- <Hit_len>354</Hit_len>
+ <Hit_num>2</Hit_num>
+ <Hit_id>gi|223523|prf||0811197A</Hit_id>
+ <Hit_def>rhodopsin [Bos taurus]</Hit_def>
+ <Hit_accession>0811197A</Hit_accession>
+ <Hit_len>347</Hit_len>
<Hit_hsps>
<Hsp>
<Hsp_num>1</Hsp_num>
- <Hsp_bit-score>619.002</Hsp_bit-score>
- <Hsp_score>1595</Hsp_score>
+ <Hsp_bit-score>673.315</Hsp_bit-score>
+ <Hsp_score>1736</Hsp_score>
<Hsp_evalue>0</Hsp_evalue>
<Hsp_query-from>1</Hsp_query-from>
- <Hsp_query-to>341</Hsp_query-to>
+ <Hsp_query-to>348</Hsp_query-to>
<Hsp_hit-from>1</Hsp_hit-from>
- <Hsp_hit-to>342</Hsp_hit-to>
+ <Hsp_hit-to>347</Hsp_hit-to>
<Hsp_query-frame>0</Hsp_query-frame>
<Hsp_hit-frame>0</Hsp_hit-frame>
- <Hsp_identity>290</Hsp_identity>
- <Hsp_positive>322</Hsp_positive>
+ <Hsp_identity>324</Hsp_identity>
+ <Hsp_positive>336</Hsp_positive>
<Hsp_gaps>1</Hsp_gaps>
- <Hsp_align-len>342</Hsp_align-len>
- <Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE</Hsp_qseq>
- <Hsp_hseq>MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE</Hsp_hseq>
- <Hsp_midline>MNGTEGPNFY+P SN TGVVRSPFEYPQYYLAEPWQ+S+L AYMFLLI+LGFPINF+TLYVT+QHKKLRTPLNYILLNLA A+ FMVL GFT T+Y+S++GYF+ G TGC +EGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRF ENHA+MGVAFTW+MAL+CA PPL GWSRYIPEG+QCSCG+DYYTLKPEVNNESFVIYMFVVHFTIP+IIIFFCYG+LV TVKEAAAQQQESATTQKAEKEVTRMVIIMV+ FLICWVPYASVAF+IF++QGS FGPIFMT+PAFFAKS++IYNPVIYIM+NKQFRNCM+TT+CCGKNP G+D+A SA SKTE</Hsp_midline>
+ <Hsp_align-len>348</Hsp_align-len>
+ <Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA</Hsp_qseq>
+ <Hsp_hseq>MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA</Hsp_hseq>
+ <Hsp_midline>MNGTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGID YT E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA</Hsp_midline>
</Hsp>
</Hit_hsps>
</Hit>
-</Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>29</Statistics_hsp-len>
- <Statistics_eff-space>101761</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
-</Iteration>
-<Iteration>
- <Iteration_iter-num>21</Iteration_iter-num>
- <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID>
- <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def>
- <Iteration_query-len>348</Iteration_query-len>
-<Iteration_hits>
<Hit>
- <Hit_num>1</Hit_num>
+ <Hit_num>3</Hit_num>
<Hit_id>gi|283855846|gb|ADB45242.1|</Hit_id>
<Hit_def>rhodopsin [Cynopterus brachyotis]</Hit_def>
<Hit_accession>ADB45242</Hit_accession>
@@ -506,27 +168,8 @@
</Hsp>
</Hit_hsps>
</Hit>
-</Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>29</Statistics_hsp-len>
- <Statistics_eff-space>101761</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
-</Iteration>
-<Iteration>
- <Iteration_iter-num>22</Iteration_iter-num>
- <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID>
- <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def>
- <Iteration_query-len>348</Iteration_query-len>
-<Iteration_hits>
<Hit>
- <Hit_num>1</Hit_num>
+ <Hit_num>4</Hit_num>
<Hit_id>gi|283855823|gb|ADB45229.1|</Hit_id>
<Hit_def>rhodopsin [Myotis pilosus]</Hit_def>
<Hit_accession>ADB45229</Hit_accession>
@@ -553,74 +196,36 @@
</Hsp>
</Hit_hsps>
</Hit>
-</Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>29</Statistics_hsp-len>
- <Statistics_eff-space>101761</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
-</Iteration>
-<Iteration>
- <Iteration_iter-num>23</Iteration_iter-num>
- <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID>
- <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def>
- <Iteration_query-len>348</Iteration_query-len>
-<Iteration_hits>
<Hit>
- <Hit_num>1</Hit_num>
- <Hit_id>gi|223523|prf||0811197A</Hit_id>
- <Hit_def>rhodopsin [Bos taurus]</Hit_def>
- <Hit_accession>0811197A</Hit_accession>
- <Hit_len>347</Hit_len>
+ <Hit_num>5</Hit_num>
+ <Hit_id>gi|3024260|sp|P56514.1|OPSD_BUFBU</Hit_id>
+ <Hit_def>RecName: Full=Rhodopsin</Hit_def>
+ <Hit_accession>P56514</Hit_accession>
+ <Hit_len>354</Hit_len>
<Hit_hsps>
<Hsp>
<Hsp_num>1</Hsp_num>
- <Hsp_bit-score>673.315</Hsp_bit-score>
- <Hsp_score>1736</Hsp_score>
+ <Hsp_bit-score>619.002</Hsp_bit-score>
+ <Hsp_score>1595</Hsp_score>
<Hsp_evalue>0</Hsp_evalue>
<Hsp_query-from>1</Hsp_query-from>
- <Hsp_query-to>348</Hsp_query-to>
+ <Hsp_query-to>341</Hsp_query-to>
<Hsp_hit-from>1</Hsp_hit-from>
- <Hsp_hit-to>347</Hsp_hit-to>
+ <Hsp_hit-to>342</Hsp_hit-to>
<Hsp_query-frame>0</Hsp_query-frame>
<Hsp_hit-frame>0</Hsp_hit-frame>
- <Hsp_identity>324</Hsp_identity>
- <Hsp_positive>336</Hsp_positive>
+ <Hsp_identity>290</Hsp_identity>
+ <Hsp_positive>322</Hsp_positive>
<Hsp_gaps>1</Hsp_gaps>
- <Hsp_align-len>348</Hsp_align-len>
- <Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA</Hsp_qseq>
- <Hsp_hseq>MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA</Hsp_hseq>
- <Hsp_midline>MNGTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGID YT E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA</Hsp_midline>
+ <Hsp_align-len>342</Hsp_align-len>
+ <Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE</Hsp_qseq>
+ <Hsp_hseq>MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE</Hsp_hseq>
+ <Hsp_midline>MNGTEGPNFY+P SN TGVVRSPFEYPQYYLAEPWQ+S+L AYMFLLI+LGFPINF+TLYVT+QHKKLRTPLNYILLNLA A+ FMVL GFT T+Y+S++GYF+ G TGC +EGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRF ENHA+MGVAFTW+MAL+CA PPL GWSRYIPEG+QCSCG+DYYTLKPEVNNESFVIYMFVVHFTIP+IIIFFCYG+LV TVKEAAAQQQESATTQKAEKEVTRMVIIMV+ FLICWVPYASVAF+IF++QGS FGPIFMT+PAFFAKS++IYNPVIYIM+NKQFRNCM+TT+CCGKNP G+D+A SA SKTE</Hsp_midline>
</Hsp>
</Hit_hsps>
</Hit>
-</Iteration_hits>
- <Iteration_stat>
- <Statistics>
- <Statistics_db-num>0</Statistics_db-num>
- <Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>29</Statistics_hsp-len>
- <Statistics_eff-space>101761</Statistics_eff-space>
- <Statistics_kappa>0.041</Statistics_kappa>
- <Statistics_lambda>0.267</Statistics_lambda>
- <Statistics_entropy>0.14</Statistics_entropy>
- </Statistics>
- </Iteration_stat>
-</Iteration>
-<Iteration>
- <Iteration_iter-num>24</Iteration_iter-num>
- <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID>
- <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def>
- <Iteration_query-len>348</Iteration_query-len>
-<Iteration_hits>
<Hit>
- <Hit_num>1</Hit_num>
+ <Hit_num>6</Hit_num>
<Hit_id>gi|12583665|dbj|BAB21486.1|</Hit_id>
<Hit_def>fresh water form rod opsin [Conger myriaster]</Hit_def>
<Hit_accession>BAB21486</Hit_accession>
@@ -652,8 +257,8 @@
<Statistics>
<Statistics_db-num>0</Statistics_db-num>
<Statistics_db-len>0</Statistics_db-len>
- <Statistics_hsp-len>29</Statistics_hsp-len>
- <Statistics_eff-space>101761</Statistics_eff-space>
+ <Statistics_hsp-len>41</Statistics_hsp-len>
+ <Statistics_eff-space>556591</Statistics_eff-space>
<Statistics_kappa>0.041</Statistics_kappa>
<Statistics_lambda>0.267</Statistics_lambda>
<Statistics_entropy>0.14</Statistics_entropy>
diff -r fe1ed74793c9 -r 8dc4ba7eba5d test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular
--- a/test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular Wed Jul 30 05:36:52 2014 -0400
+++ b/test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular Sun Sep 17 13:01:56 2023 +0000
@@ -1,6 +1,6 @@
-sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 96.55 348 12 0 1 348 1 348 0.0 701 gi|57163783|ref|NP_001009242.1| 1808 336 343 0 98.56 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 348 rhodopsin [Felis catus]
-sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU 84.80 342 51 1 1 341 1 342 0.0 619 gi|3024260|sp|P56514.1|OPSD_BUFBU 1595 290 322 1 94.15 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 354 RecName: Full=Rhodopsin
-sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0 653 gi|283855846|gb|ADB45242.1| 1684 311 321 0 97.87 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 rhodopsin [Cynopterus brachyotis]
-sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0 631 gi|283855823|gb|ADB45229.1| 1627 311 323 0 98.48 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 rhodopsin [Myotis pilosus]
-sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0 673 gi|223523|prf||0811197A 1736 324 336 1 96.55 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 347 rhodopsin [Bos taurus]
-sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0 599 gi|12583665|dbj|BAB21486.1| 1544 281 314 1 91.81 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 354 fresh water form rod opsin [Conger myriaster]
+P08100 gi|57163783|ref|NP_001009242.1| 96.552 348 12 0 1 348 1 348 0.0 701 gi|57163783|ref|NP_001009242.1| 1808 336 343 0 98.56 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 348 rhodopsin [Felis catus]
+P08100 gi|223523|prf||0811197A 93.103 348 23 1 1 348 1 347 0.0 673 gi|223523|prf||0811197A 1736 324 336 1 96.55 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 347 rhodopsin [Bos taurus]
+P08100 gi|283855846|gb|ADB45242.1| 94.817 328 17 0 11 338 1 328 0.0 653 gi|283855846|gb|ADB45242.1| 1684 311 321 0 97.87 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 rhodopsin [Cynopterus brachyotis]
+P08100 gi|283855823|gb|ADB45229.1| 94.817 328 17 0 11 338 1 328 0.0 631 gi|283855823|gb|ADB45229.1| 1627 311 323 0 98.48 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 rhodopsin [Myotis pilosus]
+P08100 gi|3024260|sp|P56514.1|OPSD_BUFBU 84.795 342 51 1 1 341 1 342 0.0 619 gi|3024260|sp|P56514.1|OPSD_BUFBU 1595 290 322 1 94.15 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 354 RecName: Full=Rhodopsin
+P08100 gi|12583665|dbj|BAB21486.1| 82.164 342 60 1 1 341 1 342 0.0 599 gi|12583665|dbj|BAB21486.1| 1544 281 314 1 91.81 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 354 fresh water form rod opsin [Conger myriaster]
diff -r fe1ed74793c9 -r 8dc4ba7eba5d test-data/blastp_four_human_vs_rhodopsin_top3.tabular
--- a/test-data/blastp_four_human_vs_rhodopsin_top3.tabular Wed Jul 30 05:36:52 2014 -0400
+++ b/test-data/blastp_four_human_vs_rhodopsin_top3.tabular Sun Sep 17 13:01:56 2023 +0000
@@ -1,5 +1,5 @@
#Query BLAST hit 1 BLAST hit 2 BLAST hit 3
-sp|Q9BS26|ERP44_HUMAN
-sp|Q9NSY1|BMP2K_HUMAN
-sp|P06213|INSR_HUMAN
-sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus] gi|3024260|sp|P56514.1|OPSD_BUFBU RecName: Full=Rhodopsin gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis]
+Q9BS26
+Q9NSY1
+P06213
+P08100 gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus] gi|223523|prf||0811197A rhodopsin [Bos taurus] gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis]
diff -r fe1ed74793c9 -r 8dc4ba7eba5d test-data/blastp_four_human_vs_rhodopsin_top3_positive.tabular
--- a/test-data/blastp_four_human_vs_rhodopsin_top3_positive.tabular Wed Jul 30 05:36:52 2014 -0400
+++ b/test-data/blastp_four_human_vs_rhodopsin_top3_positive.tabular Sun Sep 17 13:01:56 2023 +0000
@@ -1,2 +1,2 @@
#Query BLAST hit 1 BLAST hit 2 BLAST hit 3
-sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus] gi|3024260|sp|P56514.1|OPSD_BUFBU RecName: Full=Rhodopsin gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis]
+P08100 gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus] gi|223523|prf||0811197A rhodopsin [Bos taurus] gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis]
diff -r fe1ed74793c9 -r 8dc4ba7eba5d tools/blastxml_to_top_descr/README.rst
--- a/tools/blastxml_to_top_descr/README.rst Wed Jul 30 05:36:52 2014 -0400
+++ b/tools/blastxml_to_top_descr/README.rst Sun Sep 17 13:01:56 2023 +0000
@@ -1,7 +1,7 @@
Galaxy tool to extract top BLAST hit descriptions from BLAST XML
================================================================
-This tool is copyright 2012-2013 by Peter Cock, The James Hutton Institute
+This tool is copyright 2012-2015 by Peter Cock, The James Hutton Institute
(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
See the licence text below.
@@ -12,38 +12,39 @@
It is available from the Galaxy Tool Shed at:
http://toolshed.g2.bx.psu.edu/view/peterjc/blastxml_to_top_descr
-This requires the 'blast_datatypes' repository from the Galaxy Tool Shed
-to provide the 'blastxml' file format definition.
+This requires the ``blast_datatypes`` repository from the Galaxy Tool Shed
+to provide the ``blastxml`` file format definition.
Automated Installation
======================
This should be straightforward, Galaxy should automatically install the
-'blast_datatypes' dependency.
+``blast_datatypes`` dependency.
Manual Installation
===================
-If you haven't done so before, first install the 'blast_datatypes' repository.
+If you haven't done so before, first install the ``blast_datatypes`` repository.
There are just two files to install (if doing this manually):
-* blastxml_to_top_descr.py (the Python script)
-* blastxml_to_top_descr.xml (the Galaxy tool definition)
+- ``blastxml_to_top_descr.py`` (the Python script)
+- ``blastxml_to_top_descr.xml`` (the Galaxy tool definition)
-The suggested location is in the Galaxy folder tools/ncbi_blast_plus next to
-the NCBI BLAST+ tool wrappers.
+The suggested location is in the Galaxy folder ``tools/ncbi_blast_plus/``
+next to the NCBI BLAST+ tool wrappers.
-You will also need to modify the tools_conf.xml file to tell Galaxy to offer
+You will also need to modify the ``tools_conf.xml`` file to tell Galaxy to offer
the tool. e.g. next to the NCBI BLAST+ tools. Simply add the line::
<tool file="ncbi_blast_plus/blastxml_to_top_descr.xml" />
-To run the tool's tests, also add this line to tools_conf.xml.sample then::
+If you wish to run the unit tests, also move/copy the ``test-data/`` files
+under Galaxy's ``test-data/`` folder. Then::
- $ sh run_functional_tests.sh -id blastxml_to_top_descr
+ $ sh run_tests.sh -id blastxml_to_top_descr
History
@@ -54,13 +55,13 @@
------- ----------------------------------------------------------------------
v0.0.1 - Initial version.
v0.0.2 - Since BLAST+ was moved out of the Galaxy core, now have a dependency
- on the 'blast_datatypes' repository in the Tool Shed.
+ on the ``blast_datatypes`` repository in the Tool Shed.
v0.0.3 - Include the test files required to run the unit tests
v0.0.4 - Quote filenames in case they contain spaces (internal change)
v0.0.5 - Include number of queries with BLAST matches in stdout (peek text)
v0.0.6 - Check for errors via the script's return code (internal change)
v0.0.7 - Link to Tool Shed added to help text and this documentation.
- - Tweak dependency on blast_datatypes to also work on Test Tool Shed
+ - Tweak dependency on ``blast_datatypes`` to also work on Test Tool Shed
- Adopt standard MIT License.
v0.0.8 - Development moved to GitHub, https://github.com/peterjc/galaxy_blast
v0.0.9 - Updated citation information (Cock et al. 2013).
@@ -70,6 +71,11 @@
- Support BLAST XML with multiple ``<Iteration>`` blocks per query.
- Support the default 25 column extended tabular BLAST output.
v0.1.1 - Embed citation information in the tool XML (new Galaxy feature).
+v0.1.2 - Reorder XML elements (internal change only).
+ - Planemo for Tool Shed upload (``.shed.yml``, internal change only).
+ - Use ``<command detect_errors="aggressive">`` (internal change only).
+ - Single quote command line arguments (internal change only).
+ - Python 3 compatible syntax.
======= ======================================================================
@@ -89,22 +95,31 @@
As of July 2013, development is continuing on a dedicated GitHub repository:
https://github.com/peterjc/galaxy_blast
-For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use
-the following command from the GitHub repository root folder::
+For pushing a release to the test or main "Galaxy Tool Shed", use the following
+Planemo commands (which require that you have set your Tool Shed access details in
+``~/.planemo.yml`` and that you have access rights on the Tool Shed)::
+
+ $ planemo shed_update -t testtoolshed --check_diff ~/repositories/galaxy_blast/tools/blastxml_to_top_descr/
+ ...
+
+or::
- $ tar -czf blastxml_to_top_descr.tar.gz tools/blastxml_to_top_descr/README.rst tools/blastxml_to_top_descr/blastxml_to_top_descr.* tools/blastxml_to_top_descr/repository_dependencies.xml test-data/blastp_four_human_vs_rhodopsin.xml test-data/blastp_four_human_vs_rhodopsin_top3.tabular test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular test-data/blastp_four_human_vs_rhodopsin_top3_positive.tabular
+ $ planemo shed_update -t toolshed --check_diff ~/repositories/galaxy_blast/tools/blastxml_to_top_descr/
+ ...
+
+To just build and check the tar ball, use::
-Check this worked::
-
- $ tar -tzf blastxml_to_top_descr.tar.gz
+ $ planemo shed_upload --tar_only ~/repositories/galaxy_blast/tools/blastxml_to_top_descr/
+ ...
+ $ tar -tzf shed_upload.tar.gz
+ test-data/blastp_four_human_vs_rhodopsin.xml
+ test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular
+ test-data/blastp_four_human_vs_rhodopsin_top3.tabular
+ test-data/blastp_four_human_vs_rhodopsin_top3_positive.tabular
tools/blastxml_to_top_descr/README.rst
tools/blastxml_to_top_descr/blastxml_to_top_descr.py
tools/blastxml_to_top_descr/blastxml_to_top_descr.xml
tools/blastxml_to_top_descr/repository_dependencies.xml
- test-data/blastp_four_human_vs_rhodopsin.xml
- test-data/blastp_four_human_vs_rhodopsin_top3.tabular
- test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular
- test-data/blastp_four_human_vs_rhodopsin_top3_positive.tabular
Licence (MIT)
diff -r fe1ed74793c9 -r 8dc4ba7eba5d tools/blastxml_to_top_descr/blastxml_to_top_descr.py
--- a/tools/blastxml_to_top_descr/blastxml_to_top_descr.py Wed Jul 30 05:36:52 2014 -0400
+++ b/tools/blastxml_to_top_descr/blastxml_to_top_descr.py Sun Sep 17 13:01:56 2023 +0000
@@ -6,25 +6,26 @@
Assumes the hits are pre-sorted, so "best" 3 hits gives first 3 hits.
"""
+from __future__ import print_function
+
import os
+import re
import sys
-import re
from optparse import OptionParser
if "-v" in sys.argv or "--version" in sys.argv:
- print "v0.1.0"
+ print("v0.1.2")
sys.exit(0)
-if sys.version_info[:2] >= ( 2, 5 ):
+if sys.version_info[:2] >= (2, 5):
import xml.etree.cElementTree as ElementTree
else:
- from galaxy import eggs
- import pkg_resources; pkg_resources.require( "elementtree" )
+ from galaxy import eggs # noqa - ignore flake8 F401
+ import pkg_resources
+
+ pkg_resources.require("elementtree")
from elementtree import ElementTree
-def stop_err( msg ):
- sys.stderr.write("%s\n" % msg)
- sys.exit(1)
usage = """Use as follows:
@@ -39,23 +40,54 @@
"""
parser = OptionParser(usage=usage)
-parser.add_option("-t", "--topN", dest="topN", default=3,
- help="Number of descriptions to collect (in order from file)")
-parser.add_option("-o", "--output", dest="out_file", default=None,
- help="Output filename for tabular file",
- metavar="FILE")
-parser.add_option("-f", "--format", dest="format", default="blastxml",
- help="Input format (blastxml or tabular)")
-parser.add_option("-q", "--qseqid", dest="qseqid", default="1",
- help="Column for query 'qseqid' (for tabular input; default 1)")
-parser.add_option("-s", "--sseqid", dest="sseqid", default="2",
- help="Column for subject 'sseqid' (for tabular input; default 2)")
-parser.add_option("-d", "--salltitles", dest="salltitles", default="25",
- help="Column for descriptions 'salltitles' (for tabular input; default 25)")
+parser.add_option(
+ "-t",
+ "--topN",
+ dest="topN",
+ default=3,
+ help="Number of descriptions to collect (in order from file)",
+)
+parser.add_option(
+ "-o",
+ "--output",
+ dest="out_file",
+ default=None,
+ help="Output filename for tabular file",
+ metavar="FILE",
+)
+parser.add_option(
+ "-f",
+ "--format",
+ dest="format",
+ default="blastxml",
+ help="Input format (blastxml or tabular)",
+)
+parser.add_option(
+ "-q",
+ "--qseqid",
+ dest="qseqid",
+ default="1",
+ help="Column for query 'qseqid' (for tabular input; default 1)",
+)
+parser.add_option(
+ "-s",
+ "--sseqid",
+ dest="sseqid",
+ default="2",
+ help="Column for subject 'sseqid' (for tabular input; default 2)",
+)
+parser.add_option(
+ "-d",
+ "--salltitles",
+ dest="salltitles",
+ default="25",
+ help="Column for descriptions 'salltitles' (for tabular input; default 25)",
+)
(options, args) = parser.parse_args()
if len(sys.argv) == 4 and len(args) == 3 and not options.out_file:
- stop_err("""The API has changed, replace this:
+ sys.exit(
+ """The API has changed, replace this:
$ python blastxml_to_top_descr.py input.xml output.tab 3
@@ -64,12 +96,13 @@
$ python blastxml_to_top_descr.py -o output.tab -t 3 input.xml
Sorry.
-""")
+"""
+ )
if not args:
- stop_err("Input filename missing, try -h")
+ sys.exit("Input filename missing, try -h")
if len(args) > 1:
- stop_err("Expects a single argument, one input filename")
+ sys.exit("Expects a single argument, one input filename")
in_file = args[0]
out_file = options.out_file
topN = options.topN
@@ -77,12 +110,12 @@
try:
topN = int(topN)
except ValueError:
- stop_err("Number of hits argument should be an integer (at least 1)")
+ sys.exit("Number of hits argument should be an integer (at least 1)")
if topN < 1:
- stop_err("Number of hits argument should be an integer (at least 1)")
+ sys.exit("Number of hits argument should be an integer (at least 1)")
if not os.path.isfile(in_file):
- stop_err("Missing input file: %r" % in_file)
+ sys.exit("Missing input file: %r" % in_file)
def get_column(value):
@@ -92,11 +125,12 @@
value = value[1:]
try:
col = int(value)
- except:
- stop_err("Expected an integer column number, not %r" % value)
+ except ValueError:
+ sys.exit("Expected an integer column number, not %r" % value)
if col < 1:
- stop_err("Expect column numbers to be at least one, not %r" % value)
- return col - 1 # Python counting!
+ sys.exit("Expect column numbers to be at least one, not %r" % value)
+ return col - 1 # Python counting!
+
def tabular_hits(in_file, qseqid, sseqid, salltitles):
"""Parse key data from tabular BLAST output.
@@ -105,8 +139,8 @@
"""
current_query = None
current_hits = []
- with open(in_file) as input:
- for line in input:
+ with open(in_file) as input_handle:
+ for line in input_handle:
parts = line.rstrip("\n").split("\t")
query = parts[qseqid]
descr = "%s %s" % (parts[sseqid], parts[salltitles])
@@ -126,6 +160,7 @@
# Final query
yield current_query, current_hits
+
def blastxml_hits(in_file):
"""Parse key data from BLAST XML output.
@@ -133,32 +168,35 @@
"""
try:
context = ElementTree.iterparse(in_file, events=("start", "end"))
- except:
+ except Exception:
with open(in_file) as handle:
header = handle.read(100)
- stop_err("Invalid data format in XML file %r which starts: %r" % (in_file, header))
+ sys.exit(
+ "Invalid data format in XML file %r which starts: %r" % (in_file, header)
+ )
# turn it into an iterator
context = iter(context)
# get the root element
try:
- event, root = context.next()
- except:
+ event, root = next(context)
+ except Exception:
with open(in_file) as handle:
header = handle.read(100)
- stop_err("Unable to get root element from XML file %r which starts: %r" % (in_file, header))
+ sys.exit(
+ "Unable to get root element from XML file %r which starts: %r"
+ % (in_file, header)
+ )
- re_default_query_id = re.compile("^Query_\d+$")
- assert re_default_query_id.match("Query_101")
- assert not re_default_query_id.match("Query_101a")
- assert not re_default_query_id.match("MyQuery_101")
- re_default_subject_id = re.compile("^Subject_\d+$")
- assert re_default_subject_id.match("Subject_1")
- assert not re_default_subject_id.match("Subject_")
- assert not re_default_subject_id.match("Subject_12a")
- assert not re_default_subject_id.match("TheSubject_1")
+ re_default_query_id = re.compile(r"^Query_\d+$")
+ assert re_default_query_id.match(r"Query_101")
+ assert not re_default_query_id.match(r"Query_101a")
+ assert not re_default_query_id.match(r"MyQuery_101")
+ re_default_subject_id = re.compile(r"^Subject_\d+$")
+ assert re_default_subject_id.match(r"Subject_1")
+ assert not re_default_subject_id.match(r"Subject_")
+ assert not re_default_subject_id.match(r"Subject_12a")
+ assert not re_default_subject_id.match(r"TheSubject_1")
- count = 0
- pos_count = 0
current_query = None
hit_descrs = []
for event, elem in context:
@@ -166,7 +204,8 @@
if event == "end" and elem.tag == "Iteration":
# Expecting either this, from BLAST 2.2.25+ using FASTA vs FASTA
# <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>
- # <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>
+ # <Iteration_query-def>Endoplasmic reticulum resident protein 44
+ # OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>
# <Iteration_query-len>406</Iteration_query-len>
# <Iteration_hits></Iteration_hits>
#
@@ -177,10 +216,12 @@
# <Iteration_hits>...
qseqid = elem.findtext("Iteration_query-ID")
if qseqid is None:
- stop_err("Missing <Iteration_query-ID> (could be really old BLAST XML data?)")
+ sys.exit(
+ "Missing <Iteration_query-ID> (could be really old BLAST XML data?)"
+ )
if re_default_query_id.match(qseqid):
- #Place holder ID, take the first word of the query definition
- qseqid = elem.findtext("Iteration_query-def").split(None,1)[0]
+ # Place holder ID, take the first word of the query definition
+ qseqid = elem.findtext("Iteration_query-def").split(None, 1)[0]
if current_query is None:
# First hit
current_query = qseqid
@@ -203,17 +244,19 @@
# <Hit_accession>P56514</Hit_accession>
# or,
# <Hit_id>Subject_1</Hit_id>
- # <Hit_def>gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]</Hit_def>
+ # <Hit_def>gi|57163783|ref|NP_001009242.1|
+ # rhodopsin [Felis catus]</Hit_def>
# <Hit_accession>Subject_1</Hit_accession>
#
- #apparently depending on the parse_deflines switch
- sseqid = hit.findtext("Hit_id").split(None,1)[0]
+ # apparently depending on the parse_deflines switch
+ sseqid = hit.findtext("Hit_id").split(None, 1)[0]
hit_def = sseqid + " " + hit.findtext("Hit_def")
- if re_default_subject_id.match(sseqid) \
- and sseqid == hit.findtext("Hit_accession"):
- #Place holder ID, take the first word of the subject definition
+ if re_default_subject_id.match(sseqid) and sseqid == hit.findtext(
+ "Hit_accession"
+ ):
+ # Place holder ID, take the first word of the subject definition
hit_def = hit.findtext("Hit_def")
- sseqid = hit_def.split(None,1)[0]
+ sseqid = hit_def.split(None, 1)[0]
assert hit_def not in hit_descrs
hit_descrs.append(hit_def)
# prevents ElementTree from growing large datastructure
@@ -223,6 +266,7 @@
# Final query
yield current_query, hit_descrs
+
if options.format == "blastxml":
hits = blastxml_hits(in_file)
elif options.format == "tabular":
@@ -231,21 +275,23 @@
salltitles = get_column(options.salltitles)
hits = tabular_hits(in_file, qseqid, sseqid, salltitles)
else:
- stop_err("Unsupported format: %r" % options.format)
+ sys.exit("Unsupported format: %r" % options.format)
def best_hits(descriptions, topN):
+ """Truncate given descriptions list to at most N entries."""
if len(descriptions) < topN:
- return descriptions + [""] * (topN - len(descriptions))
+ return descriptions + [""] * (topN - len(descriptions))
else:
return descriptions[:topN]
+
count = 0
if out_file is None:
outfile = sys.stdout
else:
- outfile = open(out_file, 'w')
-outfile.write("#Query\t%s\n" % "\t".join("BLAST hit %i" % (i+1) for i in range(topN)))
+ outfile = open(out_file, "w")
+outfile.write("#Query\t%s\n" % "\t".join("BLAST hit %i" % (i + 1) for i in range(topN)))
for query, descrs in hits:
count += 1
outfile.write("%s\t%s\n" % (query, "\t".join(best_hits(descrs, topN))))
diff -r fe1ed74793c9 -r 8dc4ba7eba5d tools/blastxml_to_top_descr/blastxml_to_top_descr.xml
--- a/tools/blastxml_to_top_descr/blastxml_to_top_descr.xml Wed Jul 30 05:36:52 2014 -0400
+++ b/tools/blastxml_to_top_descr/blastxml_to_top_descr.xml Sun Sep 17 13:01:56 2023 +0000
@@ -1,27 +1,27 @@
-<tool id="blastxml_to_top_descr" name="BLAST top hit descriptions" version="0.1.1">
+<tool id="blastxml_to_top_descr" name="BLAST top hit descriptions" version="0.1.2" profile="16.10">
<description>Make a table from BLAST output</description>
- <version_command interpreter="python">blastxml_to_top_descr.py --version</version_command>
- <command interpreter="python">
-blastxml_to_top_descr.py
--f "$input.in_format"
+ <requirements>
+ <requirement type="package" version="3.9">python</requirement>
+ </requirements>
+ <version_command>
+python $__tool_directory__/blastxml_to_top_descr.py --version
+ </version_command>
+ <command detect_errors="aggressive">
+python $__tool_directory__/blastxml_to_top_descr.py
+-f '$input.in_format'
#if $input.in_format == "tabular":
--qseqid $input.qseqid
--sseqid $input.sseqid
--salltitles $input.salltitles
#end if
--o "${tabular_file}"
+-o '${tabular_file}'
-t ${topN}
-"${in_file}"
+'${in_file}'
</command>
- <stdio>
- <!-- Assume anything other than zero is an error -->
- <exit_code range="1:" />
- <exit_code range=":-1" />
- </stdio>
<inputs>
<conditional name="input">
<param name="in_format" type="select" label="Input format">
- <option value="blastxml" select="True">BLAST XML</option>
+ <option value="blastxml" selected="true">BLAST XML</option>
<option value="tabular">Tabular</option>
</param>
<when value="blastxml">
@@ -30,17 +30,17 @@
<when value="tabular">
<param name="in_file" type="data" format="tabular" label="BLAST results as tabular"/>
<param name="qseqid" type="data_column" data_ref="in_file"
- multiple="False" numerical="False" default_value="1" value="1"
- label="Column containing query ID (qseqid)"
- help="This is column 1 in standard BLAST tabular output" />
- <param name="sseqid" type="data_column" data_ref="in_file"
- multiple="False" numerical="False" default_value="2" value="2"
- label="Column containing match ID (sseqid)"
- help="This is column 2 in standard BLAST tabular output"/>
+ multiple="false" numerical="false" default_value="1" value="1"
+ label="Column containing query ID (qseqid)"
+ help="This is column 1 in standard BLAST tabular output" />
+ <param name="sseqid" type="data_column" data_ref="in_file"
+ multiple="false" numerical="false" default_value="2" value="2"
+ label="Column containing match ID (sseqid)"
+ help="This is column 2 in standard BLAST tabular output"/>
<param name="salltitles" type="data_column" data_ref="in_file"
- multiple="False" numerical="False" default_value="25" value="25"
+ multiple="false" numerical="false" default_value="25" value="25"
label="Column containing containing descriptions (salltitles)"
- help="This is column 25 in the default extended BLAST tabular output"/>
+ help="This is column 25 in the default extended BLAST tabular output"/>
</when>
</conditional>
<param name="topN" type="integer" min="1" max="100" optional="false" label="Number of descriptions" value="3"/>
@@ -48,8 +48,6 @@
<outputs>
<data name="tabular_file" format="tabular" label="Top $topN descriptions from $input.in_file.name" />
</outputs>
- <requirements>
- </requirements>
<tests>
<test>
<param name="in_format" value="blastxml" />
@@ -102,7 +100,7 @@
Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
Galaxy tools and workflows for sequence analysis with applications
in molecular plant pathology. PeerJ 1:e167
-http://dx.doi.org/10.7717/peerj.167
+https://doi.org/10.7717/peerj.167
This wrapper is available to install into other Galaxy Instances via the Galaxy
Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/blastxml_to_top_descr
diff -r fe1ed74793c9 -r 8dc4ba7eba5d tools/blastxml_to_top_descr/repository_dependencies.xml
--- a/tools/blastxml_to_top_descr/repository_dependencies.xml Wed Jul 30 05:36:52 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-<?xml version="1.0"?>
-<repositories description="Requires BLAST XML and database datatype definitions.">
-<repository changeset_revision="de11e1a921c4" name="blast_datatypes" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
-</repositories>