# HG changeset patch
# User iuc
# Date 1750756617 0
# Node ID 49dbf2f5b0e2a4747dffce4b33f30a4b1efaa10b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/tesseract/ commit f1ec0d8eddacc5d5e0ca3bc112005f4f2d2597f6
diff -r 000000000000 -r 49dbf2f5b0e2 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Jun 24 09:16:57 2025 +0000
@@ -0,0 +1,23 @@
+
+ 5.0.0
+ 0
+
+
+ tesseract
+
+
+
+
+
+
+
+
+
+ 10.1145/1815330.1815339
+ 10.1145/1577802.1577804
+ 10.1145/1577802.1577809
+ 10.1109/ICDAR.2009.257
+ 10.1109/ICDAR.2007.4376991
+
+
+
diff -r 000000000000 -r 49dbf2f5b0e2 tesseract.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tesseract.xml Tue Jun 24 09:16:57 2025 +0000
@@ -0,0 +1,247 @@
+
+ Optical Character Recognition
+
+ macros.xml
+
+
+
+ img_paths &&
+ tesseract img_paths output
+ --tessdata-dir '${tessdata.fields.path}'
+ #if $language:
+ -l ${ str($language).replace(",","+") }
+ #end if
+ --psm $psm
+ #if $dpi:
+ --dpi $dpi
+ #end if
+ #if $user_words
+ --user-words '$user_words'
+ #end if
+ #if $user_patterns
+ --user-patterns '$user_patterns'
+ #end if
+ #for $format in $output_formats
+ -c $format=1
+ #end for
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 'tessedit_create_txt' in output_formats
+
+
+ 'tessedit_create_pdf' in output_formats
+
+
+ 'tessedit_create_hocr' in output_formats
+
+
+ 'tessedit_create_tsv' in output_formats
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ `_
+
+* `API example for user patterns `_
+
+**License**
+
+* `Apache-2.0 `_
+ ]]>
+
+
diff -r 000000000000 -r 49dbf2f5b0e2 test-data/eng.user-patterns
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/eng.user-patterns Tue Jun 24 09:16:57 2025 +0000
@@ -0,0 +1,2 @@
+1-\d\d\d-GOOG-411
+www.\n\\\*.com
diff -r 000000000000 -r 49dbf2f5b0e2 test-data/eng.user-words
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/eng.user-words Tue Jun 24 09:16:57 2025 +0000
@@ -0,0 +1,5 @@
+the
+quick
+brown
+fox
+jumped
diff -r 000000000000 -r 49dbf2f5b0e2 test-data/eurotext.png
Binary file test-data/eurotext.png has changed
diff -r 000000000000 -r 49dbf2f5b0e2 test-data/output.pdf
Binary file test-data/output.pdf has changed
diff -r 000000000000 -r 49dbf2f5b0e2 test-data/output.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.tsv Tue Jun 24 09:16:57 2025 +0000
@@ -0,0 +1,12 @@
+level page_num block_num par_num line_num word_num left top width height conf text
+1 1 0 0 0 0 0 0 1024 1024 -1
+2 1 1 0 0 0 92 296 875 130 -1
+3 1 1 1 0 0 92 296 875 130 -1
+4 1 1 1 1 0 92 296 875 130 -1
+5 1 1 1 1 1 92 297 174 129 37.403927 Ꮳ
+5 1 1 1 1 2 286 296 681 130 16.962364 ᏌᎠᏯᏙᏣᎠ
+2 1 2 0 0 0 164 572 694 133 -1
+3 1 2 1 0 0 164 572 694 133 -1
+4 1 2 1 1 0 164 572 694 133 -1
+5 1 2 1 1 1 164 568 364 184 53.049427 ᎪᎶᎩ)
+5 1 2 1 1 2 650 572 200 132 35.113586 Ꮾ(Ꭰ
diff -r 000000000000 -r 49dbf2f5b0e2 test-data/output.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.txt Tue Jun 24 09:16:57 2025 +0000
@@ -0,0 +1,14 @@
+ᎢᏂᏮ (ᏄᏌᎥᏟᏦ) (ᏏᎵᏣᎳᏲ). (Ꮅ9Ꮶ). ᎫᏢᏚᎥ
+ᎤᏙᏮᎢ (ᏂᏮ ᏕᏎ3Ꮷ,Ꮞ5Ꮪ6.ᏤᎦ “ᎥᏧᏃᎩ- Ꮱ90 ᏧᎾᏴ
+(Ꮭ ᏧᏄᏟᏦᎥᏙᏴᏣᎾᏚᏮ, 3Ꮪ 12. 506 ᎾᎴ Ꭼ--Ꮑ311
+ᎥᎢᏣᏁᎸ ᏄᏚᏢᎸ ᎠᏁ 1161 (ᏇᎳᏮᏌᏚᎥ (6. ᏟᏣᎥᎸ ᎥᏚ ᏚᏢᎸᏒ.
+ᎠᎴᎢ ..ᏚᏟᏂᏒᏮᎥ16” ᏏᎠᎱᎸᏄᎠᏮ ᎬᎿᏟᏂᏚ ᏚᏢᎱᎥᎸᏰᎥ
+(ᏏᏮᎢ ᏧᏮᎯᏒ (Ꭿ016(Ꮢ ᎻᏄᏌᏲᏁᏧ. ᏞᏮ ᎢᏣᏁᎸᎢᏧ ᏏᎢᏛᏌᏲ
+ᏄᎢᏄᏢᎠᎥᏧᏮᏅ ᏚᎸᏌᎥᏮ ᎠᎧᏄᎱ-ᏄᎶᏮᏚᏚᏌᏚ 16 ᏟᏂᎥᏮᏁ
+ᎠᏰᎸᎱᏮᏚᏚᏮᏔᏦ. ᏞᏘ ᏙᎾᎤᎥᏢᏮ (131ᎢᏣᎠ6 ᎱᎸᎠᎥᏧᎸ
+ᏚᎸ1(3 ᏚᏅᏣᏢᎢᎸ ᎥᎥ ᏟᏱᏒᏮ ᎠᎥᏪᎱᎾ. ᎬᎥ ᏃᎾᎱᎱᎾ
+ᎢᏁᎸᎢᎱᏫᏒ ᎱᏰᎠᎥᏧᎾ ᏚᏰᎥᎥᎸ ᏚᎾᏏᎱᏮ ᏮᎥ ᎠᎾᎥᎱᎾ
+ᎠᏮᎱᏮᏃᏣᏚᎾ. Ꭺ ᎢᎸᎠᏣᏚᏘ (ᏁᎪ1ᎢᏣᎥᏁ ᎢᎦᏁᎠᎥᏧᎸᎸ
+ᏚᎸ1(3 ᏚᎾᏣᎠᎢᏮ Ꮕ ᏟᎸᏣ ᎠᎱᎾᎬᏪᏌᎥᏣᎾᏚᎾ.
+Ꮳ ᏌᎠᏯᏙᏣᎠ
+ᎪᎶᎩ)1 Ꮾ6Ꭰ
diff -r 000000000000 -r 49dbf2f5b0e2 test-data/tessdata.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tessdata.loc Tue Jun 24 09:16:57 2025 +0000
@@ -0,0 +1,1 @@
+test_tessdata Default 4.1.0 4.1.0 ${__HERE__}/tessdata
\ No newline at end of file
diff -r 000000000000 -r 49dbf2f5b0e2 test-data/tessdata/chr.traineddata
Binary file test-data/tessdata/chr.traineddata has changed
diff -r 000000000000 -r 49dbf2f5b0e2 test-data/test_image_cherokee.png
Binary file test-data/test_image_cherokee.png has changed
diff -r 000000000000 -r 49dbf2f5b0e2 tool-data/tessdata.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/tessdata.loc.sample Tue Jun 24 09:16:57 2025 +0000
@@ -0,0 +1,9 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a tessdata model folder. Each entry in the tessdata.loc file
+#must have this format (the longer white space is the TAB character):
+
+#<value>	<name>	<version>	<path>
+
+# for example:
+
+# tessdata name 0.1 0.1 /data/tessdata
\ No newline at end of file
diff -r 000000000000 -r 49dbf2f5b0e2 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Tue Jun 24 09:16:57 2025 +0000
@@ -0,0 +1,7 @@
+
+
+
+ value, name, version, path
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 49dbf2f5b0e2 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Tue Jun 24 09:16:57 2025 +0000
@@ -0,0 +1,7 @@
+
+
+
+ value, name, version, path
+
+
+
\ No newline at end of file