changeset 2:56de4ac77c41 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/tesseract commit a5652d3d3d6a07d96d3898659f0254d8265b8215
author iuc
date Wed, 29 Oct 2025 17:38:19 +0000
parents d39f16771b62
children
files macros.xml tesseract.xml test-data/image_output.pdf test-data/image_output.tsv test-data/image_output.txt test-data/output.pdf test-data/output.tsv test-data/output.txt test-data/pdf_output.pdf test-data/pdf_output.tsv test-data/pdf_output.txt test-data/test_input.pdf
diffstat 12 files changed, 214 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Wed Jul 09 10:36:11 2025 +0000
+++ b/macros.xml	Wed Oct 29 17:38:19 2025 +0000
@@ -1,9 +1,11 @@
 <macros>
     <token name="@TOOL_VERSION@">5.5.1</token>
-    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@VERSION_SUFFIX@">1</token>
+    <token name="@TOOL_VERSION_PDFIMAGES@">25.07.0</token>
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@TOOL_VERSION@">tesseract</requirement>
+            <requirement type="package" version="@TOOL_VERSION_PDFIMAGES@">poppler</requirement>
         </requirements>
     </xml>
     <xml name="creators">
@@ -18,6 +20,13 @@
             <citation type="doi">10.1145/1577802.1577809</citation>
             <citation type="doi">10.1109/ICDAR.2009.257</citation>
             <citation type="doi">10.1109/ICDAR.2007.4376991</citation>
+            <citation type="bibtex">
+                @misc{poppler,
+                  title        = {Poppler PDF Rendering Library},
+                  howpublished = {\url{https://gitlab.freedesktop.org/poppler/poppler}},
+                  note         = {Accessed: 2025-07-31}
+                }
+            </citation>
         </citations>
     </xml>
 </macros>
--- a/tesseract.xml	Wed Jul 09 10:36:11 2025 +0000
+++ b/tesseract.xml	Wed Oct 29 17:38:19 2025 +0000
@@ -6,7 +6,13 @@
     <expand macro="requirements" />
     <expand macro="creators" />
     <command detect_errors="exit_code"><![CDATA[
-        echo '$input_file' | tr ',' '\n' > img_paths &&
+        #if str($input_file.ext) == "pdf"
+            mkdir extracted_images &&
+            pdfimages -png '$input_file' extracted_images/page &&
+            ls extracted_images/page*.png > img_paths &&
+        #else
+            echo '$input_file' > img_paths &&
+        #end if
         tesseract img_paths output
         --tessdata-dir '${tessdata.fields.path}'
         #if $language:
@@ -27,7 +33,7 @@
         #end for
     ]]></command>
     <inputs>
-        <param name="input_file" type="data" format="jpg,png,tif,tiff,bmp" label="Image file(s)" multiple="true"/>
+        <param name="input_file" type="data" format="jpg,png,tif,tiff,bmp,pdf" label="Image or PDF file" multiple="false"/>
         <param name="tessdata" type="select" label="Tessdata" help="Language data models">
             <options from_data_table="tessdata">
                 <column name="value" index="0"/>
@@ -40,7 +46,7 @@
         </param>
         <param name="user_words" type="data" format="txt" label="User words file" optional="true" help="The user words file allows you to specify a list of words that Tesseract should treat as known words. One word per line"/>
         <param name="user_patterns" type="data" format="txt" label="User patterns file" optional="true" help="One pattern per line in UTF-8 format. For more information please visit the tesseract docs about patterns linked in the help section"/>
-        <param name="language" type="select" label="OCR Language(s)" multiple="true" help="In the case of a multilingual image(s), more the one language can be selected">
+        <param name="language" type="select" label="OCR Language(s)" multiple="true" help="In the case of multilingual image(s), more than one language can be selected" optional="false">
             <option value="afr">Afrikaans</option>
             <option value="amh">Amharic</option>
             <option value="ara">Arabic</option>
@@ -204,15 +210,15 @@
     </outputs>
     <tests>
         <test expect_num_outputs="2">
-            <param name="input_file" value="eurotext.png,test_image_cherokee.png"/>
+            <param name="input_file" value="eurotext.png"/>
             <param name="tessdata" value="test_tessdata"/>
             <param name="user_words" value="eng.user-words"/>
             <param name="user_patterns" value="eng.user-patterns"/>
             <param name="language" value="chr"/>
             <param name="output_formats" value="tessedit_create_txt,tessedit_create_pdf"/>
             <param name="psm" value="3"/>
-            <output name="output_text" file="output.txt"/>
-            <output name="output_pdf" file="output.pdf"/>
+            <output name="output_text" file="image_output.txt"/>
+            <output name="output_pdf" file="image_output.pdf"/>
         </test>
         <test expect_num_outputs="2">
             <param name="input_file" value="test_image_cherokee.png"/>
@@ -227,7 +233,24 @@
                     <has_size value="1805" delta="10"/>
                 </assert_contents>
             </output>
-            <output name="output_tsv" file="output.tsv"/>
+            <output name="output_tsv" file="image_output.tsv"/>
+        </test>
+        <test expect_num_outputs="4">
+            <param name="input_file" value="test_input.pdf"/>
+            <param name="tessdata" value="test_tessdata"/>
+            <param name="language" value="chr"/>
+            <param name="output_formats" value="tessedit_create_txt,tessedit_create_pdf,tessedit_create_hocr,tessedit_create_tsv"/>
+            <param name="psm" value="11"/>
+            <output name="output_hocr">
+                <assert_contents>
+                    <has_text text="Ꮳ"/>
+                    <has_text text="ᏌᎠᏯᏙᏣᎠ"/>
+                    <has_size value="13185" delta="10"/>
+                </assert_contents>
+            </output>
+            <output name="output_tsv" file="pdf_output.tsv"/>
+            <output name="output_text" file="pdf_output.txt"/>
+            <output name="output_pdf" file="pdf_output.pdf"/>
         </test>
     </tests>
     <help><![CDATA[
@@ -239,9 +262,13 @@
 
 * `API example for user patterns <https://tesseract-ocr.github.io/tessdoc/APIExample-user_patterns.html>`_
 
-**License**
+**Tesseract license**
 
 * `Apache-2.0 <https://raw.githubusercontent.com/tesseract-ocr/tesseract/refs/heads/main/LICENSE>`_
+
+**Poppler license**
+
+* `GPL-2.0-only <https://gitlab.freedesktop.org/poppler/poppler/-/raw/master/COPYING>`_
     ]]></help>
     <expand macro="citations" />
 </tool>
Binary file test-data/image_output.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/image_output.tsv	Wed Oct 29 17:38:19 2025 +0000
@@ -0,0 +1,12 @@
+level	page_num	block_num	par_num	line_num	word_num	left	top	width	height	conf	text
+1	1	0	0	0	0	0	0	1024	1024	-1	
+2	1	1	0	0	0	92	296	875	130	-1	
+3	1	1	1	0	0	92	296	875	130	-1	
+4	1	1	1	1	0	92	296	875	130	-1	
+5	1	1	1	1	1	92	297	174	129	28.285736	Ꮎ
+5	1	1	1	1	2	286	296	681	130	70.777687	ᏌᎠᏯᏙᏣᎠ
+2	1	2	0	0	0	164	572	694	133	-1	
+3	1	2	1	0	0	164	572	694	133	-1	
+4	1	2	1	1	0	164	572	694	133	-1	
+5	1	2	1	1	1	164	568	395	184	39.039650	ᎪᎶᎩ)1
+5	1	2	1	1	2	650	572	200	132	32.426445	Ꮾ(Ꭰ
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/image_output.txt	Wed Oct 29 17:38:19 2025 +0000
@@ -0,0 +1,12 @@
+ᎢᏂᏮ (ᏄᏌᎥᏟᏦ) (ᏏᎵᏣᎳᏲ). ((9Ꮶ). ᎫᏢᏚ1
+ᎤᏙᏮᎢ (ᏒᏮ ᏕᏎ3,Ꮞ5Ꮪ6.ᏤᏚ “ᎥᎦᏃᎩ- 290 ᏧᎾᏴ
+(Ꮭ ᏧᏄᏟᏦ(ᏴᏣᎾᏚᏮ, 3Ꮪ 12. 506 ᎾᎴ Ꭼ--1311
+ᎥᎢᏣ1Ꮑ ᏄᏚᏢᎸ ᎠᏁ 1161 (ᏇᎳᏮᏌᏚᎥ (6. ᏟᏣᎥᎸ ᎥᏚ ᏚᏢᎸᏒ.
+ᎠᎴᎢ ..ᏚᏟᏂᏒᏮᎥ16” ᏏᎱᎸᏄᎠᏮ ᎬᎿᏟᏂᏚ ᏚᏢᎱᎥᎸᏰᎥ
+(ᏏᏮᎢ ᏧᏮᎯᏒ (Ꭿ016Ᏺ ᎻᏄᏁᏧ. ᏞᏮ ᎢᏣᏁᏎᎢᏧ ᏏᏛᏌᏲ
+ᏄᎢᎸᏢᎠᎥᏧᏮᏅ ᏚᎸᏌᎥᏮ ᎠᏄᎸᎱ-ᏄᎾᏮᏚᏚᏌᏚ 16 ᏟᏂᎥᏮᏁ
+ᎠᏰᎸᎱᎴᏮᏚᏚᏮᏔᏦ. ᏞᏘ ᏙᎾᎤᎥᎠᏮ (131ᎢᏣᏁᏮ ᎱᎸᎠᎥᏧᎸ
+ᏚᎸ1(3 ᏚᏅᏢᎢᎸ ᎥᎥ ᏟᏱᏒᏮ ᎠᎥᏪᎱᎾᏅ. ᎬᎥ ᏃᎾᎱᎱᎾ
+ᎢᏁᎸᎢᎱᏫᏒ ᎱᏰᎠᎥᏄᎾ ᏚᏰᎥᎥᎴ ᏚᎾᏏᎱᏮ ᏮᎥ ᎠᎾᎥᎱᎾ
+ᎠᏮᎱᏮᏃᏣᏚᎾ. Ꭺ ᎢᎸᎠᏣᏚᎸᏘ (161ᎢᏣᎥᏁ ᎢᏎᎠᎥᏧᎸ
+ᏚᎸ1(3 ᏚᎾᏣᎠᎢᏮ Ꮕ ᏟᎸᏣ ᎠᎱᎾᎬᏌᎥᏣᎾᏚᎾ.
Binary file test-data/output.pdf has changed
--- a/test-data/output.tsv	Wed Jul 09 10:36:11 2025 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-level	page_num	block_num	par_num	line_num	word_num	left	top	width	height	conf	text
-1	1	0	0	0	0	0	0	1024	1024	-1	
-2	1	1	0	0	0	92	296	875	130	-1	
-3	1	1	1	0	0	92	296	875	130	-1	
-4	1	1	1	1	0	92	296	875	130	-1	
-5	1	1	1	1	1	92	297	174	129	28.285736	Ꮎ
-5	1	1	1	1	2	286	296	681	130	70.777687	ᏌᎠᏯᏙᏣᎠ
-2	1	2	0	0	0	164	572	694	133	-1	
-3	1	2	1	0	0	164	572	694	133	-1	
-4	1	2	1	1	0	164	572	694	133	-1	
-5	1	2	1	1	1	164	568	395	184	39.039650	ᎪᎶᎩ)1
-5	1	2	1	1	2	650	572	200	132	32.426445	Ꮾ(Ꭰ
--- a/test-data/output.txt	Wed Jul 09 10:36:11 2025 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-ᎢᏂᏮ (ᏄᏌᎥᏟᏦ) (ᏏᎵᏣᎳᏲ). ((9Ꮶ). ᎫᏢᏚ1
-ᎤᏙᏮᎢ (ᏒᏮ ᏕᏎ3,Ꮞ5Ꮪ6.ᏤᏚ “ᎥᎦᏃᎩ- 290 ᏧᎾᏴ
-(Ꮭ ᏧᏄᏟᏦ(ᏴᏣᎾᏚᏮ, 3Ꮪ 12. 506 ᎾᎴ Ꭼ--1311
-ᎥᎢᏣ1Ꮑ ᏄᏚᏢᎸ ᎠᏁ 1161 (ᏇᎳᏮᏌᏚᎥ (6. ᏟᏣᎥᎸ ᎥᏚ ᏚᏢᎸᏒ.
-ᎠᎴᎢ ..ᏚᏟᏂᏒᏮᎥ16” ᏏᎱᎸᏄᎠᏮ ᎬᎿᏟᏂᏚ ᏚᏢᎱᎥᎸᏰᎥ
-(ᏏᏮᎢ ᏧᏮᎯᏒ (Ꭿ016Ᏺ ᎻᏄᏁᏧ. ᏞᏮ ᎢᏣᏁᏎᎢᏧ ᏏᏛᏌᏲ
-ᏄᎢᎸᏢᎠᎥᏧᏮᏅ ᏚᎸᏌᎥᏮ ᎠᏄᎸᎱ-ᏄᎾᏮᏚᏚᏌᏚ 16 ᏟᏂᎥᏮᏁ
-ᎠᏰᎸᎱᎴᏮᏚᏚᏮᏔᏦ. ᏞᏘ ᏙᎾᎤᎥᎠᏮ (131ᎢᏣᏁᏮ ᎱᎸᎠᎥᏧᎸ
-ᏚᎸ1(3 ᏚᏅᏢᎢᎸ ᎥᎥ ᏟᏱᏒᏮ ᎠᎥᏪᎱᎾᏅ. ᎬᎥ ᏃᎾᎱᎱᎾ
-ᎢᏁᎸᎢᎱᏫᏒ ᎱᏰᎠᎥᏄᎾ ᏚᏰᎥᎥᎴ ᏚᎾᏏᎱᏮ ᏮᎥ ᎠᎾᎥᎱᎾ
-ᎠᏮᎱᏮᏃᏣᏚᎾ. Ꭺ ᎢᎸᎠᏣᏚᎸᏘ (161ᎢᏣᎥᏁ ᎢᏎᎠᎥᏧᎸ
-ᏚᎸ1(3 ᏚᎾᏣᎠᎢᏮ Ꮕ ᏟᎸᏣ ᎠᎱᎾᎬᏌᎥᏣᎾᏚᎾ.
-Ꮎ ᏌᎠᏯᏙᏣᎠ
-ᎪᎶᎩ)1 ᏮᎣᎠ
Binary file test-data/pdf_output.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pdf_output.tsv	Wed Oct 29 17:38:19 2025 +0000
@@ -0,0 +1,119 @@
+level	page_num	block_num	par_num	line_num	word_num	left	top	width	height	conf	text
+1	1	0	0	0	0	0	0	1024	800	-1	
+2	1	1	0	0	0	105	66	718	47	-1	
+3	1	1	1	0	0	105	66	718	47	-1	
+4	1	1	1	1	0	105	66	718	47	-1	
+5	1	1	1	1	1	105	66	73	31	90.832413	ᎢᏂᏮ
+5	1	1	1	1	2	205	67	142	39	89.788940	(ᏄᏌᎥᏟᏦ)
+5	1	1	1	1	3	376	69	152	40	29.736076	(ᏏᎵᏣᎳᏲ).
+5	1	1	1	1	4	559	71	104	39	0.000000	((9Ꮶ).
+5	1	1	1	1	5	687	73	136	40	1.767586	ᎫᏢᏚ1
+2	1	2	0	0	0	104	115	783	50	-1	
+3	1	2	1	0	0	104	115	783	50	-1	
+4	1	2	1	1	0	104	115	783	50	-1	
+5	1	2	1	1	1	104	115	95	32	90.515266	ᎤᏙᏮᎢ
+5	1	2	1	1	2	224	117	59	31	60.761646	(ᏒᏮ
+5	1	2	1	1	3	310	117	223	38	44.115311	ᏕᏎ3,Ꮞ5Ꮪ6.ᏤᏚ
+5	1	2	1	1	4	561	121	135	41	0.000000	“ᎥᎦᏃᎩ-
+5	1	2	1	1	5	722	123	69	31	5.272217	290
+5	1	2	1	1	6	818	125	69	40	40.866249	ᏧᎾᏴ
+2	1	3	0	0	0	103	165	732	41	-1	
+3	1	3	1	0	0	103	165	732	41	-1	
+4	1	3	1	1	0	103	165	732	41	-1	
+5	1	3	1	1	1	103	165	31	31	42.624737	(Ꮭ
+5	1	3	1	1	2	160	166	236	40	3.399918	ᏧᏄᏟᏦ(ᏴᏣᎾᏚᏮ,
+5	1	3	1	1	3	424	178	39	23	72.727158	3Ꮪ
+5	1	3	1	1	4	493	171	52	31	13.502258	12.
+5	1	3	1	1	5	554	161	58	53	13.502258	506
+5	1	3	1	1	6	638	172	42	32	45.419098	ᎾᎴ
+5	1	3	1	1	7	700	174	135	32	24.030121	Ꭼ--1311
+2	1	4	0	0	0	103	215	808	49	-1	
+3	1	4	1	0	0	103	215	808	49	-1	
+4	1	4	1	1	0	103	215	808	49	-1	
+5	1	4	1	1	1	103	215	91	32	0.000000	ᎥᎢᏣ10
+5	1	4	1	1	2	222	225	82	31	65.585052	ᏄᏚᏢᎸ
+5	1	4	1	1	3	317	211	29	57	3.651657	ᎠᏁ
+5	1	4	1	1	4	360	211	204	57	3.651657	ᏁᏣ1(2ᎳᏮᏌᏚᎥ
+5	1	4	1	1	5	583	211	133	57	55.025658	(6.ᏟᏣᎥᎸ
+5	1	4	1	1	6	742	223	31	32	93.219315	ᎥᏚ
+5	1	4	1	1	7	799	233	112	31	0.000000	ᏚᏢᎸᎥᏁ.
+2	1	5	0	0	0	102	266	775	48	-1	
+3	1	5	1	0	0	102	266	775	48	-1	
+4	1	5	1	1	0	102	266	775	48	-1	
+5	1	5	1	1	1	102	266	71	31	52.466690	ᎠᎴᎢ
+5	1	5	1	1	2	198	267	208	35	0.000000	..ᏚᏟᏂᏒᏮᎥ16”
+5	1	5	1	1	3	433	269	135	33	20.663292	ᏏᎱᎸᏄᎠᏮ
+5	1	5	1	1	4	594	272	115	32	46.369892	ᎬᎿᏟᏂᏚ
+5	1	5	1	1	5	735	274	142	40	9.675285	ᏚᏢᎱᎥᎸᏰᎥ
+2	1	6	0	0	0	102	315	816	42	-1	
+3	1	6	1	0	0	102	315	816	42	-1	
+4	1	6	1	1	0	102	315	816	42	-1	
+5	1	6	1	1	1	102	315	85	32	64.574440	ᎿᏏᏮᎱ
+5	1	6	1	1	2	212	317	68	31	41.235542	ᏄᏮᎯᏒ
+5	1	6	1	1	3	306	318	124	32	47.484203	(1016Ꮑ
+5	1	6	1	1	4	456	320	116	32	79.362022	ᎻᏄᏁᏧ.
+5	1	6	1	1	5	601	322	47	32	92.560989	ᏞᏮ
+5	1	6	1	1	6	674	324	129	32	68.453568	ᎢᏮᏲᎸᎱᏧ
+5	1	6	1	1	7	827	325	91	32	29.218613	ᏏᎱᏄᏂ
+2	1	7	0	0	0	101	366	732	43	-1	
+3	1	7	1	0	0	101	366	732	43	-1	
+4	1	7	1	1	0	101	366	732	43	-1	
+5	1	7	1	1	1	101	366	173	39	42.287048	ᏄᎢᎸᏢᎠᎥᏧᏮᏅ
+5	1	7	1	1	2	302	373	101	27	78.414246	ᏚᎸᏌᎥᏮ
+5	1	7	1	1	3	428	371	213	38	0.000000	ᎠᏄᎸᎱ-ᏄᎾᏮᏚᏚᏌᏚ
+5	1	7	1	1	4	667	372	33	32	93.305000	16
+5	1	7	1	1	5	725	374	108	32	59.821590	ᏟᏂᎥᏮᏁ
+2	1	8	0	0	0	100	419	759	45	-1	
+3	1	8	1	0	0	100	419	759	45	-1	
+4	1	8	1	1	0	100	419	759	45	-1	
+5	1	8	1	1	1	100	424	208	30	18.710289	ᎠᏰᎸᎱᎴᏮᏚᏚᏮᏔᏦ.
+5	1	8	1	1	2	337	419	47	31	62.331985	ᏞᏘ
+5	1	8	1	1	3	409	420	107	39	29.658852	ᏙᎾᎤᎥᎠᏮ
+5	1	8	1	1	4	543	430	164	25	35.176102	(131ᎢᏣᏁᏮ
+5	1	8	1	1	5	733	424	126	40	39.493908	ᎱᎸᎠᎥᏧᎸ
+2	1	9	0	0	0	100	466	734	45	-1	
+3	1	9	1	0	0	100	466	734	45	-1	
+4	1	9	1	1	0	100	466	734	45	-1	
+5	1	9	1	1	1	100	466	92	31	32.093796	ᏚᎸ1(3
+5	1	9	1	1	2	219	475	105	32	63.326702	ᏚᏅᏢᎢᎸ
+5	1	9	1	1	3	351	468	25	31	90.616997	ᎥᎥ
+5	1	9	1	1	4	403	478	88	23	71.013290	ᏟᏱᏒᏮ
+5	1	9	1	1	5	517	471	116	40	45.434055	ᎠᎥᏪᎱᎾᏅ.
+5	1	9	1	1	6	662	473	41	31	93.021652	ᎬᎥ
+5	1	9	1	1	7	729	482	105	24	56.318901	ᏃᎾᎱᎱᎾ
+2	1	10	0	0	0	99	516	734	47	-1	
+3	1	10	1	0	0	99	516	734	47	-1	
+4	1	10	1	1	0	99	516	734	47	-1	
+5	1	10	1	1	1	99	516	143	32	20.518715	ᎢᏁᎸᎢᎱᏫᏒ
+5	1	10	1	1	2	268	517	127	40	5.754593	ᎱᏰᎠᎥᏄᎾ
+5	1	10	1	1	3	421	520	92	32	0.000000	ᏚᏰᎥᎥᎴ
+5	1	10	1	1	4	540	521	104	33	73.432365	ᏚᎾᏏᎱᏮ
+5	1	10	1	1	5	669	523	33	31	83.952301	ᏮᎥ
+5	1	10	1	1	6	728	532	105	31	59.241058	ᎠᎾᎥᎱᎾ
+2	1	11	0	0	0	98	568	731	45	-1	
+3	1	11	1	0	0	98	568	731	45	-1	
+4	1	11	1	1	0	98	568	731	45	-1	
+5	1	11	1	1	1	98	574	186	30	64.428001	ᎠᏮᎱᏮᏃᏣᏚᎾ.
+5	1	11	1	1	2	313	568	29	30	92.059212	Ꭺ
+5	1	11	1	1	3	369	578	128	31	40.324844	ᎢᎸᎠᏣᏚᎸᏘ
+5	1	11	1	1	4	523	579	154	25	53.635124	(161ᎢᏣᎥᏁ
+5	1	11	1	1	5	703	573	126	40	0.000000	ᎢᏎᎠᎥᏧᎸ
+2	1	12	0	0	0	98	616	612	45	-1	
+3	1	12	1	0	0	98	616	612	45	-1	
+4	1	12	1	1	0	98	616	612	45	-1	
+5	1	12	1	1	1	98	616	92	31	13.876808	ᏚᎸ1(3
+5	1	12	1	1	2	217	617	103	32	11.804543	ᏚᎾᏣᎠᎢᏮ
+5	1	12	1	1	3	346	627	20	23	66.117737	Ꮕ
+5	1	12	1	1	4	391	621	65	30	55.042454	ᏟᎸᏣ
+5	1	12	1	1	5	481	621	229	40	25.703682	ᎠᎱᎾᎬᏌᎥᏣᎾᏚᎾ.
+1	2	0	0	0	0	0	0	1024	1024	-1	
+2	2	1	0	0	0	92	296	875	130	-1	
+3	2	1	1	0	0	92	296	875	130	-1	
+4	2	1	1	1	0	92	296	875	130	-1	
+5	2	1	1	1	1	92	297	174	129	28.285736	Ꮎ
+5	2	1	1	1	2	286	296	681	130	70.777687	ᏌᎠᏯᏙᏣᎠ
+2	2	2	0	0	0	164	572	694	133	-1	
+3	2	2	1	0	0	164	572	694	133	-1	
+4	2	2	1	1	0	164	572	694	133	-1	
+5	2	2	1	1	1	164	568	395	184	39.039650	ᎪᎶᎩ)1
+5	2	2	1	1	2	650	572	200	132	32.426445	Ꮾ(Ꭰ
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pdf_output.txt	Wed Oct 29 17:38:19 2025 +0000
@@ -0,0 +1,26 @@
+ᎢᏂᏮ (ᏄᏌᎥᏟᏦ) (ᏏᎵᏣᎳᏲ). ((9Ꮶ). ᎫᏢᏚ1
+
+ᎤᏙᏮᎢ (ᏒᏮ ᏕᏎ3,Ꮞ5Ꮪ6.ᏤᏚ “ᎥᎦᏃᎩ- 290 ᏧᎾᏴ
+
+(Ꮭ ᏧᏄᏟᏦ(ᏴᏣᎾᏚᏮ, 3Ꮪ 12. 506 ᎾᎴ Ꭼ--1311
+
+ᎥᎢᏣ10 ᏄᏚᏢᎸ ᎠᏁ ᏁᏣ1(2ᎳᏮᏌᏚᎥ (6.ᏟᏣᎥᎸ ᎥᏚ ᏚᏢᎸᎥᏁ.
+
+ᎠᎴᎢ ..ᏚᏟᏂᏒᏮᎥ16” ᏏᎱᎸᏄᎠᏮ ᎬᎿᏟᏂᏚ ᏚᏢᎱᎥᎸᏰᎥ
+
+ᎿᏏᏮᎱ ᏄᏮᎯᏒ (1016Ꮑ ᎻᏄᏁᏧ. ᏞᏮ ᎢᏮᏲᎸᎱᏧ ᏏᎱᏄᏂ
+
+ᏄᎢᎸᏢᎠᎥᏧᏮᏅ ᏚᎸᏌᎥᏮ ᎠᏄᎸᎱ-ᏄᎾᏮᏚᏚᏌᏚ 16 ᏟᏂᎥᏮᏁ
+
+ᎠᏰᎸᎱᎴᏮᏚᏚᏮᏔᏦ. ᏞᏘ ᏙᎾᎤᎥᎠᏮ (131ᎢᏣᏁᏮ ᎱᎸᎠᎥᏧᎸ
+
+ᏚᎸ1(3 ᏚᏅᏢᎢᎸ ᎥᎥ ᏟᏱᏒᏮ ᎠᎥᏪᎱᎾᏅ. ᎬᎥ ᏃᎾᎱᎱᎾ
+
+ᎢᏁᎸᎢᎱᏫᏒ ᎱᏰᎠᎥᏄᎾ ᏚᏰᎥᎥᎴ ᏚᎾᏏᎱᏮ ᏮᎥ ᎠᎾᎥᎱᎾ
+
+ᎠᏮᎱᏮᏃᏣᏚᎾ. Ꭺ ᎢᎸᎠᏣᏚᎸᏘ (161ᎢᏣᎥᏁ ᎢᏎᎠᎥᏧᎸ
+
+ᏚᎸ1(3 ᏚᎾᏣᎠᎢᏮ Ꮕ ᏟᎸᏣ ᎠᎱᎾᎬᏌᎥᏣᎾᏚᎾ.
+Ꮎ ᏌᎠᏯᏙᏣᎠ
+
+ᎪᎶᎩ)1 Ꮾ(Ꭰ
Binary file test-data/test_input.pdf has changed