# HG changeset patch # User bgruening # Date 1510216902 18000 # Node ID aeb1f7daa7c2691029b2a8b6200609fccf776338 # Parent e44dda38e6ae2b4e46e8087167a72710fbdbf8bc planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/osra commit 74daed54894d9769a251a2ca6a46d96b7a27e461 diff -r e44dda38e6ae -r aeb1f7daa7c2 osra.py --- a/osra.py Tue Mar 01 10:41:20 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -#!usr/bin/env python - -import os, sys -import subprocess - -""" - OSRA_DATA_FILES is set during the toolshed Installation - If it is not set, use the standard configuration of OSRA. - That means we need to delete argument 4-7. - That script is a hack, because we do not know the content of OSRA_DATA_FILES at xml evaluation time. - - osra -f $oformat $infile - -l \$OSRA_DATA_FILES/spelling.txt -a \$OSRA_DATA_FILES/superatom.txt - > $outfile -""" - -if not os.path.exists(sys.argv[7]): - # OSRA_DATA_FILES path is not set or the spelling file is not existent - sys.argv.pop(7) # superatom.txt path - sys.argv.pop(6) # -a - sys.argv.pop(5) # speling.txt path - sys.argv.pop(4) # -l - -sys.argv[0] = 'osra' -subprocess.call(sys.argv, stdout=sys.stdout) - - diff -r e44dda38e6ae -r aeb1f7daa7c2 osra.tar.bz2 Binary file osra.tar.bz2 has changed diff -r e44dda38e6ae -r aeb1f7daa7c2 osra.xml --- a/osra.xml Tue Mar 01 10:41:20 2016 -0500 +++ b/osra.xml Thu Nov 09 03:41:42 2017 -0500 @@ -1,40 +1,38 @@ - + in Images or PDF documents (OSRA) - osra - openbabel - graphicsmagick + osra - + $outfile + $confidence + $adaptive + $thinning + > '$outfile' ]]> - + - - - + + + - + @@ -42,16 +40,16 @@ - + - - + + + + @inproceedings{filippov2010improvements, + title={Improvements in Optical Structure Recognition Application}, + author={Filippov, Igor V and Nicklaus, Marc C and Kinney, John}, + booktitle={Document Analysis Systems Workshop}, + year={2010} + } + + diff -r e44dda38e6ae -r aeb1f7daa7c2 readme --- a/readme Tue Mar 01 10:41:20 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ -OSRA: Optical Structure Recognition Application - -OSRA is a utility designed to convert graphical representations of chemical -structures, as they appear in journal articles, patent documents, textbooks, -trade magazines etc., into SMILES (Simplified Molecular Input Line Entry -Specification - see http://en.wikipedia.org/wiki/SMILES) or -SD files - a computer recognizable molecular structure format. -OSRA can read a document in any of the over 90 graphical formats parseable by -ImageMagick - including GIF, JPEG, PNG, TIFF, PDF, PS etc., and generate -the SMILES or SDF representation of the molecular structure images encountered -within that document. - -Note that any software designed for optical recognition is unlikely to be -perfect, and the output produced might, and probably will, contain errors, -so curation by a human knowledgeable in chemical structures is highly recommended. - -http://cactus.nci.nih.gov/osra/ - -The wrapper comes with an automatic installation of all dependencies through the -galaxy toolshed. diff -r e44dda38e6ae -r aeb1f7daa7c2 readme.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme.md Thu Nov 09 03:41:42 2017 -0500 @@ -0,0 +1,20 @@ +## OSRA: Optical Structure Recognition Application + +OSRA is a utility designed to convert graphical representations of chemical +structures, as they appear in journal articles, patent documents, textbooks, +trade magazines etc., into SMILES (Simplified Molecular Input Line Entry +Specification - see http://en.wikipedia.org/wiki/SMILES) or +SD files - a computer recognizable molecular structure format. +OSRA can read a document in any of the over 90 graphical formats parseable by +ImageMagick - including GIF, JPEG, PNG, TIFF, PDF, PS etc., and generate +the SMILES or SDF representation of the molecular structure images encountered +within that document. + +Note that any software designed for optical recognition is unlikely to be +perfect, and the output produced might, and probably will, contain errors, +so curation by a human knowledgeable in chemical structures is highly recommended. + +http://cactus.nci.nih.gov/osra/ + +The wrapper comes with an automatic installation of all dependencies through the +galaxy toolshed. diff -r e44dda38e6ae -r aeb1f7daa7c2 repository_dependencies.xml --- a/repository_dependencies.xml Tue Mar 01 10:41:20 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ - - - - diff -r e44dda38e6ae -r aeb1f7daa7c2 test-data/2008001635_153_chem.png Binary file test-data/2008001635_153_chem.png has changed diff -r e44dda38e6ae -r aeb1f7daa7c2 test-data/2008001635_153_chem.smi diff -r e44dda38e6ae -r aeb1f7daa7c2 test-data/CID_2244.pdf Binary file test-data/CID_2244.pdf has changed diff -r e44dda38e6ae -r aeb1f7daa7c2 test-data/CID_2244.png Binary file test-data/CID_2244.png has changed diff -r e44dda38e6ae -r aeb1f7daa7c2 test-data/CID_2244.sdf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_2244.sdf Thu Nov 09 03:41:42 2017 -0500 @@ -0,0 +1,155 @@ +2244 + -OEChem-05151212332D + + 21 21 0 0 0 0 0 0 0999 V2000 + 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 5 1 0 0 0 0 + 1 12 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 21 1 0 0 0 0 + 3 11 2 0 0 0 0 + 4 12 2 0 0 0 0 + 5 6 1 0 0 0 0 + 5 7 2 0 0 0 0 + 6 8 2 0 0 0 0 + 6 11 1 0 0 0 0 + 7 9 1 0 0 0 0 + 7 14 1 0 0 0 0 + 8 10 1 0 0 0 0 + 8 15 1 0 0 0 0 + 9 10 2 0 0 0 0 + 9 16 1 0 0 0 0 + 10 17 1 0 0 0 0 + 12 13 1 0 0 0 0 + 13 18 1 0 0 0 0 + 13 19 1 0 0 0 0 + 13 20 1 0 0 0 0 +M END +> +2244 + +> +1 + +> +212 + +> +4 + +> +1 + +> +3 + +> +AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA== + +> +2-acetoxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetoxybenzoic acid + +> +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) + +> +BSYNRYMUTXBXSQ-UHFFFAOYSA-N + +> +1.2 + +> +180.042259 + +> +C9H8O4 + +> +180.15742 + +> +CC(=O)OC1=CC=CC=C1C(=O)O + +> +CC(=O)OC1=CC=CC=C1C(=O)O + +> +63.6 + +> +180.042259 + +> +0 + +> +13 + +> +0 + +> +0 + +> +0 + +> +0 + +> +0 + +> +1 + +> +1 + +> +1 +5 +255 + +> +5 6 8 +5 7 8 +6 8 8 +7 9 8 +8 10 8 +9 10 8 + +$$$$ + diff -r e44dda38e6ae -r aeb1f7daa7c2 test-data/osra_on_CID2244.sdf diff -r e44dda38e6ae -r aeb1f7daa7c2 test-data/patent.gif Binary file test-data/patent.gif has changed diff -r e44dda38e6ae -r aeb1f7daa7c2 test_data/2008001635_153_chem.png Binary file test_data/2008001635_153_chem.png has changed diff -r e44dda38e6ae -r aeb1f7daa7c2 test_data/2008001635_153_chem.smi --- a/test_data/2008001635_153_chem.smi Tue Mar 01 10:41:20 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -CCC(c1ccc(cc1)Br)OCCCO diff -r e44dda38e6ae -r aeb1f7daa7c2 test_data/CID_2244.png Binary file test_data/CID_2244.png has changed diff -r e44dda38e6ae -r aeb1f7daa7c2 test_data/CID_2244.sdf --- a/test_data/CID_2244.sdf Tue Mar 01 10:41:20 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,155 +0,0 @@ -2244 - -OEChem-05151212332D - - 21 21 0 0 0 0 0 0 0999 V2000 - 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 - 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 - 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 - 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 - 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1 5 1 0 0 0 0 - 1 12 1 0 0 0 0 - 2 11 1 0 0 0 0 - 2 21 1 0 0 0 0 - 3 11 2 0 0 0 0 - 4 12 2 0 0 0 0 - 5 6 1 0 0 0 0 - 5 7 2 0 0 0 0 - 6 8 2 0 0 0 0 - 6 11 1 0 0 0 0 - 7 9 1 0 0 0 0 - 7 14 1 0 0 0 0 - 8 10 1 0 0 0 0 - 8 15 1 0 0 0 0 - 9 10 2 0 0 0 0 - 9 16 1 0 0 0 0 - 10 17 1 0 0 0 0 - 12 13 1 0 0 0 0 - 13 18 1 0 0 0 0 - 13 19 1 0 0 0 0 - 13 20 1 0 0 0 0 -M END -> -2244 - -> -1 - -> -212 - -> -4 - -> -1 - -> -3 - -> -AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA== - -> -2-acetoxybenzoic acid - -> -2-acetyloxybenzoic acid - -> -2-acetyloxybenzoic acid - -> -2-acetyloxybenzoic acid - -> -2-acetoxybenzoic acid - -> -InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) - -> -BSYNRYMUTXBXSQ-UHFFFAOYSA-N - -> -1.2 - -> -180.042259 - -> -C9H8O4 - -> -180.15742 - -> -CC(=O)OC1=CC=CC=C1C(=O)O - -> -CC(=O)OC1=CC=CC=C1C(=O)O - -> -63.6 - -> -180.042259 - -> -0 - -> -13 - -> -0 - -> -0 - -> -0 - -> -0 - -> -0 - -> -1 - -> -1 - -> -1 -5 -255 - -> -5 6 8 -5 7 8 -6 8 8 -7 9 8 -8 10 8 -9 10 8 - -$$$$ - diff -r e44dda38e6ae -r aeb1f7daa7c2 tool_dependencies.xml --- a/tool_dependencies.xml Tue Mar 01 10:41:20 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,74 +0,0 @@ - - - - - - - - - - - - - http://downloads.sourceforge.net/project/osra/osra/2.0.0/osra-2.0.0.tgz - - - - - - - - - - - - - https://depot.galaxyproject.org/software/potrace/potrace_1.11_src_all.tar.gz - tar xfvz potrace-1.11.tar.gz && cd potrace-1.11 && ./configure --with-libpotrace --prefix=$INSTALL_DIR/potrace/build && make && make install - - - - http://downloads.sourceforge.net/project/osra/gocr-patched/gocr-0.50pre-patched.tgz - tar xfvz gocr-0.50pre-patched.tgz && cd gocr-0.50pre-patched && ./configure --prefix=$INSTALL_DIR/gocr/build && make libs && make all install - - - - http://downloads.sourceforge.net/project/tclap/tclap-1.2.1.tar.gz - tar xfvz tclap-1.2.1.tar.gz && cd tclap-1.2.1 && ./configure --prefix=$INSTALL_DIR/tclap/build && make && make install - - - http://mirror.checkdomain.de/gnu/ocrad/ocrad-0.21.tar.gz - tar xfvz ocrad-0.21.tar.gz && cd ocrad-0.21 && ./configure --prefix=$INSTALL_DIR/ocrad/build && make && make install - - - https://launchpad.net/cuneiform-linux/1.1/1.1/+download/cuneiform-linux-1.1.0.tar.bz2 - tar xfvj cuneiform-linux-1.1.0.tar.bz2 && cd cuneiform-linux-1.1.0 && mkdir build && cd build && cmake .. -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR/cuneiform/build/ && make && make install - - - - export PATH=$PATH:$GRAPHICSMAGICK_ROOT_DIR/bin/ && - ./configure --with-tclap-include=$INSTALL_DIR/tclap/build/include/ --with-potrace-include=$INSTALL_DIR/potrace/build/include/ --with-potrace-lib=$INSTALL_DIR/potrace/build/lib/ --with-gocr-include=$INSTALL_DIR/gocr/build/include/gocr/ --with-gocr-lib=$INSTALL_DIR/gocr/build/lib/ --with-ocrad-include=$INSTALL_DIR/ocrad/build/include/ --with-ocrad-lib=$INSTALL_DIR/ocrad/build/lib/ --with-cuneiform-include=$INSTALL_DIR/cuneiform/build/include/ --with-cuneiform --with-cuneiform-lib=$INSTALL_DIR/cuneiform/build/install/lib/ --with-openbabel-include=$OPENBABEL_INCLUDE_DIR/openbabel-2.0/ --with-openbabel-lib=$OPENBABEL_LIB_DIR --with-graphicsmagick-lib=$GRAPHICSMAGICK_ROOT_DIR/lib/ --with-graphicsmagick-include=$GRAPHICSMAGICK_ROOT_DIR/include/GraphicsMagick/ --prefix=$INSTALL_DIR - make - make install - - rm $INSTALL_DIR/tclap/ -r - - rm $INSTALL_DIR/gocr/ -r - rm $INSTALL_DIR/ocrad/ -r - rm $INSTALL_DIR/cuneiform/ -r - - - $ENV[GRAPHICSMAGICK_ROOT_DIR]/lib/ - $INSTALL_DIR/potrace/build/lib/ - $INSTALL_DIR/bin - - $INSTALL_DIR/share - - - - We still have a handfull of requirements - -