Mercurial > repos > recetox > rem_complex
changeset 0:a0e07a0bc047 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
| author | recetox | 
|---|---|
| date | Mon, 27 Nov 2023 09:04:04 +0000 | 
| parents | |
| children | e0ca9dfcdb18 | 
| files | macros.xml rem_complex.py rem_complex.xml test-data/input.csv test-data/input.inchi test-data/input.smi test-data/sample_output.csv test-data/sample_output.inchi test-data/sample_output.smi | 
| diffstat | 9 files changed, 215 insertions(+), 0 deletions(-) [+] | 
line wrap: on
 line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Mon Nov 27 09:04:04 2023 +0000 @@ -0,0 +1,29 @@ +<macros> + <token name="@TOOL_VERSION@">1.0.0</token> + <xml name="creator"> + <creator> + <person + givenName="Wudmir" + familyName="Rojas" + url="https://github.com/wverastegui" + identifier="0000-0001-7036-9987" /> + <person + givenName="Helge" + familyName="Hecht" + url="https://github.com/hechth" + identifier="0000-0001-6744-996X" /> + <organization + url="https://www.recetox.muni.cz/" + email="GalaxyToolsDevelopmentandDeployment@space.muni.cz" + name="RECETOX MUNI"/> + </creator> + </xml> + <token name="@HELP@"><![CDATA[ + The remove complex tool filters out coordination complexes from a list of SMILES or InChI. + + Documentation + The rem complex tool removes coordination complexes from a list of SMILES or InChI. The tool accepts input files with csv, smi and inchi formats, + and returns the same format as the input file. The tool is based on the openbabel python library. + ]]> + </token> +</macros>
# rem_complex.py: drop multi-component (dot-separated) structures from csv, smi or inchi files.
import argparse

import pandas as pd
from openbabel import openbabel, pybel

# Open Babel log verbosity -- 0: suppress warnings; 1: warnings.
openbabel.obErrorLog.SetOutputLevel(1)


def parse_arguments() -> argparse.Namespace:
    """Parse the command-line options of the script.

    Returns:
        argparse.Namespace: Namespace holding ``input_format``,
            ``input_filename`` and ``output_filename``.
    """
    parser = argparse.ArgumentParser()
    # The format drives the dispatch in filter_complex_molecules; without it the
    # script would only fail later inside Open Babel, so demand it up front.
    parser.add_argument('-iformat', '--input_format', type=str, required=True, help='Input file format')
    parser.add_argument('-i', '--input_filename', type=str, required=True, help='Input file name')
    parser.add_argument('-o', '--output_filename', type=str, required=True, help='Output file name')
    return parser.parse_args()


def filter_csv_molecules(file_name: str, output_file_name: str) -> None:
    """Removes molecules with '.' in SMILES string from csv file.

    Rows whose ``smiles`` value is missing are kept (``na=False``).

    Args:
        file_name (str): Path to csv file that contains metadata; it must have a ``smiles`` column.
        output_file_name (str): Path to destination file, in csv format.
    """
    df = pd.read_csv(file_name)
    # regex=False: the dot is matched literally, not as "any character".
    has_dot = df['smiles'].str.contains(".", na=False, regex=False)
    df[~has_dot].to_csv(output_file_name, index=False)


def _has_single_component(mol) -> bool:
    """Return True when the SMILES of ``mol`` contains no '.' component separator.

    Open Babel's smi output is ``<SMILES>\\t<title>\\n``; only the part before the
    tab is inspected so that a '.' in the title cannot discard a molecule.
    """
    smiles = mol.write('smi').partition('\t')[0]
    return "." not in smiles


def filter_other_format_molecules(file_name: str, output_file_name: str, input_format: str) -> None:
    """Removes molecules with '.' in SMILES string from smi or inchi files.

    Every retained molecule is written back with ``mol.write(input_format)``,
    i.e. it is re-serialised by Open Babel rather than copied verbatim.

    Args:
        file_name (str): Path to smi or inchi files.
        output_file_name (str): Path to destination files, in smi or inchi formats.
        input_format (str): Input file format.
    """
    # The whole input is read and filtered before the output file is opened.
    kept = [mol for mol in pybel.readfile(input_format, file_name) if _has_single_component(mol)]

    with open(output_file_name, 'w', encoding='utf-8') as f:
        f.writelines(mol.write(input_format) for mol in kept)


def filter_complex_molecules(file_name: str, output_file_name: str, input_format: str) -> None:
    """Removes molecular complexes depending on the input format.

    Args:
        file_name (str): Path to csv, smi or inchi files
        output_file_name (str): Path to destination files, in csv, smi or inchi formats.
        input_format (str): Input file format; 'csv' is handled with pandas,
            anything else is passed to Open Babel.
    """
    if input_format == 'csv':
        filter_csv_molecules(file_name, output_file_name)
    else:
        filter_other_format_molecules(file_name, output_file_name, input_format)


def main() -> None:
    """Command-line entry point: parse the arguments and run the filter."""
    args = parse_arguments()
    filter_complex_molecules(args.input_filename, args.output_filename, args.input_format)


if __name__ == "__main__":
    main()
<tool id="rem_complex" name="Remove coordination complexes" version="@TOOL_VERSION@+galaxy0" profile="21.09">
    <!-- rem_complex.xml: Galaxy wrapper around rem_complex.py. -->
    <description>Remove molecular coordination complexes from a list of structure representations</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Author metadata comes from the "creator" macro defined in macros.xml. -->
    <expand macro="creator"/>
    <requirements>
        <requirement type="package" version="1.5.3">pandas</requirement>
        <requirement type="package" version="3.1.1">openbabel</requirement>
    </requirements>
    <!-- The script path is quoted so a tool directory containing spaces does not split the argument.
         The datatype extension of the input selects the csv or Open Babel code path in the script. -->
    <command detect_errors="exit_code"><![CDATA[
        python '$__tool_directory__/rem_complex.py'
        -i '${input}'
        -iformat '${input.ext}'
        -o '${output}'
    ]]></command>
    <inputs>
        <param name="input" format="inchi,smi,csv" type="data" help="Accepted input formats: CSV, SMI, and InChI."/>
    </inputs>
    <outputs>
        <!-- The output takes the same datatype as the input dataset. -->
        <data name="output" format_source="input" />
    </outputs>
    <tests>
        <test>
            <param name="input" ftype="smi" value="input.smi" />
            <output name="output" ftype="smi" file="sample_output.smi"/>
        </test>
        <test>
            <param name="input" ftype="inchi" value="input.inchi"/>
            <output name="output" ftype="inchi" file="sample_output.inchi"/>
        </test>
        <test>
            <param name="input" ftype="csv" value="input.csv"/>
            <output name="output" ftype="csv" file="sample_output.csv"/>
        </test>
    </tests>
    <help>
        <![CDATA[
        @HELP@
        ]]>
    </help>
    <citations>
        <!-- A citation of type "doi" takes the bare DOI, not a resolver URL. -->
        <citation type="doi">10.5281/zenodo.6035335</citation>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.csv Mon Nov 27 09:04:04 2023 +0000 @@ -0,0 +1,16 @@ +inchi,formula,num_peaks,compound_name,smiles,comment,retention_index +InChI=1S/C6H4ClO2Si.C5H5.2CO.Fe/c7-10-8-5-3-1-2-4-6(5)9-10;1-2-4-5-3-1;2*1-2;/h1-4H;1-5H;;;,C13H9ClFeO4Si,3,"((.eta.5-Cyclopentadienylironbiscarbonyl)(1,2-phenylenedioxysilyl)chloride complex",Cl[Si]1Oc2ccccc2O1.[C-]#[O+].[C-]#[O+].[CH]1C=CC=C1.[Fe],SpectrumID: 1519953; Source: C4-1998-38-3; Class: Benzenoids; CASRN not real!, +"InChI=1S/C13H14O/c1-13(10-14)9-12(13)8-7-11-5-3-2-4-6-11/h2-6,12,14H,9-10H2,1H3/t12-,13-/m0/s1",C13H14O,20,"((1R*,2R*)-1-Methyl-2-phenylethynylcyclopropyl)methanol",C[C@@]1(CO)C[C@@H]1C#Cc1ccccc1,SpectrumID: 1752764; Source: A1-13-956/SMS7-13; DOI: 10.1021/ol1029996; QI: 383; Class: Benzene and substituted derivatives; CASRN not real! |RI:1588|,1588 +"InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1",C34H54O4,14,"((1R,3aS,5aR,5bR,7aR,9S,11aR,11bR,13aR,13bR)-9-acetoxy-5a,5b,8,8,11a-pentamethyl-1-(prop-1-en-2-yl)icosahydro-1H-cyclopenta[a]chrysen-3a-yl)methyl acetate",C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12,SpectrumID: 1800193; Source: PA-7-239-4(DIP); DOI: 10.1002_(SICI)1099-1565(199605)7_3_136; Class: Triterpenoids; CASRN not real! 
|RI:3353|,3353 +"InChI=1S/C11H15NO2S/c1-9-2-4-10(5-3-9)15-7-6-12-8-11(13)14/h2-5,12H,6-8H2,1H3,(H,13,14)",C11H15NO2S,20,((2-[(4-Methylphenyl)sulfanyl]ethyl)amino)acetic acid,Cc1ccc(SCCNCC(=O)O)cc1,SpectrumID: 1226271; Source: W5-1989-35586-29950; QI: 400; Class: Alpha amino acids |RI:2011|,2011 +"InChI=1S/C12H16N2O2S/c1-12(7-15)8-17-11(14-12)13-9-3-5-10(16-2)6-4-9/h3-6,15H,7-8H2,1-2H3,(H,13,14)",C12H16N2O2S,167,"((2Z)-2-[(4-Methoxyphenyl)imino]-4-methyl-1,3-thiazolidin-4-yl)methanol",COc1ccc(NC2=NC(C)(CO)CS2)cc1,SpectrumID: 1432066; Source: AD-0-2532-0; QI: 900; Class: Methoxyanilines |RI:2319|,2319 +"InChI=1S/C23H17F3N2O/c24-23(25,26)22-19(16-10-4-1-5-11-16)20(21(29)17-12-6-2-7-13-17)28(27-22)18-14-8-3-9-15-18/h1-15,19-20H/t19-,20+/m1/s1",C23H17F3N2O,9,"((3S,4R)-2,4-Diphenyl-5-trifluoromethyl-3,4-dihydro-2H-pyrazol-3-yl)-phenyl-methanone",O=C(c1ccccc1)[C@@H]1[C@@H](c2ccccc2)C(C(F)(F)F)=NN1c1ccccc1,SpectrumID: 1676300; Source: F4-43-2771-4a; QI: 56; Class: Alkyl-phenylketones; CASRN not real! |RI:2735|,2735 +"InChI=1S/C15H24O/c1-10(2)13-6-4-11(3)14-7-5-12(9-16)8-15(13)14/h4,8,10,13-16H,5-7,9H2,1-3H3/t13-,14+,15-/m0/s1",C15H24O,156,"((4aS,8S,8aR)-8-isopropyl-5-methyl-3,4,4a,7,8,8a-hexahydronaphthalen-2-yl)methanol",CC1=CC[C@@H](C(C)C)[C@@H]2C=C(CO)CC[C@H]12,"SpectrumID: 1815091; Source: N.Kacem, et al. Industrial Crops and Products, V.90, 2016, P.87-93; QI: 881; Class: Sesquiterpenoids; CASRN not real! 
|RI:1683|",1683 +"InChI=1S/C17H28NO6P/c1-13(2)23-25(21,24-14(3)4)18(12-17(19)20)11-10-15-6-8-16(22-5)9-7-15/h6-9,13-14H,10-12H2,1-5H3,(H,19,20)",C17H28NO6P,12,((Diisopropoxyphosphoryl)[2-(4-methoxyphenyl)ethyl]amino)acetic acid,COc1ccc(CCN(CC(=O)O)P(=O)(OC(C)C)OC(C)C)cc1,SpectrumID: 1356024; Source: W5-1989-35199-30337; QI: 78; Class: Alpha amino acids and derivatives, +"InChI=1S/C19H27NO/c1-3-19-11-7-13-20(18(19)14-17(21)10-12-19)15(2)16-8-5-4-6-9-16/h4-6,8-9,15,18H,3,7,10-14H2,1-2H3/t15-,18-,19+/m0/s1",C19H27NO,21,"(+)-(1'S,4aR,8aS)-4a-Ethyl-1-(1'-phenylethyl)octahydroquinolin-7-one",CC[C@]12CCCN([C@@H](C)c3ccccc3)[C@H]1CC(=O)CC2,SpectrumID: 1634636; Source: KD-12-2102-6; QI: 147; Class: Quinolidines; CASRN not real! |RI:2267|,2267 +"InChI=1S/C22H36O5/c1-14(23)9-10-18-21(6)12-8-11-20(4,5)19(21)17(26-15(2)24)13-22(18,7)27-16(3)25/h17-19H,8-13H2,1-7H3/t17-,18+,19-,21+,22+/m0/s1",C22H36O5,9,"(+)-(1R,2R,4S,4aS,8aS)-4-(Acetyloxy)-2,5,5,8a-tetramethyl-1-(3-oxobutyl)decahydro-2-naphthlenyl acetate",CC(=O)CC[C@@H]1[C@@]2(C)CCCC(C)(C)[C@@H]2[C@@H](OC(C)=O)C[C@@]1(C)OC(C)=O,SpectrumID: 1624823; Source: KC-57-5677-51; QI: 106; Class: Sesquiterpenoids; CASRN not real! |RI:2534|,2534 +"InChI=1S/C13H20O2/c1-9-7-8-13(15-9,12(4,5)6)10(2)11(3)14/h7-9H,2H2,1,3-6H3/t9-,13+/m0/s1",C13H20O2,14,"(+)-(2'S,5'S)-3-(2'-tert-Butyl-5'-methyl-2',5'-dihydrofuran-2'-yl)but-3-en-2-one",C=C(C(C)=O)[C@@]1(C(C)(C)C)C=C[C@H](C)O1,"SpectrumID: 1618842; Source: F4-0-2808-29; QI: 171; Class: Alpha-branched alpha,beta-unsaturated ketones; CASRN not real! |RI:1382|",1382 +"InChI=1S/C15H18O3/c1-10-13(8-15-12(16)7-14(10)18-15)17-9-11-5-3-2-4-6-11/h2-6,10,13-15H,7-9H2,1H3/t10-,13+,14-,15-/m1/s1",C15H18O3,11,"(+)-(1S,3R,4S,5R)-3-Benzyloxy-4-methyl-8-oxabicyclo[3.2.1]octan-6-one",C[C@@H]1[C@@H](OCc2ccccc2)C[C@H]2O[C@@H]1CC2=O,SpectrumID: 871257; Source: QC-10-1542-4; QI: 23; Class: Benzylethers; CASRN not real! 
|RI:1893|,1893 +"InChI=1S/C14H21NO2.HI/c1-9-6-11-7-12(17-5)8-13(16)14(11)10(2)15(9,3)4;/h7-10H,6H2,1-5H3;1H/t9-,10-;/m0./s1",C14H22INO2,7,"(1S,3S)-8-Hydroxy-6-methoxy-N,N-dimethyl-1,3-dimethyl-1,2,3,4-tetrahydroisoquinolinium iodide",COc1cc([O-])c2c(c1)C[C@H](C)[N+](C)(C)[C@H]2C.I,SpectrumID: 855698; Source: F-56-584-1; QI: 92; Class: Tetrahydroisoquinolines; CASRN not real!, +"InChI=1S/C18H20NOS.HI/c1-19-11-14-5-3-4-6-16(14)18(17(19)12-20)13-7-9-15(21-2)10-8-13;/h3-11,17-18,20H,12H2,1-2H3;1H/q+1;/p-1/t17-,18+;/m0./s1",C18H20INOS,20,"(3R,4R)-3-Hydroxymethyl-2-methyl-4-(4-methylthiophenyl)-3,4-dihydtoisoquinolinium iodide",CSc1ccc([C@@H]2c3ccccc3C=[N+](C)[C@H]2CO)cc1.[I-],SpectrumID: 1637146; Source: KD-15-2504-16; QI: 95; Class: Dihydroisoquinolines; CASRN not real!, +"InChI=1S/C28H40N4O6S2.2ClHO4/c1-19-25-11-15-37-27(35)24(30-22(4)34)10-6-8-14-32-18-40-26(20(32)2)12-16-38-28(36)23(29-21(3)33)9-5-7-13-31(19)17-39-25;2*2-1(3,4)5/h17-18,23-24H,5-16H2,1-4H3;2*(H,2,3,4,5)/t23-,24-;;/m0../s1",C28H42Cl2N4O14S2,4,"(9S,19S)-9,19-Diacetamido-4,14-dimethyl-10,20-dioxo-1,11-dioxa-4,14(5,3)bis*thiazole)icosaphane-4,14-diium di-perchlorate salt",CC([O-])=N[C@H]1CCCC[n+]2csc(c2C)CCOC(=O)[C@@H](N=C(C)[O-])CCCC[n+]2csc(c2C)CCOC1=O.[O-][Cl+3]([O-])([O-])O.[O-][Cl+3]([O-])([O-])O,SpectrumID: 783347; Source: KC-0-871-38b; QI: 19; Class: N-acyl-alpha amino acids and derivatives; CASRN not real!,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.inchi Mon Nov 27 09:04:04 2023 +0000 @@ -0,0 +1,15 @@ +InChI=1S/C6H4ClO2Si.C5H5.2CO.Fe/c7-10-8-5-3-1-2-4-6(5)9-10;1-2-4-5-3-1;2*1-2;/h1-4H;1-5H;;; +InChI=1S/C13H14O/c1-13(10-14)9-12(13)8-7-11-5-3-2-4-6-11/h2-6,12,14H,9-10H2,1H3/t12-,13-/m0/s1 +InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1 +InChI=1S/C11H15NO2S/c1-9-2-4-10(5-3-9)15-7-6-12-8-11(13)14/h2-5,12H,6-8H2,1H3,(H,13,14) +InChI=1S/C12H16N2O2S/c1-12(7-15)8-17-11(14-12)13-9-3-5-10(16-2)6-4-9/h3-6,15H,7-8H2,1-2H3,(H,13,14) +InChI=1S/C23H17F3N2O/c24-23(25,26)22-19(16-10-4-1-5-11-16)20(21(29)17-12-6-2-7-13-17)28(27-22)18-14-8-3-9-15-18/h1-15,19-20H/t19-,20+/m1/s1 +InChI=1S/C15H24O/c1-10(2)13-6-4-11(3)14-7-5-12(9-16)8-15(13)14/h4,8,10,13-16H,5-7,9H2,1-3H3/t13-,14+,15-/m0/s1 +InChI=1S/C17H28NO6P/c1-13(2)23-25(21,24-14(3)4)18(12-17(19)20)11-10-15-6-8-16(22-5)9-7-15/h6-9,13-14H,10-12H2,1-5H3,(H,19,20) +InChI=1S/C19H27NO/c1-3-19-11-7-13-20(18(19)14-17(21)10-12-19)15(2)16-8-5-4-6-9-16/h4-6,8-9,15,18H,3,7,10-14H2,1-2H3/t15-,18-,19+/m0/s1 +InChI=1S/C22H36O5/c1-14(23)9-10-18-21(6)12-8-11-20(4,5)19(21)17(26-15(2)24)13-22(18,7)27-16(3)25/h17-19H,8-13H2,1-7H3/t17-,18+,19-,21+,22+/m0/s1 +InChI=1S/C13H20O2/c1-9-7-8-13(15-9,12(4,5)6)10(2)11(3)14/h7-9H,2H2,1,3-6H3/t9-,13+/m0/s1 +InChI=1S/C15H18O3/c1-10-13(8-15-12(16)7-14(10)18-15)17-9-11-5-3-2-4-6-11/h2-6,10,13-15H,7-9H2,1H3/t10-,13+,14-,15-/m1/s1 +InChI=1S/C14H21NO2.HI/c1-9-6-11-7-12(17-5)8-13(16)14(11)10(2)15(9,3)4;/h7-10H,6H2,1-5H3;1H/t9-,10-;/m0./s1 +InChI=1S/C18H20NOS.HI/c1-19-11-14-5-3-4-6-16(14)18(17(19)12-20)13-7-9-15(21-2)10-8-13;/h3-11,17-18,20H,12H2,1-2H3;1H/q+1;/p-1/t17-,18+;/m0./s1 
+InChI=1S/C28H40N4O6S2.2ClHO4/c1-19-25-11-15-37-27(35)24(30-22(4)34)10-6-8-14-32-18-40-26(20(32)2)12-16-38-28(36)23(29-21(3)33)9-5-7-13-31(19)17-39-25;2*2-1(3,4)5/h17-18,23-24H,5-16H2,1-4H3;2*(H,2,3,4,5)/t23-,24-;;/m0../s1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.smi Mon Nov 27 09:04:04 2023 +0000 @@ -0,0 +1,15 @@ +Cl[Si]1Oc2ccccc2O1.[C-]#[O+].[C-]#[O+].[CH]1C=CC=C1.[Fe] +C[C@@]1(CO)C[C@@H]1C#Cc1ccccc1 +C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12 +Cc1ccc(SCCNCC(=O)O)cc1 +COc1ccc(NC2=NC(C)(CO)CS2)cc1 +O=C(c1ccccc1)[C@@H]1[C@@H](c2ccccc2)C(C(F)(F)F)=NN1c1ccccc1 +CC1=CC[C@@H](C(C)C)[C@@H]2C=C(CO)CC[C@H]12 +COc1ccc(CCN(CC(=O)O)P(=O)(OC(C)C)OC(C)C)cc1 +CC[C@]12CCCN([C@@H](C)c3ccccc3)[C@H]1CC(=O)CC2 +CC(=O)CC[C@@H]1[C@@]2(C)CCCC(C)(C)[C@@H]2[C@@H](OC(C)=O)C[C@@]1(C)OC(C)=O +C=C(C(C)=O)[C@@]1(C(C)(C)C)C=C[C@H](C)O1 +C[C@@H]1[C@@H](OCc2ccccc2)C[C@H]2O[C@@H]1CC2=O +COc1cc([O-])c2c(c1)C[C@H](C)[N+](C)(C)[C@H]2C.I +CSc1ccc([C@@H]2c3ccccc3C=[N+](C)[C@H]2CO)cc1.[I-] +CC([O-])=N[C@H]1CCCC[n+]2csc(c2C)CCOC(=O)[C@@H](N=C(C)[O-])CCCC[n+]2csc(c2C)CCOC1=O.[O-][Cl+3]([O-])([O-])O.[O-][Cl+3]([O-])([O-])O
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample_output.csv Mon Nov 27 09:04:04 2023 +0000 @@ -0,0 +1,12 @@ +inchi,formula,num_peaks,compound_name,smiles,comment,retention_index +"InChI=1S/C13H14O/c1-13(10-14)9-12(13)8-7-11-5-3-2-4-6-11/h2-6,12,14H,9-10H2,1H3/t12-,13-/m0/s1",C13H14O,20,"((1R*,2R*)-1-Methyl-2-phenylethynylcyclopropyl)methanol",C[C@@]1(CO)C[C@@H]1C#Cc1ccccc1,SpectrumID: 1752764; Source: A1-13-956/SMS7-13; DOI: 10.1021/ol1029996; QI: 383; Class: Benzene and substituted derivatives; CASRN not real! |RI:1588|,1588.0 +"InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1",C34H54O4,14,"((1R,3aS,5aR,5bR,7aR,9S,11aR,11bR,13aR,13bR)-9-acetoxy-5a,5b,8,8,11a-pentamethyl-1-(prop-1-en-2-yl)icosahydro-1H-cyclopenta[a]chrysen-3a-yl)methyl acetate",C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12,SpectrumID: 1800193; Source: PA-7-239-4(DIP); DOI: 10.1002_(SICI)1099-1565(199605)7_3_136; Class: Triterpenoids; CASRN not real! 
|RI:3353|,3353.0 +"InChI=1S/C11H15NO2S/c1-9-2-4-10(5-3-9)15-7-6-12-8-11(13)14/h2-5,12H,6-8H2,1H3,(H,13,14)",C11H15NO2S,20,((2-[(4-Methylphenyl)sulfanyl]ethyl)amino)acetic acid,Cc1ccc(SCCNCC(=O)O)cc1,SpectrumID: 1226271; Source: W5-1989-35586-29950; QI: 400; Class: Alpha amino acids |RI:2011|,2011.0 +"InChI=1S/C12H16N2O2S/c1-12(7-15)8-17-11(14-12)13-9-3-5-10(16-2)6-4-9/h3-6,15H,7-8H2,1-2H3,(H,13,14)",C12H16N2O2S,167,"((2Z)-2-[(4-Methoxyphenyl)imino]-4-methyl-1,3-thiazolidin-4-yl)methanol",COc1ccc(NC2=NC(C)(CO)CS2)cc1,SpectrumID: 1432066; Source: AD-0-2532-0; QI: 900; Class: Methoxyanilines |RI:2319|,2319.0 +"InChI=1S/C23H17F3N2O/c24-23(25,26)22-19(16-10-4-1-5-11-16)20(21(29)17-12-6-2-7-13-17)28(27-22)18-14-8-3-9-15-18/h1-15,19-20H/t19-,20+/m1/s1",C23H17F3N2O,9,"((3S,4R)-2,4-Diphenyl-5-trifluoromethyl-3,4-dihydro-2H-pyrazol-3-yl)-phenyl-methanone",O=C(c1ccccc1)[C@@H]1[C@@H](c2ccccc2)C(C(F)(F)F)=NN1c1ccccc1,SpectrumID: 1676300; Source: F4-43-2771-4a; QI: 56; Class: Alkyl-phenylketones; CASRN not real! |RI:2735|,2735.0 +"InChI=1S/C15H24O/c1-10(2)13-6-4-11(3)14-7-5-12(9-16)8-15(13)14/h4,8,10,13-16H,5-7,9H2,1-3H3/t13-,14+,15-/m0/s1",C15H24O,156,"((4aS,8S,8aR)-8-isopropyl-5-methyl-3,4,4a,7,8,8a-hexahydronaphthalen-2-yl)methanol",CC1=CC[C@@H](C(C)C)[C@@H]2C=C(CO)CC[C@H]12,"SpectrumID: 1815091; Source: N.Kacem, et al. Industrial Crops and Products, V.90, 2016, P.87-93; QI: 881; Class: Sesquiterpenoids; CASRN not real! 
|RI:1683|",1683.0 +"InChI=1S/C17H28NO6P/c1-13(2)23-25(21,24-14(3)4)18(12-17(19)20)11-10-15-6-8-16(22-5)9-7-15/h6-9,13-14H,10-12H2,1-5H3,(H,19,20)",C17H28NO6P,12,((Diisopropoxyphosphoryl)[2-(4-methoxyphenyl)ethyl]amino)acetic acid,COc1ccc(CCN(CC(=O)O)P(=O)(OC(C)C)OC(C)C)cc1,SpectrumID: 1356024; Source: W5-1989-35199-30337; QI: 78; Class: Alpha amino acids and derivatives, +"InChI=1S/C19H27NO/c1-3-19-11-7-13-20(18(19)14-17(21)10-12-19)15(2)16-8-5-4-6-9-16/h4-6,8-9,15,18H,3,7,10-14H2,1-2H3/t15-,18-,19+/m0/s1",C19H27NO,21,"(+)-(1'S,4aR,8aS)-4a-Ethyl-1-(1'-phenylethyl)octahydroquinolin-7-one",CC[C@]12CCCN([C@@H](C)c3ccccc3)[C@H]1CC(=O)CC2,SpectrumID: 1634636; Source: KD-12-2102-6; QI: 147; Class: Quinolidines; CASRN not real! |RI:2267|,2267.0 +"InChI=1S/C22H36O5/c1-14(23)9-10-18-21(6)12-8-11-20(4,5)19(21)17(26-15(2)24)13-22(18,7)27-16(3)25/h17-19H,8-13H2,1-7H3/t17-,18+,19-,21+,22+/m0/s1",C22H36O5,9,"(+)-(1R,2R,4S,4aS,8aS)-4-(Acetyloxy)-2,5,5,8a-tetramethyl-1-(3-oxobutyl)decahydro-2-naphthlenyl acetate",CC(=O)CC[C@@H]1[C@@]2(C)CCCC(C)(C)[C@@H]2[C@@H](OC(C)=O)C[C@@]1(C)OC(C)=O,SpectrumID: 1624823; Source: KC-57-5677-51; QI: 106; Class: Sesquiterpenoids; CASRN not real! |RI:2534|,2534.0 +"InChI=1S/C13H20O2/c1-9-7-8-13(15-9,12(4,5)6)10(2)11(3)14/h7-9H,2H2,1,3-6H3/t9-,13+/m0/s1",C13H20O2,14,"(+)-(2'S,5'S)-3-(2'-tert-Butyl-5'-methyl-2',5'-dihydrofuran-2'-yl)but-3-en-2-one",C=C(C(C)=O)[C@@]1(C(C)(C)C)C=C[C@H](C)O1,"SpectrumID: 1618842; Source: F4-0-2808-29; QI: 171; Class: Alpha-branched alpha,beta-unsaturated ketones; CASRN not real! |RI:1382|",1382.0 +"InChI=1S/C15H18O3/c1-10-13(8-15-12(16)7-14(10)18-15)17-9-11-5-3-2-4-6-11/h2-6,10,13-15H,7-9H2,1H3/t10-,13+,14-,15-/m1/s1",C15H18O3,11,"(+)-(1S,3R,4S,5R)-3-Benzyloxy-4-methyl-8-oxabicyclo[3.2.1]octan-6-one",C[C@@H]1[C@@H](OCc2ccccc2)C[C@H]2O[C@@H]1CC2=O,SpectrumID: 871257; Source: QC-10-1542-4; QI: 23; Class: Benzylethers; CASRN not real! |RI:1893|,1893.0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample_output.inchi Mon Nov 27 09:04:04 2023 +0000 @@ -0,0 +1,11 @@ +InChI=1S/C13H14O/c1-13(10-14)9-12(13)8-7-11-5-3-2-4-6-11/h2-6,12,14H,9-10H2,1H3/t12-,13-/m0/s1 +InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1 +InChI=1S/C11H15NO2S/c1-9-2-4-10(5-3-9)15-7-6-12-8-11(13)14/h2-5,12H,6-8H2,1H3,(H,13,14) +InChI=1S/C12H16N2O2S/c1-12(7-15)8-17-11(14-12)13-9-3-5-10(16-2)6-4-9/h3-6,15H,7-8H2,1-2H3,(H,13,14) +InChI=1S/C23H17F3N2O/c24-23(25,26)22-19(16-10-4-1-5-11-16)20(21(29)17-12-6-2-7-13-17)28(27-22)18-14-8-3-9-15-18/h1-15,19-20H/t19-,20+/m1/s1 +InChI=1S/C15H24O/c1-10(2)13-6-4-11(3)14-7-5-12(9-16)8-15(13)14/h4,8,10,13-16H,5-7,9H2,1-3H3/t13-,14+,15-/m0/s1 +InChI=1S/C17H28NO6P/c1-13(2)23-25(21,24-14(3)4)18(12-17(19)20)11-10-15-6-8-16(22-5)9-7-15/h6-9,13-14H,10-12H2,1-5H3,(H,19,20) +InChI=1S/C19H27NO/c1-3-19-11-7-13-20(18(19)14-17(21)10-12-19)15(2)16-8-5-4-6-9-16/h4-6,8-9,15,18H,3,7,10-14H2,1-2H3/t15-,18-,19+/m0/s1 +InChI=1S/C22H36O5/c1-14(23)9-10-18-21(6)12-8-11-20(4,5)19(21)17(26-15(2)24)13-22(18,7)27-16(3)25/h17-19H,8-13H2,1-7H3/t17-,18+,19-,21+,22+/m0/s1 +InChI=1S/C13H20O2/c1-9-7-8-13(15-9,12(4,5)6)10(2)11(3)14/h7-9H,2H2,1,3-6H3/t9-,13+/m0/s1 +InChI=1S/C15H18O3/c1-10-13(8-15-12(16)7-14(10)18-15)17-9-11-5-3-2-4-6-11/h2-6,10,13-15H,7-9H2,1H3/t10-,13+,14-,15-/m1/s1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample_output.smi Mon Nov 27 09:04:04 2023 +0000 @@ -0,0 +1,11 @@ +C[C@@]1(CO)C[C@@H]1C#Cc1ccccc1 +C=C(C)[C@@H]1CC[C@]2(COC(=O)C)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(=O)C)C(C)(C)[C@@H]5CC[C@@]34C)[C@@H]12 +Cc1ccc(SCCNCC(=O)O)cc1 +COc1ccc(NC2=NC(C)(CO)CS2)cc1 +O=C(c1ccccc1)[C@@H]1[C@@H](c2ccccc2)C(=NN1c1ccccc1)C(F)(F)F +CC1=CC[C@@H](C(C)C)[C@@H]2C=C(CO)CC[C@H]12 +COc1ccc(CCN(CC(=O)O)P(=O)(OC(C)C)OC(C)C)cc1 +CC[C@]12CCCN([C@@H](C)c3ccccc3)[C@H]1CC(=O)CC2 +CC(=O)CC[C@@H]1[C@@]2(C)CCCC(C)(C)[C@@H]2[C@@H](OC(=O)C)C[C@@]1(C)OC(=O)C +C=C(C(=O)C)[C@@]1(C(C)(C)C)C=C[C@H](C)O1 +C[C@@H]1[C@@H](OCc2ccccc2)C[C@H]2O[C@@H]1CC2=O
