comparison rem_complex.py @ 3:567327a97ad2 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 4b8a43b863ff8a0ff1d5a08e516068853adf358d
author recetox
date Tue, 16 Apr 2024 11:30:27 +0000
parents a0e07a0bc047
children
comparison
equal deleted inserted replaced
2:1dc9f5a3163d 3:567327a97ad2
7 7
8 def parse_arguments() -> argparse.Namespace: 8 def parse_arguments() -> argparse.Namespace:
9 parser = argparse.ArgumentParser() 9 parser = argparse.ArgumentParser()
10 parser.add_argument('-iformat', '--input_format', help='Input file format') 10 parser.add_argument('-iformat', '--input_format', help='Input file format')
11 parser.add_argument('-i', '--input_filename', type=str, required=True, help='Input file name') 11 parser.add_argument('-i', '--input_filename', type=str, required=True, help='Input file name')
12 parser.add_argument('-o', '--output_filename', type=str, required=True, help='Outout file name') 12 parser.add_argument('-o', '--output_filename', type=str, required=True, help='Output file name')
13 args = parser.parse_args() 13 args = parser.parse_args()
14 return args 14 return args
15 15
16 16
17 def filter_csv_molecules(file_name: str, output_file_name: str) -> None: 17 def filter_csv_tsv_molecules(file_name: str, output_file_name: str, sep: str) -> None:
18 """Removes molecules with '.' in SMILES string from csv file. 18 """Removes molecules with '.' in SMILES string from csv or tsv file.
19 19
20 Args: 20 Args:
21 file_name (str): Path to csv file that contains metadata. 21 file_name (str): Path to csv or tsv file that contains metadata.
22 output_file_name (str): Path to destination file, in csv format. 22 output_file_name (str): Path to destination file, tsv format.
23 sep (str): Separator used in the file (',' for csv, '\t' for tsv).
23 """ 24 """
24 df = pd.read_csv(file_name) 25 df = pd.read_csv(file_name, sep=sep)
25 mask = df['smiles'].str.contains(".", na=False, regex=False) 26 mask = df['smiles'].str.contains(".", na=False, regex=False)
26 mask = mask.apply(lambda x: not x) 27 mask = mask.apply(lambda x: not x)
27 df[mask].to_csv(output_file_name, index=False) 28 df[mask].to_csv(output_file_name, index=False, sep='\t')
28 29
29 30
30 def filter_other_format_molecules(file_name: str, output_file_name: str, input_format: str) -> None: 31 def filter_other_format_molecules(file_name: str, output_file_name: str, input_format: str) -> None:
31 """Removes molecules with '.' in SMILES string from smi or inchi files. 32 """Removes molecules with '.' in SMILES string from smi or inchi files.
32 33
45 46
46 def filter_complex_molecules(file_name: str, output_file_name: str, input_format: str) -> None: 47 def filter_complex_molecules(file_name: str, output_file_name: str, input_format: str) -> None:
47 """Removes molecular complexes depending on the input format. 48 """Removes molecular complexes depending on the input format.
48 49
49 Args: 50 Args:
50 file_name (str): Path to csv, smi or inchi files 51 file_name (str): Path to csv, tsv, smi, or inchi files.
51 output_file_name (str): Path to destination files, in csv. smi or inchi formats. 52 output_file_name (str): Path to destination files, in corresponding formats.
52 input_format (str): Input file formats. 53 input_format (str): Input file format.
53 """ 54 """
54 if input_format == 'csv': 55 if input_format in ['csv', 'tsv']:
55 filter_csv_molecules(file_name, output_file_name) 56 sep = ',' if input_format == 'csv' else '\t'
57 filter_csv_tsv_molecules(file_name, output_file_name, sep)
56 else: 58 else:
57 filter_other_format_molecules(file_name, output_file_name, input_format) 59 filter_other_format_molecules(file_name, output_file_name, input_format)
58 60
59 61
60 if __name__ == "__main__": 62 if __name__ == "__main__":