Mercurial > repos > recetox > rem_complex
comparison rem_complex.py @ 3:567327a97ad2 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 4b8a43b863ff8a0ff1d5a08e516068853adf358d
| author | recetox |
|---|---|
| date | Tue, 16 Apr 2024 11:30:27 +0000 |
| parents | a0e07a0bc047 |
| children | |
comparison
equal
deleted
inserted
replaced
| 2:1dc9f5a3163d | 3:567327a97ad2 |
|---|---|
| 7 | 7 |
| 8 def parse_arguments() -> argparse.Namespace: | 8 def parse_arguments() -> argparse.Namespace: |
| 9 parser = argparse.ArgumentParser() | 9 parser = argparse.ArgumentParser() |
| 10 parser.add_argument('-iformat', '--input_format', help='Input file format') | 10 parser.add_argument('-iformat', '--input_format', help='Input file format') |
| 11 parser.add_argument('-i', '--input_filename', type=str, required=True, help='Input file name') | 11 parser.add_argument('-i', '--input_filename', type=str, required=True, help='Input file name') |
| 12 parser.add_argument('-o', '--output_filename', type=str, required=True, help='Outout file name') | 12 parser.add_argument('-o', '--output_filename', type=str, required=True, help='Output file name') |
| 13 args = parser.parse_args() | 13 args = parser.parse_args() |
| 14 return args | 14 return args |
| 15 | 15 |
| 16 | 16 |
| 17 def filter_csv_molecules(file_name: str, output_file_name: str) -> None: | 17 def filter_csv_tsv_molecules(file_name: str, output_file_name: str, sep: str) -> None: |
| 18 """Removes molecules with '.' in SMILES string from csv file. | 18 """Removes molecules with '.' in SMILES string from csv or tsv file. |
| 19 | 19 |
| 20 Args: | 20 Args: |
| 21 file_name (str): Path to csv file that contains metadata. | 21 file_name (str): Path to csv or tsv file that contains metadata. |
| 22 output_file_name (str): Path to destination file, in csv format. | 22 output_file_name (str): Path to destination file, tsv format. |
| | 23 sep (str): Separator used in the file (',' for csv, '\t' for tsv). |
| 23 """ | 24 """ |
| 24 df = pd.read_csv(file_name) | 25 df = pd.read_csv(file_name, sep=sep) |
| 25 mask = df['smiles'].str.contains(".", na=False, regex=False) | 26 mask = df['smiles'].str.contains(".", na=False, regex=False) |
| 26 mask = mask.apply(lambda x: not x) | 27 mask = mask.apply(lambda x: not x) |
| 27 df[mask].to_csv(output_file_name, index=False) | 28 df[mask].to_csv(output_file_name, index=False, sep='\t') |
| 28 | 29 |
| 29 | 30 |
| 30 def filter_other_format_molecules(file_name: str, output_file_name: str, input_format: str) -> None: | 31 def filter_other_format_molecules(file_name: str, output_file_name: str, input_format: str) -> None: |
| 31 """Removes molecules with '.' in SMILES string from smi or inchi files. | 32 """Removes molecules with '.' in SMILES string from smi or inchi files. |
| 32 | 33 |
| 45 | 46 |
| 46 def filter_complex_molecules(file_name: str, output_file_name: str, input_format: str) -> None: | 47 def filter_complex_molecules(file_name: str, output_file_name: str, input_format: str) -> None: |
| 47 """Removes molecular complexes depending on the input format. | 48 """Removes molecular complexes depending on the input format. |
| 48 | 49 |
| 49 Args: | 50 Args: |
| 50 file_name (str): Path to csv, smi or inchi files | 51 file_name (str): Path to csv, tsv, smi, or inchi files. |
| 51 output_file_name (str): Path to destination files, in csv. smi or inchi formats. | 52 output_file_name (str): Path to destination files, in corresponding formats. |
| 52 input_format (str): Input file formats. | 53 input_format (str): Input file format. |
| 53 """ | 54 """ |
| 54 if input_format == 'csv': | 55 if input_format in ['csv', 'tsv']: |
| 55 filter_csv_molecules(file_name, output_file_name) | 56 sep = ',' if input_format == 'csv' else '\t' |
| | 57 filter_csv_tsv_molecules(file_name, output_file_name, sep) |
| 56 else: | 58 else: |
| 57 filter_other_format_molecules(file_name, output_file_name, input_format) | 59 filter_other_format_molecules(file_name, output_file_name, input_format) |
| 58 | 60 |
| 59 | 61 |
| 60 if __name__ == "__main__": | 62 if __name__ == "__main__": |
