Mercurial > repos > bimib > cobraxy
comparison COBRAxy/ras_generator.py @ 490:c6ea189ea7e9 draft
Uploaded
| author | francesco_lapi |
|---|---|
| date | Mon, 29 Sep 2025 15:13:21 +0000 |
| parents | 97eea560a10f |
| children | 96f512dff490 |
comparison
equal
deleted
inserted
replaced
| 489:97eea560a10f | 490:c6ea189ea7e9 |
|---|---|
| 104 for key, value in reactions.items(): | 104 for key, value in reactions.items(): |
| 105 ids.append(key) | 105 ids.append(key) |
| 106 rules.append(value) | 106 rules.append(value) |
| 107 return (ids, rules) | 107 return (ids, rules) |
| 108 | 108 |
| 109 ############################ check_methods #################################### | |
| 110 def gene_type(l :str, name :str) -> str: | |
| 111 """ | |
| 112 Determine the type of gene ID. | |
| 113 | |
| 114 Args: | |
| 115 l (str): The gene identifier to check. | |
| 116 name (str): The name of the dataset, used in error messages. | |
| 117 | |
| 118 Returns: | |
| 119 str: The type of gene ID ('hugo_id', 'ensembl_gene_id', 'symbol', or 'entrez_id'). | |
| 120 | |
| 121 Raises: | |
| 122 SystemExit: Raised via sys.exit() if the gene ID type is not supported; execution is aborted. | |
| 123 """ | |
| 124 if check_hgnc(l): | |
| 125 return 'hugo_id' | |
| 126 elif check_ensembl(l): | |
| 127 return 'ensembl_gene_id' | |
| 128 elif check_symbol(l): | |
| 129 return 'symbol' | |
| 130 elif check_entrez(l): | |
| 131 return 'entrez_id' | |
| 132 else: | |
| 133 sys.exit('Execution aborted:\n' + | |
| 134 'gene ID type in ' + name + ' not supported. Supported ID'+ | |
| 135 'types are: HUGO ID, Ensemble ID, HUGO symbol, Entrez ID\n') | |
| 136 | |
| 137 def check_hgnc(l :str) -> bool: | |
| 138 """ | |
| 139 Check if a gene identifier follows the HGNC format. | |
| 140 | |
| 141 Args: | |
| 142 l (str): The gene identifier to check. | |
| 143 | |
| 144 Returns: | |
| 145 bool: True if the gene identifier follows the HGNC format, False otherwise. | |
| 146 """ | |
| 147 if len(l) > 5: | |
| 148 if (l.upper()).startswith('HGNC:'): | |
| 149 return l[5:].isdigit() | |
| 150 else: | |
| 151 return False | |
| 152 else: | |
| 153 return False | |
| 154 | |
| 155 def check_ensembl(l :str) -> bool: | |
| 156 """ | |
| 157 Check if a gene identifier follows the Ensembl format. | |
| 158 | |
| 159 Args: | |
| 160 l (str): The gene identifier to check. | |
| 161 | |
| 162 Returns: | |
| 163 bool: True if the gene identifier starts with 'ENS' (case-insensitive), False otherwise. | |
| 164 """ | |
| 165 return l.upper().startswith('ENS') | |
| 166 | |
| 167 | |
| 168 def check_symbol(l :str) -> bool: | |
| 169 """ | |
| 170 Check if a gene identifier follows the symbol format. | |
| 171 | |
| 172 Args: | |
| 173 l (str): The gene identifier to check. | |
| 174 | |
| 175 Returns: | |
| 176 bool: True if the gene identifier starts with a letter followed by at least one alphanumeric character, False otherwise. | |
| 177 """ | |
| 178 if len(l) > 0: | |
| 179 if l[0].isalpha() and l[1:].isalnum(): | |
| 180 return True | |
| 181 else: | |
| 182 return False | |
| 183 else: | |
| 184 return False | |
| 185 | |
| 186 def check_entrez(l :str) -> bool: | |
| 187 """ | |
| 188 Check if a gene identifier follows the Entrez ID format. | |
| 189 | |
| 190 Args: | |
| 191 l (str): The gene identifier to check. | |
| 192 | |
| 193 Returns: | |
| 194 bool: True if the gene identifier follows the Entrez ID format, False otherwise. | |
| 195 """ | |
| 196 if len(l) > 0: | |
| 197 return l.isdigit() | |
| 198 else: | |
| 199 return False | |
| 200 | 109 |
| 201 ############################ gene ############################################# | 110 ############################ gene ############################################# |
| 202 def data_gene(gene: pd.DataFrame, type_gene: str, name: str, gene_custom: Optional[Dict[str, str]]) -> Dict[str, str]: | 111 def data_gene(gene: pd.DataFrame, type_gene: str, name: str, gene_custom: Optional[Dict[str, str]]) -> Dict[str, str]: |
| 203 """ | 112 """ |
| 204 Process gene data to ensure correct formatting and handle duplicates. | 113 Process gene data to ensure correct formatting and handle duplicates. |
