Mercurial > repos > rnateam > htseq_clip
comparison htsc_create_count_table.py @ 0:c82c41550f82 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/htseq-clip commit 4879439f0df3386b97d8507c5991051fbdda053a"
| author | rnateam |
|---|---|
| date | Sat, 15 Oct 2022 21:37:15 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c82c41550f82 |
|---|---|
| 1 #!/usr/bin/env python3 | |
| 2 | |
| 3 import argparse | |
| 4 import os | |
| 5 import subprocess | |
| 6 | |
| 7 import pysam | |
| 8 | |
| 9 """ | |
| 10 | |
| 11 Install deseq-clip | |
| 12 ================== | |
| 13 | |
| 14 conda install -c bioconda pysam | |
| 15 conda install -c bioconda htseq | |
| 16 pip install htseq-clip | |
| 17 | |
| 18 Or directly by: | |
| 19 conda install -c bioconda htseq-clip | |
| 20 | |
| 21 | |
| 22 Test call | |
| 23 ========= | |
| 24 | |
| 25 python htsc_create_count_table.py --data-id Rbp --win-bed test-data/windows.exp.bed --out test_create_count_table_out --exp-bams test-data/Rbp_exp_rep1.Synechocystis_pSYSM.bam test-data/Rbp_exp_rep2.Synechocystis_pSYSM.bam --ctr-bams test-data/Rbp_ctrl_rep1.Synechocystis_pSYSM.bam --no-zipper | |
| 26 | |
| 27 Compare: | |
| 28 diff test-data/Rbp_count_matrix.exp.txt test_create_count_table_out/count_matrix.txt | |
| 29 diff test-data/sample_info.exp.txt test_create_count_table_out/sample_info.txt | |
| 30 | |
| 31 This corresponds to: | |
| 32 htseq-clip extract -i test-data/Rbp_exp_rep1.Synechocystis_pSYSM.bam -o test_create_count_table_out/Rbp_exp_rep1.Synechocystis_pSYSM.bed -e 1 -s m -g 0 -q 10 -c 4 -m 0 -x 500 -l 10000 | |
| 33 htseq-clip extract -i test-data/Rbp_exp_rep2.Synechocystis_pSYSM.bam -o test_create_count_table_out/Rbp_exp_rep2.Synechocystis_pSYSM.bed -e 1 -s m -g 0 -q 10 -c 4 -m 0 -x 500 -l 10000 | |
| 34 htseq-clip extract -i test-data/Rbp_ctrl_rep1.Synechocystis_pSYSM.bam -o test_create_count_table_out/Rbp_ctrl_rep1.Synechocystis_pSYSM.bed -e 1 -s m -g 0 -q 10 -c 4 -m 0 -x 500 -l 10000 | |
| 35 htseq-clip count -i test_create_count_table_out/Rbp_exp_rep1.Synechocystis_pSYSM.bed -a test-data/windows.exp.bed -o test_create_count_table_out/counts_Rbp/Rbp_exp_rep1.Synechocystis_pSYSM.csv | |
| 36 htseq-clip count -i test_create_count_table_out/Rbp_exp_rep2.Synechocystis_pSYSM.bed -a test-data/windows.exp.bed -o test_create_count_table_out/counts_Rbp/Rbp_exp_rep2.Synechocystis_pSYSM.csv | |
| 37 htseq-clip count -i test_create_count_table_out/Rbp_ctrl_rep1.Synechocystis_pSYSM.bed -a test-data/windows.exp.bed -o test_create_count_table_out/counts_Rbp/Rbp_ctrl_rep1.Synechocystis_pSYSM.csv | |
| 38 htseq-clip createMatrix -i test_create_count_table_out/counts_Rbp -o test_create_count_table_out/Rbp_count_matrix.txt -b Rbp | |
| 39 | |
| 40 | |
| 41 To get BAM content for single chromosome: | |
| 42 samtools view -b -h bam_all/Rbp3_uv_rep1.bam Synechocystis_pSYSM > test-data/Rbp_exp_rep1.Synechocystis_pSYSM.bam | |
| 43 | |
| 44 More tests: | |
| 45 python htsc_create_count_table.py --hce-f test-data/chr_names.txt --data-id Rbp --win-bed test-data/windows.exp.bed --out test_create_count_table_out --exp-bams test-data/Rbp_exp_rep1.Synechocystis_pSYSM.bam test-data/Rbp_exp_rep2.Synechocystis_pSYSM.bam --ctr-bams test-data/Rbp_ctrl_rep1.Synechocystis_pSYSM.bam --no-zipper | |
| 46 | |
| 47 DEWSeq input files: | |
| 48 test_create_count_table_out/Rbp_count_matrix.txt | |
| 49 test_create_count_table_out/sample_info.txt | |
| 50 | |
| 51 """ | |
| 52 | |
| 53 | |
| 54 ################################################################################ | |
| 55 | |
| 56 def setup_argument_parser(): | |
| 57 """Setup argparse parser.""" | |
| 58 help_description = """ | |
| 59 Use htseq-clip to extract crosslink sites from BAM files, and count | |
| 60 overlaps with window regions. Finally create an R count matrix, e.g. as | |
| 61 input for DEWSeq. | |
| 62 """ | |
| 63 # Define argument parser. | |
| 64 p = argparse.ArgumentParser(add_help=False, | |
| 65 prog="htsc_create_count_table.py", | |
| 66 description=help_description, | |
| 67 formatter_class=argparse.MetavarTypeHelpFormatter) | |
| 68 | |
| 69 # Required arguments. | |
| 70 p.add_argument("-h", "--help", | |
| 71 action="help", | |
| 72 help="Print help message") | |
| 73 p.add_argument("--win-bed", | |
| 74 dest="in_win_bed", | |
| 75 type=str, | |
| 76 metavar='str', | |
| 77 required=True, | |
| 78 help="Sliding windows BED annotation file created with htseq-clip createSlidingWindows (also accepts .bed.gz)") | |
| 79 p.add_argument("--exp-bams", | |
| 80 dest="exp_bam_list", | |
| 81 type=str, | |
| 82 metavar='str', | |
| 83 nargs='+', | |
| 84 required=True, | |
| 85 help="List of IP BAM files (--exp-bams ip_rep1.bam ip_rep2.bam .. )") | |
| 86 p.add_argument("--ctr-bams", | |
| 87 dest="ctr_bam_list", | |
| 88 type=str, | |
| 89 metavar='str', | |
| 90 nargs='+', | |
| 91 required=True, | |
| 92 help="List of control (SM input) BAM files (--ctr-bams smi_rep1.bam smi_rep2.bam .. )") | |
| 93 p.add_argument("--out", | |
| 94 dest="out_folder", | |
| 95 type=str, | |
| 96 metavar='str', | |
| 97 required=True, | |
| 98 help="Results output folder") | |
| 99 # htseq-clip extract. | |
| 100 p.add_argument("--hce-e", | |
| 101 dest="hce_e", | |
| 102 type=int, | |
| 103 default=1, | |
| 104 choices=[1, 2], | |
| 105 help="htseq-clip extract -e parameter. This selects read/mate to extract crosslink sites from (default: 1)") | |
| 106 p.add_argument("--hce-s", | |
| 107 dest="hce_s", | |
| 108 type=str, | |
| 109 default="m", | |
| 110 help="htseq-clip extract -s parameter. Choose crosslink site (s: start, m: middle, e: end ... ) (default: m)") | |
| 111 p.add_argument("--hce-g", | |
| 112 dest="hce_g", | |
| 113 type=int, | |
| 114 metavar='int', | |
| 115 default=0, | |
| 116 help="htseq-clip extract -g parameter. Number of nucleotides to offset for crosslink sites (default: 0)") | |
| 117 p.add_argument("--hce-q", | |
| 118 dest="hce_q", | |
| 119 type=int, | |
| 120 metavar='int', | |
| 121 default=10, | |
| 122 help="htseq-clip extract -q parameter. Minimum alignment quality for filtering (default: 10)") | |
| 123 p.add_argument("--hce-primary", | |
| 124 dest="hce_primary", | |
| 125 default=False, | |
| 126 action="store_true", | |
| 127 help="htseq-clip extract --primary parameter. Flag to use only primary positions of multimapping reads (default: False)") | |
| 128 p.add_argument("--hce-c", | |
| 129 dest="hce_c", | |
| 130 type=int, | |
| 131 metavar='int', | |
| 132 default=1, | |
| 133 help="htseq-clip extract -c parameter. Number of threads/cores to use (default: 1)") | |
| 134 p.add_argument("--hce-m", | |
| 135 dest="hce_m", | |
| 136 type=int, | |
| 137 metavar='int', | |
| 138 default=0, | |
| 139 help="htseq-clip extract -m parameter. Minimum read length for filtering (default: 0)") | |
| 140 p.add_argument("--hce-x", | |
| 141 dest="hce_x", | |
| 142 type=int, | |
| 143 metavar='int', | |
| 144 default=500, | |
| 145 help="htseq-clip extract -x parameter. Maximum read length for filtering (default: 500)") | |
| 146 p.add_argument("--hce-l", | |
| 147 dest="hce_l", | |
| 148 type=int, | |
| 149 metavar='int', | |
| 150 default=10000, | |
| 151 help="htseq-clip extract -l parameter. Maximum read interval length (default: 10000)") | |
| 152 p.add_argument("--hce-f", | |
| 153 dest="hce_f", | |
| 154 type=str, | |
| 155 help="htseq-clip extract -f parameter. Extract crosslink sites only from chromosomes given in this file (one chromosome ID per line)") | |
| 156 # htseq-clip count. | |
| 157 p.add_argument("--hcc-unstranded", | |
| 158 dest="hcc_unstranded", | |
| 159 default=False, | |
| 160 action="store_true", | |
| 161 help="htseq-clip count --unstranded parameter. Use this flag for non strand specific crosslink site counting (default: strand-specific counting)") | |
| 162 # More. | |
| 163 p.add_argument("--data-id", | |
| 164 dest="dataset_id", | |
| 165 type=str, | |
| 166 default="RBP", | |
| 167 metavar='str', | |
| 168 help="Dataset ID used as prefix for naming datasets (default: RBP)") | |
| 169 p.add_argument("--filter-bed", | |
| 170 dest="filter_bed", | |
| 171 type=str, | |
| 172 metavar='str', | |
| 173 help="Provide BED file to filter out BAM reads overlapping with --filter-bed regions") | |
| 174 p.add_argument("--filter-mode", | |
| 175 dest="filter_mode", | |
| 176 type=int, | |
| 177 default=1, | |
| 178 choices=[1, 2], | |
| 179 help="Filter mode for --filter-bed file. 1: keep BAM reads not overlapping with --filter-bed regions. 2: Keep only BAM reads overlapping with --filter-bed (default: 1)") | |
| 180 p.add_argument("--no-zipper", | |
| 181 dest="no_zipper", | |
| 182 default=False, | |
| 183 action="store_true", | |
| 184 help="Do not gzip output files (default: False)") | |
| 185 p.add_argument("--keep-imf", | |
| 186 dest="keep_imf", | |
| 187 default=False, | |
| 188 action="store_true", | |
| 189 help="Keep intermediate files (filtered BAM, BED, CSV) (default: False)") | |
| 190 return p | |
| 191 | |
| 192 | |
| 193 ################################################################################ | |
| 194 | |
| 195 def file_make_symbolic_link(file, file_link, | |
| 196 force_overwrite=True): | |
| 197 """ | |
| 198 Create a symbolic file link file_link (ln -s) of file. | |
| 199 | |
| 200 force_overwrite: | |
| 201 Overwrites existing file link. | |
| 202 | |
| 203 """ | |
| 204 assert os.path.exists(file), "file does not exist" | |
| 205 # Get absolute path needed for symlink to work. | |
| 206 file_abs_path = os.path.abspath(file) | |
| 207 | |
| 208 check_cmd = "ln -s " | |
| 209 if force_overwrite: | |
| 210 check_cmd += "-f " | |
| 211 check_cmd = check_cmd + file_abs_path + " " + file_link | |
| 212 output = subprocess.getoutput(check_cmd) | |
| 213 error = False | |
| 214 if output: | |
| 215 error = True | |
| 216 assert error is False, "ln has problems with your input:\n%s\n%s" % (check_cmd, output) | |
| 217 | |
| 218 | |
| 219 ################################################################################ | |
| 220 | |
| 221 def bam_remove_overlap_region_reads(in_bed, in_bam, out_bam, | |
| 222 params="", | |
| 223 sort_bed=False): | |
| 224 | |
| 225 """ | |
| 226 Remove BAM reads from in_bam, based on overlap with in_bed BED regions. | |
| 227 I.e. only BAM reads not overlapping with in_bed regions are stored in | |
| 228 out_bam. | |
| 229 | |
| 230 Using intersectBed instead of samtools view -L, which allows -s for strand | |
| 231 specific filtering. | |
| 232 | |
| 233 """ | |
| 234 assert os.path.exists(in_bed), "in_bed does not exist" | |
| 235 assert os.path.exists(in_bam), "in_bam does not exist" | |
| 236 | |
| 237 if sort_bed: | |
| 238 check_cmd = "sort -k1,1 -k2,2n " + in_bed + " | " + "intersectBed -abam " + in_bam + " -b stdin " + params + " -sorted > " + out_bam | |
| 239 else: | |
| 240 check_cmd = "intersectBed -abam " + in_bam + " -b " + in_bed + " " + params + " > " + out_bam | |
| 241 output = subprocess.getoutput(check_cmd) | |
| 242 error = False | |
| 243 if output: | |
| 244 error = True | |
| 245 assert error is False, "intersectBed has problems with your input:\n%s\n%s" % (check_cmd, output) | |
| 246 | |
| 247 | |
| 248 ################################################################################ | |
| 249 | |
| 250 if __name__ == '__main__': | |
| 251 | |
| 252 parser = setup_argument_parser() | |
| 253 args = parser.parse_args() | |
| 254 | |
| 255 assert os.path.exists(args.in_win_bed), "--win-bed file \"%s\" not found" % (args.in_win_bed) | |
| 256 | |
| 257 for bam_file in args.exp_bam_list: | |
| 258 assert os.path.exists(bam_file), "--exp-bams BAM file \"%s\" not found" % (bam_file) | |
| 259 | |
| 260 for bam_file in args.ctr_bam_list: | |
| 261 assert os.path.exists(bam_file), "--ctr-bams BAM file \"%s\" not found" % (bam_file) | |
| 262 | |
| 263 # assert len(args.exp_bam_list) > 1, "# --exp-bams needs to be > 1" | |
| 264 | |
| 265 if args.filter_bed: | |
| 266 assert os.path.exists(args.filter_bed), "--filter-bed file \"%s\" not found" % (args.filter_bed) | |
| 267 | |
| 268 hce_s_dic = {"s": 1, "i": 1, "d": 1, "m": 1, "e": 1} | |
| 269 assert args.hce_s in hce_s_dic, "invalid --hce-s given (choose between {s,i,d,m,e})" | |
| 270 | |
| 271 # Crop dataset ID. | |
| 272 data_id = args.dataset_id | |
| 273 if len(args.dataset_id) > 20: | |
| 274 data_id = args.dataset_id[:20] | |
| 275 # Remove spaces. | |
| 276 data_id = data_id.replace(" ", "_") | |
| 277 | |
| 278 # Output folders. | |
| 279 if not os.path.exists(args.out_folder): | |
| 280 os.makedirs(args.out_folder) | |
| 281 counts_folder = args.out_folder + "/counts_%s" % (data_id) | |
| 282 if not os.path.exists(counts_folder): | |
| 283 os.makedirs(counts_folder) | |
| 284 | |
| 285 """ | |
| 286 Create BAM index files. | |
| 287 | |
| 288 """ | |
| 289 print("# experiment BAMs: %i" % (len(args.exp_bam_list))) | |
| 290 print("# control BAMs: %i" % (len(args.ctr_bam_list))) | |
| 291 | |
| 292 exp_bams = [] | |
| 293 control_bams = [] | |
| 294 exp_ids = [] | |
| 295 control_ids = [] | |
| 296 params = "-s -v" | |
| 297 if args.filter_mode == 2: | |
| 298 params = "-s -u" | |
| 299 intermediate_files = [] | |
| 300 | |
| 301 print("Create BAM index files ... ") | |
| 302 for i, bam_file in enumerate(args.exp_bam_list): | |
| 303 | |
| 304 idx = i + 1 | |
| 305 | |
| 306 out_bam = args.out_folder + "/%s_exp_rep%i.bam" % (data_id, idx) | |
| 307 | |
| 308 if args.filter_bed: | |
| 309 print("Filter %s by --filter-bed ... " % (bam_file)) | |
| 310 bam_remove_overlap_region_reads(args.filter_bed, bam_file, out_bam, params=params) | |
| 311 intermediate_files.append(out_bam) | |
| 312 else: | |
| 313 # out_bam = bam_file | |
| 314 file_make_symbolic_link(bam_file, out_bam) | |
| 315 # shutil.move(bam_file, out_bam) | |
| 316 | |
| 317 pysam.index(out_bam) | |
| 318 exp_bams.append(out_bam) | |
| 319 exp_ids.append("%s_exp_rep%i" % (data_id, idx)) | |
| 320 | |
| 321 for i, bam_file in enumerate(args.ctr_bam_list): | |
| 322 | |
| 323 idx = i + 1 | |
| 324 | |
| 325 out_bam = args.out_folder + "/%s_ctrl_rep%i.bam" % (data_id, idx) | |
| 326 | |
| 327 if args.filter_bed: | |
| 328 print("Filter %s by --filter-bed ... " % (bam_file)) | |
| 329 bam_remove_overlap_region_reads(args.filter_bed, bam_file, out_bam, params=params) | |
| 330 intermediate_files.append(out_bam) | |
| 331 else: | |
| 332 # out_bam = bam_file | |
| 333 file_make_symbolic_link(bam_file, out_bam) | |
| 334 # shutil.move(bam_file, out_bam) | |
| 335 | |
| 336 pysam.index(out_bam) | |
| 337 control_bams.append(out_bam) | |
| 338 control_ids.append("%s_ctrl_rep%i" % (data_id, idx)) | |
| 339 | |
| 340 """ | |
| 341 htseq-clip extract crosslink sites. | |
| 342 | |
| 343 htseq-clip extract -i bam/Rbp3_total_rep1.bam -e 1 -s m -o sites/Rbp3_total_rep1.bed | |
| 344 | |
| 345 hce_e : -e [1,2] %i | |
| 346 hce_s : -s %s | |
| 347 -s {s,i,d,m,e}, --site {s,i,d,m,e} | |
| 348 Crosslink site choices, must be one of: s, i, d, m, e | |
| 349 s: start site | |
| 350 i: insertion site | |
| 351 d: deletion site | |
| 352 m: middle site | |
| 353 e: end site (default: e). | |
| 354 hce_g : -g offset nt %i | |
| 355 hce_q : -q 10 | |
| 356 hce_primary : --primary | |
| 357 hce_c : -c 4 | |
| 358 hce_m : -m 0 | |
| 359 hce_x : -x 500 | |
| 360 hce_l : -l 10000 | |
| 361 hce_f : -f chr_id file | |
| 362 | |
| 363 """ | |
| 364 | |
| 365 print("Extract crosslink sites from BAM files ... ") | |
| 366 | |
| 367 extract_params = " -e %i -s %s -g %i -q %i -c %i -m %i -x %i -l %i" % (args.hce_e, args.hce_s, args.hce_g, args.hce_q, args.hce_c, args.hce_m, args.hce_x, args.hce_l) | |
| 368 if args.hce_primary: | |
| 369 extract_params += " --primary " | |
| 370 if args.hce_f: | |
| 371 assert os.path.exists(args.hce_f), "--hce-f file \"%s\" not found" % (args.hce_f) | |
| 372 extract_params += " -f %s " % (args.hce_f) | |
| 373 | |
| 374 # Experiment BAMs. | |
| 375 exp_beds = [] | |
| 376 for i, bam_file in enumerate(exp_bams): | |
| 377 | |
| 378 idx = i + 1 | |
| 379 | |
| 380 out_bed = args.out_folder + "/%s_exp_rep%i.bed" % (data_id, idx) | |
| 381 | |
| 382 check_cmd = "htseq-clip extract -i " + bam_file + " -o " + out_bed + extract_params | |
| 383 output = subprocess.getoutput(check_cmd) | |
| 384 | |
| 385 print(check_cmd) | |
| 386 print(output) | |
| 387 | |
| 388 assert os.path.exists(out_bed), "htseq-clip extract -o file \"%s\" not found" % (out_bed) | |
| 389 exp_beds.append(out_bed) | |
| 390 intermediate_files.append(out_bed) | |
| 391 | |
| 392 # Control BAMs. | |
| 393 control_beds = [] | |
| 394 for i, bam_file in enumerate(control_bams): | |
| 395 | |
| 396 idx = i + 1 | |
| 397 | |
| 398 out_bed = args.out_folder + "/%s_ctrl_rep%i.bed" % (data_id, idx) | |
| 399 | |
| 400 check_cmd = "htseq-clip extract -i " + bam_file + " -o " + out_bed + extract_params | |
| 401 output = subprocess.getoutput(check_cmd) | |
| 402 | |
| 403 print(check_cmd) | |
| 404 print(output) | |
| 405 | |
| 406 assert os.path.exists(out_bed), "htseq-clip extract -o file \"%s\" not found" % (out_bed) | |
| 407 control_beds.append(out_bed) | |
| 408 intermediate_files.append(out_bed) | |
| 409 | |
| 410 """ | |
| 411 Count crosslink sites in sliding windows. | |
| 412 | |
| 413 htseq-clip count -i sites/Rbp3_total_rep1.bed -a annotation/Rbp3_uv_total_w75s10.txt -o counts/Rbp3_total_rep1.csv | |
| 414 | |
| 415 """ | |
| 416 | |
| 417 print("Count reads overlapping with windows ... ") | |
| 418 | |
| 419 count_params = "" | |
| 420 if args.hcc_unstranded: | |
| 421 count_params += " --unstranded" | |
| 422 # Experiment BAMs. | |
| 423 for i, bed_file in enumerate(exp_beds): | |
| 424 | |
| 425 idx = i + 1 | |
| 426 | |
| 427 out_csv = counts_folder + "/%s_exp_rep%i.csv.gz" % (data_id, idx) | |
| 428 if args.no_zipper: | |
| 429 out_csv = counts_folder + "/%s_exp_rep%i.csv" % (data_id, idx) | |
| 430 | |
| 431 check_cmd = "htseq-clip count -i " + bed_file + " -a " + args.in_win_bed + " -o " + out_csv + count_params | |
| 432 output = subprocess.getoutput(check_cmd) | |
| 433 | |
| 434 print(check_cmd) | |
| 435 print(output) | |
| 436 | |
| 437 assert os.path.exists(out_csv), "htseq-clip count -o file \"%s\" not found" % (out_csv) | |
| 438 intermediate_files.append(out_csv) | |
| 439 | |
| 440 # Control BAMs. | |
| 441 for i, bed_file in enumerate(control_beds): | |
| 442 | |
| 443 idx = i + 1 | |
| 444 | |
| 445 out_csv = counts_folder + "/%s_ctrl_rep%i.csv.gz" % (data_id, idx) | |
| 446 if args.no_zipper: | |
| 447 out_csv = counts_folder + "/%s_ctrl_rep%i.csv" % (data_id, idx) | |
| 448 | |
| 449 check_cmd = "htseq-clip count -i " + bed_file + " -a " + args.in_win_bed + " -o " + out_csv + count_params | |
| 450 output = subprocess.getoutput(check_cmd) | |
| 451 | |
| 452 print(check_cmd) | |
| 453 print(output) | |
| 454 | |
| 455 assert os.path.exists(out_csv), "htseq-clip count -o file \"%s\" not found" % (out_csv) | |
| 456 intermediate_files.append(out_csv) | |
| 457 | |
| 458 """ | |
| 459 Create an R friendly matrix file. | |
| 460 | |
| 461 htseq-clip createMatrix -i counts/ -b Rbp3 -o counts/Rbp3_uv_total_w75s10_counts.txt.gz | |
| 462 | |
| 463 """ | |
| 464 | |
| 465 print("Create R-friendly count matrix file ... ") | |
| 466 | |
| 467 # out_matrix = args.out_folder + "/%s_count_matrix.txt.gz" %(data_id) | |
| 468 out_matrix = args.out_folder + "/count_matrix.txt.gz" | |
| 469 if args.no_zipper: | |
| 470 # out_matrix = args.out_folder + "/%s_count_matrix.txt" %(data_id) | |
| 471 out_matrix = args.out_folder + "/count_matrix.txt" | |
| 472 | |
| 473 check_cmd = "htseq-clip createMatrix -i " + counts_folder + " -b " + data_id + " -o " + out_matrix | |
| 474 output = subprocess.getoutput(check_cmd) | |
| 475 | |
| 476 print(check_cmd) | |
| 477 print(output) | |
| 478 | |
| 479 assert os.path.exists(out_matrix), "htseq-clip createMatrix -o file \"%s\" not found" % (out_matrix) | |
| 480 | |
| 481 """ | |
| 482 Create sample info file for DEWSeq. | |
| 483 | |
| 484 Sample Info file format: | |
| 485 Sample name Sample type | |
| 486 Rbp3_uv_rep1 IP | |
| 487 Rbp3_uv_rep2 IP | |
| 488 Rbp3_uv_rep3 IP | |
| 489 Rbp3_total_rep1 SMI | |
| 490 Rbp3_total_rep2 SMI | |
| 491 Rbp3_total_rep3 SMI | |
| 492 | |
| 493 """ | |
| 494 | |
| 495 print("Write sample info file ... ") | |
| 496 sample_info_file = args.out_folder + "/sample_info.txt" | |
| 497 OUTTAB = open(sample_info_file, "w") | |
| 498 OUTTAB.write("Sample name\tSample type\n") | |
| 499 for sid in exp_ids: | |
| 500 OUTTAB.write("%s\tIP\n" % (sid)) | |
| 501 for sid in control_ids: | |
| 502 OUTTAB.write("%s\tSMI\n" % (sid)) | |
| 503 OUTTAB.close() | |
| 504 | |
| 505 """ | |
| 506 Delete intermediate files. | |
| 507 | |
| 508 """ | |
| 509 if not args.keep_imf: | |
| 510 print("Delete intermediate files ... ") | |
| 511 for imf in intermediate_files: | |
| 512 if os.path.exists(imf): | |
| 513 os.remove(imf) | |
| 514 | |
| 515 """ | |
| 516 Report. | |
| 517 | |
| 518 """ | |
| 519 | |
| 520 print("") | |
| 521 print("OUTPUT FILES") | |
| 522 print("============") | |
| 523 | |
| 524 print("Count matrix file (DEWSeq input file):\n%s" % (out_matrix)) | |
| 525 print("Sample info file (DEWSeq input file):\n%s" % (sample_info_file)) | |
| 526 print("") |
