diff shuffleBed.py @ 14:76e1b1b21cce default tip

Deleted selected files
author xuebing
date Tue, 13 Mar 2012 19:05:10 -0400
parents 292186c14b08
children
line wrap: on
line diff
--- a/shuffleBed.py	Sat Mar 10 08:17:36 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,107 +0,0 @@
-'''
-simulate a random interval set that mimics the size and strand of a reference set 
-'''
-
-def inferSizeFromRefBed(filename,header):
-    '''
-    read reference interval set, get chrom size information
-    '''
-    chrSize = {}
-    f = open(filename)
-    if header:
-        header = f.readline()
-    for line in f:
-        flds = line.strip().split('\t')
-        if not chrSize.has_key(flds[0]):
-            chrSize[flds[0]] = int(flds[2])
-        elif chrSize[flds[0]] < int(flds[2]):
-            chrSize[flds[0]] = int(flds[2])
-    f.close()
-    return chrSize 
-
-def getChrSize(filename):
-    chrSize = {}
-    f = open(filename)
-    for line in f:
-        flds = line.strip().split('\t')
-        if len(flds) >1:
-            chrSize[flds[0]] = int(flds[1])
-    f.close()
-    return chrSize
-    
-def makeWeightedChrom(chrSize):
-    '''
-    make a list of chr_id, the freq is proportional to its length
-    '''
-     
-    genome_len = 0
-    
-    for chrom in chrSize:
-        chrom_len = chrSize[chrom]
-        genome_len += chrom_len
-
-    weighted_chrom = []
-    for chrom in chrSize:
-        weight = int(round(1000*float(chrSize[chrom])/genome_len))
-        weighted_chrom += [chrom]*weight
-
-    return weighted_chrom            
-
-def randomIntervalWithinChrom(infile,outfile,chrSize,header):
-    '''
-    '''
-    fin = open(infile)
-    if header:
-        header = fin.readline()
-    fout = open(outfile,'w')
-    n = 0
-    for line in fin:
-        n = n + 1
-        flds = line.strip().split('\t')
-        interval_size = int(flds[2]) - int(flds[1])
-        rstart = random.randint(0,chrSize[flds[0]]-interval_size)
-        fout.write(flds[0]+'\t'+str(rstart)+'\t'+str(rstart+interval_size)+'\t'+str(n)+'\t0\t+\n')
-    fin.close()
-    fout.close()   
-
-def randomIntervalAcrossChrom(infile,outfile,chrSize,weighted_chrom,header):
-    '''
-    '''
-    fin = open(infile)
-    if header:
-        header = fin.readline()
-    fout = open(outfile,'w')
-    n = 0
-    for line in fin:
-        n = n + 1
-        flds = line.strip().split('\t')
-        interval_size = int(flds[2]) - int(flds[1])
-        # find a random chrom
-        flds[0] = weighted_chrom[random.randint(0, len(weighted_chrom) - 1)]
-        # random start in the chrom
-        rstart = random.randint(0,chrSize[flds[0]]-interval_size)
-        fout.write(flds[0]+'\t'+str(rstart)+'\t'+str(rstart+interval_size)+'\t'+str(n)+'\t0\t+\n')
-    fin.close()
-    fout.close()            
-
-import sys,random
-def main():
-    # python random_interval.py test100.bed testout.bed across header human.hg18.genome 
-
-    reference_interval_file = sys.argv[1]
-    output_file = sys.argv[2]
-    across_or_within_chrom = sys.argv[3] # within or across 
-    if sys.argv[4] == 'header':
-        header = True 
-    else:
-        header = False
-    if len(sys.argv) == 6:
-        chrom_size_file = sys.argv[5]
-        chrSize = getChrSize(chrom_size_file)
-    else:
-        chrSize = inferSizeFromRefBed(reference_interval_file,header) 
-    if across_or_within_chrom == 'within':            
-        randomIntervalWithinChrom(reference_interval_file,output_file,chrSize,header)
-    else:
-        randomIntervalAcrossChrom(reference_interval_file,output_file,chrSize,makeWeightedChrom(chrSize),header)   
-main()