Mercurial > repos > xuebing > bed_clean
changeset 7:3ee42ab930b6
Deleted selected files
author | xuebing |
---|---|
date | Sat, 31 Mar 2012 13:08:17 -0400 |
parents | 8fcf33cf58be |
children | c887ed5d2c51 |
files | bedClean.py bedclean.xml |
diffstat | 2 files changed, 0 insertions(+), 78 deletions(-) [+] |
line wrap: on
line diff
--- a/bedClean.py Sat Mar 31 13:03:54 2012 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-import sys
-
-def readChrSize(filename):
-    f = open(filename)
-    chrSize = {}
-    for line in f:
-        chrom,size = line.strip().split()
-        chrSize[chrom]=int(size)
-    f.close()
-    return chrSize
-
-def cleanFile(filename,chrSize,outfile):
-    f = open(filename)
-    out = open(outfile,'w')
-    i = 0
-    for line in f:
-        i = i + 1
-        flds = line.strip().split('\t')
-        if len(flds) < 3:
-            print 'line',i,'incomplete line:\n',line
-        elif chrSize.has_key(flds[0]):
-            if int(flds[1]) > int(flds[2]):
-                tmp = flds[1]
-                flds[1] = flds[2]
-                flds[2] = tmp
-            if int( flds[1]) < 0 or int(flds[2]) <0:
-                print 'line',i,'negative coordinates:\n',line
-            elif int(flds[2]) > chrSize[flds[0]]:
-                print 'line',i,'end larger than chr size:\n',line
-            else:
-                if flds[5] == '*':
-                    flds[5] = '+'
-                    print 'line',i,' strand * changed to +\n', line
-                out.write('\t'.join(flds)+'\n')
-        else:
-            print 'line',i,'chromosome',flds[0],'not found!\n',line
-    f.close()
-    out.close()
-
-if len(sys.argv) < 4:
-    print "python bedClean.py in.bed chrsizefile out.bed"
-    exit()
-cleanFile(sys.argv[1],readChrSize(sys.argv[2]),sys.argv[3])
--- a/bedclean.xml Sat Mar 31 13:03:54 2012 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,35 +0,0 @@
-<tool id="bedclean" name="clean interval">
-  <description>remove off-chromosome lines</description>
-  <command interpreter="python">bedclean.py $input $genome $output > $log </command>
-  <inputs>
-    <param name="input" type="data" format="interval" label="Original interval file"/>
-
-    <param name="genome" type="select" label="Select chromsome size file" >
-      <options from_file="chrsize.loc">
-        <column name="name" index="0"/>
-        <column name="value" index="1"/>
-      </options>
-    </param>
-
-  </inputs>
-  <outputs>
-    <data format="input" name="output" label="${tool.name} on ${on_string} (interval)"/>
-    <data format="txt" name="log" label="${tool.name} on ${on_string} (log)"/>
-  </outputs>
-  <help>
-
-**Description**
-
-remove lines that are
-
-1. comment or track name lines
-
-2. on chr*_random
-
-3. or have negative coordinates
-
-4. or the end is larger than chromosome size
-
-
-  </help>
-</tool>