Mercurial > repos > greg > vsnp_statistics
comparison vsnp_statistics.py @ 25:b908bb18008a draft
Uploaded
author | greg |
---|---|
date | Thu, 16 Sep 2021 00:56:07 +0000 |
parents | b34843f09f9f |
children |
comparison
equal
deleted
inserted
replaced
24:39ab5405b509 | 25:b908bb18008a |
---|---|
62 # Gather Series into a data frame. | 62 # Gather Series into a data frame. |
63 fastq_df = pandas.DataFrame(dict(id=s1, seq=s2)).set_index(['id']) | 63 fastq_df = pandas.DataFrame(dict(id=s1, seq=s2)).set_index(['id']) |
64 # Starting at row 3, keep every 4 row | 64 # Starting at row 3, keep every 4 row |
65 # random sample specified number of rows. | 65 # random sample specified number of rows. |
66 file_size = nice_size(os.path.getsize(fastq_file)) | 66 file_size = nice_size(os.path.getsize(fastq_file)) |
67 total_reads = int(len(fastq_df.index) / 4) | 67 total_reads = len(seqs) |
68 # Mean Read Length | 68 # Mean Read Length |
69 if sampling_size > total_reads: | 69 if sampling_size > total_reads: |
70 sampling_size = total_reads | 70 sampling_size = total_reads |
71 fastq_df = fastq_df.iloc[3::4].sample(sampling_size) | 71 try: |
| | 72 fastq_df = fastq_df.iloc[3::4].sample(sampling_size) |
| | 73 except ValueError: |
| | 74 fastq_df = fastq_df.iloc[3::4].sample(sampling_size, replace=True) |
72 dict_mean = {} | 75 dict_mean = {} |
73 list_length = [] | 76 list_length = [] |
74 i = 0 | 77 i = 0 |
75 for id, seq, in fastq_df.iterrows(): | 78 for id, seq, in fastq_df.iterrows(): |
76 dict_mean[id] = numpy.mean(letter_annotations[i]) | 79 dict_mean[id] = numpy.mean(letter_annotations[i]) |