-
Notifications
You must be signed in to change notification settings - Fork 1
/
whitelist_fixed.py
41 lines (26 loc) · 960 Bytes
/
whitelist_fixed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import argparse
import sys
from collections import Counter
from itertools import islice

import pysam
def get_options():
    """Parse the command-line arguments of the whitelist script.

    Returns:
        The ``argparse`` namespace with three attributes: ``input_reads``
        (the ``-i`` value, ``None`` when omitted), ``cells`` (``-c``, int,
        default 5000) and ``reads`` (``-r``, int, default 100000000).
    """
    cli = argparse.ArgumentParser(prog='whitelist_fixed.py')

    # One (flags, keyword settings) pair per supported option.
    option_specs = (
        (('-i', '--input_reads'),
         {'help': 'File containing cell barcodes'}),
        (('-c', '--cells'),
         {'help': 'Number of cells', 'default': 5000, 'type': int}),
        (('-r', '--reads'),
         {'help': 'Number of reads', 'default': 100000000, 'type': int}),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    return cli.parse_args()
# Module-level tally mapping each read sequence to the number of times it was
# seen; parse_whitelist() fills it in and reports its most common entries.
whitelist = Counter()
def parse_whitelist():
    """Print the most frequent read sequences of a FASTQ file, one per line.

    Reads the command-line options via ``get_options()``, tallies the
    sequence of at most ``--reads`` entries from the ``--input_reads`` file
    into the module-level ``whitelist`` counter, then writes the ``--cells``
    most common sequences to standard output, most frequent first.

    A file holding fewer entries than ``--reads`` is counted in full instead
    of aborting when the reader runs out of entries.
    """
    options = get_options()
    # A negative limit means "read nothing"; islice() rejects negative stops.
    max_reads = max(options.reads, 0)
    n_cells = options.cells

    # The context manager closes the FASTQ handle even if counting fails.
    with pysam.FastqFile(options.input_reads, 'rb') as fastq:
        # islice() stops at whichever comes first: the read limit or the end
        # of the file, so a short file never raises StopIteration here.
        whitelist.update(entry.sequence for entry in islice(fastq, max_reads))

    for barcode, _count in whitelist.most_common(n_cells):
        sys.stdout.write(f'{barcode}\n')
# Run the whitelist extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    parse_whitelist()