-
Notifications
You must be signed in to change notification settings - Fork 4
/
SamReader.py
executable file
·65 lines (59 loc) · 2.81 KB
/
SamReader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#=========================================================================
# This is OPEN SOURCE SOFTWARE governed by the GNU General Public
# License (GPL) version 3, as described at www.opensource.org.
# 2018 William H. Majoros (bmajoros@alumni.duke.edu)
#=========================================================================
from __future__ import (absolute_import, division, print_function,
unicode_literals, generators, nested_scopes, with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii,
chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
from Rex import Rex
rex=Rex()
import gzip
from SamRecord import SamRecord
from CigarString import CigarString
#=========================================================================
# Attributes:
# fh : file handle
# headerLines : array of header lines
# Instance Methods:
# reader=SamReader(filename)
# samRecord=reader.nextSequence() # returns None at EOF
# (record,line)=reader.nextSeqAndText() # returns None at EOF
# reader.close()
# Class Methods:
#=========================================================================
class SamReader:
    """Sequential reader for SAM alignment files (plain text or gzipped).

    Attributes:
        fh: text-mode file handle being read, or None when the reader was
            constructed with filename=None.
        headerLines: the "@" header lines encountered so far, in file order,
            each kept exactly as read (including its line terminator).

    Typical use:
        reader=SamReader(filename)
        rec=reader.nextSequence()   # None at end of file
        reader.close()

    The reader can also be used in a ``with`` statement, which closes the
    file handle on exit.
    """

    # A line whose first character is one of these is skipped rather than
    # parsed as an alignment; only the "@" lines are kept in headerLines.
    _HEADER_CHARS = frozenset("@[")

    def __init__(self, filename):
        """Open *filename* for reading.

        A name ending in ".gz" is opened through gzip in text mode; any
        other name is opened as plain text.  Passing None opens nothing
        and leaves ``fh`` set to None.
        """
        self.headerLines = []
        # Always define fh so that close() is safe even if nothing was opened.
        self.fh = None
        if filename is not None:
            if filename.endswith(".gz"):
                self.fh = gzip.open(filename, "rt")
            else:
                self.fh = open(filename, "r")

    def __enter__(self):
        """Return the reader itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, excType, excValue, traceback):
        """Close the file handle; exceptions from the block are not suppressed."""
        self.close()
        return False

    def close(self):
        """Close the underlying file handle, if there is one."""
        if self.fh is not None:
            self.fh.close()

    def nextSequence(self):
        """Return the next alignment as a SamRecord, or None at end of file."""
        pair = self.nextSeqAndText()
        if pair is None:
            return None
        return pair[0]

    def nextSeqAndText(self):
        """Return (SamRecord, raw line) for the next alignment, or None at EOF.

        Leading header lines are consumed first; those starting with "@"
        are appended to ``headerLines``.  The returned text is the line
        exactly as read from the file.

        Raises:
            Exception: if the line has fewer than 11 whitespace-separated
                fields.
            ValueError: if the position or flags field is not an integer.
        """
        fh = self.fh
        line = fh.readline()
        # readline() yields an empty string at EOF, so plain truthiness is
        # the end-of-file test.
        while line and line[0] in self._HEADER_CHARS:
            if line[0] == "@":
                self.headerLines.append(line)
            line = fh.readline()
        if not line:
            return None
        fields = line.rstrip().split()
        if len(fields) < 11:
            raise Exception("can't parse sam line: " + line)
        # Of the 11 mandatory columns only these are passed on to SamRecord.
        ID, flags, refName, refPos, _mapQual, cigar = fields[:6]
        seq = fields[9]
        refPos = int(refPos) - 1  # convert 1-based to 0-based
        flags = int(flags)
        CIGAR = CigarString(cigar)
        tags = fields[11:]  # everything after the mandatory columns
        rec = SamRecord(ID, refName, refPos, CIGAR, seq, flags, tags)
        return (rec, line)
# M03884:303:000000000-C4RM6:1:1101:1776:15706 99 chrX:31786371-31797409 6687 44 150M = 6813 271 ATACTATTGCTGCGGTAATAACTGTAACTGCAGTTACTATTTAGTGATTTGTATGTAGATGTAGATGTAGTCTATGTCAGACACTATGCTGAGCATTTTATGGTTGCTATGTACTGATACATACAGAAACAAGAGGTACGTTCTTTTACA BBBBFFFFFFFGGGGGEFGGFGHFHFFFHHHFFHHHFHFHHHGFHEDGGHFHBGFHGBDHFHFFFHHHHFHHHHHGHGFFBGGGHFHFFHHFFFFHHHHGHGFHHGFHGHHHGFHFFHHFHHFFGFFFFGGEHFFEHHFGHHHGHHHHFB AS:i:300 XN:i:0