Skip to content

Commit 0006389

Browse files
committed
Updated to conform with mira's base calling.
1 parent 55902be commit 0006389

1 file changed

Lines changed: 9 additions & 24 deletions

File tree

BAM_to_TCS.py

Lines changed: 9 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@
1515
# along with 4Pipe4. If not, see <http://www.gnu.org/licenses/>.
1616

1717
import pysam
18-
from pipeutils import ASCII_to_num, Ambiguifier
18+
from pipeutils import ASCII_to_num, FASTA_parser
1919

2020

21-
def TCSwriter(bamfile_name):
21+
def TCSwriter(bamfile_name, fasta_d):
2222
"""Convert the bamfile into the TCS format. The writing and the parsing
2323
are done simultaneously."""
2424

@@ -74,10 +74,10 @@ def TCSwriter(bamfile_name):
7474
tcov += sum(covs) # Workaround
7575

7676
# Define reference base (AKA "B") and qual (AKA "Q")
77-
freqbase = major_base(bases)
78-
refbase = Ambiguifier(freqbase)
7977

80-
refqual = max((quals[basetrans.find(x)] for x in freqbase))
78+
refbase = fasta_d[refs][position]
79+
80+
refqual = quals[basetrans.find(refbase)]
8181

8282
# Define padded and unpadded positions (AKA "padPos" and "upadPos")
8383
if refbase == "*":
@@ -138,22 +138,6 @@ def covs_and_quals(bases):
138138
return covs, quals
139139

140140

141-
def major_base(bases):
142-
"""Take a dict like {base: [quals]} and return the most frequent
143-
base(s)."""
144-
base_counts = {}
145-
for k in bases:
146-
base_counts[k] = len(bases[k])
147-
148-
highest = max(base_counts.values())
149-
maxbases = [k for k, v in base_counts.items() if v == highest]
150-
151-
if len(maxbases) > 1 and "*" in maxbases:
152-
maxbases.remove("*")
153-
154-
return maxbases
155-
156-
157141
def QualityCalc(quals):
158142
"""Calculate individual bases qualities, just like mira does, as seen here:
159143
http://www.freelists.org/post/mira_talk/Quality-Values,4"""
@@ -180,10 +164,11 @@ def QualityCalc(quals):
180164
return qual
181165

182166

183-
def RunModule(bamfile_name):
167+
def RunModule(bamfile_name, padded_fasta_name):
184168
"""Run the module."""
185-
TCSwriter(bamfile_name)
169+
TCSwriter(bamfile_name, FASTA_parser(padded_fasta_name))
186170

187171
if __name__ == "__main__":
172+
# Usage: python3 BAM_to_TCS.py file.bam file_out.padded.fasta
188173
from sys import argv
189-
RunModule(argv[1])
174+
RunModule(argv[1], argv[2])

0 commit comments

Comments
 (0)