Demo entry 6636610

1

   

Submitted by wq on Aug 27, 2017 at 10:04
Language: C++. Code size: 1.5 kB.

# DIY snip caller
import sys
from collections import defaultdict
# This code was written in about 20 minutes to demonstrate
# a really simple snp caller. As it is it calls only
# homozygous mutation defined as over 50% of calls.

print "##fileformat=VCFv4.2"
print "\t".join("#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT bwa.bam".split())

for line in sys.stdin:
    # Strip the newline then split by tabs.
    column = line.strip().split("\t")
    
    # The fifth base contains the basecalls# (zero based counting).
    bases = column[4]
    
    # The quals column will tell us how many bases are called.
    quals = column[5]
    
    # Upper case everything.
    # We do lose strand information with that.
    bases = bases.upper()
    count = defaultdict(int)  
    for c in bases:
        count[c] += 1

    # How many bases cover the site
    depth = float(len(quals))
    
    for m in "ATGC":
        if count[m]/depth > 0.50:
        # We got a homozygous mutations
        # produce the CVF records
        # Collect output into an array.

        info = "DP=%s" % (int(depth))
        format = "GT:PL"
        values = "1/1:30"    
        out = [column[0], column[1], ".", column[2], m, 30, ".", info, format, values]
        
        # Make all elements of the array a string.
        out = map(str, out)
        
        # Join the fields of the output array by tabs
        # Print the joined fields.
        print "\t".join(out)

This snippet took 0.00 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).