Demo entry 6683300

py

   

Submitted by anonymous on Dec 12, 2017 at 17:24
Language: Python 3. Code size: 6.0 kB.

#!/usr/bin/env python
# -*- coding: utf-8 -*- 


import matplotlib
matplotlib.use('Agg')
import sys
import re
import numpy as np
import matplotlib.pyplot as plt; plt.rcdefaults()
import matplotlib.patches as mpatches
import time
import gc  

def read_gff(fn):
    f=open(fn)
    data,dict=[],{}
    for line in f.readlines():
        a=line.strip().split("\t")
        dict[a[1]]=[a[0],a[2],a[3],a[4],a[5]]
        data.append(a)
    return data,dict

def read_lens(fn):
    fp=open(fn)
    data=[]
    for row in fp.readlines():
        r1,r2=row.split()
        data.append([r1,r2])
    return data

def plot_chr1(lens,gl,gl2,mark,name):
    total_lens=sum([float(k[1]) for k in lens])
    step=gl/float(total_lens)
    gl_start,n,start_x=0.95,0,0.05
    mark_y=0.04
    align = dict(family='Times New Roman',style='normal',horizontalalignment="center", verticalalignment="center")
    for k in lens:
        n+=float(k[1])
        mark_new=str(mark)+str(k[0])
        x=gl_start-float(n)*step
        mark_x=x+0.5*float(k[1])*step
        plt.plot([start_x,start_x+gl2],[x,x],linestyle = '-',color='black',linewidth=0.5)
        plt.text(mark_y,mark_x,mark_new,color='black',fontsize = 12,rotation = 90,weight='semibold',**align)        
    plt.plot([start_x,start_x+gl2],[gl_start,gl_start],linestyle='-',color='black',linewidth=1)
    plt.text(mark_y-0.02,0.5*(2*gl_start-gl),name,color='black',fontsize = 18,rotation = 90,weight='semibold',**align) 
    return step

def plot_chr2(lens,gl,gl2,mark,name):
    total_lens=sum([float(k[1]) for k in lens])
    step=gl/float(total_lens)
    gl_start,n,start_x=0.05,0,0.95
    mark_y=0.96
    align = dict(family='Times New Roman',style='normal',horizontalalignment="center", verticalalignment="center")
    for k in lens:
        n+=float(k[1])
        mark_new=str(mark)+str(k[0])
        x=gl_start+float(n)*step
        mark_x=x-0.5*float(k[1])*step
        plt.plot([x,x],[start_x,start_x-gl2],linestyle='-',color='black',linewidth=0.5)
        plt.text(mark_x,mark_y,mark_new,color='black',fontsize = 12,rotation = 0,weight='semibold',**align)
    plt.plot([gl_start,gl_start],[start_x,start_x-gl2],linestyle='-',color='black',linewidth=1)
    plt.text(0.5*(2*gl_start+gl),mark_y+0.02,name,color='black',fontsize = 18,rotation = 0,weight='semibold',**align)
    return step


def gene_location(gff, lens, step):
    loc_gene,dict_chr,n={},{},0
    for i in lens:        
        dict_chr[i[0]]=n
        n+=float(i[1])
    for k in gff:
        if k[0] not in dict_chr.keys():
            continue
        loc=(float(dict_chr[k[0]])+float(k[5]))*step
        loc_gene[k[1]]=loc
    return loc_gene

def read_blast(fn):
    f=open(fn)
    data=[]
    for line in f.readlines():
        a=line.strip().split("\t")
        data.append(a)
    return data

def deal_blast(blastout,dict_gff1, dict_gff2, len_1, lens_2, evalue, score, hitnum, repnum):
    dict_blast={}
    for k in blastout:
        if str(k[0])==str(k[1]) or float(k[11])<score or float(k[10])>evalue:
            continue
        if k[0] not in dict_blast.keys():
            dict_blast[k[0]]=[[k[0],k[1]]]
        else:
            dict_blast[k[0]].append([k[0],k[1]])
    data=[]
    for a in dict_blast.values():
        b=[]
        for k in a:
            if k not in b:
                b.append(k)
        b=b[:repnum]
        data.append(b)
    return data

def plot_dot(root,data,loc1,loc2,gl):
    gl_start1,gl_start2=0.95,0.05
    for i in range(len(data)):
        if len(data[i])==0:
            continue
        else:
            for j in range(len(data[i])):
                if data[i][j][0] not in loc1.keys() or data[i][j][1] not in loc2.keys():
                    continue
                x,y=loc1[data[i][j][0]],loc2[data[i][j][1]]
                x,y=gl_start1-x,gl_start2+y
                #print(x,y)
                if j == 0:                    
                    Rectangle(root,[y,x],0.0008,0.0008,'red',0.8)
                elif j<=hitnum:
                    Rectangle(root,[y,x],0.0008,0.0008,'blue',0.5)
                else:
                    Rectangle(root,[y,x],0.0008,0.0008,'grey',0.3)


def Rectangle(ax,loc,heigt,width,color,alpha):
    p=mpatches.Rectangle(loc, width, heigt, edgecolor="none",facecolor=color,alpha=alpha)
    ax.add_patch(p)

if __name__=="__main__":
    plt.figure(figsize=(10, 10))
    root = plt.axes([0, 0, 1, 1])

    align = dict(family='Arial',style='normal',horizontalalignment="center", verticalalignment="center")
    t1=time.time()
    print("Dotplot are ready to begin")

    gff_1,dict_gff1=read_gff("os.gff")
    gff_2,dict_gff2=read_gff("sb.gff")
    t2=time.time()
    print("Reading gff took "+str(t2-t1)+" second")

    lens_1=read_lens("os_chrs.lens")
    lens_2=read_lens("sb_chrs.lens")
    t3=time.time()
    print("Reading lens took "+str(t3-t2)+" second")

    gl1,gl2=0.92,0.92
    step_1=plot_chr1(lens_1,gl1,gl2,'Os','Oryza sativa')
    step_2=plot_chr2(lens_2,gl2,gl1,'Sb','Sorghum bicolor')

    blastout=read_blast("os_sb.1e-5.blast")
    t4=time.time()
    print("Reading blastout took "+str(t4-t3)+" second")

    evalue,score,hitnum,repnum=1e-5,100,5,20
    data=deal_blast(blastout,dict_gff1,dict_gff2,lens_1,lens_2,evalue,score,hitnum,repnum)
    gene_loc_1=gene_location(gff_1,lens_1,step_1)
    gene_loc_2=gene_location(gff_2,lens_2,step_2)
    t5=time.time()
    print("Dealing blastout took "+str(t5-t4)+" second")

    del blastout,gff_1,gff_2,dict_gff1,dict_gff2
    gc.collect()
    plot_dot(root,data,gene_loc_1,gene_loc_2,gl1)
    t6=time.time()
    print("Ploting dot took "+str(t6-t5)+" second")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
    plt.savefig("os_sb.dotplot.png",dpi=500)
    t7=time.time()
    print("Dotplot totaly took "+str(t7-t1)+" second")

This snippet took 0.02 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).