Demo entry 6345787

rppa

   

Submitted by liuxy on Feb 08, 2017 at 03:01
Language: Python. Code size: 18.0 kB.

#!/usr/bin/python
# -*- coding: utf-8 -*-

import os
import argparse
from argparse import RawTextHelpFormatter

parser = argparse.ArgumentParser(description="RPPA data analysis pipeline based on SuperCurve.\n\tContact:liu.xuanyu@puruijizhun.com\n",formatter_class=RawTextHelpFormatter)
parser.add_argument('--config',help="rppa configuration file name;(required)",required=True)
argv = vars(parser.parse_args())

configFile = argv['config']

paraList=[]
with open(configFile,'r') as FILE:
	for line in FILE:
		if line.strip().startswith('#'):
			continue
		if line.strip() == '':
			continue
		lineList = [i.strip() for i in line.strip().split('=')]
		paraList.append(lineList)
paraDict = dict(paraList)			
outDir = paraDict['outDir']

def makeDir(dirAbsPath):
        ''' make a dir if exists, and return a string'''
        if not os.path.exists(dirAbsPath):
                assert not os.system('mkdir %s' % dirAbsPath)
        return dirAbsPath
makeDir(outDir)
if os.path.exists('%s/Rplots.pdf'%outDir):
	assert not os.system('rm -f %s/Rplots.pdf'%outDir)
#print paraDict #{'plotSurface': 'FALSE', 'cutoff': '0.8', 'imageDir': 'NULL', 'ignoreNegative': 'TRUE', 'antibodyFile': '/Users/xuanyu/iHOME/RPPAcodetest/rppaTumorData/proteinAssay.tsv', 'normMethod': 'vs', 'spatialAdj': 'TRUE', 'aliasFile': '/Users/xuanyu/iHOME/RPPAcodetest/rppaTumorData/layoutInfo.tsv', 'ordering': 'decreasing', 'confidenceInterval': 'FALSE', 'spotMeasure': 'Mean.Net', 'designFile': '/Users/xuanyu/iHOME/RPPAcodetest/rppaTumorData/slidedesign.tsv', 'prefitQC': 'FALSE', 'altLayout': 'NULL', 'txtDir': '/Users/xuanyu/iHOME/RPPAcodetest/rppaTumorData', 'grouping': 'bySample', 'onlyNormQCgood': 'FALSE', 'center': 'TRUE', 'k': '100', 'fitModel': 'logistic', 'trimLevel': '2', 'fitMethod': 'nls', 'gamma': '0.1', 'software': 'microvigen'}
for item in ['imageDir','txtDir','designFile','aliasFile','antibodyFile','outDir','software','altLayout']:
	paraDict[item] = paraDict[item] if paraDict[item] == 'NULL' else '"' + paraDict[item] + '"'


Rscript = '''setwd(%s)
########## parameter setting ##################
library(SuperCurve)
library(calibrate)
txtdir <- %s
outdir <- %s
imgdir <- %s
designfile <- %s
spatial <- %s
prefitqc <- %s
onlynormqcgood <- %s
software <- %s
alt.layout <- %s
antibodyfile <- %s
doprefitqc <- %s
'''%(paraDict['outDir'],paraDict['txtDir'],paraDict['outDir'],paraDict['imageDir'],paraDict['designFile'],paraDict['spatialAdj'],paraDict['prefitQC'],paraDict['onlyNormQCgood'],paraDict['software'],paraDict['altLayout'],paraDict['antibodyFile'],paraDict['prefitQC'])

Rscript +='''## Collect design parameters
designparams <- RPPADesignParams(grouping=\"%s\",
           ordering=\"%s\",
           center=%s,
           aliasfile=%s,
           designfile=%s)       
'''%(paraDict['grouping'],paraDict['ordering'],paraDict['center'],paraDict['aliasFile'],paraDict['designFile'])

Rscript += '''## Collect fit parameters
fitparams <- RPPAFitParams(measure=\"%s\",
           model=\"%s\",
           method=\"%s\",
           trim=%s,
           ci=%s,
           ignoreNegative=%s,
           warnLevel=-1)
'''%(paraDict['spotMeasure'],paraDict['fitModel'],paraDict['fitMethod'],paraDict['trimLevel'],paraDict['confidenceInterval'],paraDict['ignoreNegative'])

Rscript += '''## Collect spatial parameters
spatialparams <- RPPASpatialParams(cutoff=%s,
           k=%s,
           gamma=%s,
           plotSurface=%s)
'''%(paraDict['cutoff'],paraDict['k'],paraDict['gamma'],paraDict['plotSurface'])

Rscript += '''## Collect normalization parameters
normparams <- RPPANormalizationParams(method=\"%s\")
'''%paraDict['normMethod']

Rscript +='''## Error checking
if (!is.character(txtdir)) {
  stop(sprintf("argument %s must be character", sQuote("txtDir")))
} else if (!(length(txtdir) == 1)) {
  stop(sprintf("argument %s must be of length 1", sQuote("txtDir")))
} else if (!dir.exists(txtdir)) {
  stop(sprintf("directory %s does not exist", dQuote(txtdir)))
} else if (txtdir == outdir) {
  stop("quantification and output directories must be different",
       call.=FALSE)
}
if (!is.RPPADesignParams(designparams)) {
  stop(sprintf("argument %s must be object of class %s", 
               sQuote("designparams"), "RPPADesignParams"))
}
if (!is.RPPAFitParams(fitparams)) {
  stop(sprintf("argument %s must be object of class %s", 
               sQuote("fitparams"), "RPPAFitParams"))
}
if (!is.null(spatialparams)) {
  if (!is.RPPASpatialParams(spatialparams)) {
    stop(sprintf("argument %s must be object of class %s", 
                 sQuote("spatialparams"), "RPPASpatialParams"))
  }
}
if (!is.RPPANormalizationParams(normparams)) {
  stop(sprintf("argument %s must be object of class %s", 
               sQuote("normparams"), "RPPANormalizationParams"))
}
if (!is.logical(doprefitqc)) {
  stop(sprintf("argument %s must be logical", sQuote("doprefitqc")))
} else if (!(length(doprefitqc) == 1)) {
  stop(sprintf("argument %s must be of length 1", sQuote("doprefitqc")))
} else if (is.na(doprefitqc)) {
  doprefitqc <- FALSE
  warning(sprintf("argument %s converted from NA to FALSE", 
                  dQuote(doprefitqc)), immediate. = FALSE)
}
if (is.null(designfile)) {
  if (spatial || prefitqc) {
    stop("slide design file required for requested processing",
         call.=FALSE)
  } else {
    message("slide design file suggested for any processing",
            call.=FALSE)
  }
}
if (onlynormqcgood && !prefitqc) {
  stop("prefit qc required for requested processing (onlynormqcgood)",
       call.=FALSE)
}
if (!is.null(antibodyfile)) {
  if (!is.character(antibodyfile)) {
    stop(sprintf("argument %s must be character", sQuote("antibodyfile")))
  } else if (!(length(antibodyfile) == 1)) {
    stop(sprintf("argument %s must be of length 1", sQuote("antibodyfile")))
  } else if (!nzchar(antibodyfile)) {
    stop(sprintf("argument %s must not be empty string", 
                 sQuote("antibodyfile")))
  }
}
'''
Rscript +='''######## read level 1 data .txt file #################
getQuantificationFilenames <- function(txtdir) {
  txt.re <- "\\\.[tT][xX][tT]$"
  txtfiles <- list.files(path = txtdir, pattern = txt.re)
  settingsfile.tf <- txtfiles %in% "sc-settings.txt"
  txtfiles[!settingsfile.tf]
}
call <- match.call()
slideFilenames <- getQuantificationFilenames(txtdir)
if (length(slideFilenames) == 0) {
  stop(sprintf("no quantification files found in directory %s", 
               dQuote(txtdir)))
}
'''
Rscript +='''######## construct antibody list  ####################
## Load antibody information, if provided
loadAntibodyInfo <- function(antibodyfile,
                              slidefiles) {
    ## Check arguments
    stopifnot(is.character(antibodyfile) && length(antibodyfile) == 1)
    stopifnot(is.character(slidefiles) && length(slidefiles) >= 1)

    ## Begin processing
    tryCatch({
            stopifnot(file.exists(antibodyfile))
            if (file.info(antibodyfile)$isdir) {
                stop("argument is not a file")
            }

            ## Read datafile
            proteinassay.df <- read.delim(antibodyfile,
                                          as.is=TRUE,
                                          quote="",
                                          row.names=NULL)

            reqdColnames <- c("Antibody",
                              "Filename")

            ## Ensure required columns exist
            found <- reqdColnames %in% colnames(proteinassay.df)
            if (!(all(found))) {
                missingColumns <- reqdColnames[!found]
                stop(sprintf(ngettext(length(missingColumns),
                                      "missing required column: %s",
                                      "missing required columns: %s"),
                             paste(dQuote(missingColumns), collapse=", ")))
            }
        },
        error=function(cond) {
            stop(sprintf("cannot load antibody data from file %s - %s",
                         dQuote(antibodyfile),
                         conditionMessage(cond)))
        })

    ## Extract information from data.frame
    antibodies <- vector("list", length(slidefiles))
    names(antibodies) <- slidefiles

    for (filename in slidefiles) {
        x.antibody <- match(filename, proteinassay.df$Filename)[1]
        antibody <- proteinassay.df$Antibody[x.antibody]
        if (!is.na(antibody)) {
            x.slidefiles <- match(filename, slidefiles)
            antibodies[[x.slidefiles]] <- antibody
        }
    }

    return(antibodies)
}

ab.list <- if (!is.null(antibodyfile)) {
         loadAntibodyInfo(antibodyfile, slideFilenames)
               } else {
            vector("list", length(slideFilenames))
}
## Fill in missing values with generated defaults
x.which <- which(sapply(ab.list, is.null))
txt.re <- "\\\.[tT][xX][tT]$"
for (x in x.which) {
  ab.list[[x]] <- sub(txt.re, "", slideFilenames[x])
}
## Ensure antibody names are unique
antibodies <- make.unique(abnames <- unlist(ab.list, use.names = FALSE))
if (!identical(antibodies, abnames)) {
  warning("adjusting antibody names to be unique")
}
remove(abnames)
'''
Rscript += '''############## set up tf TRUE/False variables ###############
input.tf <- logical(length(slideFilenames))
prefitqc.tf <- logical(length(slideFilenames))
spatial.tf <- logical(length(slideFilenames))
fits.tf <- logical(length(slideFilenames))
design <- NULL
rppas <- array(list(), length(slideFilenames), list(antibodies))
'''
Rscript += '''###### construct RPPA & RPPADesign objects #############
for (i in seq_along(slideFilenames)) {
  slideFilename <- slideFilenames[i]
  antibody <- antibodies[i]
  message(paste("reading", slideFilename))
  flush.console()
  rppa <- tryCatch({
    RPPA(slideFilename, path = txtdir, antibody = antibody, 
         software = software, alt.layout = alt.layout)
  }, error = function(e) {
    message(conditionMessage(e))
    NULL
  })
  if (is.RPPA(rppa)) {
    rppas[[i]] <- rppa
    input.tf[i] <- TRUE
    if (is.null(design)) {
      design <- RPPADesignFromParams(rppa, designparams)
    }
    plotFileName <- sprintf("%s/Intensity_vs_dilutionStep_%s.pdf",outdir,rppa@antibody)
    print(plotFileName)
    pdf(plotFileName)
    plot(rppa, design, fitparams@measure)
    dev.off()
  }
}
if (!is.RPPADesign(design)) {
  stop("no slides can be processed")
}
'''
Rscript += '''#### function to plot probs of good slides based on pre-fit QC 
plotProbabilityOfGoodSlide <- function(qcprobs,
                                        good.cutoff=0.8) {
  stopifnot(is.numeric(qcprobs)     && length(qcprobs) >= 1)
  stopifnot(is.numeric(good.cutoff) && length(good.cutoff) == 1)

  
  nslides <- length(qcprobs)
  rating <- rep("poor", len=nslides)
  rating[which(qcprobs >= good.cutoff)] <- "good"
  
  rating.fac <- ordered(rating, levels=c("poor", "good"))
  col.qcprobs <- c("red", "green")
  stopifnot(nlevels(rating.fac) == length(col.qcprobs))
  
  plot(qcprobs,
       las=1,
       main="Predicted Slide Quality",
       sub=sprintf("#Good = %d, #Poor = %d",
                   ngood <- sum(rating == "good"),
                   npoor <- nslides - ngood),
       type='n',
       xaxt='n',
       xlim=c(1, nslides),
       yaxt='n',
       ylab="Probability of Good Slide",
       ylim=0:1)
  mtext(side=3, paste("cutoff =", good.cutoff))
  axis(1, at=seq_len(nslides))
  axis(2, at=seq(0, 1, by=0.1), las=1)
  rect(xleft=1,
       ybottom=c(0, good.cutoff),
       xright=nslides,
       ytop=c(good.cutoff, 1),
       col=c('lightpink', 'lightgreen'),
       border=NA)
  abline(h=good.cutoff)
  points(qcprobs,
         bg=col.qcprobs[rating.fac],
         col='black',
         pch=21)
  textxy(1:nslides,qcprobs,names(qcprobs),cex=0.6,offset=0.3)
}
'''
Rscript += '''######### perform pre-fit QC ######
prefitqcs <- array(list(), length(slideFilenames), list(antibodies))
if (doprefitqc) {
  for (i in seq_along(slideFilenames)) {
    antibody <- antibodies[i]
    message(paste("Pre-fit quality checking slide", antibody, 
                  "-", "please wait."))
    flush.console()
    rppa <- rppas[[i]]
    if (!is.null(rppa)) {
      prefitqc <- tryCatch({
        RPPAPreFitQC(rppa, design)
      }, error = function(e) {
        traceback()
        message(conditionMessage(e))
        NULL
      })
      if (is.RPPAPreFitQC(prefitqc)) {
        prefitqcs[[i]] <- prefitqc
        prefitqc.tf[i] <- TRUE
      }
    } else {
      warning(paste("no slide to quality check for", 
                    antibody))
    }
  }
  tryCatch({
    qcprobs <- sapply(prefitqcs, qcprob)
    qcprobs[is.na(qcprobs)] <- 0
    pdf(sprintf("%s/Pre-FitQC.pdf",outdir))
    plotProbabilityOfGoodSlide(qcprobs)
    dev.off()
  }, error = function(e) {
    message("cannot plot slide quality probabilities")
    warning(conditionMessage(e), immediate. = TRUE)
  })
} else {
  prefitqc.tf <- rep(NA, length(prefitqc.tf))
}
'''
Rscript += ''' ######### perform spatial adjustment ######
shouldPerformSpatialAdj <- function(spatialparams, fitparams) {
  stopifnot(is.RPPAFitParams(fitparams))
  measures <- eval(formals(spatialCorrection)$measure)
  is.RPPASpatialParams(spatialparams) && (fitparams@measure %in% 
                                            measures)
}
doSpatialAdj <- shouldPerformSpatialAdj(spatialparams, fitparams)
if (doSpatialAdj) {
  if (spatialparams@plotSurface) {
    pdf(sprintf("%s/SpatialAdj.pdf",outdir))
  }
  for (i in seq_along(slideFilenames)) {
    antibody <- antibodies[i]
    message(paste("spatially adjusting slide", antibody, 
                  "-", "please wait."))
    flush.console()
    rppa <- rppas[[i]]
    if (!is.null(rppa)) {
      rppa <- tryCatch({
        spatialAdjustmentFromParams(rppa, design, spatialparams)
      }, error = function(e) {
        traceback()
        message(conditionMessage(e))
        NULL
      })
      if (is.RPPA(rppa)) {
        ncols.list <- lapply(c(rppa, rppas[[i]]), function(x) {
          ncol(df <- slot(x, "data"))
        })
        if (!do.call("identical", ncols.list)) {
          rppas[[i]] <- rppa
          spatial.tf[i] <- TRUE
        }
        remove(ncols.list)
      }
    } else {
      warning(paste("no slide to adjust for", antibody))
    }
  }
} else {
  spatial.tf <- rep(NA, length(spatial.tf))
}
dev.off()
'''
Rscript +='''######### perform curve-fitting ######
adj.fitparams <- fitparams
if (doSpatialAdj) {
  message("fits will be performed using spatially adjusted measure")
  adjMeasure <- paste("Adj", fitparams@measure, sep = ".")
  adj.fitparams@measure <- adjMeasure
}
fits <- array(list(), length(slideFilenames), list(antibodies))
for (i in seq_along(slideFilenames)) {
  antibody <- antibodies[i]
  message(paste("fitting", antibody, "-", "please wait."))
  flush.console()
  rppa <- rppas[[i]]
  if (!is.null(rppa)) {
    fit <- tryCatch({
      RPPAFitFromParams(rppa, design = design, fitparams = adj.fitparams)
    }, error = function(e) {
      message(conditionMessage(e))
      NULL
    })
    if (is.RPPAFit(fit)) {
      fits[[i]] <- fit
      fits.tf[i] <- TRUE
    }
  } else {
    warning(paste("no slide to fit for", antibody))
  }
}
'''
Rscript +='''######### Create matrix for tracking what succeeded/failed ######
completed <- cbind(input.tf, prefitqc.tf, spatial.tf, fits.tf)
rownames(completed) <- slideFilenames
colnames(completed) <- names(getStages()[1:4])
'''
Rscript +='''######### construct RPPASet object and save it into .RData #############
rppaset <- new("RPPASet", call = call, rppas = rppas, spatialparams = spatialparams, 
    design = design, prefitqcs = prefitqcs, fits = fits, 
    fitparams = fitparams, normparams = normparams, completed = completed, 
    version = packageDescription("SuperCurve", fields = "Version"))
rda.filename <- "sc-rppaset.RData"
save(rppaset, file = file.path(outdir, rda.filename))
'''
#####################################################################################################
#> slotNames(rppaset)
# [1] "call"          "design"        "rppas"         "spatialparams" "prefitqcs"     "fitparams"     "fits"         
# [8] "completed"     "normparams"    "version"
#####################################################################################################

########################################################################
#> slotNames(rppaset@fits$Waldo_new)
# [1] "call"           "rppa"           "design"         "measure"       
# [5] "method"         "trimset"        "model"          "concentrations"
# [9] "lower"          "upper"          "conf.width"     "intensities"   
#[13] "ss.ratio"       "warn"           "version"
########################################################################

######## generate rppasetsum object; meanwhile perform normalization
#rppasetsum <- RPPASetSummary(rppaset,onlynormqcgood=onlynormqcgood)
#> slotNames(rppasetsum)
#[1] "raw"            "ss"             "norm"           "probs"          "completed"      "design"        
#[7] "onlynormqcgood" "version" 
#####################################################################################################

Rscript +='''####### get a summary and output the whole analysis results; normalization has been done during this process via summary(rppaset)
write.summary(rppaset, path = outdir, graphs = TRUE, tiffdir = imgdir, onlynormqcgood = onlynormqcgood)
'''
Rscript +='''####### output heatmaps of raw intensity ################
for (i in seq_along(slideFilenames)) {
  slideFilename <- rppaset@rppas[[i]]@antibody
  pdf(sprintf("%s/Intensity_heatmap_%s.pdf",outdir,slideFilename))
  image(rppaset@rppas[[i]],rppaset@fitparams@measure,colorbar=TRUE)
  dev.off()
}
'''
with open('%s/RPPA_run.R'%outDir,'w') as FILE:
	FILE.write(Rscript)
assert not os.system('/bio/tool/R-3.2.2X11/bin/Rscript %s/RPPA_run.R'%outDir)
assert not os.system('mv -f %s/Rplots.pdf %s/Fit_graph_all_slides.pdf'%(outDir,outDir))
assert not os.system('rm -f %s/*.jpg'%outDir)
print "Congratulations! RPPA analysis is done!"

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).