# Demo entry 6660853

**cwk**

Submitted by **anonymous**
on Nov 16, 2017 at 19:41

Language: S. Code size: 6.7 kB.

########### QUESTION 1 ########### setwd("C:/Users/LCY/Desktop") # set the working directory data <- read.csv("lung.csv",header=T) # load the data into R and save the data in "data" #Q1.a # survival time, patient’s age, calories consumed and weight loss are Numerical variables, so we use the summary function to calculate the lower quartile, median and upper quartile of these four variables summary(data[,c(2,4,9,10)]) # survival time, patient’s age, calories consumed and weight loss are in the 2nd, 4th, 9th and 10th columns of the data separately apply(data[,-c(2,4,9,10)],2,table) # use tables to summarize the distribution of the categorical and ordinal variables #Q1.b data.died <- subset(data,status=="2") # filter out the data of those patients who died mean(data.died$time) # calculate the mean of survival time for those patients who died sd(data.died$time) # calculate the standard deviation of survival time for those patients who died #Q1.c sum(is.na(data$ph.karno)) # judge if there are any data missing in the physician’s estimate sum(is.na(data$pat.karno)) # judge if there are any data missing in the patient’s estimate data1 <- data[!is.na(data$pat.karno),] #delete the rows which have missing data and save the new data in "data1" a <- which(data1$ph.karno>data1$pat.karno) # filter out the data of those patients have Physician’s estimate of Karnofsky score greater than the patient’s estimate b <- which(data1$ph.karno==data1$pat.karno) # filter out the data of those patients have Physician’s estimate of Karnofsky score equal to the patient’s estimate c <- which(data1$ph.karno<data1$pat.karno) # filter out the data of those patients have Physician’s estimate of Karnofsky score less than the patient’s estimate propor.a <- round(length(a)/length(data1$ph.karno)*100, digits = 1) # calculate the proportion of those patients have Physician’s estimate of Karnofsky score greater than the patient’s estimate propor.b <- round(length(b)/length(data1$ph.karno)*100, digits = 1) # calculate the proportion of those patients have Physician’s estimate of Karnofsky score equal to the patient’s estimate propor.c <- round(length(c)/length(data1$ph.karno)*100, digits = 1) # calculate the proportion of those patients have Physician’s estimate of Karnofsky score less than the patient’s estimate sum(propor.a,propor.b,propor.c) # judge if the sum of three proportion are equal to 100% #Q1.d newage <- cut(data$age,3,labels=F) # categorise age into three categories and save the data in "newage" datanew <- cbind(data,newage) # combind "data" and "newage" and save the new data in "datanew" wt.loss1 <- subset(datanew,newage==1,select=wt.loss) # filter out the data of weight loss in the category 1 of age wt.loss2 <- subset(datanew,newage==2,select=wt.loss) # filter out the data of weight loss in the category 2 of age wt.loss3 <- subset(datanew,newage==3,select=wt.loss) # filter out the data of weight loss in the category 3 of age mean(wt.loss1,na.rm=TRUE) apply(wt.loss3,2,mean,na.rm=TRUE) mean(data$wt.loss,na.rm=TRUE) attach(datanew) x <- wt.loss[!is.na(wt.loss)] length(x) ?aggregate plot(time,meal.cal) ########### QUESTION 2 ########### install.packages("timeDate") library(timeDate) birthday <- as.Date("05/03/1956",format="%d/%m/%Y") weekdays(birthday) which.day <- function(y){ which.day <- weekdays(ISOdate(y,03,05)) if(y<1956) stop("The Vice-Chancellor hasn't been born") return(which.day) } which.day(1900) year <- 2018:2117 for(i in 1:100){ day <- weekdays(ISOdate(year[i],03,05)) x[i] <- print(day) } year1 <- as.matrix(2018:2117) x <- apply(year1,1,which.day) y <- table(x) barplot(y,ylim=0:15) start<-as.Date("2001-01-01") end<-as.Date("2100-12-31") x<- seq(from=start,to=end,by=1) wkdays<-weekdays(x) day<-format(x,format="%d") m<-table(wkdays,day) ########### QUESTION 3 ########### #Q3.1 # plot the function g(x)=(sin(x))^2 x <- seq(0,2*pi,pi/1000) y <- (sin(x))^2 plot(x,y,type="l",xlab="x",ylab="g(x)",main="g(x)=(sin(x))^2") #Q3.2 set.seed(55) # set a seed in R, so that random objects created can be reproduced mcint <- function(m) { # approximate the area under the curve by using Monte Carlo integration b <- 2*pi a <- 0 part1 <- (b-a)/m ui <- runif(m,0,2*pi) # generate m random samples from a uniform distribution on the interval [0,2π] part2 <- sum((sin(ui))^2) area <- part2*part1 return(area) } mcint(100) # generate 100 random samples #Q3.3 ?integrate # the help file on the R function integrate f <- function(x){(sin(x))^2} t <- integrate(f,0,2*pi)$value # calculate the area under the curve between 0 and 2π and only show the value t #Q3.4 #Q3.5 m <- as.matrix(1000:10000) # generate a matrix of number from 1000 to 10000 my.answer <- apply(m,1,mcint) # approximate the area under the curve of every value of m from 1000 to 10000 ratio <- my.answer/pi # work out the ratio of my answer to the exact answer "π" of every value of m plot(ratio,type="l",col="black") # plot the line of ratio against m abline(h=1,col="yellow",lty=10,lwd=3) # add a line of ratio=1 #Q3.6 f <- function(x){ # function f(x) = g(x)/π (sin(x))^2/pi } h <- function(x){ # function h(x)=1/2π for 0 < x < 2π h <- 1/(2*pi)*(x>0 & x<2*pi) return(h) } rejsamp <- function(m,d){ # function rejsamp is to generate pseudo-random numbers from an arbitrary distribution f(x) by using rejection sampling method x <- rep(NA,m) # prepare a place to save the value of x by repeating NA for m times for(i in 1:m){ repeat{ # repeat the function until the required number of ‘accepted’ x draws have been collected u=runif(1,0,1) # generate a random sample from a uniform distribution on the interval [0,1] z=runif(1,0,2*pi) # generate a random sample from a uniform distribution on the interval [0,2π] if (u<f(z)/((d*pi)*h(z))){ # if u ≤ f(z)/dh(z), then accept z as a draw from the density; otherwise, reject z x[i]=z # save the value of z in x[i] break} } } return(x) # return the value of x } x <- seq(0,2*pi,pi/1000) f1 <- (sin(x))^2/pi h1 <- 1/(2*pi) plot(x,f1,type="l") abline(h=2*h1,col="red",lwd=3) # as what we can see in the plot, curve of the function 2*h1 can cover the curve of the function f1, so 2 is an appropriate value of d random <- rejsamp(10000,2) # generate 10000 random draws hist(random,breaks=100,freq=FALSE,main="Histogram of x") # produce a histogram of the random draws lines(density(random),col="red",lwd=3) # add the line of the density curve of the random draws

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.