# Demo entry 6660853

cwk

Submitted by anonymous on Nov 16, 2017 at 19:41
Language: S. Code size: 6.7 kB.

```###########   QUESTION 1   ###########

setwd("C:/Users/LCY/Desktop") # set the working directory
data <- read.csv("lung.csv",header=T) # load the data into R and save the data in "data"

#Q1.a

# survival time, patient’s age, calories consumed and weight loss are Numerical variables, so we use the summary function to calculate the lower quartile, median and upper quartile of these four variables
summary(data[,c(2,4,9,10)]) # survival time, patient’s age, calories consumed and weight loss are in the 2nd, 4th, 9th and 10th columns of the data separately
apply(data[,-c(2,4,9,10)],2,table) # use tables to summarize the distribution of the categorical and ordinal variables

#Q1.b

data.died <- subset(data,status=="2") # filter out the data of those patients who died
mean(data.died\$time) # calculate the mean of survival time for those patients who died
sd(data.died\$time) # calculate the standard deviation of survival time for those patients who died

#Q1.c

sum(is.na(data\$ph.karno)) # judge if there are any data missing in the physician’s estimate
sum(is.na(data\$pat.karno)) # judge if there are any data missing in the patient’s estimate
data1 <- data[!is.na(data\$pat.karno),] #delete the rows which have missing data and save the new data in "data1"
a <- which(data1\$ph.karno>data1\$pat.karno) # filter out the data of those patients have Physician’s estimate of Karnofsky score greater than the patient’s estimate
b <- which(data1\$ph.karno==data1\$pat.karno) # filter out the data of those patients have Physician’s estimate of Karnofsky score equal to the patient’s estimate
c <- which(data1\$ph.karno<data1\$pat.karno) # filter out the data of those patients have Physician’s estimate of Karnofsky score less than the patient’s estimate
propor.a <- round(length(a)/length(data1\$ph.karno)*100, digits = 1) # calculate the proportion of those patients have Physician’s estimate of Karnofsky score greater than the patient’s estimate
propor.b <- round(length(b)/length(data1\$ph.karno)*100, digits = 1) # calculate the proportion of those patients have Physician’s estimate of Karnofsky score equal to the patient’s estimate
propor.c <- round(length(c)/length(data1\$ph.karno)*100, digits = 1) # calculate the proportion of those patients have Physician’s estimate of Karnofsky score less than the patient’s estimate
sum(propor.a,propor.b,propor.c) # judge if the sum of three proportion are equal to 100%

#Q1.d

newage <- cut(data\$age,3,labels=F) # categorise age into three categories and save the data in "newage"
datanew <- cbind(data,newage) # combind "data" and "newage" and save the new data in "datanew"
wt.loss1 <- subset(datanew,newage==1,select=wt.loss) # filter out the data of weight loss in the category 1 of age
wt.loss2 <- subset(datanew,newage==2,select=wt.loss) # filter out the data of weight loss in the category 2 of age
wt.loss3 <- subset(datanew,newage==3,select=wt.loss) # filter out the data of weight loss in the category 3 of age
mean(wt.loss1,na.rm=TRUE)
apply(wt.loss3,2,mean,na.rm=TRUE)
mean(data\$wt.loss,na.rm=TRUE)
attach(datanew)
x <- wt.loss[!is.na(wt.loss)]
length(x)
?aggregate
plot(time,meal.cal)

###########   QUESTION 2   ###########

install.packages("timeDate")
library(timeDate)
birthday <- as.Date("05/03/1956",format="%d/%m/%Y")
weekdays(birthday)
which.day <- function(y){
which.day <- weekdays(ISOdate(y,03,05))
if(y<1956) stop("The Vice-Chancellor hasn't been born")
return(which.day)
}
which.day(1900)

year <- 2018:2117
for(i in 1:100){
day <- weekdays(ISOdate(year[i],03,05))
x[i] <- print(day)
}
year1 <- as.matrix(2018:2117)
x <- apply(year1,1,which.day)

y <- table(x)
barplot(y,ylim=0:15)

start<-as.Date("2001-01-01")
end<-as.Date("2100-12-31")
x<- seq(from=start,to=end,by=1)
wkdays<-weekdays(x)
day<-format(x,format="%d")
m<-table(wkdays,day)

###########   QUESTION 3   ###########

#Q3.1

# plot the function g(x)=(sin(x))^2
x <- seq(0,2*pi,pi/1000)
y <- (sin(x))^2
plot(x,y,type="l",xlab="x",ylab="g(x)",main="g(x)=(sin(x))^2")

#Q3.2

set.seed(55) # set a seed in R, so that random objects created can be reproduced
mcint <- function(m)
{
# approximate the area under the curve by using Monte Carlo integration
b <- 2*pi
a <- 0
part1 <- (b-a)/m
ui <- runif(m,0,2*pi) # generate m random samples from a uniform distribution on the interval [0,2π]
part2 <- sum((sin(ui))^2)
area <- part2*part1
return(area)
}
mcint(100) # generate 100 random samples

#Q3.3

?integrate # the help file on the R function integrate
f <- function(x){(sin(x))^2}
t <- integrate(f,0,2*pi)\$value # calculate the area under the curve between 0 and 2π and only show the value
t

#Q3.4

#Q3.5

m <- as.matrix(1000:10000) # generate a matrix of number from 1000 to 10000
my.answer <- apply(m,1,mcint) # approximate the area under the curve of every value of m from 1000 to 10000
ratio <- my.answer/pi # work out the ratio of my answer to the exact answer "π" of every value of m
plot(ratio,type="l",col="black") # plot the line of ratio against m
abline(h=1,col="yellow",lty=10,lwd=3) # add a line of ratio=1

#Q3.6

f <- function(x){
# function f(x) = g(x)/π
(sin(x))^2/pi
}

h <- function(x){
# function h(x)=1/2π for 0 < x < 2π
h <- 1/(2*pi)*(x>0 & x<2*pi)
return(h)
}

rejsamp <- function(m,d){
# function rejsamp is to generate pseudo-random numbers from an arbitrary distribution f(x) by using rejection sampling method
x <- rep(NA,m) # prepare a place to save the value of x by repeating NA for m times
for(i in 1:m){
repeat{ # repeat the function until the required number of ‘accepted’ x draws have been collected
u=runif(1,0,1) # generate a random sample from a uniform distribution on the interval [0,1]
z=runif(1,0,2*pi) # generate a random sample from a uniform distribution on the interval [0,2π]
if (u<f(z)/((d*pi)*h(z))){ # if u ≤ f(z)/dh(z), then accept z as a draw from the density; otherwise, reject z
x[i]=z # save the value of z in x[i]
break}
}
}
return(x) # return the value of x
}

x <- seq(0,2*pi,pi/1000)
f1 <- (sin(x))^2/pi
h1 <- 1/(2*pi)
plot(x,f1,type="l")
abline(h=2*h1,col="red",lwd=3) # as what we can see in the plot, curve of the function 2*h1 can cover the curve of the function f1, so 2 is an appropriate value of d

random <- rejsamp(10000,2) # generate 10000 random draws
hist(random,breaks=100,freq=FALSE,main="Histogram of x") # produce a histogram of the random draws
lines(density(random),col="red",lwd=3) # add the line of the density curve of the random draws
```

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.