1.
# Frank has 3 bags of gold, containing 10 ounces each.
# Sally adds 5 ounces to one of his bags, 3 ounces to another, and 12 ounces to the last.
# Use vectors to show how much gold Frank has in each bag.
# Upload the R code to Assignment 1 using R Markdown in Word (docx) format.
# Starting contents: three bags holding 10 ounces of gold apiece.
Frank <- rep(10, times = 3)
# Ounces Sally adds to the first, second and third bag respectively.
Sally <- c(5, 3, 12)
# Vector addition works element by element, so each bag gets its own top-up.
Total <- Frank + Sally
print(Total)
2.
# Show a single cell of the data frame: row 2, column 1.
print(Sale[2, 1])
# Open the data frame in the data viewer.
View(Sale)
# Break the data frame into pieces. split() returns one piece per distinct
# value of the grouping column, first grouped by Cost and then by Profit.
print(split(Sale, f = Sale$Cost))
print(split(Sale, f = Sale$Profit))
# Total of every value in the Cost column.
print(sum(Sale$Cost))
3.
# Create a data frame with 5 columns.
# C1 holds 1..9; each following column is C1 shifted up by one more.
C1 <- c(1, 2, 3, 4, 5, 6, 7, 8, 9)
C2 <- C1 + 1
C3 <- C1 + 2
C4 <- C1 + 3
C5 <- C1 + 4
dataframe <- data.frame(C1, C2, C3, C4, C5)
# View() opens a viewer window, which is only useful in an interactive
# session; skip it when the script is knitted or run with Rscript.
if (interactive()) {
  View(dataframe)
}
# Boxplot: one box per column.
boxplot(dataframe)
# Scatterplot: a data frame with more than two columns is drawn as a
# scatterplot matrix.
plot(dataframe)
# Histogram: hist() only accepts a numeric vector, so plot a single column
# instead of the whole data frame.
hist(dataframe$C1)
# Calculate the standard deviation of the data in one column.
standard_deviation_C1 <- sd(C1)
standard_deviation_C1
# Replace one of the data points (row 5 of C2) with an outlier.
dataframe[5, 2] <- 50
# Generate a new boxplot showing the outlier.
boxplot(dataframe)
4.
# Describing data
some_numbers <- c(13, 15, 16, 20, 30, 4, 5, 6, 7, 8, 90)
# Adding the vector to itself doubles every value.
some_numbers <- some_numbers + some_numbers
some_numbers
# Mean, median, range and quantiles
mean_some_numbers <- mean(some_numbers)
mean_some_numbers
median_some_numbers <- median(some_numbers)
median_some_numbers
range_some_numbers <- range(some_numbers)
range_some_numbers
quantile_some_numbers <- quantile(some_numbers)
quantile_some_numbers
# Standard deviation
standard_deviation_some_numbers <- sd(some_numbers)
standard_deviation_some_numbers
different_numbers <- c(1, 3, 4, 5, 6, 7, 7, 7, 3, 8, 10)
summary(some_numbers)
summary(different_numbers)
# View() opens a viewer window, which is only useful in an interactive
# session; skip it when the script is knitted or run with Rscript.
if (interactive()) {
  View(some_numbers)
}
# Visualizing
plot(some_numbers)
some_numbers
plot(
  some_numbers,
  type = "h", col = "pink", main = "statistics",
  xlab = "Value", ylab = "number"
)
hist(some_numbers)
barplot(some_numbers)
boxplot(some_numbers)
# Combine both vectors (same length) into a two-column data frame.
some_dataframe <- data.frame(some_numbers, different_numbers)
some_dataframe
# With exactly two columns, plot() draws the first column against the second.
plot(
  some_dataframe,
  type = "h", col = "pink", main = "statistics",
  xlab = "Value", ylab = "number"
)
# Overwrite row 10 of the second column (different_numbers) with 50.
some_dataframe[10, 2] <- 50
some_dataframe
5.
# Work on a copy of the Stocks data set.
mydata <- Stocks
str(mydata)
head(mydata)
# View() opens a viewer window, which is only useful in an interactive
# session; skip it when the script is knitted or run with Rscript.
if (interactive()) {
  View(mydata)
}
# Give the first 12 columns short, consistent names.
stock_cols <- paste0("Stock", 1:10)
names(mydata)[1:12] <- c("day", stock_cols, "Rating")
names(mydata)[1:12]
str(mydata)
# Convert every stock column to numeric. Going through as.character() first
# means a factor column is converted by its labels rather than by its
# internal integer codes.
mydata[stock_cols] <- lapply(
  mydata[stock_cols],
  function(column) as.numeric(as.character(column))
)
# NOTE(review): Rating is assumed to be a categorical label and is stored as
# a factor; switch to as.numeric() if it is a numeric score -- confirm
# against the data.
mydata$Rating <- as.factor(as.character(mydata$Rating))
str(mydata)
# Check for missing ("NA") values: overall count first, then count per column.
table(is.na(mydata))
colSums(is.na(mydata))
# Overwrite the NAs in Stock1 with the mean of the observed Stock1 values
# (na.rm = TRUE keeps the NAs themselves out of the mean).
mydata$Stock1[is.na(mydata$Stock1)] <- mean(mydata$Stock1, na.rm = TRUE)
6.
# Build a small data set: three predictor columns (weather, time, health)
# plus the outcome column lawn.
library(caret) # createDataPartition()
library(e1071) # naiveBayes()
weather <- c("rainy", "snow", "sunny")
time <- c("urgent", "adequate", "adequate")
health <- c("bad", "good", "good")
lawn <- c("no", "yes", "yes")
dataset <- data.frame(weather, time, health, lawn)
# View() opens a viewer window, which is only useful in an interactive
# session; skip it when the script is knitted or run with Rscript.
if (interactive()) {
  View(dataset)
}
str(dataset)
# Store every column as a factor so each one is treated as categorical.
dataset[] <- lapply(dataset, function(column) as.factor(as.character(column)))
str(dataset)
# Predicted
set.seed(999)
# The training rows have to be drawn before they can be used to subset the
# data: 70% of the rows, sampled within each level of lawn.
trainIndex <- createDataPartition(dataset$lawn, p = 0.7)$Resample1
train <- dataset[trainIndex, ]
test <- dataset[-trainIndex, ]
# Compare the outcome counts in the full data and in the training rows.
print(table(dataset$lawn))
print(table(train$lawn))
# Fit on the training rows only; the data frame is passed through `data`.
NBclassfier <- naiveBayes(lawn ~ weather + time + health, data = train)
print(NBclassfier)
# 7. Naive Bayes
str(CreditRating)
# Store the four model columns as factors so each is treated as categorical.
factor_cols <- c("PurchaseFrequency", "CreditRating", "Age", "Approval")
CreditRating[factor_cols] <- lapply(
  CreditRating[factor_cols],
  function(column) as.factor(as.character(column))
)
str(CreditRating)
# Packages: caret divides the data into training and test rows, e1071
# provides naiveBayes().
library(caret)
library(e1071)
set.seed(7267166) # fixed seed so the random split is repeatable
trainIndex <- createDataPartition(CreditRating$Approval, p = 0.7)$Resample1
train <- CreditRating[trainIndex, ]
test <- CreditRating[-trainIndex, ]
# Check the balance of the outcome in the full data and in the training rows.
print(table(CreditRating$Approval))
print(table(train$Approval))
# Approval is the outcome, so it must not also be listed as a predictor; the
# predictors are the three remaining columns.
NBclassfier <- naiveBayes(
  Approval ~ PurchaseFrequency + CreditRating + Age,
  data = train
)
print(NBclassfier)
8.
# Load dataset: iris
data("iris")
# View structure of dataset
str(iris)
# View summary of dataset
summary(iris)
# View top rows
head(iris)
# Create new datasets: the four measurement columns, and the species labels.
iris_features <- iris[, c(1, 2, 3, 4)]
iris_class <- iris[, "Species"]
# View top rows
head(iris_features)
head(iris_class)
# Min-max scaling: maps the smallest value of x to 0 and the largest to 1.
normalize <- function(x) {
  (x - min(x)) / (max(x) - min(x))
}
iris_features$Sepal.Length <- normalize(iris_features$Sepal.Length)
iris_features$Sepal.Width <- normalize(iris_features$Sepal.Width)
iris_features$Petal.Length <- normalize(iris_features$Petal.Length)
iris_features$Petal.Width <- normalize(iris_features$Petal.Width)
# View top rows
head(iris_features)
# Apply k-means clustering algorithm with 3 clusters. kmeans() starts from
# randomly chosen centers, so fix the seed to make the result repeatable.
set.seed(123)
result <- kmeans(iris_features, 3)
# Number of observations in each cluster
result$size
# Coordinates of each cluster center
result$centers
# Cluster assignment of every observation
result$cluster
# Verify results of clustering on a 2 x 2 grid of plots; keep the previous
# graphics settings so they can be restored afterwards.
old_par <- par(mfrow = c(2, 2), mar = c(5, 4, 2, 2))
# Sepal.Length vs Sepal.Width, coloured by cluster
plot(iris_features[c(1, 2)], col = result$cluster)
# Sepal.Length vs Sepal.Width, coloured by the original Species label
plot(iris_features[c(1, 2)], col = iris_class)
# Petal.Length vs Petal.Width, coloured by cluster
plot(iris_features[c(3, 4)], col = result$cluster)
# Petal.Length vs Petal.Width, coloured by the original Species label
plot(iris_features[c(3, 4)], col = iris_class)
par(old_par)
# Cross-tabulate cluster assignment against the true species.
table(result$cluster, iris_class)
9.
# Explore data
# View() opens a viewer window, which is only useful in an interactive
# session; skip it when the script is knitted or run with Rscript.
if (interactive()) {
  View(grades)
}
str(grades)
table(grades$Level)
# Clean data: Level is the class label to predict, so store it as a factor.
grades$Level <- as.factor(as.character(grades$Level))
str(grades)
set.seed(2134)
# Label every row 1 (probability 0.7) or 2 (probability 0.3) at random, then
# use the labels to divide the rows into training and test sets.
ind <- sample(2, nrow(grades), replace = TRUE, prob = c(0.7, 0.3))
train_set <- grades[ind == 1, ]
test_set <- grades[ind == 2, ]
nrow(train_set)
nrow(test_set)
# Building decision tree
library(tree) # tree()
mytree <- tree(Level ~ Quiz1 + Quiz2 + Quiz3 + Quiz4 + Quiz5, data = train_set)
# Summarize the model
summary(mytree)
# Plot the tree and label its nodes
plot(mytree)
text(mytree, pretty = 0, cex = 0.6)
10.
#Import the data file into RStudio.
#Summarize the data.
#Remove the last column and create a boxplot from the remaining columns.
#Create a scatterplot of column 1 and column 3.
#Calculate the correlation between column 1 and column 3.
# NOTE(review): the file name below is a placeholder -- set it to the data
# file supplied with the assignment. read.csv() assumes a comma-separated
# file; confirm the format.
input_file <- "data.csv"
if (!file.exists(input_file)) {
  stop("Input file not found: ", input_file, call. = FALSE)
}
data1 <- read.csv(input_file)
summary(data1)
# Drop the last column, whatever the column count; drop = FALSE keeps the
# result a data frame even if only one column remains.
data2 <- data1[, -ncol(data1), drop = FALSE]
boxplot(data2)
# Scatterplot of column 1 against column 3 only.
plot(
  data2[[1]], data2[[3]],
  xlab = names(data2)[1], ylab = names(data2)[3]
)
# Correlation between column 1 and column 3.
cor(data2[[1]], data2[[3]])