EXAM

The document outlines various data analysis techniques implemented in R, including the Apriori algorithm for association rule mining, K-means clustering, hierarchical clustering, support vector machines for classification, decision trees, linear regression, and data visualization methods. Each program is accompanied by relevant libraries, data preparation steps, and visual outputs to illustrate the results. The examples utilize datasets such as Groceries, iris, mtcars, readingSkills, and airquality.


PROGRAM 1 APRIORI ALGORITHM

library(arules)
library(arulesViz)
library(RColorBrewer)

# Load the built-in grocery transactions data set
data("Groceries")

# Mine association rules with minimum support 0.01 and confidence 0.2
rules <- apriori(Groceries,
                 parameter = list(supp = 0.01, conf = 0.2))
inspect(rules[1:10])

# Plot the 20 most frequent items as relative frequencies
arules::itemFrequencyPlot(Groceries,
                          topN = 20,
                          col = brewer.pal(8, "Pastel2"),
                          main = "Relative Item Frequency Plot",
                          type = "relative",
                          ylab = "Item Frequency (Relative)")
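
Since arulesViz is already loaded, the mined rules can also be ranked and visualized; a minimal sketch, assuming the rules object from above:

# Sort the rules by lift and inspect the strongest associations
rules_lift <- sort(rules, by = "lift", decreasing = TRUE)
inspect(rules_lift[1:10])

# Scatter plot of support vs. confidence, shaded by lift
plot(rules, measure = c("support", "confidence"), shading = "lift")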

PROGRAM 2 K-MEANS CLUSTERING TECHNIQUES

library(cluster)
library(ClusterR)

# Drop the Species column so clustering uses only the numeric features
iris_1 <- iris[-5]
set.seed(240)

# Fit k-means with 3 clusters and 20 random starts
kmeans.re <- kmeans(iris_1, centers = 3, nstart = 20)
print(kmeans.re)

# Cluster assignment for each observation
kmeans.re$cluster

# Confusion matrix: true species vs. assigned cluster
cm <- table(iris$Species, kmeans.re$cluster)
print(cm)

# Scatter plots of the first two features, then colored by cluster
plot(iris_1[c("Sepal.Length", "Sepal.Width")])
plot(iris_1[c("Sepal.Length", "Sepal.Width")],
     col = kmeans.re$cluster,
     pch = 19)

plot(iris_1[c("Sepal.Length", "Sepal.Width")],
     col = kmeans.re$cluster,
     pch = 19,
     main = "K-Means with 3 Clusters")
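
The cluster centers returned by kmeans() can be overlaid on the last scatter plot for readability; a minimal sketch using base graphics:

# Mark the fitted cluster centers on the current plot
points(kmeans.re$centers[, c("Sepal.Length", "Sepal.Width")],
       col = 1:3, pch = 8, cex = 3)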

PROGRAM 3 HIERARCHICAL CLUSTERING

library(dplyr)

# Preview the data and compute pairwise Euclidean distances
head(mtcars)
distance_mat <- dist(mtcars, method = "euclidean")
print(distance_mat)

# Agglomerative clustering with average linkage
set.seed(240)
Hierar_cl <- hclust(distance_mat, method = "average")
print(Hierar_cl)

# Dendrogram with a reference cut line
plot(Hierar_cl, main = "Hierarchical Clustering Dendrogram")
abline(h = 110, col = "green")

# Cut the tree into 3 clusters
fit <- cutree(Hierar_cl, k = 3)
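
The three-cluster cut can be shown directly on the dendrogram and its sizes tabulated; a short sketch using base R:

# Draw borders around the 3 clusters on the dendrogram
rect.hclust(Hierar_cl, k = 3, border = "red")

# Number of cars in each cluster
table(fit)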

PROGRAM 4 CLASSIFICATION ALGORITHM (SVM)

# Simulate two classes of points, shifting the second class up and right
set.seed(10111)
x <- matrix(rnorm(40), 20, 2)
y <- rep(c(-1, 1), c(10, 10))
x[y == 1, ] <- x[y == 1, ] + 1
plot(x, col = y + 3, pch = 19, main = "SVM Classification Data")

# Fit a linear support vector classifier
library(e1071)
dat <- data.frame(x, y = as.factor(y))
svmfit <- svm(y ~ ., data = dat, kernel = "linear", cost = 10, scale = FALSE)
print(svmfit)
plot(svmfit, dat)
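
A quick sanity check is to compare the fitted labels against the true classes; a minimal sketch:

# Confusion table of predicted vs. actual labels on the training data
pred <- predict(svmfit, dat)
table(predicted = pred, actual = dat$y)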

PROGRAM 5 DECISION TREE

library(datasets)
library(caTools)
library(party)
library(dplyr)
library(magrittr)

# Load the readingSkills data and split it 80/20 into train and test sets
data("readingSkills")
head(readingSkills)
set.seed(123)
sample_data <- sample.split(readingSkills$nativeSpeaker, SplitRatio = 0.8)
train_data <- subset(readingSkills, sample_data == TRUE)
test_data <- subset(readingSkills, sample_data == FALSE)

# Fit a conditional inference tree predicting nativeSpeaker
model <- ctree(nativeSpeaker ~ ., data = train_data)
plot(model, main = "Decision Tree")
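
Because the data were split 80/20, the held-out test_data can be used to estimate accuracy; a minimal sketch:

# Predict on the held-out 20% and tabulate against the true labels
pred_tree <- predict(model, newdata = test_data)
cm_tree <- table(test_data$nativeSpeaker, pred_tree)
print(cm_tree)

# Overall accuracy
sum(diag(cm_tree)) / sum(cm_tree)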

PROGRAM 6 LINEAR REGRESSION

library(ggplot2)
library(dplyr)
library(broom)
library(ggpubr)

# Small toy data set
data <- data.frame(x = c(1, 2, 2, 3, 5, 6, 8, 9),
                   y = c(7, 8, 8, 6, 9, 8, 12, 14))

# Fit a simple linear regression of y on x
model <- lm(y ~ x, data = data)
summary(model)

# Predict y for new x values
new_data <- data.frame(x = c(4, 5, 7, 8, 9))
predictions <- predict(model, newdata = new_data)
print(predictions)

# Scatter plot with the fitted regression line
ggplot(data, aes(x = x, y = y)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  ggtitle("Linear Regression Model") +
  xlab("X Values") +
  ylab("Y Values")

data("trees")

ggplot(trees, aes(x = Girth, y = Volume)) +


geom_point(color = "darkgreen") +
geom_smooth(method = "lm", se = FALSE, color = "blue") +
ggtitle("Linear Regression: Tree Girth vs. Volume") +
xlab("Tree Girth") +
ylab("Tree Volume")

PROGRAM 7 DATA VISUALIZATION

data("airquality")
barplot(airquality$Ozone,
main = "Ozone Concentration in Air",
xlab = "Ozone Levels",
horiz = TRUE)

barplot(airquality$Ozone,
main = "Ozone Concentration in Air",
xlab = "Ozone Levels",
horiz = FALSE)

boxplot(airquality[, 1:4],
main = "Box Plots for Air Quality Parameters",
col = rainbow(4))

# Heatmap of a 5 x 5 matrix of random values
set.seed(123)
data <- matrix(rnorm(25, mean = 0, sd = 5), nrow = 5, ncol = 5)
colnames(data) <- paste("col", 1:5, sep = "")
rownames(data) <- paste0("row", 1:5)
heatmap(data, main = "Heatmap of Random Data")
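
A scatter plot of two airquality variables rounds out the set; a minimal base-graphics sketch (pairs with missing Ozone values are dropped automatically):

# Ozone (ppb) against temperature (degrees F)
plot(airquality$Temp, airquality$Ozone,
     pch = 19, col = "steelblue",
     main = "Ozone vs. Temperature",
     xlab = "Temperature (F)",
     ylab = "Ozone (ppb)")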
