# Program 1: Apriori association rule mining (arules package) ----
# Mine association rules from the Groceries transactions with Apriori,
# then plot the relative frequency of the 20 most common items.
library(arules)
library(arulesViz)
library(RColorBrewer)

data("Groceries")

# Minimum support 1%, minimum confidence 20%.
rules <- apriori(Groceries,
parameter = list(supp = 0.01, conf = 0.2))

# Show the first ten rules that were found.
inspect(rules[1:10])

# Top-20 items by relative frequency, pastel palette.
arules::itemFrequencyPlot(Groceries,
topN = 20,
type = "relative",
main = "Relative Item Frequency Plot",
ylab = "Item Frequency (Relative)",
col = brewer.pal(8, 'Pastel2'))
# Program 2: K-means clustering ----
# K-means clustering on the iris measurements (species column dropped),
# compared against the true species labels via a confusion table.
library(cluster)
# FIX: package names are case-sensitive; the CRAN package is "ClusterR",
# not "clusterR" (the original library() call would fail).
library(ClusterR)

# Drop the 5th column (Species) so only the four numeric features remain.
iris_1 <- iris[-5]

set.seed(240)  # reproducible random starts
kmeans.re <- kmeans(iris_1, centers = 3, nstart = 20)
print(kmeans.re)

# Cluster label per observation.
# FIX: original name "kmeans.reScluster" was garbled ($ read as S).
cluster_labels <- kmeans.re$cluster

# Confusion matrix: true species vs. assigned cluster.
cm <- table(iris$Species, cluster_labels)
print(cm)

# Sepal scatter: raw, then coloured by cluster, then with a title.
plot(iris_1[c("Sepal.Length", "Sepal.Width")])
plot(iris_1[c("Sepal.Length", "Sepal.Width")],
col = cluster_labels,
pch = 19)
plot(iris_1[c("Sepal.Length", "Sepal.Width")],
col = cluster_labels,
pch = 19,
main = "K-Means with 3 Clusters")
# Program 3: Hierarchical clustering ----
# Agglomerative hierarchical clustering of the mtcars data with average
# linkage, shown as a dendrogram and cut into three clusters.
library(dplyr)

head(mtcars)

# Pairwise Euclidean distances between cars ("euclidean" is dist()'s default).
distance_mat <- dist(mtcars)
print(distance_mat)

# hclust is deterministic; the seed is kept only for parity with the lab sheet.
set.seed(240)
Hierar_cl <- hclust(distance_mat, method = "average")
print(Hierar_cl)

plot(Hierar_cl, main = "Hierarchical Clustering Dendrogram")
abline(h = 110, col = "green")  # visual cut height on the dendrogram

# Assign each car to one of 3 clusters.
fit <- cutree(Hierar_cl, k = 3)
# Program 4: Classification with a linear SVM ----
# Linear support-vector machine on simulated two-class data.
set.seed(10111)

# 20 points in 2-D; the positive class is shifted up-right by 1 so the
# classes are (partially) separable.
x <- matrix(rnorm(40), nrow = 20, ncol = 2)
y <- rep(c(-1, 1), each = 10)
x[y == 1, ] <- x[y == 1, ] + 1

# Colour by class (y + 3 maps -1/1 onto palette entries 2 and 4).
plot(x, col = y + 3, pch = 19, main = "SVM Classification Data")

library(e1071)

# scale = FALSE keeps the decision boundary in the original data units.
dat <- data.frame(x, y = as.factor(y))
svmfit <- svm(y ~ ., data = dat, kernel = "linear", cost = 10, scale = FALSE)
print(svmfit)
plot(svmfit, dat)
# Program 5: Decision tree (conditional inference) ----
# Conditional-inference decision tree predicting nativeSpeaker from the
# readingSkills data, trained on a stratified 80/20 split.
library(datasets)
library(caTools)
library(party)
library(dplyr)
library(magrittr)

data("readingSkills")
head(readingSkills)

# sample.split stratifies on the outcome; TRUE marks training rows.
set.seed(123)
split_mask <- sample.split(readingSkills$nativeSpeaker, SplitRatio = 0.8)
train_data <- readingSkills[split_mask, ]
test_data <- readingSkills[!split_mask, ]

# Fit on the training rows and draw the tree.
model <- ctree(nativeSpeaker ~ ., data = train_data)
plot(model, main = "Decision Tree")
# Program 6: Linear regression ----
# Simple linear regression on a small hand-made data set, predictions for
# new x values, and two regression scatter plots (toy data, then trees).
library(ggplot2)
library(dplyr)
library(broom)
library(ggpubr)

# Eight (x, y) pairs used to fit the toy model.
data <- data.frame(
x = c(1, 2, 2, 3, 5, 6, 8, 9),
y = c(7, 8, 8, 6, 9, 8, 12, 14)
)

model <- lm(y ~ x, data = data)
summary(model)

# Predict y at five unseen x values.
new_data <- data.frame(x = c(4, 5, 7, 8, 9))
predictions <- predict(model, newdata = new_data)
print(predictions)

# Scatter with the fitted least-squares line overlaid.
toy_plot <- ggplot(data, aes(x = x, y = y)) +
geom_point(color = "blue") +
geom_smooth(method = "lm", se = FALSE, color = "red") +
ggtitle("Linear Regression Model") +
xlab("X Values") +
ylab("Y Values")
print(toy_plot)

# Same idea on the built-in trees data: girth vs. timber volume.
data("trees")
trees_plot <- ggplot(trees, aes(x = Girth, y = Volume)) +
geom_point(color = "darkgreen") +
geom_smooth(method = "lm", se = FALSE, color = "blue") +
ggtitle("Linear Regression: Tree Girth vs. Volume") +
xlab("Tree Girth") +
ylab("Tree Volume")
print(trees_plot)
# Program 7: Data visualization with base graphics ----
# Assorted base-graphics visualisations: bar plots of ozone readings,
# box plots of the air-quality measurements, and a random-data heatmap.
data("airquality")

# Ozone readings as bars, horizontal then vertical.
barplot(airquality$Ozone,
horiz = TRUE,
main = "Ozone Concentration in Air",
xlab = "Ozone Levels")
barplot(airquality$Ozone,
horiz = FALSE,
main = "Ozone Concentration in Air",
xlab = "Ozone Levels")

# One box per measured variable (Ozone, Solar.R, Wind, Temp).
boxplot(airquality[, 1:4],
col = rainbow(4),
main = "Box Plots for Air Quality Parameters")

# 5x5 matrix of N(0, sd = 5) draws with labelled rows/columns, as a heatmap.
set.seed(123)
data <- matrix(rnorm(25, mean = 0, sd = 5), nrow = 5, ncol = 5)
colnames(data) <- paste0("col", 1:5)
rownames(data) <- paste0("row", 1:5)
heatmap(data, main = "Heatmap of Random Data")