EX NO : 1
TO PERFORM VECTOR OPERATIONS
DATE:
1
CODE
# Exercise 1: vector creation, indexing, arithmetic, repetition and
# replace/delete operations.
# The setwd("D:/ ") and rm(list = ls()) calls were dropped: this script never
# touches the file system, and both calls clobber the caller's session.

# Creating vectors using the ':' operator
A <- 1:4
B <- 5:8
writeLines("The Vector A is:")
print(A)
writeLines("The Vector B is:")
print(B)

# Creating a vector using the seq() function
C <- seq(from = 1, to = 10, by = 2)
writeLines("The Vector C using seq is:")
print(C)

# Accessing elements of a vector. print() is explicit so the values are also
# shown when the script is run through source().
writeLines("Accessing Elements of a Vector")
writeLines("A[3] is:")
print(A[3])
writeLines("A[1:3] is:")
print(A[1:3])

# Vector arithmetic is element-wise. The result is stored in `result`
# instead of `c` so that the base function c() is not shadowed.
writeLines("Vector Arithmetic")
result <- A + B
writeLines("A+B:")
print(result)
result <- A - B
writeLines("A-B:")
print(result)
result <- A * B
writeLines("A*B:")
print(result)
result <- A / B
writeLines("A/B:")
print(result)
result <- A + (B)^2
writeLines("A+B^2:")
print(result)

# A scalar operand is recycled across the whole vector
result <- 2 + A
print(result)
result <- 2 + 3 * B
print(result)
result <- (2 + 3) * B
print(result)

# Vector repetition: the value 5 repeated 4 times
e <- rep(5, 4)
writeLines("Vector Repetition of (5,4) is :")
print(e)

# Replace / delete elements of a vector
# Replace a single element
e[1] <- 10
# Delete a single element (a negative index drops that position)
e <- e[-3]
writeLines("Vector e after Replace/Delete Operation:")
print(e)

# Delete the entire vector
e <- NULL
print(e)
4
OUTPUT
The Vector A is:
[1] 1 2 3 4
The Vector B is:
[1] 5 6 7 8
The Vector C using seq is:
[1] 1 3 5 7 9
Accessing Elements of a Vector
A[3] is:
[1] 3
A[1:3] is:
[1] 1 2 3
Vector Arithmetic
A+B:
[1] 6 8 10 12
A-B:
[1] -4 -4 -4 -4
A*B:
[1] 5 12 21 32
A/B:
[1] 0.2000000 0.3333333 0.4285714 0.5000000
A+B^2:
[1] 26 38 52 68
Vector Repetition of (5,4) is :
[1] 5 5 5 5
Vector e after Replace/Delete Operation:
[1] 10 5 5
5
6
EX NO : 2
TO PERFORM MATRIX OPERATIONS
DATE:
7
CODE
# Exercise 2: matrix creation, indexing, combination, element-wise
# arithmetic and modification.
# The setwd("D:/ ") and rm(list = ls()) calls were dropped: this script never
# touches the file system, and both calls clobber the caller's session.

# A matrix is a two-dimensional array
# Creating a matrix (filled column by column by default)
A <- matrix(1:9, nrow = 3)
writeLines("Matrix A is:")
print(A)
# byrow = TRUE fills the matrix row by row instead
B <- matrix(1:9, nrow = 3, byrow = TRUE)
writeLines("Matrix B is:")
print(B)

# Accessing elements of a matrix: one cell, one row, one column
writeLines("Accessing Elements of a Matrix :")
print(A[2, 3])
print(A[2, ])
print(A[, 3])

# Combining matrices
a <- matrix(1:9, 3, 3)
b <- matrix(10:18, 3, 3)
writeLines("Combining Matrices")
# The original called printc(), which is not defined anywhere in this script;
# print() is used instead.
print(a)
print(b)
writeLines("Column-Wise:")
print(cbind(a, b))
writeLines("Row-Wise:")
print(rbind(a, b))

# Matrix arithmetic. These operators work element by element; `*` is NOT the
# matrix product. The result is stored in `result` so c() is not shadowed.
writeLines("Matrix Arithmetic:")
result <- a + b
writeLines("A+B:")
print(result)
result <- a - b
writeLines("A-B:")
print(result)
result <- a * b
writeLines("A*B:")
print(result)
result <- a / b
writeLines("A/B:")
print(result)

# Modify matrix elements: one cell by position, then every cell greater than 5
a[3, 3] <- 0
a[a > 5] <- 0
writeLines("Matrix A after Modification:")
print(a)
9
OUTPUT
Matrix A is:
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
Matrix B is:
[,1] [,2] [,3]
[1,] 1 2 3
[2,] 4 5 6
[3,] 7 8 9
Accessing Elements of a Matrix :
[1] 8
[1] 2 5 8
[1] 7 8 9
Combining Matrices
>a
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
>b
[,1] [,2] [,3]
[1,] 10 13 16
[2,] 11 14 17
[3,] 12 15 18
Column-Wise:
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 4 7 10 13 16
[2,] 2 5 8 11 14 17
[3,] 3 6 9 12 15 18
Row-Wise:
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
[4,] 10 13 16
[5,] 11 14 17
[6,] 12 15 18
10
Matrix Arithmetic:
A+B:
[,1] [,2] [,3]
[1,] 11 17 23
[2,] 13 19 25
[3,] 15 21 27
A-B:
[,1] [,2] [,3]
[1,] -9 -9 -9
[2,] -9 -9 -9
[3,] -9 -9 -9
A*B:
[,1] [,2] [,3]
[1,] 10 52 112
[2,] 22 70 136
[3,] 36 90 162
A/B:
[,1] [,2] [,3]
[1,] 0.1000000 0.3076923 0.4375000
[2,] 0.1818182 0.3571429 0.4705882
[3,] 0.2500000 0.4000000 0.5000000
Matrix A after Modification:
[,1] [,2] [,3]
[1,] 1 4 0
[2,] 2 5 0
[3,] 3 0 0
11
12
EX NO : 3
TO PERFORM LIST OPERATIONS
DATE:
13
CODE
# Exercise 3: list creation, naming, arithmetic on list members, indexing,
# conversion, combination and insert/remove.
# The setwd("D:/ ") and rm(list = ls()) calls were dropped: this script never
# touches the file system, and both calls clobber the caller's session.

# A list can contain elements of different types: numbers, strings, vectors...
# List contents can be accessed either by index or by name.
# month.abb is the built-in vector of abbreviated month names.
mylist <- list(c(1, 1, 2, 5, 14, 42), month.abb, matrix(c(3, -8, 1, -3), nrow = 2))
writeLines("Creating myList:")
print(mylist)

# Naming list elements
names(mylist) <- c("numbers", "months", "matrix")
writeLines("Naming list elements")
print(mylist)

# A list's length is the number of top-level elements that it contains
writeLines("The Length of the List is:")
print(length(mylist))

# Arithmetic on lists: the operators do not work on a list itself, so the
# vector held in the first element is extracted with [[1]] first.
writeLines("Arithmetic operations on list L1 and L2")
L1 <- list(1:5)
writeLines("L1:")
print(L1)
L2 <- list(6:10)
writeLines("L2:")
print(L2)
writeLines("List Addition:")
print(L1[[1]] + L2[[1]])
writeLines("List Subtraction:")
print(L1[[1]] - L2[[1]])
writeLines("List Multiplication:")
print(L1[[1]] * L2[[1]])
writeLines("List Division:")
print(L1[[1]] / L2[[1]])

# Indexing lists: single brackets return a sub-list, selected either by
# position or by a logical mask.
writeLines("Indexing List")
l <- list(first = 1, second = 2, third = list(alpha = 3.1, beta = 3.2))
writeLines("L[2:3] is:")
print(l[2:3])
print(l[c(FALSE, FALSE, TRUE)])

# Converting between vectors and lists: as.list() returns a new list and
# leaves the vector b itself unchanged.
writeLines("Converting Between Vectors / Lists")
b <- c(1, 6, 21, 107)
print(as.list(b))
print(b)

# Combining lists: c() concatenates the top-level elements
a <- c(list(1:3, pi), list("periyar", "university"), list(matrix(1:9, 3, 3)))
writeLines("Combining Lists")
writeLines("List A:")
print(a)

# Insert / remove elements in a list
writeLines("Insert/ Remove elements in list:")
writeLines("List A:")
a <- list(1, 3, 5)
print(a)
# Insert a fourth element
a[[4]] <- 7
# Show the list without its third element (a itself is not modified)
writeLines("List A:")
print(a[-3])
# Assigning NULL removes the second element from a
a[[2]] <- NULL
16
OUTPUT
Creating myList:
[[1]]
[1] 1 1 2 5 14 42
[[2]]
[1] "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"
[[3]]
[,1] [,2]
[1,] 3 1
[2,] -8 -3
Naming list elements
$numbers
[1] 1 1 2 5 14 42
$months
[1] "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"
$matrix
[,1] [,2]
[1,] 3 1
[2,] -8 -3
The Length of the List is:
[1] 3
Arithmetic operations on list L1 and L2
L1:
[[1]]
[1] 1 2 3 4 5
L2:
[[1]]
[1] 6 7 8 9 10
List Addition:
[1] 7 9 11 13 15
List Subtraction:
[1] -5 -5 -5 -5 -5
List Multiplication:
[1] 6 14 24 36 50
List Division:
[1] 0.1666667 0.2857143 0.3750000 0.4444444 0.5000000
17
Indexing List
L[2:3] is:
$second
[1] 2
$third
$third$alpha
[1] 3.1
$third$beta
[1] 3.2
Converting Between Vectors / Lists
[[1]]
[1] 1
[[2]]
[1] 6
[[3]]
[1] 21
[[4]]
[1] 107
Combining Lists
List A:
[[1]]
[1] 1 2 3
[[2]]
[1] 3.141593
[[3]]
[1] "periyar"
[[4]]
[1] "university"
[[5]]
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
18
Insert/ Remove elements in list:
List A:
[[1]]
[1] 1
[[2]]
[1] 3
[[3]]
[1] 5
List A:
[[1]]
[1] 1
[[2]]
[1] 3
[[3]]
[1] 7
19
20
EX NO : 4
TO PERFORM DATA-FRAME OPERATIONS
DATE:
21
CODE
# Exercise 4: data frame creation, properties, ordering, access, modification,
# and adding/deleting rows and columns.
# A data frame is a two-dimensional data structure in R whose columns can
# hold different types of data.
# A data frame is created with the data.frame() function:
#   mydata <- data.frame(col1, col2, ..., colN)
# where col1, col2, ... are column vectors of any type (such as character,
# numeric, or logical).

# Creating a data frame
patientID <- c(1, 2, 3, 4)
age <- c(25, 34, 28, 52)
diabetes <- c("Type1", "Type2", "Type1", "Type1")
status <- c("Poor", "Improved", "Excellent", "Poor")
patientdata <- data.frame(patientID, age, diabetes, status)
writeLines("Creating a Dataframe: patientdata")
print(patientdata)

# Data frame properties
writeLines("Number of rows in patientdata:")
print(nrow(patientdata))
writeLines("Number of columns in patientdata:")
print(ncol(patientdata))

# order() returns the row permutation that sorts by age (ascending, then
# descending); the data frame itself is not reordered.
writeLines("Order patientdata by Age")
print(patientdata[order(patientdata$age), ])
print(patientdata[order(patientdata$age, decreasing = TRUE), ])

# Accessing elements: columns by position, columns by name, one column as a
# vector
writeLines("Access Elements in patientdata:")
print(patientdata[1:2])
print(patientdata[c("diabetes", "status")])
print(patientdata$age)

# Modifying an element in the data frame
writeLines("Modify Elements in patientdata:")
patientdata[1, "age"] <- 30
print(patientdata)

# Adding elements: rbind() appends a row, cbind() appends a column.
# Each header is now printed before the table it describes, and the cbind()
# result is assigned so that the later column deletion has something to delete.
writeLines("Add Elements to patientdata:")
patientdata <- rbind(patientdata, list(5, 40, "Type2", "Improved"))
print(patientdata)
patientdata <- cbind(patientdata, gender = c("Male", "Male", "Female", "Male", "Female"))
print(patientdata)

# Deleting components: assigning NULL drops a column; a negative row index
# shows the data frame without that row (patientdata itself keeps all rows).
writeLines("Delete Elements from patientdata:")
patientdata$gender <- NULL
print(patientdata)
print(patientdata[-5, ])
23
OUTPUT
Creating a Dataframe: patientdata
patientID age diabetes status
1 1 25 Type1 Poor
2 2 34 Type2 Improved
3 3 28 Type1 Excellent
4 4 52 Type1 Poor
Number of rows in patientdata:
[1] 4
Number of columns in patientdata:
[1] 4
Order patientdata by Age
patientID age diabetes status
1 1 25 Type1 Poor
3 3 28 Type1 Excellent
2 2 34 Type2 Improved
4 4 52 Type1 Poor
Access Elements in patientdata:
patientID age
1 1 25
2 2 34
3 3 28
4 4 52
diabetes status
1 Type1 Poor
2 Type2 Improved
3 Type1 Excellent
4 Type1 Poor
Modify Elements in patientdata:
patientID age diabetes status
1 1 30 Type1 Poor
2 2 34 Type2 Improved
3 3 28 Type1 Excellent
4 4 52 Type1 Poor
24
Add Elements to patientdata:
patientID age diabetes status gender
1 1 30 Type1 Poor Male
2 2 34 Type2 Improved Male
3 3 28 Type1 Excellent Female
4 4 52 Type1 Poor Male
5 5 40 Type2 Improved Female
Delete Elements from patientdata:
patientID age diabetes status
1 1 30 Type1 Poor
2 2 34 Type2 Improved
3 3 28 Type1 Excellent
4 4 52 Type1 Poor
25
26
EX NO : 5
TO PERFORM STRINGS AND FACTOR
DATE:
OPERATIONS
27
CODE
# Exercise 5: string operations and factor operations.

# String operations
# Creating strings (single and double quotes are equivalent).
# The names str_a/str_b/str_c are used so that the base function c() is not
# shadowed by a variable called `c`.
str_a <- "First"
str_b <- "String"
str_c <- "Program in R"
writeLines("Creating Strings")
writeLines("String A:")
print(str_a)
writeLines("String B:")
print(str_b)
writeLines("String C:")
print(str_c)

# Concatenating strings - paste() function
MyString <- paste(str_a, str_b, str_c, sep = " ", collapse = NULL)
writeLines("Concatenating A, B and C to MyString")
print(MyString)

# Counting the number of characters in a string - nchar() function
writeLines("Number of Characters in the String:")
print(nchar(MyString))

# Changing the case - toupper() & tolower() functions
writeLines("Changing to Upper and Lower case:")
print(toupper(MyString))
print(tolower(MyString))

# Extracting part of a string - substring() function (characters 1 to 5)
writeLines("Extracting parts of a string:")
print(substring(MyString, 1, 5))

# Factor operations
# Given the vector status <- c("Poor", "Improved", "Excellent", "Poor"),
# the statement status <- factor(status, ordered = TRUE) encodes the vector
# as (3, 2, 1, 3) and associates these values internally as
# 1 = Excellent, 2 = Improved and 3 = Poor.
# By default, factor levels for character vectors are created in
# alphabetical order.
writeLines("Factor Operations")
patientID <- c(1, 2, 3, 4)
age <- c(25, 34, 28, 52)
diabetes <- c("Type1", "Type2", "Type1", "Type1")
status <- c("Poor", "Improved", "Excellent", "Poor")
diabetes <- factor(diabetes)
# The argument is spelled out as `ordered`; the original relied on partial
# matching of `order=`.
status <- factor(status, ordered = TRUE)
writeLines("Creating Factor: Patient")
patientdata <- data.frame(patientID, age, diabetes, status)
print(patientdata)
# str() shows the structure (column types and factor codes) of the data frame
writeLines("Srting in Patient:")
str(patientdata)
writeLines("Summary of Patient")
print(summary(patientdata))
29
OUTPUT
Creating Strings
String A:
[1] "First"
String B:
[1] "String"
String C:
[1] "Program in R"
Concatenating A, B and C to MyString
[1] "First String Program in R"
Number of Characters in the String:
[1] 25
Changing to Upper and Lower case:
[1] "FIRST STRING PROGRAM IN R"
[1] "first string program in r"
Extracting parts of a string:
[1] "First"
Factor Operations
Creating Factor: Patient
patientID age diabetes status
1 1 25 Type1 Poor
2 2 34 Type2 Improved
3 3 28 Type1 Excellent
4 4 52 Type1 Poor
Srting in Patient:
'data.frame': 4 obs. of 4 variables:
$ patientID: num 1 2 3 4
$ age : num 25 34 28 52
$ diabetes : Factor w/ 2 levels "Type1","Type2": 1 2 1 1
$ status : Ord.factor w/ 3 levels "Excellent"<"Improved"<..: 3 2 1 3
Summary of Patient
patientID age diabetes status
Min. :1.00 Min. :25.00 Type1:3 Excellent:1
1st Qu.:1.75 1st Qu.:27.25 Type2:1 Improved :1
Median :2.50 Median :31.00 Poor :2
Mean :2.50 Mean :34.75
3rd Qu.:3.25 3rd Qu.:38.50
Max. :4.00 Max. :52.00
30
31
EX NO : 6
TO PERFORM CONTROL STATEMENT
DATE:
OPERATIONS
32
CODE
# Exercise 6: control statements (if / else / while) - Fibonacci series.
# A Fibonacci series is a series of numbers in which each number is the sum
# of the two preceding numbers. The simplest is 0, 1, 1, 2, 3, 5, 8, etc.

# Print the first `nterms` Fibonacci numbers, one per line.
#
# nterms: the number of terms to print; must be a single positive number.
# Returns (invisibly) the numeric vector of printed terms, or NULL when
# `nterms` is missing, NA or not positive (a message is printed instead).
Fibonacci <- function(nterms) {
  # Check that the number of terms is valid. NA is tested first because
  # `if (NA <= 0)` would stop with an error.
  if (length(nterms) != 1L || is.na(nterms) || nterms <= 0) {
    print("Please enter a positive integer")
    return(invisible(NULL))
  }

  # First two terms
  n1 <- 0
  n2 <- 1

  print("Fibonacci sequence:")
  print(n1)
  if (nterms == 1) {
    return(invisible(n1))
  }
  print(n2)

  # Collect the terms as they are printed so the caller can also use them
  terms <- numeric(nterms)
  terms[2] <- n2
  count <- 2
  while (count < nterms) {
    nth <- n1 + n2
    print(nth)
    # Update values for the next iteration
    n1 <- n2
    n2 <- nth
    count <- count + 1
    terms[count] <- nth
  }
  invisible(terms)
}

# Take input from the user. as.integer() yields NA for text that is not a
# number, which Fibonacci() reports as invalid input.
nterms <- as.integer(readline(prompt = "How many terms? "))
Fibonacci(nterms)
34
OUTPUT
Fibonacci(25)
[1] 0 1 1 2 3 5 8 13 21 34 55 89 144
[16] 233 377 610 987 1597 2584 4181 6765 10946 17711
28657 46368
35
36
EX NO : 7
TO PERFORM DATA IMPORT/EXPORT
DATE:
OPERATIONS
37
CODE
# Exercise 7: data import/export - write a data frame to CSV, text and
# Excel files and read each file back.
# The setwd("H:/ ") and rm(list = ls()) calls were dropped because they
# clobber the caller's session; the output locations are named explicitly
# below instead.

# The xlsx package provides write.xlsx() and read.xlsx()
# install.packages("xlsx")
library("xlsx")

# Student marks
m1 <- c(23, 56, 78, 90, 76)
m2 <- c(45, 67, 84, 35, 67)
m3 <- c(23, 56, 78, 54, 89)
m4 <- c(67, 98, 65, 43, 56)
m5 <- c(45, 67, 89, 76, 54)
var2 <- (1:5) / 10
var3 <- c("R", "Python", "Data Mining", "Big Data", "C++")

# Build the data frame and give its columns descriptive names
df1 <- data.frame(m1, m2, m3, m4, m5, var2, var3)
View(df1)
names(df1) <- c("int1", "int2", "int3", "int4", "int5", "float", "char")
View(df1)

# NOTE(review): the original file names were lost when this manual was
# extracted (they appear only as "[Link]"). The three names below are
# placeholders - replace them with the intended file names.
data_dir <- "H:/PU - GL 2018/R - Supportive/2018-19-Odd Sem"
csv_file <- file.path(data_dir, "marks.csv")
txt_file <- "marks.txt"  # relative path: written to the current working directory
xlsx_file <- file.path(data_dir, "marks.xlsx")

# Create a .csv file and read it back
write.csv(df1, csv_file, row.names = FALSE)
df2 <- read.csv(csv_file)
print(df2)
View(df2)

# Convert the data frame to a matrix (the character column makes every cell
# a character string)
DM <- as.matrix(df1)

# Create a .txt file, display it, and read it back
write(DM, file = txt_file)
file.show(txt_file)
df3 <- read.table(txt_file, header = FALSE, sep = "/", strip.white = TRUE,
                  na.strings = "EMPTY")
View(df3)

# Create an .xlsx file and read its first sheet back
write.xlsx(df1, xlsx_file, row.names = FALSE, sheetName = "Data Frame")
df4 <- read.xlsx(xlsx_file, sheetIndex = 1)
View(df4)
39
OUTPUT
CSV File:
int1 int2 int3 int4 int5 float char
1 23 45 23 67 45 0.1 R
2 56 67 56 98 67 0.2 Python
3 78 84 78 65 89 0.3 Data Mining
4 90 35 54 43 76 0.4 Big Data
5 76 67 89 56 54 0.5 C++
Text File:
Excel File:
40
41
EX NO : 8
TO PERFORM PRE-PROCESSING OPERATIONS
DATE:
42
CODE
# Exercise 8: pre-processing - locate missing values, replace them with the
# column mean, and rescale the data.

# Load the Amelia package, which provides missmap()
library("Amelia")

# Data set: a working copy of the built-in airquality data frame
aq <- airquality

# Plot a map of the missing values
missmap(aq)

# Identify the position of every missing value. With arr.ind = TRUE, which()
# returns a two-column matrix (row, col) with one line per NA cell.
mpos <- which(is.na(aq), arr.ind = TRUE)
# The number of missing values is the number of lines of that matrix. The
# original passed the matrix itself to sprintf() as `nrow=mpos` instead of
# calling nrow(mpos).
print(sprintf("total number of missing values of the airquality dataset is:%d", nrow(mpos)))
print("the position of the missing value is:")
print(mpos)

# Column indices (second column of mpos) of the attributes that contain NAs
a <- mpos[, 2]
matt <- unique(a)
writeLines("the attributes having missing columns are:")
print(matt)

# Replace the NA values of Ozone and Solar.R with the mean of the observed
# values of the same column
writeLines("the MEAN value replacing the NA values")
aq$Ozone[is.na(aq$Ozone)] <- mean(aq$Ozone, na.rm = TRUE)
aq$Solar.R[is.na(aq$Solar.R)] <- mean(aq$Solar.R, na.rm = TRUE)
print(aq)
writeLines("the summary of airquality dataset is:")
print(summary(aq))

# Min-max normalization.
# NOTE(review): min() and max() are taken over the whole data frame, so every
# column is scaled by the same global range rather than by its own range.
# Confirm whether per-column scaling was intended.
writeLines("the MIN-MAX NORMALIZATION of airquality dataset is:")
aqmm <- (aq - min(aq)) / (max(aq) - min(aq))
print(aqmm)
44
OUTPUT
The position of the missing value is:
row col
[1,] 5 1
[2,] 10 1
[3,] 25 1
[4,] 26 1
[5,] 27 1
[6,] 32 1
[7,] 33 1
[8,] 34 1
[9,] 35 1
[10,] 36 1
[11,] 37 1
[12,] 39 1
[13,] 42 1
[14,] 43 1
[15,] 45 1
[16,] 46 1
[17,] 52 1
[18,] 53 1
[19,] 54 1
[20,] 55 1
[21,] 56 1
[22,] 57 1
[23,] 58 1
[24,] 59 1
[25,] 60 1
[26,] 61 1
[27,] 65 1
[28,] 72 1
[29,] 75 1
[30,] 83 1
[31,] 84 1
[32,] 102 1
[33,] 103 1
[34,] 107 1
[35,] 115 1
[36,] 119 1
[37,] 150 1
[38,] 5 2
[39,] 6 2
[40,] 11 2
[41,] 27 2
[42,] 96 2
[43,] 97 2
[44,] 98 2
The attributes having missing columns are:
[1] 1 2
45
the MEAN value replacing the NA values
Ozone Solar.R Wind Temp Month Day
1 41.00000 190.0000 7.4 67 5 1
2 36.00000 118.0000 8.0 72 5 2
3 12.00000 149.0000 12.6 74 5 3
4 18.00000 313.0000 11.5 62 5 4
5 42.12931 185.9315 14.3 56 5 5
6 28.00000 185.9315 14.9 66 5 6
7 23.00000 299.0000 8.6 65 5 7
8 19.00000 99.0000 13.8 59 5 8
9 8.00000 19.0000 20.1 61 5 9
10 42.12931 194.0000 8.6 69 5 10
11 7.00000 185.9315 6.9 74 5 11
12 16.00000 256.0000 9.7 69 5 12
13 11.00000 290.0000 9.2 66 5 13
14 14.00000 274.0000 10.9 68 5 14
15 18.00000 65.0000 13.2 58 5 15
………………..
…………………
the summary of airquality dataset is:
Ozone Solar.R Wind Temp Month
Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00 Min. :5.000
1st Qu.: 21.00 1st Qu.:120.0 1st Qu.: 7.400 1st Qu.:72.00 1st Qu.:6.000
Median : 42.13 Median :194.0 Median : 9.700 Median :79.00 Median :7.000
Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88 Mean :6.993
3rd Qu.: 46.00 3rd Qu.:256.0 3rd Qu.:11.500 3rd Qu.:85.00 3rd Qu.:8.000
Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00 Max. :9.000
Day
Min. : 1.0
1st Qu.: 8.0
Median :16.0
Mean :15.8
3rd Qu.:23.0
Max. :31.0
46
The MIN-MAX NORMALIZATION of airquality dataset is:
Ozone Solar.R Wind Temp Month Day
1 0.120120120 0.56756757 0.019219219 0.1981982 0.01201201 0.000000000
2 0.105105105 0.35135135 0.021021021 0.2132132 0.01201201 0.003003003
3 0.033033033 0.44444444 0.034834835 0.2192192 0.01201201 0.006006006
4 0.051051051 0.93693694 0.031531532 0.1831832 0.01201201 0.009009009
5 0.123511442 0.55534987 0.039939940 0.1651652 0.01201201 0.012012012
6 0.081081081 0.55534987 0.041741742 0.1951952 0.01201201 0.015015015
7 0.066066066 0.89489489 0.022822823 0.1921922 0.01201201 0.018018018
8 0.054054054 0.29429429 0.038438438 0.1741742 0.01201201 0.021021021
9 0.021021021 0.05405405 0.057357357 0.1801802 0.01201201 0.024024024
10 0.123511442 0.57957958 0.022822823 0.2042042 0.01201201 0.027027027
11 0.018018018 0.55534987 0.017717718 0.2192192 0.01201201 0.030030030
12 0.045045045 0.76576577 0.026126126 0.2042042 0.01201201 0.033033033
13 0.030030030 0.86786787 0.024624625 0.1951952 0.01201201 0.036036036
14 0.039039039 0.81981982 0.029729730 0.2012012 0.01201201 0.039039039
15 0.051051051 0.19219219 0.036636637 0.1711712 0.01201201 0.042042042
…………………
…………………
47
48
EX NO : 9
TO PERFORM BASIC STATISTICAL
DATE:
OPERATIONS ON DATASET
49
CODE
# Exercise 9: basic statistical operations (mean, median, standard deviation)
# on the iris data set.

# Take the iris data set without its fifth column and convert the four
# remaining (numeric) columns to a matrix. Columns 1 to 4 are Sepal.Length,
# Sepal.Width, Petal.Length and Petal.Width.
dm <- iris[, -5]
dm <- as.matrix(dm)

# MEAN: over the whole matrix, per attribute, and of the attribute means
writeLines("@@@ MEAN VALUE @@@")
meandm <- mean(dm)
writeLines("******* MEAN OF IRIS DATASET *******")
writeLines(sprintf("the mean of iris dataset is:%f", meandm))
writeLines("*********MEAN OF CONDITIONAL ATTRIBUTES********")
sl <- mean(dm[, 1])
writeLines(sprintf("The mean of Sepal.Length is:%f", sl))
sw <- mean(dm[, 2])
writeLines(sprintf("The mean of Sepal.Width is:%f", sw))
pl <- mean(dm[, 3])
writeLines(sprintf("The mean of Petal.Length is:%f", pl))
pw <- mean(dm[, 4])
writeLines(sprintf("The mean of Petal.Width is:%f", pw))
ca <- c(sl, sw, pl, pw)
meanca <- mean(ca)
writeLines(sprintf("The mean of conditional attribute is:%f", meanca))

# MEDIAN: over the whole matrix, per attribute, and of the attribute medians
writeLines(sprintf("@@@ THE CALCULATION OF MEDIAN VALUE @@@"))
mediandm <- median(dm)
writeLines("*******MEDIAN OF IRIS DATASET*******")
writeLines(sprintf("The median of iris dataset is:%f", mediandm))
writeLines("*********MEDIAN OF CONDITIONAL ATTRIBUTES********")
sl <- median(dm[, 1])
writeLines(sprintf("The median of Sepal.Length is:%f", sl))
sw <- median(dm[, 2])
writeLines(sprintf("The median of Sepal.Width is:%f", sw))
pl <- median(dm[, 3])
writeLines(sprintf("the median of Petal.Length is:%f", pl))
pw <- median(dm[, 4])
writeLines(sprintf("The median of Petal.Width is:%f", pw))
ca <- c(sl, sw, pl, pw)
medianca <- median(ca)
writeLines(sprintf("The median of conditional attribute is:%f", medianca))

# STANDARD DEVIATION: over the whole matrix, per attribute, and of the
# attribute standard deviations
writeLines("@ THE CALCULATION OF STANDARD DEVIATION VALUE @")
sddm <- sd(dm)
writeLines("******* STANDARD DEVIATION OF IRIS DATASET *******")
writeLines(sprintf("the standard deviation of iris dataset is:%f", sddm))
writeLines("*STANDARD DEVIATION OF CONDIOTIONAL ATTRIBUTES *")
sl <- sd(dm[, 1])
writeLines(sprintf("The standard deviation of Sepal.Length is:%f", sl))
sw <- sd(dm[, 2])
writeLines(sprintf("The standard deviation of Sepal.Width is:%f", sw))
pl <- sd(dm[, 3])
writeLines(sprintf("The standard deviation of Petal.Length is:%f", pl))
pw <- sd(dm[, 4])
writeLines(sprintf("The standard deviation of Petal.Width is:%f", pw))
ca <- c(sl, sw, pl, pw)
sdca <- sd(ca)
writeLines(sprintf("The standard deviation of conditional attribute is:%f", sdca))
52
OUTPUT
@@@ MEAN VALUE @@@
******* MEAN OF IRIS DATASET *******
The mean of iris dataset is:3.464500
*********MEAN OF CONDITIONAL ATTRIBUTES********
The mean of Sepal.Length is:5.843333
The mean of Sepal.Width is:3.057333
The mean of Petal.Length is:3.758000
The mean of Petal.Width is:1.199333
The mean of conditional attribute is:3.464500
@@@ THE CALCULATION OF MEDIAN VALUE @@@
*******MEDIAN OF IRIS DATASET*******
The median of iris dataset is:3.200000
*********MEDIAN OF CONDITIONAL ATTRIBUTES********
The median of Sepal.Length is:5.800000
The median of Sepal.Width is:3.000000
The median of Petal.Length is:4.350000
The median of Petal.Width is:1.300000
The median of conditional attribute is:3.675000
@ THE CALCULATION OF STANDARD DEVIATION VALUE @
The standard deviation of iris dataset is:1.975490
*STANDARD DEVIATION OF CONDIOTIONAL ATTRIBUTES *
The standard deviation of Sepal.Length is:0.828066
The standard deviation of Sepal.Width is:0.435866
The standard deviation of Petal.Length is:1.765298
The standard deviation of Petal.Width is:0.762238
The standard deviation of conditional attribute is:0.571299
53
54
EX NO : 10
TO PERFORM DATA EXPLORATION
DATE:
OPERATIONS
55
CODE
# Exercise 10: data exploration - read student marks from the console and
# report maximum, minimum, total, average, square roots and rounded roots.

# readline() returns text, so the count is converted to an integer and
# validated before it drives the loop.
N <- as.integer(readline("Enter the number of students:"))
if (is.na(N) || N < 1L) {
  stop("The number of students must be a positive integer", call. = FALSE)
}

# Read one mark per student. The vector is preallocated, and seq_len() is
# used so that the loop bounds are always well defined.
s <- character(N)
for (i in seq_len(N)) {
  s[i] <- readline("Enter the mark of Students:")
}
# Convert the marks from text to integers
s <- as.integer(s)

# Maximum value
maximum <- max(s)
print(sprintf("The maximum mark obtained: %d", maximum))

# Minimum value
minimum <- min(s)
print(sprintf("The minimum mark obtained : %d", minimum))

# Total value
writeLines("The total value of marks :")
total <- sum(s)
print(total)

# Average value. mean() returns the single average directly; the original
# used ave(), which repeats the mean once per element, and printed element 1.
writeLines("The average value of marks :")
average <- mean(s)
print(average)

# Square root of every mark
writeLines("The squareroot value of marks :")
squareroot <- sqrt(s)
print(squareroot)

# Rounded square roots. The result is stored in `rounded` so that the base
# function round() is not shadowed by a variable of the same name.
writeLines("The round value of squareroot marks :")
rounded <- round(squareroot)
print(rounded)
57
OUTPUT
Enter the number of students:6
Enter the mark of Students:80
Enter the mark of Students:90
Enter the mark of Students:60
Enter the mark of Students:30
Enter the mark of Students:20
Enter the mark of Students:50
[1] "The maximum mark obtained: 90"
[1] "The minimum mark obtained : 20"
The total value of marks :
[1] 330
The average value of marks :
[1] 55
The squareroot value of marks :
[1] 8.944272 9.486833 7.745967 5.477226 4.472136 7.071068
The round value of squareroot marks :
[1] 9 9 8 5 4 7
58
59
EX NO : 11
TO PERFORM DATA VISUALIZATION
DATE:
OPERATIONS
60
CODE
# Exercise 11: draw the 2009-2018 rainfall figures with six base-graphics
# chart types (histogram, pie, bar, box, line and scatter).
writeLines("Visualization of Average Rainfall in India for Last 10 Years")

# One rainfall value per year; the years double as element names and as
# colour indices for the pie and bar charts.
Year <- c(2009, 2010, 2011, 2012, 2013,
          2014, 2015, 2016, 2017, 2018)
Rainfall <- c(69.43, 43.15, 35.23, 50.03, 60.02,
              47.62, 48.38, 38.69, 52.48, 58.18)
writeLines("Rainfall Data from 2009-2018")
names(Rainfall) <- Year
Rainfall

# Title shared by the pie, bar, line and scatter charts
chart_title <- "Average Rainfall in India for Last 10 Years"

# Histogram of the rainfall values
writeLines("Histogram for Rainfall Data")
hist(Rainfall, col = "yellow", border = "blue")

# Pie chart, one slice per year
writeLines("Pie Chart for Rainfall Data")
pie(Rainfall, col = Year, main = chart_title)

# Bar chart, one bar per year
writeLines("Bar Chart for Rainfall Data")
barplot(Rainfall, col = Year, main = chart_title)

# Box plot of the rainfall values
writeLines("Box Plot for Rainfall Data")
boxplot(Rainfall, main = "Boxplot of Rainfall")

# Line graph: type "o" draws points joined by lines
writeLines("Line Graph for Rainfall Data")
plot(Year, Rainfall, type = "o", col = "blue", main = chart_title)

# Scatter plot: points only
writeLines("Scatterplot for Rainfall Data")
plot(Year, Rainfall, col = "red", main = chart_title)
61
OUTPUT
Visualization of Average Rainfall in India for Last 10 Years
Rainfall Data from 2009-2018
2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
69.43 43.15 35.23 50.03 60.02 47.62 48.38 38.69 52.48 58.18
Histogram for Rainfall Data
62
Pie Chart for Rainfall Data
Bar Chart for Rainfall Data
63
Box Plot for Rainfall Data
Line Graph for Rainfall Data
64
Scatterplot for Rainfall Data
65
66
EX NO : 12
TO PERFORM T-TEST
DATE:
67
CODE
# Exercise 12: one-sample, paired-sample and two-sample t-tests.
writeLines("Perform t-Tests ")

# One-sample t-test on 50 random values drawn from N(mean = 500, sd = 100).
# NOTE: the sample is stored under the name `rnorm`, which t.test() echoes in
# its "data:" line; calls to rnorm() still resolve to the function because R
# skips non-function objects when looking up a function name.
rnorm <- rnorm(50, 500, 100)
writeLines("Summary of Data ")
print(summary(rnorm))
writeLines("One-Sample t-Tests ")
print(t.test(rnorm, mu = 500))

# Paired-samples t-test: each position holds the same subject's score before
# and after
Pretest <- c(25.0, 41.7, 41.7, 54.2, 29.2, 50.0, 54.2, 45.8, 54.2, 33.3, 33.3, 54.2, 37.5, 12.5)
Posttest <- c(41.7, 66.7, 91.7, 70.8, 70.8, 54.2, 87.5, 54.2, 70.8, 50.0, 58.3, 79.2, 87.5, 45.8)
writeLines("Paired-samples t Test ")
print(t.test(Pretest, Posttest, paired = TRUE))

# Relationship between the one-sample and the paired-sample t-test: a paired
# test is a one-sample test on the pairwise differences
Differences <- Posttest - Pretest
writeLines("Relationship between One-Sample and Paired-Sample t-Test")
print(t.test(Differences))

# Two-sample t-test on the two halves of Pretest. Integer division keeps the
# split indices whole; with n / 2 an odd-length vector would produce
# fractional indices and silently lose its last element.
n <- length(Pretest)
half <- n %/% 2
Pretest1 <- Pretest[seq_len(half)]
Pretest2 <- Pretest[(half + 1):n]
writeLines("Two-Sample t Test")
print(t.test(Pretest1, Pretest2))
68
OUTPUT
Perform t-Tests
Summary of Data
Min. 1st Qu. Median Mean 3rd Qu. Max.
280.9 406.4 481.7 486.5 550.4 799.8
One Sample t-test
data: rnorm
t = -0.84971, df = 49, p-value = 0.3996
alternative hypothesis: true mean is not equal to 500
95 percent confidence interval:
454.5055 518.4547
sample estimates:
mean of x
486.4801
Paired t-test
data: Pretest and Posttest
t = -6.7973, df = 13, p-value = 1.268e-05
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-34.11287 -17.65856
sample estimates:
mean of the differences
-25.88571
Relationship between One-Sample and Paired-Sample t-Test
One Sample t-test
data: Differences
t = 6.7973, df = 13, p-value = 1.268e-05
alternative hypothesis: true mean is not equal to 0
95 percent confidence interval:
17.65856 34.11287
sample estimates:
mean of x
25.88571
Two-Sample t Test
Welch Two Sample t-test
data: Pretest1 and Pretest2
t = 0.51018, df = 11.439, p-value = 0.6196
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-11.85835 19.05835
sample estimates:
mean of x mean of y
42.28571 38.68571
69
70
EX NO : 13
TO PERFORM ANOVA
DATE:
71
CODE
# Exercise 13: ANOVA - Analysis of Variance on the mileage of three
# automobile brands.
writeLines("Perform ANOVA-Analysis of Variance")

# Five mileage readings (mpg) for each of the brands A, B and C
mpg <- c(34, 35, 34.3, 35.5, 35.8, 35.5, 36.5, 36.4, 37, 37.6, 33.3, 34, 34.7, 33, 34.9)
brand <- c("A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "C", "C", "C", "C", "C")
mileage <- data.frame(mpg = mpg, brand = brand)
# The attach(mileage) call was dropped: mpg and brand already exist as
# variables, so attaching the data frame only produced a masking message.
# The bare factor(brand) statement, whose result was discarded, is dropped too.

writeLines("Create a Dataset - Automobile Mileage of Three Brands")
print(mileage)

writeLines("Box Plot of Dataset")
boxplot(mpg ~ brand)

# The grouping variable must be a factor for aov()
group <- factor(brand)

# Use aov() to perform the ANOVA
results <- aov(mpg ~ group)

# Display the ANOVA table and the table of group means
writeLines("ANOVA Test")
print(summary(results))
print(model.tables(results, type = "means"))
72
OUTPUT
Create a Dataset - Automobile Mileage of Three Brands
mpg brand
1 34.0 A
2 35.0 A
3 34.3 A
4 35.5 A
5 35.8 A
6 35.5 B
7 36.5 B
8 36.4 B
9 37.0 B
10 37.6 B
11 33.3 C
12 34.0 C
13 34.7 C
14 33.0 C
15 34.9 C
Box Plot of Dataset
ANOVA Test
Df Sum Sq Mean Sq F value Pr(>F)
group 2 17.617 8.809 13.99 0.000731 ***
Residuals 12 7.556 0.630
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Tables of means
Grand mean
35.16667
group
A B C
34.92 36.60 33.98
73
74
EX NO : 14
TO PERFORM LINEAR REGRESSION AND
DATE:
LOGISTIC REGRESSION
75
CODE
# Exercise 14: fit a regression model of AVG on the six test scores
# T_C01..T_C06, predict on a train/test split, and flag students by average.

# NOTE(review): the CSV file name was lost when this manual was extracted
# ("[Link]" is an extraction artifact) - replace it with the real file name.
# The data is read into `x` because every later statement refers to `x`
# (the original assigned it to `X`).
x <- read.csv("H:/PU - GL 2018/R - Supportive/2018-19-Odd Sem/[Link]")
str(x)

# Partition of data: each row goes to group 1 (train) with probability 0.8
# or to group 2 (test) with probability 0.2. The fixed seed makes the split
# reproducible.
set.seed(1234)
ind <- sample(2, nrow(x), replace = TRUE, prob = c(0.8, 0.2))
train <- x[ind == 1, ]
test <- x[ind == 2, ]
plot(x$RESULT, x$AVG)

# Model. The object is named `logistic` because every later statement refers
# to `logistic` (the original assigned it to `Logistic`).
# NOTE(review): glm() is called without a `family` argument, so it uses its
# default gaussian family; a logistic regression would need
# family = binomial and a binary response. Confirm the intended model.
logistic <- glm(AVG ~ T_C01 + T_C02 + T_C03 + T_C04 + T_C05 + T_C06, data = x)
print(summary(logistic))

# Prediction on the training and the test rows
p1 <- predict(logistic, train, type = "response")
print(head(p1))
print(head(train))
p2 <- predict(logistic, test, type = "response")
print(head(p2))
print(head(test))

# Misclassification error: cross-tabulate fitted values against AVG
cm <- table(predict(logistic), x$AVG)
print(cm)

# Flag the students whose average is below 50. The original passed the flag
# vector to sprintf() as an argument that the format string never uses; the
# message is now printed once and the flags are shown with View().
a <- x$AVG < 50
print(" the student will not get seat")
View(a)

# Flag the students whose average lies between 40 and 80. The element-wise
# `&` is required for a vector mask: `||` only accepts single values (an
# error since R 4.3), and "> 40 or < 80" would be TRUE for every value.
b <- x$AVG > 40 & x$AVG < 80
View(b)

# Flag the students whose average is above 80. The flags are stored in
# `above_80` so that the base function c() is not shadowed.
above_80 <- x$AVG > 80
print(" the student will get seat")
View(above_80)
77
OUTPUT
*******Simple Linear Regression*******
Coefficients:
(Intercept) UNEM
3957 1134
[1] "The predicted fall enrollment, given a 9 Percent of unemployment rate is 14161 students"
Residuals:
Min 1Q Median 3Q Max
-7640.0 -1046.5 602.8 1934.3 4187.2
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3957.0 4000.1 0.989 0.3313
UNEM 1133.8 513.1 2.210 0.0358
---
*******Multiple Linear Regression*******
Coefficients:
(Intercept) UNEM HGRAD INC
-9153.2545 450.1245 0.4065 4.2749
[1] "The predicted fall enrollment, given a 9 Percent of unemployment rate,spring high school
graduating class (HGRAD) of 100,000, and a per capita income (INC) of $30,000 is 163792
students"
Residuals:
Min 1Q Median 3Q Max
-1148.84 -489.71 -1.88 387.40 1425.75
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -9.153e+03 1.053e+03 -8.691 5.02e-09 ***
UNEM 4.501e+02 1.182e+02 3.809 0.000807 ***
HGRAD 4.065e-01 7.602e-02 5.347 1.52e-05 ***
INC 4.275e+00 4.947e-01 8.642 5.59e-09 ***
*******Logistic Regression*******
Deviance Residuals:
Min 1Q Median 3Q Max
-8.4601 -0.2188 0.2048 0.5724 1.4485
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.81462 4.36362 0.874 0.38973
T_C01 0.13812 0.03396 4.068 0.00037 ***
T_C02 0.11006 0.08167 1.348 0.18900
T_C03 0.15764 0.07939 1.986 0.05731 .
T_C04 0.17885 0.06284 2.846 0.00835 **
T_C05 0.19952 0.05515 3.617 0.00121 **
T_C06 0.16188 0.02510 6.449 6.53e-07 ***
---
78
79
EX NO : 15
TO PERFORM CORRELATION AND MULTI-
DATE:
CORRELATION COEFFICIENT
80
CODE
# Exercise 15: correlation between age and glucose level, and the multiple
# correlation of glucose level on age and weight.
writeLines("Compute correlation between age and glucose level")
age <- c(43, 21, 25, 42, 57, 59)
writeLines("Age")
print(age)
glucose_level <- c(99, 65, 79, 75, 87, 81)
writeLines("Glucose_level")
print(glucose_level)

# Pearson correlation
writeLines("Pearson Correlation")
Pearson_cor <- cor(age, glucose_level, method = "pearson")
print(Pearson_cor)

# Kendall correlation
writeLines("Kendall Correlation")
Kendall_cor <- cor(age, glucose_level, method = "kendall")
print(Kendall_cor)

# Spearman correlation
writeLines("Spearman Correlation")
Spearman_cor <- cor(age, glucose_level, method = "spearman")
print(Spearman_cor)

# Multiple correlation between age, weight and glucose level
writeLines("Compute multicorrelation between age, weight and glucose level")
weight <- c(55, 40, 42, 55, 70, 65)

# Correlation between age and glucose
a_g <- cor(age, glucose_level, method = "pearson")
# Correlation between age and weight
a_w <- cor(age, weight, method = "pearson")
# Correlation between weight and glucose
w_g <- cor(weight, glucose_level, method = "pearson")

# Multiple correlation of glucose level (dependent) on age and weight:
#   R = sqrt((r_ag^2 + r_wg^2 - 2 * r_ag * r_wg * r_aw) / (1 - r_aw^2))
# The denominator uses the correlation between the two predictors (a_w).
# The original used w_g there, which gave a value below the simple
# correlation a_g - impossible for a multiple correlation coefficient.
writeLines("Multiple Correlation")
Multiple_Correlation <- sqrt((a_g^2 + w_g^2 - 2 * a_g * a_w * w_g) / (1 - a_w^2))
print(Multiple_Correlation)
82
OUTPUT
Compute correlation between age and glucose level
Age
[1] 43 21 25 42 57 59
Glucose_level
[1] 99 65 79 75 87 81
Pearson Correlation
[1] 0.5298089
Kendall Correlation
[1] 0.4666667
Spearman Correlation
[1] 0.7142857
Compute multicorrelation between age, weight and glucose level
Multiple Correlation
[1] 0.1103142
83
84