1. R语言基础
变量赋值:
# Assign the value 42 to x
x <- 42
# 打印 x
x
# Assign a value to the variables my_apples and my_oranges
my_apples <- 5
my_oranges <- 6
# Add these two variables together
my_oranges + my_apples
# Create the variable my_fruit
my_fruit <- my_oranges + my_apples
数据类型:
# Declare variables of different types
my_numeric <- 42
my_character <- "universe"
my_logical <- FALSE
# Check class of my_numeric,输出为 "numeric"
class(my_numeric)
# Check class of my_character,输出为 "character"
class(my_character)
# Check class of my_logical,输出为 "logical"
class(my_logical)
2. 向量
向量创建:
#创建向量
numeric_vector <- c(1, 10, 49)
character_vector <- c("a", "b", "c")
boolean_vector <- c(TRUE, FALSE, TRUE)
可以使用names()函数为向量的元素命名:
#方法一
# 创建向量:Poker winnings from Monday to Friday
poker_vector <- c(140, -50, 20, -120, 240)
# 向量命名:Assign days as names of poker_vector
names(poker_vector) <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
#方法二
# Poker winnings from Monday to Friday
poker_vector <- c(140, -50, 20, -120, 240)
# The variable days_vector
days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
# Assign the names of the day to poker_vector
names(poker_vector) <- days_vector
向量相加:直接操作
A_vector <- c(1, 2, 3)
B_vector <- c(4, 5, 6)
# A_vector 和 B_vector 相加
total_vector <- A_vector + B_vector
# 打印 total_vector:5 7 9
total_vector
向量求和:sum()
poker_vector <- c(140, -50, 20, -120, 240)
roulette_vector <- c(-24, -50, 100, -350, 10)
# 对poker_vector求和:230
total_poker <- sum(poker_vector)
# 对roulette_vector求和:-314
total_roulette <-sum(roulette_vector)
#比较两个sum的大小:输出TRUE 或者 FALSE
total_poker > total_roulette
向量选择:
poker_vector <- c(140, -50, 20, -120, 240)
days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
names(poker_vector) <- days_vector
# 选择向量中的第三个元素:(是从1开始数的)
poker_wednesday <- poker_vector[3]
#打印出:
Wednesday
20
#选择向量中的第二,三,四个元素:
poker_midweek <- poker_vector[c(2,3,4)]
#打印出:
Tuesday Wednesday Thursday
-50 20 -120
#选择向量中第2到第5个元素(第2个和第5个都包含):
poker_select <- poker_vector[2:5]
#打印出:
Tuesday Wednesday Thursday Friday
-50 20 -120 240
计算平均值:
# poker_start is not defined anywhere earlier in these notes; it is taken here
# to be the winnings of the first three days — TODO confirm against the
# original exercise. Relies on poker_vector carrying day names.
poker_start <- poker_vector[c("Monday", "Tuesday", "Wednesday")]
# Print the average of the selected winnings
mean(poker_start)
比较选择:
poker_vector <- c(140, -50, 20, -120, 240)
# Name the elements by weekday so that the printed results carry day labels
names(poker_vector) <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
# Logical mask: TRUE for every day whose winnings are greater than 0
selection_vector <- poker_vector > 0
#打印
Monday Tuesday Wednesday Thursday Friday
TRUE FALSE TRUE FALSE TRUE
# 选择向量:只打印 poker_vector 中所有大于0的元素
poker_winning_days <- poker_vector[selection_vector]
#打印
Monday Wednesday Friday
140 20 240
矩阵
创建矩阵:
- 第一个参数是元素的集合,R将这些元素排列到矩阵的行和列中。在这里使用1:9,这是c(1,2,3,4,5,6,7,8,9)的快捷方式。
- 参数byrow = TRUE表示矩阵按行填充。如果我们希望矩阵按列填充,只需将byrow设为FALSE。
- 第三个参数nrow表示矩阵应该有三行。
# 构造一个3行的矩阵,按行填入数字1到9
matrix(1:9,byrow = TRUE, nrow = 3)
向量转化为矩阵
new_hope <- c(460.998, 314.4)
empire_strikes <- c(290.475, 247.900)
return_jedi <- c(309.306, 165.8)
# 合并这三个向量
box_office <- c(new_hope, empire_strikes, return_jedi)
# 向量转化成矩阵
star_wars_matrix <- matrix(box_office,byrow = TRUE,nrow = 3)
[,1] [,2]
[1,] 460.998 314.4
[2,] 290.475 247.9
[3,] 309.306 165.8
矩阵命名
new_hope <- c(460.998, 314.4)
empire_strikes <- c(290.475, 247.900)
return_jedi <- c(309.306, 165.8)
# 将这三个向量创建成3行矩阵
star_wars_matrix <- matrix(c(new_hope, empire_strikes, return_jedi), nrow = 3, byrow = TRUE)
# 定义行名和列名向量
region <- c("US", "non-US")
titles <- c("A New Hope", "The Empire Strikes Back", "Return of the Jedi")
# 列名是region
colnames(star_wars_matrix) <- region
# 行名是titles
rownames(star_wars_matrix) <- titles
#打印矩阵
star_wars_matrix
US non-US
A New Hope 460.998 314.4
The Empire Strikes Back 290.475 247.9
Return of the Jedi 309.306 165.8
行求和:rowSums()对每一行求和
列求和:colSums()对每一列求和
# 对每一行求和
worldwide_vector <- rowSums(star_wars_matrix)
# 打印worldwide_vector
A New Hope The Empire Strikes Back Return of the Jedi
775.398 538.375 475.106
添加行:rbind()
# Stack the rows of star_wars_matrix2 underneath those of star_wars_matrix.
# NOTE(review): star_wars_matrix2 is not defined anywhere in these notes —
# presumably a second box-office matrix with the same two columns; it must be
# defined before this line can run.
all_wars_matrix <- rbind(star_wars_matrix,star_wars_matrix2 )
添加列:cbind()函数向矩阵中添加一列或多列,该函数按列将矩阵和/或向量合并在一起。
# 把worldwide_vector作为列添加到star_wars_matrix上,结果保存为all_wars_matrix
all_wars_matrix <- cbind(star_wars_matrix,worldwide_vector)
# 打印all_wars_matrix
US non-US worldwide_vector
A New Hope 460.998 314.4 775.398
The Empire Strikes Back 290.475 247.9 538.375
Return of the Jedi 309.306 165.8 475.106
矩阵元素选择:
my_matrix[1,2]:选择第一行、第二列的元素
my_matrix[1:3,2:4]:选择1~3行和2~4列的元素
my_matrix[,1]:选择第一列所有元素
my_matrix[1,]:选择第一行所有元素
矩阵运算:
visitors <- all_wars_matrix / 5:all_wars_matrix中所有元素除以5
Factor
函数factor()将向量编码为一个factor
# Animals
animals_vector <- c("Elephant", "Giraffe", "Donkey", "Horse")
factor_animals_vector <- factor(animals_vector)
factor_animals_vector
[1] Elephant Giraffe Donkey Horse
Levels: Donkey Elephant Giraffe Horse
# Temperature: encode as an ordered factor with the explicit level order
# Low < Medium < High
temperature_vector <- c("High", "Low", "High", "Low", "Medium")
factor_temperature_vector <- factor(
  temperature_vector,
  # Full argument name; `order` only worked through partial argument matching
  ordered = TRUE,
  levels = c("Low", "Medium", "High")
)
factor_temperature_vector
[1] High Low High Low Medium
Levels: Low < Medium < High
更改level名称,用levels():
levels(factor_vector) <- c("name1", "name2",...)
summary():
survey_vector <- c("M", "F", "F", "M", "M")
factor_survey_vector <- factor(survey_vector)
levels(factor_survey_vector) <- c("Female", "Male")
factor_survey_vector
summary(factor_survey_vector)
Female Male
2 3
排序:
# 创建 speed_vector
speed_vector <- c("medium", "slow", "slow", "medium", "fast")
# 排序
factor_speed_vector <- factor(speed_vector,
ordered = TRUE,
levels = c("slow", "medium", "fast"))
# 打印 factor_speed_vector
factor_speed_vector
summary(factor_speed_vector)
[1] medium slow slow medium fast
Levels: slow < medium < fast
slow medium fast
2 2 1
选择:
# Factor value for second data analyst
da2 <- factor_speed_vector[2]
# Factor value for fifth data analyst
da5 <- factor_speed_vector[5]
# Is data analyst 2 faster than data analyst 5? FALSE
da2 > da5
Dataframe
可以装各种类型元素的“类矩阵”
显示前几行数据:head(dataframe名称)
显示后几行数据:tail(dataframe名称)
显示数据结构:str(dataframe名称)
创建dataframe:
# 定义vectors
# 第一列
name <- c("Mercury", "Venus", "Earth",
"Mars", "Jupiter", "Saturn",
"Uranus", "Neptune")
# 第二列
type <- c("Terrestrial planet",
"Terrestrial planet",
"Terrestrial planet",
"Terrestrial planet", "Gas giant",
"Gas giant", "Gas giant", "Gas giant")
# 第三列
diameter <- c(0.382, 0.949, 1, 0.532,
11.209, 9.449, 4.007, 3.883)
# 第四列
rotation <- c(58.64, -243.02, 1, 1.03,
0.41, 0.43, -0.72, 0.67)
#第五列
rings <- c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE)
# Create a data frame from the vectors
planets_df <- data.frame(name,type,diameter,rotation,rings)
planets_df
name type diameter rotation rings
1 Mercury Terrestrial planet 0.382 58.64 FALSE
2 Venus Terrestrial planet 0.949 -243.02 FALSE
3 Earth Terrestrial planet 1.000 1.00 FALSE
4 Mars Terrestrial planet 0.532 1.03 FALSE
5 Jupiter Gas giant 11.209 0.41 TRUE
6 Saturn Gas giant 9.449 0.43 TRUE
7 Uranus Gas giant 4.007 -0.72 TRUE
8 Neptune Gas giant 3.883 0.67 TRUE
选择:
planets_df[1:5,"diameter"]: "diameter"列的第1,2,3,4,5元素
条件选择:
# 取出planets_df的rings列(逻辑向量,表示各planet是否有环)
rings_vector <- planets_df$rings
# Print out rings_vector
rings_vector
# 选出所有rings是TRUE的行(rows)
planets_df[rings_vector,]
name type diameter rotation rings
5 Jupiter Gas giant 11.209 0.41 TRUE
6 Saturn Gas giant 9.449 0.43 TRUE
7 Uranus Gas giant 4.007 -0.72 TRUE
8 Neptune Gas giant 3.883 0.67 TRUE
# 选择条件:diameter < 1
subset(planets_df, subset = diameter < 1)
name type diameter rotation rings
1 Mercury Terrestrial planet 0.382 58.64 FALSE
2 Venus Terrestrial planet 0.949 -243.02 FALSE
4 Mars Terrestrial planet 0.532 1.03 FALSE
排序:
# 根据diameter大小排序: order() 升序
positions <- order(planets_df$diameter)
# Use positions to sort planets_df
planets_df[positions, ]
List
列表是 R 语言的对象集合,可以用来保存不同类型的数据,可以是数字、字符串、向量、另一个列表、矩阵和函数等。
用list()创建
list选择:使用shining_list[[2]][1]可以选择第二个组件(shining_list[[2]])中的第一个元素([1])
也可以按名称选择组件,下面两种写法都可以:
shining_list[["reviews"]]
shining_list$reviews