> install.packages("dplyr")
WARNING: Rtools is required to build R packages but is not
currently installed. Please download and install the
appropriate version of Rtools before proceeding:
https://cran.rstudio.com/bin/windows/Rtools/
Installing package into
‘C:/Users/LENOVO/AppData/Local/R/win-library/4.2’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.2/dplyr_1.1.0.zip'
Content type 'application/zip' length 1548293 bytes (1.5 MB)
downloaded 1.5 MB
package ‘dplyr’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\LENOVO\AppData\Local\Temp\RtmpqQkiOI\downloaded_packages
> install.packages("hflights")
WARNING: Rtools is required to build R packages but is not
currently installed. Please download and install the
appropriate version of Rtools before proceeding:
https://cran.rstudio.com/bin/windows/Rtools/
Installing package into
‘C:/Users/LENOVO/AppData/Local/R/win-library/4.2’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.2/hflights_0.1.zip'
Content type 'application/zip' length 3473456 bytes (3.3 MB)
downloaded 3.3 MB
package ‘hflights’ successfully unpacked and MD5 sums
checked
The downloaded binary packages are in
C:\Users\LENOVO\AppData\Local\Temp\RtmpqQkiOI\downloaded_packages
> suppressMessages(library(dplyr))
Warning message:
package ‘dplyr’ was built under R version 4.2.2
> library(hflights)
> library(magrittr)
Warning message:
package ‘magrittr’ was built under R version 4.2.2
> data(hflights)
> head(hflights)
Year Month DayofMonth DayOfWeek DepTime ArrTime
UniqueCarrier FlightNum TailNum
5424 2011 1 1 6 1400 1500
AA 428 N576AA
5425 2011 1 2 7 1401 1501
AA 428 N557AA
5426 2011 1 3 1 1352 1502
AA 428 N541AA
5427 2011 1 4 2 1403 1513
AA 428 N403AA
5428 2011 1 5 3 1405 1507
AA 428 N492AA
5429 2011 1 6 4 1359 1503
AA 428 N262AA
ActualElapsedTime AirTime ArrDelay DepDelay Origin
Dest Distance TaxiIn TaxiOut
5424 60 40 -10 0 IAH
DFW 224 7 13
5425 60 45 -9 1 IAH
DFW 224 6 9
5426 70 48 -8 -8 IAH
DFW 224 5 17
5427 70 39 3 3 IAH
DFW 224 9 22
5428 62 44 -3 5 IAH
DFW 224 9 9
5429 64 45 -7 -1 IAH
DFW 224 6 13
Cancelled CancellationCode Diverted
5424 0 0
5425 0 0
5426 0 0
5427 0 0
5428 0 0
5429 0 0
> n=nrow(hflights)
> n
[1] 227496
> c=ncol(hflights)
> c
[1] 21
> d=dim(hflights)
> d
[1] 227496 21
> hflights
Year Month DayofMonth DayOfWeek DepTime ArrTime
UniqueCarrier FlightNum TailNum
5424 2011 1 1 6 1400 1500
AA 428 N576AA
5425 2011 1 2 7 1401 1501
AA 428 N557AA
5426 2011 1 3 1 1352 1502
AA 428 N541AA
5427 2011 1 4 2 1403 1513
AA 428 N403AA
5428 2011 1 5 3 1405 1507
AA 428 N492AA
5429 2011 1 6 4 1359 1503
AA 428 N262AA
5430 2011 1 7 5 1359 1509
AA 428 N493AA
5431 2011 1 8 6 1355 1454
AA 428 N477AA
5432 2011 1 9 7 1443 1554
AA 428 N476AA
5433 2011 1 10 1 1443 1553
AA 428 N504AA
5434 2011 1 11 2 1429 1539
AA 428 N565AA
5435 2011 1 12 3 1419 1515
AA 428 N577AA
5436 2011 1 13 4 1358 1501
AA 428 N476AA
5437 2011 1 14 5 1357 1504
AA 428 N552AA
5438 2011 1 15 6 1359 1459
AA 428 N462AA
5439 2011 1 16 7 1359 1509
AA 428 N555AA
5440 2011 1 17 1 1530 1634
AA 428 N518AA
5441 2011 1 18 2 1408 1508
AA 428 N507AA
5442 2011 1 19 3 1356 1503
AA 428 N523AA
5443 2011 1 20 4 1507 1622
AA 428 N425AA
5444 2011 1 21 5 1357 1459
AA 428 N251AA
5445 2011 1 22 6 1355 1456
AA 428 N551AA
5446 2011 1 23 7 1356 1501
AA 428 N479AA
5447 2011 1 24 1 1356 1513
AA 428 N531AA
5448 2011 1 25 2 1352 1452
AA 428 N561AA
5449 2011 1 26 3 1353 1455
AA 428 N541AA
5450 2011 1 27 4 1356 1458
AA 428 N512AA
5451 2011 1 28 5 1359 1505
AA 428 N4UBAA
5452 2011 1 29 6 1355 1455
AA 428 N491AA
5453 2011 1 30 7 1359 1456
AA 428 N561AA
5454 2011 1 31 1 1441 1553
AA 428 N505AA
6343 2011 1 1 6 728 840
AA 460 N520AA
6344 2011 1 2 7 719 821
AA 460 N537AA
6345 2011 1 3 1 717 834
AA 460 N512AA
6346 2011 1 4 2 714 821
AA 460 N478AA
6347 2011 1 5 3 718 822
AA 460 N551AA
6348 2011 1 6 4 719 821
AA 460 N251AA
6349 2011 1 7 5 711 827
AA 460 N478AA
6350 2011 1 8 6 713 805
AA 460 N550AA
6351 2011 1 9 7 714 829
AA 460 N586AA
6352 2011 1 10 1 715 818
AA 460 N587AA
6353 2011 1 11 2 717 820
AA 460 N574AA
6354 2011 1 12 3 714 814
AA 460 N580AA
6355 2011 1 13 4 722 841
AA 460 N586AA
6356 2011 1 14 5 715 828
AA 460 N468AA
6357 2011 1 15 6 719 833
AA 460 N251AA
6358 2011 1 16 7 743 843
AA 460 N546AA
ActualElapsedTime AirTime ArrDelay DepDelay Origin
Dest Distance TaxiIn TaxiOut
5424 60 40 -10 0 IAH
DFW 224 7 13
5425 60 45 -9 1 IAH
DFW 224 6 9
5426 70 48 -8 -8 IAH
DFW 224 5 17
5427 70 39 3 3 IAH
DFW 224 9 22
5428 62 44 -3 5 IAH
DFW 224 9 9
5429 64 45 -7 -1 IAH
DFW 224 6 13
5430 70 43 -1 -1 IAH
DFW 224 12 15
5431 59 40 -16 -5 IAH
DFW 224 7 12
5432 71 41 44 43 IAH
DFW 224 8 22
5433 70 45 43 43 IAH
DFW 224 6 19
5434 70 42 29 29 IAH
DFW 224 8 20
5435 56 41 5 19 IAH
DFW 224 4 11
5436 63 44 -9 -2 IAH
DFW 224 6 13
5437 67 47 -6 -3 IAH
DFW 224 5 15
5438 60 44 -11 -1 IAH
DFW 224 6 10
5439 70 41 -1 -1 IAH
DFW 224 12 17
5440 64 48 84 90 IAH
DFW 224 8 8
5441 60 42 -2 8 IAH
DFW 224 7 11
5442 67 46 -7 -4 IAH
DFW 224 10 11
5443 75 42 72 67 IAH
DFW 224 9 24
5444 62 47 -11 -3 IAH
DFW 224 6 9
5445 61 44 -14 -5 IAH
DFW 224 9 8
5446 65 40 -9 -4 IAH
DFW 224 7 18
5447 77 43 3 -4 IAH
DFW 224 6 28
5448 60 40 -18 -8 IAH
DFW 224 7 13
5449 62 40 -15 -7 IAH
DFW 224 8 14
5450 62 40 -12 -4 IAH
DFW 224 12 10
5451 66 46 -5 -1 IAH
DFW 224 8 12
5452 60 46 -15 -5 IAH
DFW 224 7 7
5453 57 39 -14 -1 IAH
DFW 224 7 11
5454 72 39 43 41 IAH
DFW 224 8 25
6343 72 41 5 8 IAH
DFW 224 6 25
6344 62 43 -14 -1 IAH
DFW 224 9 10
6345 77 46 -1 -3 IAH
DFW 224 21 10
6346 67 46 -14 -6 IAH
DFW 224 6 15
6347 64 44 -13 -2 IAH
DFW 224 7 13
6348 62 44 -14 -1 IAH
DFW 224 8 10
6349 76 42 -8 -9 IAH
DFW 224 24 10
6350 52 40 -30 -7 IAH
DFW 224 3 9
6351 75 51 -6 -6 IAH
DFW 224 11 13
6352 63 44 -17 -5 IAH
DFW 224 8 11
6353 63 44 -15 -3 IAH
DFW 224 7 12
6354 60 42 -21 -6 IAH
DFW 224 10 8
6355 79 49 6 2 IAH
DFW 224 16 14
6356 73 47 -7 -5 IAH
DFW 224 15 11
6357 74 49 -2 -1 IAH
DFW 224 12 13
6358 60 45 8 23 IAH
DFW 224 5 10
Cancelled CancellationCode Diverted
5424 0 0
5425 0 0
5426 0 0
5427 0 0
5428 0 0
5429 0 0
5430 0 0
5431 0 0
5432 0 0
5433 0 0
5434 0 0
5435 0 0
5436 0 0
5437 0 0
5438 0 0
5439 0 0
5440 0 0
5441 0 0
5442 0 0
5443 0 0
5444 0 0
5445 0 0
5446 0 0
5447 0 0
5448 0 0
5449 0 0
5450 0 0
5451 0 0
5452 0 0
5453 0 0
5454 0 0
6343 0 0
6344 0 0
6345 0 0
6346 0 0
6347 0 0
6348 0 0
6349 0 0
6350 0 0
6351 0 0
6352 0 0
6353 0 0
6354 0 0
6355 0 0
6356 0 0
6357 0 0
6358 0 0
[ reached 'max' / getOption("max.print") -- omitted 227449
rows ]
> flights<-tbl_df(hflights)
Warning message:
`tbl_df()` was deprecated in dplyr 1.0.0.
ℹ Please use `tibble::as_tibble()` instead.
Call `lifecycle::last_lifecycle_warnings()` to see where
this warning was generated.
> flights
# A tibble: 227,496 × 21
Year Month Dayof…¹ DayOf…² DepTime ArrTime Uniqu…³
Fligh…⁴ TailNum Actua…⁵ AirTime ArrDe…⁶
<int> <int> <int> <int> <int> <int> <chr>
<int> <chr> <int> <int> <int>
1 2011 1 1 6 1400 1500 AA
428 N576AA 60 40 -10
2 2011 1 2 7 1401 1501 AA
428 N557AA 60 45 -9
3 2011 1 3 1 1352 1502 AA
428 N541AA 70 48 -8
4 2011 1 4 2 1403 1513 AA
428 N403AA 70 39 3
5 2011 1 5 3 1405 1507 AA
428 N492AA 62 44 -3
6 2011 1 6 4 1359 1503 AA
428 N262AA 64 45 -7
7 2011 1 7 5 1359 1509 AA
428 N493AA 70 43 -1
8 2011 1 8 6 1355 1454 AA
428 N477AA 59 40 -16
9 2011 1 9 7 1443 1554 AA
428 N476AA 71 41 44
10 2011 1 10 1 1443 1553 AA
428 N504AA 70 45 43
# … with 227,486 more rows, 9 more variables: DepDelay
<int>, Origin <chr>, Dest <chr>,
# Distance <int>, TaxiIn <int>, TaxiOut <int>, Cancelled
<int>, CancellationCode <chr>,
# Diverted <int>, and abbreviated variable names ¹
DayofMonth, ²DayOfWeek, ³UniqueCarrier,
# ⁴FlightNum, ⁵ActualElapsedTime, ⁶ArrDelay
# ℹ Use `print(n = ...)` to see more rows, and `colnames()`
to see all variable names
> flights[flights$Month==1 & flights$DayofMonth==1, ]
# A tibble: 552 × 21
Year Month Dayof…¹ DayOf…² DepTime ArrTime Uniqu…³
Fligh…⁴ TailNum Actua…⁵ AirTime ArrDe…⁶
<int> <int> <int> <int> <int> <int> <chr>
<int> <chr> <int> <int> <int>
1 2011 1 1 6 1400 1500 AA
428 N576AA 60 40 -10
2 2011 1 1 6 728 840 AA
460 N520AA 72 41 5
3 2011 1 1 6 1631 1736 AA
1121 N4WVAA 65 37 -9
4 2011 1 1 6 1756 2112 AA
1294 N3DGAA 136 113 -3
5 2011 1 1 6 1012 1347 AA
1700 N3DAAA 155 117 7
6 2011 1 1 6 1211 1325 AA
1820 N593AA 74 39 15
7 2011 1 1 6 557 906 AA
1994 N3BBAA 129 113 -9
8 2011 1 1 6 1824 2106 AS
731 N614AS 282 255 -4
9 2011 1 1 6 654 1124 B6
620 N324JB 210 181 5
10 2011 1 1 6 1639 2110 B6
622 N324JB 211 188 61
# … with 542 more rows, 9 more variables: DepDelay <int>,
Origin <chr>, Dest <chr>,
# Distance <int>, TaxiIn <int>, TaxiOut <int>, Cancelled
<int>, CancellationCode <chr>,
# Diverted <int>, and abbreviated variable names ¹
DayofMonth, ²DayOfWeek, ³UniqueCarrier,
# ⁴FlightNum, ⁵ActualElapsedTime, ⁶ArrDelay
# ℹ Use `print(n = ...)` to see more rows, and `colnames()`
to see all variable names
> filter(flights,Month==1,DayofMonth==1)
# A tibble: 552 × 21
Year Month Dayof…¹ DayOf…² DepTime ArrTime Uniqu…³
Fligh…⁴ TailNum Actua…⁵ AirTime ArrDe…⁶
<int> <int> <int> <int> <int> <int> <chr>
<int> <chr> <int> <int> <int>
1 2011 1 1 6 1400 1500 AA
428 N576AA 60 40 -10
2 2011 1 1 6 728 840 AA
460 N520AA 72 41 5
3 2011 1 1 6 1631 1736 AA
1121 N4WVAA 65 37 -9
4 2011 1 1 6 1756 2112 AA
1294 N3DGAA 136 113 -3
5 2011 1 1 6 1012 1347 AA
1700 N3DAAA 155 117 7
6 2011 1 1 6 1211 1325 AA
1820 N593AA 74 39 15
7 2011 1 1 6 557 906 AA
1994 N3BBAA 129 113 -9
8 2011 1 1 6 1824 2106 AS
731 N614AS 282 255 -4
9 2011 1 1 6 654 1124 B6
620 N324JB 210 181 5
10 2011 1 1 6 1639 2110 B6
622 N324JB 211 188 61
# … with 542 more rows, 9 more variables: DepDelay <int>,
Origin <chr>, Dest <chr>,
# Distance <int>, TaxiIn <int>, TaxiOut <int>, Cancelled
<int>, CancellationCode <chr>,
# Diverted <int>, and abbreviated variable names ¹
DayofMonth, ²DayOfWeek, ³UniqueCarrier,
# ⁴FlightNum, ⁵ActualElapsedTime, ⁶ArrDelay
# ℹ Use `print(n = ...)` to see more rows, and `colnames()`
to see all variable names
> filter(flights,UniqueCarrier=="AA" | UniqueCarrier=="UA")
# A tibble: 5,316 × 21
Year Month Dayof…¹ DayOf…² DepTime ArrTime Uniqu…³
Fligh…⁴ TailNum Actua…⁵ AirTime ArrDe…⁶
<int> <int> <int> <int> <int> <int> <chr>
<int> <chr> <int> <int> <int>
1 2011 1 1 6 1400 1500 AA
428 N576AA 60 40 -10
2 2011 1 2 7 1401 1501 AA
428 N557AA 60 45 -9
3 2011 1 3 1 1352 1502 AA
428 N541AA 70 48 -8
4 2011 1 4 2 1403 1513 AA
428 N403AA 70 39 3
5 2011 1 5 3 1405 1507 AA
428 N492AA 62 44 -3
6 2011 1 6 4 1359 1503 AA
428 N262AA 64 45 -7
7 2011 1 7 5 1359 1509 AA
428 N493AA 70 43 -1
8 2011 1 8 6 1355 1454 AA
428 N477AA 59 40 -16
9 2011 1 9 7 1443 1554 AA
428 N476AA 71 41 44
10 2011 1 10 1 1443 1553 AA
428 N504AA 70 45 43
# … with 5,306 more rows, 9 more variables: DepDelay <int>,
Origin <chr>, Dest <chr>,
# Distance <int>, TaxiIn <int>, TaxiOut <int>, Cancelled
<int>, CancellationCode <chr>,
# Diverted <int>, and abbreviated variable names ¹
DayofMonth, ²DayOfWeek, ³UniqueCarrier,
# ⁴FlightNum, ⁵ActualElapsedTime, ⁶ArrDelay
# ℹ Use `print(n = ...)` to see more rows, and `colnames()`
to see all variable names
> filter(flights,UniqueCarrier %in% c("AA","UA"))
# A tibble: 5,316 × 21
Year Month Dayof…¹ DayOf…² DepTime ArrTime Uniqu…³
Fligh…⁴ TailNum Actua…⁵ AirTime ArrDe…⁶
<int> <int> <int> <int> <int> <int> <chr>
<int> <chr> <int> <int> <int>
1 2011 1 1 6 1400 1500 AA
428 N576AA 60 40 -10
2 2011 1 2 7 1401 1501 AA
428 N557AA 60 45 -9
3 2011 1 3 1 1352 1502 AA
428 N541AA 70 48 -8
4 2011 1 4 2 1403 1513 AA
428 N403AA 70 39 3
5 2011 1 5 3 1405 1507 AA
428 N492AA 62 44 -3
6 2011 1 6 4 1359 1503 AA
428 N262AA 64 45 -7
7 2011 1 7 5 1359 1509 AA
428 N493AA 70 43 -1
8 2011 1 8 6 1355 1454 AA
428 N477AA 59 40 -16
9 2011 1 9 7 1443 1554 AA
428 N476AA 71 41 44
10 2011 1 10 1 1443 1553 AA
428 N504AA 70 45 43
# … with 5,306 more rows, 9 more variables: DepDelay <int>,
Origin <chr>, Dest <chr>,
# Distance <int>, TaxiIn <int>, TaxiOut <int>, Cancelled
<int>, CancellationCode <chr>,
# Diverted <int>, and abbreviated variable names ¹
DayofMonth, ²DayOfWeek, ³UniqueCarrier,
# ⁴FlightNum, ⁵ActualElapsedTime, ⁶ArrDelay
# ℹ Use `print(n = ...)` to see more rows, and `colnames()`
to see all variable names
> #Traditional method - dplyr
> flights[,c("DepTime", "ArrTime", "FlightNum")]
# A tibble: 227,496 × 3
DepTime ArrTime FlightNum
<int> <int> <int>
1 1400 1500 428
2 1401 1501 428
3 1352 1502 428
4 1403 1513 428
5 1405 1507 428
6 1359 1503 428
7 1359 1509 428
8 1355 1454 428
9 1443 1554 428
10 1443 1553 428
# … with 227,486 more rows
# ℹ Use `print(n = ...)` to see more rows
> select(flights, DepTime, ArrTime, FlightNum)
# A tibble: 227,496 × 3
DepTime ArrTime FlightNum
<int> <int> <int>
1 1400 1500 428
2 1401 1501 428
3 1352 1502 428
4 1403 1513 428
5 1405 1507 428
6 1359 1503 428
7 1359 1509 428
8 1355 1454 428
9 1443 1554 428
10 1443 1553 428
# … with 227,486 more rows
# ℹ Use `print(n = ...)` to see more rows
> select(flights, Year:DayofMonth, contains("Taxi"),
contains("Delay"))
# A tibble: 227,496 × 7
Year Month DayofMonth TaxiIn TaxiOut ArrDelay DepDelay
<int> <int> <int> <int> <int> <int> <int>
1 2011 1 1 7 13 -10 0
2 2011 1 2 6 9 -9 1
3 2011 1 3 5 17 -8 -8
4 2011 1 4 9 22 3 3
5 2011 1 5 9 9 -3 5
6 2011 1 6 6 13 -7 -1
7 2011 1 7 12 15 -1 -1
8 2011 1 8 7 12 -16 -5
9 2011 1 9 8 22 44 43
10 2011 1 10 6 19 43 43
# … with 227,486 more rows
# ℹ Use `print(n = ...)` to see more rows
> filter(select(flights, UniqueCarrier, DepDelay),
DepDelay>60)
# A tibble: 10,242 × 2
UniqueCarrier DepDelay
<chr> <int>
1 AA 90
2 AA 67
3 AA 74
4 AA 125
5 AA 82
6 AA 99
7 AA 70
8 AA 61
9 AA 74
10 AS 73
# … with 10,232 more rows
# ℹ Use `print(n = ...)` to see more rows
> #chaining method
> flights %>%
+ select(UniqueCarrier, DepDelay) %>%
+ filter(DepDelay>60)
# A tibble: 10,242 × 2
UniqueCarrier DepDelay
<chr> <int>
1 AA 90
2 AA 67
3 AA 74
4 AA 125
5 AA 82
6 AA 99
7 AA 70
8 AA 61
9 AA 74
10 AS 73
# … with 10,232 more rows
# ℹ Use `print(n = ...)` to see more rows
> flights %>%
+ select(UniqueCarrier, DepDelay)
# A tibble: 227,496 × 2
UniqueCarrier DepDelay
<chr> <int>
1 AA 0
2 AA 1
3 AA -8
4 AA 3
5 AA 5
6 AA -1
7 AA -1
8 AA -5
9 AA 43
10 AA 43
# … with 227,486 more rows
# ℹ Use `print(n = ...)` to see more rows
> flights %<>%
+ select(UniqueCarrier, DepDelay) %<>%
+
+ filter(DepDelay>60)
> flights
# A tibble: 10,242 × 2
UniqueCarrier DepDelay
<chr> <int>
1 AA 90
2 AA 67
3 AA 74
4 AA 125
5 AA 82
6 AA 99
7 AA 70
8 AA 61
9 AA 74
10 AS 73
# … with 10,232 more rows
# ℹ Use `print(n = ...)` to see more rows
> #Traditional Method
> x1<-1:5; x2<-2:6
> sqrt(sum((x1-x2)^2))
[1] 2.236068
> #Chaining Method
> (x1-x2)^2 %>% sum() %>% sqrt()
[1] 2.236068
> suppressMessages(library(dplyr))
> flights<-tbl_df(hflights)
Warning message:
`tbl_df()` was deprecated in dplyr 1.0.0.
ℹ Please use `tibble::as_tibble()` instead.
Call `lifecycle::last_lifecycle_warnings()` to see where
this warning was generated.
> flights
# A tibble: 227,496 × 21
Year Month Dayof…¹ DayOf…² DepTime ArrTime Uniqu…³
Fligh…⁴ TailNum Actua…⁵ AirTime ArrDe…⁶
<int> <int> <int> <int> <int> <int> <chr>
<int> <chr> <int> <int> <int>
1 2011 1 1 6 1400 1500 AA
428 N576AA 60 40 -10
2 2011 1 2 7 1401 1501 AA
428 N557AA 60 45 -9
3 2011 1 3 1 1352 1502 AA
428 N541AA 70 48 -8
4 2011 1 4 2 1403 1513 AA
428 N403AA 70 39 3
5 2011 1 5 3 1405 1507 AA
428 N492AA 62 44 -3
6 2011 1 6 4 1359 1503 AA
428 N262AA 64 45 -7
7 2011 1 7 5 1359 1509 AA
428 N493AA 70 43 -1
8 2011 1 8 6 1355 1454 AA
428 N477AA 59 40 -16
9 2011 1 9 7 1443 1554 AA
428 N476AA 71 41 44
10 2011 1 10 1 1443 1553 AA
428 N504AA 70 45 43
# … with 227,486 more rows, 9 more variables: DepDelay
<int>, Origin <chr>, Dest <chr>,
# Distance <int>, TaxiIn <int>, TaxiOut <int>, Cancelled
<int>, CancellationCode <chr>,
# Diverted <int>, and abbreviated variable names ¹
DayofMonth, ²DayOfWeek, ³UniqueCarrier,
# ⁴FlightNum, ⁵ActualElapsedTime, ⁶ArrDelay
# ℹ Use `print(n = ...)` to see more rows, and `colnames()`
to see all variable names
> flights[order(flights$DepDelay), c("UniqueCarrier",
"DepDelay")]
# A tibble: 227,496 × 2
UniqueCarrier DepDelay
<chr> <int>
1 OO -33
2 MQ -23
3 XE -19
4 XE -19
5 CO -18
6 EV -18
7 XE -17
8 CO -17
9 XE -17
10 MQ -17
# … with 227,486 more rows
# ℹ Use `print(n = ...)` to see more rows
> flights %>%
+ select(UniqueCarrier, DepDelay) %>%
+ arrange(desc(DepDelay))
# A tibble: 227,496 × 2
UniqueCarrier DepDelay
<chr> <int>
1 CO 981
2 AA 970
3 MQ 931
4 UA 869
5 MQ 814
6 MQ 803
7 CO 780
8 CO 758
9 DL 730
10 MQ 691
# … with 227,486 more rows
# ℹ Use `print(n = ...)` to see more rows
> flights$Speed <- flights$Distance / flights$AirTime*60
> flights[, c("Distance", "AirTime", "Speed")]
# A tibble: 227,496 × 3
Distance AirTime Speed
<int> <int> <dbl>
1 224 40 336
2 224 45 299.
3 224 48 280
4 224 39 345.
5 224 44 305.
6 224 45 299.
7 224 43 313.
8 224 40 336
9 224 41 328.
10 224 45 299.
# … with 227,486 more rows
# ℹ Use `print(n = ...)` to see more rows
> flights %>%
+ select(Distance, AirTime) %>%
+ mutate(Speed = Distance / AirTime*60)
# A tibble: 227,496 × 3
Distance AirTime Speed
<int> <int> <dbl>
1 224 40 336
2 224 45 299.
3 224 48 280
4 224 39 345.
5 224 44 305.
6 224 45 299.
7 224 43 313.
8 224 40 336
9 224 41 328.
10 224 45 299.
# … with 227,486 more rows
# ℹ Use `print(n = ...)` to see more rows
> flights = flights %>% mutate(Speed = Distance /
AirTime*60)
> head(with(flights, tapply(ArrDelay, Dest, mean,
na.rm=TRUE)))
ABQ AEX AGS AMA ANC ASE
7.226259 5.839437 4.000000 6.840095 26.080645 6.794643
> head(aggregate(ArrDelay~Dest, flights, mean))
Dest ArrDelay
1 ABQ 7.226259
2 AEX 5.839437
3 AGS 4.000000
4 AMA 6.840095
5 ANC 26.080645
6 ASE 6.794643
> flights%>%
+ group_by(Dest)%>%
+ summarise(Avg_Delay=mean(ArrDelay, na.rm=TRUE))
# A tibble: 116 × 2
Dest Avg_Delay
<chr> <dbl>
1 ABQ 7.23
2 AEX 5.84
3 AGS 4
4 AMA 6.84
5 ANC 26.1
6 ASE 6.79
7 ATL 8.23
8 AUS 7.45
9 AVL 9.97
10 BFL -13.2
# … with 106 more rows
# ℹ Use `print(n = ...)` to see more rows