美文网首页
R数据科学第三章

R数据科学第三章

作者: 寂静之巅 | 来源:发表于2019-12-12 21:04 被阅读0次

# Setup: make sure the data package is installed, then attach the packages
# used by every snippet below. Installation must come before library().
if (!requireNamespace("nycflights13", quietly = TRUE)) {
  install.packages("nycflights13")
}
library(nycflights13)
library(tidyverse)

# Search the installed help pages for anything mentioning nycflights13.
??nycflights13

# Inspect the flights data: print it, open its help page, browse it in the
# viewer, and check its class.
flights
?flights
View(flights)
class(flights)

# Rows whose arr_delay is at least 120. This needs dplyr attached; without it
# the name filter resolves to stats::filter().
filter(flights, arr_delay >= 120)

变量类型

int 整数,dbl 双精度浮点数、实数,dttm 日期+时间,lgl 逻辑型变量,仅包括TRUE和FALSE, fctr 因子,date 日期型变量

filter 按行筛选
arrange 给行排序
select 按列筛选
mutate根据现有的变量创建新的变量
summarize摘要统计

使用filter进行筛选

# Rows where month is 1 and day is 1; the result is only printed.
filter(
  flights,
  month == 1,
  day == 1
)
# Same subset, this time stored under a name and then printed.
jan1 <- filter(
  flights,
  month == 1,
  day == 1
)
jan1
# Wrapping the assignment in parentheses assigns and prints in one step.
(
  dec25 <- filter(
    flights,
    month == 12,
    day == 25
  )
)
dec25
比较运算符

>, >=, <, <=, !=, ==
# Rows whose month is 11 or 12, written with the element-wise "or".
filter(
  flights,
  month == 11 | month == 12
)
a <- filter(
  flights,
  month == 11 | month == 12
)
View(a)
# The same condition expressed as set membership.
filter(
  flights,
  month %in% c(11, 12)
)
# A small tibble with one missing value to show how filter() treats NA.
df <- tibble(
  x = c(1, NA, 3)
)
filter(
  df,
  x > 1
)
# Explicitly keep the rows where x is missing as well.
filter(
  df,
  is.na(x) | x > 1
)
# Sort by year, then month, then day.
arrange(
  flights,
  year,
  month,
  day
)
# Descending order of dep_delay.
arrange(
  flights,
  desc(dep_delay)
)
鏃犺姝e簭鍜屽€掑簭锛岀┖鍊兼帓鍦ㄦ渶鍚?
# A three-row tibble containing one NA, sorted ascending and then descending.
df <- tibble(
  x = c(5, 2, NA)
)
arrange(
  df,
  x
)
arrange(
  df,
  desc(x)
)
# Sort on is.na(dep_time) in descending order first (TRUE before FALSE),
# then on dep_time itself.
arrange(
  flights,
  desc(is.na(dep_time)),
  dep_time
)

select按列筛选

# Select a few columns by listing their names.
select(flights, year, month, day)
# Select the contiguous run of columns from year to day.
select(flights, year:day)
# Range plus negation: keep everything except year through day.
select(flights, -(year:day))

# Selection helpers. They are only valid inside a selecting verb, so each one
# is shown wrapped in select() rather than called on its own.
select(flights, starts_with("abc"))   # names that begin with "abc"
select(flights, ends_with("eyz"))     # names that end with "eyz"
select(flights, contains("ijk"))      # names that contain "ijk"
# Names matching a regular expression. The backslash is doubled so the regex
# engine receives \1 (a back-reference); a single "\1" in an R string is the
# control character with code 1.
select(flights, matches("(.)\\1"))
select(flights, num_range("x", 1:3))  # matches x1, x2, x3

# Rename tailnum to tail_num.
rename(flights, tail_num = tailnum)
# Move time_hour and air_time to the front; everything() stands for the
# remaining columns.
select(flights, time_hour, air_time, everything())
练习
# Column names held as strings in a character vector.
vars <- c(
  "year",
  "month",
  "day",
  "dep_delay",
  "arr_delay"
)
# one_of() lets select() take its column names from that vector.
select(
  flights,
  one_of(vars)
)
vars 是character vector(字符向量)。这样写出的select()不需要写多个变量名
# contains() ignores case by default; ignore.case = FALSE makes the match
# case-sensitive.
select(
  flights,
  contains("YEAR", ignore.case = FALSE)
)

5. mutate()生成新列

mutate生成新列后,添加新列到数据框末尾,生成新的数据框。transmute则是只保留新生成的列,原有的列加减乘除等计算出的结果作为新列

生成gain和speed

# A narrower table: year through day, every column whose name ends in
# "delay", plus distance and air_time.
flights_sml <- select(
  flights,
  year:day,
  ends_with("delay"),
  distance,
  air_time
)
flights_sml
# Add two derived columns, gain and speed, to that table.
mutate(
  flights_sml,
  gain = dep_delay - arr_delay,
  speed = distance / air_time * 60
)
鏂板垪鍙互鐩存帴鍙備笌鍙︿竴鏂板垪鐨勭敓鎴?
# gain and hours are created first and then reused, inside the same call,
# to compute gain_per_hour.
mutate(
  flights_sml,
  gain = dep_delay - arr_delay,
  hours = air_time / 60,
  gain_per_hour = gain / hours
)
transmute()鍙繚鐣欐柊鍒?
# Same three derived columns as in the mutate() example, computed from the
# full flights table with transmute().
transmute(
  flights,
  gain = dep_delay - arr_delay,
  hours = air_time / 60,
  gain_per_hour = gain / hours
)
# summarise(): grouped summaries.
# The section heading is kept as a comment: a bare summarise() call has no
# data argument and stops with an error.
# Mean of dep_delay over the whole table, with missing values removed first.
summarise(flights, delay = mean(dep_delay, na.rm = TRUE))
group_by()函数会使summarize更有用
# Group the rows by year, month and day, then compute the mean dep_delay
# (missing values removed) for each group.
by_day <- group_by(.data = flights, year, month, day)
summarise(.data = by_day, delay = mean(dep_delay, na.rm = TRUE))

管道操作

管道操作会简化代码,数据框名不必重复写多次

level1——生成中间产物

# Step 1: keep the grouped table in a named intermediate object.
by_day <- group_by(
  flights,
  year, month, day
)
# Step 2: summarise that object.
summarise(
  by_day,
  delay = mean(dep_delay, na.rm = TRUE)
)

level2——

# No intermediate object: the group_by() call is nested directly as the
# first argument of summarise().
summarise(
  group_by(flights, year, month, day),
  delay = mean(dep_delay, na.rm = TRUE)
)

level3——管道操作链接两步,数据框名在括号里面

# The pipe joins the two steps; the data frame name is still written inside
# the group_by() call.
group_by(flights, year, month, day) %>%
  summarise(delay = mean(dep_delay, na.rm = TRUE))

level4_

# Fully piped: the data frame name appears once, at the head of the chain.
flights %>%
  group_by(year, month, day) %>%
  summarise(delay = mean(dep_delay, na.rm = TRUE))
(2)缺失值
如果数据中有NA,计算结果就全部为NA,所以代码中需要 na.rm = TRUE. 计算前移除缺失值
(计数)

计数

例子:找出平均延误时间最长的飞机
(根据tailname进行分组,得到的是全年同一架飞机的各种数据)
# Method 1: group by carrier, then count the rows of each group with n().
carriers <- flights %>% group_by(carrier)
s1 <- carriers %>% summarise(n())
s1
# Method 2: count() takes the grouping column directly.
s2 <- flights %>% count(carrier)

s2
# Methods 3 and 4: store the group size in a column n on every row of the
# grouped table, then drop duplicate rows with distinct().
mu <- carriers %>% mutate(n = n())
s3 <- mu %>% distinct(n)
s3
s4 <- mu %>% distinct(carrier, n)
s4
鍥涚鏂规硶缁熻鐨勭粨鏋滄槸涓€鑷寸殑锛宒istinct鏄幓閲嶅銆?

写在最后,学习了好几天,还看了小洁老师的笔记,最后保存时用UTF8,然后就成这样了,弄的自己都不认识,我也是人才,还有我也转换不过去了。这次当交学费了。

相关文章

网友评论

      本文标题:R数据科学第三章

      本文链接:https://www.haomeiwen.com/subject/xoxxnctx.html