美文网首页
R 常用语句汇总

R 常用语句汇总

作者: LSim | 来源:发表于2020-05-25 16:21 被阅读0次

## 分组统计  

# Row count per colname for rows with RESP_DT == '20191102', smallest count first.
display(
  df %>%
    filter(RESP_DT == '20191102') %>%
    group_by(colname) %>%
    summarise(cnt = n()) %>%
    arrange(cnt)
)

## 表连接

# Distinct email_name count per colname, restricted to "Mail Open" rows.
a<-df%>%dplyr::filter(RESP_TYPE_NM=="Mail Open")%>%dplyr::group_by(colname)%>%dplyr::summarise(email_cnt=length(unique(email_name)))

# Distinct email_name count per colname over all rows, counted with length(unique()).
c<-df %>%dplyr::group_by(colname)%>%dplyr::summarise(email_cnt=length(unique(email_name)))

# Same summary computed with n_distinct(); this assignment overwrites the `c` above.
c<-df%>%dplyr::group_by(colname)%>%dplyr::summarise(email_cnt=n_distinct(email_name))

# Left-join `a` and then `b` onto `c` by colname.
# NOTE(review): `b` is not defined anywhere in this snippet - confirm where it comes from.
d<-c%>%dplyr::left_join(a,by="colname")%>%dplyr::left_join(b,by="colname")

### 增加字段

d<-dplyr::mutate(d,clickpercent=click/open)

###表连接

c<-c%>%dplyr::left_join(a,by="colname")%>%dplyr::left_join(b,by="colname")

x %>% left_join(y, by = c("name" = "name"))

# Left join: keep every row of contact_phone_nbr and attach the phone_nbr
# columns where contact_phone_nbr$phonenumber equals phone_nbr$id.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
coon1 <- merge(
  x = contact_phone_nbr,
  y = phone_nbr,
  by.x = "phonenumber",
  by.y = "id",
  all.x = TRUE
)

### 多条件匹配,交集

merge(a,b,by = intersect(names(a), names(b)))

### 两个相同的字段匹配,左连接

merge(a,b,all.x=TRUE)

## 修改日期

a$col<- lubridate::ymd(a$col)

# Parse a$DT into Date values with lubridate::ymd()
# (presumably year-month-day text or numbers such as "20191102" - confirm).
# The source column is read from `a`, the same data frame that is assigned to;
# the earlier version read from an unrelated `s`.
a$DT <- lubridate::ymd(a$DT)

## 字符串截取函数

a$col<-substring(a$col,1,9)

substr(data,1,4)

## 新增加一个自增字段

a<-a%>% dplyr::mutate(ID=row_number())

## 字符串匹配函数

unique(a[grep('xxx',tolower(a$col)),]$col)

txt <- c("Dother")

txt1 <- c("otherD")

b <- df[grep(txt,df$CN),]

b1 <- df[grep(txt1,df$CN),]

## 条件赋值(case_when,相当于多分支的 ifelse)

# flag is '1' when either the raw or the cleaned phone number appears in either
# blacklist column, otherwise '0'. All four membership tests lead to the same
# value, so they are combined into a single condition.
phone <- phone %>%
  dplyr::mutate(
    flag = case_when(
      phone$raw_phone %in% blacklist_phone$Phone |
        phone$raw_phone %in% blacklist_phone$cleanphone |
        phone$cleanphone %in% blacklist_phone$Phone |
        phone$cleanphone %in% blacklist_phone$cleanphone ~ '1',
      TRUE ~ '0'
    )
  )

## 删除字段

df[,"colname"]<-NULL

## 去重

df%>% unique()

## 两个字段合并后去重计数

# Number of distinct (col1, col2) combinations.
# Deduplicating the two columns as a pair avoids the collisions that pasting
# them into one string can cause (e.g. "a b" + "c" and "a" + "b c" both
# become "a b c").
nrow(unique(df[, c("col1", "col2")]))

## 筛选出 col1、col2 组合出现次数大于 1(即重复)的记录

# Count rows per (col1, col2) pair and keep the pairs that occur more than
# once, most frequent first.
# n() counts the rows of the current group. The previous length(col) only
# worked if df had a column literally named `col`; otherwise `col` resolved to
# the base function col(), whose length is 1, so every count came out as 1.
df %>%
  group_by(col1, col2) %>%
  summarise(cnt = n()) %>%
  filter(cnt > 1) %>%
  arrange(desc(cnt))

## 去空格

df$col<- gsub(" ", "", df$col, fixed = TRUE)

df$col<- trimws(df$col,which="both")

## 拉横数据

library(reshape2)

library(dplyr)

df<- reshape2::dcast(df, col1+col2~Type, value.var = "price")

## 拉直数据

library(reshape2)

# Reshape `a` to long format, keeping A_NM / B_NM / C_NM as id columns, then
# sort the result by A_NM.
# melt() is closed before the pipe into arrange(); previously arrange() sat
# inside the melt() call and was applied to the id vector instead of the data.
a1 <- a %>%
  reshape2::melt(id.vars = c("A_NM", "B_NM", "C_NM")) %>%
  dplyr::arrange(A_NM)

# 分条件加列

# Flag rows whose raw or cleaned phone number appears in the blacklist
# (1 = listed, 0 = not listed).
# Every membership test reads from `df`, so the mask has exactly one element
# per row of `df`; the earlier version mixed in columns of another data frame
# (`phone`), which only lines up if both frames have identical row order.
df$flag <- ifelse(
  df$rawphone %in% blacklist_phone$phone |
    df$rawphone %in% blacklist_phone$cleanphone |
    df$cleanphone %in% blacklist_phone$MAIN_PHN_NBR |
    df$cleanphone %in% blacklist_phone$cleanphone,
  1,
  0
)

#  选出最新日期(三种方法)

a1<-a %>% dplyr::group_by(col1,col2) %>% arrange(col1,col2,desc(date)) %>%dplyr::mutate(index=row_number()) %>% dplyr::filter(index==1)

a1 <- a %>% dplyr::group_by(col1,col2) %>% dplyr::arrange(QUANTITY,desc(date))%>% mutate(rn = rank(desc(date), ties.method = "first")) %>% dplyr::filter(rn==1)

# Keep the row with the latest date for each (col1, col2) pair.
# Rows are sorted newest-first (QUANTITY only breaks ties between equal dates),
# so the first row of every group is the latest one. Sorting by QUANTITY first,
# as before, made id == 1 pick the smallest-QUANTITY row instead.
a1 <- a %>%
  dplyr::group_by(col1, col2) %>%
  dplyr::arrange(dplyr::desc(date), QUANTITY) %>%
  dplyr::mutate(id = dplyr::row_number()) %>%
  dplyr::filter(id == 1)

# 去空格大写

df$col1<-toupper(gsub(" ", "", df$col1, fixed = TRUE))

## 检查两列是否有重复

df$col<-  paste(df$col1,df$col2,sep = "")

display(df%>% dplyr::group_by(col) %>% dplyr::summarise(cnt=length(col)) %>% dplyr::arrange(desc(cnt)))

## 不满足条件,终止下面cmd的运行

# Abort unless every col1 value occurs only once.
# stopifnot() raises an error when its condition is FALSE, so the condition
# has to describe the healthy case: zero rows remain after keeping the col1
# values that occur more than once. The previous `!= 0` stopped the script
# exactly when the data was clean.
stopifnot(
  nrow(
    df %>%
      dplyr::group_by(col1) %>%
      dplyr::summarise(cnt = length(col2)) %>%
      dplyr::filter(cnt > 1)
  ) == 0
)

# Raise an error when any col1 value is shared by more than one row.
if (
  nrow(
    df %>%
      dplyr::group_by(col1) %>%
      dplyr::summarise(cnt = length(col2)) %>%
      dplyr::filter(cnt > 1)
  ) > 0
) {
  stop("invalid productid please double check if any space or else in, and resave the file or the script will not run")
}

# NOTE(review): this snippet is Python, not R; the ['end_date'] indexing and
# the .unique().max() chain look pandas-style - confirm.
# Remove the 'end_date' entry from view_result when model_day equals the
# largest end_date value; otherwise abort by raising BaseException.
if model_day==view_result['end_date'].unique().max():

  del view_result['end_date']

else:

  raise BaseException

## 判断是否相等

# Two ways to test equality, written as separate expressions. On a single line
# the "/" is parsed as division, so identical(2, 2) / 2 == 2 evaluates
# 0.5 == 2, which is FALSE.
identical(2, 2)  # exact equality of two objects; always a single TRUE/FALSE

2 == 2           # element-wise comparison; returns a logical vector

## 不在

df%>% dplyr::filter( !(df$col1%in% df1$col1) )

## 当一个col1对应多个col2时,每个col1只保留第一条(按当前行顺序,并非随机)

display(aa<-c%>%dplyr::group_by(col1)%>%dplyr::mutate(cnt=length(col2)) %>% mutate(seq=row_number()) %>%dplyr::filter(seq==1) )

## 生成行号字段(1、2、3……,取自行名,结果为字符型)

df$col<- rownames(df)

## 去掉空字符串(只保留非空的行)

df<- df%>% dplyr::filter(col !='')

## 每个col,随意选择两条

# Keep at most two rows per value of col, taken in col1 order.
# row_number() numbers the rows inside each group directly. It replaces
# seq(1, length(paste(col, col1, seq = ""))), where `seq = ""` was a typo for
# `sep` and the paste() call only served to measure the group size.
b1 <- df %>%
  dplyr::group_by(col) %>%
  dplyr::arrange(col1) %>%
  dplyr::mutate(id = dplyr::row_number()) %>%
  dplyr::filter(id <= 2)

## 指定字段重命名

colnames(df)[6] <- 'colName'

b<-df[which((df$col)=="999999999"),]

table(a$Tag,a$Tag_1)

## 以特定字符把列进行切分(分成多列)

# Split df$col on "_" and store the first and second pieces as new columns
# p and q of the same data frame. The earlier version mixed two data frames
# (`sd` / `df`) and two column names (`col` / `col1`).
# x[1] / x[2] return NA when a piece is missing, whereas [[2]] fails with
# "subscript out of bounds" for values without a "_"; vapply() guarantees a
# character vector with one element per row.
parts <- strsplit(df$col, "_", fixed = TRUE)

df$p <- vapply(parts, function(x) x[1], character(1))

df$q <- vapply(parts, function(x) x[2], character(1))

## 生成系统时间的下月一号

# First day of next month as a Date: snap today to the 1st of the current
# month, then add a one-month period (the 1st exists in every month, so no
# end-of-month rollback is needed).
# Everything is namespace-qualified, so the line does not depend on lubridate
# being attached for %m+%, and it avoids the round-trip through a string.
lubridate::floor_date(Sys.Date(), unit = "month") +
  lubridate::period(num = 1, units = "month")

相关文章

网友评论

      本文标题:R 常用语句汇总

      本文链接:https://www.haomeiwen.com/subject/vrifwctx.html