美文网首页
R 常用语句汇总

R 常用语句汇总

作者: LSim | 来源:发表于2020-05-25 16:21 被阅读0次

    ## 分组统计  

    display(df %>% filter(RESP_DT == '20191102') %>% group_by(colname) %>% summarise(cnt = length(RESP_DT)) %>% arrange(cnt))

    ## 表连接

    a<-df%>%dplyr::filter(RESP_TYPE_NM=="Mail Open")%>%dplyr::group_by(colname)%>%dplyr::summarise(email_cnt=length(unique(email_name)))

    c<-df %>%dplyr::group_by(colname)%>%dplyr::summarise(email_cnt=length(unique(email_name)))

    c<-df%>%dplyr::group_by(colname)%>%dplyr::summarise(email_cnt=n_distinct(email_name))

    d<-c%>%dplyr::left_join(a,by="colname")%>%dplyr::left_join(b,by="colname")

    ### 增加字段

    d<-dplyr::mutate(d,clickpercent=click/open)

    ###表连接

    c<-c%>%dplyr::left_join(a,by="colname")%>%dplyr::left_join(b,by="colname")

    x %>% left_join(y, by = c("name" = "name"))

    coon1 <- merge(x =contact_phone_nbr,y = phone_nbr,by.x='phonenumber',by.y='id',all.x = T)

    ### 多条件匹配,交集

    merge(a,b,by = intersect(names(a), names(b)))

    ### 两个相同的字段匹配,左连接

    merge(a,b,all.x=TRUE)

    ## 修改日期

    a$col<- lubridate::ymd(a$col)

    a$DT<-lubridate::ymd(s$DT)

    ## 字符串截取函数

    a$col<-substring(a$col,1,9)

    substr(data,1,4)

    ## 新增加一个自增字段

    a<-a%>% dplyr::mutate(ID=row_number())

    ## 字符串匹配函数

    unique(a[grep('xxx',tolower(a$col)),]$col)

    txt <- c("Dother")

    txt1 <- c("otherD")

    b <- df[grep(txt,df$CN),]

    b1 <- df[grep(txt1,df$CN),]

    ## ifelse语句

    phone <- phone %>% dplyr::mutate(flag =  case_when(phone$raw_phone %in% blacklist_phone$Phone~  '1',

                                          phone$raw_phone %in% blacklist_phone$cleanphone ~  '1',

                                          phone$cleanphone  %in% blacklist_phone$Phone~ '1',

                                          phone$cleanphone %in% blacklist_phone$cleanphone ~  '1',

                                          TRUE ~ '0'))

    ## 删除字段

    df[,"colname"]<-NULL

    ## 去重

    df%>% unique()

    ## 两个字段合并后去重计数

    length(unique(paste(df$col1,df$col2)))

    ## 筛选出col1,col2计数超过2的字段

    filter(df%>% group_by(col1,col2) %>% summarise(cnt = length(col)),cnt > 1) %>% arrange(desc(cnt))

    ## 去空格

    df$col<- gsub(" ", "", df$col, fixed = TRUE)

    df$col<- trimws(df$col,which="both")

    ## 拉横数据

    library(reshape2)

    library(dplyr)

    df<- reshape2::dcast(df, col1+col2~Type, value.var = "price")

    ## 拉直数据

    library(reshape2)

    a1 <- a %>% reshape2::melt(id=c("A_NM","B_NM","C_NM") %>% dplyr::arrange(A_NM))

    # 分条件加列

    df$flag <- ifelse((df$rawphone %in% blacklist_phone$phone| phone$rawphone %in% blacklist_phone$cleanphone | phone$cleanphone  %in% blacklist_phone$MAIN_PHN_NBR | phone$cleanphone %in% blacklist_phone$cleanphone),1,0)

    #  选出最新日期(三种方法)

    a1<-a %>% dplyr::group_by(col1,col2) %>% arrange(col1,col2,desc(date)) %>%dplyr::mutate(index=row_number()) %>% dplyr::filter(index==1)

    a1 <- a %>% dplyr::group_by(col1,col2) %>% dplyr::arrange(QUANTITY,desc(date))%>% mutate(rn = rank(desc(date), ties.method = "first")) %>% dplyr::filter(rn==1)

    a1 <- a %>% dplyr::group_by(col1,col2) %>% dplyr::arrange(QUANTITY,desc(date))%>% mutate(id=seq(1,length(paste(col1,col2)))) %>% dplyr::filter(id==1)

    # 去空格大写

    df$col1<-toupper(gsub(" ", "", df$col1, fixed = TRUE))

    ## 检查两列是否有重复

    df$col<-  paste(df$col1,df$col2,sep = "")

    display(df%>% dplyr::group_by(col) %>% dplyr::summarise(cnt=length(col)) %>% dplyr::arrange(desc(cnt)))

    ## 不满足条件,终止下面cmd的运行

    stopifnot( nrow(df%>% dplyr::group_by(col1) %>% dplyr::summarise(cnt=length(col2)) %>% dplyr::filter(cnt>1)) != 0)

    if (nrow(df%>% dplyr::group_by(col1) %>% dplyr::summarise(cnt=length(col2)) %>% dplyr::filter(cnt>1)) != 0) {

        stop("invalid productid please double check if any space or else in, and resave the file or the script will not run")

        }

    if model_day==view_result['end_date'].unique().max():

      del view_result['end_date']

    else:

      raise BaseException

    ## 判断是否相等

    identical( 2,2) / 2==2

    ## 不在

    df%>% dplyr::filter( !(df$col1%in% df1$col1) )

    ## 当一个col1对应多个col2时,随机选取一条

    display(aa<-c%>%dplyr::group_by(col1)%>%dplyr::mutate(cnt=length(col2)) %>% mutate(seq=row_number()) %>%dplyr::filter(seq==1) )

    ##生成123456789

    df$col<- rownames(df)

    ## 非空去掉

    df<- df%>% dplyr::filter(col !='')

    ## 每个col,随意选择两条

    b1 <- df%>% dplyr::group_by(col)  %>% dplyr::arrange(col1) %>% dplyr::mutate(id=seq(1,length(paste(col,col1,seq="")))) %>% dplyr::filter(id <=2)

    ## 指定字段重命名

    colnames(df)[6] <- 'colName'

    b<-df[which((df$col)=="999999999"),]

    table(a$Tag,a$Tag_1)

    ## 以特定字符把列进行切分(分成多列)

    df$col<- strsplit(df$col,"_")

    sd<- data.frame(sd,p=sapply(sd$col1,function(i){i[[1]]}),q=sapply(df$col1,function(i){i[[2]]}))

    ## 生成系统时间的下月一号

    lubridate::ymd(paste0(substr(as.character(Sys.Date() %m+% base::months(1)), 1, 8),"01"))

    相关文章

      网友评论

          本文标题:R 常用语句汇总

          本文链接:https://www.haomeiwen.com/subject/vrifwctx.html