美文网首页
Balance diagnostics after propen

Balance diagnostics after propen

作者: 北欧森林 | 来源:发表于2021-05-12 08:07 被阅读0次

    Standardized mean difference (SMD) is the most commonly used statistic to examine the balance of covariate distribution between treatment groups. Because SMD is independent of the unit of measurement, it allows comparison between variables with different unit of measurement. (Zhang Z et al. 2019)

    1. to create a simulated dataset
    psSim<-function(CatVarN=2,ContVarN=2,
                    seed=123,n=1000){
      set.seed(seed);
      Xcont <- replicate(ContVarN, rnorm(n))
      Xcat <- replicate(CatVarN, rbinom(n,size = 1,prob = 0.3))
      linpredT<-cbind(1, Xcont,Xcat) %*%
        sample(c(-5:-1,1:5), 1+CatVarN+ContVarN) +
        rnorm(n,-0.8,1)
      probTreatment <- exp(linpredT) / (1 + exp(linpredT))
      Treat <- rbinom(n, 1, probTreatment);
      linpredY <- 1 + cbind(Xcont,Xcat) %*%
        rep(1, CatVarN+ContVarN) +
        Treat + rnorm(n, -2, 2);
      prY = 1/(1+exp(-linpredY));
      mort <- rbinom(n,1,prY);
      dt <- data.frame(Xcont=Xcont,Xcat=Xcat,Treat, mort)
      return(dt)
    }
    dt<-psSim()
    
    str(dt)
    ## 'data.frame':    1000 obs. of  6 variables:
    ##  $ Xcont.1: num  -0.5605 -0.2302 1.5587 0.0705 0.1293 ...
    ##  $ Xcont.2: num  -0.996 -1.04 -0.018 -0.132 -2.549 ...
    ##  $ Xcat.1 : int  0 1 0 1 0 0 1 0 0 1 ...
    ##  $ Xcat.2 : int  0 1 1 0 0 0 0 0 1 0 ...
    ##  $ Treat  : int  0 0 0 0 0 0 0 1 0 0 ...
    ##  $ mort   : int  0 1 1 0 1 0 1 0 1 1 ...
    
    1. to examine the balance of covariates between treated and untreated groups
    library(tableone)
    myVars <- names(dt)[1:4]
    tabbefore <- CreateTableOne(vars = myVars,
                                  data = dt,
                                  strata = 'Treat',
                                  factorVars = c('Xcat.1','Xcat.2'),
                                  smd = T)
    tabbefore <- print(tabbefore,
                         printToggle = FALSE,
                         noSpaces = TRUE, smd=TRUE,
                         quote=T)
    
    tabbefore
    ##                        "Stratified by Treat"
    ##  ""                     "0"            "1"            "p"      "test" "SMD"  
    ##   "n"                   "766"          "234"          ""       ""     ""     
    ##   "Xcont.1 (mean (SD))" "0.12 (0.99)"  "-0.32 (0.94)" "<0.001" ""     "0.455"
    ##   "Xcont.2 (mean (SD))" "-0.25 (0.89)" "1.00 (0.77)"  "<0.001" ""     "1.499"
    ##   "Xcat.1 = 1 (%)"      "263 (34.3)"   "19 (8.1)"     "<0.001" ""     "0.677"
    ##   "Xcat.2 = 1 (%)"      "279 (36.4)"   "23 (9.8)"     "<0.001" ""     "0.665"
    
    1. PSM
    #install.packages("MatchIt")
    library(MatchIt);
    m.out<-matchit(Treat~ Xcont.1+Xcont.2+Xcat.1+Xcat.2,
                     dt, method = "nearest", caliper=0.1)
    
    1. SMD
    #install.packages("cobalt")
    library(cobalt)
    
    bal.tab(m.out,m.threshold=0.1)
    ## Call
    ##  matchit(formula = Treat ~ Xcont.1 + Xcont.2 + Xcat.1 + Xcat.2, 
    ##     data = dt, method = "nearest", caliper = 0.1)
    ## 
    ## Balance Measures
    ##              Type Diff.Adj    M.Threshold
    ## distance Distance   0.0455 Balanced, <0.1
    ## Xcont.1   Contin.   0.0271 Balanced, <0.1
    ## Xcont.2   Contin.   0.0506 Balanced, <0.1
    ## Xcat.1     Binary  -0.0114 Balanced, <0.1
    ## Xcat.2     Binary   0.0000 Balanced, <0.1
    ## 
    ## Balance tally for mean differences
    ##                    count
    ## Balanced, <0.1         5
    ## Not Balanced, >0.1     0
    ## 
    ## Variable with the greatest mean difference
    ##  Variable Diff.Adj    M.Threshold
    ##   Xcont.2   0.0506 Balanced, <0.1
    ## 
    ## Sample sizes
    ##           Control Treated
    ## All           766     234
    ## Matched        88      88
    ## Unmatched     678     146
    
    1. Visualization the distribution changes before and after matching
    bal.plot(m.out,var.name = 'Xcont.2',which = 'both')
    
    image.png
    bal.plot(m.out,var.name = 'Xcat.1',which = 'both')
    
    image.png
    1. Publication quality plot (change the temporary variable names)
    v <- data.frame(old = c("Xcont.1","Xcont.2","Xcat.1","Xcat.2"),
                    new = c("Age", "WBC", "Gender",'Surgery'))
    love.plot(bal.tab(m.out, m.threshold=0.1),
                stat = "mean.diffs", var.names = v, abs = F)
    ## Warning: Standardized mean differences and raw mean differences are present in the same plot. 
    ## Use the 'stars' argument to distinguish between them and appropriately label the x-axis.
    
    image.png
    1. to compare difference after PSM (refer to tabbefore)
    df.match <- match.data(m.out)[1:ncol(dt)]
    tabafter <- CreateTableOne(vars = myVars,
                                 data = df.match,
                                 strata = 'Treat',
                                 factorVars = c('Xcat.1','Xcat.2'),
                                 smd = T)
    tabafter <- print(tabafter,
                        printToggle = FALSE,
                        noSpaces = TRUE,smd=TRUE)
    
    tabafter;
    ##                      Stratified by Treat
    ##                       0              1              p       test SMD     
    ##   n                   "88"           "88"           ""      ""   ""      
    ##   Xcont.1 (mean (SD)) "-0.18 (0.93)" "-0.15 (1.04)" "0.864" ""   "0.026" 
    ##   Xcont.2 (mean (SD)) "0.50 (0.67)"  "0.54 (0.76)"  "0.719" ""   "0.054" 
    ##   Xcat.1 = 1 (%)      "9 (10.2)"     "8 (9.1)"      "1.000" ""   "0.038" 
    ##   Xcat.2 = 1 (%)      "12 (13.6)"    "12 (13.6)"    "1.000" ""   "<0.001"
    

    8.1 SMD calculation in CreateTableOne()

    abs((mean(df.match[df.match$Treat==1,'Xcont.1'])-
           mean(df.match[df.match$Treat==0,'Xcont.1'])))/
      sqrt((var(df.match[df.match$Treat==1,'Xcont.1'])+
              var(df.match[df.match$Treat==0,'Xcont.1']))/2)
    ## [1] 0.02581165
    

    8.2 SMD calculation in tab():

    (mean(df.match[df.match$Treat==1,'Xcont.1'])-
        mean(df.match[df.match$Treat==0,'Xcont.1']))/
      sqrt((var(dt[dt$Treat==1,'Xcont.1'])))
    ## [1] 0.02705606
    
    1. Variance ratio

    Note: A variance ratio of 1 in matched sample indicates a good matching, and a variance ratio below 2 is generally acceptable.

    bal.tab(m.out,v.threshold=2)
    ## Call
    ##  matchit(formula = Treat ~ Xcont.1 + Xcont.2 + Xcat.1 + Xcat.2, 
    ##     data = dt, method = "nearest", caliper = 0.1)
    ## 
    ## Balance Measures
    ##              Type Diff.Adj V.Ratio.Adj  V.Threshold
    ## distance Distance   0.0455      1.0950 Balanced, <2
    ## Xcont.1   Contin.   0.0271      1.2512 Balanced, <2
    ## Xcont.2   Contin.   0.0506      1.2954 Balanced, <2
    ## Xcat.1     Binary  -0.0114           .             
    ## Xcat.2     Binary   0.0000           .             
    ## 
    ## Balance tally for variance ratios
    ##                  count
    ## Balanced, <2         3
    ## Not Balanced, >2     0
    ## 
    ## Variable with the greatest variance ratio
    ##  Variable V.Ratio.Adj  V.Threshold
    ##   Xcont.2      1.2954 Balanced, <2
    ## 
    ## Sample sizes
    ##           Control Treated
    ## All           766     234
    ## Matched        88      88
    ## Unmatched     678     146
    
    1. Prognostic score for assessing balance

    The prognostic score is defined as the predicted probability of outcome under the control condition. It can be estimated by regressing the outcome on covariates in the control group. Then that fitted model is used to predict outcome for all subjects.

    ctrl.data <- dt[dt$Treat == 0,]
    ctrl.fit <- glm(mort ~ Xcont.1+Xcont.2+Xcat.1+Xcat.2,
                      data = ctrl.data)
    dt$prog.score <- predict(ctrl.fit, dt)
    
    bal.tab(m.out, distance = dt["prog.score"])
    ## Call
    ##  matchit(formula = Treat ~ Xcont.1 + Xcont.2 + Xcat.1 + Xcat.2, 
    ##     data = dt, method = "nearest", caliper = 0.1)
    ## 
    ## Balance Measures
    ##                Type Diff.Adj
    ## prog.score Distance   0.0360
    ## distance   Distance   0.0455
    ## Xcont.1     Contin.   0.0271
    ## Xcont.2     Contin.   0.0506
    ## Xcat.1       Binary  -0.0114
    ## Xcat.2       Binary   0.0000
    ## 
    ## Sample sizes
    ##           Control Treated
    ##  All           766     234
    ## Matched        88      88
    ## Unmatched     678     146
    
    1. to identify evidence of model misspecification
    library(car)
    mod1<-glm(Treat~Xcont.1+Xcont.2+Xcat.1+Xcat.2,
                dt,family = binomial)
    residualPlots(mod1,terms=~Xcont.1+Xcont.2,fitted=T)
    
    ##         Test stat Pr(>|Test stat|)
    ## Xcont.1    0.5216           0.4701
    ## Xcont.2    0.6596           0.4167
    
    image.png

    Reference:
    Zhang Z, Kim HJ, Lonjon G, Zhu Y; written on behalf of AME Big-Data Clinical Trial Collaborative Group. Balance diagnostics after propensity score matching. Ann Transl Med 2019;7(1):16. doi: 10.21037/atm.2018.12.10

    相关文章

      网友评论

          本文标题:Balance diagnostics after propen

          本文链接:https://www.haomeiwen.com/subject/emsbdltx.html