2020-07-08 R基础绘图+统计

作者: 程凉皮儿 | 来源:发表于2020-07-08 13:33 被阅读0次

2020-07-08 R基础绘图+统计
Day—4
生信入门学习笔记day4@2021.06.28
学习小组Day4笔记——山川石
学习小组Day4笔记--大水
学习小组Day4笔记--镜筒
学习小组Day4笔记-- Louis-jl
学习小组day4笔记--strengthen
大数据 | 从实例教你掌握R语言
学习小组Day4笔记—白兔儿溜溜

R统计

安装加载必要的R包

options(BioC_mirror="https://mirrors.tuna.tsinghua.edu.cn/bioconductor/")
options("repos" = c(CRAN="http://mirrors.cloud.tencent.com/CRAN/")) 
options(download.file.method = 'libcurl')
options(url.method='libcurl')

#install.packages(c("beanplot", "plotrix", "pastecs", "reshape2"))

library(beanplot)
library(plotrix)
#> Warning: package 'plotrix' was built under R version 3.6.2
library(pastecs)
library(reshape2)
#> Warning: package 'reshape2' was built under R version 3.6.2

数据类型定义

QUALITATIVE DATA

## power analysis ##
power.prop.test(p1 = .25, p2 = .7, power = .8, sig.level = 0.05)
#> 
#>      Two-sample comparison of proportions power calculation 
#> 
#>               n = 18.10585
#>              p1 = 0.25
#>              p2 = 0.7
#>       sig.level = 0.05
#>           power = 0.8
#>     alternative = two.sided
#> 
#> NOTE: n is number in *each* group

## Load the data ##
cats.data<-read.table("cats.dat", sep="\t",header=T)

## Look at the data ##
head(cats.data)
#>         Training Dance
#> 1 Food as Reward   Yes
#> 2 Food as Reward   Yes
#> 3 Food as Reward   Yes
#> 4 Food as Reward   Yes
#> 5 Food as Reward   Yes
#> 6 Food as Reward   Yes
#View(cats.data)

## Plot the data and contingency table ##
plot(cats.data$Training, cats.data$Dance, xlab = "Training", ylab = "Dance")

image.png

plot(table(cats.data))

image.png


contingency.table <- table(cats.data)
contingency.table100<-prop.table(contingency.table,1)
contingency.table100<-round(contingency.table100*100)
contingency.table100
#>                      Dance
#> Training              No Yes
#>   Affection as Reward 70  30
#>   Food as Reward      26  74

contingency.table100<-cbind(contingency.table100[,"Yes"],contingency.table100[,"No"])
colnames(contingency.table100) <- c("Yes", "No")
contingency.table100
#>                     Yes No
#> Affection as Reward  30 70
#> Food as Reward       74 26

barplot(t(contingency.table100), 
        legend.text = TRUE,
        ylab = "Percentages",
        las=1)

image.png


#### Pretty ###
par(lwd=2) 
barplot(t(contingency.table100), 
        col=c("chartreuse3","lemonchiffon2"),
        cex.axis=1.2, 
        cex.names=1.5,
        cex.lab=1.5,
        ylab = "Percentages",
        las=1
        )

legend("topright",   
       title="Dancing", 
       inset=.05, 
       c("Yes","No"), 
       horiz=TRUE, 
       pch=15,
       col=c("chartreuse3","lemonchiffon2"))

image.png


dev.off()
#> null device 
#>           1

QUANTITATIVE DATA

卡方检验，Fishers检验

## Chi2  ##
chisq.test(contingency.table)
#> 
#>  Pearson's Chi-squared test with Yates' continuity correction
#> 
#> data:  contingency.table
#> X-squared = 23.52, df = 1, p-value = 1.236e-06

## Chi2 without continuity correction #
chisq.test(contingency.table, correct=F)
#> 
#>  Pearson's Chi-squared test
#> 
#> data:  contingency.table
#> X-squared = 25.356, df = 1, p-value = 4.767e-07

## Fishers test  ##
fisher.test(contingency.table)
#> 
#>  Fisher's Exact Test for Count Data
#> 
#> data:  contingency.table
#> p-value = 1.312e-06
#> alternative hypothesis: true odds ratio is not equal to 1
#> 95 percent confidence interval:
#>   2.837773 16.429686
#> sample estimates:
#> odds ratio 
#>   6.579265

T检验

## Import data ##
coyote<-read.csv("coyote.csv")
#View(coyote)
head(coyote)
#>   length gender
#> 1   93.0 female
#> 2   97.0 female
#> 3   92.0 female
#> 4  101.6 female
#> 5   93.0 female
#> 6   84.5 female

## Power calculation # 
#power.t.test(n = , d = , sig.level = , power = , type = c("two.sample", "one.sample", "paired"))  

# mean 1 = 92 and mean 2 = 87.4
# sd = 7

delta <- 92 - 87.4
power.t.test(delta = 92 - 87.4, sd = 7, sig.level = 0.05, power = 0.8)
#> 
#>      Two-sample t test power calculation 
#> 
#>               n = 37.33624
#>           delta = 4.6
#>              sd = 7
#>       sig.level = 0.05
#>           power = 0.8
#>     alternative = two.sided
#> 
#> NOTE: n is number in *each* group

Graphical exploration of the data

## Stripchart ##
stripchart(coyote$length~coyote$gender)

image.png

stripchart(coyote$length~coyote$gender,vertical=TRUE, method="jitter")

image.png


stripchart(coyote$length~coyote$gender,
           vertical=TRUE, 
           method="jitter", 
           las=1, 
           ylab="Length", 
           pch=16,
           col=c("darkorange","purple"), 
           cex=1.5
           )

# Add group means #
length.means <- tapply(coyote$length,coyote$gender,mean) 

segments(1:2-0.15,length.means, 1:2+0.15, length.means, col="black", lwd=3)

image.png


## Boxplot or beanplot # beanplot package ##
boxplot(coyote$length~coyote$gender,notch = TRUE)

image.png


# Prettier: 
boxplot(coyote$length~coyote$gender, col=c("orange","purple"), ylab = 'Length (cm)', las = 1)

image.png


## Beanplot ##
beanplot(coyote$length~coyote$gender,las=1,ylab="Length (cm)")

image.png


## Combination boxplot and scatterplot ##
boxplot(coyote$length~coyote$gender, 
        lwd = 2, 
        ylab = 'Length', 
        cex.axis=1.5,
        las=1, 
        cex.lab=1.5)

stripchart(coyote$length~coyote$gender, 
           vertical = TRUE, 
           method = "jitter", 
           pch = 20, 
           col = 'red', 
           cex=2, 
           add = TRUE)

image.png


## Combination boxplot and beanplot ##
beanplot(coyote$length~coyote$gender,
         las=1, overallline = "median", 
         ylab = 'Length', 
         cex.lab=1.5, 
         col="bisque",
         what = c(0,1,1,0),
         cex.axis=1.5) 

boxplot(coyote$length~coyote$gender, 
        add=TRUE, 
        col=rgb(0.2,0.5,0.3, alpha=0.5), 
        pch = 20,  
        cex=2, 
        lwd=2, 
        yaxt='n',
        xaxt='n')

image.png


## Histogram ##
par(mfrow=c(1,2))
hist(coyote[coyote$gender=="male",]$length)
hist(coyote[coyote$gender=="female",]$length)

image.png


# Prettier:
hist(coyote[coyote$gender=="male",]$length,main="Male", xlab="Length",col="lightgreen")
hist(coyote[coyote$gender=="female",]$length,main="Female", xlab="Length",col="tomato1")

image.png


### Stats summary of test for normality (1st assumption)  ## pastecs package ###
#by(coyote$length,coyote$gender, stat.desc, basic=F, desc=F, norm=T)#
tapply(coyote$length,coyote$gender, stat.desc, basic=F, desc=F, norm=T)
#> $female
#>   skewness   skew.2SE   kurtosis   kurt.2SE normtest.W normtest.p 
#> -0.5290403 -0.7320175  0.5029741  0.3546892  0.9700101  0.3164448 
#> 
#> $male
#>    skewness    skew.2SE    kurtosis    kurt.2SE  normtest.W  normtest.p 
#> -0.08453876 -0.11697379 -0.67756124 -0.47780521  0.98445697  0.81898309

### Homogeneity of variance (test 2nd assumption) 
bartlett.test(coyote$length~coyote$gender)
#> 
#>  Bartlett test of homogeneity of variances
#> 
#> data:  coyote$length by coyote$gender
#> Bartlett's K-squared = 0.02021, df = 1, p-value = 0.887

###### Independent t-test ###########
t.test(coyote$length~coyote$gender, var.equal=T)
#> 
#>  Two Sample t-test
#> 
#> data:  coyote$length by coyote$gender
#> t = -1.6411, df = 84, p-value = 0.1045
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#>  -5.184747  0.496375
#> sample estimates:
#> mean in group female   mean in group male 
#>             89.71163             92.05581

## Plot a bar chart ##
dev.off()
#> null device 
#>           1
bar.length<-barplot(length.means) 
bar.length<-barplot(length.means,xlim=c(0,1),width=0.3, ylim=c(0,100)) 

# Prettier:
bar.length<-barplot(length.means, col=c("darkslategray1","darkseagreen1"), xlim=c(0,1),width=0.3,ylim=c(50,100), ylab="Mean length",las=1, xpd=FALSE)

## Calculation of SEM for error bars # plotrix package ##
length.se<-tapply(coyote$length,coyote$gender,std.error)
length.se
#>    female      male 
#> 0.9988377 1.0211241

# Alternative #
length.se<-tapply(coyote$length,coyote$gender,function(x) sd(x)/sqrt(length(x)))

## Barplot returns the values for the centers of the bars ##
bar.length
#>      [,1]
#> [1,] 0.21
#> [2,] 0.57

# Adding error bars #
arrows(x0=bar.length, y0=length.means-length.se, x1=bar.length, y1=length.means+length.se, length=0.3, angle=90,code=3)

###### Independent t-test ###########
t.test(coyote$length~coyote$gender, var.equal=T)
#> 
#>  Two Sample t-test
#> 
#> data:  coyote$length by coyote$gender
#> t = -1.6411, df = 84, p-value = 0.1045
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#>  -5.184747  0.496375
#> sample estimates:
#> mean in group female   mean in group male 
#>             89.71163             92.05581

###### Paired t-test ###########
working.memory<-read.csv("Working.memory.csv", header=T)
head(working.memory)
#>   Subject placebo DA.depletion
#> 1      M1       9            7
#> 2      M2      10            7
#> 3      M3      15           10
#> 4      M4      18           12
#> 5      M5      19           13
#> 6      M6      22           15

### Graphical exploration of the data ###

## Boxplot ##
boxplot(working.memory[2:3])
beanplot(working.memory[2:3],las=1,ylab="Scores") 

## Stripchart ##
stripchart(working.memory[2:3])
stripchart(working.memory[2:3],vertical=TRUE, method="jitter", las=1, ylab="Scores", pch=16,col=c("darkred","darkgreen"), cex=1.5)

# Add group means #
scores.means <- colMeans(working.memory[2:3])
scores.means
#>      placebo DA.depletion 
#>     27.26667     18.86667
segments(1:2-0.15,scores.means, 1:2+0.15, scores.means, col="black", lwd=3)

## Check assumption for normality ##
stat.desc(working.memory$placebo, basic=F, desc=F, norm=T)
#>   skewness   skew.2SE   kurtosis   kurt.2SE normtest.W normtest.p 
#>  0.3338775  0.2877662 -1.0318530 -0.4602800  0.9591353  0.6773687
stat.desc(working.memory$DA.depletion, basic=F, desc=F, norm=T)
#>   skewness   skew.2SE   kurtosis   kurt.2SE normtest.W normtest.p 
#>  0.4334375  0.3735762 -0.9792473 -0.4368141  0.9427404  0.4180816

## Running paired t-test ##
t.test(working.memory$placebo, working.memory$DA.depletion, paired=T)
#> 
#>  Paired t-test
#> 
#> data:  working.memory$placebo and working.memory$DA.depletion
#> t = 8.6161, df = 14, p-value = 5.715e-07
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#>   6.308997 10.491003
#> sample estimates:
#> mean of the differences 
#>                     8.4

## Plot the difference ##
working.memory$Difference<- working.memory$placebo-working.memory$DA.depletion
head(working.memory)
#>   Subject placebo DA.depletion Difference
#> 1      M1       9            7          2
#> 2      M2      10            7          3
#> 3      M3      15           10          5
#> 4      M4      18           12          6
#> 5      M5      19           13          6
#> 6      M6      22           15          7
stat.desc(working.memory$Difference, basic=F, desc=F, norm=T)
#>    skewness    skew.2SE    kurtosis    kurt.2SE  normtest.W  normtest.p 
#>  0.08857017  0.07633789 -1.04670910 -0.46690688  0.97726706  0.94740752

## Difference as stripchart ##
stripchart(working.memory$Difference)
stripchart(working.memory$Difference,vertical=TRUE, method="jitter", las=1, ylab="Differences",pch=16, col="blue", cex=2)

diff.mean <- mean(working.memory$Difference)
loc.strip<-1
segments(loc.strip-0.15,diff.mean, loc.strip+0.15, diff.mean, col="black", lwd=3)
diff.se <- std.error(working.memory$Difference) # plotrix package #

## Confidence intervals ##
lower<-diff.mean-1.96*diff.se
upper<-diff.mean+1.96*diff.se

## Plot confidence intervals on a scatterplot ##
arrows(x0=loc.strip, y0=lower, x1=loc.strip, y1=upper, length=0.3, angle=90,code=3,lwd=3)

## Plot beanplots ##
beanplot(working.memory$Difference,las=1,ylab="Differences") 

## One-sample t-test ##
t.test(working.memory$Difference ,mu=0)
#> 
#>  One Sample t-test
#> 
#> data:  working.memory$Difference
#> t = 8.6161, df = 14, p-value = 5.715e-07
#> alternative hypothesis: true mean is not equal to 0
#> 95 percent confidence interval:
#>   6.308997 10.491003
#> sample estimates:
#> mean of x 
#>       8.4

ANOVA

# Import and plot dataset #
protein<-read.csv("protein.expression.csv",header=T)
head(protein)
#>      A    B    C    D    E
#> 1 0.40 0.26 0.24 1.04 0.74
#> 2 1.50 0.47 0.25 2.78 0.99
#> 3 0.98 0.42 1.01 0.82 1.26
#> 4 0.33 0.64 0.77 1.65 1.50
#> 5 0.75 0.32 0.47 0.49 0.30
#> 6 1.48 0.65 0.47 0.97 0.34

protein.stack<-melt(protein) ## reshape2 package ## this will generate a warning because we do not specify the ID ##
colnames(protein.stack)<-c("line","expression")
protein.stack.clean <- protein.stack[!is.na(protein.stack$expression),]
head(protein.stack.clean)
#>   line expression
#> 1    A       0.40
#> 2    A       1.50
#> 3    A       0.98
#> 4    A       0.33
#> 5    A       0.75
#> 6    A       1.48

## stripchart ##

stripchart(protein.stack.clean$expression~protein.stack.clean$line,
           vertical=TRUE,
           method="jitter",
           las=1,
           ylab="Protein Expression", 
           pch=16, 
           col=rainbow(5))

expression.means<-tapply(protein.stack.clean$expression,protein.stack.clean$line,mean)
segments(1:5-0.15,expression.means, 1:5+0.15, expression.means, col="black", lwd=3)

image.png


## boxplot, beanplot and normality test ##
boxplot(protein.stack.clean$expression~protein.stack.clean$line)

image.png

boxplot(protein.stack.clean$expression~protein.stack.clean$line,col=rainbow(5), las=1,ylab="Protein Expression")

image.png


beanplot(protein.stack.clean$expression~protein.stack.clean$line,overallline = "median",log="",ylab="Protein Expression", las=1) ## beanplot package ##

image.png


tapply(protein.stack.clean$expression,protein.stack.clean$line,stat.desc,desc=F, basic=F, norm=T) ## pastecs package ##
#> $A
#>    skewness    skew.2SE    kurtosis    kurt.2SE  normtest.W  normtest.p 
#> -0.02086018 -0.01636601 -1.16300793 -0.47190556  0.92956709  0.37554602 
#> 
#> $B
#>    skewness    skew.2SE    kurtosis    kurt.2SE  normtest.W  normtest.p 
#>  0.11228032  0.08809036 -1.38388312 -0.56152854  0.95351440  0.68878672 
#> 
#> $C
#>    skewness    skew.2SE    kurtosis    kurt.2SE  normtest.W  normtest.p 
#> 1.277108966 1.190715643 0.565595525 0.272498653 0.819684006 0.002921089 
#> 
#> $D
#>     skewness     skew.2SE     kurtosis     kurt.2SE   normtest.W   normtest.p 
#> 1.9465885553 1.8149065614 3.6022084100 1.7355104443 0.7530719928 0.0003548725 
#> 
#> $E
#>   skewness   skew.2SE   kurtosis   kurt.2SE normtest.W normtest.p 
#>  0.4569725  0.4260594 -0.4885993 -0.2354026  0.9670693  0.7411281

## stripchart of log transformed data ##
stripchart(protein.stack.clean$expression~protein.stack.clean$line,
           vertical=TRUE,
           method="jitter",
           las=1,
           ylab="Protein Expression", 
           pch=16, 
           col=rainbow(5)
)

expression.log.means<-tapply(protein.stack.clean$expression,protein.stack.clean$line,mean)
segments(1:5-0.15,expression.log.means,1:5+0.15, expression.log.means, col="black", lwd=3)

image.png


beanplot(protein.stack.clean$expression~protein.stack.clean$line,overallline = "median",ylab="Protein Expression") ## beanplot package ##

image.png


# log transform y and test again #
protein.stack.clean$log10.expression<-log10(protein.stack.clean$expression)
head(protein.stack.clean)
#>   line expression log10.expression
#> 1    A       0.40     -0.397940009
#> 2    A       1.50      0.176091259
#> 3    A       0.98     -0.008773924
#> 4    A       0.33     -0.481486060
#> 5    A       0.75     -0.124938737
#> 6    A       1.48      0.170261715

tapply(protein.stack.clean$log10.expression,protein.stack.clean$line,stat.desc,basic=F, norm=T, desc=F)
#> $A
#>    skewness    skew.2SE    kurtosis    kurt.2SE  normtest.W  normtest.p 
#> -0.61963191 -0.48613680 -1.28185977 -0.52013124  0.85424637  0.04143953 
#> 
#> $B
#>   skewness   skew.2SE   kurtosis   kurt.2SE normtest.W normtest.p 
#> -0.3231837 -0.2535561 -1.2210293 -0.4954485  0.9458450  0.5772532 
#> 
#> $C
#>   skewness   skew.2SE   kurtosis   kurt.2SE normtest.W normtest.p 
#>  0.1851959  0.1726679 -1.0600052 -0.5107006  0.9657060  0.7141796 
#> 
#> $D
#>   skewness   skew.2SE   kurtosis   kurt.2SE normtest.W normtest.p 
#>  0.3345489  0.3119174 -0.4044918 -0.1948804  0.9868425  0.9934883 
#> 
#> $E
#>   skewness   skew.2SE   kurtosis   kurt.2SE normtest.W normtest.p 
#> -0.7155948 -0.6671865 -0.3741176 -0.1802464  0.9313425  0.2050270

## boxplot log transformed data ##
boxplot(protein.stack.clean$log10.expression~protein.stack.clean$line,col=rainbow(5))

image.png


# equal variance (2nd assumption) #
bartlett.test(protein.stack.clean$log10.expression~protein.stack.clean$line)
#> 
#>  Bartlett test of homogeneity of variances
#> 
#> data:  protein.stack.clean$log10.expression by protein.stack.clean$line
#> Bartlett's K-squared = 5.8261, df = 4, p-value = 0.2125

#run the anova
anova.log.protein<-aov(log10.expression~line,data=protein.stack.clean)
summary(anova.log.protein)
#>             Df Sum Sq Mean Sq F value   Pr(>F)    
#> line         4  2.691  0.6728   8.123 1.78e-05 ***
#> Residuals   73  6.046  0.0828                     
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#par(mfrow=c(2,2))
#plot(anova.log.protein)

## Bonferroni ##
pairwise.t.test(protein.stack.clean$log10.expression,protein.stack.clean$line, p.adj = "bonf")
#> 
#>  Pairwise comparisons using t tests with pooled SD 
#> 
#> data:  protein.stack.clean$log10.expression and protein.stack.clean$line 
#> 
#>   A      B       C      D     
#> B 0.3655 -       -      -     
#> C 1.0000 1.0000  -      -     
#> D 0.0571 1.9e-05 0.0017 -     
#> E 1.0000 0.0062  0.3318 0.7675
#> 
#> P value adjustment method: bonferroni

## Tukey ##
TukeyHSD(anova.log.protein)
#>   Tukey multiple comparisons of means
#>     95% family-wise confidence level
#> 
#> Fit: aov(formula = log10.expression ~ line, data = protein.stack.clean)
#> 
#> $line
#>            diff          lwr        upr     p adj
#> B-A -0.25024832 -0.578882494 0.07838585 0.2187264
#> C-A -0.07499724 -0.374997820 0.22500335 0.9560187
#> D-A  0.30549397  0.005493391 0.60549456 0.0438762
#> E-A  0.13327517 -0.166725416 0.43327575 0.7265567
#> C-B  0.17525108 -0.124749499 0.47525167 0.4809387
#> D-B  0.55574230  0.255741712 0.85574288 0.0000183
#> E-B  0.38352349  0.083522904 0.68352407 0.0054767
#> D-C  0.38049121  0.112162532 0.64881989 0.0015431
#> E-C  0.20827240 -0.060056276 0.47660108 0.2023355
#> E-D -0.17221881 -0.440547487 0.09610987 0.3841989

## as a bar plot ##
bar.expression<-barplot(expression.means, beside=TRUE, ylab="Mean expression",ylim=c(0,3),las=1)
expression.se <- tapply(protein.stack.clean$expression, protein.stack.clean$line,std.error) 

arrows(x0=bar.expression, y0=expression.means-expression.se, x1=bar.expression, y1=expression.means+expression.se, length=0.2, angle=90,code=3)

image.png

Correlation

#dev.off()
## Import and plot correlation data ##
exam.anxiety<-read.table("exam.anxiety.dat", sep="\t",header=T)
head(exam.anxiety)
#>   Code Revise Exam Anxiety Gender
#> 1    1      4   40  86.298   Male
#> 2    2     11   65  88.716 Female
#> 3    3     27   80  70.178   Male
#> 4    4     53   80  61.312   Male
#> 5    5      4   40  89.522   Male
#> 6    6     22   70  60.506 Female

## Plot the data ##
plot(exam.anxiety$Anxiety~exam.anxiety$Revise,col=exam.anxiety$Gender,pch=16)

## Add legend  ##
legend("topright", title="Gender",inset=.05, c("Female","Male"), horiz=TRUE, pch=16,col=1:2)

# also works: 
legend("topright", title="Gender",inset=.05, c("Female","Male"), horiz=TRUE, pch=16,col=c("black","red"))

## Fit the model ##
fit.male <- lm(Anxiety~Revise, data=exam.anxiety[exam.anxiety$Gender=="Male",])
fit.female <- lm(Anxiety~Revise, data=exam.anxiety[exam.anxiety$Gender=="Female",])

## Add a line of best-fit ##
abline((fit.male), col="red")
abline((fit.female), col="black")

image.png


## Check the assumptions ##
par(mfrow=c(2,2))
plot(fit.male)

image.png

plot(fit.female)

image.png


## Test the significance of the model ##
summary(fit.male)
#> 
#> Call:
#> lm(formula = Anxiety ~ Revise, data = exam.anxiety[exam.anxiety$Gender == 
#>     "Male", ])
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -73.124  -2.900   2.221   6.750  16.600 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)  84.1941     2.6213  32.119  < 2e-16 ***
#> Revise       -0.5353     0.1016  -5.267 2.94e-06 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 13.3 on 50 degrees of freedom
#> Multiple R-squared:  0.3568, Adjusted R-squared:  0.344 
#> F-statistic: 27.74 on 1 and 50 DF,  p-value: 2.937e-06
summary(fit.female)
#> 
#> Call:
#> lm(formula = Anxiety ~ Revise, data = exam.anxiety[exam.anxiety$Gender == 
#>     "Female", ])
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -22.687  -6.263  -1.204   4.197  38.628 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept) 91.94181    2.27858   40.35  < 2e-16 ***
#> Revise      -0.82380    0.08173  -10.08 1.54e-13 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 10.42 on 49 degrees of freedom
#> Multiple R-squared:  0.6746, Adjusted R-squared:  0.668 
#> F-statistic: 101.6 on 1 and 49 DF,  p-value: 1.544e-13

## Correlation ##
cor(exam.anxiety[exam.anxiety$Gender=="Male",c("Exam","Anxiety","Revise")])
#>               Exam    Anxiety     Revise
#> Exam     1.0000000 -0.5056874  0.3593981
#> Anxiety -0.5056874  1.0000000 -0.5973682
#> Revise   0.3593981 -0.5973682  1.0000000
cor(exam.anxiety[exam.anxiety$Gender == "Female", c("Exam","Anxiety","Revise")])
#>               Exam    Anxiety     Revise
#> Exam     1.0000000 -0.3813845  0.4399865
#> Anxiety -0.3813845  1.0000000 -0.8213698
#> Revise   0.4399865 -0.8213698  1.0000000

## Let's filter out the misbehaving students ##
exam.anxiety.filtered <- exam.anxiety[c(-78,-87),]

## Model without the outlier/influential case ##
fit.male2 <- lm(Anxiety~Revise, data=exam.anxiety.filtered[exam.anxiety.filtered$Gender=="Male",])
summary(fit.male2)
#> 
#> Call:
#> lm(formula = Anxiety ~ Revise, data = exam.anxiety.filtered[exam.anxiety.filtered$Gender == 
#>     "Male", ])
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -22.0296  -3.8704   0.5626   6.0786  14.2525 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept) 86.97461    1.64755  52.790  < 2e-16 ***
#> Revise      -0.60752    0.06326  -9.603 7.59e-13 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 8.213 on 49 degrees of freedom
#> Multiple R-squared:  0.653,  Adjusted R-squared:  0.6459 
#> F-statistic: 92.22 on 1 and 49 DF,  p-value: 7.591e-13

fit.female2 <- lm(Anxiety~Revise, data=exam.anxiety.filtered[exam.anxiety.filtered$Gender=="Female",])
summary(fit.female2)
#> 
#> Call:
#> lm(formula = Anxiety ~ Revise, data = exam.anxiety.filtered[exam.anxiety.filtered$Gender == 
#>     "Female", ])
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -18.7518  -5.7069  -0.7782   3.2117  18.5538 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept) 92.24536    1.93591   47.65   <2e-16 ***
#> Revise      -0.87504    0.07033  -12.44   <2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 8.849 on 48 degrees of freedom
#> Multiple R-squared:  0.7633, Adjusted R-squared:  0.7584 
#> F-statistic: 154.8 on 1 and 48 DF,  p-value: < 2.2e-16

## New correlation ##
cor(exam.anxiety.filtered[exam.anxiety.filtered$Gender=="Male",c("Exam","Anxiety","Revise")])
#>               Exam    Anxiety     Revise
#> Exam     1.0000000 -0.4653914  0.4028863
#> Anxiety -0.4653914  1.0000000 -0.8080950
#> Revise   0.4028863 -0.8080950  1.0000000
cor(exam.anxiety.filtered[exam.anxiety.filtered$Gender=="Female",c("Exam","Anxiety","Revise")])
#>               Exam    Anxiety     Revise
#> Exam     1.0000000 -0.4070663  0.4312691
#> Anxiety -0.4070663  1.0000000 -0.8736684
#> Revise   0.4312691 -0.8736684  1.0000000

## New plot ##
#dev.off()
plot(exam.anxiety$Anxiety~exam.anxiety$Revise,col=exam.anxiety$Gender,pch=16)

## Add legend  ##
legend("topright", title="Gender", inset=.05, c("Female","Male"), horiz=TRUE, pch=16,col=1:2)

## Lines of best fit ##
abline((fit.male), col="red")
abline((fit.female), col="black")
abline((fit.male2), col="red",lty=3)
abline((fit.female2), col="black",lty=3)

image.png

2020-07-08 R基础绘图+统计
R统计安装加载必要的R包 QUALITATIVE DATA QUANTITATIVE DATA 卡方检验，Fis...
Day—4
R 编程语言，是进行统计计算和绘图的环境 R语言的基础绘图系统主要由基础包graphics提供 Rstudio 图...
生信入门学习笔记day4@2021.06.28
R语言基础 R 语言是一种主要用于统计分析、绘图、数据挖掘的数学编程语言。R language: The R Pr...
学习小组Day4笔记——山川石
R语言基础 R和RStudio R和RStudio介绍 R是一种编程语言，也是统计计算和绘图的环境，它汇集了许多函...
学习小组Day4笔记--大水
R语言基础 0.1什么是R语言？ R是用于统计、绘图的语言和操作环境。R是属于GNU系统的一个自由、免费、源代码开...
学习小组Day4笔记--镜筒
R语言基础 R和RSTUDIO简介 1. R 一种编程语言统计计算和绘图的环境汇集了许多函数，能够提供强大的功...
学习小组Day4笔记-- Louis-jl
Day4笔记 R语言基础认识R语言及Rstudio R是用于统计分析、绘图的语言和操作环境，汇集了许多函数，有一...
学习小组day4笔记--strengthen
R语言基础什么是R语言 R是用于统计分析、绘图的语言和操作环境。它是属于GNU系统的一个自由、免费、源代码开放的...
大数据 | 从实例教你掌握R语言
【R语言基础知识】： R：是用于统计分析、绘图的语言和操作环境。R是属于GNU系统的一个自由、免费、源代码开放的软...
学习小组Day4笔记—白兔儿溜溜
R和Rstudio初识之前其实乱入过R，还是扎扎实实打好基础。简介是一种编程语言，也是统计计算和绘图的环境，...