单细胞GSEA分析运行记录2

作者: Seurat_Satija | 来源:发表于2021-03-27 09:19 被阅读0次
    > #加载R包
    > library(tidyverse)
    -- Attaching packages ------------------------------------------------------------------------- tidyverse 1.3.0 --
    √ ggplot2 3.3.3     √ purrr   0.3.4
    √ tibble  3.0.4     √ dplyr   1.0.2
    √ tidyr   1.1.2     √ stringr 1.4.0
    √ readr   1.4.0     √ forcats 0.5.0
    -- Conflicts ---------------------------------------------------------------------------- tidyverse_conflicts() --
    x dplyr::filter() masks stats::filter()
    x dplyr::lag()    masks stats::lag()
    > library(Seurat)
    Registered S3 method overwritten by 'spatstat':
      method     from
      print.boxx cli 
    Attaching SeuratObject
    Warning message:
    程辑包‘Seurat’是用R版本4.0.4 来建造的 
    > workflow
    Error: object 'workflow' not found
    > # Load the PBMC dataset
    > pbmc.data <- Read10X(data.dir = "pbmc5k_v3_filtered_feature_bc_matrix/")
    > #workflow
    > # Load the PBMC dataset
    > pbmc.data <- Read10X(data.dir = "pbmc5k_v3_filtered_feature_bc_matrix/")
    > # Initialize the Seurat object with the raw (non-normalized data).
    > pbmc <- CreateSeuratObject(counts = pbmc.data, project = "pbmc5k", min.cells = 3, min.features = 200)
    > pbmc
    An object of class Seurat 
    18791 features across 4962 samples within 1 assay 
    Active assay: RNA (18791 features, 0 variable features)
    > #QC
    > pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
    > # Visualize QC metrics as a violin plot
    > VlnPlot(pbmc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
    > pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 5000 & percent.mt < 25)
    > #Normalization
    > pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
    Performing log-normalization
    0%   10   20   30   40   50   60   70   80   90   100%
    [----|----|----|----|----|----|----|----|----|----|
    **************************************************|
    > #高变基因选择
    > pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
    Calculating gene variances
    0%   10   20   30   40   50   60   70   80   90   100%
    [----|----|----|----|----|----|----|----|----|----|
    **************************************************|
    Calculating feature variances of standardized and clipped values
    0%   10   20   30   40   50   60   70   80   90   100%
    [----|----|----|----|----|----|----|----|----|----|
    **************************************************|
    > #标准化
    > all.genes <- rownames(pbmc)
    > pbmc <- ScaleData(pbmc, features = all.genes)
    Centering and scaling data matrix
      |========================================================================================================| 100%
    > #去除MT,重新进行标准化
    > pbmc <- ScaleData(pbmc, vars.to.regress = "percent.mt")
    Regressing out percent.mt
      |========================================================================================================| 100%
    Centering and scaling data matrix
      |========================================================================================================| 100%
    Warning message:
    In strsplit(conditionMessage(e), "\n") :
      input string 1 is invalid in this locale
    > #PCA
    > pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc), verbose = FALSE)
    > #聚类
    > pbmc <- FindNeighbors(pbmc, dims = 1:20)
    Computing nearest neighbor graph
    Computing SNN
    > pbmc <- FindClusters(pbmc, resolution = 0.5)
    Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
    
    Number of nodes: 4595
    Number of edges: 165660
    
    Running Louvain algorithm...
    0%   10   20   30   40   50   60   70   80   90   100%
    [----|----|----|----|----|----|----|----|----|----|
    **************************************************|
    Maximum modularity in 10 random starts: 0.9001
    Number of communities: 13
    Elapsed time: 0 seconds
    > #可视化
    > pbmc <- RunUMAP(pbmc, dims = 1:20)
    Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
    To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
    This message will be shown once per session
    13:05:18 UMAP embedding parameters a = 0.9922 b = 1.112
    13:05:18 Read 4595 rows and found 20 numeric columns
    13:05:18 Using Annoy for neighbor search, n_neighbors = 30
    13:05:18 Building Annoy index with metric = cosine, n_trees = 50
    0%   10   20   30   40   50   60   70   80   90   100%
    [----|----|----|----|----|----|----|----|----|----|
    **************************************************|
    13:05:19 Writing NN index file to temp file C:\Users\Nano\AppData\Local\Temp\RtmpMfsJdO\file2d3c3a9c70be
    13:05:19 Searching Annoy index using 1 thread, search_k = 3000
    13:05:21 Annoy recall = 100%
    13:05:21 Commencing smooth kNN distance calibration using 1 thread
    13:05:22 Initializing from normalized Laplacian + noise
    13:05:23 Commencing optimization for 500 epochs, with 192042 positive edges
    0%   10   20   30   40   50   60   70   80   90   100%
    [----|----|----|----|----|----|----|----|----|----|
    **************************************************|
    13:05:44 Optimization finished
    > pbmc<- RunTSNE(pbmc, dims = 1:20)
    > ## after we run UMAP and TSNE, there are more entries in the reduction slot
    > str(pbmc@reductions)
    List of 3
     $ pca :Formal class 'DimReduc' [package "SeuratObject"] with 9 slots
      .. ..@ cell.embeddings           : num [1:4595, 1:50] -31.75 -27.91 4.87 11.28 -30.22 ...
      .. .. ..- attr(*, "dimnames")=List of 2
      .. .. .. ..$ : chr [1:4595] "AAACCCAAGCGTATGG-1" "AAACCCAGTCCTACAA-1" "AAACGCTAGGGCATGT-1" "AAACGCTGTAGGTACG-1" ...
      .. .. .. ..$ : chr [1:50] "PC_1" "PC_2" "PC_3" "PC_4" ...
      .. ..@ feature.loadings          : num [1:2000, 1:50] -0.014365 -0.000161 0.005058 -0.013235 -0.058571 ...
      .. .. ..- attr(*, "dimnames")=List of 2
      .. .. .. ..$ : chr [1:2000] "FCER1A" "PPBP" "IGLC2" "C1QA" ...
      .. .. .. ..$ : chr [1:50] "PC_1" "PC_2" "PC_3" "PC_4" ...
      .. ..@ feature.loadings.projected: num[0 , 0 ] 
      .. ..@ assay.used                : chr "RNA"
      .. ..@ global                    : logi FALSE
      .. ..@ stdev                     : num [1:50] 15.65 7.92 6.7 5.5 4.53 ...
      .. ..@ key                       : chr "PC_"
      .. ..@ jackstraw                 :Formal class 'JackStrawData' [package "SeuratObject"] with 4 slots
      .. .. .. ..@ empirical.p.values     : num[0 , 0 ] 
      .. .. .. ..@ fake.reduction.scores  : num[0 , 0 ] 
      .. .. .. ..@ empirical.p.values.full: num[0 , 0 ] 
      .. .. .. ..@ overall.p.values       : num[0 , 0 ] 
      .. ..@ misc                      :List of 1
      .. .. ..$ total.variance: num 1563
     $ umap:Formal class 'DimReduc' [package "SeuratObject"] with 9 slots
      .. ..@ cell.embeddings           : num [1:4595, 1:2] -7.7 -9.575 -5.893 0.395 -7.803 ...
      .. .. ..- attr(*, "scaled:center")= num [1:2] -1.1672 0.0318
      .. .. ..- attr(*, "dimnames")=List of 2
      .. .. .. ..$ : chr [1:4595] "AAACCCAAGCGTATGG-1" "AAACCCAGTCCTACAA-1" "AAACGCTAGGGCATGT-1" "AAACGCTGTAGGTACG-1" ...
      .. .. .. ..$ : chr [1:2] "UMAP_1" "UMAP_2"
      .. ..@ feature.loadings          : num[0 , 0 ] 
      .. ..@ feature.loadings.projected: num[0 , 0 ] 
      .. ..@ assay.used                : chr "RNA"
      .. ..@ global                    : logi TRUE
      .. ..@ stdev                     : num(0) 
      .. ..@ key                       : chr "UMAP_"
      .. ..@ jackstraw                 :Formal class 'JackStrawData' [package "SeuratObject"] with 4 slots
      .. .. .. ..@ empirical.p.values     : num[0 , 0 ] 
      .. .. .. ..@ fake.reduction.scores  : num[0 , 0 ] 
      .. .. .. ..@ empirical.p.values.full: num[0 , 0 ] 
      .. .. .. ..@ overall.p.values       : num[0 , 0 ] 
      .. ..@ misc                      : list()
     $ tsne:Formal class 'DimReduc' [package "SeuratObject"] with 9 slots
      .. ..@ cell.embeddings           : num [1:4595, 1:2] -7.65 10.12 -32.29 -22.12 1.52 ...
      .. .. ..- attr(*, "dimnames")=List of 2
      .. .. .. ..$ : chr [1:4595] "AAACCCAAGCGTATGG-1" "AAACCCAGTCCTACAA-1" "AAACGCTAGGGCATGT-1" "AAACGCTGTAGGTACG-1" ...
      .. .. .. ..$ : chr [1:2] "tSNE_1" "tSNE_2"
      .. ..@ feature.loadings          : num[0 , 0 ] 
      .. ..@ feature.loadings.projected: num[0 , 0 ] 
      .. ..@ assay.used                : chr "RNA"
      .. ..@ global                    : logi TRUE
      .. ..@ stdev                     : num(0) 
      .. ..@ key                       : chr "tSNE_"
      .. ..@ jackstraw                 :Formal class 'JackStrawData' [package "SeuratObject"] with 4 slots
      .. .. .. ..@ empirical.p.values     : num[0 , 0 ] 
      .. .. .. ..@ fake.reduction.scores  : num[0 , 0 ] 
      .. .. .. ..@ empirical.p.values.full: num[0 , 0 ] 
      .. .. .. ..@ overall.p.values       : num[0 , 0 ] 
      .. ..@ misc                      : list()
    > DimPlot(pbmc, reduction = "umap", label = TRUE)
    > #查找marker基因
    > pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
    Calculating cluster 0
      |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=04s  
    Calculating cluster 1
      |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=28s  
    Calculating cluster 2
      |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=03s  
    Calculating cluster 3
      |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=22s  
    Calculating cluster 4
      |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=06s  
    Calculating cluster 5
      |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=06s  
    Calculating cluster 6
      |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=06s  
    Calculating cluster 7
      |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=23s  
    Calculating cluster 8
      |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=09s  
    Calculating cluster 9
      |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=04s  
    Calculating cluster 10
      |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=16s  
    Calculating cluster 11
      |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=09s  
    Calculating cluster 12
      |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=30s  
    > saveRDS(pbmc, "pbmc_5k_v3.rds")
    > saveRDS(pbmc, "pbmc_5k_v3.rds",compress = TRUE)
    > saveRDS(pbmc, "pbmc_5k_v3.rds",compress = F)
    > #如图所示在处理以后,进行细胞分群后可以分为14个细胞亚群,分别为cluster 0-13。
    > write.csv(pbmc.markers,file = 'pbmc.markers.csv')
    > library(Seurat)
    > library(patchwork)
    > p1 <- DimPlot(pbmc, reduction = "umap", label = TRUE)
    > p1 <- DimPlot(pbmc, reduction = "umap", label = TRUE)
    > ggsave("DimPlot1.pdf", plot=p1, width=8, height=6)
    > ggsave("DimPlot1.png", plot=p1, width=8, height=6)
    > library(Seurat)
    > pbmc<- readRDS("pbmc_5k_v3.rds")
    > #library(devtools)
    > #install_github('immunogenomics/presto')
    > library(presto)
    Error in library(presto) : 不存在叫‘presto’这个名字的程辑包
    > install_github('immunogenomics/presto')
    Error in install_github("immunogenomics/presto") : 
      could not find function "install_github"
    > library(devtools)
    载入需要的程辑包:usethis
    Warning message:
    程辑包‘devtools’是用R版本4.0.4 来建造的 
    > library(devtools)
    > install_github('immunogenomics/presto')
    Downloading GitHub repo immunogenomics/presto@HEAD
    immunogenomics-presto-052085d/data/object_sce.rda: truncated gzip input
    tar.exe: Error exit delayed from previous errors.
    Error: Failed to install 'presto' from GitHub:
      Does not appear to be an R package (no DESCRIPTION)
    In addition: Warning messages:
    1: In utils::untar(tarfile, ...) :
      ‘tar.exe -xf "C:\Users\Nano\AppData\Local\Temp\RtmpMfsJdO\file2d3c26343827.tar.gz" -C "C:/Users/Nano/AppData/Local/Temp/RtmpMfsJdO/remotes2d3c40465af0"’ returned error code 1
    2: In system(cmd, intern = TRUE) :
      running command 'tar.exe -tf "C:\Users\Nano\AppData\Local\Temp\RtmpMfsJdO\file2d3c26343827.tar.gz"' had status 1
    > library(devtools)
    > install_github('immunogenomics/presto')
    Downloading GitHub repo immunogenomics/presto@HEAD
    These packages have more recent versions available.
    It is recommended to update all of them.
    Which would you like to update?
    
     1: All                                           
     2: CRAN packages only                            
     3: None                                          
     4: Rcpp         (1.0.5      -> 1.0.6     ) [CRAN]
     5: utf8         (1.1.4      -> 1.2.1     ) [CRAN]
     6: crayon       (1.3.4      -> 1.4.1     ) [CRAN]
     7: cli          (2.2.0      -> 2.3.1     ) [CRAN]
     8: pillar       (1.4.7      -> 1.5.1     ) [CRAN]
     9: fansi        (0.4.1      -> 0.4.2     ) [CRAN]
    10: lifecycle    (0.2.0      -> 1.0.0     ) [CRAN]
    11: farver       (2.0.3      -> 2.1.0     ) [CRAN]
    12: withr        (2.3.0      -> 2.4.1     ) [CRAN]
    13: tibble       (3.0.4      -> 3.1.0     ) [CRAN]
    14: rlang        (0.4.9      -> 0.4.10    ) [CRAN]
    15: isoband      (0.2.3      -> 0.2.4     ) [CRAN]
    16: fastmap      (1.0.1      -> 1.1.0     ) [CRAN]
    17: cachem       (1.0.1      -> 1.0.4     ) [CRAN]
    18: memoise      (1.1.0      -> 2.0.0     ) [CRAN]
    19: mime         (0.9        -> 0.10      ) [CRAN]
    20: RSQLite      (2.2.2      -> 2.2.4     ) [CRAN]
    21: DBI          (1.1.0      -> 1.1.1     ) [CRAN]
    22: XML          (3.99-0.5   -> 3.99-0.6  ) [CRAN]
    23: formatR      (1.7        -> 1.8       ) [CRAN]
    24: matrixStats  (0.57.0     -> 0.58.0    ) [CRAN]
    25: RCurl        (1.98-1.2   -> 1.98-1.3  ) [CRAN]
    26: DelayedArray (0.16.0     -> 0.16.3    ) [CRAN]
    27: GenomeInfoDb (1.26.2     -> 1.26.4    ) [CRAN]
    28: MatrixGen... (1.2.0      -> 1.2.1     ) [CRAN]
    29: RcppArmad... (0.10.1.2.0 -> 0.10.2.2.0) [CRAN]
    30: cpp11        (0.2.4      -> 0.2.6     ) [CRAN]
    31: dplyr        (1.0.2      -> 1.0.5     ) [CRAN]
    32: DESeq2       (1.30.0     -> 1.30.1    ) [CRAN]
    33: tidyr        (1.1.2      -> 1.1.3     ) [CRAN]
    34: data.table   (1.13.6     -> 1.14.0    ) [CRAN]
    
    Enter one or more numbers, or an empty line to skip updates:3
    √  checking for file 'C:\Users\Nano\AppData\Local\Temp\RtmpMfsJdO\remotes2d3c24d31e5c\immunogenomics-presto-052085d/DESCRIPTION' ...
    -  preparing 'presto':
    √  checking DESCRIPTION meta-information ... 
    -  cleaning src
    -  checking for LF line-endings in source and make files and shell scripts
    -  checking for empty or unneeded directories
    -  looking to see if a 'data/datalist' file should be added
    -  building 'presto_1.0.0.tar.gz'
       
    Installing package into ‘C:/Users/Nano/Documents/R/win-library/4.0’
    (as ‘lib’ is unspecified)
    * installing *source* package 'presto' ...
    ** using staged installation
    ** libs
    "C:/rtools40/mingw64/bin/"g++  -std=gnu++11 -I"C:/PROGRA~1/R/R-40~1.3/include" -DNDEBUG  -I'C:/Users/Nano/Documents/R/win-library/4.0/Rcpp/include' -I'C:/Users/Nano/Documents/R/win-library/4.0/RcppArmadillo/include'        -O2 -Wall  -mfpmath=sse -msse2 -mstackrealign -c RcppExports.cpp -o RcppExports.o
    "C:/rtools40/mingw64/bin/"g++  -std=gnu++11 -I"C:/PROGRA~1/R/R-40~1.3/include" -DNDEBUG  -I'C:/Users/Nano/Documents/R/win-library/4.0/Rcpp/include' -I'C:/Users/Nano/Documents/R/win-library/4.0/RcppArmadillo/include'        -O2 -Wall  -mfpmath=sse -msse2 -mstackrealign -c fast_wilcox.cpp -o fast_wilcox.o
    fast_wilcox.cpp: In function 'std::__cxx11::list<float> cpp_in_place_rank_mean(arma::vec&, int, int)':
    fast_wilcox.cpp:116:34: warning: comparison of integer expressions of different signedness: 'size_t' {aka 'long long unsigned int'} and 'int' [-Wsign-compare]
         for (size_t i = idx_begin; i <= idx_end; i++) {
                                    ~~^~~~~~~~~~
    C:/rtools40/mingw64/bin/g++ -shared -s -static-libgcc -o presto.dll tmp.def RcppExports.o fast_wilcox.o -LC:/PROGRA~1/R/R-40~1.3/bin/x64 -lR
    installing to C:/Users/Nano/Documents/R/win-library/4.0/00LOCK-presto/00new/presto/libs/x64
    ** R
    ** data
    ** byte-compile and prepare package for lazy loading
    ** help
    *** installing help indices
      converting help for package 'presto'
        finding HTML links ... 好了
        exprs                                   html  
        nnzeroGroups                            html  
        object_sce                              html  
        object_seurat                           html  
        pipe                                    html  
        presto                                  html  
        rank_matrix                             html  
        sumGroups                               html  
        top_markers                             html  
        wilcoxauc                               html  
        y                                       html  
    ** building package indices
    ** installing vignettes
    ** testing if installed package can be loaded from temporary location
    ** testing if installed package can be loaded from final location
    ** testing if installed package keeps a record of temporary installation path
    * DONE (presto)
    > #install_github('immunogenomics/presto')
    > library(presto)
    载入需要的程辑包:Rcpp
    载入需要的程辑包:data.table
    data.table 1.13.6 using 2 threads (see ?getDTthreads).  Latest news: r-datatable.com
    **********
    用中文运行data.table。软件包只提供英语支持。当在在线搜索帮助时,也要确保检查英语错误信息。这个可以通过查看软件包源文件中的po/R-zh_CN.po和po/zh_CN.po文件获得,这个文件可以并排找到母语和英语错误信息。
    **********
    
    载入程辑包:‘data.table’
    
    The following objects are masked from ‘package:dplyr’:
    
        between, first, last
    
    The following object is masked from ‘package:purrr’:
    
        transpose
    
    > #Loading required package: Rcpp
    > pbmc.genes <- wilcoxauc(pbmc, 'seurat_clusters')
    > head(pbmc.genes)
         feature group     avgExpr         logFC statistic       auc         pval         padj    pct_in
    1 AL627309.1     0 0.004801016 -0.0048337160   2113594 0.4966881 0.0451711892 0.0817186691 0.5443235
    2 AL627309.3     0 0.000000000 -0.0004923881   2126401 0.4996978 0.3781096732 0.4754137751 0.0000000
    3 AL669831.5     0 0.058225088 -0.0101451241   2081447 0.4891337 0.0182067030 0.0364774662 6.8429238
    4     FAM87B     0 0.000000000 -0.0017590952   2121900 0.4986401 0.0612494406 0.1063124181 0.0000000
    5  LINC00115     0 0.033568380 -0.0054911605   2090280 0.4912094 0.0148011270 0.0301102065 3.7325039
    6     FAM41C     0 0.015927696 -0.0162813395   2077756 0.4882664 0.0001622764 0.0004571717 2.0217729
         pct_out
    1 1.20882442
    2 0.06044122
    3 9.21728619
    4 0.27198549
    5 5.59081293
    6 4.38198852
    > # 我们拥有每个cluster的所有基因
    > dplyr::count(pbmc.genes, group)
       group     n
    1      0 18791
    2      1 18791
    3     10 18791
    4     11 18791
    5     12 18791
    6      2 18791
    7      3 18791
    8      4 18791
    9      5 18791
    10     6 18791
    11     7 18791
    12     8 18791
    13     9 18791
    > #使用fgsea进行基因集富集
    > library(msigdbr)
    Warning message:
    程辑包‘msigdbr’是用R版本4.0.4 来建造的 
    > library(fgsea)
    > library(dplyr)
    > library(ggplot2)
    > msigdbr_show_species()#我们看看都有神马物种的数据
     [1] "Bos taurus"               "Caenorhabditis elegans"   "Canis lupus familiaris"  
     [4] "Danio rerio"              "Drosophila melanogaster"  "Gallus gallus"           
     [7] "Homo sapiens"             "Mus musculus"             "Rattus norvegicus"       
    [10] "Saccharomyces cerevisiae" "Sus scrofa"              
    Warning message:
    'msigdbr_show_species' is deprecated.
    Use 'msigdbr_species' instead.
    See help("Deprecated") 
    > m_df<- msigdbr(species = "Homo sapiens", category = "C7")#我们使用C7免疫基因集
    > head(m_df)
    # A tibble: 6 x 17
      gs_cat gs_subcat gs_name entrez_gene gene_symbol human_entrez_ge~ human_gene_symb~ gs_id gs_pmid
      <chr>  <chr>     <chr>         <int> <chr>                  <int> <chr>            <chr> <chr>  
    1 C7     ""        GOLDRA~          20 ABCA2                     20 ABCA2            M3044 164927~
    2 C7     ""        GOLDRA~       10057 ABCC5                  10057 ABCC5            M3044 164927~
    3 C7     ""        GOLDRA~       25864 ABHD14A                25864 ABHD14A          M3044 164927~
    4 C7     ""        GOLDRA~          34 ACADM                     34 ACADM            M3044 164927~
    5 C7     ""        GOLDRA~          54 ACP5                      54 ACP5             M3044 164927~
    6 C7     ""        GOLDRA~       51205 ACP6                   51205 ACP6             M3044 164927~
    # ... with 8 more variables: gs_geoid <chr>, gs_exact_source <chr>, gs_url <chr>,
    #   gs_description <chr>, species_name <chr>, species_common_name <chr>, ortholog_sources <chr>,
    #   num_ortholog_sources <dbl>
    > fgsea_sets<- m_df %>% split(x = .$gene_symbol, f = .$gs_name)
    > fgsea_sets$GSE11057_NAIVE_VS_MEMORY_CD4_TCELL_UP
      [1] "ABLIM1"       "ACER1"        "ADGRA3"       "ADGRL1"       "AEBP1"        "AGRN"        
      [7] "AIF1"         "ALG10B"       "AMN1"         "ANKRD36BP2"   "APBA2"        "APBB1"       
     [13] "ARMCX2"       "BACH2"        "BEND5"        "BNIP3L"       "BTBD3"        "CA6"         
     [19] "CADPS2"       "CAMK4"        "CD248"        "CD55"         "CENPV"        "CEP41"       
     [25] "CHI3L2"       "CHML"         "CHMP7"        "CIAPIN1"      "CLCN5"        "COL5A2"      
     [31] "CRLF3"        "CYHR1"        "DDR1"         "DNHD1"        "DNTT"         "DSC1"        
     [37] "EDAR"         "EEA1"         "EFNA1"        "ENGASE"       "EXPH5"        "FCGRT"       
     [43] "GAL3ST4"      "GNAI1"        "GP5"          "GPR160"       "GPRASP1"      "GPRASP2"     
     [49] "GPRC5B"       "HEMGN"        "HIPK2"        "HSF2"         "IGF1R"        "IGIP"        
     [55] "ITGA6"        "KCNQ5"        "KCTD3"        "KLF3-AS1"     "KLHDC1"       "KLHL13"      
     [61] "KLHL24"       "KRT18"        "KRT2"         "KRT72"        "KRT73"        "LAGE3P1"     
     [67] "LEF1-AS1"     "LINC02175"    "LINC02604"    "LMLN"         "LRRN3"        "MALL"        
     [73] "MAML2"        "MANSC1"       "ME3"          "MEF2A"        "MEST"         "METAP1D"     
     [79] "MIR101-1"     "MIR600HG"     "MLXIP"        "MPP1"         "MPP7"         "MRPL45P2"    
     [85] "MYB"          "MZF1"         "NAA16"        "NBEA"         "NDC1"         "NDFIP1"      
     [91] "NET1"         "NPAS2"        "NPM3"         "NSUN5"        "NUCB2"        "NUDT12"      
     [97] "NUDT17"       "OBSCN-AS1"    "PADI4"        "PCED1B"       "PCSK5"        "PDCD4-AS1"   
    [103] "PDE3B"        "PDE7A"        "PDE7B"        "PDE9A"        "PDK1"         "PECAM1"      
    [109] "PHGDH"        "PIGL"         "PIK3CD"       "PIK3IP1"      "PITPNM2"      "PJVK"        
    [115] "PKIG"         "PLA2G12A"     "PLAG1"        "PLLP"         "PRKCQ-AS1"    "PRRT1"       
    [121] "PRXL2A"       "PTPRK"        "PXYLP1"       "RAPGEF6"      "REG4"         "RETREG1"     
    [127] "RGS10"        "RHPN2"        "RIN3"         "RNF157-AS1"   "RNF175"       "RNF227"      
    [133] "ROBO3"        "SATB1"        "SCAI"         "SCARB1"       "SCML1"        "SCML2"       
    [139] "SERPINE2"     "SERTAD2"      "SERTM1"       "SETD1B"       "SFMBT2"       "SFXN2"       
    [145] "SH3RF3"       "SIAH1"        "SLC11A2"      "SLC25A25-AS1" "SLC25A37"     "SLC29A2"     
    [151] "SLC2A11"      "SMPD1"        "SNHG32"       "SNORD104"     "SNPH"         "SNRPN"       
    [157] "SNX9"         "SORCS3"       "SPPL2B"       "SREBF1"       "STAP1"        "STK17A"      
    [163] "TAF4B"        "TARBP1"       "TBC1D32"      "TGFBR2"       "TIMP2"        "TLE2"        
    [169] "TMEM170B"     "TMEM198B"     "TMEM220"      "TMEM263"      "TMEM272"      "TMEM41B"     
    [175] "TMIGD2"       "TOM1L2"       "TSPAN3"       "TTC28"        "TUG1"         "UBE2E2"      
    [181] "USP44"        "VPS52"        "ZBTB18"       "ZC4H2"        "ZMYND8"       "ZNF182"      
    [187] "ZNF229"       "ZNF496"       "ZNF506"       "ZNF516"       "ZNF546"       "ZNF563"      
    [193] "ZNF662"       "ZNF780B"      "ZNRD1ASP"    
    > # [161] "SMPD1"        "SNORD104"     "SNPH"         "SNRPN"
    > # [165] "SNX9"         "SORCS3"       "SPPL2B"       "SREBF1"
    > # [169] "STAP1"        "STK17A"       "TAF4B"        "TARBP1"
    > # [173] "TGFBR2"       "TIMP2"        "TLE2"         "TMEM170B"
    > # [177] "TMEM220"      "TMEM41B"      "TMEM48"       "TMIGD2"
    > # [181] "TOM1L2"       "TSPAN3"       "TTC28"        "TUG1"
    > # [185] "UBE2E2"       "USP44"        "VPS52"        "ZC4H2"
    > # [189] "ZMYND8"       "ZNF182"       "ZNF229"       "ZNF238"
    > # [193] "ZNF496"       "ZNF506"       "ZNF516"       "ZNF546"
    > # [197] "ZNF563"       "ZNF662"       "ZNF780B"      "ZNRD1-AS1"
    > GSE11057_NAIVE_VS_MEMORY_CD4_TCELL_UP#代表神马呢?
    Error: object 'GSE11057_NAIVE_VS_MEMORY_CD4_TCELL_UP' not found
    > # Naive CD4+ T cells
    > pbmc.genes %>%
    +   dplyr::filter(group == "0") %>%
    +   arrange(desc(logFC), desc(auc)) %>%
    +   head(n = 10)      #进行降序排序
       feature group  avgExpr     logFC statistic       auc          pval          padj   pct_in  pct_out
    1     LDHB     0 2.236842 1.1323561   3729084 0.8763234  0.000000e+00  0.000000e+00 98.75583 74.94711
    2     TRAC     0 2.001595 1.1146542   3292895 0.7738204 9.776911e-196 1.716990e-193 97.43390 45.21003
    3     CD3E     0 1.875531 1.0540950   3318700 0.7798844 1.776334e-204 3.441143e-202 97.97823 44.90783
    4     TCF7     0 1.587248 1.0267936   3595559 0.8449454  0.000000e+00  0.000000e+00 94.71229 43.94077
    5      LTB     0 2.393454 1.0051991   3036392 0.7135429 5.121301e-114 4.130230e-112 98.75583 65.30674
    6     CD3D     0 1.732485 0.9454245   3220600 0.7568311 3.387398e-174 5.092207e-172 96.81182 42.85283
    7     CCR7     0 1.178436 0.9284103   3607895 0.8478444  0.000000e+00  0.000000e+00 85.38103 22.69568
    8     IL7R     0 1.648002 0.8906921   3109680 0.7307653 2.634379e-148 3.113372e-146 88.88025 35.75098
    9    SARAF     0 2.226737 0.8493958   3722765 0.8748385  0.000000e+00  0.000000e+00 99.14463 89.36235
    10 TRABD2A     0 1.041894 0.8438752   3554334 0.8352577  0.000000e+00  0.000000e+00 80.09331 19.49229
    > # 仅选择fgsea的feature和auc列
    > cluster0.genes<- pbmc.genes %>%
    +   dplyr::filter(group == "0") %>%
    +   arrange(desc(auc)) %>%
    +   dplyr::select(feature, auc)
    > ranks<- deframe(cluster0.genes)
    > head(ranks)
        RPL30     RPS3A     RPS13    RPL35A     RPL32     RPL34 
    0.9376551 0.9358956 0.9250745 0.9196189 0.9185743 0.9132980 
    > fgseaRes<- fgsea(fgsea_sets, stats = ranks, nperm = 1000)
    Warning messages:
    1: In fgsea(fgsea_sets, stats = ranks, nperm = 1000) :
      You are trying to run fgseaSimple. It is recommended to use fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument in the fgsea function call.
    2: In preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam,  :
      There are ties in the preranked stats (20.92% of the list).
    The order of those tied genes will be arbitrary, which may produce unexpected results.
    3: In preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam,  :
      All values in the stats vector are greater than zero and scoreType is "std", maybe you should switch to scoreType = "pos".
    > fgseaResTidy <- fgseaRes %>%
    +   as_tibble() %>%
    +   arrange(desc(NES))
    > fgseaResTidy %>%
    +   dplyr::select(-leadingEdge, -ES, -nMoreExtreme) %>%
    +   arrange(padj) %>%
    +   head()
    # A tibble: 6 x 5
      pathway                                         pval    padj   NES  size
      <chr>                                          <dbl>   <dbl> <dbl> <int>
    1 GSE10325_CD4_TCELL_VS_MYELOID_UP             0.00122 0.00688  9.87   196
    2 GSE10325_LUPUS_CD4_TCELL_VS_LUPUS_MYELOID_UP 0.00122 0.00688  9.81   193
    3 GSE22886_NAIVE_TCELL_VS_MONOCYTE_UP          0.00122 0.00688  9.44   196
    4 GSE22886_NAIVE_CD4_TCELL_VS_MONOCYTE_UP      0.00122 0.00688  9.30   195
    5 GSE22886_NAIVE_TCELL_VS_DC_UP                0.00122 0.00688  8.90   195
    6 GSE10325_LUPUS_CD4_TCELL_VS_LUPUS_BCELL_UP   0.00122 0.00688  8.29   196
    > #应用标准化富集分数绘制barplot
    > # 显示top20信号通路
    > ggplot(fgseaResTidy %>% filter(padj < 0.008) %>% head(n= 20), aes(reorder(pathway, NES), NES)) +
    +   geom_col(aes(fill= NES < 7.5)) +
    +   coord_flip() +
    +   labs(x="Pathway", y="Normalized Enrichment Score",
    +        title="Hallmark pathways NES from GSEA") +
    +   theme_minimal() ####以7.5进行绘图填色
    > #GSEA style plot
    > plotEnrichment(fgsea_sets[["GSE10325_CD4_TCELL_VS_MYELOID_UP"]],
    +                ranks) + labs(title="GSE10325 CD4 TCELL VS MYELOID UP")
    > #应用标准化富集分数绘制barplot
    > # 显示top20信号通路
    > p2 <- ggplot(fgseaResTidy %>% filter(padj < 0.008) %>% head(n= 20), aes(reorder(pathway, NES), NES)) +
    +   geom_col(aes(fill= NES < 7.5)) +
    +   coord_flip() +
    +   labs(x="Pathway", y="Normalized Enrichment Score",
    +        title="Hallmark pathways NES from GSEA") +
    +   theme_minimal() ####以7.5进行绘图填色
    > p2
    > ggsave("top20.pdf", plot=p2, width=8, height=6)
    > ggsave("top20.png", plot=p2, width=8, height=6)
    > #GSEA style plot
    > p3 <- plotEnrichment(fgsea_sets[["GSE10325_CD4_TCELL_VS_MYELOID_UP"]],
    +                ranks) + labs(title="GSE10325 CD4 TCELL VS MYELOID UP")
    > p3
    > ggsave("plotEnrichment.pdf", plot=p3, width=8, height=6)
    > ggsave("plotEnrichment.png", plot=p3, width=8, height=6)
    > head(ranks)
        RPL30     RPS3A     RPS13    RPL35A     RPL32     RPL34 
    0.9376551 0.9358956 0.9250745 0.9196189 0.9185743 0.9132980 
    > ranks<- deframe(cluster0.genes)
    > head(ranks)
        RPL30     RPS3A     RPS13    RPL35A     RPL32     RPL34 
    0.9376551 0.9358956 0.9250745 0.9196189 0.9185743 0.9132980
    

    相关文章

      网友评论

        本文标题:单细胞GSEA分析运行记录2

        本文链接:https://www.haomeiwen.com/subject/ubujhltx.html