Skip to content

Guppy GPU


GEOchina

Instriction

This document is an example of GEO data mining only support array assay.

download GSE95166 data

eSet=getGEO(‘GSE95166’, destdir=“.”, AnnotGPL = F, getGPL = F)[[1]]

ckeck data for normalization

library(GEOmirror)
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
## GEOmirror v 0.1.0  welcome to use GEOmirror!
## If any suggestion please feel free to email to jmzeng1314@163.com!
## If you use GEOmirror in published research, please acknowledgements:
## We thank Dr.Jianming Zeng(University of Macau), and all the members of his bioinformatics team, biotrainee, for generously sharing their experience and codes.
library(Biobase)
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colMeans,
##     colnames, colSums, dirname, do.call, duplicated, eval, evalq,
##     Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply,
##     lengths, Map, mapply, match, mget, order, paste, pmax, pmax.int,
##     pmin, pmin.int, Position, rank, rbind, Reduce, rowMeans, rownames,
##     rowSums, sapply, setdiff, sort, table, tapply, union, unique,
##     unsplit, which, which.max, which.min
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
eSet=geoChina('GSE95166') #download form GEO mirror server in China
## you can also use getGEO from GEOquery, by 
## getGEO('GSE95166', destdir=".", AnnotGPL = F, getGPL = F)
eSet
## $GSE95166_series_matrix.txt.gz
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 16840 features, 8 samples 
##   element names: exprs 
## protocolData: none
## phenoData
##   sampleNames: GSM2498136 GSM2498137 ... GSM2498143 (8 total)
##   varLabels: title geo_accession ... tissue:ch1 (30 total)
##   varMetadata: labelDescription
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation: GPL15314
eSet=eSet[[1]]
head(eSet)
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 1 features, 8 samples 
##   element names: exprs 
## protocolData: none
## phenoData
##   sampleNames: GSM2498136 GSM2498137 ... GSM2498143 (8 total)
##   varLabels: title geo_accession ... tissue:ch1 (30 total)
##   varMetadata: labelDescription
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation: GPL15314
probes_expr <- exprs(eSet);dim(probes_expr)
## [1] 16840     8
head(probes_expr[,1:4])
##                 GSM2498136 GSM2498137 GSM2498138 GSM2498139
## ASHG19A3A000001   5.108310   5.780102   5.590192   5.983714
## ASHG19A3A000002   3.813785   5.329042   5.255791   5.227710
## ASHG19A3A000006   7.392555   7.580803   7.689547   7.720062
## ASHG19A3A000007  12.559763  12.510866  12.568397  12.411778
## ASHG19A3A000009  11.421825  11.654983  11.597765  11.601813
## ASHG19A3A000011  10.873756  11.234830  11.257159  11.187628
boxplot(probes_expr,las=2)

pheno info

phenoDat <- pData(eSet)
head(phenoDat[,1:4])
##            title geo_accession                status submission_date
## GSM2498136   T_1    GSM2498136 Public on Aug 16 2019     Feb 22 2017
## GSM2498137   T_2    GSM2498137 Public on Aug 16 2019     Feb 22 2017
## GSM2498138   T_3    GSM2498138 Public on Aug 16 2019     Feb 22 2017
## GSM2498139   T_4    GSM2498139 Public on Aug 16 2019     Feb 22 2017
## GSM2498140   I_1    GSM2498140 Public on Aug 16 2019     Feb 22 2017
## GSM2498141   I_2    GSM2498141 Public on Aug 16 2019     Feb 22 2017
# https://www.ncbi.nlm.nih.gov/pubmed/31430288

groupList=factor(c(rep('npc',4),rep('normal',4)))
table(groupList)
## groupList
## normal    npc 
##      4      4
eSet@annotation
## [1] "GPL15314"
# GPL15314  Arraystar Human LncRNA microarray V2.0 (Agilent_033010 Probe Name version)

PCA analysis

genes_expr=probes_expr
library("FactoMineR")
library("factoextra")
groupList=factor(c(rep('npc',4),rep('normal',4)))
dat.pca <- PCA(t(genes_expr) , graph = FALSE)
dat.pca
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 8 individuals, described by 16840 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"
fviz_pca_ind(dat.pca,
             geom.ind = "point",
             col.ind = groupList,
             addEllipses = TRUE,
             legend.title = "Groups"
)

library(limma)
design=model.matrix(~factor(groupList))
design
##   (Intercept) factor(groupList)npc
## 1           1                    1
## 2           1                    1
## 3           1                    1
## 4           1                    1
## 5           1                    0
## 6           1                    0
## 7           1                    0
## 8           1                    0
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$`factor(groupList)`
## [1] "contr.treatment"
fit=lmFit(genes_expr,design)
fit=eBayes(fit)
DEG=topTable(fit,coef=2,n=Inf)
head(DEG)
##                      logFC   AveExpr          t      P.Value    adj.P.Val
## ASHG19A3A032053  -5.178114 10.457274 -121.96371 1.432469e-13 2.235589e-09
## ASHG19A3A032177   4.865081 10.972414  107.30211 3.717871e-13 2.235589e-09
## ASHG19A3A045676   6.217461  8.069184  104.01585 4.686864e-13 2.235589e-09
## ASHG19A3A054608   4.986319 11.692864  102.28612 5.310188e-13 2.235589e-09
## ASHG19A3A009208  -4.926533 13.043726  -92.43595 1.128562e-12 3.345733e-09
## ASHG19A3L0001532  5.999643  9.845148   91.75872 1.192067e-12 3.345733e-09
##                         B
## ASHG19A3A032053  20.54039
## ASHG19A3A032177  20.00474
## ASHG19A3A045676  19.86423
## ASHG19A3A054608  19.78680
## ASHG19A3A009208  19.29452
## ASHG19A3L0001532 19.25714
# We observed that 2107 lncRNAs were upregulated
# while 2090 lncRNAs were downregulated by more than 2-fold,
# NKILA among these downregulated lncRNAs (Fig 1A, GSE95166).

for volcano plot

df=DEG
attach(df)
df$v= -log10(P.Value)
df$g=ifelse(df$P.Value>0.05,'stable',
            ifelse( df$logFC >1,'up',
                    ifelse( df$logFC < -1,'down','stable') )
)
table(df$g)
## 
##   down stable     up 
##   1572  13629   1639
df$name=rownames(df)
head(df)
##                      logFC   AveExpr          t      P.Value    adj.P.Val
## ASHG19A3A032053  -5.178114 10.457274 -121.96371 1.432469e-13 2.235589e-09
## ASHG19A3A032177   4.865081 10.972414  107.30211 3.717871e-13 2.235589e-09
## ASHG19A3A045676   6.217461  8.069184  104.01585 4.686864e-13 2.235589e-09
## ASHG19A3A054608   4.986319 11.692864  102.28612 5.310188e-13 2.235589e-09
## ASHG19A3A009208  -4.926533 13.043726  -92.43595 1.128562e-12 3.345733e-09
## ASHG19A3L0001532  5.999643  9.845148   91.75872 1.192067e-12 3.345733e-09
##                         B        v    g             name
## ASHG19A3A032053  20.54039 12.84391 down  ASHG19A3A032053
## ASHG19A3A032177  20.00474 12.42971   up  ASHG19A3A032177
## ASHG19A3A045676  19.86423 12.32912   up  ASHG19A3A045676
## ASHG19A3A054608  19.78680 12.27489   up  ASHG19A3A054608
## ASHG19A3A009208  19.29452 11.94747 down  ASHG19A3A009208
## ASHG19A3L0001532 19.25714 11.92370   up ASHG19A3L0001532
library(ggpubr)
ggpubr::ggscatter(df, x = "logFC", y = "v", color = "g",size = 0.5,
          label = "name", repel = T,
          label.select =head(rownames(df)),
          palette = c("#00AFBB", "#E7B800", "#FC4E07") )

detach(df)


x=DEG$logFC
names(x)=rownames(DEG)
cg=c(names(head(sort(x),100)),
     names(tail(sort(x),100)))
cg
##   [1] "ASHG19A3A037294"      "ASHG19A3A014288"      "ASHG19A3A032053"     
##   [4] "ASHG19A3A044463"      "ASHG19A3A028512"      "ASHG19A3A009208"     
##   [7] "ASHG19A3A043706"      "ASHG19A3A022918"      "ASHG19A3A008536"     
##  [10] "ASHG19A3A009728"      "ASHG19A3L0000609"     "ASHG19A3A027773"     
##  [13] "ASHG19A3A038970"      "ASHG19A3A048255"      "CUST_201_PI426073487"
##  [16] "ASHG19A3A025425"      "ASHG19A3A032707"      "ASHG19A3A026656"     
##  [19] "ASHG19A3A052254"      "ASHG19A3A026236"      "ASHG19A3A034058"     
##  [22] "ASHG19A3A021642"      "ASHG19A3A025932"      "ASHG19A3A042290"     
##  [25] "ASHG19A3A050483"      "ASHG19A3A016469"      "ASHG19A3A007421"     
##  [28] "ASHG19A3A029443"      "ASHG19A3A027206"      "CUST_182_PI426073487"
##  [31] "ASHG19A3A043900"      "ASHG19A3A046734"      "ASHG19A3A055137"     
##  [34] "ASHG19A3A007827"      "ASHG19A3A036375"      "ASHG19A3A007925"     
##  [37] "ASHG19A3A012793"      "ASHG19A3L0001458"     "ASHG19A3A018502"     
##  [40] "ASHG19A3A030420"      "ASHG19A3A029379"      "CUST_7_PI426265844"  
##  [43] "ASHG19A3A010380"      "ASHG19A3A008068"      "ASHG19A3A028826"     
##  [46] "ASHG19A3A020031"      "ASHG19A3A042710"      "ASHG19A3A049662"     
##  [49] "ASHG19A3A008285"      "ASHG19A3A000176"      "ASHG19A3A030424"     
##  [52] "ASHG19A3A007656"      "ASHG19A3A018851"      "ASHG19A3A043205"     
##  [55] "ASHG19A3A050482"      "ASHG19A3A049716"      "ASHG19A3A055267"     
##  [58] "ASHG19A3H0000076"     "ASHG19A3A046245"      "ASHG19A3L0002616"    
##  [61] "ASHG19A3A048878"      "ASHG19A3A015744"      "ASHG19A3L0001890"    
##  [64] "ASHG19A3A037654"      "ASHG19A3A036146"      "ASHG19A3A008127"     
##  [67] "ASHG19A3A009676"      "ASHG19A3A034140"      "ASHG19A3L0001340"    
##  [70] "CUST_817_PI426073487" "ASHG19A3A009264"      "ASHG19A3A045231"     
##  [73] "ASHG19A3A007778"      "ASHG19A3A050209"      "ASHG19A3A022149"     
##  [76] "CUST_580_PI426073487" "ASHG19A3A008066"      "ASHG19A3A020657"     
##  [79] "ASHG19A3A030845"      "ASHG19A3A009209"      "ASHG19A3A035363"     
##  [82] "ASHG19A3A043337"      "ASHG19A3A017413"      "ASHG19A3A015048"     
##  [85] "ASHG19A3A040421"      "ASHG19A3A052003"      "ASHG19A3A040786"     
##  [88] "ASHG19A3A033939"      "ASHG19A3A039699"      "ASHG19A3A007991"     
##  [91] "CUST_469_PI426073487" "ASHG19A3A033805"      "ASHG19A3A000334"     
##  [94] "ASHG19A3A023054"      "ASHG19A3A049750"      "ASHG19A3A009936"     
##  [97] "ASHG19A3A028350"      "ASHG19A3A051223"      "ASHG19A3L0000967"    
## [100] "ASHG19A3A024146"      "ASHG19A3A040555"      "ASHG19A3A021832"     
## [103] "ASHG19A3A015839"      "ASHG19A3A046765"      "ASHG19A3A009182"     
## [106] "ASHG19A3A026841"      "ASHG19A3A021913"      "ASHG19A3A011708"     
## [109] "ASHG19A3A018631"      "ASHG19A3A050707"      "ASHG19A3A011930"     
## [112] "ASHG19A3A044455"      "ASHG19A3A027168"      "ASHG19A3A016579"     
## [115] "ASHG19A3A024449"      "ASHG19A3A029727"      "ASHG19A3A018447"     
## [118] "ASHG19A3A031354"      "ASHG19A3L0001044"     "ASHG19A3A039009"     
## [121] "ASHG19A3A020726"      "ASHG19A3A029193"      "ASHG19A3A025209"     
## [124] "ASHG19A3A021269"      "ASHG19A3A034743"      "ASHG19A3A037821"     
## [127] "ASHG19A3A029728"      "ASHG19A3A012595"      "ASHG19A3A038556"     
## [130] "ASHG19A3A020550"      "ASHG19A3A000250"      "ASHG19A3A014761"     
## [133] "ASHG19A3A047769"      "ASHG19A3A018630"      "ASHG19A3A025279"     
## [136] "ASHG19A3A025333"      "ASHG19A3A015374"      "ASHG19A3A032455"     
## [139] "ASHG19A3A010149"      "ASHG19A3A054317"      "ASHG19A3A038845"     
## [142] "ASHG19A3A054362"      "ASHG19A3A037410"      "ASHG19A3A029601"     
## [145] "ASHG19A3A052950"      "ASHG19A3A014009"      "ASHG19A3A040895"     
## [148] "ASHG19A3A045574"      "ASHG19A3A026958"      "ASHG19A3A036303"     
## [151] "ASHG19A3A050390"      "ASHG19A3A052917"      "ASHG19A3A031041"     
## [154] "ASHG19A3A012637"      "ASHG19A3A021683"      "ASHG19A3A055182"     
## [157] "ASHG19A3A024967"      "ASHG19A3A054092"      "ASHG19A3A051381"     
## [160] "ASHG19A3A051106"      "ASHG19A3A028699"      "ASHG19A3A016789"     
## [163] "ASHG19A3A050524"      "ASHG19A3A007768"      "ASHG19A3A036368"     
## [166] "ASHG19A3A020850"      "ASHG19A3A023397"      "ASHG19A3A042993"     
## [169] "ASHG19A3L0001225"     "ASHG19A3A031025"      "ASHG19A3A009718"     
## [172] "ASHG19A3A024669"      "ASHG19A3A024195"      "ASHG19A3A051121"     
## [175] "ASHG19A3A035347"      "ASHG19A3A042123"      "ASHG19A3A036013"     
## [178] "ASHG19A3A024670"      "ASHG19A3A050474"      "ASHG19A3A034746"     
## [181] "ASHG19A3A032177"      "ASHG19A3A010756"      "ASHG19A3A054608"     
## [184] "ASHG19A3L0000526"     "ASHG19A3A015189"      "ASHG19A3A025210"     
## [187] "ASHG19A3A020718"      "ASHG19A3A051379"      "ASHG19A3A017973"     
## [190] "ASHG19A3L0000831"     "ASHG19A3A053508"      "ASHG19A3L0002776"    
## [193] "ASHG19A3A050298"      "ASHG19A3A055186"      "ASHG19A3A030034"     
## [196] "ASHG19A3L0001532"     "ASHG19A3A045676"      "ASHG19A3A009944"     
## [199] "ASHG19A3A019292"      "ASHG19A3A035994"
library(pheatmap)
n=t(scale(t(genes_expr[cg,])))
n[n>2]=2
n[n< -2]= -2
n[1:4,1:4]
##                 GSM2498136 GSM2498137 GSM2498138 GSM2498139
## ASHG19A3A037294 -0.9086247 -0.9369028 -0.9825185 -0.9123974
## ASHG19A3A014288 -0.8962710 -0.8946978 -1.0526637 -0.8923081
## ASHG19A3A032053 -0.9398680 -0.9271158 -0.9316023 -0.9429676
## ASHG19A3A044463 -0.9669862 -0.8377226 -0.9428009 -0.9903002
ac=data.frame(groupList=groupList)
rownames(ac)=colnames(n)  
pheatmap(n,show_colnames =F,show_rownames = F,
         annotation_col=ac)