Guppy GPU¶
GEOchina
HYX
October 7, 2020
Instriction
This document is an example of GEO data mining only support array assay.
download GSE95166 data
eSet=getGEO(‘GSE95166’, destdir=“.”, AnnotGPL = F, getGPL = F)[[1]]
ckeck data for normalization
library(GEOmirror)
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
## GEOmirror v 0.1.0 welcome to use GEOmirror!
## If any suggestion please feel free to email to jmzeng1314@163.com!
## If you use GEOmirror in published research, please acknowledgements:
## We thank Dr.Jianming Zeng(University of Macau), and all the members of his bioinformatics team, biotrainee, for generously sharing their experience and codes.
library(Biobase)
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colMeans,
## colnames, colSums, dirname, do.call, duplicated, eval, evalq,
## Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply,
## lengths, Map, mapply, match, mget, order, paste, pmax, pmax.int,
## pmin, pmin.int, Position, rank, rbind, Reduce, rowMeans, rownames,
## rowSums, sapply, setdiff, sort, table, tapply, union, unique,
## unsplit, which, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
eSet=geoChina('GSE95166') #download form GEO mirror server in China
## you can also use getGEO from GEOquery, by
## getGEO('GSE95166', destdir=".", AnnotGPL = F, getGPL = F)
eSet
## $GSE95166_series_matrix.txt.gz
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 16840 features, 8 samples
## element names: exprs
## protocolData: none
## phenoData
## sampleNames: GSM2498136 GSM2498137 ... GSM2498143 (8 total)
## varLabels: title geo_accession ... tissue:ch1 (30 total)
## varMetadata: labelDescription
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation: GPL15314
eSet=eSet[[1]]
head(eSet)
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 1 features, 8 samples
## element names: exprs
## protocolData: none
## phenoData
## sampleNames: GSM2498136 GSM2498137 ... GSM2498143 (8 total)
## varLabels: title geo_accession ... tissue:ch1 (30 total)
## varMetadata: labelDescription
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation: GPL15314
probes_expr <- exprs(eSet);dim(probes_expr)
## [1] 16840 8
head(probes_expr[,1:4])
## GSM2498136 GSM2498137 GSM2498138 GSM2498139
## ASHG19A3A000001 5.108310 5.780102 5.590192 5.983714
## ASHG19A3A000002 3.813785 5.329042 5.255791 5.227710
## ASHG19A3A000006 7.392555 7.580803 7.689547 7.720062
## ASHG19A3A000007 12.559763 12.510866 12.568397 12.411778
## ASHG19A3A000009 11.421825 11.654983 11.597765 11.601813
## ASHG19A3A000011 10.873756 11.234830 11.257159 11.187628
boxplot(probes_expr,las=2)
pheno info
phenoDat <- pData(eSet)
head(phenoDat[,1:4])
## title geo_accession status submission_date
## GSM2498136 T_1 GSM2498136 Public on Aug 16 2019 Feb 22 2017
## GSM2498137 T_2 GSM2498137 Public on Aug 16 2019 Feb 22 2017
## GSM2498138 T_3 GSM2498138 Public on Aug 16 2019 Feb 22 2017
## GSM2498139 T_4 GSM2498139 Public on Aug 16 2019 Feb 22 2017
## GSM2498140 I_1 GSM2498140 Public on Aug 16 2019 Feb 22 2017
## GSM2498141 I_2 GSM2498141 Public on Aug 16 2019 Feb 22 2017
# https://www.ncbi.nlm.nih.gov/pubmed/31430288
groupList=factor(c(rep('npc',4),rep('normal',4)))
table(groupList)
## groupList
## normal npc
## 4 4
eSet@annotation
## [1] "GPL15314"
# GPL15314 Arraystar Human LncRNA microarray V2.0 (Agilent_033010 Probe Name version)
PCA analysis
genes_expr=probes_expr
library("FactoMineR")
library("factoextra")
groupList=factor(c(rep('npc',4),rep('normal',4)))
dat.pca <- PCA(t(genes_expr) , graph = FALSE)
dat.pca
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 8 individuals, described by 16840 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
fviz_pca_ind(dat.pca,
geom.ind = "point",
col.ind = groupList,
addEllipses = TRUE,
legend.title = "Groups"
)
library(limma)
design=model.matrix(~factor(groupList))
design
## (Intercept) factor(groupList)npc
## 1 1 1
## 2 1 1
## 3 1 1
## 4 1 1
## 5 1 0
## 6 1 0
## 7 1 0
## 8 1 0
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$`factor(groupList)`
## [1] "contr.treatment"
fit=lmFit(genes_expr,design)
fit=eBayes(fit)
DEG=topTable(fit,coef=2,n=Inf)
head(DEG)
## logFC AveExpr t P.Value adj.P.Val
## ASHG19A3A032053 -5.178114 10.457274 -121.96371 1.432469e-13 2.235589e-09
## ASHG19A3A032177 4.865081 10.972414 107.30211 3.717871e-13 2.235589e-09
## ASHG19A3A045676 6.217461 8.069184 104.01585 4.686864e-13 2.235589e-09
## ASHG19A3A054608 4.986319 11.692864 102.28612 5.310188e-13 2.235589e-09
## ASHG19A3A009208 -4.926533 13.043726 -92.43595 1.128562e-12 3.345733e-09
## ASHG19A3L0001532 5.999643 9.845148 91.75872 1.192067e-12 3.345733e-09
## B
## ASHG19A3A032053 20.54039
## ASHG19A3A032177 20.00474
## ASHG19A3A045676 19.86423
## ASHG19A3A054608 19.78680
## ASHG19A3A009208 19.29452
## ASHG19A3L0001532 19.25714
# We observed that 2107 lncRNAs were upregulated
# while 2090 lncRNAs were downregulated by more than 2-fold,
# NKILA among these downregulated lncRNAs (Fig 1A, GSE95166).
for volcano plot
df=DEG
attach(df)
df$v= -log10(P.Value)
df$g=ifelse(df$P.Value>0.05,'stable',
ifelse( df$logFC >1,'up',
ifelse( df$logFC < -1,'down','stable') )
)
table(df$g)
##
## down stable up
## 1572 13629 1639
df$name=rownames(df)
head(df)
## logFC AveExpr t P.Value adj.P.Val
## ASHG19A3A032053 -5.178114 10.457274 -121.96371 1.432469e-13 2.235589e-09
## ASHG19A3A032177 4.865081 10.972414 107.30211 3.717871e-13 2.235589e-09
## ASHG19A3A045676 6.217461 8.069184 104.01585 4.686864e-13 2.235589e-09
## ASHG19A3A054608 4.986319 11.692864 102.28612 5.310188e-13 2.235589e-09
## ASHG19A3A009208 -4.926533 13.043726 -92.43595 1.128562e-12 3.345733e-09
## ASHG19A3L0001532 5.999643 9.845148 91.75872 1.192067e-12 3.345733e-09
## B v g name
## ASHG19A3A032053 20.54039 12.84391 down ASHG19A3A032053
## ASHG19A3A032177 20.00474 12.42971 up ASHG19A3A032177
## ASHG19A3A045676 19.86423 12.32912 up ASHG19A3A045676
## ASHG19A3A054608 19.78680 12.27489 up ASHG19A3A054608
## ASHG19A3A009208 19.29452 11.94747 down ASHG19A3A009208
## ASHG19A3L0001532 19.25714 11.92370 up ASHG19A3L0001532
library(ggpubr)
ggpubr::ggscatter(df, x = "logFC", y = "v", color = "g",size = 0.5,
label = "name", repel = T,
label.select =head(rownames(df)),
palette = c("#00AFBB", "#E7B800", "#FC4E07") )
detach(df)
x=DEG$logFC
names(x)=rownames(DEG)
cg=c(names(head(sort(x),100)),
names(tail(sort(x),100)))
cg
## [1] "ASHG19A3A037294" "ASHG19A3A014288" "ASHG19A3A032053"
## [4] "ASHG19A3A044463" "ASHG19A3A028512" "ASHG19A3A009208"
## [7] "ASHG19A3A043706" "ASHG19A3A022918" "ASHG19A3A008536"
## [10] "ASHG19A3A009728" "ASHG19A3L0000609" "ASHG19A3A027773"
## [13] "ASHG19A3A038970" "ASHG19A3A048255" "CUST_201_PI426073487"
## [16] "ASHG19A3A025425" "ASHG19A3A032707" "ASHG19A3A026656"
## [19] "ASHG19A3A052254" "ASHG19A3A026236" "ASHG19A3A034058"
## [22] "ASHG19A3A021642" "ASHG19A3A025932" "ASHG19A3A042290"
## [25] "ASHG19A3A050483" "ASHG19A3A016469" "ASHG19A3A007421"
## [28] "ASHG19A3A029443" "ASHG19A3A027206" "CUST_182_PI426073487"
## [31] "ASHG19A3A043900" "ASHG19A3A046734" "ASHG19A3A055137"
## [34] "ASHG19A3A007827" "ASHG19A3A036375" "ASHG19A3A007925"
## [37] "ASHG19A3A012793" "ASHG19A3L0001458" "ASHG19A3A018502"
## [40] "ASHG19A3A030420" "ASHG19A3A029379" "CUST_7_PI426265844"
## [43] "ASHG19A3A010380" "ASHG19A3A008068" "ASHG19A3A028826"
## [46] "ASHG19A3A020031" "ASHG19A3A042710" "ASHG19A3A049662"
## [49] "ASHG19A3A008285" "ASHG19A3A000176" "ASHG19A3A030424"
## [52] "ASHG19A3A007656" "ASHG19A3A018851" "ASHG19A3A043205"
## [55] "ASHG19A3A050482" "ASHG19A3A049716" "ASHG19A3A055267"
## [58] "ASHG19A3H0000076" "ASHG19A3A046245" "ASHG19A3L0002616"
## [61] "ASHG19A3A048878" "ASHG19A3A015744" "ASHG19A3L0001890"
## [64] "ASHG19A3A037654" "ASHG19A3A036146" "ASHG19A3A008127"
## [67] "ASHG19A3A009676" "ASHG19A3A034140" "ASHG19A3L0001340"
## [70] "CUST_817_PI426073487" "ASHG19A3A009264" "ASHG19A3A045231"
## [73] "ASHG19A3A007778" "ASHG19A3A050209" "ASHG19A3A022149"
## [76] "CUST_580_PI426073487" "ASHG19A3A008066" "ASHG19A3A020657"
## [79] "ASHG19A3A030845" "ASHG19A3A009209" "ASHG19A3A035363"
## [82] "ASHG19A3A043337" "ASHG19A3A017413" "ASHG19A3A015048"
## [85] "ASHG19A3A040421" "ASHG19A3A052003" "ASHG19A3A040786"
## [88] "ASHG19A3A033939" "ASHG19A3A039699" "ASHG19A3A007991"
## [91] "CUST_469_PI426073487" "ASHG19A3A033805" "ASHG19A3A000334"
## [94] "ASHG19A3A023054" "ASHG19A3A049750" "ASHG19A3A009936"
## [97] "ASHG19A3A028350" "ASHG19A3A051223" "ASHG19A3L0000967"
## [100] "ASHG19A3A024146" "ASHG19A3A040555" "ASHG19A3A021832"
## [103] "ASHG19A3A015839" "ASHG19A3A046765" "ASHG19A3A009182"
## [106] "ASHG19A3A026841" "ASHG19A3A021913" "ASHG19A3A011708"
## [109] "ASHG19A3A018631" "ASHG19A3A050707" "ASHG19A3A011930"
## [112] "ASHG19A3A044455" "ASHG19A3A027168" "ASHG19A3A016579"
## [115] "ASHG19A3A024449" "ASHG19A3A029727" "ASHG19A3A018447"
## [118] "ASHG19A3A031354" "ASHG19A3L0001044" "ASHG19A3A039009"
## [121] "ASHG19A3A020726" "ASHG19A3A029193" "ASHG19A3A025209"
## [124] "ASHG19A3A021269" "ASHG19A3A034743" "ASHG19A3A037821"
## [127] "ASHG19A3A029728" "ASHG19A3A012595" "ASHG19A3A038556"
## [130] "ASHG19A3A020550" "ASHG19A3A000250" "ASHG19A3A014761"
## [133] "ASHG19A3A047769" "ASHG19A3A018630" "ASHG19A3A025279"
## [136] "ASHG19A3A025333" "ASHG19A3A015374" "ASHG19A3A032455"
## [139] "ASHG19A3A010149" "ASHG19A3A054317" "ASHG19A3A038845"
## [142] "ASHG19A3A054362" "ASHG19A3A037410" "ASHG19A3A029601"
## [145] "ASHG19A3A052950" "ASHG19A3A014009" "ASHG19A3A040895"
## [148] "ASHG19A3A045574" "ASHG19A3A026958" "ASHG19A3A036303"
## [151] "ASHG19A3A050390" "ASHG19A3A052917" "ASHG19A3A031041"
## [154] "ASHG19A3A012637" "ASHG19A3A021683" "ASHG19A3A055182"
## [157] "ASHG19A3A024967" "ASHG19A3A054092" "ASHG19A3A051381"
## [160] "ASHG19A3A051106" "ASHG19A3A028699" "ASHG19A3A016789"
## [163] "ASHG19A3A050524" "ASHG19A3A007768" "ASHG19A3A036368"
## [166] "ASHG19A3A020850" "ASHG19A3A023397" "ASHG19A3A042993"
## [169] "ASHG19A3L0001225" "ASHG19A3A031025" "ASHG19A3A009718"
## [172] "ASHG19A3A024669" "ASHG19A3A024195" "ASHG19A3A051121"
## [175] "ASHG19A3A035347" "ASHG19A3A042123" "ASHG19A3A036013"
## [178] "ASHG19A3A024670" "ASHG19A3A050474" "ASHG19A3A034746"
## [181] "ASHG19A3A032177" "ASHG19A3A010756" "ASHG19A3A054608"
## [184] "ASHG19A3L0000526" "ASHG19A3A015189" "ASHG19A3A025210"
## [187] "ASHG19A3A020718" "ASHG19A3A051379" "ASHG19A3A017973"
## [190] "ASHG19A3L0000831" "ASHG19A3A053508" "ASHG19A3L0002776"
## [193] "ASHG19A3A050298" "ASHG19A3A055186" "ASHG19A3A030034"
## [196] "ASHG19A3L0001532" "ASHG19A3A045676" "ASHG19A3A009944"
## [199] "ASHG19A3A019292" "ASHG19A3A035994"
library(pheatmap)
n=t(scale(t(genes_expr[cg,])))
n[n>2]=2
n[n< -2]= -2
n[1:4,1:4]
## GSM2498136 GSM2498137 GSM2498138 GSM2498139
## ASHG19A3A037294 -0.9086247 -0.9369028 -0.9825185 -0.9123974
## ASHG19A3A014288 -0.8962710 -0.8946978 -1.0526637 -0.8923081
## ASHG19A3A032053 -0.9398680 -0.9271158 -0.9316023 -0.9429676
## ASHG19A3A044463 -0.9669862 -0.8377226 -0.9428009 -0.9903002
ac=data.frame(groupList=groupList)
rownames(ac)=colnames(n)
pheatmap(n,show_colnames =F,show_rownames = F,
annotation_col=ac)