# R code for a GEOquery walkthrough, laid out as knitr chunks (one "## ----"
# header per chunk). It loads GDS, GSM, GSE and GPL records, converts them to
# Bioconductor data structures, and builds an ExpressionSet by hand from the
# GSM tables of a single platform.
#
# NOTE(review): this text was restored from a copy in which identifiers,
# operators and quotes had been machine-translated and all line breaks lost.
# Object names and accessions follow what was still legible; confirm against
# the upstream vignette before relying on exact chunk labels.

## ----pre,echo=FALSE,results='hide'--------------------------------------------
library(knitr)
opts_chunk$set(warning = FALSE, message = FALSE, cache = FALSE)

## ----loadLibrary--------------------------------------------------------------
library(GEOquery)

## -----------------------------------------------------------------------------
# If you have network access, the more typical way to do this
# would be to use this:
# gds <- getGEO("GDS507")
gds <- getGEO(
  filename = system.file("extdata/GDS507.soft.gz", package = "GEOquery")
)

## -----------------------------------------------------------------------------
# If you have network access, the more typical way to do this
# would be to use this:
# gsm <- getGEO("GSM11805")
gsm <- getGEO(
  filename = system.file("extdata/GSM11805.txt.gz", package = "GEOquery")
)

## -----------------------------------------------------------------------------
# Look at gsm metadata:
head(Meta(gsm))
# Look at data associated with the GSM:
# but restrict to only the first 5 rows, for brevity
Table(gsm)[1:5, ]
# Look at the column descriptions:
Columns(gsm)

## -----------------------------------------------------------------------------
Columns(gds)[, 1:3]

## -----------------------------------------------------------------------------
# Again, with good network access, one would do:
# gse <- getGEO("GSE781", GSEMatrix = FALSE)
gse <- getGEO(
  filename = system.file("extdata/GSE781_family.soft.gz", package = "GEOquery")
)
head(Meta(gse))
# names of all the GSM objects contained in the GSE
names(GSMList(gse))
# and get the first GSM object on the list
GSMList(gse)[[1]]
# and the names of the GPLs represented
names(GPLList(gse))

## -----------------------------------------------------------------------------
# Note that GSEMatrix = TRUE is the default.
# This call passes an accession rather than a local file name, so it needs
# network access.
gse2553 <- getGEO("GSE2553", GSEMatrix = TRUE)
show(gse2553)
show(pData(phenoData(gse2553[[1]]))[1:5, c(1, 6, 8)])

## -----------------------------------------------------------------------------
eset <- GDS2eSet(gds, do.log2 = TRUE)

## -----------------------------------------------------------------------------
eset
pData(eset)[, 1:3]

## -----------------------------------------------------------------------------
# Get the platform from the GDS metadata
Meta(gds)$platform
# So use this information in a call to getGEO
gpl <- getGEO(
  filename = system.file("extdata/GPL97.annot.gz", package = "GEOquery")
)

## -----------------------------------------------------------------------------
MA <- GDS2MA(gds, GPL = gpl)
class(MA)

## -----------------------------------------------------------------------------
# Platform id recorded in the metadata of every GSM in the series
gsmplatforms <- lapply(GSMList(gse), function(x) {
  Meta(x)$platform_id
})
head(gsmplatforms)

## -----------------------------------------------------------------------------
# Keep only the samples whose metadata names platform GPL96
gsmlist <- Filter(
  function(gsm) {
    Meta(gsm)$platform_id == "GPL96"
  },
  GSMList(gse)
)
length(gsmlist)

## -----------------------------------------------------------------------------
Table(gsmlist[[1]])[1:5, ]
# and get the column descriptions
Columns(gsmlist[[1]])[1:5, ]

## -----------------------------------------------------------------------------
# get the probeset ordering
probesets <- Table(GPLList(gse)[[1]])$ID
# make the data matrix from the VALUE columns from each GSM
# being careful to match the order of the probesets in the platform
# with those in the GSMs
data.matrix <- do.call('cbind', lapply(gsmlist, function(x) {
  tab <- Table(x)
  # Row positions in this GSM table, in platform probeset order
  mymatch <- match(probesets, tab$ID_REF)
  tab$VALUE[mymatch]
}))
# Convert every column via character to numeric
data.matrix <- apply(data.matrix, 2, function(x) {
  as.numeric(as.character(x))
})
data.matrix <- log2(data.matrix)
data.matrix[1:5, ]

## -----------------------------------------------------------------------------
# library() rather than require(): stop here if Biobase is not installed
# instead of failing later at new('ExpressionSet', ...).
library(Biobase)
# go through the necessary steps to make a compliant ExpressionSet
rownames(data.matrix) <- probesets
colnames(data.matrix) <- names(gsmlist)
pdata <- data.frame(samples = names(gsmlist))
rownames(pdata) <- names(gsmlist)
pheno <- as(pdata, "AnnotatedDataFrame")
eset2 <- new('ExpressionSet', exprs = data.matrix, phenoData = pheno)
eset2

## -----------------------------------------------------------------------------
# Accession lookup (no local file name given), so this needs network access.
gpl97 <- getGEO('GPL97')
Meta(gpl97)$title
head(Meta(gpl97)$series_id)
length(Meta(gpl97)$series_id)
head(Meta(gpl97)$sample_id)
length(Meta(gpl97)$sample_id)

## -----------------------------------------------------------------------------
gsmids <- Meta(gpl97)$sample_id
# sapply() is kept deliberately: it names the result by accession, which the
# names() call below prints.
gsmlist <- sapply(gsmids[1:5], getGEO)
names(gsmlist)

## ----citation-----------------------------------------------------------------
citation("GEOquery")

## ----eval=FALSE---------------------------------------------------------------
# bug.report(package='GEOquery')

## ----echo=FALSE---------------------------------------------------------------
sessionInfo()