# Survival analysis of the TCGA skin melanoma (SKCM) RNA-seq data with
# glmSparseNet: download the data, build the expression matrix and survival
# table, fit a cross-validated network-regularised Cox model and inspect the
# selected genes.

# Document parameters; `seed` fixes the random gene sample drawn below.
params <- list(seed = 8432)

## ---- eval=FALSE--------------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE)) {
#    install.packages("BiocManager")
#  }
#  BiocManager::install("glmSparseNet")

## ----packages, message=FALSE, warning=FALSE-----------------------------------
library(dplyr)
library(ggplot2)
library(survival)
library(futile.logger)
library(curatedTCGAData)
library(TCGAutils)
#
library(glmSparseNet)
#
# Some general options for futile.logger, the debugging package.
# Results are assigned to .Last.value so that nothing is printed.
.Last.value <- flog.layout(layout.format('[~l] ~m'))
.Last.value <- glmSparseNet:::show.message(FALSE)
# Set the ggplot2 default theme to minimal
theme_set(ggplot2::theme_minimal())

## ----curated_data, include=FALSE----------------------------------------------
# Chunk not included in the output because it produces many unnecessary
# messages
skcm <- curatedTCGAData(
  diseaseCode = 'SKCM',
  assays = 'RNASeq2GeneNorm',
  version = '1.1.38',
  dry.run = FALSE
)

## ----curated_data_non_eval, eval=FALSE----------------------------------------
#  skcm <- curatedTCGAData(diseaseCode = 'SKCM', assays = 'RNASeq2GeneNorm',
#                          version = '1.1.38', dry.run = FALSE)

## ----data.show, warning=FALSE, error=FALSE------------------------------------
# Keep only the assay for sample-type code '06' and put samples in rows
skcm.metastatic <- TCGAutils::TCGAsplitAssays(skcm, '06')
xdata.raw <- t(assay(skcm.metastatic[[1]]))

# Get survival information from the clinical data of the full cohort; rows
# are matched to the samples in xdata.raw further down.
ydata.raw <- colData(skcm) %>%
  as.data.frame() %>%
  # Largest of the two day counts for each patient, ignoring missing values.
  # pmax() yields NA when both are missing; those rows are removed by the
  # filter below.
  dplyr::mutate(
    time = pmax(days_to_last_followup, days_to_death, na.rm = TRUE)
  ) %>%
  # Keep only the survival variables and the patient code
  dplyr::select(patientID, status = vital_status, time) %>%
  # Discard patients with survival time less than or equal to 0
  dplyr::filter(!is.na(time) & time > 0) %>%
  as.data.frame()

# Set the index to patientID
rownames(ydata.raw) <- ydata.raw$patientID

# Keep only samples whose patient has survival information
xdata.raw <- xdata.raw[
  TCGAbarcode(rownames(xdata.raw)) %in% rownames(ydata.raw),
]

# Keep only features with standard deviation != 0, then centre and scale
non.constant <- apply(xdata.raw, 2, sd) != 0
xdata.raw <- scale(xdata.raw[, non.constant])

# Order ydata the same as the assay
ydata.raw <- ydata.raw[TCGAbarcode(rownames(xdata.raw)), ]

# Fixed list of genes plus a reproducible random sample of 100 features
set.seed(params$seed)
small.subset <- c(
  'FOXL2', 'KLHL5', 'PCYT2', 'SLC6A10P', 'STRAP', 'TMEM33', 'WT1-AS',
  sample(colnames(xdata.raw), 100)
)

# Only genes actually present in the expression matrix are used
xdata <- xdata.raw[, small.subset[small.subset %in% colnames(xdata.raw)]]
ydata <- ydata.raw %>%
  dplyr::select(time, status)

## ----fit----------------------------------------------------------------------
fitted <- cv.glmHub(
  xdata,
  Surv(ydata$time, ydata$status),
  family = 'cox',
  # `!!` coerces status to logical before the folds are built
  foldid = glmSparseNet:::balanced.cv.folds(!!ydata$status)$output,
  network = 'correlation',
  network.options = networkOptions(min.degree = .2, cutoff = .6)
)

## ----results------------------------------------------------------------------
plot(fitted)

## ----show_coefs---------------------------------------------------------------
# Non-zero coefficients at lambda.min
coefs.v <- coef(fitted, s = 'lambda.min')[, 1] %>%
  { .[. != 0] }

coefs.v %>%
  {
    data.frame(
      ensembl.id = names(.),
      gene.name = geneNames(names(.))$external_gene_name,
      coefficient = .,
      stringsAsFactors = FALSE
    )
  } %>%
  arrange(gene.name) %>%
  knitr::kable()

## ----hallmarks----------------------------------------------------------------
geneNames(names(coefs.v)) %>%
  { hallmarks(.$external_gene_name)$heatmap }

## -----------------------------------------------------------------------------
separate2GroupsCox(
  as.vector(coefs.v),
  xdata[, names(coefs.v)],
  ydata,
  plot.title = 'Full dataset',
  legend.outside = FALSE
)

## ----sessionInfo--------------------------------------------------------------
sessionInfo()