## Code extracted from a knitr vignette that demonstrates the AssessORF
## workflow: build a DECIPHER sequence database of a central genome and its
## related genomes, map the assessment data, assess a set of predicted genes,
## and view/compare the results. Each '## ----' line marks a vignette chunk.

## ----eval = FALSE-------------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#   install.packages("BiocManager")
# }
#
# BiocManager::install("AssessORF")

## ----eval = FALSE-------------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#   install.packages("BiocManager")
# }
#
# BiocManager::install("AssessORFData")

## ----error = FALSE, warning = FALSE, message = FALSE--------------------------
library(DECIPHER)

## Path to the SQL database file (it will be created if necessary).
## In this example a temporary file is used, but it is recommended that users
## specify a file path of their own choosing for the location of the database.
databaseFile <- tempfile()
## Below is an alternative option:
# databaseFile <- "<insert file path to database here>"

## Path to the file containing the links to the related genomes.
## In this example, the file comes from NCBI's genome browser site and is in
## CSV format. Users who also use a CSV file downloaded from NCBI's genome
## browser site as the source of their related-genome links can replace the
## 'system.file' function call with the path to their own CSV file.
## The rest of this example assumes the links are provided in this format.
## For help with adding genome sequences provided in other formats to the
## database, see DECIPHER's manual and vignettes.
relGenomesFile <- system.file("extdata",
                              "AdenoviridaeGenomes.csv",
                              package = "AssessORF")
## Below is an alternative option:
# relGenomesFile <- "<insert file path to CSV file here>"

## Extract the FTP links from the CSV file and make sure they are in the
## correct format so that the genomes can be downloaded from the NCBI server.
## To minimize the time it takes to run the example, only the first 13 rows of
## the table are used. Users who intend to use this example as a starting point
## for putting sequences into a database should drop the '[1:13, ]' part.
genomesTable <- read.csv(relGenomesFile, stringsAsFactors = FALSE)[1:13, ]
ftps <- genomesTable$GenBank.FTP
## Each FTP directory holds a FASTA file named '<directory>_genomic.fna.gz'.
ftps <- paste(ftps, paste0(basename(ftps), "_genomic.fna.gz"), sep = "/")
## Keep only the entries that are actual FTP links.
ftps <- ftps[(substring(ftps, 1, 6) == "ftp://")]

## In this example, the first genome in the table is the central genome.
## In most user scenarios, however, the path to the central genome will not be
## in the related genomes file. Instead, users should specify the path to the
## file containing the sequence of the central genome (in FASTA format) and
## prepend that file path to the vector containing the FTP links.
# genomeFile <- "<insert file path to central genome FASTA file here>"
# ftps <- c(genomeFile, ftps)

## This vector records which genomes were successfully added to the database.
pass <- logical(length(ftps))

## Add the sequences to the database.
## When the package is being built, this loop can cause a timeout error if
## there is no internet connection, so it has been commented out in order for
## the example to run smoothly. Users should uncomment this loop when adapting
## the example for their own use.
# for (pIdx in seq_along(pass)) {
#   res <- try(Seqs2DB(ftps[pIdx], "FASTA", databaseFile, as.character(pIdx),
#                      compressRepeats = TRUE, verbose = FALSE),
#              silent = TRUE)
#
#   if (!inherits(res, "try-error")) {
#     pass[pIdx] <- TRUE
#   }
# }

## This vector holds the identifiers of the genomes in the database, based on
## which ones were added successfully. The identifiers are character strings.
## The first identifier, "1", corresponds to the central genome. The remaining
## identifiers, in this case "2":"13", correspond to the related genomes.
identifiers <- as.character(which(pass))

## ----echo = FALSE-------------------------------------------------------------
## If some (or all) of the genome sequences were not added to the database,
## copy the database file shipped with the package and use that as the database
## for the vignette example. Next, fix 'identifiers' appropriately.
if (length(identifiers) < length(pass)) {
  file.copy(system.file("extdata",
                        "Adenoviridae.sqlite",
                        package = "AssessORF"),
            databaseFile,
            overwrite = TRUE)

  identifiers <- as.character(seq_along(pass))
}

## ----results = FALSE----------------------------------------------------------
library(AssessORF)

## Reminder: the first identifier in the database, in this case identifier "1",
## corresponds to the central genome. The remaining identifiers, in this case
## "2":"13", correspond to the related genomes.
myMapObj <- MapAssessmentData(databaseFile,
                              central_ID = identifiers[1],
                              related_IDs = identifiers[-1],
                              speciesName = "Human adenovirus 1",
                              useProt = FALSE)

## -----------------------------------------------------------------------------
## Remember to use 'unlink' to remove a database once it is no longer needed.
unlink(databaseFile)

## ----eval = FALSE-------------------------------------------------------------
# myMapObj <- MapAssessmentData(databaseFile, ## File path to the SQL database containing the genomes
#                               central_ID = identifiers[1], ## Identifier for the central genome
#                               related_IDs = identifiers[-1], ## Identifiers for the related genomes
#                               protHits_Seqs = protSeqs, ## Sequences for the proteomics hits
#                               protHits_Scores = protScores, ## Confidence scores for the proteomics hits
#                               strainID = strain, ## The identifier for the strain
#                               speciesName = species) ## The name of the species

## ----results = FALSE----------------------------------------------------------
currMapObj <- readRDS(system.file("extdata",
                                  "MGAS5005_PreSaved_DataMapObj.rds",
                                  package = "AssessORF"))

## The first two lines of the file are dropped before parsing (presumably
## header lines -- confirm against the file format).
currProdigal <- readLines(system.file("extdata",
                                      "MGAS5005_Prodigal.sco",
                                      package = "AssessORF"))[-1:-2]

## Each remaining line is split on "_"; fields 2, 3, and 4 are used as the
## left position, right position, and strand of a gene. The split is done once
## and 'vapply' keeps the extracted type fixed to character.
prodigalFields <- strsplit(currProdigal, "_", fixed = TRUE)
prodigalLeft <- as.numeric(vapply(prodigalFields, `[`, character(1), 2L))
prodigalRight <- as.numeric(vapply(prodigalFields, `[`, character(1), 3L))
prodigalStrand <- vapply(prodigalFields, `[`, character(1), 4L)

currResObj <- AssessGenes(geneLeftPos = prodigalLeft,
                          geneRightPos = prodigalRight,
                          geneStrand = prodigalStrand,
                          inputMapObj = currMapObj,
                          geneSource = "Prodigal")

## -----------------------------------------------------------------------------
print(currResObj)

## ----fig.height=7, fig.width=7, fig.align="center"----------------------------
plot(currResObj)

## ----fig.height=15, fig.width=15, fig.align="center"--------------------------
plot(currMapObj, currResObj,
     interactive_GV = FALSE,
     rangeStart_GV = 106000,
     rangeEnd_GV = 120000)

## ----fig.height=15, fig.width=15, fig.align="center"--------------------------
mosaicplot(currResObj)

## -----------------------------------------------------------------------------
resObj2 <- readRDS(system.file("extdata",
                               "MGAS5005_PreSaved_ResultsObj_GeneMarkS2.rds",
                               package = "AssessORF"))

CompareAssessmentResults(currResObj, resObj2)

## ----echo = FALSE-------------------------------------------------------------
print(sessionInfo(), locale = FALSE)