Skip to content

Commit 0060be2

Browse files
committed
Updates for CRAN submission
1 parent d6fb2d0 commit 0060be2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+595
-529
lines changed

.Rbuildignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@
66
^docs$
77
^pkgdown$
88
^\.github$
9+
^data-raw$

DESCRIPTION

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: ProActive
22
Title:
33
Detect Elevations and Gaps in Mapped Sequencing Read Coverage
4-
Version: 0.0.0.9000
4+
Version: 0.0.1
55
Maintainer: Jessie Maier <jlmaier@ncsu.edu>
66
Authors@R:
77
c(person(given = "Jessie",
@@ -17,9 +17,9 @@ Description:
1717
The `ProActive` R package automatically detects regions of gapped and elevated
1818
read coverage using a pattern-matching algorithm. `ProActive` can detect, characterize
1919
and visualize read coverage patterns in both genomes and metagenomes. Optionally,
20-
users may provide gene annotations associated with their genome or metagenome
20+
users may provide gene predictions associated with their genome or metagenome
2121
in the form of a .gff file. In this case, `ProActive` will generate an additional
22-
output table containing the ORFs found within the detected regions of gapped
22+
output table containing the gene predictions found within the detected regions of gapped
2323
and elevated read coverage.
2424
License: GPL-2
2525
Encoding: UTF-8

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ importFrom(stats,median)
88
importFrom(stats,na.omit)
99
importFrom(stringr,regex)
1010
importFrom(stringr,str_detect)
11+
importFrom(stringr,str_extract)
1112
importFrom(stringr,str_extract_all)
1213
importFrom(utils,capture.output)
1314
importFrom(utils,write.table)

NEWS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
# ProActive 0.0.1.9000
1+
# ProActive 0.0.1
22

33
* Initial CRAN submission.

R/FullElevGapFunctions.R

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,23 @@ fullElevGap <- function(pileupSubset, windowSize, minSize, maxSize, elevOrGap) {
2020
minCovSteps <- (abs((minReadCov + quarterReadCov) - minReadCov)) / 2
2121
maxCoverages <- seq((minReadCov + (quarterReadCov * 3)), maxReadCov, maxCovSteps)
2222
minCoverages <- seq(minReadCov, (minReadCov + quarterReadCov), minCovSteps)
23-
elevLength <- ifelse((nrow(pileupSubset) - (10000 / windowSize)) > (maxSize / windowSize), maxSize / windowSize, nrow(pileupSubset) - (10000 / windowSize))
23+
elevLength <- ifelse((nrow(pileupSubset) - (10000 / windowSize)) > (maxSize / windowSize),
24+
maxSize / windowSize,
25+
nrow(pileupSubset) - (10000 / windowSize))
2426
nonElev <- nrow(pileupSubset) - (elevLength + (5000 / windowSize))
2527
maxOrMin <- ifelse(elevOrGap == "Elevation", minReadCov, maxReadCov)
2628
maxOrMin2 <- ifelse(elevOrGap == "Elevation", maxReadCov, minReadCov)
2729
pattern <- c(rep(maxOrMin, 5000 / windowSize), rep(maxOrMin2, elevLength), rep(maxOrMin, nonElev))
2830
bestMatchInfo <- collectBestMatchInfo(pattern, pileupSubset, elevOrGap, "Full")
29-
lapply(seq_along(maxCoverages), function(maxCov) {
30-
bestMatchInfo <<- fullElevGapShrink(minReadCov, windowSize, maxCoverages[[maxCov]], elevLength, nonElev, bestMatchInfo, pileupSubset, minSize, elevOrGap)
31-
lapply(seq_along(minCoverages), function(minCov) {
32-
bestMatchInfo <<- fullElevGapShrink(minCoverages[[minCov]], windowSize, maxCoverages[[maxCov]], elevLength, nonElev, bestMatchInfo, pileupSubset, minSize, elevOrGap)
33-
})
34-
})
31+
for (maxCov in seq_along(maxCoverages)){
32+
bestMatchInfo <- fullElevGapShrink(minReadCov, windowSize, maxCoverages[[maxCov]],
33+
elevLength, nonElev, bestMatchInfo, pileupSubset, minSize, elevOrGap)
34+
for (minCov in seq_along(minCoverages)) {
35+
bestMatchInfo <- fullElevGapShrink(minCoverages[[minCov]], windowSize,
36+
maxCoverages[[maxCov]], elevLength, nonElev,
37+
bestMatchInfo, pileupSubset, minSize, elevOrGap)
38+
}
39+
}
3540
return(bestMatchInfo)
3641
}
3742

R/GPsInElevGaps.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,12 @@
1717
#' @importFrom dplyr bind_rows
1818
#' @keywords internal
1919
GPsInElevGaps <- function(elevGapSummList, windowSize, gffTSV, mode, chunkContigs) {
20-
GPlist <- list()
2120
colnames(gffTSV) <- c("seqid", "source", "type", "start", "end", "score", "strand", "phase", "attributes")
2221
if (TRUE %in% (str_detect(gffTSV[,9], regex('product', ignore_case = T)))){
2322
product <- str_extract_all(gffTSV [,9], regex("(?<=product=)[\\s\\S]*",ignore_case = T))
2423
gffTSV$geneproduct <- product
2524
}
26-
lapply(seq_along(elevGapSummList), function(i) {
25+
GPlist <- lapply(seq_along(elevGapSummList), function(i) {
2726
trueRefName <- elevGapSummList[[i]][[8]]
2827
if(mode == "metagenome"){
2928
refName <- elevGapSummList[[i]][[8]]
@@ -41,8 +40,9 @@ GPsInElevGaps <- function(elevGapSummList, windowSize, gffTSV, mode, chunkContig
4140
return(NULL)
4241
}
4342
GPs$Classification <- elevGapSummList[[i]][[7]]
44-
GPlist[[i]] <<- GPs
43+
GPs
4544
})
45+
GPlist <- (GPlist[!vapply(GPlist, is.null, logical(1))])
4646
GPsummTable <- bind_rows(GPlist)
4747
return(as.data.frame(GPsummTable))
4848
}

R/ProActive.R

Lines changed: 23 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -28,34 +28,27 @@
2828
#' on. Default is 25000.
2929
#' @param chunkSize If `mode`="genome" OR if `mode`="metagenome" and `chunkContigs`=TRUE,
3030
#' chunk the genome or contigs, respectively, into smaller subsets for pattern-matching.
31-
#' `chunkSize` determines the size (in bp) of each 'chunk'. Default is 50000.
31+
#' `chunkSize` determines the size (in bp) of each 'chunk'. Default is 100000.
3232
#' @param IncludeNoPatterns TRUE or FALSE. If TRUE, the noPattern pattern-matches will
3333
#' be included in the ProActive PatternMatches output list. If you would like to visualize
3434
#' the noPattern pattern-matches in `plotProActiveResults()`, this should be set to TRUE.
35+
#' @param verbose TRUE or FALSE. Print progress messages to console. Default is TRUE.
3536
#' @param saveFilesTo Optional. Provide a path to the directory you wish to save
3637
#' output to. A folder will be made within the provided directory to store
3738
#' results.
3839
#' @importFrom utils capture.output write.table
3940
#' @return A list containing 5 objects (6 when `gffTSV` is provided) described in the function description.
4041
#' @export
4142
#' @examples
42-
#' ## Metagenome mode with gffTSV
4343
#' metagenome_results <- ProActive(
4444
#' pileup = sampleMetagenomePileup,
4545
#' mode = "metagenome",
4646
#' gffTSV = sampleMetagenomegffTSV
4747
#' )
48-
#'
49-
#' ## Genome mode without gffTSV
50-
#' genome_results <- ProActive(
51-
#' pileup = exampleGenomePileupSubset,
52-
#' mode = "genome"
53-
#' )
54-
#'
55-
#' ##gffTSV is optional!
5648
ProActive <- function(pileup, mode, gffTSV, windowSize = 1000, chunkContigs = FALSE,
5749
minSize = 10000, maxSize = Inf, minContigLength = 30000,
58-
chunkSize = 100000, IncludeNoPatterns = FALSE, saveFilesTo) {
50+
chunkSize = 100000, IncludeNoPatterns = FALSE, verbose = TRUE,
51+
saveFilesTo) {
5952
## error catching
6053
if ((chunkSize %% 100) > 0) {
6154
stop("chunkSize must be divisible by 100")
@@ -73,55 +66,58 @@ ProActive <- function(pileup, mode, gffTSV, windowSize = 1000, chunkContigs = FA
7366
stop("Pileup file MUST have a windowSize/binsize of 100!")
7467
}
7568
startTime <- Sys.time()
76-
message("Preparing input file for pattern-matching...")
69+
if(verbose){message("Preparing input file for pattern-matching...")}
7770
pileup <- pileupFormatter(pileup, mode)
7871
if (mode == "genome") {
7972
pileup <- genomeChunks(pileup, chunkSize)
8073
}
81-
if (chunkContigs == TRUE) {
74+
if (mode == "metagenome" & chunkContigs == TRUE) {
8275
pileup <- contigChunks(pileup, chunkSize)
8376
}
84-
message("Starting pattern-matching...")
85-
patternMatchSummary <- patternMatcher(pileup, windowSize, minSize, maxSize, mode, minContigLength)
86-
if (IncludeNoPatterns == TRUE) {
77+
if(verbose){message("Starting pattern-matching...")}
78+
patternMatchSummary <- patternMatcher(pileup, windowSize, minSize, maxSize, mode, minContigLength, verbose)
79+
if (IncludeNoPatterns) {
8780
classifList <- patternMatchSummary[[1]]
8881
} else {
8982
classifList <- removeNoPatterns(patternMatchSummary[[1]])
9083
}
9184
filteredOutContigsDf <- patternMatchSummary[[2]]
92-
message("Summarizing pattern-matching results")
85+
if(verbose){message("Summarizing pattern-matching results")}
9386
summaryTable <- classifSumm(pileup, patternMatchSummary[[1]], windowSize, mode)
9487
if (missing(gffTSV) == FALSE) {
95-
message("Finding gene predictions in elevated or gapped regions of read coverage...")
88+
if(verbose){message("Finding gene predictions in elevated or gapped regions of read coverage...")}
9689
elevGapSummList <- removeNoPatterns(patternMatchSummary[[1]])
9790
GPSummTable <- GPsInElevGaps(elevGapSummList, windowSize, gffTSV, mode, chunkContigs)
9891
}
99-
message("Finalizing output")
92+
if(verbose){message("Finalizing output")}
10093
endTime <- Sys.time()
10194
duration <- difftime(endTime, startTime)
102-
message("Execution time: ", round(duration[[1]], 2), units(duration))
103-
message(
95+
if(verbose){message("Execution time: ", round(duration[[1]], 2), units(duration))}
96+
if(verbose){message(
10497
length(which(
10598
filteredOutContigsDf[, 2] == "Low read cov"
10699
)),
107100
" contigs were filtered out based on low read coverage"
108-
)
109-
message(
101+
)}
102+
if(verbose){message(
110103
length(which(
111104
filteredOutContigsDf[, 2] == "Too Short"
112105
)),
113106
" contigs were filtered out based on length (< minContigLength)"
114-
)
107+
)}
115108
arguments <- list(windowSize, mode, chunkSize, chunkContigs)
116109
cleanSummaryTable <- summaryTable[-which(summaryTable[,2]=="NoPattern"),]
117110
finalSummaryList <- list(summaryTable, cleanSummaryTable, classifList, filteredOutContigsDf, arguments)
118111
names(finalSummaryList) <- c("SummaryTable", "CleanSummaryTable", "PatternMatches", "FilteredOut", "Arguments")
119112
if (missing(gffTSV) == FALSE) {
120113
finalSummaryList <- c(finalSummaryList, list(GPSummTable))
121-
names(finalSummaryList)[6] <- "GenePredictTable"
114+
names(finalSummaryList)[6] <- "GeneAnnotTable"
122115
}
123116
table <- (table(summaryTable[, 2]))
124-
message(paste0(capture.output(table), collapse = "\n"))
117+
if(verbose){message(paste0(capture.output(table), collapse = "\n"))}
118+
if(mode == "genome" || (mode == "metagenome" & chunkContigs == TRUE)){
119+
linkChunks(classifList, pileup, windowSize, mode, verbose)
120+
}
125121
if (missing(saveFilesTo) == FALSE) {
126122
ifelse(!dir.exists(paths = paste0(saveFilesTo, "\\ProActiveOutput")),
127123
dir.create(paste0(saveFilesTo, "\\ProActiveOutput")),
@@ -135,7 +131,7 @@ ProActive <- function(pileup, mode, gffTSV, windowSize = 1000, chunkContigs = FA
135131
GPSummTable,
136132
file = paste0(
137133
saveFilesTo,
138-
"\\ProActiveOutput\\ProActiveGenePredictstable.csv"
134+
"\\ProActiveOutput\\ProActiveGeneAnnotsTable.csv"
139135
),
140136
sep = ",",
141137
row.names = FALSE

R/bestMatchListFunctions.R

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -11,25 +11,19 @@
1111
#' @param mode Either "genome" or "metagenome"
1212
#' @keywords internal
1313
classifSumm <- function(pileup, bestMatchList, windowSize, mode) {
14-
refName <- rep(NA, length(bestMatchList))
15-
elevRatio <- rep(NA, length(bestMatchList))
16-
startPos <- rep(NA, length(bestMatchList))
17-
endPos <- rep(NA, length(bestMatchList))
18-
classification <- rep(NA, length(bestMatchList))
19-
matchSize <- rep(NA, length(bestMatchList))
2014
if (length(bestMatchList) == 0) {
2115
stop("No pattern-matches detected")
2216
}
23-
lapply(seq_along(bestMatchList), function(i) {
24-
refName[i] <<- bestMatchList[[i]][[8]]
17+
refName <- vapply(seq_along(bestMatchList), function(i) {bestMatchList[[i]][[8]]}, character(1))
18+
elevRatio <- vapply(seq_along(bestMatchList), function(i) {bestMatchList[[i]][[6]]}, numeric(1))
19+
startPos <- vapply(seq_along(bestMatchList), function(i) {bestMatchList[[i]][[4]]} * windowSize, numeric(1))
20+
endPos <- vapply(seq_along(bestMatchList), function(i) {bestMatchList[[i]][[5]]} * windowSize, numeric(1))
21+
classification <- vapply(seq_along(bestMatchList), function(i) {bestMatchList[[i]][[7]]}, character(1))
22+
matchSize <- vapply(seq_along(bestMatchList), function(i) {
2523
pileupSubset <- pileup[which(pileup[, 1] == bestMatchList[[i]][[8]]), ]
2624
pileupSubset <- changewindowSize(pileupSubset, windowSize, mode)
27-
elevRatio[i] <<- bestMatchList[[i]][[6]]
28-
classification[i] <<- bestMatchList[[i]][[7]]
29-
startPos[i] <<- pileupSubset[bestMatchList[[i]][[4]], 3]
30-
endPos[i] <<- pileupSubset[bestMatchList[[i]][[5]], 3]
31-
matchSize[i] <<- (length(seq(pileupSubset[bestMatchList[[i]][[4]], 3], pileupSubset[bestMatchList[[i]][[5]], 3], windowSize)) - 1) * windowSize
32-
})
25+
(length(seq(pileupSubset[bestMatchList[[i]][[4]], 3], pileupSubset[bestMatchList[[i]][[5]], 3], windowSize)) - 1) * windowSize},
26+
numeric(1))
3327
classifSummTable <- cbind.data.frame(refName, classification, elevRatio, startPos, endPos, matchSize)
3428
return(classifSummTable)
3529
}
@@ -43,18 +37,14 @@ classifSumm <- function(pileup, bestMatchList, windowSize, mode) {
4337
#' all contigs/chunks classified by `ProActive()` pattern-matching
4438
#' @keywords internal
4539
removeNoPatterns <- function(bestMatchList) {
46-
newBestMatchList <- list()
47-
length(bestMatchList)
48-
X <- 1
49-
lapply(seq_along(bestMatchList), function(i) {
40+
newBestMatchList <- lapply(seq_along(bestMatchList), function(i) {
5041
bestMatchInfo <- bestMatchList[[i]]
5142
classification <- bestMatchInfo[[7]]
5243
if (classification == "NoPattern") {
5344
return(NULL)
5445
} else {
55-
newBestMatchList[[X]] <<- bestMatchInfo
56-
X <<- X + 1
46+
bestMatchInfo
5747
}
5848
})
59-
return(newBestMatchList)
49+
return(return(newBestMatchList[!vapply(newBestMatchList, is.null, logical(1))]))
6050
}

0 commit comments

Comments
 (0)