2828# ' on. Default is 25000.
2929# ' @param chunkSize If `mode`="genome" OR if `mode`="metagenome" and `chunkContigs`=TRUE,
3030# ' chunk the genome or contigs, respectively, into smaller subsets for pattern-matching.
31- # ' `chunkSize` determines the size (in bp) of each 'chunk'. Default is 50000 .
31+ # ' `chunkSize` determines the size (in bp) of each 'chunk'. Default is 100000 .
3232# ' @param IncludeNoPatterns TRUE or FALSE. If TRUE, the noPattern pattern-matches will
3333# ' be included in the ProActive PatternMatches output list. If you would like to visualize
3434# ' the noPattern pattern-matches in `plotProActiveResults()`, this should be set to TRUE.
35+ # ' @param verbose TRUE or FALSE. Print progress messages to console. Default is TRUE.
3536# ' @param saveFilesTo Optional. Provide a path to the directory you wish to save
3637# ' output to. A folder will be made within the provided directory to store
3738# ' results.
3839# ' @importFrom utils capture.output write.table
3940# ' @return A list containing 5 objects (6 when `gffTSV` is provided) described in the function description.
4041# ' @export
4142# ' @examples
42- # ' ## Metagenome mode with gffTSV
4343# ' metagenome_results <- ProActive(
4444# ' pileup = sampleMetagenomePileup,
4545# ' mode = "metagenome",
4646# ' gffTSV = sampleMetagenomegffTSV
4747# ' )
48- # '
49- # ' ## Genome mode without gffTSV
50- # ' genome_results <- ProActive(
51- # ' pileup = exampleGenomePileupSubset,
52- # ' mode = "genome"
53- # ' )
54- # '
55- # ' ##gffTSV is optional!
5648ProActive <- function (pileup , mode , gffTSV , windowSize = 1000 , chunkContigs = FALSE ,
5749 minSize = 10000 , maxSize = Inf , minContigLength = 30000 ,
58- chunkSize = 100000 , IncludeNoPatterns = FALSE , saveFilesTo ) {
50+ chunkSize = 100000 , IncludeNoPatterns = FALSE , verbose = TRUE ,
51+ saveFilesTo ) {
5952 # # error catching
6053 if ((chunkSize %% 100 ) > 0 ) {
6154 stop(" chunkSize must be divisible by 100" )
@@ -73,55 +66,58 @@ ProActive <- function(pileup, mode, gffTSV, windowSize = 1000, chunkContigs = FA
7366 stop(" Pileup file MUST have a windowSize/binsize of 100!" )
7467 }
7568 startTime <- Sys.time()
76- message(" Preparing input file for pattern-matching..." )
69+ if ( verbose ){ message(" Preparing input file for pattern-matching..." )}
7770 pileup <- pileupFormatter(pileup , mode )
7871 if (mode == " genome" ) {
7972 pileup <- genomeChunks(pileup , chunkSize )
8073 }
81- if (chunkContigs == TRUE ) {
74+ if (mode == " metagenome " & chunkContigs == TRUE ) {
8275 pileup <- contigChunks(pileup , chunkSize )
8376 }
84- message(" Starting pattern-matching..." )
85- patternMatchSummary <- patternMatcher(pileup , windowSize , minSize , maxSize , mode , minContigLength )
86- if (IncludeNoPatterns == TRUE ) {
77+ if ( verbose ){ message(" Starting pattern-matching..." )}
78+ patternMatchSummary <- patternMatcher(pileup , windowSize , minSize , maxSize , mode , minContigLength , verbose )
79+ if (IncludeNoPatterns ) {
8780 classifList <- patternMatchSummary [[1 ]]
8881 } else {
8982 classifList <- removeNoPatterns(patternMatchSummary [[1 ]])
9083 }
9184 filteredOutContigsDf <- patternMatchSummary [[2 ]]
92- message(" Summarizing pattern-matching results" )
85+ if ( verbose ){ message(" Summarizing pattern-matching results" )}
9386 summaryTable <- classifSumm(pileup , patternMatchSummary [[1 ]], windowSize , mode )
9487 if (missing(gffTSV ) == FALSE ) {
95- message(" Finding gene predictions in elevated or gapped regions of read coverage..." )
88+ if ( verbose ){ message(" Finding gene predictions in elevated or gapped regions of read coverage..." )}
9689 elevGapSummList <- removeNoPatterns(patternMatchSummary [[1 ]])
9790 GPSummTable <- GPsInElevGaps(elevGapSummList , windowSize , gffTSV , mode , chunkContigs )
9891 }
99- message(" Finalizing output" )
92+ if ( verbose ){ message(" Finalizing output" )}
10093 endTime <- Sys.time()
10194 duration <- difftime(endTime , startTime )
102- message(" Execution time: " , round(duration [[1 ]], 2 ), units(duration ))
103- message(
95+ if ( verbose ){ message(" Execution time: " , round(duration [[1 ]], 2 ), units(duration ))}
96+ if ( verbose ){ message(
10497 length(which(
10598 filteredOutContigsDf [, 2 ] == " Low read cov"
10699 )),
107100 " contigs were filtered out based on low read coverage"
108- )
109- message(
101+ )}
102+ if ( verbose ){ message(
110103 length(which(
111104 filteredOutContigsDf [, 2 ] == " Too Short"
112105 )),
113106 " contigs were filtered out based on length (< minContigLength)"
114- )
107+ )}
115108 arguments <- list (windowSize , mode , chunkSize , chunkContigs )
116109 cleanSummaryTable <- summaryTable [- which(summaryTable [,2 ]== " NoPattern" ),]
117110 finalSummaryList <- list (summaryTable , cleanSummaryTable , classifList , filteredOutContigsDf , arguments )
118111 names(finalSummaryList ) <- c(" SummaryTable" , " CleanSummaryTable" , " PatternMatches" , " FilteredOut" , " Arguments" )
119112 if (missing(gffTSV ) == FALSE ) {
120113 finalSummaryList <- c(finalSummaryList , list (GPSummTable ))
121- names(finalSummaryList )[6 ] <- " GenePredictTable "
114+ names(finalSummaryList )[6 ] <- " GeneAnnotTable "
122115 }
123116 table <- (table(summaryTable [, 2 ]))
124- message(paste0(capture.output(table ), collapse = " \n " ))
117+ if (verbose ){message(paste0(capture.output(table ), collapse = " \n " ))}
118+ if (mode == " genome" || (mode == " metagenome" & chunkContigs == TRUE )){
119+ linkChunks(classifList , pileup , windowSize , mode , verbose )
120+ }
125121 if (missing(saveFilesTo ) == FALSE ) {
126122 ifelse(! dir.exists(paths = paste0(saveFilesTo , " \\ ProActiveOutput" )),
127123 dir.create(paste0(saveFilesTo , " \\ ProActiveOutput" )),
@@ -135,7 +131,7 @@ ProActive <- function(pileup, mode, gffTSV, windowSize = 1000, chunkContigs = FA
135131 GPSummTable ,
136132 file = paste0(
137133 saveFilesTo ,
138- " \\ ProActiveOutput\\ ProActiveGenePredictstable .csv"
134+ " \\ ProActiveOutput\\ ProActiveGeneAnnotsTable .csv"
139135 ),
140136 sep = " ," ,
141137 row.names = FALSE
0 commit comments