diff --git a/DESCRIPTION b/DESCRIPTION
index 8aaa50b..34554af 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,13 +1,14 @@
 Package: staplr
 Type: Package
 Title: A Toolkit for PDF Files
-Version: 3.1.1
+Version: 3.2.0
 Depends: R (>= 3.4.0)
 Authors@R: c(
     person("Priyanga Dilini", "Talagala", email="pritalagala@gmail.com", role= c("aut","cre")),
     person("Ogan", "Mancarci", email="ogan.mancarci@gmail.com", role='aut'),
     person("Daniel", "Padfield", email= "d.padfield@exeter.ac.uk", role ='aut'),
-    person("Granville", "Matheson", email= "mathesong@gmail.com", role ='aut')
+    person("Granville", "Matheson", email= "mathesong@gmail.com", role ='aut'),
+    person("Pedro Rafael", "D. Marinho", email= "pedro.rafael.marinho@gmail.com", role ='ctb', comment = c(ORCID = "0000-0003-1591-8300"))
     )
 Description: Provides function to manipulate PDF files:
     fill out PDF forms;
@@ -30,10 +31,12 @@ Imports:
     assertthat,
     glue,
     XML,
-    rJava
+    rJava,
+    fs,
+    purrr,
+    pdftools
 Suggests:
     lattice,
-    testthat,
-    pdftools
+    testthat
 Encoding: UTF-8
 BugReports: https://github.com/pridiltal/staplr/issues
diff --git a/NAMESPACE b/NAMESPACE
index 3402c07..0118021 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+export(combine_pdf)
 export(get_fields)
 export(idenfity_form_fields)
 export(remove_pages)
@@ -13,5 +14,13 @@ export(split_pdf)
 export(staple_pdf)
 import(utils)
 importFrom(assertthat,assert_that)
+importFrom(fs,path_ext_remove)
+importFrom(fs,path_file)
+importFrom(glue,glue)
+importFrom(pdftools,pdf_combine)
+importFrom(pdftools,pdf_length)
+importFrom(pdftools,pdf_subset)
+importFrom(purrr,map2_dbl)
+importFrom(purrr,walk2)
 importFrom(stringr,str_extract)
 importFrom(tcltk,tk_choose.dir)
diff --git a/R/combine_pdf.R b/R/combine_pdf.R
new file mode 100644
index 0000000..f2f9639
--- /dev/null
+++ b/R/combine_pdf.R
@@ -0,0 +1,89 @@
+#' Combine multiple PDF files
+#' @description Combine multiple PDF files by delimiting the sequences of pages in each file.
+#' @importFrom glue glue
+#' @importFrom pdftools pdf_subset pdf_combine pdf_length
+#' @importFrom purrr map2_dbl walk2
+#' @importFrom fs path_ext_remove path_file
+#' @param vec_input Vector with paths of PDF files to be combined.
+#' @param output PDF file path result of the combination.
+#' @param start_pages Vector with the initial pages of each file. If \code{NA},
+#' the default, will be considered the first page. A single value is recycled
+#' for every file in \code{vec_input}.
+#' @param end_pages Vector with the final pages of each file. If \code{NA}, the
+#' default, will be considered the last page. A single value is recycled for
+#' every file in \code{vec_input}.
+#' @return
+#' In the path informed in \code{output}, the PDF file resulting from the combination
+#' of multiple files passed to \code{vec_input} will be saved.
+#' @export
+#' @examples
+#'
+#' \dontrun{
+#' combine_pdf(
+#'   vec_input =
+#'     c(
+#'       "file_1.pdf",
+#'       "file_2.pdf"
+#'     ),
+#'    output = "output.pdf",
+#'    start_pages = c(NA, NA),
+#'    end_pages = c(NA, NA)
+#'  )
+#' }
+
+combine_pdf <- function(vec_input, output = "output.pdf", start_pages = NA, end_pages = NA) {
+  n_files <- length(vec_input)
+  if (n_files == 0L) {
+    stop("vec_input must contain at least one PDF file path!", call. = FALSE)
+  }
+
+  # A single value (including the default NA) applies to every input file, so
+  # the documented defaults also work when several files are combined.
+  if (length(start_pages) == 1L) {
+    start_pages <- rep(start_pages, n_files)
+  }
+  if (length(end_pages) == 1L) {
+    end_pages <- rep(end_pages, n_files)
+  }
+  if (length(start_pages) != n_files || length(end_pages) != n_files) {
+    stop(
+      "start_pages and end_pages must have length 1 or the same length as vec_input!",
+      call. = FALSE
+    )
+  }
+
+  # A missing start page means "from the first page".
+  start_pages[is.na(start_pages)] <- 1
+
+  # A missing end page means "up to the last page" of the corresponding file.
+  end_pages <- map2_dbl(
+    .x = end_pages,
+    .y = vec_input,
+    .f = function(last_page, pdf) {
+      if (is.na(last_page)) pdf_length(pdf) else last_page
+    }
+  )
+
+  # One uniquely named temporary file per input, removed again on exit.
+  stem <- path_ext_remove(path_file(output))
+  files <- tempfile(
+    pattern = as.character(glue("{stem}_{seq_len(n_files)}_")),
+    fileext = ".pdf"
+  )
+  on.exit(unlink(files), add = TRUE)
+
+  # Extract the requested page range of each input into its temporary file.
+  walk2(
+    .x = seq_len(n_files),
+    .y = files,
+    .f = function(i, part) {
+      pdf_subset(
+        input = vec_input[i],
+        pages = start_pages[i]:end_pages[i],
+        output = part
+      )
+    }
+  )
+
+  pdf_combine(input = files, output = output)
+}
diff --git a/man/combine_pdf.Rd b/man/combine_pdf.Rd
new file mode 100644
index 0000000..6d49875
--- /dev/null
+++ b/man/combine_pdf.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/combine_pdf.R
+\name{combine_pdf}
+\alias{combine_pdf}
+\title{Combine multiple PDF files}
+\usage{
+combine_pdf(vec_input, output = "output.pdf", start_pages = NA, end_pages = NA)
+}
+\arguments{
+\item{vec_input}{Vector with paths of PDF files to be combined.}
+
+\item{output}{PDF file path result of the combination.}
+
+\item{start_pages}{Vector with the initial pages of each file. If \code{NA},
+the default, will be considered the first page. A single value is recycled
+for every file in \code{vec_input}.}
+
+\item{end_pages}{Vector with the final pages of each file. If \code{NA}, the
+default, will be considered the last page. A single value is recycled for
+every file in \code{vec_input}.}
+}
+\value{
+In the path informed in \code{output}, the PDF file resulting from the combination
+of multiple files passed to \code{vec_input} will be saved.
+}
+\description{
+Combine multiple PDF files by delimiting the sequences of pages in each file.
+}
+\examples{
+
+\dontrun{
+combine_pdf(
+  vec_input =
+    c(
+      "file_1.pdf",
+      "file_2.pdf"
+    ),
+   output = "output.pdf",
+   start_pages = c(NA, NA),
+   end_pages = c(NA, NA)
+ )
+}
+}