Which part of the code do I need to change in order to include subfolders?
file_handler.py
import glob
import os
import sys
from typing import List


def get_filenames(
    filepath: str, pattern: str, recursive: bool = False
) -> List[str]:
    """Return every path under ``filepath`` whose name matches ``pattern``.

    Args:
        filepath: folder path to search.
        pattern: filename pattern in glob syntax, e.g. ``"*.pdf"``.
        recursive: when True, ``filepath`` and all of its subfolders are
            searched. Defaults to False, which only looks in ``filepath``
            itself (the historical behaviour).

    Returns:
        List of matching paths, in the order reported by ``glob``. The list
        is never empty.

    Raises:
        SystemExit: if no path matches; the exit message tells the user to
            check the documentation.
    """
    if recursive:
        # "**" matches zero or more directory levels, but only when glob is
        # explicitly asked to recurse; this also keeps top-level matches.
        search = os.path.join(filepath, "**", pattern)
    else:
        search = os.path.join(filepath, pattern)

    filenames = glob.glob(search, recursive=recursive)
    if not filenames:
        # sys.exit raises SystemExit, so there is nothing to return here.
        sys.exit("Error: no file found, check the documentation for more info.")
    return filenames
Main.py
import math

import click

import pdf_split_tool.file_handler
import pdf_split_tool.pdf_splitter


def _confirm_split_file(filepath: str, max_size_bytes: int) -> None:
    """Split a single PDF, asking the user first if it fails validation.

    Args:
        filepath: path of the PDF to split.
        max_size_bytes: maximum size of each output file, in bytes.
    """
    pdf = pdf_split_tool.pdf_splitter.PdfSplitter(filepath)

    # NOTE(review): validation is hard-coded to pass, so the warning and the
    # confirmation prompt below are currently never reached.
    valid = True
    if not valid:
        warning = (
            "Warning: {} has more than 200kb per page. "
            "Consider reducing resolution before splitting."
        ).format(filepath)
        click.secho(warning, fg="yellow")

        proceed = click.confirm("Do you want to continue?")
        if not proceed:
            click.secho("{} skipped.".format(filepath), fg="blue")
            return

    pdf.split_max_size(max_size_bytes)


@click.command()
@click.version_option()
@click.argument("filepath", type=click.Path(exists=True), default=".")
@click.option(
    "-m",
    "--max-size",
    type=float,
    help="Max size in megabytes.",
    default=20,
    show_default=True,
)
def main(filepath: str, max_size: float) -> None:
    """Pdf Split Tool."""
    # The docstring above is what click prints as the command help text, so
    # implementation notes live in comments instead.
    max_size_bytes = math.floor(max_size * 1024 * 1024)  # convert to bytes

    # A path ending in ".pdf" is treated as one file; anything else is treated
    # as a folder whose "*.pdf" entries are processed one by one.
    if filepath.endswith(".pdf"):
        targets = [filepath]
    else:
        targets = pdf_split_tool.file_handler.get_filenames(filepath, "*.pdf")

    for path in targets:
        _confirm_split_file(path, max_size_bytes)


if __name__ == "__main__":
    main(prog_name="pdf-split-tool")  # pragma: no cover
pdf_splitter.py
import os
import sys
import tempfile

import PyPDF4


class PdfSplitter:
    """Split one PDF file into several smaller PDF files."""

    def __init__(self, filepath: str) -> None:
        """Open the PDF, record its size statistics and print a summary.

        Args:
            filepath: path of the PDF to split.
        """
        self.filepath = filepath
        # NOTE(review): the original passed "rb" as a second positional
        # argument; in PdfFileReader that slot appears to be `strict`, not a
        # file mode, so it is omitted here — confirm against the PyPDF4 docs.
        self.input_pdf = PyPDF4.PdfFileReader(filepath)
        self.total_pages = self.input_pdf.getNumPages()
        self.size = os.path.getsize(filepath)  # bytes on disk
        # Guard against a PDF without pages instead of dividing by zero.
        self.avg_size = self.size / self.total_pages if self.total_pages else 0
        print(
            "File: {}\nFile size: {}\nTotal pages: {}\nAverage size: {}".format(
                filepath, self.size, self.total_pages, self.avg_size
            )
        )

    def _get_pdf_size(self, pdf_writer: PyPDF4.PdfFileWriter) -> int:
        """Write the PDF to a temporary file to measure its size.

        Args:
            pdf_writer: writer holding the pages to measure.

        Returns:
            int: file position after writing, i.e. the generated size in bytes.
        """
        with tempfile.TemporaryFile(mode="wb") as fp:
            pdf_writer.write(fp)
            return fp.tell()

    def _output_path(self, index: int) -> str:
        """Return the path of the ``index``-th output file.

        The counter is inserted before the extension only, so other ".pdf"
        occurrences in the path are left alone and the output can never be
        the input file itself, whatever the extension's letter case.
        """
        base, ext = os.path.splitext(self.filepath)
        return "{}-{}{}".format(base, index, ext)

    def split_max_size(self, max_size: int) -> int:
        """Create new files so that each stays within ``max_size`` if possible.

        Output files are written next to the input file, with ``-0``, ``-1``,
        ... inserted before the extension. A single page that is larger than
        ``max_size`` on its own cannot be split further and is written as a
        one-page file.

        Args:
            max_size: maximum size of each output file, in bytes (it is
                compared with the on-disk size of the input).

        Returns:
            int: number of PDFs created; 0 when the input already fits or has
            no pages.
        """
        if self.size <= max_size or self.total_pages == 0:
            return 0

        # First guess for pages per file; never below one page, otherwise the
        # loop below could not make progress.
        avg_step = max(1, int(max_size / self.avg_size))
        pdfs_count = 0
        current_page = 0
        while current_page < self.total_pages:
            end_page = min(current_page + avg_step, self.total_pages)

            # While the candidate PDF is too big, retry with one page fewer.
            while True:
                pdf_writer = PyPDF4.PdfFileWriter()
                for page in range(current_page, end_page):
                    pdf_writer.addPage(self.input_pdf.getPage(page))
                current_size = self._get_pdf_size(pdf_writer)
                # NOTE(review): the original re-created the reader after every
                # trial write; kept as-is — confirm before removing.
                self.input_pdf = PyPDF4.PdfFileReader(self.filepath)
                if current_size <= max_size or end_page - current_page <= 1:
                    break
                end_page -= 1

            # Write the pages [current_page, end_page) that were accepted.
            with open(self._output_path(pdfs_count), "wb") as out:
                pdf_writer.write(out)
            current_page = end_page
            pdfs_count += 1
        return pdfs_count