Question 1

Which part of the code do I need to change so that it also processes PDF files in subfolders?

file_handler.py

import glob
import os
import sys
from typing import List


def get_filenames(filepath: str, pattern: str, recursive: bool = False) -> List[str]:
    """Return all filenames that match the pattern in a folder.

    Args:
        filepath (str): folder path.
        pattern (str): filename pattern.
        recursive (bool): when True, also search every subfolder of
            ``filepath``. Defaults to False (only ``filepath`` itself).

    Returns:
        List[str]: list of paths.

    Raises:
        SystemExit: if no file matches the pattern.
    """
    if recursive:
        # "**" matches zero or more directory levels when glob runs with
        # recursive=True, so files directly in `filepath` are still found.
        search = os.path.join(filepath, "**", pattern)
    else:
        search = os.path.join(filepath, pattern)
    filenames = glob.glob(search, recursive=recursive)
    if not filenames:
        # sys.exit raises SystemExit; it never returns a value.
        sys.exit("Error: no file found, check the documentation for more info.")
    return filenames

Main.py

import mathimport clickimport pdf_split_tool.file_handler
import pdf_split_tool.pdf_splitterdef _confirm_split_file(filepath: str, max_size_bytes: int) -> None:"""Split file if user confirms or is valid.Args:filepath: PDF path.max_size_bytes: max size in bytes."""splitter = pdf_split_tool.pdf_splitter.PdfSplitter(filepath)valid = Trueif not valid:click.secho(("Warning: {} has more than 200kb per page. ""Consider reducing resolution before splitting.").format(filepath),fg="yellow",)if not click.confirm("Do you want to continue?"):click.secho("{} skipped.".format(filepath), fg="blue")returnsplitter.split_max_size(max_size_bytes)@click.command()
@click.version_option()
@click.argument("filepath", type=click.Path(exists=True), default=".")
@click.option(
    "-m",
    "--max-size",
    type=float,
    help="Max size in megabytes.",
    default=20,
    show_default=True,
)
def main(filepath: str, max_size: float) -> None:
    """Pdf Split Tool."""
    # NOTE: click uses the docstring above as the command's help text, so it
    # is part of the program's output and is kept verbatim.
    #
    # filepath: a single ".pdf" file, or a folder whose "*.pdf" files are
    #     all processed.
    # max_size: size limit per output file, in megabytes.
    max_size_bytes = math.floor(max_size * 1024 * 1024)  # convert to bytes

    if filepath.endswith(".pdf"):
        targets = [filepath]
    else:
        targets = pdf_split_tool.file_handler.get_filenames(filepath, "*.pdf")

    for target in targets:
        _confirm_split_file(target, max_size_bytes)


if __name__ == "__main__":
    main(prog_name="pdf-split-tool")  # pragma: no cover

pdf_splitter.py

import os
import sys
import tempfile

import PyPDF4


class PdfSplitter:
    """Pdf Splitter class.

    Reads a PDF, records its size statistics and can split it into several
    smaller PDFs written next to the original file.
    """

    def __init__(self, filepath: str) -> None:
        """Constructor.

        Args:
            filepath: path of the PDF to read.
        """
        self.filepath = filepath
        # NOTE(review): the second positional argument of PdfFileReader looks
        # like it is `strict`, not a file mode - confirm against PyPDF4 docs.
        self.input_pdf = PyPDF4.PdfFileReader(filepath, "rb")
        self.total_pages = self.input_pdf.getNumPages()
        self.size = os.path.getsize(filepath)
        # Guard against a PDF without pages to avoid a ZeroDivisionError.
        self.avg_size = self.size / self.total_pages if self.total_pages else 0.0
        print(
            "File: {}\nFile size: {}\nTotal pages: {}\nAverage size: {}".format(
                filepath, self.size, self.total_pages, self.avg_size
            )
        )

    def _get_pdf_size(self, pdf_writer: PyPDF4.PdfFileWriter) -> int:
        """Generates temporary PDF.

        Writes the PDF to a temporary file only to measure how many bytes
        the writer produces.

        Args:
            pdf_writer: pdf writer.

        Returns:
            int: generated file size in bytes.
        """
        with tempfile.TemporaryFile(mode="wb") as fp:
            pdf_writer.write(fp)
            return fp.tell()

    def split_max_size(self, max_size: int) -> int:
        """Creates new files based on max size.

        Output files are named ``<name>-<index><ext>`` next to the source
        file. A single page that is larger than ``max_size`` on its own is
        still written to its own file, because it cannot be reduced further.

        Args:
            max_size: maximum size of each generated file, in bytes.

        Returns:
            int: number of PDFs created (0 when no split is needed).
        """
        if self.size <= max_size or self.total_pages == 0:
            return 0

        # First guess for pages per chunk; at least one page so that every
        # iteration makes progress.
        avg_step = max(1, int(max_size / self.avg_size))
        # Split the extension off once instead of str.replace(".pdf", ...),
        # which would rewrite every ".pdf" occurring anywhere in the path.
        root, ext = os.path.splitext(self.filepath)

        pdfs_count = 0
        current_page = 0
        while current_page < self.total_pages:
            end_page = min(current_page + avg_step, self.total_pages)

            # While the PDF is too big, retry with one page less.
            while True:
                pdf_writer = PyPDF4.PdfFileWriter()
                for page in range(current_page, end_page):
                    pdf_writer.addPage(self.input_pdf.getPage(page))
                current_size = self._get_pdf_size(pdf_writer)
                # The original code re-opens the reader after every
                # measurement; kept as-is.
                self.input_pdf = PyPDF4.PdfFileReader(self.filepath, "rb")
                # Stop shrinking at one page: going to zero pages would emit
                # empty files and never advance (endless loop).
                if current_size <= max_size or end_page - current_page <= 1:
                    break
                end_page -= 1

            # Write the chunk [current_page, end_page).
            with open("{}-{}{}".format(root, pdfs_count, ext), "wb") as out:
                pdf_writer.write(out)
            current_page = end_page
            pdfs_count += 1
        return pdfs_count

Question 2

What you could do is check, for each entry in the folder, whether it is itself a folder; if it is, call the function again on it (recursion).

To check whether a path is a folder or not, you can use

os.path.isdir(path)

where path is the path to the file

EDIT: Posting the code as text is better than posting an image, because it lets people show you the solution without having to retype everything.

EDIT2:

You could try the following; I tried it myself and it should do what you want:

def get_filenames(filepath, pattern, file_list=None):
    """Collect files matching ``pattern`` in ``filepath`` and all subfolders.

    Args:
        filepath: folder to start searching from.
        pattern: filename pattern (e.g. ``"*.pdf"``).
        file_list: optional list to append the matches to; a new list is
            created when omitted.

    Returns:
        The list of matching file paths (``file_list`` itself when given).
        Folders are never included, and the list is empty when nothing
        matches.
    """
    if file_list is None:
        file_list = []
    # Walk every folder instead of recursing only into entries that match the
    # file pattern: a pattern such as "*.pdf" does not match ordinary
    # subfolder names, and returning from inside the loop would stop after
    # the first subfolder.
    for folder, subfolders, _files in os.walk(filepath):
        subfolders.sort()  # deterministic traversal order
        # Escape the folder so special characters in its name are not
        # interpreted as wildcards.
        matches = glob.glob(os.path.join(glob.escape(folder), pattern))
        for match in sorted(matches):
            if os.path.isfile(match):
                file_list.append(match)
    return file_list

How can I make this Python script process subfolders too?

Related Q&A

Why doesnt this recursive GCD function work? [duplicate]

How can I append \n at the end of the list in a list comprehension

Seaborn bar plot y axis has different values than expected

Merge multiple JSON into single one (Python)

I need to filter contents of my text file

How to remove a number from a list that has a range between two numbers? [closed]

How do I solve an attribute error?

Two variables in Django URL

Python 3.2 Replace all words in a text document that are a certain length?

Python split list at zeros [duplicate]