watermark is using utils as well

ec738e7b · Christian Rohlfing · c665a804 · ec738e7b
Commit ec738e7b authored Feb 5, 2021 by Christian Rohlfing
--- a/watermark.py
+++ b/watermark.py
@@ -14,15 +14,12 @@ import time  # keep track of time
 import argparse  # handle command line arguments
 from multiprocessing import Pool  # multi processing
 from functools import partial
-# TODO decide wand vs pdf2image
 from wand.image import Image as wi  # PDF to images
-import io  # converting wand image to pillow image
-from pdf2image import convert_from_path
 from PIL import Image, ImageDraw, ImageFont  # Image handling
 from PyPDF2 import PdfFileMerger, PdfFileReader  # PDF handling
+import utils.matnum as utils
 def convert_pdf_to_img(pdf_file, input_dir, tmp_dir, dpi):
    """Converts all pages from a PDF to single images
@@ -60,46 +57,6 @@ def convert_pdf_to_img(pdf_file, input_dir, tmp_dir, dpi):
    return img_files
-def convert_pdf_to_img_new(pdf_file, input_dir, tmp_dir, dpi):
-    # PDF path
-    pdf_path = os.path.join(input_dir, pdf_file)
-    # Output path
-    img_base = os.path.splitext(pdf_file)[0] + '_'
-    convert_from_path(pdf_path, dpi=dpi, output_folder=tmp_dir,
-                      fmt='png', output_file=img_base)
-    # Iterate over pages and store them as image
-    img_files = os.listdir(tmp_dir)
-    img_files = [os.path.join(tmp_dir, _)
-                 for _ in img_files if _.startswith(img_base)]
-    img_files.sort()
-    return img_files
-def get_matnum(s):
-    """Extracts matriculation number from string
-    Args:
-    s (str): file name with first 6 characters matriculation number
-    Returns:
-    str: 6-digit matriculation number
-    """
-    # Get matriculation number
-    # Has to be separated by "_" from the rest of the file name
-    matnum = s.split('_', 1)[0]
-    # Sanity check
-    if len(matnum) != 6 or not matnum.isdigit():
-        raise ValueError("{} not a valid matriculation number".format(matnum))
-    return matnum
 def create_watermark_template(img_file, matnum, dpi):
    """Creates transparent image with repeated matriculation number
@@ -229,7 +186,7 @@ def watermark_pdf(input_dir, tmp_dir, output_dir, dpi, pdf_file):
    img_files = convert_pdf_to_img(pdf_file, input_dir, tmp_dir, dpi)
    # Extracting matriculation number
-    matnum = get_matnum(pdf_file)
+    matnum = utils.get_matnum(pdf_file)
    # Watermarking PDF page images
    # Create template for first page
@@ -286,7 +243,8 @@ def main(args):
    # Print status
    starttime = time.time()
    pdf_folder = os.listdir(infolder)
-    pdf_files = [_ for _ in pdf_folder if _.endswith(".pdf")]
+    pdf_files = [_ for _ in pdf_folder
+                 if _.endswith(".pdf") and utils.check_matnum(_[0:6])]
    print("""
 Available PDFs to be watermarked:
 - {}