Skip to content
Snippets Groups Projects
Commit ec738e7b authored by Christian Rohlfing's avatar Christian Rohlfing
Browse files

watermark is using utils as well

parent c665a804
Branches
Tags
1 merge request!17Preparemoodle: multiple files per student
...@@ -14,15 +14,12 @@ import time # keep track of time ...@@ -14,15 +14,12 @@ import time # keep track of time
import argparse # handle command line arguments import argparse # handle command line arguments
from multiprocessing import Pool # multi processing from multiprocessing import Pool # multi processing
from functools import partial from functools import partial
# TODO decide wand vs pdf2image
from wand.image import Image as wi # PDF to images from wand.image import Image as wi # PDF to images
import io # converting wand image to pillow image
from pdf2image import convert_from_path
from PIL import Image, ImageDraw, ImageFont # Image handling from PIL import Image, ImageDraw, ImageFont # Image handling
from PyPDF2 import PdfFileMerger, PdfFileReader # PDF handling from PyPDF2 import PdfFileMerger, PdfFileReader # PDF handling
import utils.matnum as utils
def convert_pdf_to_img(pdf_file, input_dir, tmp_dir, dpi): def convert_pdf_to_img(pdf_file, input_dir, tmp_dir, dpi):
"""Converts all pages from a PDF to single images """Converts all pages from a PDF to single images
...@@ -60,46 +57,6 @@ def convert_pdf_to_img(pdf_file, input_dir, tmp_dir, dpi): ...@@ -60,46 +57,6 @@ def convert_pdf_to_img(pdf_file, input_dir, tmp_dir, dpi):
return img_files return img_files
def convert_pdf_to_img_new(pdf_file, input_dir, tmp_dir, dpi):
    """Converts all pages from a PDF to PNG images using pdf2image

    Args:
        pdf_file (str): file name of the PDF, relative to input_dir
        input_dir (str): folder containing the PDF
        tmp_dir (str): folder the page images are written to
        dpi: resolution forwarded to convert_from_path

    Returns:
        list: sorted paths of every entry in tmp_dir whose name starts
        with the PDF's base name followed by "_". Entries that were
        already present in tmp_dir and share that prefix are included.
    """
    source = os.path.join(input_dir, pdf_file)

    # Prefix shared by all page images of this PDF
    prefix = os.path.splitext(pdf_file)[0] + '_'

    # The returned value is not used; the pages are collected from
    # tmp_dir by file name below
    convert_from_path(source, dpi=dpi, output_folder=tmp_dir,
                      fmt='png', output_file=prefix)

    return sorted(os.path.join(tmp_dir, name)
                  for name in os.listdir(tmp_dir)
                  if name.startswith(prefix))
def get_matnum(s):
    """Extracts matriculation number from string

    The matriculation number is the part of the string in front of the
    first "_" (or the whole string if it contains no "_").

    Args:
        s (str): file name starting with the 6-digit matriculation number

    Returns:
        str: 6-digit matriculation number

    Raises:
        ValueError: if the leading part is not exactly six ASCII digits
    """
    # Get matriculation number
    # Has to be separated by "_" from the rest of the file name
    matnum = s.split('_', 1)[0]

    # Sanity check
    # str.isdigit() would also accept non-ASCII digit characters such as
    # superscripts, so the characters are compared against 0-9 explicitly
    if len(matnum) != 6 or any(c not in "0123456789" for c in matnum):
        raise ValueError("{} not a valid matriculation number".format(matnum))

    return matnum
def create_watermark_template(img_file, matnum, dpi): def create_watermark_template(img_file, matnum, dpi):
"""Creates transparent image with repeated matriculation number """Creates transparent image with repeated matriculation number
...@@ -229,7 +186,7 @@ def watermark_pdf(input_dir, tmp_dir, output_dir, dpi, pdf_file): ...@@ -229,7 +186,7 @@ def watermark_pdf(input_dir, tmp_dir, output_dir, dpi, pdf_file):
img_files = convert_pdf_to_img(pdf_file, input_dir, tmp_dir, dpi) img_files = convert_pdf_to_img(pdf_file, input_dir, tmp_dir, dpi)
# Extracting matriculation numbers # Extracting matriculation numbers
matnum = get_matnum(pdf_file) matnum = utils.get_matnum(pdf_file)
# Watermarking PDF page images # Watermarking PDF page images
# Create template for first page # Create template for first page
...@@ -286,7 +243,8 @@ def main(args): ...@@ -286,7 +243,8 @@ def main(args):
# Print status # Print status
starttime = time.time() starttime = time.time()
pdf_folder = os.listdir(infolder) pdf_folder = os.listdir(infolder)
pdf_files = [_ for _ in pdf_folder if _.endswith(".pdf")] pdf_files = [_ for _ in pdf_folder
if _.endswith(".pdf") and utils.check_matnum(_[0:6])]
print(""" print("""
Available PDFs to be watermarked: Available PDFs to be watermarked:
- {} - {}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment