Commit ba38aa92 authored by Christian Rohlfing's avatar Christian Rohlfing
Browse files

introduced starts_with_matnum

parent a7c34f4a
......@@ -16,7 +16,7 @@ import argparse
import sys
import time
import utils.matnum as utils
import utils.matnum as matnum_utils
def encrypt(pdf_file, enc_file, password):
......@@ -90,8 +90,8 @@ Files in output folder {} will be overwritten during this process.
enc_files = []
for pdf_file in pdf_files:
# File ID for password storage
if utils.check_matnum(pdf_file[0:6]): # PDF file is linked to matnum
pdf_id = utils.get_matnum(pdf_file)
if matnum_utils.starts_with_matnum(pdf_file): # PDF linked to matnum
pdf_id = matnum_utils.get_matnum(pdf_file)
else: # PDF file does not start with matnum
pdf_id = pdf_file
......
......@@ -6,7 +6,7 @@ import time
import shutil # copyfile, make_archive
import argparse
import sys
import utils.matnum as utils
import utils.matnum as matnum_utils
def find_unmatched_pdfs(infolder, matnums, nowarn):
......@@ -26,7 +26,7 @@ def find_unmatched_pdfs(infolder, matnums, nowarn):
for pdffile in files:
if pdffile.endswith(".pdf"):
# Get matriculation number from file
matnum = utils.get_matnum(pdffile)
matnum = matnum_utils.get_matnum(pdffile)
# Search matriculation number in CSV
if matnum not in matnums:
......@@ -125,7 +125,7 @@ Processing {} lines
# If pdf files for current student exists, create a directory and
# copy the pdf files to it. The resulting directories can be
# uploaded to Moodle
longpdffiles = utils.find_file(matnum + "*.pdf", infolder)
longpdffiles = matnum_utils.find_file(matnum + "*.pdf", infolder)
if len(longpdffiles) > 0: # Found some file(s)
numfoundpdfs += 1
......
......@@ -15,7 +15,7 @@ import argparse # handle command line arguments
import shutil # copy
import csv # handle CSV files
import utils.matnum as utils
import utils.matnum as matnum_utils
def copy_supplements(supp_dir, supp_files, prefixes, output_dir, dry=False):
......@@ -127,7 +127,8 @@ Files in output folder {} will be overwritten during this process.
if pdf_dir != "": # Take prefixes from pdf directory
pdf_folder = os.listdir(pdf_dir)
pdf_files = [_ for _ in pdf_folder
if _.endswith(".pdf") and utils.check_matnum(_.split('_', 1)[0])]
if _.endswith(".pdf") and
matnum_utils.starts_with_matnum(_)]
prefixes = []
for pdf_file in pdf_files:
prefix = os.path.splitext(pdf_file)[0] # take file name as prefix
......@@ -144,7 +145,7 @@ Files in output folder {} will be overwritten during this process.
name = row[1] # [Lastname], [Firstname]
name = name[0] # Take only first letter of lastname
matnum = row[2] # matriculation number (6-digit)
if not utils.check_matnum(matnum):
if not matnum_utils.check_matnum(matnum):
raise Exception("Invalid matriculation number found")
prefix = matnum + "_" + name
......
......@@ -23,8 +23,9 @@ class MainTest(unittest.TestCase):
# Prepare parameter
in_dir = './pdfs'
dpi = 250
dpi = 150
quality = 75
fontsize = 75
pdf_file = '123456_Nachname.pdf'
tmp_dir = os.path.join(self.test_dir, 'tmp')
......@@ -34,8 +35,9 @@ class MainTest(unittest.TestCase):
os.mkdir(out_dir)
# Call function
watermark.watermark_pdf(in_dir, tmp_dir, out_dir,
dpi, quality, pdf_file)
watermark.watermark_pdf(input_dir=in_dir, tmp_dir=tmp_dir,
output_dir=out_dir, fontsize=fontsize, dpi=dpi,
quality=quality, pdf_file=pdf_file)
self.assertTrue(os.listdir(out_dir)[0], '123456_Nachname_w.pdf')
......@@ -48,7 +50,7 @@ class MainTest(unittest.TestCase):
# Prepare parameter
in_dir = './pdfs'
dpi = 250
dpi = 150
tmp_dir = os.path.join(self.test_dir, 'tmp')
os.mkdir(tmp_dir)
......
......@@ -23,31 +23,64 @@ def find_file(pattern, path):
return result
def _extract_matnum(s):
"""Internal wrapper for splitting string
Args:
s (string): string
Returns:
string: all characters in string until first occurence of '_'.
"""
# Has to be separated by "_" from the rest of the file name
return s.split('_', 1)[0]
def check_matnum(matnum):
    """Check whether a string is a valid matriculation number.

    A valid matriculation number

    * consists only of the ASCII digits 0-9, and
    * is either five or six characters long.

    Args:
        matnum (str): candidate matriculation number

    Returns:
        bool: True if the candidate is valid, False otherwise
    """
    # str.isdigit() alone is not sufficient: it also returns True for
    # non-ASCII digit characters (e.g. superscript or full-width digits),
    # which must not be accepted as part of a matriculation number.
    return len(matnum) in (5, 6) and all(c in "0123456789" for c in matnum)
def starts_with_matnum(s):
    """Tell whether a string begins with a valid matriculation number.

    The candidate number is everything before the first '_' in the string
    (see ``_extract_matnum``); it is then validated with ``check_matnum``.

    Args:
        s (str): file name whose leading part, up to the first '_', is
            expected to be the matriculation number

    Returns:
        bool: True if the leading part is a valid matriculation number
    """
    return check_matnum(_extract_matnum(s))
def get_matnum(s):
"""Extracts matriculation number from string
"""Extracts matriculation number from string and checks if valid
Args:
s (str): file name with first 6 characters matriculation number
Returns:
str: 6-digit matriculation number
Throws:
Error (ValueError) if file does not start with proper matriculation number
"""
# Get matriculation number
# Has to be separated by "_" from the rest of the file name
matnum = s.split('_', 1)[0]
matnum = _extract_matnum(s)
# Sanity check
if not check_matnum(matnum):
......
......@@ -16,10 +16,9 @@ from multiprocessing import Pool # multi processing
from functools import partial
from wand.image import Image as wi # PDF to images
from PIL import Image, ImageDraw, ImageFont # Image handling
from pikepdf import Pdf #combining PDFs
from glob import glob #combining PDFs
from pikepdf import Pdf # combining PDFs
import utils.matnum as utils
import utils.matnum as matnum_utils
def convert_pdf_to_img(pdf_file, input_dir, tmp_dir, dpi):
......@@ -108,17 +107,20 @@ def create_watermark_template(img_file, matnum, fontsize, dpi):
return template
def remove_transparency(im, bg_colour=(255, 255, 255)):
"""
Correct transparent image turning black issue
Args:
pdf page image
background color white code
im (PIL.Image.Image): pdf page image
bg_colour (tuple): background color white code
Returns:
corrected image when the image is transparent
else just retuirn the pdf page image
PIL.Image.Image: corrected image when the image is transparent
else just return the pdf page image
"""
if im.mode in ('RGBA', 'LA') or (im.mode == 'P' and 'transparency' in im.info):
if (im.mode in ('RGBA', 'LA')) or (im.mode == 'P' and
'transparency' in im.info):
alpha = im.convert('RGBA').split()[-1]
# Create a new background image of our matt color.
# Must be RGBA because paste requires both images have the same format
......@@ -211,7 +213,7 @@ def watermark_pdf(input_dir, tmp_dir, output_dir,
img_files = convert_pdf_to_img(pdf_file, input_dir, tmp_dir, dpi)
# Extracting matriculation numbers
matnum = utils.get_matnum(pdf_file)
matnum = matnum_utils.get_matnum(pdf_file)
# Watermarking PDF page images
# Create template for first page
......@@ -250,19 +252,23 @@ def main(args):
Watermarked PDFs are stored in folder 'out'
''')
parser.add_argument("-i", "--infolder", default="./pdfs",
help="Input folder with PDFs. Default: ./pdfs")
help="Input folder with PDFs. Default: ./pdfs")
parser.add_argument("-o", "--outfolder", default="./pdfs_watermarked",
help="Output folder of the PDFs. Default: ./pdfs_watermarked")
help="Output folder of the PDFs. " +
"Default: ./pdfs_watermarked")
parser.add_argument("-f", "--fontsize", default="75",
help="Font size of watermark text in points. Default: 75")
help="Font size of watermark text in points. " +
"Default: 75")
parser.add_argument("-c", "--cores", default="1",
help="Number of cores for parallel processing. Default: 1")
help="Number of cores for parallel processing. " +
"Default: 1")
parser.add_argument("-t", "--tmp", default="./tmp",
help="tmp folder. Default: ./tmp/")
parser.add_argument("-d", "--dpi", default="250",
help="dpi parameter for conversion from pdf to images. Default: 250")
help="tmp folder. Default: ./tmp/")
parser.add_argument("-d", "--dpi", default="150",
help="DPI parameter for PDF to image conversion. " +
"Default: 150")
parser.add_argument("-q", "--quality", default="75",
help="quality parameter for jpeg. Default: 75")
help="quality parameter for jpeg. Default: 75")
args = parser.parse_args(args)
infolder = args.infolder
......@@ -277,7 +283,7 @@ def main(args):
starttime = time.time()
pdf_folder = os.listdir(infolder)
pdf_files = [_ for _ in pdf_folder
if _.endswith(".pdf") and utils.check_matnum(_.split('_', 1)[0])]
if _.endswith(".pdf") and matnum_utils.starts_with_matnum(_)]
print("""
Available PDFs to be watermarked:
- {}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment