Commit ed632793 authored by Christian Rohlfing
Browse files

enhanced functionality in supplements.py

parent e673da87
......@@ -2,10 +2,10 @@
"""Prepare supplement material
Given a folder with exam scans, this script copies supplementary material (such
as exam or sample solution) to have the same prefix (e.g.
"[matnum]_[lastname]") as the exam scan to be ready for watermarking / moodle
upload.
This script copies and renames supplementary material (such as exam sheet or
sample solution) to have the prefix ("[matnum]_[lastname]").
This information is either taken from the filenames of exam scan PDFs or from
the Moodle grading CSV file.
"""
import sys # get arguments from command line
......@@ -13,36 +13,37 @@ import os # path listing/manipulation/...
import time # keep track of time
import argparse # handle command line arguments
import shutil # copy
import csv # handle CSV files
import utils.matnum as utils
def copy_supplements(supp_dir, output_dir, pdf_files, dry=False):
def copy_supplements(supp_dir, supp_files, prefixes, output_dir, dry=False):
"""Copy supplement files
Args:
supp_dir (str): path to supplement folder
output_dir (str): path to output folder
pdf_files (list): list of pdf files
prefixes (list): list of prefixes
dry (bool): indicate dry run
"""
dryout = []
if dry:
print("Dry run\n")
else:
print("Start renaming...", sep='', end='', flush=True)
# Iterate over supplement files
supp_files = os.listdir(supp_dir)
cnt = 0
num_files = len(supp_files)*len(pdf_files)
num_files = len(supp_files)*len(prefixes)
copied_files = []
for supp_file in supp_files:
supp_filefull = os.path.join(supp_dir, supp_file)
supp_stem = os.path.splitext(supp_file)[0] # filename without .pdf
# Iterate over scanned PDF files
for pdf_file in pdf_files:
prefix = os.path.splitext(pdf_file)[0]
for prefix in prefixes:
new_file = prefix + "_" + supp_stem + ".pdf"
new_filefull = os.path.join(output_dir, new_file)
......@@ -62,6 +63,8 @@ def copy_supplements(supp_dir, output_dir, pdf_files, dry=False):
if dry:
dryout.sort()
print("\nDry run results:\n{}".format("\n".join(dryout)))
else:
print("done")
return copied_files
......@@ -78,37 +81,89 @@ def main(args):
# Argument handling
parser = argparse.ArgumentParser(description='''
PDFs of exam scans from folder 'in' are watermarked with the
matriculation number of the respective student.
Watermarked PDFs are stored in folder 'out'
This script copies supplementary material (such as exam sheet or sample
solution) to have the prefix (e.g. "[matnum]_[lastname]").
This information is either taken from the filenames of exam scan PDFs or
from the Moodle grading CSV file.
''')
parser.add_argument("-s", "--supplementfolder", default="./supplements",
parser.add_argument("-i", "--infolder", default="./supplements",
help="Folder with supplements. Default: ./supplements")
parser.add_argument("-p", "--pdffolder", default="./pdfs",
help="PDF folder with scanned PDFs. Default: ./pdfs")
parser.add_argument("-p", "--prefix", default="./pdfs",
help="Provides information to construct prefixes. " +
"Either PDF folder with scanned PDFs or " +
"Moodle grading CSV file. Default: ./pdfs")
parser.add_argument("-o", "--outfolder", default="./supplements_out",
help="Output folder. Default: ./supplements_out")
parser.add_argument("-d", "--dry", action='store_true',
help="Flag for dry run")
args = parser.parse_args(args)
supp_dir = args.supplementfolder
pdf_dir = args.pdffolder
supp_dir = args.infolder
prefixinfo = args.prefix
output_dir = args.outfolder
dry = args.dry
# Decide whether PDF folder or CSV file was given
csvfilename = pdf_dir = ""
ext = os.path.splitext(prefixinfo)[1].lower()
if ext == '.csv': # CSV file
csvfilename = prefixinfo
if not os.path.isfile(csvfilename):
raise Exception("File {} does not exist.".format(csvfilename))
elif ext == '': # Folder
pdf_dir = prefixinfo
if not os.path.isdir(pdf_dir):
raise Exception("Folder {} does not exist.".format(pdf_dir))
else:
raise Exception("{} neither CSV file nor folder.".format(prefixinfo))
# Print status
starttime = time.time()
pdf_folder = os.listdir(pdf_dir)
pdf_files = [_ for _ in pdf_folder
if _.endswith(".pdf") and utils.check_matnum(_[0:6])]
copied_files = copy_supplements(supp_dir, output_dir, pdf_files, dry)
supp_folder = os.listdir(supp_dir)
supp_files = [_ for _ in supp_folder if _.endswith(".pdf")]
print("""
Available supplement PDFs to be copied:
- {}
Files in output folder {} will be overwritten during this process.
""".format("\n- ".join(supp_files), output_dir))
# Create prefixes
if pdf_dir != "": # Take prefixes from pdf directory
pdf_folder = os.listdir(pdf_dir)
pdf_files = [_ for _ in pdf_folder
if _.endswith(".pdf") and utils.check_matnum(_[0:6])]
prefixes = []
for pdf_file in pdf_files:
prefix = os.path.splitext(pdf_file)[0] # take file name as prefix
prefixes.append(prefix)
else: # Take prefixes from CSV file
prefixes = []
# Open CSV file
with open(csvfilename, newline='') as csvfile:
# Loop over all lines in CSV file
reader = csv.reader(csvfile, delimiter=',', quotechar='"')
next(reader) # skip header CSV line
for row in reader:
# Parse required fields from CSV line
name = row[1] # [Lastname], [Firstname]
name = name.replace(", ", "_") # [Lastname]_[Firstname]
name = name.replace(" ", "-")
matnum = row[2] # matriculation number (6-digit)
if not utils.check_matnum(matnum):
raise Exception("Invalid matriculation number found")
prefix = matnum + "_" + name
prefixes.append(prefix) # save prefix
# Copy supplements to output dir and prepend prefixes
copied_files = copy_supplements(supp_dir, supp_files, prefixes,
output_dir, dry)
# Print status
endtime = time.time()
print("""All PDFs are watermarked and can be found in {} folder:
print("""
All PDFs are renamed and can be found in {} folder.
Time taken: {:.2f}s
""".format(output_dir, endtime-starttime))
......
......@@ -16,13 +16,66 @@ class MainTest(unittest.TestCase):
t = self.toc - self.tic
print('Time: %.3f' % (t))
def test_supplements_from_pdf_folder(self):
    """Check supplement copies when prefixes come from a PDF folder.

    Runs ``supplements.main`` with ``./supplements`` as input folder and
    ``./pdfs`` as prefix source, then compares the sorted listing of the
    output folder against the expected file names.
    """
    import supplements

    # Output and scratch folders are created below self.test_dir
    out_folder = os.path.join(self.test_dir, 'supplements_out')
    os.mkdir(out_folder)
    scratch_folder = os.path.join(self.test_dir, 'tmp')
    os.mkdir(scratch_folder)

    # Copy the supplements, prefixes derived from the PDF folder
    cli_args = ["-i", './supplements', "-p", './pdfs', "-o", out_folder]
    supplements.main(cli_args)

    # Two prefixes times two supplement files
    # (presumably matching the fixtures in ./pdfs and ./supplements,
    # which are not visible here)
    wanted = [
        '123456_Nachname_GDET3_20H.pdf',
        '123456_Nachname_GDET3_20H_loes.pdf',
        '456789_Lastname_GDET3_20H.pdf',
        '456789_Lastname_GDET3_20H_loes.pdf',
    ]

    # Sort the listing before comparing against the expected names
    self.assertEqual(wanted, sorted(os.listdir(out_folder)))
def test_supplements_from_csv(self):
    """Check supplement copies when prefixes come from a grading CSV.

    Runs ``supplements.main`` with ``./supplements`` as input folder and
    ``Bewertungen.csv`` as prefix source, then compares the sorted listing
    of the output folder against the expected file names.
    """
    import supplements

    # Output and scratch folders are created below self.test_dir
    out_folder = os.path.join(self.test_dir, 'supplements_out')
    os.mkdir(out_folder)
    scratch_folder = os.path.join(self.test_dir, 'tmp')
    os.mkdir(scratch_folder)

    # Copy the supplements, prefixes derived from the CSV file
    grading_csv = 'Bewertungen.csv'
    cli_args = ["-i", './supplements', "-p", grading_csv, "-o", out_folder]
    supplements.main(cli_args)

    # Two prefixes times two supplement files
    # (presumably matching the rows of Bewertungen.csv, which is not
    # visible here)
    wanted = [
        '123456_Nachname_Vorname_GDET3_20H.pdf',
        '123456_Nachname_Vorname_GDET3_20H_loes.pdf',
        '987654_Noch_Jemand_GDET3_20H.pdf',
        '987654_Noch_Jemand_GDET3_20H_loes.pdf',
    ]

    # Sort the listing before comparing against the expected names
    self.assertEqual(wanted, sorted(os.listdir(out_folder)))
def test_supplements_watermark(self):
import supplements
import watermark
import utils.matnum as utils
expected_files = ['123456_Nachname_GDET3_20H_loes_w.pdf', '123456_Nachname_GDET3_20H_w.pdf',
'456789_Lastname_GDET3_20H_loes_w.pdf', '456789_Lastname_GDET3_20H_w.pdf']
expected_files = ['123456_Nachname_GDET3_20H_loes_w.pdf',
'123456_Nachname_GDET3_20H_w.pdf',
'456789_Lastname_GDET3_20H_loes_w.pdf',
'456789_Lastname_GDET3_20H_w.pdf']
# Prepare parameter
supp_dir = './supplements'
......@@ -39,7 +92,7 @@ class MainTest(unittest.TestCase):
os.mkdir(out_dir)
# Copy supplements file
supplements.main(["-s", supp_dir, "-p", pdf_dir, "-o", supp_out_dir])
supplements.main(["-i", supp_dir, "-p", pdf_dir, "-o", supp_out_dir])
# Watermark files
watermark.main(["-i", supp_out_dir, "-o", out_dir,
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment