Commit 66ace6c1 authored by Christian Rohlfing's avatar Christian Rohlfing
Browse files

Merge remote-tracking branch 'origin/master' into rohlfing-patch-batch

parents 102fbf2a 9ac74c6e
......@@ -25,6 +25,10 @@ Exemplary outputs can be downloaded:
* The filename of each PDF should start with the student's matriculation number (e.g. `123456_Nachname.pdf`).
* Place all PDFs in a folder, e.g. `pdfs`.
* **Optional: Sample Solutions alongside exams (Refer [here](https://git.rwth-aachen.de/rwthmoodle/exam-scan/-/issues/3))**
* Scan the sample solutions and save the scans as PDFs (each page should be A4). For most copy machines, you can save an A3 scan (double page of an exam) as two A4 pages.
* Place all PDFs in a folder, e.g. `supplements`.
* **Create and setup Moodle**
* In your Moodle course room, create an `assign` module following this [guideline](https://help.itc.rwth-aachen.de/service/8d9eb2f36eea4fcaa9abd0e1ca008b22/article/0cfca4212fef4712ad2d432ac83eaf3e)
* Download the grading table `Bewertungen.csv` from Moodle via: `Alle Angaben anzeigen` → `Bewertungsvorgang` → `Bewertungstabelle herunterladen`
......@@ -52,7 +56,8 @@ Exemplary outputs can be downloaded:
### Process
Run `watermark.py`, `encrypt.py`, and `preparemoodle.py` (or run `batch.py` which runs all three) as described in the sections below. In summary, these steps will
Run `supplements.py` (if you want to add sample solutions as well), `watermark.py`, `encrypt.py`, and `preparemoodle.py` (or run `batch.py` which runs the latter three) as described in the sections below. In summary, these steps will
1. prepare a copy of the sample solution for each student (optional),
1. watermark each page of each PDF with the corresponding matriculation number,
1. encrypt each PDF with a password (global or per-student) and
1. construct a zip-archive enabling batch upload and assignment of each PDF to each student in Moodle.
......@@ -63,9 +68,26 @@ Upload `moodle_feedbacks.zip` to Moodle
### Commands
#### Prepare copies of Sample Solutions for each student (Optional)
We assume that the folder `./supplements` holds the scans of the sample solution.
```
python supplements.py
```
or
```
python3 supplements.py
```
Folder `supplements_out` contains copies of the sample solutions for each student.
#### Watermark
We assume that the folder `./pdfs` holds the scans of the exams.
We assume that the folder `./pdfs` holds the scans of the exams.
The filename of each PDF should start with the matriculation number of the student, e.g. `./pdfs/123456_Lastname.pdf`.
```
......@@ -80,6 +102,20 @@ python3 watermark.py --in ./pdfs --out ./pdfs_watermarked --cores 2
Folder `pdfs_watermarked` contains watermarked PDFs, with each page watermarked with the matriculation number of the student.
**Watermark Sample solution copies**
We assume that the folder `./supplements_out` holds the copies of the sample solutions for every student.
```
python watermark.py --in ./supplements_out --out ./pdfs_watermarked --cores 2
```
or
```
python3 watermark.py --in ./supplements_out --out ./pdfs_watermarked --cores 2
```
#### Encrypt
Use either a global password by specifying it with the `--password` option or per-student passwords by omitting `--password`.
......
......@@ -2,8 +2,8 @@
"""Creates encrypted copies of PDFs
This scripts creates encrypted copies of the watermarked PDFs created by
watermark.py as well as a csv files storing passwords for each file
This script creates encrypted copies of (watermarked) PDFs as well as a CSV file
storing passwords for each file.
Author: Amrita Deb <Deb@itc.rwth-aachen.de>
"""
......@@ -48,16 +48,21 @@ def main(args):
Alongside with a CSV file mapping passwords to each PDF.
''')
parser.add_argument("-i", "--infolder", default="./pdfs_watermarked",
help="Input folder with watermarked PDFs. Default: ./pdfs_watermarked")
help="Input PDF folder. Default: ./pdfs_watermarked")
parser.add_argument("-o", "--outfolder", default="./pdfs_encrypted",
help="Output folder of the encrypted PDFs Default: ./pdfs_encrypted")
help="Output folder storing encrypted PDFs. " +
"Default: ./pdfs_encrypted")
parser.add_argument("-p", "--password", default="",
help="Common password for all encrypted PDFs. Default: '' will be changed to a 8 character randomly generated password")
help="Common password for all encrypted PDFs. " +
"Default: '' " +
"results in generation of random passwords.")
parser.add_argument("-w", "--passwordout", default="",
help="Output path for CSV file. Default: '' will be changed to [outfolder]/passwords.csv.")
help="Output path for CSV file. " +
"Default: '' will be changed to " +
"[outfolder]/passwords.csv.")
args = parser.parse_args(args)
infolder = args.infolder
......@@ -72,7 +77,7 @@ def main(args):
starttime = time.time()
pdf_folder = os.listdir(infolder)
pdf_files = [_ for _ in pdf_folder
if _.endswith(".pdf") and utils.check_matnum(_[0:6])]
if _.endswith(".pdf")]
print("""
Available PDFs to be encrypted:
- {}
......@@ -84,8 +89,11 @@ Files in output folder {} will be overwritten during this process.
csv_lines = []
enc_files = []
for pdf_file in pdf_files:
# PDF file has to start with 6 digit mat number
matnum = utils.get_matnum(pdf_file)
# File ID for password storage
if utils.check_matnum(pdf_file[0:6]): # PDF file is linked to matnum
pdf_id = utils.get_matnum(pdf_file)
else: # PDF file does not start with matnum
pdf_id = pdf_file
# Generate random password if common password not given
if args.password == "":
......@@ -93,13 +101,14 @@ Files in output folder {} will be overwritten during this process.
else:
password = args.password
# Encrypt
in_file = os.path.join(infolder, pdf_file)
enc_file = os.path.splitext(pdf_file)[0] + '_aes.pdf'
enc_file = os.path.join(outfolder, enc_file)
encrypt(in_file, enc_file, password)
# Save matnum password mapping to be stored in CSV later
csv_lines.append([matnum, password])
csv_lines.append([pdf_id, password])
enc_files.append(enc_file)
# Store matnum password mappings in CSV file
......
......@@ -2,10 +2,10 @@
"""Prepare supplement material
Given a folder with exam scans, this script copies supplementary material (such
as exam or sample solution) to have the same prefix (e.g.
"[matnum]_[lastname]") as the exam scan to be ready for watermarking / moodle
upload.
This script copies and renames supplementary material (such as exam sheet or
sample solution) to have the prefix ("[matnum]_[lastname]").
This information is either taken from the filenames of exam scan PDFs or from
the Moodle grading CSV file.
"""
import sys # get arguments from command line
......@@ -13,36 +13,37 @@ import os # path listing/manipulation/...
import time # keep track of time
import argparse # handle command line arguments
import shutil # copy
import csv # handle CSV files
import utils.matnum as utils
def copy_supplements(supp_dir, output_dir, pdf_files, dry=False):
def copy_supplements(supp_dir, supp_files, prefixes, output_dir, dry=False):
"""Copy supplement files
Args:
supp_dir (str): path to supplement folder
output_dir (str): path to output folder
pdf_files (list): list of pdf files
prefixes (list): list of prefixes
dry (bool): indicate dry run
"""
dryout = []
if dry:
print("Dry run\n")
else:
print("Start renaming...", sep='', end='', flush=True)
# Iterate over supplement files
supp_files = os.listdir(supp_dir)
cnt = 0
num_files = len(supp_files)*len(pdf_files)
num_files = len(supp_files)*len(prefixes)
copied_files = []
for supp_file in supp_files:
supp_filefull = os.path.join(supp_dir, supp_file)
supp_stem = os.path.splitext(supp_file)[0] # filename without .pdf
# Iterate over scanned PDF files
for pdf_file in pdf_files:
prefix = os.path.splitext(pdf_file)[0]
for prefix in prefixes:
new_file = prefix + "_" + supp_stem + ".pdf"
new_filefull = os.path.join(output_dir, new_file)
......@@ -62,53 +63,100 @@ def copy_supplements(supp_dir, output_dir, pdf_files, dry=False):
if dry:
dryout.sort()
print("\nDry run results:\n{}".format("\n".join(dryout)))
else:
print("done")
return copied_files
def main(args):
"""Main function
Steps:
1) Decide whether the prefix information is a PDF folder or a Moodle grading CSV file
2) Collect the supplement PDFs from the input folder
3) Construct one prefix per exam scan PDF or per CSV row
4) Call copy_supplements to copy every supplement once per prefix
"""
# Argument handling
parser = argparse.ArgumentParser(description='''
PDFs of exam scans from folder 'in' are watermarked with the
matriculation number of the respective student.
Watermarked PDFs are stored in folder 'out'
This script copies supplementary material (such as exam sheet or sample
solution) to have the prefix (e.g. "[matnum]_[lastname]").
This information is either taken from the filenames of exam scan PDFs or
from the Moodle grading CSV file.
''')
parser.add_argument("-s", "--supplementfolder", default="./supplements",
parser.add_argument("-i", "--infolder", default="./supplements",
help="Folder with supplements. Default: ./supplements")
parser.add_argument("-p", "--pdffolder", default="./pdfs",
help="PDF folder with scanned PDFs. Default: ./pdfs")
parser.add_argument("-p", "--prefix", default="./pdfs",
help="Provides information to construct prefixes. " +
"Either PDF folder with scanned PDFs or " +
"Moodle grading CSV file. Default: ./pdfs")
parser.add_argument("-o", "--outfolder", default="./supplements_out",
help="Output folder. Default: ./supplements_out")
parser.add_argument("-d", "--dry", action='store_false',
parser.add_argument("-d", "--dry", action='store_true',
help="Flag for dry run")
args = parser.parse_args(args)
supp_dir = args.supplementfolder
pdf_dir = args.pdffolder
supp_dir = args.infolder
prefixinfo = args.prefix
output_dir = args.outfolder
dry = args.dry
# Decide whether PDF folder or CSV file was given
csvfilename = pdf_dir = ""
ext = os.path.splitext(prefixinfo)[1].lower()
if ext == '.csv': # CSV file
csvfilename = prefixinfo
if not os.path.isfile(csvfilename):
raise Exception("File {} does not exist.".format(csvfilename))
elif ext == '': # Folder
pdf_dir = prefixinfo
if not os.path.isdir(pdf_dir):
raise Exception("Folder {} does not exist.".format(pdf_dir))
else:
raise Exception("{} neither CSV file nor folder.".format(prefixinfo))
# Print status
starttime = time.time()
pdf_folder = os.listdir(pdf_dir)
pdf_files = [_ for _ in pdf_folder
if _.endswith(".pdf") and utils.check_matnum(_[0:6])]
copied_files = copy_supplements(supp_dir, output_dir, pdf_files, dry)
supp_folder = os.listdir(supp_dir)
supp_files = [_ for _ in supp_folder if _.endswith(".pdf")]
print("""
Available supplement PDFs to be copied:
- {}
Files in output folder {} will be overwritten during this process.
""".format("\n- ".join(supp_files), output_dir))
# Create prefixes
if pdf_dir != "": # Take prefixes from pdf directory
pdf_folder = os.listdir(pdf_dir)
pdf_files = [_ for _ in pdf_folder
if _.endswith(".pdf") and utils.check_matnum(_[0:6])]
prefixes = []
for pdf_file in pdf_files:
prefix = os.path.splitext(pdf_file)[0] # take file name as prefix
prefixes.append(prefix)
else: # Take prefixes from CSV file
prefixes = []
# Open CSV file
with open(csvfilename, newline='') as csvfile:
# Loop over all lines in CSV file
reader = csv.reader(csvfile, delimiter=',', quotechar='"')
next(reader) # skip header CSV line
for row in reader:
# Parse required fields from CSV line
name = row[1] # [Lastname], [Firstname]
name = name[0] # Take only first letter of lastname
matnum = row[2] # matriculation number (6-digit)
if not utils.check_matnum(matnum):
raise Exception("Invalid matriculation number found")
prefix = matnum + "_" + name
prefixes.append(prefix) # save prefix
# Copy supplements to output dir and prepend prefixes
copied_files = copy_supplements(supp_dir, supp_files, prefixes,
output_dir, dry)
# Print status
endtime = time.time()
print("""All PDFs are watermarked and can be found in {} folder:
print("""
All PDFs are renamed and can be found in {} folder.
Time taken: {:.2f}s
""".format(output_dir, endtime-starttime))
......
......@@ -18,7 +18,7 @@ class MainTest(unittest.TestCase):
# Clean up
shutil.rmtree(self.test_dir)
def test_encrypt_pdfs(self):
def test_encrypt_scans(self):
import encrypt
expected_files = ['123456_Nachname_aes.pdf', '456789_Lastname_aes.pdf',
......@@ -36,3 +36,22 @@ class MainTest(unittest.TestCase):
created_files = os.listdir(out_dir)
created_files.sort()
self.assertEqual(expected_files, created_files)
def test_encrypt_supplements(self):
    """Run encrypt.main on ./supplements and compare the output listing.

    The output folder is expected to contain one ``*_aes.pdf`` per
    supplement PDF plus the ``passwords.csv`` file.
    """
    import encrypt

    # Every expected name must be its own list element: adjacent string
    # literals without a separating comma are silently concatenated by
    # Python into a single (non-existent) file name.
    expected_files = ['GDET3_20H_aes.pdf',
                      'GDET3_20H_loes_aes.pdf',
                      'passwords.csv']

    # Prepare parameter
    in_dir = './supplements'
    out_dir = os.path.join(self.test_dir, 'out')
    os.mkdir(out_dir)

    # Encrypt files
    encrypt.main(["-i", in_dir, "-o", out_dir])

    # Compare the sorted directory listing with the expectation
    created_files = os.listdir(out_dir)
    created_files.sort()
    self.assertEqual(expected_files, created_files)
......@@ -16,13 +16,66 @@ class MainTest(unittest.TestCase):
t = self.toc - self.tic
print('Time: %.3f' % (t))
def test_supplements_from_pdf_folder(self):
    """Copy supplements using prefixes taken from the exam scan folder.

    Calls supplements.main with ./pdfs as prefix source and checks that the
    output folder lists exactly the expected prefixed supplement copies.
    """
    import supplements

    # Output folder for the prefixed copies, plus a scratch folder
    target_dir = os.path.join(self.test_dir, 'supplements_out')
    os.mkdir(target_dir)
    scratch_dir = os.path.join(self.test_dir, 'tmp')
    os.mkdir(scratch_dir)

    # Run the script: supplements from ./supplements, prefixes from ./pdfs
    cli_args = ["-i", './supplements', "-p", './pdfs', "-o", target_dir]
    supplements.main(cli_args)

    # Expected listing, already in sorted order
    expected_files = [
        '123456_Nachname_GDET3_20H.pdf',
        '123456_Nachname_GDET3_20H_loes.pdf',
        '456789_Lastname_GDET3_20H.pdf',
        '456789_Lastname_GDET3_20H_loes.pdf',
    ]
    created_files = sorted(os.listdir(target_dir))
    self.assertEqual(expected_files, created_files)
def test_supplements_from_csv(self):
    """Copy supplements using prefixes taken from the Moodle grading CSV.

    Calls supplements.main with Bewertungen.csv as prefix source and checks
    that the output folder lists exactly the expected prefixed copies.
    """
    import supplements

    # Output folder for the prefixed copies, plus a scratch folder
    target_dir = os.path.join(self.test_dir, 'supplements_out')
    os.mkdir(target_dir)
    scratch_dir = os.path.join(self.test_dir, 'tmp')
    os.mkdir(scratch_dir)

    # Run the script: supplements from ./supplements, prefixes from the CSV
    grading_csv = 'Bewertungen.csv'
    cli_args = ["-i", './supplements', "-p", grading_csv, "-o", target_dir]
    supplements.main(cli_args)

    # Expected listing, already in sorted order.
    # NOTE(review): these names carry a full "Nachname_Vorname" style prefix;
    # confirm this matches the prefix supplements.main builds from CSV rows.
    expected_files = [
        '123456_Nachname_Vorname_GDET3_20H.pdf',
        '123456_Nachname_Vorname_GDET3_20H_loes.pdf',
        '987654_Noch_Jemand_GDET3_20H.pdf',
        '987654_Noch_Jemand_GDET3_20H_loes.pdf',
    ]
    created_files = sorted(os.listdir(target_dir))
    self.assertEqual(expected_files, created_files)
def test_supplements_watermark(self):
import supplements
import watermark
import utils.matnum as utils
expected_files = ['123456_Nachname_GDET3_20H_loes_w.pdf', '123456_Nachname_GDET3_20H_w.pdf',
'456789_Lastname_GDET3_20H_loes_w.pdf', '456789_Lastname_GDET3_20H_w.pdf']
expected_files = ['123456_Nachname_GDET3_20H_loes_w.pdf',
'123456_Nachname_GDET3_20H_w.pdf',
'456789_Lastname_GDET3_20H_loes_w.pdf',
'456789_Lastname_GDET3_20H_w.pdf']
# Prepare parameter
supp_dir = './supplements'
......@@ -39,7 +92,7 @@ class MainTest(unittest.TestCase):
os.mkdir(out_dir)
# Copy supplements file
supplements.main(["-s", supp_dir, "-p", pdf_dir, "-o", supp_out_dir])
supplements.main(["-i", supp_dir, "-p", pdf_dir, "-o", supp_out_dir])
# Watermark files
watermark.main(["-i", supp_out_dir, "-o", out_dir,
......
......@@ -40,6 +40,8 @@ class MainTest(unittest.TestCase):
def test_watermark_pdfs(self):
import watermark
expected_files = ['123456_Nachname_w.pdf', '456789_Lastname_w.pdf']
# Prepare parameter
in_dir = './pdfs'
dpi = 250
......@@ -54,4 +56,6 @@ class MainTest(unittest.TestCase):
watermark.main(["-i", in_dir, "-o", out_dir,
"-t", tmp_dir, "--dpi", str(dpi)])
self.assertTrue(True)
created_files = os.listdir(out_dir)
created_files.sort()
self.assertEqual(expected_files, created_files)
......@@ -16,7 +16,8 @@ from multiprocessing import Pool # multi processing
from functools import partial
from wand.image import Image as wi # PDF to images
from PIL import Image, ImageDraw, ImageFont # Image handling
from PyPDF2 import PdfFileMerger, PdfFileReader # PDF handling
from pikepdf import Pdf #combining PDFs
from glob import glob #combining PDFs
import utils.matnum as utils
......@@ -148,12 +149,12 @@ def combine_all_pdfs(pdf_pages, out_dir):
Returns:
str: path to combined PDF
"""
# Merge single pages to one PDF
mergedObject = PdfFileMerger()
# mergedObject = PdfFileMerger()
mergedObject = Pdf.new() # create a blank PDF
for pdf_page in pdf_pages:
mergedObject.append(PdfFileReader(pdf_page, 'rb'))
os.remove(pdf_page)
src = Pdf.open(pdf_page)
mergedObject.pages.extend(src.pages)
# Create file name of merged PDF
pdf_name = os.path.basename(pdf_pages[0]) # remove full path
......@@ -162,7 +163,7 @@ def combine_all_pdfs(pdf_pages, out_dir):
pdf_file = out_dir+'/'+pdf_name+'_w.pdf'
# Save merged PDF
mergedObject.write(pdf_file)
mergedObject.save(pdf_file)
return pdf_file
......@@ -203,6 +204,8 @@ def watermark_pdf(input_dir, tmp_dir, output_dir, dpi, pdf_file):
else:
raise Exception("{}: No PDF pages found".format(pdf_file))
for pdf_file in pdf_files:
os.remove(pdf_file)
return watermarked_pdf
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment