Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
RWTHmoodle
exam-scan
Commits
ba38aa92
Commit
ba38aa92
authored
Apr 23, 2021
by
Christian Rohlfing
Browse files
introduced starts_with_matnum
parent
a7c34f4a
Changes
6
Hide whitespace changes
Inline
Side-by-side
encrypt.py
View file @
ba38aa92
...
...
@@ -16,7 +16,7 @@ import argparse
import
sys
import
time
import
utils.matnum
as
utils
import
utils.matnum
as
matnum_
utils
def
encrypt
(
pdf_file
,
enc_file
,
password
):
...
...
@@ -90,8 +90,8 @@ Files in output folder {} will be overwritten during this process.
enc_files
=
[]
for
pdf_file
in
pdf_files
:
# File ID for password storage
if
utils
.
check
_matnum
(
pdf_file
[
0
:
6
]
):
# PDF
file is
linked to matnum
pdf_id
=
utils
.
get_matnum
(
pdf_file
)
if
matnum_utils
.
starts_with
_matnum
(
pdf_file
):
# PDF linked to matnum
pdf_id
=
matnum_
utils
.
get_matnum
(
pdf_file
)
else
:
# PDF file does not start with matnum
pdf_id
=
pdf_file
...
...
preparemoodle.py
View file @
ba38aa92
...
...
@@ -6,7 +6,7 @@ import time
import
shutil
# copyfile, make_archive
import
argparse
import
sys
import
utils.matnum
as
utils
import
utils.matnum
as
matnum_
utils
def
find_unmatched_pdfs
(
infolder
,
matnums
,
nowarn
):
...
...
@@ -26,7 +26,7 @@ def find_unmatched_pdfs(infolder, matnums, nowarn):
for
pdffile
in
files
:
if
pdffile
.
endswith
(
".pdf"
):
# Get matriculation number from file
matnum
=
utils
.
get_matnum
(
pdffile
)
matnum
=
matnum_
utils
.
get_matnum
(
pdffile
)
# Search matriculation number in CSV
if
matnum
not
in
matnums
:
...
...
@@ -125,7 +125,7 @@ Processing {} lines
# If pdf files for current student exists, create a directory and
# copy the pdf files to it. The resulting directories can be
# uploaded to Moodle
longpdffiles
=
utils
.
find_file
(
matnum
+
"*.pdf"
,
infolder
)
longpdffiles
=
matnum_
utils
.
find_file
(
matnum
+
"*.pdf"
,
infolder
)
if
len
(
longpdffiles
)
>
0
:
# Found some file(s)
numfoundpdfs
+=
1
...
...
supplements.py
View file @
ba38aa92
...
...
@@ -15,7 +15,7 @@ import argparse # handle command line arguments
import
shutil
# copy
import
csv
# handle CSV files
import
utils.matnum
as
utils
import
utils.matnum
as
matnum_
utils
def
copy_supplements
(
supp_dir
,
supp_files
,
prefixes
,
output_dir
,
dry
=
False
):
...
...
@@ -127,7 +127,8 @@ Files in output folder {} will be overwritten during this process.
if
pdf_dir
!=
""
:
# Take prefixes from pdf directory
pdf_folder
=
os
.
listdir
(
pdf_dir
)
pdf_files
=
[
_
for
_
in
pdf_folder
if
_
.
endswith
(
".pdf"
)
and
utils
.
check_matnum
(
_
.
split
(
'_'
,
1
)[
0
])]
if
_
.
endswith
(
".pdf"
)
and
matnum_utils
.
starts_with_matnum
(
_
)]
prefixes
=
[]
for
pdf_file
in
pdf_files
:
prefix
=
os
.
path
.
splitext
(
pdf_file
)[
0
]
# take file name as prefix
...
...
@@ -144,7 +145,7 @@ Files in output folder {} will be overwritten during this process.
name
=
row
[
1
]
# [Lastname], [Firstname]
name
=
name
[
0
]
# Take only first letter of lastname
matnum
=
row
[
2
]
# matriculation number (6-digit)
if
not
utils
.
check_matnum
(
matnum
):
if
not
matnum_
utils
.
check_matnum
(
matnum
):
raise
Exception
(
"Invalid matriculation number found"
)
prefix
=
matnum
+
"_"
+
name
...
...
tests/test_watermark.py
View file @
ba38aa92
...
...
@@ -23,8 +23,9 @@ class MainTest(unittest.TestCase):
# Prepare parameter
in_dir
=
'./pdfs'
dpi
=
2
50
dpi
=
1
50
quality
=
75
fontsize
=
75
pdf_file
=
'123456_Nachname.pdf'
tmp_dir
=
os
.
path
.
join
(
self
.
test_dir
,
'tmp'
)
...
...
@@ -34,8 +35,9 @@ class MainTest(unittest.TestCase):
os
.
mkdir
(
out_dir
)
# Call function
watermark
.
watermark_pdf
(
in_dir
,
tmp_dir
,
out_dir
,
dpi
,
quality
,
pdf_file
)
watermark
.
watermark_pdf
(
input_dir
=
in_dir
,
tmp_dir
=
tmp_dir
,
output_dir
=
out_dir
,
fontsize
=
fontsize
,
dpi
=
dpi
,
quality
=
quality
,
pdf_file
=
pdf_file
)
self
.
assertTrue
(
os
.
listdir
(
out_dir
)[
0
],
'123456_Nachname_w.pdf'
)
...
...
@@ -48,7 +50,7 @@ class MainTest(unittest.TestCase):
# Prepare parameter
in_dir
=
'./pdfs'
dpi
=
2
50
dpi
=
1
50
tmp_dir
=
os
.
path
.
join
(
self
.
test_dir
,
'tmp'
)
os
.
mkdir
(
tmp_dir
)
...
...
utils/matnum.py
View file @
ba38aa92
...
...
@@ -23,31 +23,64 @@ def find_file(pattern, path):
return
result
def _extract_matnum(s):
    """Internal helper returning the leading part of a string

    Args:
        s (str): string, typically a file name

    Returns:
        str: all characters of the string before the first occurrence
            of '_' (the whole string if it contains no '_').
    """

    # The matriculation number has to be separated by "_" from the rest
    # of the file name, so everything in front of that separator is kept
    head, _sep, _tail = s.partition('_')
    return head
def check_matnum(matnum):
    """Checks for valid matriculation number

    * All characters have to be ASCII digits (0-9)
    * Either five or six digits

    Args:
        matnum (str): matriculation number

    Returns:
        bool: valid
    """

    # Anything that is not a string (e.g. None) cannot be a valid
    # matriculation number; report it as invalid instead of failing in len()
    if not isinstance(matnum, str):
        return False

    # str.isdigit() also accepts non-ASCII digit characters (superscripts,
    # full-width digits, ...), hence the explicit comparison against 0-9
    return len(matnum) in (5, 6) and all(c in "0123456789" for c in matnum)
def starts_with_matnum(s):
    """Tells whether a string begins with a valid matriculation number

    Args:
        s (str): file name whose part in front of the first '_' is
            expected to be the matriculation number

    Returns:
        bool: True if that leading part passes check_matnum
    """

    # Cut off everything from the first "_" on and validate the remainder
    return check_matnum(_extract_matnum(s))
def
get_matnum
(
s
):
"""Extracts matriculation number from string
"""Extracts matriculation number from string
and checks if valid
Args:
s (str): file name with first 6 characters matriculation number
Returns:
str: 6-digit matriculation number
Throws:
Error (ValueError) if file does not start with proper matriculation number
"""
# Get matriculation number
# Has to be separated by "_" from the rest of the file name
matnum
=
s
.
split
(
'_'
,
1
)[
0
]
matnum
=
_extract_matnum
(
s
)
# Sanity check
if
not
check_matnum
(
matnum
):
...
...
watermark.py
View file @
ba38aa92
...
...
@@ -16,10 +16,9 @@ from multiprocessing import Pool # multi processing
from
functools
import
partial
from
wand.image
import
Image
as
wi
# PDF to images
from
PIL
import
Image
,
ImageDraw
,
ImageFont
# Image handling
from
pikepdf
import
Pdf
#combining PDFs
from
glob
import
glob
#combining PDFs
from
pikepdf
import
Pdf
# combining PDFs
import
utils.matnum
as
utils
import
utils.matnum
as
matnum_
utils
def
convert_pdf_to_img
(
pdf_file
,
input_dir
,
tmp_dir
,
dpi
):
...
...
@@ -108,17 +107,20 @@ def create_watermark_template(img_file, matnum, fontsize, dpi):
return
template
def
remove_transparency
(
im
,
bg_colour
=
(
255
,
255
,
255
)):
"""
Correct transparent image turning black issue
Args:
pdf page image
background color white code
im (PIL.Image.Image):
pdf page image
bg_colour (tuple):
background color white code
Returns:
corrected image when the image is transparent
else just retu
i
rn the pdf page image
PIL.Image.Image:
corrected image when the image is transparent
else just return the pdf page image
"""
if
im
.
mode
in
(
'RGBA'
,
'LA'
)
or
(
im
.
mode
==
'P'
and
'transparency'
in
im
.
info
):
if
(
im
.
mode
in
(
'RGBA'
,
'LA'
))
or
(
im
.
mode
==
'P'
and
'transparency'
in
im
.
info
):
alpha
=
im
.
convert
(
'RGBA'
).
split
()[
-
1
]
# Create a new background image of our matt color.
# Must be RGBA because paste requires both images have the same format
...
...
@@ -211,7 +213,7 @@ def watermark_pdf(input_dir, tmp_dir, output_dir,
img_files
=
convert_pdf_to_img
(
pdf_file
,
input_dir
,
tmp_dir
,
dpi
)
# Extracting matriculation numbers
matnum
=
utils
.
get_matnum
(
pdf_file
)
matnum
=
matnum_
utils
.
get_matnum
(
pdf_file
)
# Watermarking PDF page images
# Create template for first page
...
...
@@ -250,19 +252,23 @@ def main(args):
Watermarked PDFs are stored in folder 'out'
'''
)
parser
.
add_argument
(
"-i"
,
"--infolder"
,
default
=
"./pdfs"
,
help
=
"Input folder with PDFs. Default: ./pdfs"
)
help
=
"Input folder with PDFs. Default: ./pdfs"
)
parser
.
add_argument
(
"-o"
,
"--outfolder"
,
default
=
"./pdfs_watermarked"
,
help
=
"Output folder of the PDFs. Default: ./pdfs_watermarked"
)
help
=
"Output folder of the PDFs. "
+
"Default: ./pdfs_watermarked"
)
parser
.
add_argument
(
"-f"
,
"--fontsize"
,
default
=
"75"
,
help
=
"Font size of watermark text in points. Default: 75"
)
help
=
"Font size of watermark text in points. "
+
"Default: 75"
)
parser
.
add_argument
(
"-c"
,
"--cores"
,
default
=
"1"
,
help
=
"Number of cores for parallel processing. Default: 1"
)
help
=
"Number of cores for parallel processing. "
+
"Default: 1"
)
parser
.
add_argument
(
"-t"
,
"--tmp"
,
default
=
"./tmp"
,
help
=
"tmp folder. Default: ./tmp/"
)
parser
.
add_argument
(
"-d"
,
"--dpi"
,
default
=
"250"
,
help
=
"dpi parameter for conversion from pdf to images. Default: 250"
)
help
=
"tmp folder. Default: ./tmp/"
)
parser
.
add_argument
(
"-d"
,
"--dpi"
,
default
=
"150"
,
help
=
"DPI parameter for PDF to image conversion. "
+
"Default: 150"
)
parser
.
add_argument
(
"-q"
,
"--quality"
,
default
=
"75"
,
help
=
"quality parameter for jpeg. Default: 75"
)
help
=
"quality parameter for jpeg. Default: 75"
)
args
=
parser
.
parse_args
(
args
)
infolder
=
args
.
infolder
...
...
@@ -277,7 +283,7 @@ def main(args):
starttime
=
time
.
time
()
pdf_folder
=
os
.
listdir
(
infolder
)
pdf_files
=
[
_
for
_
in
pdf_folder
if
_
.
endswith
(
".pdf"
)
and
utils
.
check_matnum
(
_
.
split
(
'_'
,
1
)[
0
]
)]
if
_
.
endswith
(
".pdf"
)
and
matnum_utils
.
starts_with_matnum
(
_
)]
print
(
"""
Available PDFs to be watermarked:
- {}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment