Commit e3c199c2 authored by Christian Rohlfing's avatar Christian Rohlfing
Browse files

Enable encryption for arbitrary PDF files

parent 57b52b42
......@@ -2,8 +2,8 @@
"""Creates encrypted copies of PDFs
This scripts creates encrypted copies of the watermarked PDFs created by
watermark.py as well as a csv files storing passwords for each file
This script creates encrypted copies of (watermarked) PDFs as well as a CSV file
storing passwords for each file.
Author: Amrita Deb <Deb@itc.rwth-aachen.de>
"""
......@@ -48,16 +48,21 @@ def main(args):
Alongside with a CSV file mapping passwords to each PDF.
''')
parser.add_argument("-i", "--infolder", default="./pdfs_watermarked",
help="Input folder with watermarked PDFs. Default: ./pdfs_watermarked")
help="Input PDF folder. Default: ./pdfs_watermarked")
parser.add_argument("-o", "--outfolder", default="./pdfs_encrypted",
help="Output folder of the encrypted PDFs Default: ./pdfs_encrypted")
help="Output folder storing encrypted PDFs. " +
"Default: ./pdfs_encrypted")
parser.add_argument("-p", "--password", default="",
help="Common password for all encrypted PDFs. Default: '' will be changed to a 8 character randomly generated password")
help="Common password for all encrypted PDFs. " +
"Default: '' " +
"results in generation of random passwords.")
parser.add_argument("-w", "--passwordout", default="",
help="Output path for CSV file. Default: '' will be changed to [outfolder]/passwords.csv.")
help="Output path for CSV file. " +
"Default: '' will be changed to " +
"[outfolder]/passwords.csv.")
args = parser.parse_args(args)
infolder = args.infolder
......@@ -72,7 +77,7 @@ def main(args):
starttime = time.time()
pdf_folder = os.listdir(infolder)
pdf_files = [_ for _ in pdf_folder
if _.endswith(".pdf") and utils.check_matnum(_[0:6])]
if _.endswith(".pdf")]
print("""
Available PDFs to be encrypted:
- {}
......@@ -84,8 +89,11 @@ Files in output folder {} will be overwritten during this process.
csv_lines = []
enc_files = []
for pdf_file in pdf_files:
# PDF file has to start with 6 digit mat number
matnum = utils.get_matnum(pdf_file)
# File ID for password storage
if utils.check_matnum(pdf_file[0:6]): # PDF file is linked to matnum
pdf_id = utils.get_matnum(pdf_file)
else: # PDF file does not start with matnum
pdf_id = pdf_file
# Generate random password if common password not given
if args.password == "":
......@@ -93,13 +101,14 @@ Files in output folder {} will be overwritten during this process.
else:
password = args.password
# Encrypt
in_file = os.path.join(infolder, pdf_file)
enc_file = os.path.splitext(pdf_file)[0] + '_aes.pdf'
enc_file = os.path.join(outfolder, enc_file)
encrypt(in_file, enc_file, password)
# Save file ID (matnum or file name) to password mapping to be stored in CSV later
csv_lines.append([matnum, password])
csv_lines.append([pdf_id, password])
enc_files.append(enc_file)
# Store matnum password mappings in CSV file
......
......@@ -18,7 +18,7 @@ class MainTest(unittest.TestCase):
# Clean up
shutil.rmtree(self.test_dir)
def test_encrypt_pdfs(self):
def test_encrypt_scans(self):
import encrypt
expected_files = ['123456_Nachname_aes.pdf', '456789_Lastname_aes.pdf',
......@@ -36,3 +36,22 @@ class MainTest(unittest.TestCase):
created_files = os.listdir(out_dir)
created_files.sort()
self.assertEqual(expected_files, created_files)
def test_encrypt_supplements(self):
    """Check that encrypt.main encrypts the PDFs in ./supplements.

    Runs ``encrypt.main`` with ``./supplements`` as input folder and a
    fresh output folder inside ``self.test_dir``, then verifies that the
    output folder contains exactly the expected ``*_aes.pdf`` files plus
    ``passwords.csv``.
    """
    import encrypt

    # Every expected file name must be its own list element. Adjacent
    # string literals without a separating comma are silently joined into
    # one string, which would make the comparison below fail.
    expected_files = ['GDET3_20H_aes.pdf', 'GDET3_20H_loes_aes.pdf',
                      'passwords.csv']

    # Prepare parameter
    in_dir = './supplements'
    out_dir = os.path.join(self.test_dir, 'out')
    os.mkdir(out_dir)

    # Encrypt files
    encrypt.main(["-i", in_dir, "-o", out_dir])

    # os.listdir gives no ordering guarantee, so sort before comparing
    created_files = sorted(os.listdir(out_dir))
    self.assertEqual(expected_files, created_files)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment