Something went wrong on our end
Select Git revision
-
Tim Stadtmann authoredTim Stadtmann authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
encrypt.py 4.07 KiB
#!/usr/bin/env python
"""Creates encrypted copies of PDFs
This script creates encrypted copies of (watermarked) PDFs as well as a CSV file
storing passwords for each file.
Author: Amrita Deb <Deb@itc.rwth-aachen.de>
"""
import pikepdf
import os
import csv
import pwgen
import argparse
import sys
import time
import utils.matnum as utils
def encrypt(pdf_file, enc_file, password):
    """Write a password-protected copy of a PDF.

    The given password is set as both the owner and the user password and
    the copy is saved with security handler revision 4 (``R=4``).

    Args:
        pdf_file (str): path to PDF
        enc_file (str): path of the encrypted pdf
        password (str): password
    """
    pdf = pikepdf.Pdf.open(pdf_file)
    try:
        enc = pikepdf.Encryption(owner=password, user=password, R=4)
        pdf.save(enc_file, encryption=enc)
    finally:
        # Release the input file even when building the encryption settings
        # or saving the copy fails.
        pdf.close()
def main(args):
    """Encrypt every PDF of a folder and store the passwords in a CSV file.

    1) Lists all PDFs to be encrypted from input folder
    2) Encrypts each PDF, either with the common password given via
       ``--password`` or with a randomly generated 8 character long password
    3) Prepares a CSV file with one ``[file ID, password]`` row per PDF

    Args:
        args (list): command line arguments without the program name,
            e.g. ``sys.argv[1:]``

    Returns:
        tuple: ``(enc_files, csv_lines)`` - the paths of the encrypted PDFs
        and the ``[file ID, password]`` rows written to the CSV file
    """
    # Argument handling
    parser = argparse.ArgumentParser(description='''
PDFs in input folder are encrypted and stored in output folder.
Alongside with a CSV file mapping passwords to each PDF.
''')
    parser.add_argument("-i", "--infolder", default="./pdfs_watermarked",
                        help="Input PDF folder. Default: ./pdfs_watermarked")
    parser.add_argument("-o", "--outfolder", default="./pdfs_encrypted",
                        help="Output folder storing encrypted PDFs. " +
                        "Default: ./pdfs_encrypted")
    parser.add_argument("-p", "--password", default="",
                        help="Common password for all encrypted PDFs. " +
                        "Default: '' " +
                        "results in generation of random passwords.")
    parser.add_argument("-w", "--passwordout", default="",
                        help="Output path for CSV file. " +
                        "Default: '' will be changed to " +
                        "[outfolder]/passwords.csv.")
    args = parser.parse_args(args)
    infolder = args.infolder
    outfolder = args.outfolder

    # An empty CSV path means "use the default location in the output folder"
    if args.passwordout == "":
        password_file = os.path.join(outfolder, 'passwords.csv')
    else:
        password_file = args.passwordout

    # List all PDFs
    starttime = time.time()
    pdf_files = [name for name in os.listdir(infolder)
                 if name.endswith(".pdf")]
    print("""
Available PDFs to be encrypted:
- {}
Files in output folder {} will be overwritten during this process.
""".format("\n- ".join(pdf_files), outfolder))

    # Encrypt all PDFs in input folder
    csv_lines = []
    enc_files = []
    for pdf_file in pdf_files:
        # File ID for password storage: the value returned by
        # utils.get_matnum when the first six characters of the file name
        # pass utils.check_matnum, otherwise the file name itself.
        if utils.check_matnum(pdf_file[0:6]):  # PDF file is linked to matnum
            pdf_id = utils.get_matnum(pdf_file)
        else:  # PDF file does not start with matnum
            pdf_id = pdf_file

        # Generate random password if common password not given
        if args.password == "":
            password = pwgen.pwgen(8)
        else:
            password = args.password

        # Encrypt
        in_file = os.path.join(infolder, pdf_file)
        enc_file = os.path.splitext(pdf_file)[0] + '_aes.pdf'
        enc_file = os.path.join(outfolder, enc_file)
        encrypt(in_file, enc_file, password)

        # Save matnum password mapping to be stored in CSV later
        csv_lines.append([pdf_id, password])
        enc_files.append(enc_file)

    # Store matnum password mappings in CSV file
    print('Saving passwords')
    # newline='' lets the csv module control line endings itself; without it
    # every row ends in "\r\r\n" on platforms that translate "\n" to "\r\n".
    with open(password_file, mode='w', newline='') as password_handle:
        writer = csv.writer(password_handle, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerows(csv_lines)

    # Print status
    endtime = time.time()
    print("""All PDFs are encrypted and can be found in {} folder.
CSV file is stored in {}.
Time taken: {:.2f}s
""".format(outfolder, password_file, endtime-starttime))
    return enc_files, csv_lines
# Script entry point: hand the command line arguments (without the program
# name) to main() when this file is executed directly.
if __name__ == '__main__':
    cli_arguments = sys.argv[1:]
    main(cli_arguments)