encrypt.py 4.1 KB
Newer Older
Christian Rohlfing's avatar
Christian Rohlfing committed
1
2
3
4
#!/usr/bin/env python

"""Creates encrypted copies of PDFs

5
6
This script creates encrypted copies of (watermarked) PDFs as well as a CSV
file storing passwords for each file.
Christian Rohlfing's avatar
Christian Rohlfing committed
7
8
9

Author: Amrita Deb <Deb@itc.rwth-aachen.de>
"""
10
11

import pikepdf
Christian Rohlfing's avatar
Christian Rohlfing committed
12
13
14
15
16
17
18
import os
import csv
import pwgen
import argparse
import sys
import time

19
import utils.matnum as matnum_utils
Christian Rohlfing's avatar
Christian Rohlfing committed
20
21
22
23
24
25
26
27
28
29
30
31
32
33


def encrypt(pdf_file, enc_file, password):
    """PDF encryption

    Opens ``pdf_file`` and saves a copy to ``enc_file`` that is protected
    with ``password``. The same password is used as owner and as user
    password; the encryption is requested with ``R=4``.

    Args:
        pdf_file (str): path to PDF
        enc_file (str): path of the encrypted pdf
        password (str): password
    """

    enc = pikepdf.Encryption(owner=password, user=password, R=4)

    # The context manager closes the source PDF even if saving fails, so no
    # file handle is leaked when an exception is raised.
    with pikepdf.Pdf.open(pdf_file) as pdf:
        pdf.save(enc_file, encryption=enc)


Christian Rohlfing's avatar
Christian Rohlfing committed
37
38
39
40
def main(args):
    """Main function

    1) Lists all PDFs to be encrypted from input folder
    2) Encrypt pdf with randomly generated 8 character long password
       (or with the common password given via ``--password``)
    3) Prepare a csv file that contains matriculation number and password.
       PDFs whose file name does not start with a matriculation number are
       listed under their file name instead.

    Args:
        args (list): command line arguments without the program name,
            e.g. ``sys.argv[1:]``

    Returns:
        tuple: ``(enc_files, csv_lines)``, where ``enc_files`` is the list
        of paths of the encrypted PDFs and ``csv_lines`` is the list of
        ``[pdf_id, password]`` rows written to the CSV file.
    """

    # Argument handling
    parser = argparse.ArgumentParser(description='''
      PDFs in input folder are encrypted and stored in output folder.
      Alongside with a CSV file mapping passwords to each PDF.
      ''')
    parser.add_argument("-i", "--infolder", default="./pdfs_watermarked",
                        help="Input PDF folder. Default: ./pdfs_watermarked")

    parser.add_argument("-o", "--outfolder", default="./pdfs_encrypted",
                        help="Output folder storing encrypted PDFs. " +
                             "Default: ./pdfs_encrypted")

    parser.add_argument("-p", "--password", default="",
                        help="Common password for all encrypted PDFs. " +
                             "Default: '' " +
                             "results in generation of random passwords.")

    parser.add_argument("-w", "--passwordout", default="",
                        help="Output path for CSV file. " +
                             "Default: '' will be changed to " +
                             "[outfolder]/passwords.csv.")

    args = parser.parse_args(args)
    infolder = args.infolder
    outfolder = args.outfolder

    if args.passwordout == "":
        password_file = os.path.join(outfolder, 'passwords.csv')
    else:
        password_file = args.passwordout

    # List all PDFs
    starttime = time.time()
    pdf_files = [name for name in os.listdir(infolder)
                 if name.lower().endswith(".pdf")]
    print("""
Available PDFs to be encrypted:
- {}

Files in output folder {} will be overwritten during this process.
    """.format("\n- ".join(pdf_files), outfolder))

    # Create the output folder if needed; otherwise saving the first
    # encrypted PDF (or the default CSV file) fails on a fresh folder.
    os.makedirs(outfolder, exist_ok=True)

    # Encrypt all PDFs in input folder
    csv_lines = []
    enc_files = []
    for pdf_file in pdf_files:
        # File ID for password storage
        if matnum_utils.starts_with_matnum(pdf_file):  # PDF linked to matnum
            pdf_id = matnum_utils.get_matnum(pdf_file)
        else:  # PDF file does not start with matnum
            pdf_id = pdf_file

        # Generate random password if common password not given
        if args.password == "":
            password = pwgen.pwgen(8)
        else:
            password = args.password

        # Encrypt
        in_file = os.path.join(infolder, pdf_file)
        enc_file = os.path.splitext(pdf_file)[0] + '_aes.pdf'
        enc_file = os.path.join(outfolder, enc_file)
        encrypt(in_file, enc_file, password)

        # Save matnum password mapping to be stored in CSV later
        csv_lines.append([pdf_id, password])
        enc_files.append(enc_file)

    # Store matnum password mappings in CSV file
    print('Saving passwords')
    # newline='' lets the csv module control line endings itself; without
    # it, an extra blank line follows every row on Windows.
    with open(password_file, mode='w', newline='') as password_handle:
        writer = csv.writer(password_handle, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerows(csv_lines)

    # Print status
    endtime = time.time()
    print("""All PDFs are encrypted and can be found in {} folder.
CSV file is stored in {}.

Time taken: {:.2f}s
    """.format(outfolder, password_file, endtime-starttime))

    return enc_files, csv_lines


# Script entry point: hand the command line arguments (without the program
# name) to main() when this file is executed directly.
if __name__ == "__main__":
    main(sys.argv[1:])