encrypt.py 4.2 KB
Newer Older
Christian Rohlfing's avatar
Christian Rohlfing committed
1
2
3
4
#!/usr/bin/env python

"""Creates encrypted copies of PDFs

PDFs in the input folder are encrypted and stored in the output folder,
together with a CSV file mapping a password to each PDF.

Attention: Contents of the output folder will be overwritten in the process!
"""
10

11
12
13
14
# Module authors with their contact e-mail addresses
__author__ = "Amrita Deb (deb@itc.rwth-aachen.de), " +\
    "Christian Rohlfing (rohlfing@ient.rwth-aachen.de)"


15
import pikepdf
Christian Rohlfing's avatar
Christian Rohlfing committed
16
17
18
19
20
21
22
import os
import csv
import pwgen
import argparse
import sys
import time

23
import utils.matnum as matnum_utils
Christian Rohlfing's avatar
Christian Rohlfing committed
24
25
26
27
28
29
30
31
32
33
34
35
36
37


def encrypt(pdf_file, enc_file, password):
    """Write a password-protected copy of a PDF.

    The given password is set as both the owner and the user password and
    the encryption is requested with ``R=4``.

    Args:
        pdf_file (str): path to the PDF to be read
        enc_file (str): path the encrypted PDF is written to
        password (str): password used as owner and user password
    """

    pdf = pikepdf.Pdf.open(pdf_file)
    try:
        enc = pikepdf.Encryption(owner=password, user=password, R=4)
        pdf.save(enc_file, encryption=enc)
    finally:
        # Release the input file even if encrypting or saving fails
        pdf.close()


41
42
def _make_parser():
    """Build the command line parser of encrypt.py.

    The module docstring is used as the program description.

    Returns:
        argparse.ArgumentParser: parser with the positional arguments
        ``infolder`` and ``outfolder`` and the options ``-p/--password``
        and ``--passwordout``, both defaulting to the empty string.
    """
    parser = argparse.ArgumentParser(
        description=__doc__, prog='encrypt.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument(
        "infolder", help="Input folder of PDFs to be encrypted.")
    parser.add_argument(
        "outfolder", help="Output folder of encrypted PDFs.")

    parser.add_argument(
        "-p", "--password", default="",
        help="Common password for all encrypted PDFs. "
        "Default='' results in generation of random passwords.")
    parser.add_argument(
        "--passwordout", default="",
        help="Output path for CSV file. "
        "Default='' will be changed to [outfolder]/passwords.csv.")

    return parser


# Create argument parser with default values
_parser = _make_parser()
65
66
67
68
69
70
71
72
73
74
75


def main(args):
    """Main function

    1) Lists all PDFs to be encrypted from input folder
    2) Encrypts each PDF, either with the common password given on the
       command line or with a randomly generated 8 character long password
    3) Prepares a CSV file with one row per PDF containing its ID
       (matriculation number if the file name starts with one, otherwise
       the file name) and its password

    Args:
        args (list): command line arguments without the program name,
            e.g. ``sys.argv[1:]``

    Returns:
        tuple: ``(enc_files, csv_lines)`` where ``enc_files`` lists the paths
        of the encrypted PDFs and ``csv_lines`` the ``[pdf_id, password]``
        rows written to the CSV file. ``None`` is returned if the input
        folder contains no PDFs.
    """

    # Argument handling
    args = _parser.parse_args(args)
    infolder = args.infolder
    outfolder = args.outfolder

    # exist_ok avoids failing if the folder appears between check and creation
    os.makedirs(outfolder, exist_ok=True)

    if args.passwordout == "":
        password_file = os.path.join(outfolder, 'passwords.csv')
    else:
        password_file = args.passwordout

    # List all PDFs
    starttime = time.time()
    pdf_files = [name for name in os.listdir(infolder)
                 if name.lower().endswith(".pdf")]
    if not pdf_files:
        print("""
There are no PDFs in the given directory.
Exiting now.""")
        return

    print("""
Available PDFs to be encrypted:
- {}

Files in output folder {} will be overwritten during this process.
    """.format("\n- ".join(pdf_files), outfolder))

    # Encrypt all PDFs in input folder
    csv_lines = []
    enc_files = []
    for pdf_file in pdf_files:
        # File ID for password storage
        if matnum_utils.starts_with_matnum(pdf_file):  # PDF linked to matnum
            pdf_id = matnum_utils.get_matnum(pdf_file)
        else:  # PDF file does not start with matnum
            pdf_id = pdf_file

        # Generate random password if common password not given
        if args.password == "":
            password = pwgen.pwgen(8)
        else:
            password = args.password

        # Encrypt
        in_file = os.path.join(infolder, pdf_file)
        enc_file = os.path.splitext(pdf_file)[0] + '_aes.pdf'
        enc_file = os.path.join(outfolder, enc_file)
        encrypt(in_file, enc_file, password)

        # Save matnum password mapping to be stored in CSV later
        csv_lines.append([pdf_id, password])
        enc_files.append(enc_file)

    # Store matnum password mappings in CSV file
    print('Saving passwords')
    # newline='' lets the csv module control line endings itself; without it
    # an extra carriage return is written per row on Windows
    with open(password_file, mode='w', newline='') as password_handle:
        writer = csv.writer(password_handle, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerows(csv_lines)

    # Print status
    endtime = time.time()
    print("""All PDFs are encrypted and can be found in {} folder.
CSV file is stored in {}.

Time taken: {:.2f}s
    """.format(outfolder, password_file, endtime-starttime))

    return enc_files, csv_lines


# Script entry point: forward the command line arguments (without the
# program name) to main()
if __name__ == '__main__':
    main(sys.argv[1:])