#!/usr/bin/env python

"""Rename scanned PDFs assuming scan order equal to alphabetical order of
students in Moodle grading sheet.

Renames scans according to the info in the Moodle grading sheet, such that the
file name starts with the matriculation number. This only works if exams were
scanned in alphabetical order.
Optionally, each scanned PDF is searched for barcodes/QRs containing the
matriculation number to double check.

Attention: Existing files in the output folder will be overwritten!
"""

# Module authors with contact addresses
__author__ = (
    "Amrita Deb (deb@itc.rwth-aachen.de), "
    "Christian Rohlfing (rohlfing@ient.rwth-aachen.de)"
)


Christian Rohlfing's avatar
Christian Rohlfing committed
19
20
21
22
23
24
25
26
27
28
29
import os
import time
import shutil  # copyfile, make_archive
import argparse
import sys

import utils.moodle as moodle
import utils.matnum as matnum_utils
import utils.qr as qr_utils


30
31
32
33
def _make_parser():
    """Build the command line parser of renamescans.py.

    The parser uses the parser returned by moodle.get_moodle_csv_parser() as
    parent, so every argument defined there is available as well. On top of
    that it defines the positional arguments infolder, csv and outfolder and
    the options --filenameformat, -q/--checkqr and -d/--dry.

    Returns:
        argparse.ArgumentParser: The configured parser.
    """
    csv_parser = moodle.get_moodle_csv_parser()
    parser = argparse.ArgumentParser(
        parents=[csv_parser],
        description=__doc__, prog='renamescans.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Positional arguments
    parser.add_argument(
        "infolder", help="Input folder with PDFs.")
    parser.add_argument(
        "csv", help="Moodle grading sheet.")
    parser.add_argument(
        "outfolder", help="Output folder with renamed scans.")

    # Optional arguments
    # ArgumentDefaultsHelpFormatter appends "(default: ...)" on its own, so
    # the help text does not repeat the default value
    parser.add_argument(
        "--filenameformat", default="{matnum}_{fullname[0]}",
        help="File name format. Available keywords: "
        "{matnum}, {fullname}, {lastname}, {firstname}.")
    parser.add_argument(
        "-q", "--checkqr", action='store_true',
        help="Flag for additional QR code match.")
    parser.add_argument(
        "-d", "--dry", action='store_true', help="Flag for dry run.")

    return parser


# Create argument parser with default values once at import time;
# main() reuses this instance to parse its arguments
_parser = _make_parser()
60
61
62
63
64
65
66


def _verify_qr_matnum(pdf_file, in_pdf_full, matnum_csv):
    """Compare the matriculation number of a scan's QR code with the CSV.

    Args:
        pdf_file: File name of the scan, only used in error messages.
        in_pdf_full: Path of the scan that is handed to the QR reader.
        matnum_csv: Matriculation number expected according to the CSV.

    Returns:
        True if the QR reader returned a code whose matriculation number
        equals matnum_csv, False if the QR reader returned nothing.

    Raises:
        Exception: If the QR content does not follow the assumed format,
            holds no valid matriculation number or differs from matnum_csv.
    """
    # Search for first QR code in PDF
    qr = qr_utils.first_qr_from_first_pdf_page(pdf_file=in_pdf_full)
    if not qr:
        return False

    # Assumed QR format: "something-before-the-matnum-{matnum}-{pagenum}"
    fields = qr.split('-')
    if len(fields) < 2:
        # Without this guard the index below would fail with a bare
        # IndexError that names neither the file nor the QR content
        raise Exception("{}: QR content '{}' has unexpected format"
                        .format(pdf_file, qr))
    matnum_qr = fields[-2]
    if not matnum_utils.check_matnum(matnum_qr):
        raise Exception("{} no valid matnum!".format(matnum_qr))

    # Halt if matnum of QR and CSV differ
    if matnum_qr != matnum_csv:
        raise Exception("{}: QR with {} but CSV with matnum {}"
                        .format(pdf_file, matnum_qr, matnum_csv))
    return True


def main(args):
    """Copy the scans of the input folder to their new names.

    The PDFs of the input folder are sorted by file name and paired in that
    order with the entries of the Moodle grading sheet. Unless --dry is set,
    each PDF is copied to the output folder under the name built from
    --filenameformat; existing files of the same name are overwritten.
    With --checkqr the matriculation number found in the QR code of each
    scan is compared with the one of the paired CSV entry.

    Args:
        args: List of command line arguments without the program name,
            e.g. sys.argv[1:].

    Raises:
        Exception: If the number of PDFs differs from the number of students
            reported for the grading sheet, or if the QR check fails.
    """
    # Parse input arguments
    args = _parser.parse_args(args)
    infolder = args.infolder
    sheet_csv = args.csv
    outfolder = args.outfolder
    file_format = args.filenameformat
    dry = args.dry
    csv_delim = args.csvdelim
    csv_quote = args.csvquote
    csv_enc = args.csvenc
    check_qr = args.checkqr

    # Make sure the output folder exists
    os.makedirs(outfolder, exist_ok=True)

    # Print status
    starttime = time.time()
    if dry:
        print("Dry run")
    print("Preparing renaming of scans")

    # Only PDF files are considered.
    # Sort list alphabetically: most scanners are putting timestamps in the
    # file names and this information is more important than the OS time
    # stamp
    pdf_files = sorted(name for name in os.listdir(infolder)
                       if name.lower().endswith(".pdf"))

    # Get number of CSV entries
    num_students = moodle.get_student_number(sheet_csv=sheet_csv,
                                             csv_enc=csv_enc)
    if len(pdf_files) != num_students:
        raise Exception("Error: Not as many CSV lines as scans!")

    # Parse grading infos from CSV file
    infos = moodle.extract_info(sheet_csv=sheet_csv, csv_delim=csv_delim,
                                csv_quote=csv_quote, csv_enc=csv_enc)

    # Loop over scans, the n-th scan belongs to the n-th grading info
    pdfs_no_qrs = []  # Scans for which the QR reader returned nothing
    dry_lines = []  # One "source -> destination" entry per scan
    progress_step = max(1, round(num_students/10))
    print("Renaming", sep=' ', end='', flush=True)
    for cnt, pdf_file in enumerate(pdf_files):
        # Extract matriculation number from grading info
        info = infos[cnt]
        matnum_csv = info['matnum']

        # Destination PDF file name, keeping the extension of the scan
        _, ext = os.path.splitext(pdf_file)
        dest_pdf = file_format.format(
            matnum=matnum_csv, fullname=info['fullname'],
            lastname=info['lastname'], firstname=info['firstname']) + ext
        in_pdf_full = os.path.join(infolder, pdf_file)

        # Sanity check
        if check_qr:
            if not _verify_qr_matnum(pdf_file, in_pdf_full, matnum_csv):
                pdfs_no_qrs.append(pdf_file)

        # Copy
        if not dry:
            dest_pdf_full = os.path.join(outfolder, dest_pdf)
            shutil.copyfile(in_pdf_full, dest_pdf_full)
        else:
            dry_lines.append("\n{} -> {}".format(pdf_file, dest_pdf))

        # Print for-loop progress
        if cnt % progress_step == 0:
            print(".", sep=' ', end='', flush=True)

    # Print results
    print("done.")

    # Dry run
    if dry:
        print("\nDry run results:{}".format("".join(dry_lines)))

    if check_qr and pdfs_no_qrs:
        print("\nCouldn't read QRs in the following PDFs\n- {}"
              .format("\n- ".join(pdfs_no_qrs)))

    # Print time
    endtime = time.time()
    print("Time taken: {:.2f}".format(endtime-starttime))


# Main routine
if __name__ == "__main__":
    # Hand everything after the script name to main()
    cli_args = sys.argv[1:]
    main(cli_args)