renamescans.py 5.42 KB
Newer Older
Christian Rohlfing's avatar
Christian Rohlfing committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/env python

import os
import time
import shutil  # copyfile, make_archive
import argparse
import sys

import utils.moodle as moodle
import utils.matnum as matnum_utils
import utils.qr as qr_utils


def main(args):
    """Rename scanned exam PDFs after the entries of a Moodle grading sheet.

    The PDF files found in the input folder are sorted alphabetically by
    file name and matched by position to the entries parsed from the grading
    sheet. Each scan is copied to the output folder under a name built from
    the file name format. Optionally, the first QR code on the first page of
    each scan is compared against the matriculation number from the sheet.

    Args:
        args: List of command-line argument strings (without the program
            name), e.g. ``sys.argv[1:]``.

    Raises:
        Exception: If the number of scans differs from the number of
            students in the grading sheet, if fewer grading entries than
            scans could be parsed, if a QR code does not follow the assumed
            format or carries an invalid matriculation number, or if the
            matriculation numbers of QR code and grading sheet differ.
    """

    # Parse input arguments
    parser = argparse.ArgumentParser(description='''
    renames scans accordingly to info in Moodle grading sheet, such that
    the file name starts with the matriculation number.
    This only works if exams were scanned in alphabetical order.
    Optionally, each scanned PDF is searched for barcodes/QRs containing
    the matriculation number to double check.
    Attention: Contents in output folder are overwritten!

    ''')
    parser.add_argument(
        "-i", "--infolder", default="./pdfs_scan",
        help="Input folder with PDFs. Default: ./pdfs_scan")
    parser.add_argument(
        "-o", "--outfolder", default="./pdfs",
        help="Output folder with renamed scans. Default: ./pdfs")
    parser.add_argument(
        "--filenameformat", default="{matnum}_{fullname[0]}",
        help="File name format. Available keywords: " +
        "{matnum}, {fullname}, {lastname}, {firstname}. " +
        "Default: '{matnum}_{fullname[0]}'")
    parser.add_argument(
        "-c", "--csv", default="./Bewertungen.csv",
        help="Moodle grading sheet file. Default: ./Bewertungen.csv")
    parser.add_argument(
        "--csvdelim", default=",", help="CSV delimiter. Default: ','")
    parser.add_argument(
        "--csvquote", default='"', help="CSV quote char." + """Default: '"'""")
    parser.add_argument(
        "--csvenc", default="utf-8", help="CSV encoding scheme. " +
        "Typical encodings:'utf-8', 'utf-8-sig', or 'cp1252' (Windows). " +
        "Default: 'utf-8'")
    parser.add_argument(
        "-q", "--checkqr", action='store_true',
        help="Flag for additional QR code match.")
    parser.add_argument(
        "-d", "--dry", action='store_true', help="Flag for dry run.")

    args = parser.parse_args(args)
    infolder = args.infolder
    sheet_csv = args.csv
    outfolder = args.outfolder
    file_format = args.filenameformat
    dry = args.dry
    csv_delim = args.csvdelim
    csv_quote = args.csvquote
    csv_enc = args.csvenc
    check_qr = args.checkqr

    # Start the timer and announce what is about to happen
    starttime = time.time()
    dry_lines = []  # "source -> destination" entries reported after a dry run
    if dry:
        print("Dry run")
    print("Preparing renaming of scans")

    # Only PDF files are considered
    pdf_files = [name for name in os.listdir(infolder)
                 if name.lower().endswith(".pdf")]
    # Sort list alphabetically
    # Most scanners are putting timestamps in the file names
    # This information is more important than the OS time stamp
    pdf_files.sort()

    # Get number of CSV entries
    num_students = moodle.get_student_number(sheet_csv=sheet_csv,
                                             csv_enc=csv_enc)
    if len(pdf_files) != num_students:
        raise Exception("Error: Not as many CSV lines as scans!")

    # Parse grading infos from CSV file
    infos = moodle.extract_info(sheet_csv=sheet_csv, csv_delim=csv_delim,
                                csv_quote=csv_quote, csv_enc=csv_enc)
    # Fail before anything is copied instead of hitting an IndexError
    # halfway through the loop below
    if len(infos) < len(pdf_files):
        raise Exception(
            "Error: Only {} grading entries parsed for {} scans!"
            .format(len(infos), len(pdf_files)))

    # Make sure the destination exists so that copying cannot fail on a
    # missing folder; a dry run must not touch the file system
    if not dry:
        os.makedirs(outfolder, exist_ok=True)

    # Print one progress dot roughly every 10 percent of the scans
    progress_step = max(1, round(num_students/10))

    # Loop over scans; the n-th scan belongs to the n-th grading entry
    pdfs_no_qrs = []
    print("Renaming", sep=' ', end='', flush=True)
    for cnt, pdf_file in enumerate(pdf_files):
        # Extract matriculation number from grading info
        info = infos[cnt]
        matnum_csv = info['matnum']

        # Destination PDF file name
        dest_pdf = file_format.format(
            matnum=matnum_csv, fullname=info['fullname'],
            lastname=info['lastname'], firstname=info['firstname'])
        # Add extension
        _, ext = os.path.splitext(pdf_file)
        dest_pdf = dest_pdf + ext
        in_pdf_full = os.path.join(infolder, pdf_file)

        # Sanity check
        if check_qr:
            # Search for first QR code in PDF
            qr = qr_utils.first_qr_from_first_pdf_page(pdf_file=in_pdf_full)

            # Extract matnum from QR code
            if qr:
                # Assumed QR format:
                # "something-before-the-matnum-{matnum}-{pagenum}"
                qr_parts = qr.split('-')
                if len(qr_parts) < 2:
                    # Without a hyphen there is no second-to-last field
                    raise Exception(
                        "{}: QR '{}' does not match the expected format"
                        .format(pdf_file, qr))
                matnum_qr = qr_parts[-2]
                if not matnum_utils.check_matnum(matnum_qr):
                    raise Exception("{} no valid matnum!".format(matnum_qr))

                # Halt if matnum of QR and CSV differ
                if matnum_qr != matnum_csv:
                    raise Exception("{}: QR with {} but CSV with matnum {}"
                                    .format(pdf_file, matnum_qr, matnum_csv))
            else:
                pdfs_no_qrs.append(pdf_file)

        # Copy
        if not dry:
            dest_pdf_full = os.path.join(outfolder, dest_pdf)
            shutil.copyfile(in_pdf_full, dest_pdf_full)
        else:
            dry_lines.append("\n{} -> {}".format(pdf_file, dest_pdf))

        # Print for-loop progress
        if not (cnt % progress_step):
            print(".", sep=' ', end='', flush=True)

    # Print results
    print("done.")

    # Dry run
    if dry:
        print("\nDry run results:{}".format("".join(dry_lines)))

    if check_qr and pdfs_no_qrs:
        print("\nCouldn't read QRs in the following PDFs\n- {}"
              .format("\n- ".join(pdfs_no_qrs)))

    # Print time
    endtime = time.time()
    print("""Done.
Time taken: {:.2f}""".format(endtime-starttime))


# Script entry point: when executed directly (not imported), hand the
# command-line arguments without the program name to main()
if __name__ == "__main__":
    main(sys.argv[1:])