preparemoodle.py 9.69 KB
Newer Older
1
2
#!/usr/bin/env python

3
4
import os
import time
5
import shutil  # copyfile, make_archive
6
import argparse  # argument parsing
7
import sys
Deb's avatar
Deb committed
8
import zipfile
9
10

import utils.moodle as moodle
11
import utils.matnum as matnum_utils
12

Deb's avatar
Deb committed
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81

def zip_folders(base_folder, zip_file, size_limit):
    """Zip the folders found in a base folder into one or more archives.

    Folders are added one after another to the current archive. Before a
    folder is added, its uncompressed size is added to the compressed size
    already stored in the current archive; if this estimate exceeds the
    size limit, the current archive is closed and a new one is started.
    The first archive is written to ``zip_file``, every further archive to
    ``<zip_file without extension>_<n>.zip`` with ``n`` starting at 2.

    Only the direct entries of each folder are written to the archive.

    Args:
        base_folder (str): path of base folder
        zip_file (str): path of (first) zip file
        size_limit (int): size limit per zip file in MiB

    Returns:
        int: number of zip files created (0 if the base folder is empty)
    """

    # Initialize
    zip_file_base = os.path.splitext(zip_file)[0]
    total_compress_size = 0  # MiB already stored in the current archive
    file_cnt = 0  # number of entries written to the current archive
    zip_cnt = 0
    zf = None

    all_folders = os.listdir(base_folder)
    progress_step = max(1, round(len(all_folders) / 10))

    try:
        # Iterate over folders
        for cnt, folder in enumerate(all_folders):
            # Measure uncompressed folder size (bytes -> MiB)
            folder_path = os.path.join(base_folder, folder)
            folder_size = get_folder_size(folder_path) / 1024**2

            # If size_limit would be exceeded (or nothing is open yet),
            # start a new zip file
            if zf is None or total_compress_size + folder_size > size_limit:
                zip_cnt += 1
                if zf is not None:
                    zf.close()  # Close previous zip file
                    zip_file = "{zip}_{cnt}.zip".format(
                        zip=zip_file_base, cnt=zip_cnt)

                # Reset counters
                total_compress_size = 0
                file_cnt = 0

                # Open (new) zip file
                zf = zipfile.ZipFile(
                    zip_file, mode='w', compression=zipfile.ZIP_DEFLATED)

            # Add all files of the current folder to the zip file
            last_file_cnt = file_cnt
            for f in os.listdir(folder_path):
                zf.write(
                    os.path.join(folder_path, f),
                    arcname=os.path.join(folder, f))
                file_cnt += 1

            # Compressed size of the entries just added (bytes -> MiB)
            folder_compress_size = sum(
                entry.compress_size
                for entry in zf.filelist[last_file_cnt:])
            total_compress_size += folder_compress_size / 1024**2

            # Print for-loop progress
            if cnt % progress_step == 0:
                print(".", sep=' ', end='', flush=True)
    finally:
        # Clean up: zf is None if base folder was empty; closing also has
        # to happen if writing failed half-way
        if zf is not None:
            zf.close()

    print("done.")

    return zip_cnt
Deb's avatar
Deb committed
82
83


Deb's avatar
Deb committed
84
85
86
87
88
89
90
91
def get_folder_size(path):
    """Get size in bytes of folder, including all nested folders

    Entries that are neither a file nor a directory (e.g. dangling
    symbolic links) do not contribute to the size.

    Args:
        path (str): path of folder

    Returns:
        int: number of bytes
    """
    total = 0
    for entry in os.scandir(path):
        if entry.is_file():
            total += entry.stat().st_size
        elif entry.is_dir():
            total += get_folder_size(entry.path)
        # Anything else is skipped on purpose: adding a size here would
        # either be undefined or repeat the size of the previous entry

    return total
Deb's avatar
Deb committed
103

104

105
106
107
108
def sanity_check(matnums_csv, matnums_folder):
    """Check two cases for sanity:
    - Are there PDF files with no corresponding CSV entries?
    - Are there CSV entries with no provided PDF file?

    A warning listing the affected matnums is printed for each case that
    occurs.

    Args:
        matnums_csv (list): Matnums of all CSV entries
        matnums_folder (list): Matnums of all provided PDF files

    Returns:
        tuple: ``(notfoundcsv, notfoundpdf)`` where ``notfoundcsv`` is the
        sorted list of matnums with PDF but without CSV entry and
        ``notfoundpdf`` the sorted list of matnums with CSV entry but
        without PDF.
    """

    csv_set = set(matnums_csv)
    folder_set = set(matnums_folder)

    # Sets have no defined order; sort (by string form) so that the report
    # and the returned lists are reproducible between runs

    # PDF files with no entry in CSV:
    notfoundcsv = sorted(folder_set - csv_set, key=str)

    # Entries in CSV without PDF file
    notfoundpdf = sorted(csv_set - folder_set, key=str)

    # Report back
    if notfoundcsv:
        print('''Warning: Following {} matnums have PDFs but no entry in CSV:
            {}'''.format(len(notfoundcsv), ", ".join(map(str, notfoundcsv))))

    if notfoundpdf:
        print('''Warning: Following {} matnums have CSV entries but no PDF:
            {}'''.format(len(notfoundpdf), ", ".join(map(str, notfoundpdf))))

    print("Done.\n")

    return notfoundcsv, notfoundpdf

134
135
136
137
138
139
140
141

def _build_arg_parser():
    """Create the command-line parser used by main()."""
    parser = argparse.ArgumentParser(description='''
    prepares batch upload to Moodle via assignment module.
    PDFs in folder 'in' are moved to folder 'tmp' with a certain folder
    structure and finally zipped to 'out'.
    Attention: zip-archive 'out' will be overwritten in the following!

    ''')
    parser.add_argument(
        "infolder", help="Input folder with PDFs.")
    parser.add_argument(
        "csv",
        help="Moodle grading sheet (CSV), e.g. ./Bewertungen.csv")
    parser.add_argument(
        "--csvdelim", default=",", help="CSV delimiter. Default: ','")
    parser.add_argument(
        "--csvquote", default='"',
        help="CSV quote char. " + """Default: '"'""")
    parser.add_argument(
        "--csvenc", default="utf-8", help="CSV encoding scheme. " +
        "Typical encodings:'utf-8', 'utf-8-sig', or 'cp1252' (Windows). " +
        "Default: 'utf-8'")
    parser.add_argument(
        "outzip", help="Zip archive.")
    parser.add_argument(
        "-d", "--dry", action='store_true', help="Flag for dry run.")
    parser.add_argument(
        "-t", "--tmp", default="./tmp", help="Temporary folder. Default:./tmp")
    parser.add_argument(
        "--nowarn", action='store_true', help="Disables warnings")
    parser.add_argument(
        "--moodlefilesize", type=int, default=250,
        help="Moodle upload file size in MiB. Default: 250")
    return parser


def main(args):
    """Main routine

    Collects all PDFs of the input folder whose file name starts with a
    matriculation number, copies them into one submission folder per
    student listed in the Moodle grading sheet and zips these folders.
    In a dry run nothing is written; the planned copy operations are only
    printed.

    Args:
        args (list): command-line arguments without the program name
    """

    # Parse input arguments
    args = _build_arg_parser().parse_args(args)
    infolder = args.infolder
    sheet_csv = args.csv
    outzip = args.outzip
    tmp_folder = os.path.join(args.tmp, "to_be_zipped_for_moodle")
    dry = args.dry
    no_warn = args.nowarn
    csv_delim = args.csvdelim
    csv_quote = args.csvquote
    csv_enc = args.csvenc
    size_limit = args.moodlefilesize  # Moodle upload size limit in MiB

    # Print status
    starttime = time.time()
    num_students = moodle.get_student_number(sheet_csv=sheet_csv,
                                             csv_enc=csv_enc)

    print('''Preparing for moodle upload
Processing {} students'''.format(num_students))

    # Clean up and create temporary folder
    dryout = []
    if dry:
        print("Dry run")
    else:
        # Remove zip file
        if os.path.exists(outzip):
            os.remove(outzip)

        # Create temporary folder within given temporary directory.
        # makedirs also creates the given temporary directory itself if it
        # does not exist yet (plain mkdir would fail in that case)
        if os.path.isdir(tmp_folder):
            shutil.rmtree(tmp_folder)
        os.makedirs(tmp_folder)

    # Parse input folder
    # Only PDF files are considered with first digits
    # containing matriculation number
    matnums_folder = []
    pdfs_by_matnum = {}  # matnum -> sorted list of PDF file names
    for f in sorted(os.listdir(infolder)):
        if f.lower().endswith('.pdf') and matnum_utils.starts_with_matnum(f):
            pdf_matnum = matnum_utils.get_matnum(f)
            matnums_folder.append(pdf_matnum)
            pdfs_by_matnum.setdefault(pdf_matnum, []).append(f)

    # Parse grading infos from CSV file
    infos = moodle.extract_info(sheet_csv=sheet_csv, csv_delim=csv_delim,
                                csv_quote=csv_quote, csv_enc=csv_enc)

    # Loop over grading infos
    num_found_pdfs = 0
    matnums_csv = []
    if no_warn:
        print("Start processing", sep=' ', end='', flush=True)
    else:
        print("Start processing")

    for cnt, info in enumerate(infos):
        # Copy PDF files
        # Find all PDFs starting with matriculation number, e.g.
        # '123456_Lastname_sheet.pdf' and '123456_Lastname_exam.pdf'
        # If pdf files for current student exists, create a directory and
        # copy the pdf files to it. The resulting directories can be
        # uploaded to Moodle
        matnum = info['matnum']
        matnums_csv.append(matnum)
        moodleid = info['moodleid']

        pdfs_student = pdfs_by_matnum.get(matnum, [])
        if pdfs_student:  # Found at least one pdf
            num_found_pdfs += len(pdfs_student)

            # Prepare submission folder
            folder = moodle.submission_folder_name(info)
            longfolder = os.path.join(tmp_folder, folder)

            # Create folder
            if not dry:
                os.mkdir(longfolder)

            # Copy all files to folder
            for pdffile in pdfs_student:
                longpdffile = os.path.join(infolder, pdffile)
                longpdffiledest = os.path.join(longfolder, pdffile)
                if not dry:
                    shutil.copyfile(longpdffile, longpdffiledest)
                else:
                    dryout.append(
                        "- {old} -> {new}"
                        .format(
                            old=pdffile, new=os.path.join(folder, pdffile)))

        elif not no_warn:  # No PDF found
            print("Warning: PDF for {matnum} (id={id}, name={name}) not found."
                  .format(matnum=matnum, id=moodleid, name=info['fullname']))

        # Print for-loop progress
        if no_warn and not (cnt % max(1, round(num_students/10))):
            print(".", sep=' ', end='', flush=True)

    # Print results
    print("done.")
    print("Found {num_pdf} PDFs (CSV had {num_csv} entries)"
          .format(num_pdf=num_found_pdfs, num_csv=num_students))

    # Sanity check:
    # Check for PDFs not reflected in CSV (student not registered in Moodle)
    sanity_check(matnums_csv, matnums_folder)

    # Zip
    if not dry:
        print("Zipping")
        zip_cnt = zip_folders(
            base_folder=tmp_folder, zip_file=outzip, size_limit=size_limit)

        # Remove temporary folder
        shutil.rmtree(tmp_folder)

        # Print status
        print("{cnt} zip archives are stored ({zip}*)"
              .format(cnt=zip_cnt, zip=os.path.splitext(outzip)[0]))

    # Print dry run results
    else:
        dryout.sort()
        print("\nDry run results:\n{}".format("\n".join(dryout)))

    # Print status
    endtime = time.time()
    print("""Done.
Time taken: {:.2f}""".format(endtime-starttime))
306
307


308
# Main routine
309
if __name__ == '__main__':
    # Hand everything after the program name to the main routine
    cli_args = sys.argv[1:]
    main(cli_args)