Merge branch 'rohlfing-student-grading-info' into 'master'

Encapsulate parsing student grading info from CSV. See merge request !34

Merge branch 'rohlfing-student-grading-info' into 'master'
1834a797 · Amrita Deb · 003659f5 · b50f1b7a · 1834a797 · 1834a797
Commit 1834a797 authored Apr 27, 2021 by Amrita Deb
--- a/Grades.csv
+++ b/Grades.csv
+Identifier,"Full name","Registration number",Status,Grade,"Grade can be changed","Last modified (grade)","Feedback comments"
+"Participant 1519324","Lastname, First",321121,,,Yes,"Monday, 15 March 2021, 5:42 PM",
+"Participant 1519323","Yet another last name, First Name",321122,,,Yes,"Monday, 15 March 2021, 5:42 PM",
+"Participant 1519322","Last, First",321123,,,Yes,"Monday, 15 March 2021, 5:42 PM",
+"Participant 1519321","d'Lastname, Firstname",321124,,,Yes,"Monday, 15 March 2021, 5:42 PM",
--- a/preparemoodle.py
+++ b/preparemoodle.py
 #!/usr/bin/env python

-import csv
 import os
 import time
 import shutil  # copyfile, make_archive
-import argparse
+import argparse  # argument parsing
 import sys
+
+import utils.moodle as moodle
 import utils.matnum as matnum_utils


-def find_unmatched_pdfs(infolder, matnums, nowarn):
-    """Finds matnumbers not present in CSV but in PDF folder
+def sanity_check(matnums_csv, matnums_folder):
+    """Check two cases for sanity:
+    - Are there PDF files with no corresponding CSV entries?
+    - Are there CSV entries with no provided PDF file?

    Args:
-        infolder (str): path to input folder
-        matnums (list): list of matriculation numbers
-        nowarn (int): flag
+        matnums_csv (list): Matnums of all CSV entries
+        matnums_folder (list): Matnums of all provided PDF files
    """

-    print("\nSearching for matnumbers not present in CSV but in PDF folder:")
-
-    # Loop over all PDFs:
-    notfoundmatnums = []
-    for root, dirs, files in os.walk(infolder):
-        for pdffile in files:
-            if pdffile.endswith(".pdf"):
-                # Get matriculation number from file
-                matnum = matnum_utils.get_matnum(pdffile)
+    # PDF files with no entry in CSV:
+    notfoundcsv = list(set(matnums_folder).difference(matnums_csv))

-                # Search matriculation number in CSV
-                if matnum not in matnums:
-                    notfoundmatnums.append(matnum)
-                    if not nowarn:
-                        print("Warning: {} not in CSV".format(matnum))
+    # Entries in CSV without PDF file
+    notfoundpdf = list(set(matnums_csv).difference(matnums_folder))

    # Report back
-    if len(notfoundmatnums) > 0:
-        print('''Could not find following {} matnumbers in CSV:
-            {}'''.format(len(notfoundmatnums), ", ".join(notfoundmatnums)))
+    if len(notfoundcsv) > 0:
+        print('''Warning: Following {} matnums have PDFs but no entry in CSV:
+            {}'''.format(len(notfoundcsv), ", ".join(notfoundcsv)))
+
+    if len(notfoundpdf) > 0:
+        print('''Warning: Following {} matnums have CSV entries but no PDF:
+            {}'''.format(len(notfoundpdf), ", ".join(notfoundpdf)))

    print("Done.\n")

+    return notfoundcsv, notfoundpdf
+

 def main(args):
    """Main routine
@@ -49,141 +47,166 @@ def main(args):
    # Parse input arguments
    parser = argparse.ArgumentParser(description='''
    prepares batch upload to Moodle via assignment module.
-    PDFs in folder 'in' are moved to folder 'tmp' with a certain folder structure and finally zipped to 'out'.
+    PDFs in folder 'in' are moved to folder 'tmp' with a certain folder
+    structure and finally zipped to 'out'.
    Attention: zip-archive 'out' will be overwritten in the following!

    ''')
-    parser.add_argument("-i", "--infolder", default="./pdfs_encrypted",
+    parser.add_argument(
+        "-i", "--infolder", default="./pdfs_encrypted",
        help="Input folder with PDFs. Default: ./pdfs_encrypted")
-    parser.add_argument("-c", "--csv", default="./Bewertungen.csv",
-                        help="Moodle grading CSV file, needed to construct the folder names. Default: ./Bewertungen.csv")
-    parser.add_argument("-o", "--outzip", default="./moodle_feedbacks.zip",
-                        help="Output zip archive. Default: ./moodle_feedbacks.zip")
-    parser.add_argument("-d", "--dry", action='store_true',
-                        help="Flag for dry run, displays only the folder structure inside the archive moodle_feedbacks.zip")
-    parser.add_argument("-t", "--tmp", default="./tmp",
-                        help="tmp folder. Default: ./tmp")
-    parser.add_argument("--nowarn", action='store_true',
-                        help="Disables warnings")
+    parser.add_argument(
+        "-c", "--csv", default="./Bewertungen.csv",
+        help="Moodle grading sheet. Default: ./Bewertungen.csv")
+    parser.add_argument(
+        "--csvdelim", default=",", help="CSV delimiter. Default: ','")
+    parser.add_argument(
+        "--csvquote", default='"', help="CSV quote char." + """Default: '"'""")
+    parser.add_argument(
+        "--csvenc", default="utf-8", help="CSV encoding scheme. " +
+        "Typical encodings:'utf-8', 'utf-8-sig', or 'cp1252' (Windows). " +
+        "Default: 'utf-8'")
+    parser.add_argument(
+        "-o", "--outzip", default="./moodle_feedbacks.zip",
+        help="Zip archive. Default: ./moodle_feedbacks.zip")
+    parser.add_argument(
+        "-d", "--dry", action='store_true', help="Flag for dry run.")
+    parser.add_argument(
+        "-t", "--tmp", default="./tmp", help="Temporary folder. Default:./tmp")
+    parser.add_argument(
+        "--nowarn", action='store_true', help="Disables warnings")

    args = parser.parse_args(args)
    infolder = args.infolder
-    csvfilename = args.csv
+    sheet_csv = args.csv
    outzip = args.outzip
-    tmpfolder = os.path.join(args.tmp, "to_be_zipped_for_moodle")
+    tmp_folder = os.path.join(args.tmp, "to_be_zipped_for_moodle")
    dry = args.dry
-    nowarn = args.nowarn
+    no_warn = args.nowarn
+    csv_delim = args.csvdelim
+    csv_quote = args.csvquote
+    csv_enc = args.csvenc

+    # Print status
    starttime = time.time()
-
-    # Print status with total number of lines
-    numlines = 0
-    with open(csvfilename, newline='') as csvfile:
-        numlines = sum(1 for line in csvfile)
+    num_students = moodle.get_student_number(sheet_csv=sheet_csv,
+                                             csv_enc=csv_enc)

    print('''Preparing for moodle upload
-Processing {} lines
-  '''.format(numlines))
+Processing {} students'''.format(num_students))

-    dryout = ""
+    # Clean up and create temporary folder
+    dryout = []
    if dry:
-        print("Dry run\n")
+        print("Dry run")
    else:
        # Remove zip file
        if os.path.exists(outzip):
            os.remove(outzip)

        # Create temporary folder within given temporary directory
-        if not os.path.isdir(tmpfolder):
-            os.mkdir(tmpfolder)
-
-    # Open CSV file
-    with open(csvfilename, newline='') as csvfile:
-
-        numfoundpdfs = 0
-        matnums = []
-        line_cnt = 0
-        print("Start iterating...", sep='', end='', flush=True)
-
-        # Loop over all lines in CSV file
-        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
-        next(reader)  # skip header CSV line
-        for row in reader:
-            # Parse required fields from CSV line
-            # Moodle has its own internal ID per participant alongside
-            # matriculation number
-            moodleid = row[0]
-            moodleid = moodleid.replace("Teilnehmer/in", "")  # German
-            moodleid = moodleid.replace("Participant ", "")  # English
-            name = row[1]  # Lastname, Firstname
-            matnum = row[2]  # matriculation number (6-digit)
-            matnums.append(matnum)  # save matriculation number for later
-
+        if not os.path.isdir(tmp_folder):
+            os.mkdir(tmp_folder)
+
+    # Parse input folder
+    # Only PDF files are considered with first digits
+    # containing matriculation number
+    matnums_folder = []
+    allfiles = os.listdir(infolder)
+    allfiles.sort()
+    allpdfs = []
+    for f in allfiles:
+        if f.lower().endswith('.pdf') and matnum_utils.starts_with_matnum(f):
+            allpdfs.append(f)
+            matnums_folder.append(matnum_utils.get_matnum(f))
+
+    # Parse grading infos from CSV file
+    infos = moodle.extract_info(sheet_csv=sheet_csv, csv_delim=csv_delim,
+                                csv_quote=csv_quote, csv_enc=csv_enc)
+
+    # Loop over grading infos
+    num_found_pdfs = 0
+    matnums_csv = []
+    moodleids = []
+    if no_warn:
+        print("Start processing", sep=' ', end='', flush=True)
+    else:
+        print("Start processing")
+    for cnt, info in enumerate(infos):
        # Copy PDF files
        # Find all PDFs starting with matriculation number, e.g.
        # '123456_Lastname_sheet.pdf' and '123456_Lastname_exam.pdf'
        # If pdf files for current student exists, create a directory and
        # copy the pdf files to it. The resulting directories can be
        # uploaded to Moodle
-            longpdffiles = matnum_utils.find_file(matnum + "*.pdf", infolder)
-            if len(longpdffiles) > 0:  # Found some file(s)
-                numfoundpdfs += 1
+        matnum = info['matnum']
+        matnums_csv.append(matnum)
+        moodleid = info['moodleid']
+        moodleids.append(moodleid)

-                # Prepare folder
-                # For upload, Moodle accepts submission files per participant
-                folder = "{}_{}_assignsubmission_file_".format(name, moodleid)
-                longfolder = os.path.join(tmpfolder, folder)
+        pdfs_student = [_ for _ in allpdfs
+                        if matnum == matnum_utils.get_matnum(_)]
+        if len(pdfs_student) > 0:  # Found at least one pdf
+            num_found_pdfs += len(pdfs_student)
+
+            # Prepare submission folder
+            folder = moodle.submission_folder_name(info)
+            longfolder = os.path.join(tmp_folder, folder)

            # Create folder
            if not dry:
                os.mkdir(longfolder)

            # Copy all files to folder
-                for longpdffile in longpdffiles:
-                    pdffile = os.path.basename(longpdffile)
-
+            for pdffile in pdfs_student:
+                longpdffile = os.path.join(infolder, pdffile)
+                longpdffiledest = os.path.join(longfolder, pdffile)
                if not dry:
-                        shutil.copyfile(longpdffile,
-                                        os.path.join(longfolder, pdffile))
+                    shutil.copyfile(longpdffile, longpdffiledest)
                else:
-                        dryout += "\n{}".format(os.path.join(folder, pdffile))
-            else:
-                if not nowarn:
-                    print("Warning: PDF corresponding to matnumber {} (moodleid={}, name={}) not available.".format(
-                        matnum, moodleid, name
-                    ))
+                    dryout.append(
+                        "- {old} -> {new}"
+                        .format(old=pdffile, new=os.path.join(folder, pdffile)))
+
+        elif not no_warn:  # No PDF found
+            print("Warning: PDF for {matnum} (id={id}, name={name}) not found."
+                  .format(matnum=matnum, id=moodleid, name=info['fullname']))

-            # Print progress
-            if not (line_cnt % max(1, round(numlines/10))):
+        # Print for-loop progress
+        if no_warn and not (cnt % max(1, round(num_students/10))):
            print(".", sep=' ', end='', flush=True)
-            line_cnt += 1

    # Print results
-    print("Found {} PDFs (CSV had {} entries)".format(numfoundpdfs, numlines))
    print("done.")
+    print("Found {num_pdf} PDFs (CSV had {num_csv} entries)"
+          .format(num_pdf=num_found_pdfs, num_csv=num_students))
+    

    # Sanity check:
    # Check for PDFs not reflected in CSV (student not registered in Moodle)
-    find_unmatched_pdfs(infolder, matnums, nowarn)
+    sanity_check(matnums_csv, matnums_folder)

-    # Zipping
+    # Zip
    if not dry:
        # Zip
        print("Zipping")
-        shutil.make_archive(os.path.splitext(outzip)[0], 'zip', tmpfolder)
-        print('The Zip archive is available at: '+outzip)
+        shutil.make_archive(os.path.splitext(outzip)[0], 'zip', tmp_folder)
+        print('Zip archive is stored at {}'.format(outzip))

        # Delete temporary folder
-        shutil.rmtree(tmpfolder)
+        shutil.rmtree(tmp_folder)

+    # Print dry run results
    else:
-        print("\nDry run results:\n{}".format(dryout))
+        dryout.sort()
+        print("\nDry run results:\n{}".format("\n".join(dryout)))

+    # Print status
    endtime = time.time()
-
    print("""Done.
 Time taken: {:.2f}""".format(endtime-starttime))


+# Main routine
 if __name__ == '__main__':
    main(sys.argv[1:])
--- a/preparepdf.py
+++ b/preparepdf.py
@@ -3,92 +3,225 @@ import os  # path listing/manipulation/...
 import time  # keep track of time
 import argparse  # handle command line arguments
 import shutil  # unzipping and copying files
-import re # pattern matching
-import csv # opening grading worksheet csv
+
+from utils import moodle as moodle
+

 def main(args):
-    """Transfer PDF files from zip file containing all submissions into user provided folder following exam scan naming convention
+    """Transfer PDF files from submisions zip file (or already extracted folder)
+    containing all submissions into user provided folder following exam scan
+    naming convention

-    1) files are extracted from user-provided zip file location eg: ./all_submissions.zip
-    2) Scan through extracted folder for PDF files. Only 1 PDF file/student is accepted.
-    3) Matriculation number and last name of student is fetched from grading worksheet
-    4) PDFs from extracted folder are renamed according to convention and placed in user provided outfolder
+    1) Files are extracted from zip file location eg: ./all_submissions.zip
+        In case folder is given, extraction is skipped.
+    2) Scan through extracted folder for PDF files.
+        Only 1 PDF file/student is accepted.
+    3) Matriculation number and last name are fetched from grading worksheet
+    4) PDFs from extracted folder are renamed according to convention and
+        placed in user provided outfolder
    """

    # Argument handling
    parser = argparse.ArgumentParser(description='''
-      Zip file, provided with parameter inzip, containg all submissions of an assignment is extracted, 
-      renamed according to convention and placed in folder provided with prameter --outfolder
+      Zip file 'inzip', containing all submissions of an assignment,
+      is extracted, renamed according to convention 'filenameformat'
+      and placed in folder 'outfolder'.
      ''')
-    parser.add_argument("-o", "--outfolder", default="./pdfs",
-        help="Output folder with PDFs followingname schema. Default: ./pdfs")
-    parser.add_argument("-i", "--inzip", default="0",
-        help="Input zip file. Default: 0")
-    parser.add_argument("-c", "--csv", default="./Bewertungen.csv",
-        help="Moodle grading CSV file, needed to construct the folder names. Default: ./Bewertungen.csv")
-    
+    parser.add_argument(
+        "-i", "--inzip", default="submissions.zip",
+        help="Input zip file or already extracted folder. " +
+        "Default: ./submissions.zip")
+    parser.add_argument(
+        "-o", "--outfolder", default="./pdfs",
+        help="Output folder with PDFs. Default: ./pdfs")
+    parser.add_argument(
+        "--filenameformat", default="{matnum}_{fullname[0]}",
+        help="File name format. Available keywords: " +
+        "{{matnum}}, {{fullname}}, {{lastname}}, {{firstname}}. " +
+        "Default: '{{matnum}}_{{fullname[0]}}'")
+    parser.add_argument(
+        "--copyall", action='store_true',
+        help="If set, copies all files (including multiple and non-PDF files)")
+    parser.add_argument(
+        "--appendoriginal", action='store_true',
+        help="If set, appends original file name to new location's file name")
+    parser.add_argument(
+        "-c", "--csv", default="./Bewertungen.csv",
+        help="Moodle grading sheet. Default: ./Bewertungen.csv")
+    parser.add_argument(
+        "--csvdelim", default=",", help="CSV delimiter. Default: ','")
+    parser.add_argument(
+        "--csvquote", default='"', help="CSV quote char." + """Default: '"'""")
+    parser.add_argument(
+        "--csvenc", default="utf-8", help="CSV encoding scheme. " +
+        "Typical encodings:'utf-8', 'utf-8-sig', or 'cp1252' (Windows). " +
+        "Default: 'utf-8'")
+    parser.add_argument(
+        "-d", "--dry", action='store_true', help="Flag for dry run.")
+    parser.add_argument(
+        "-t", "--tmp", default="./tmp", help="Temporary folder. Default:./tmp")

    args = parser.parse_args(args)
    inzip = args.inzip
    outfolder = args.outfolder
-    csvfilename = args.csv
+    sheet_csv = args.csv
+    dry = args.dry
+    csv_enc = args.csvenc
+    csv_delim = args.csvdelim
+    csv_quote = args.csvquote
+    copy_all = args.copyall
+    append_original_name = args.appendoriginal
+    filenameformat = args.filenameformat
+    tmp_folder = args.tmp
+    extracted_folder = os.path.join(tmp_folder, "extracted_from_moodle")
+
+    # Print status
+    starttime = time.time()
+    num_students = moodle.get_student_number(sheet_csv=sheet_csv,
+                                             csv_enc=csv_enc)
+
+    print('''Preparing for renaming of submission files.
+Processing {} students
+  '''.format(num_students))
+
+    # Clean up and create temporary folder
+    dryout = []
+    if dry:
+        print("Dry run\n")
+
+    # Check whether zip or folder is given
+    folder_instead_of_zip = False
+    if not(inzip.lower().endswith(('.zip'))):
+        if not(os.path.isdir(inzip)):
+            raise Exception(
+                "{zip} neither Zip file nor folder. Exiting."
+                .format(zip=inzip))
+        # Folder was given instead of Zip file
+        extracted_folder = inzip
+        folder_instead_of_zip = True

-    if inzip == "0" or not(inzip.lower().endswith(('.zip'))):
-        print ('\n***ERROR*** Not a suitable zip file. The script cannot proceed')
-        return
    else:
-        print('\nExtracting files from '+inzip+' ...')
-        try:
-            extracted_folder = os.path.splitext(inzip)[0]
+        # Extract
+        print("Extracting files from {zip} ...".format(zip=inzip))
+        if not dry:
            shutil.unpack_archive(inzip, extracted_folder)  # unzip file
-        except:
-            print('\n***ERROR*** Something went wrong. Check if you have given the correct name and path for the zip file')
-            return
-
-    with open(csvfilename, newline='') as csvfile:
-        matnums = []
-        moodleids={}
-        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
-        next(reader)  # skip header CSV line
-        for row in reader:
-                # Parse required fields from CSV line
-                # Moodle has its own internal ID per participant alongside
-                # matriculation number
-            moodleid = row[0]
-            moodleid = moodleid.replace("Teilnehmer/in", "")  # German
-            moodleid = moodleid.replace("Participant ", "")  # English
-            
-            name = row[1]  # Lastname, Firstname
-            matnum = row[2]  # matriculation number
-            matnums.append(matnum)  # save matriculation number for later
-            moodleids[moodleid] = matnum
-            pattern = '.*_'+moodleid+'_.*'
-            folder_lists = [folder for folder in os.listdir(extracted_folder) if re.compile(pattern).match(folder)]
-            for folder in folder_lists:
-                print('\n************** For Matriculation number '+matnum+' ****************\n')
-                unsupported_files = []
-                for dirpath, dirnames, filenames in os.walk(os.path.join(extracted_folder,folder)):
-                    if(len([f for f in filenames if f.endswith(".pdf")])>1): # only 1 file per student is allowed
-                        print('Multiple PDF files found in submission. Each student should submit only 1 PDF file')
-                        break
-                    elif (len([f for f in filenames if f.endswith(".pdf")])==0): 
-                        if not dirnames:
-                            print('No PDFs were submitted')# No PDF found in a student's submission
-                        else: #deals if students uploaded a folder instead of a file
-                            continue
        else:
-                        for filename in [f for f in filenames if f.endswith(".pdf")]: # renames and copies PDF to outfolder
-                            shutil.copy(os.path.join(dirpath,filename),os.path.join(outfolder,matnum+'_'+name.split(',')[0]+'_'+os.path.splitext(filename)[0]+'.pdf'))
-                            print(filename+' is renamed to '+matnum+'_'+name.split(',')[0]+'.pdf and placed in '+outfolder )
-                for dirpath, dirnames, filenames in os.walk(os.path.join(extracted_folder,folder)):
-                    for filename in [f for f in filenames if not f.endswith(".pdf")]:
-                        unsupported_files.append(filename)
-
-                if unsupported_files: #Lists all non-PDF files found in a student's submission
-                    print('This script only supports PDF files. Hence the below files cannot be handled:')
-                    for filename in unsupported_files:
-                        print('- '+filename)
+            raise Exception("Dry run prevents me from unpacking the Zip file.")
+
+    # List all extracted folders
+    folders = os.listdir(extracted_folder)
+    folders.sort()
+
+    # There should never be more folders than entries in CSV file
+    if len(folders) > num_students:
+        raise Exception(
+            ("More folders ({num_folders}) than "
+             "students in CSV file ({num_students})")
+            .format(num_folders=len(folders), num_students=num_students))
+
+    # Parse grading infos from CSV file
+    infos = moodle.extract_info(sheet_csv=sheet_csv, csv_delim=csv_delim,
+                                csv_quote=csv_quote, csv_enc=csv_enc)
+
+    # Collect non-default cases:
+    # Student did not submit anything
+    infos_no_submission = []
+    # Student submitted more than one file
+    infos_multi_files = []
+    # Student submitted a non-PDF file
+    infos_unsupported_files = []
+
+    # Loop over grading info
+    print("Copying submissions", sep=' ', end='', flush=True)
+    for cnt, info in enumerate(infos):
+        folder = moodle.submission_folder_name(info)
+
+        if folder in folders:
+            # Folder was found
+            folderfull = os.path.join(extracted_folder, folder)
+            files = os.listdir(folderfull)
+
+            # Notify if folder empty
+            if len(files) == 0:
+                infos_no_submission.append(info)
+
+            # Notify if more than one submission
+            if len(files) > 1:
+                infos_multi_files.append(info)
+
+            # Iterate over all files within folder
+            for file_cnt, file in enumerate(files):
+                file_full = os.path.join(folderfull, file)
+
+                # Create destination file name
+                dest = filenameformat.format(
+                    matnum=info['matnum'], fullname=info['fullname'],
+                    lastname=info['lastname'], firstname=info['firstname'])
+
+                # Add unique file ID (only for copy all)
+                if copy_all > 0:
+                    dest = dest + "_{:03d}".format(file_cnt)
+
+                base, ext = os.path.splitext(file)
+                # Add original file name
+                if append_original_name:
+                    dest = dest + "_" + base
+
+                # Add extension
+                dest = dest + ext
+                dest_full = os.path.join(outfolder, dest)
+
+                # Notify if non-PDF file
+                is_pdf = file_full.lower().endswith('.pdf')
+                if not is_pdf and \
+                        info not in infos_unsupported_files:
+                    infos_unsupported_files.append(info)
+
+                # Copy either first PDF file or all files if copyall is active
+                if (file_cnt == 0 and is_pdf) or copy_all:
+                    if not dry:
+                        shutil.copyfile(file_full, dest_full)
+                    else:
+                        dryout.append(
+                            "- {old} -> {new}"
+                            .format(old=os.path.join(folder, file), new=dest))
+        else:
+            # Notify if folder was not found
+            infos_no_submission.append(info)
+
+        # Print for-loop progress
+        if not (cnt % max(1, round(num_students/10))):
+            print(".", sep=' ', end='', flush=True)
+
+    print("done.")
+
+    # Report back special cases
+    for report in [(infos_no_submission, "no files"),
+                   (infos_multi_files, "multiple files"),
+                   (infos_unsupported_files, "unsupported files")]:
+        infos, reason = report
+        if len(infos) > 0:
+            lines = ["- {folder} ({matnum})"
+                     .format(folder=moodle.submission_folder_name(_),
+                             matnum=_['matnum'])
+                     for _ in infos]
+            lines.sort()
+            print(
+                "\nSubmissions of {reason}:\n{lines}"
+                .format(reason=reason, lines="\n".join(lines)))
+
+    # Dry run output
+    if not dry:
+        # Delete temporary folder
+        if not folder_instead_of_zip:
+            shutil.rmtree(extracted_folder)
+    else:
+        dryout.sort()
+        print("\nDry run results:\n{}".format("\n".join(dryout)))

+    # Print status
+    endtime = time.time()
+    print("Time taken: {:.2f}".format(endtime-starttime))


 if __name__ == '__main__':

--- a/supplements.py
+++ b/supplements.py
@@ -13,9 +13,9 @@ import os  # path listing/manipulation/...
 import time  # keep track of time
 import argparse  # handle command line arguments
 import shutil  # copy
-import csv  # handle CSV files

 import utils.matnum as matnum_utils
+import utils.moodle as moodle


 def copy_supplements(supp_dir, supp_files, prefixes, output_dir, dry=False):
@@ -51,7 +51,9 @@ def copy_supplements(supp_dir, supp_files, prefixes, output_dir, dry=False):
            if not dry:
                shutil.copyfile(supp_filefull, new_filefull)
            else:
-                dryout.append(new_file)
+                dryout.append(
+                    "- {old} -> {new}"
+                    .format(old=supp_file, new=new_file))
            copied_files.append(new_file)

            # Print progress
@@ -80,21 +82,40 @@ def main(args):
      This information is either taken from the filenames of exam scan PDFs or
      from the Moodle grading CSV file.
      ''')
-    parser.add_argument("-i", "--infolder", default="./supplements",
+    parser.add_argument(
+        "-i", "--infolder", default="./supplements",
        help="Folder with supplements. Default: ./supplements")
-    parser.add_argument("-p", "--prefix", default="./pdfs",
-                        help="Provides information to construct prefixes. " +
-                              "Either PDF folder with scanned PDFs or " +
-                             "Moodle grading CSV file. Default: ./pdfs")
-    parser.add_argument("-o", "--outfolder", default="./supplements_out",
+    parser.add_argument(
+        "-p", "--prefix", default="./pdfs",
+        help="Provides information to construct prefixes. Either PDF folder " +
+        "with scanned PDFs or Moodle grading CSV file. Default: ./pdfs")
+    parser.add_argument(
+        "-o", "--outfolder", default="./supplements_out",
        help="Output folder. Default: ./supplements_out")
-    parser.add_argument("-d", "--dry", action='store_true',
-                        help="Flag for dry run")
+    parser.add_argument(
+        "--filenameformat", default="{matnum}_{fullname[0]}",
+        help="File name format. Available keywords: " +
+        "{{matnum}}, {{fullname}}, {{lastname}}, {{firstname}}. " +
+        "Default: '{{matnum}}_{{fullname[0]}}'")
+    parser.add_argument(
+        "--csvdelim", default=",", help="CSV delimiter. Default: ','")
+    parser.add_argument(
+        "--csvquote", default='"', help="CSV quote char." + """Default: '"'""")
+    parser.add_argument(
+        "--csvenc", default="utf-8", help="CSV encoding scheme. " +
+        "Typical encodings:'utf-8', 'utf-8-sig', or 'cp1252' (Windows). " +
+        "Default: 'utf-8'")
+    parser.add_argument(
+        "-d", "--dry", action='store_true', help="Flag for dry run")

    args = parser.parse_args(args)
    supp_dir = args.infolder
    prefixinfo = args.prefix
+    prefixformat = args.filenameformat
    output_dir = args.outfolder
+    csv_enc = args.csvenc
+    csv_delim = args.csvdelim
+    csv_quote = args.csvquote
    dry = args.dry

    # Decide whether PDF folder or CSV file was given
@@ -114,7 +135,7 @@ def main(args):
    # Print status
    starttime = time.time()
    supp_folder = os.listdir(supp_dir)
-    supp_files = [_ for _ in supp_folder if _.endswith(".pdf")]
+    supp_files = [_ for _ in supp_folder if _.lower().endswith(".pdf")]
    print("""
 Available supplement PDFs to be copied:
 - {}
@@ -127,7 +148,7 @@ Files in output folder {} will be overwritten during this process.
    if pdf_dir != "":  # Take prefixes from pdf directory
        pdf_folder = os.listdir(pdf_dir)
        pdf_files = [_ for _ in pdf_folder
-                     if _.endswith(".pdf") and
+                     if _.lower().endswith(".pdf") and
                     matnum_utils.starts_with_matnum(_)]
        prefixes = []
        for pdf_file in pdf_files:
@@ -135,20 +156,12 @@ Files in output folder {} will be overwritten during this process.
            prefixes.append(prefix)
    else:  # Take prefixes from CSV file
        prefixes = []
-        # Open CSV file
-        with open(csvfilename, newline='') as csvfile:
-            # Loop over all lines in CSV file
-            reader = csv.reader(csvfile, delimiter=',', quotechar='"')
-            next(reader)  # skip header CSV line
-            for row in reader:
-                # Parse required fields from CSV line
-                name = row[1]  # [Lastname], [Firstname]
-                name = name[0]  # Take only first letter of lastname
-                matnum = row[2]  # matriculation number (6-digit)
-                if not matnum_utils.check_matnum(matnum):
-                    raise Exception("Invalid matriculation number found")
-
-                prefix = matnum + "_" + name
+        infos = moodle.extract_info(sheet_csv=csvfilename, csv_delim=csv_delim,
+                                    csv_quote=csv_quote, csv_enc=csv_enc)
+        for info in infos:
+            prefix = prefixformat.format(
+                matnum=info['matnum'], fullname=info['fullname'],
+                lastname=info['lastname'], firstname=info['firstname'])
            prefixes.append(prefix)  # save prefix

    # Copy supplements to output dir and prepend prefixes

--- a/tests/test_moodle.py
+++ b/tests/test_moodle.py
+import unittest
+import time
+
+
+class MainTest(unittest.TestCase):
+    """Tests for the grading sheet helpers in utils.moodle."""
+
+    def setUp(self):
+        # Remember the start time so tearDown can report the duration
+        self.tic = time.time()
+
+    def tearDown(self):
+        # Print how long the test took
+        self.toc = time.time()
+        elapsed = self.toc - self.tic
+        print('Time: %.3f' % elapsed)
+
+    def test_extract_grading_info(self):
+        """Parse both sample sheets and check selected entries."""
+        from utils import moodle as moodle_utils
+
+        # First sheet: third entry and total number of entries
+        first_sheet = "./Bewertungen.csv"
+        first_infos = moodle_utils.extract_info(sheet_csv=first_sheet)
+        self.assertEqual(first_infos[2]['lastname'], "Three")
+        self.assertEqual(
+            moodle_utils.get_student_number(sheet_csv=first_sheet), 3)
+
+        # Second sheet: the apostrophe must be stripped from the name,
+        # i.e. "d'Lastname" becomes "dLastname"
+        second_sheet = "./Grades.csv"
+        second_infos = moodle_utils.extract_info(sheet_csv=second_sheet)
+        self.assertEqual(second_infos[3]['lastname'], "dLastname")
--- a/tests/test_prepare_moodle.py
+++ b/tests/test_prepare_moodle.py
+import unittest
+import time
+import os
+import tempfile
+import shutil
+
+
+class MainTest(unittest.TestCase):
+    """End-to-end test of preparemoodle.main on the sample data."""
+
+    def setUp(self):
+        self.tic = time.time()  # todo this is sooo ugly
+        self.test_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        self.toc = time.time()
+        t = self.toc - self.tic
+        print('Time: %.3f' % (t))
+
+        # Clean up
+        shutil.rmtree(self.test_dir)
+
+    def test_prepare_moodle(self):
+        """Create the feedback archive and check its unpacked content."""
+        import preparemoodle
+
+        expected_feedback_folder = 'Four, Student_107_assignsubmission_file_'
+        expected_feedback_file = '123456_Nachname.pdf'
+
+        # Prepare parameter
+        in_dir = './pdfs'
+        sheet_csv = "./Bewertungen.csv"
+        feedback_zip = 'feedbacks.zip'
+
+        tmp_dir = os.path.join(self.test_dir, 'tmp')
+        os.mkdir(tmp_dir)
+
+        out_zip = os.path.join(self.test_dir, feedback_zip)
+
+        # Call function
+        preparemoodle.main(["-i", in_dir, "--csv", sheet_csv,
+                            "-t", tmp_dir, "-o", out_zip])
+
+        # Unpack feedbacks
+        shutil.unpack_archive(out_zip, tmp_dir)
+
+        # assertTrue(value, expected) would treat `expected` as the failure
+        # message and pass for any non-empty value. Compare for real, and
+        # do not rely on the (arbitrary) order returned by os.listdir.
+        feedback_folders = os.listdir(tmp_dir)
+        self.assertIn(expected_feedback_folder, feedback_folders)
+
+        feedbacks = os.listdir(
+            os.path.join(tmp_dir, expected_feedback_folder))
+        self.assertIn(expected_feedback_file, feedbacks)
--- a/tests/test_preparepdf.py
+++ b/tests/test_preparepdf.py
+import unittest
+import time
+import os
+import tempfile
+
+
+class MainTest(unittest.TestCase):
+    """End-to-end test of preparepdf.main on the sample submissions."""
+
+    def setUp(self):
+        self.tic = time.time()  # todo this is sooo ugly
+
+        self.test_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        # Local import: this module does not import shutil at file level
+        import shutil
+
+        self.toc = time.time()
+        t = self.toc - self.tic
+        print('Time: %.3f' % (t))
+
+        # Clean up, otherwise every run leaves a temporary folder behind
+        shutil.rmtree(self.test_dir)
+
+    def test_copy_from_zip(self):
+        """Extract the submissions and check the created file names."""
+        import preparepdf
+
+        expected_files = [
+            '123456_F.pdf',
+            '123457_O.pdf',
+            '125412_T.pdf']
+
+        # Prepare parameter
+        in_zip = './submissions.zip'
+        sheet_csv = "./Bewertungen.csv"
+
+        out_dir = os.path.join(self.test_dir, 'out')
+        os.mkdir(out_dir)
+
+        # Call function
+        preparepdf.main(["-i", in_zip, "-o", out_dir, "-c", sheet_csv])
+
+        # Assert output (sorted, since os.listdir order is arbitrary)
+        created_files = sorted(os.listdir(out_dir))
+        self.assertEqual(expected_files, created_files)
--- a/tests/test_supplements.py
+++ b/tests/test_supplements.py
@@ -85,7 +85,7 @@ class MainTest(unittest.TestCase):
        # Prepare parameter
        supp_dir = './supplements'
        pdf_dir = './pdfs'
-        dpi = 150
+        dpi = 100

        supp_out_dir = os.path.join(self.test_dir, 'supplements_out')
        os.mkdir(supp_out_dir)

--- a/utils/moodle.py
+++ b/utils/moodle.py
+import csv  # handles csv
+
+
+def get_student_number(sheet_csv, csv_enc='utf-8'):
+    """Count number of student entries in grading sheet
+
+    The file is parsed as CSV instead of being counted line by line, so
+    quoted fields spanning several lines (e.g. multi-line feedback
+    comments) and blank lines are not counted as additional students.
+
+    Args:
+        sheet_csv (str): filename of grading sheet CSV
+        csv_enc (str): CSV encoding
+
+    Returns:
+        int: number of entries (header excluded, never negative)
+    """
+
+    # Open CSV file and count records
+    with open(sheet_csv, newline='', encoding=csv_enc) as csvfile:
+        # csv.reader yields an empty list for blank lines; skip those
+        numrows = sum(1 for row in csv.reader(csvfile) if row)
+
+    # Do not count header line; an empty file simply has no students
+    return max(numrows - 1, 0)
+
+
+def submission_folder_name(grading_info):
+    """Build the name of a student's submission folder
+
+    Args:
+        grading_info (dict): student's grading info from CSV; its
+                             'fullname' and 'moodleid' entries are used
+
+    Returns:
+        str: "<fullname>_<moodleid>_assignsubmission_file_"
+    """
+    fullname = grading_info['fullname']
+    moodleid = grading_info['moodleid']
+
+    return "{fullname}_{moodleid}_assignsubmission_file_".format(
+        fullname=fullname, moodleid=moodleid)
+
+
+def extract_info(sheet_csv, csv_delim=',', csv_quote='"', csv_enc='utf-8'):
+    """Extract grading information from grading sheet
+
+    Args:
+        sheet_csv (str): filename of grading sheet CSV
+        csv_delim (str, optional): CSV delimiter. Defaults to ','.
+        csv_quote (str, optional): CSV quote char. Defaults to '"'.
+        csv_enc (str, optional): CSV encoding. Defaults to 'utf-8'.
+                                 Typical values: "'utf-8', 'utf-8-sig',
+                                 or 'cp1252' (Windows). "
+
+    Returns:
+        list of dicts: grading information with following info per student:
+        moodleid, fullname, matnum, lastname, firstname
+        (the raw columns longid, status and grade are kept as well; any
+        further columns are collected under the key None)
+
+    Raises:
+        ValueError: if the CSV sniffer rejects csv_delim for this file
+        Exception: if a row's full name is missing or not of the form
+            "Lastname, Firstname"
+    """
+
+    # CSV header
+    # We only parse these fieldnames, all other columns are listed under [None]
+    fieldnames = ['longid', 'fullname', 'matnum', 'status', 'grade']
+
+    # Check delimiter
+    with open(sheet_csv, newline='', encoding=csv_enc) as csvfile:
+        try:
+            csv.Sniffer().sniff(csvfile.read(1024),
+                                delimiters=csv_delim)
+        except csv.Error as error:
+            print("csv_delim='{csvdelim}' not correct for file {csv}"
+                  .format(csvdelim=csv_delim, csv=sheet_csv))
+            # Keep the original csv.Error as explicit cause
+            raise ValueError(error) from error
+
+    # Open CSV file
+    with open(sheet_csv, newline='', encoding=csv_enc) as csvfile:
+        # Convert CSV to list of dicts
+        reader = csv.DictReader(csvfile, fieldnames=fieldnames,
+                                delimiter=csv_delim, quotechar=csv_quote)
+        next(reader, None)  # do not save header
+        grading_infos = list(reader)  # convert to list
+
+    # Clean up Moodle ID and split fullname
+    for gi in grading_infos:
+        # Clean up Moodle ID
+        # "Participant xxx" -> "xxx" or
+        # "Teilnehmer/inxxx" -> "xxx"
+        moodleid = gi['longid']
+        moodleid = moodleid.replace("Teilnehmer/in", "")  # German
+        moodleid = moodleid.replace("Participant ", "")  # English
+        gi['moodleid'] = moodleid
+
+        # Rows with fewer columns than fieldnames get None from DictReader;
+        # treat a missing name as empty so the intended error below is
+        # raised instead of an AttributeError
+        fullname = (gi['fullname'] or "").replace("'", "")  # remove "'"
+
+        # Split up fullname into first and last name
+        nameparts = fullname.split(", ")
+        if len(nameparts) != 2:
+            raise Exception(
+                "csv_delim='{csvdelim}' not correct for file {csv}"
+                .format(csvdelim=csv_delim, csv=sheet_csv))
+        gi['lastname'], gi['firstname'] = nameparts
+
+        gi['fullname'] = fullname
+
+    return grading_infos