Commit 1834a797 authored by Amrita Deb
Browse files

Merge branch 'rohlfing-student-grading-info' into 'master'

Encapsulate parsing student grading info from CSV

See merge request !34
parents 003659f5 b50f1b7a
Identifier,"Full name","Registration number",Status,Grade,"Grade can be changed","Last modified (grade)","Feedback comments"
"Participant 1519324","Lastname, First",321121,,,Yes,"Monday, 15 March 2021, 5:42 PM",
"Participant 1519323","Yet another last name, First Name",321122,,,Yes,"Monday, 15 March 2021, 5:42 PM",
"Participant 1519322","Last, First",321123,,,Yes,"Monday, 15 March 2021, 5:42 PM",
"Participant 1519321","d'Lastname, Firstname",321124,,,Yes,"Monday, 15 March 2021, 5:42 PM",
#!/usr/bin/env python
import csv
import os
import time
import shutil # copyfile, make_archive
import argparse
import argparse # argument parsing
import sys
import utils.moodle as moodle
import utils.matnum as matnum_utils
def find_unmatched_pdfs(infolder, matnums, nowarn):
"""Finds matnumbers not present in CSV but in PDF folder
def sanity_check(matnums_csv, matnums_folder):
"""Check two cases for sanity:
- Are there PDF files with no corresponding CSV entries?
- Are there CSV entries with no provided PDF file?
Args:
infolder (str): path to input folder
matnums (list): list of matriculation numbers
nowarn (int): flag
matnums_csv (list): Matnums of all CSV entries
matnums_folder (list): Matnums of all provided PDF files
"""
print("\nSearching for matnumbers not present in CSV but in PDF folder:")
# Loop over all PDFs:
notfoundmatnums = []
for root, dirs, files in os.walk(infolder):
for pdffile in files:
if pdffile.endswith(".pdf"):
# Get matriculation number from file
matnum = matnum_utils.get_matnum(pdffile)
# PDF files with no entry in CSV:
notfoundcsv = list(set(matnums_folder).difference(matnums_csv))
# Search matriculation number in CSV
if matnum not in matnums:
notfoundmatnums.append(matnum)
if not nowarn:
print("Warning: {} not in CSV".format(matnum))
# Entries in CSV without PDF file
notfoundpdf = list(set(matnums_csv).difference(matnums_folder))
# Report back
if len(notfoundmatnums) > 0:
print('''Could not find following {} matnumbers in CSV:
{}'''.format(len(notfoundmatnums), ", ".join(notfoundmatnums)))
if len(notfoundcsv) > 0:
print('''Warning: Following {} matnums have PDFs but no entry in CSV:
{}'''.format(len(notfoundcsv), ", ".join(notfoundcsv)))
if len(notfoundpdf) > 0:
print('''Warning: Following {} matnums have CSV entries but no PDF:
{}'''.format(len(notfoundpdf), ", ".join(notfoundpdf)))
print("Done.\n")
return notfoundcsv, notfoundpdf
def main(args):
"""Main routine
......@@ -49,141 +47,166 @@ def main(args):
# Parse input arguments
parser = argparse.ArgumentParser(description='''
prepares batch upload to Moodle via assignment module.
PDFs in folder 'in' are moved to folder 'tmp' with a certain folder structure and finally zipped to 'out'.
PDFs in folder 'in' are moved to folder 'tmp' with a certain folder
structure and finally zipped to 'out'.
Attention: zip-archive 'out' will be overwritten in the following!
''')
parser.add_argument("-i", "--infolder", default="./pdfs_encrypted",
help="Input folder with PDFs. Default: ./pdfs_encrypted")
parser.add_argument("-c", "--csv", default="./Bewertungen.csv",
help="Moodle grading CSV file, needed to construct the folder names. Default: ./Bewertungen.csv")
parser.add_argument("-o", "--outzip", default="./moodle_feedbacks.zip",
help="Output zip archive. Default: ./moodle_feedbacks.zip")
parser.add_argument("-d", "--dry", action='store_true',
help="Flag for dry run, displays only the folder structure inside the archive moodle_feedbacks.zip")
parser.add_argument("-t", "--tmp", default="./tmp",
help="tmp folder. Default: ./tmp")
parser.add_argument("--nowarn", action='store_true',
help="Disables warnings")
parser.add_argument(
"-i", "--infolder", default="./pdfs_encrypted",
help="Input folder with PDFs. Default: ./pdfs_encrypted")
parser.add_argument(
"-c", "--csv", default="./Bewertungen.csv",
help="Moodle grading sheet. Default: ./Bewertungen.csv")
parser.add_argument(
"--csvdelim", default=",", help="CSV delimiter. Default: ','")
parser.add_argument(
"--csvquote", default='"', help="CSV quote char." + """Default: '"'""")
parser.add_argument(
"--csvenc", default="utf-8", help="CSV encoding scheme. " +
"Typical encodings:'utf-8', 'utf-8-sig', or 'cp1252' (Windows). " +
"Default: 'utf-8'")
parser.add_argument(
"-o", "--outzip", default="./moodle_feedbacks.zip",
help="Zip archive. Default: ./moodle_feedbacks.zip")
parser.add_argument(
"-d", "--dry", action='store_true', help="Flag for dry run.")
parser.add_argument(
"-t", "--tmp", default="./tmp", help="Temporary folder. Default:./tmp")
parser.add_argument(
"--nowarn", action='store_true', help="Disables warnings")
args = parser.parse_args(args)
infolder = args.infolder
csvfilename = args.csv
sheet_csv = args.csv
outzip = args.outzip
tmpfolder = os.path.join(args.tmp, "to_be_zipped_for_moodle")
tmp_folder = os.path.join(args.tmp, "to_be_zipped_for_moodle")
dry = args.dry
nowarn = args.nowarn
no_warn = args.nowarn
csv_delim = args.csvdelim
csv_quote = args.csvquote
csv_enc = args.csvenc
# Print status
starttime = time.time()
# Print status with total number of lines
numlines = 0
with open(csvfilename, newline='') as csvfile:
numlines = sum(1 for line in csvfile)
num_students = moodle.get_student_number(sheet_csv=sheet_csv,
csv_enc=csv_enc)
print('''Preparing for moodle upload
Processing {} lines
'''.format(numlines))
Processing {} students'''.format(num_students))
dryout = ""
# Clean up and create temporary folder
dryout = []
if dry:
print("Dry run\n")
print("Dry run")
else:
# Remove zip file
if os.path.exists(outzip):
os.remove(outzip)
# Create temporary folder within given temporary directory
if not os.path.isdir(tmpfolder):
os.mkdir(tmpfolder)
# Open CSV file
with open(csvfilename, newline='') as csvfile:
numfoundpdfs = 0
matnums = []
line_cnt = 0
print("Start iterating...", sep='', end='', flush=True)
# Loop over all lines in CSV file
reader = csv.reader(csvfile, delimiter=',', quotechar='"')
next(reader) # skip header CSV line
for row in reader:
# Parse required fields from CSV line
# Moodle has its own internal ID per participant alongside
# matriculation number
moodleid = row[0]
moodleid = moodleid.replace("Teilnehmer/in", "") # German
moodleid = moodleid.replace("Participant ", "") # English
name = row[1] # Lastname, Firstname
matnum = row[2] # matriculation number (6-digit)
matnums.append(matnum) # save matriculation number for later
# Copy PDF files
# Find all PDFs starting with matriculation number, e.g.
# '123456_Lastname_sheet.pdf' and '123456_Lastname_exam.pdf'
# If pdf files for current student exists, create a directory and
# copy the pdf files to it. The resulting directories can be
# uploaded to Moodle
longpdffiles = matnum_utils.find_file(matnum + "*.pdf", infolder)
if len(longpdffiles) > 0: # Found some file(s)
numfoundpdfs += 1
# Prepare folder
# For upload, Moodle accepts submission files per participant
folder = "{}_{}_assignsubmission_file_".format(name, moodleid)
longfolder = os.path.join(tmpfolder, folder)
# Create folder
if not os.path.isdir(tmp_folder):
os.mkdir(tmp_folder)
# Parse input folder
# Only PDF files are considered with first digits
# containing matriculation number
matnums_folder = []
allfiles = os.listdir(infolder)
allfiles.sort()
allpdfs = []
for f in allfiles:
if f.lower().endswith('.pdf') and matnum_utils.starts_with_matnum(f):
allpdfs.append(f)
matnums_folder.append(matnum_utils.get_matnum(f))
# Parse grading infos from CSV file
infos = moodle.extract_info(sheet_csv=sheet_csv, csv_delim=csv_delim,
csv_quote=csv_quote, csv_enc=csv_enc)
# Loop over grading infos
num_found_pdfs = 0
matnums_csv = []
moodleids = []
if no_warn:
print("Start processing", sep=' ', end='', flush=True)
else:
print("Start processing")
for cnt, info in enumerate(infos):
# Copy PDF files
# Find all PDFs starting with matriculation number, e.g.
# '123456_Lastname_sheet.pdf' and '123456_Lastname_exam.pdf'
# If pdf files for current student exists, create a directory and
# copy the pdf files to it. The resulting directories can be
# uploaded to Moodle
matnum = info['matnum']
matnums_csv.append(matnum)
moodleid = info['moodleid']
moodleids.append(moodleid)
pdfs_student = [_ for _ in allpdfs
if matnum == matnum_utils.get_matnum(_)]
if len(pdfs_student) > 0: # Found at least one pdf
num_found_pdfs += len(pdfs_student)
# Prepare submission folder
folder = moodle.submission_folder_name(info)
longfolder = os.path.join(tmp_folder, folder)
# Create folder
if not dry:
os.mkdir(longfolder)
# Copy all files to folder
for pdffile in pdfs_student:
longpdffile = os.path.join(infolder, pdffile)
longpdffiledest = os.path.join(longfolder, pdffile)
if not dry:
os.mkdir(longfolder)
# Copy all files to folder
for longpdffile in longpdffiles:
pdffile = os.path.basename(longpdffile)
if not dry:
shutil.copyfile(longpdffile,
os.path.join(longfolder, pdffile))
else:
dryout += "\n{}".format(os.path.join(folder, pdffile))
else:
if not nowarn:
print("Warning: PDF corresponding to matnumber {} (moodleid={}, name={}) not available.".format(
matnum, moodleid, name
))
# Print progress
if not (line_cnt % max(1, round(numlines/10))):
print(".", sep=' ', end='', flush=True)
line_cnt += 1
shutil.copyfile(longpdffile, longpdffiledest)
else:
dryout.append(
"- {old} -> {new}"
.format(old=pdffile, new=os.path.join(folder, pdffile)))
elif not no_warn: # No PDF found
print("Warning: PDF for {matnum} (id={id}, name={name}) not found."
.format(matnum=matnum, id=moodleid, name=info['fullname']))
# Print for-loop progress
if no_warn and not (cnt % max(1, round(num_students/10))):
print(".", sep=' ', end='', flush=True)
# Print results
print("Found {} PDFs (CSV had {} entries)".format(numfoundpdfs, numlines))
print("done.")
print("Found {num_pdf} PDFs (CSV had {num_csv} entries)"
.format(num_pdf=num_found_pdfs, num_csv=num_students))
# Sanity check:
# Check for PDFs not reflected in CSV (student not registered in Moodle)
find_unmatched_pdfs(infolder, matnums, nowarn)
sanity_check(matnums_csv, matnums_folder)
# Zipping
# Zip
if not dry:
# Zip
print("Zipping")
shutil.make_archive(os.path.splitext(outzip)[0], 'zip', tmpfolder)
print('The Zip archive is available at: '+outzip)
shutil.make_archive(os.path.splitext(outzip)[0], 'zip', tmp_folder)
print('Zip archive is stored at {}'.format(outzip))
# Delete temporary folder
shutil.rmtree(tmpfolder)
shutil.rmtree(tmp_folder)
# Print dry run results
else:
print("\nDry run results:\n{}".format(dryout))
dryout.sort()
print("\nDry run results:\n{}".format("\n".join(dryout)))
# Print status
endtime = time.time()
print("""Done.
Time taken: {:.2f}""".format(endtime-starttime))
# Main routine
if __name__ == '__main__':
main(sys.argv[1:])
......@@ -2,93 +2,226 @@ import sys # get arguments from command line
import os # path listing/manipulation/...
import time # keep track of time
import argparse # handle command line arguments
import shutil #unzipping and copying files
import re # pattern matching
import csv # opening grading worksheet csv
import shutil # unzipping and copying files
from utils import moodle as moodle
def main(args):
"""Transfer PDF files from zip file containing all submissions into user provided folder following exam scan naming convention
"""Transfer PDF files from submissions zip file (or already extracted folder)
containing all submissions into user provided folder following exam scan
naming convention
1) files are extracted from user-provided zip file location eg: ./all_submissions.zip
2) Scan through extracted folder for PDF files. Only 1 PDF file/student is accepted.
3) Matriculation number and last name of student is fetched from grading worksheet
4) PDFs from extracted folder are renamed according to convention and placed in user provided outfolder
1) Files are extracted from zip file location eg: ./all_submissions.zip
In case folder is given, extraction is skipped.
2) Scan through extracted folder for PDF files.
Only 1 PDF file/student is accepted.
3) Matriculation number and last name are fetched from grading worksheet
4) PDFs from extracted folder are renamed according to convention and
placed in user provided outfolder
"""
# Argument handling
parser = argparse.ArgumentParser(description='''
Zip file, provided with parameter inzip, containg all submissions of an assignment is extracted,
renamed according to convention and placed in folder provided with prameter --outfolder
Zip file 'inzip', containing all submissions of an assignment,
is extracted, renamed according to convention 'filenameformat'
and placed in folder 'outfolder'.
''')
parser.add_argument("-o", "--outfolder", default="./pdfs",
help="Output folder with PDFs followingname schema. Default: ./pdfs")
parser.add_argument("-i", "--inzip", default="0",
help="Input zip file. Default: 0")
parser.add_argument("-c", "--csv", default="./Bewertungen.csv",
help="Moodle grading CSV file, needed to construct the folder names. Default: ./Bewertungen.csv")
parser.add_argument(
"-i", "--inzip", default="submissions.zip",
help="Input zip file or already extracted folder. " +
"Default: ./submissions.zip")
parser.add_argument(
"-o", "--outfolder", default="./pdfs",
help="Output folder with PDFs. Default: ./pdfs")
parser.add_argument(
"--filenameformat", default="{matnum}_{fullname[0]}",
help="File name format. Available keywords: " +
"{{matnum}}, {{fullname}}, {{lastname}}, {{firstname}}. " +
"Default: '{{matnum}}_{{fullname[0]}}'")
parser.add_argument(
"--copyall", action='store_true',
help="If set, copies all files (including multiple and non-PDF files)")
parser.add_argument(
"--appendoriginal", action='store_true',
help="If set, appends original file name to new location's file name")
parser.add_argument(
"-c", "--csv", default="./Bewertungen.csv",
help="Moodle grading sheet. Default: ./Bewertungen.csv")
parser.add_argument(
"--csvdelim", default=",", help="CSV delimiter. Default: ','")
parser.add_argument(
"--csvquote", default='"', help="CSV quote char." + """Default: '"'""")
parser.add_argument(
"--csvenc", default="utf-8", help="CSV encoding scheme. " +
"Typical encodings:'utf-8', 'utf-8-sig', or 'cp1252' (Windows). " +
"Default: 'utf-8'")
parser.add_argument(
"-d", "--dry", action='store_true', help="Flag for dry run.")
parser.add_argument(
"-t", "--tmp", default="./tmp", help="Temporary folder. Default:./tmp")
args = parser.parse_args(args)
inzip = args.inzip
outfolder = args.outfolder
csvfilename = args.csv
sheet_csv = args.csv
dry = args.dry
csv_enc = args.csvenc
csv_delim = args.csvdelim
csv_quote = args.csvquote
copy_all = args.copyall
append_original_name = args.appendoriginal
filenameformat = args.filenameformat
tmp_folder = args.tmp
extracted_folder = os.path.join(tmp_folder, "extracted_from_moodle")
# Print status
starttime = time.time()
num_students = moodle.get_student_number(sheet_csv=sheet_csv,
csv_enc=csv_enc)
print('''Preparing for renaming of submission files.
Processing {} students
'''.format(num_students))
# Clean up and create temporary folder
dryout = []
if dry:
print("Dry run\n")
# Check whether zip or folder is given
folder_instead_of_zip = False
if not(inzip.lower().endswith(('.zip'))):
if not(os.path.isdir(inzip)):
raise Exception(
"{zip} neither Zip file nor folder. Exiting."
.format(zip=inzip))
# Folder was given instead of Zip file
extracted_folder = inzip
folder_instead_of_zip = True
if inzip == "0" or not(inzip.lower().endswith(('.zip'))):
print ('\n***ERROR*** Not a suitable zip file. The script cannot proceed')
return
else:
print('\nExtracting files from '+inzip+' ...')
try:
extracted_folder = os.path.splitext(inzip)[0]
shutil.unpack_archive(inzip, extracted_folder) #unzip file
except:
print('\n***ERROR*** Something went wrong. Check if you have given the correct name and path for the zip file')
return
with open(csvfilename, newline='') as csvfile:
matnums = []
moodleids={}
reader = csv.reader(csvfile, delimiter=',', quotechar='"')
next(reader) # skip header CSV line
for row in reader:
# Parse required fields from CSV line
# Moodle has its own internal ID per participant alongside
# matriculation number
moodleid = row[0]
moodleid = moodleid.replace("Teilnehmer/in", "") # German
moodleid = moodleid.replace("Participant ", "") # English
name = row[1] # Lastname, Firstname
matnum = row[2] # matriculation number
matnums.append(matnum) # save matriculation number for later
moodleids[moodleid] = matnum
pattern = '.*_'+moodleid+'_.*'
folder_lists = [folder for folder in os.listdir(extracted_folder) if re.compile(pattern).match(folder)]
for folder in folder_lists:
print('\n************** For Matriculation number '+matnum+' ****************\n')
unsupported_files = []
for dirpath, dirnames, filenames in os.walk(os.path.join(extracted_folder,folder)):
if(len([f for f in filenames if f.endswith(".pdf")])>1): # only 1 file per student is allowed
print('Multiple PDF files found in submission. Each student should submit only 1 PDF file')
break
elif (len([f for f in filenames if f.endswith(".pdf")])==0):
if not dirnames:
print('No PDFs were submitted')# No PDF found in a student's submission
else: #deals if students uploaded a folder instead of a file
continue
# Extract
print("Extracting files from {zip} ...".format(zip=inzip))
if not dry:
shutil.unpack_archive(inzip, extracted_folder) # unzip file
else:
raise Exception("Dry run prevents me from unpacking the Zip file.")
# List all extracted folders
folders = os.listdir(extracted_folder)
folders.sort()
# There should never be more folders than entries in CSV file
if len(folders) > num_students:
raise Exception(
("More folders ({num_folders}) than "
"students in CSV file ({num_students})")
.format(num_folders=len(folders), num_students=num_students))
# Parse grading infos from CSV file
infos = moodle.extract_info(sheet_csv=sheet_csv, csv_delim=csv_delim,
csv_quote=csv_quote, csv_enc=csv_enc)
# Collect non-default cases:
# Student did not submit anything
infos_no_submission = []
# Student submitted more than one file
infos_multi_files = []
# Student submitted a non-PDF file
infos_unsupported_files = []
# Loop over grading info
print("Copying submissions", sep=' ', end='', flush=True)
for cnt, info in enumerate(infos):
folder = moodle.submission_folder_name(info)
if folder in folders:
# Folder was found
folderfull = os.path.join(extracted_folder, folder)
files = os.listdir(folderfull)
# Notify if folder empty
if len(files) == 0:
infos_no_submission.append(info)
# Notify if more than one submission
if len(files) > 1:
infos_multi_files.append(info)
# Iterate over all files within folder
for file_cnt, file in enumerate(files):
file_full = os.path.join(folderfull, file)
# Create destination file name
dest = filenameformat.format(
matnum=info['matnum'], fullname=info['fullname'],
lastname=info['lastname'], firstname=info['firstname'])
# Add unique file ID (only for copy all)
if copy_all > 0:
dest = dest + "_{:03d}".format(file_cnt)
base, ext = os.path.splitext(file)
# Add original file name
if append_original_name:
dest = dest + "_" + base
# Add extension
dest = dest + ext
dest_full = os.path.join(outfolder, dest)
# Notify if non-PDF file
is_pdf = file_full.lower().endswith('.pdf')
if not is_pdf and \
info not in infos_unsupported_files:
infos_unsupported_files.append(info)
# Copy either first PDF file or all files if copyall is active
if (file_cnt == 0 and is_pdf) or copy_all:
if not dry:
shutil.copyfile(file_full, dest_full)
else:
for filename in [f for f in filenames if f.endswith(".pdf")]: # renames and copies PDF to outfolder
shutil.copy(os.path.join(dirpath,filename),os.path.join(outfolder,matnum+'_'+name.split(',')[0]+'_'+os.path.splitext(filename)[0]+'.pdf'))
print(filename+' is renamed to '+matnum+'_'+name.split(',')[0]+'.pdf and placed in '+outfolder )
for dirpath, dirnames, filenames in os.walk(os.path.join(extracted_folder,folder)):
for filename in [f for f in filenames if not f.endswith(".pdf")]:
unsupported_files.append(filename)
if unsupported_files: #Lists all non-PDF files found in a student's submission
print('This script only supports PDF files. Hence the below files cannot be handled:')
for filename in unsupported_files:
print('- '+filename)
dryout.append(
"- {old} -> {new}"
.format(old=os.path.join(folder, file), new=dest))
else:
# Notify if folder was not found
infos_no_submission.append(info)
# Print for-loop progress
if not (cnt % max(1, round(num_students/10))):
print(".", sep=' ', end='', flush=True)
print("done.")
# Report back special cases
for report in [(infos_no_submission, "no files"),
(infos_multi_files, "multiple files"),
(infos_unsupported_files, "unsupported files")]:
infos, reason = report
if len(infos) > 0:
lines = ["- {folder} ({matnum})"