Commit 2bad1059 authored by Christian Rohlfing
Browse files

preparepdf uses grading info

parent 6670d2df
......@@ -52,49 +52,49 @@ def main(args):
Attention: zip-archive 'out' will be overwritten in the following!
''')
parser.add_argument("-i", "--infolder", default="./pdfs_encrypted",
help="Input folder with PDFs." +
"Default: ./pdfs_encrypted")
parser.add_argument("-c", "--csv", default="./Bewertungen.csv",
help="Moodle grading sheet, needed to construct " +
"submission folder names. " +
"Default: ./Bewertungen.csv")
parser.add_argument("--csvdelim", default=",",
help="CSV delimiter. Default: ','")
parser.add_argument("--csvquote", default='"', help="Quote character." +
"""Default: '"'""")
parser.add_argument("--csvencoding", default="utf-8",
help="CSV encoding scheme. Typical encodings:" +
"'utf-8', 'utf-8-sig', or 'cp1252' (Windows). " +
"Default: 'utf-8'")
parser.add_argument("-o", "--outzip", default="./moodle_feedbacks.zip",
help="Zip archive. Default: ./moodle_feedbacks.zip")
parser.add_argument("-d", "--dry", action='store_true',
help="Flag for dry run, displays folder structure")
parser.add_argument("-t", "--tmp", default="./tmp",
help="tmp folder. Default: ./tmp")
parser.add_argument("--nowarn", action='store_true',
help="Disables warnings")
parser.add_argument(
"-i", "--infolder", default="./pdfs_encrypted",
help="Input folder with PDFs. Default: ./pdfs_encrypted")
parser.add_argument(
"-c", "--csv", default="./Bewertungen.csv",
help="Moodle grading sheet. Default: ./Bewertungen.csv")
parser.add_argument(
"--csvdelim", default=",", help="CSV delimiter. Default: ','")
parser.add_argument(
"--csvquote", default='"', help="CSV quote char." + """Default: '"'""")
parser.add_argument(
"--csvenc", default="utf-8", help="CSV encoding scheme. " +
"Typical encodings:'utf-8', 'utf-8-sig', or 'cp1252' (Windows). " +
"Default: 'utf-8'")
parser.add_argument(
"-o", "--outzip", default="./moodle_feedbacks.zip",
help="Zip archive. Default: ./moodle_feedbacks.zip")
parser.add_argument(
"-d", "--dry", action='store_true', help="Flag for dry run.")
parser.add_argument(
"-t", "--tmp", default="./tmp", help="Temporary folder. Default:./tmp")
parser.add_argument(
"--nowarn", action='store_true', help="Disables warnings")
args = parser.parse_args(args)
infolder = args.infolder
sheet_csv = args.csv
outzip = args.outzip
tmpfolder = os.path.join(args.tmp, "to_be_zipped_for_moodle")
tmp_folder = os.path.join(args.tmp, "to_be_zipped_for_moodle")
dry = args.dry
nowarn = args.nowarn
csvdelim = args.csvdelim
csvquote = args.csvquote
csvenc = args.csvencoding
no_warn = args.nowarn
csv_delim = args.csvdelim
csv_quote = args.csvquote
csv_enc = args.csvenc
# Print number of lines
# Print status
starttime = time.time()
numstudents = moodle.get_student_number(sheet_csv=sheet_csv,
csv_enc=csvenc)
num_students = moodle.get_student_number(sheet_csv=sheet_csv,
csv_enc=csv_enc)
print('''Preparing for moodle upload
Processing {} lines
'''.format(numstudents))
Processing {} students
'''.format(num_students))
# Clean up and create temporary folder
dryout = ""
......@@ -106,8 +106,8 @@ Processing {} lines
os.remove(outzip)
# Create temporary folder within given temporary directory
if not os.path.isdir(tmpfolder):
os.mkdir(tmpfolder)
if not os.path.isdir(tmp_folder):
os.mkdir(tmp_folder)
# Parse input folder
# Only PDF files are considered with first digits
......@@ -122,11 +122,11 @@ Processing {} lines
matnums_folder.append(matnum_utils.get_matnum(f))
# Parse grading infos from CSV file
infos = moodle.extract_info(sheet_csv=sheet_csv, csv_delim=csvdelim,
csv_quote=csvquote, csv_enc=csvenc)
infos = moodle.extract_info(sheet_csv=sheet_csv, csv_delim=csv_delim,
csv_quote=csv_quote, csv_enc=csv_enc)
# Loop over grading infos
numfoundpdfs = 0
num_found_pdfs = 0
matnums_csv = []
moodleids = []
for cnt, info in enumerate(infos):
......@@ -144,11 +144,11 @@ Processing {} lines
pdfs_student = [_ for _ in allpdfs
if matnum == matnum_utils.get_matnum(_)]
if len(pdfs_student) > 0: # Found at least one pdf
numfoundpdfs += len(pdfs_student)
num_found_pdfs += len(pdfs_student)
# Prepare submission folder
folder = moodle.submission_folder_name(info)
longfolder = os.path.join(tmpfolder, folder)
longfolder = os.path.join(tmp_folder, folder)
# Create folder
if not dry:
......@@ -163,17 +163,17 @@ Processing {} lines
else:
dryout += "\n{}".format(os.path.join(folder, pdffile))
elif not nowarn: # No PDF found
elif not no_warn: # No PDF found
print("Warning: PDF for {matnum} (id={id}, name={name}) not found."
.format(matnum=matnum, id=moodleid, name=info['fullname']))
# Print for-loop progress
if not (cnt % max(1, round(numstudents/10))):
if not (cnt % max(1, round(num_students/10))):
print(".", sep=' ', end='', flush=True)
# Print results
print("Found {numpdf} PDFs (CSV had {numcsv} entries)"
.format(numpdf=numfoundpdfs, numcsv=numstudents))
print("Found {num_pdf} PDFs (CSV had {num_csv} entries)"
.format(num_pdf=num_found_pdfs, num_csv=num_students))
print("done.")
# Sanity check:
......@@ -184,11 +184,11 @@ Processing {} lines
if not dry:
# Zip
print("Zipping")
shutil.make_archive(os.path.splitext(outzip)[0], 'zip', tmpfolder)
shutil.make_archive(os.path.splitext(outzip)[0], 'zip', tmp_folder)
print('Zip archive is stored at {}'.format(outzip))
# Delete temporary folder
shutil.rmtree(tmpfolder)
shutil.rmtree(tmp_folder)
# Print dry run results
else:
......
......@@ -2,93 +2,226 @@ import sys # get arguments from command line
import os # path listing/manipulation/...
import time # keep track of time
import argparse # handle command line arguments
import shutil #unzipping and copying files
import re # pattern matching
import csv # opening grading worksheet csv
import shutil # unzipping and copying files
from utils import moodle as moodle
def main(args):
    """Copy Moodle submission files into a folder using exam scan naming.

    Files are taken from a submissions zip file (or an already extracted
    folder) and copied into the user-provided output folder, renamed
    according to 'filenameformat'.

    1) Files are extracted from zip file location, e.g. ./all_submissions.zip
       In case a folder is given, extraction is skipped.
    2) The extracted folder is scanned for submitted files.
       Only 1 PDF file/student is accepted (unless --copyall is set).
    3) Matriculation number and names are fetched from the grading worksheet.
    4) Files from the extracted folder are renamed according to convention
       and placed in the user-provided outfolder.

    Args:
        args: Command line arguments as a list of strings (passed on to
            argparse).

    Raises:
        Exception: If 'inzip' is neither a zip file nor a folder, if a dry
            run is requested for a zip file (nothing may be unpacked), or if
            more submission folders than students in the CSV are found.
    """
    # Argument handling
    parser = argparse.ArgumentParser(description='''
Zip file 'inzip', containing all submissions of an assignment,
is extracted, renamed according to convention 'filenameformat'
and placed in folder 'outfolder'.
''')
    parser.add_argument(
        "-i", "--inzip", default="submissions.zip",
        help="Input zip file or already extracted folder. " +
        "Default: ./submissions.zip")
    parser.add_argument(
        "-o", "--outfolder", default="./pdfs",
        help="Output folder with PDFs. Default: ./pdfs")
    parser.add_argument(
        "--filenameformat", default="{matnum}_{fullname[0]}",
        help="File name format. Available keywords: " +
        "{{matnum}}, {{fullname}}, {{lastname}}, {{firstname}}. " +
        "Default: '{{matnum}}_{{fullname[0]}}'")
    parser.add_argument(
        "--copyall", action='store_true',
        help="If set, copies all files (including multiple and non-PDF files)")
    parser.add_argument(
        "--appendoriginal", action='store_true',
        help="If set, appends original file name to new location's file name")
    parser.add_argument(
        "-c", "--csv", default="./Bewertungen.csv",
        help="Moodle grading sheet. Default: ./Bewertungen.csv")
    parser.add_argument(
        "--csvdelim", default=",", help="CSV delimiter. Default: ','")
    parser.add_argument(
        "--csvquote", default='"', help="CSV quote char." + """Default: '"'""")
    parser.add_argument(
        "--csvenc", default="utf-8", help="CSV encoding scheme. " +
        "Typical encodings:'utf-8', 'utf-8-sig', or 'cp1252' (Windows). " +
        "Default: 'utf-8'")
    parser.add_argument(
        "-d", "--dry", action='store_true', help="Flag for dry run.")
    parser.add_argument(
        "-t", "--tmp", default="./tmp", help="Temporary folder. Default:./tmp")

    args = parser.parse_args(args)
    inzip = args.inzip
    outfolder = args.outfolder
    sheet_csv = args.csv
    dry = args.dry
    csv_enc = args.csvenc
    csv_delim = args.csvdelim
    csv_quote = args.csvquote
    copy_all = args.copyall
    append_original_name = args.appendoriginal
    filenameformat = args.filenameformat
    tmp_folder = args.tmp
    extracted_folder = os.path.join(tmp_folder, "extracted_from_moodle")

    # Print status
    starttime = time.time()
    num_students = moodle.get_student_number(sheet_csv=sheet_csv,
                                             csv_enc=csv_enc)

    print('''Preparing for renaming of submission files.
Processing {} students
'''.format(num_students))

    # Collects "source -> destination" lines that are only shown in dry runs
    dryout = []
    if dry:
        print("Dry run\n")

    # Check whether zip or folder is given
    folder_instead_of_zip = False
    if not inzip.lower().endswith('.zip'):
        if not os.path.isdir(inzip):
            raise Exception(
                "{zip} neither Zip file nor folder. Exiting."
                .format(zip=inzip))

        # Folder was given instead of Zip file
        extracted_folder = inzip
        folder_instead_of_zip = True
    else:
        # Extract
        print("Extracting files from {zip} ...".format(zip=inzip))
        if not dry:
            shutil.unpack_archive(inzip, extracted_folder)  # unzip file
        else:
            raise Exception("Dry run prevents me from unpacking the Zip file.")

    # List all extracted folders
    folders = os.listdir(extracted_folder)
    folders.sort()

    # There should never be more folders than entries in CSV file
    if len(folders) > num_students:
        raise Exception(
            ("More folders ({num_folders}) than "
             "students in CSV file ({num_students})")
            .format(num_folders=len(folders), num_students=num_students))

    # Parse grading infos from CSV file
    infos = moodle.extract_info(sheet_csv=sheet_csv, csv_delim=csv_delim,
                                csv_quote=csv_quote, csv_enc=csv_enc)

    # Collect non-default cases:
    # Student did not submit anything
    infos_no_submission = []
    # Student submitted more than one file
    infos_multi_files = []
    # Student submitted a non-PDF file
    infos_unsupported_files = []

    # copyfile() does not create missing parent folders
    if not dry:
        os.makedirs(outfolder, exist_ok=True)

    # Loop over grading info
    print("Copying submissions", sep=' ', end='', flush=True)
    for cnt, info in enumerate(infos):
        folder = moodle.submission_folder_name(info)

        if folder in folders:
            # Folder was found
            folderfull = os.path.join(extracted_folder, folder)
            # Sorted, since os.listdir() returns entries in arbitrary order;
            # this keeps the chosen PDF and the file IDs reproducible
            files = sorted(os.listdir(folderfull))

            # Notify if folder empty
            if len(files) == 0:
                infos_no_submission.append(info)

            # Notify if more than one submission
            if len(files) > 1:
                infos_multi_files.append(info)

            # Iterate over all files within folder
            pdf_copied = False
            for file_cnt, file in enumerate(files):
                file_full = os.path.join(folderfull, file)

                # Create destination file name
                dest = filenameformat.format(
                    matnum=info['matnum'], fullname=info['fullname'],
                    lastname=info['lastname'], firstname=info['firstname'])

                # Add unique file ID (only for copy all)
                if copy_all:
                    dest = dest + "_{:03d}".format(file_cnt)

                base, ext = os.path.splitext(file)
                # Add original file name
                if append_original_name:
                    dest = dest + "_" + base

                # Add extension
                dest = dest + ext
                dest_full = os.path.join(outfolder, dest)

                # Notify if non-PDF file
                is_pdf = file_full.lower().endswith('.pdf')
                if not is_pdf and \
                   info not in infos_unsupported_files:
                    infos_unsupported_files.append(info)

                # Copy either first PDF file or all files if copyall is
                # active. The first PDF is not necessarily the first entry
                # of the folder, hence the explicit flag.
                if copy_all or (is_pdf and not pdf_copied):
                    pdf_copied = pdf_copied or is_pdf
                    if not dry:
                        shutil.copyfile(file_full, dest_full)
                    else:
                        dryout.append(
                            "- {} -> {}"
                            .format(os.path.join(folder, file), dest))

        else:
            # Notify if folder was not found
            infos_no_submission.append(info)

        # Print for-loop progress
        if not (cnt % max(1, round(num_students/10))):
            print(".", sep=' ', end='', flush=True)

    print("done.")

    # Report back special cases
    for report_infos, reason in [(infos_no_submission, "no files"),
                                 (infos_multi_files, "multiple files"),
                                 (infos_unsupported_files,
                                  "unsupported files")]:
        if len(report_infos) > 0:
            lines = ["- {folder} ({matnum})"
                     .format(folder=moodle.submission_folder_name(_),
                             matnum=_['matnum'])
                     for _ in report_infos]
            lines.sort()
            print(
                "\nSubmissions of {reason}:\n{lines}"
                .format(reason=reason, lines="\n".join(lines)))

    # Dry run output
    if not dry:
        # Delete temporary folder (never delete a user-provided folder)
        if not folder_instead_of_zip:
            shutil.rmtree(extracted_folder)
    else:
        dryout.sort()
        print("\nDry run results:\n{}".format("\n".join(dryout)))

    # Print status
    endtime = time.time()
    print("Time taken: {:.2f}".format(endtime-starttime))
if __name__ == '__main__':
......
......@@ -80,25 +80,28 @@ def main(args):
This information is either taken from the filenames of exam scan PDFs or
from the Moodle grading CSV file.
''')
parser.add_argument("-i", "--infolder", default="./supplements",
help="Folder with supplements. Default: ./supplements")
parser.add_argument("-p", "--prefix", default="./pdfs",
help="Provides information to construct prefixes. " +
"Either PDF folder with scanned PDFs or " +
"Moodle grading CSV file. Default: ./pdfs")
parser.add_argument("-o", "--outfolder", default="./supplements_out",
help="Output folder. Default: ./supplements_out")
parser.add_argument("--csvprefixformat", default="{matnum}_{fullname[0]}",
help="Format of CSV prefix. Available keywords: " +
"{{matnum}}, {{fullname}}, {{lastname}}, " +
"{{firstname}}. Default: '{{matnum}}_{{fullname[0]}}'")
parser.add_argument("-d", "--dry", action='store_true',
help="Flag for dry run")
parser.add_argument(
"-i", "--infolder", default="./supplements",
help="Folder with supplements. Default: ./supplements")
parser.add_argument(
"-p", "--prefix", default="./pdfs",
help="Provides information to construct prefixes. Either PDF folder " +
"with scanned PDFs or Moodle grading CSV file. Default: ./pdfs")
parser.add_argument(
"-o", "--outfolder", default="./supplements_out",
help="Output folder. Default: ./supplements_out")
parser.add_argument(
"--filenameformat", default="{matnum}_{fullname[0]}",
help="File name format. Available keywords: " +
"{{matnum}}, {{fullname}}, {{lastname}}, {{firstname}}. " +
"Default: '{{matnum}}_{{fullname[0]}}'")
parser.add_argument(
"-d", "--dry", action='store_true', help="Flag for dry run")
args = parser.parse_args(args)
supp_dir = args.infolder
prefixinfo = args.prefix
prefixformat = args.csvprefixformat
prefixformat = args.filenameformat
output_dir = args.outfolder
dry = args.dry
......@@ -142,10 +145,9 @@ Files in output folder {} will be overwritten during this process.
prefixes = []
infos = moodle.extract_info(csvfilename)
for info in infos:
prefix = prefixformat.format(matnum=info['matnum'],
fullname=info['fullname'],
lastname=info['lastname'],
firstname=info['firstname'])
prefix = prefixformat.format(
matnum=info['matnum'], fullname=info['fullname'],
lastname=info['lastname'], firstname=info['firstname'])
prefixes.append(prefix) # save prefix
# Copy supplements to output dir and prepend prefixes
......
import unittest
import time
import os
import tempfile
class MainTest(unittest.TestCase):
    """Integration test for the preparepdf command line entry point.

    The test expects the fixtures './submissions.zip' and
    './Bewertungen.csv' in the current working directory.
    """

    def setUp(self):
        self.tic = time.time()  # todo this is sooo ugly
        # TemporaryDirectory instead of mkdtemp(): mkdtemp() leaves the
        # removal to the caller, which leaked one directory per test run.
        self._tmp_handle = tempfile.TemporaryDirectory()
        self.addCleanup(self._tmp_handle.cleanup)
        self.test_dir = self._tmp_handle.name

    def tearDown(self):
        self.toc = time.time()
        t = self.toc - self.tic
        print('Time: %.3f' % (t))

    def test_copy_from_zip(self):
        """Submissions from the zip end up renamed in the output folder."""
        import preparepdf

        expected_files = [
            '123456_F.pdf',
            '123457_O.pdf',
            '125412_T.pdf']

        # Prepare parameter
        in_zip = './submissions.zip'
        sheet_csv = "./Bewertungen.csv"
        out_dir = os.path.join(self.test_dir, 'out')
        os.mkdir(out_dir)
        tmp_dir = os.path.join(self.test_dir, 'tmp')
        os.mkdir(tmp_dir)

        # Call function; "-t" keeps the extraction inside the test's own
        # temporary directory instead of './tmp' in the working directory
        preparepdf.main(["-i", in_zip, "-o", out_dir, "-c", sheet_csv,
                         "-t", tmp_dir])

        # Assert output
        created_files = os.listdir(out_dir)
        created_files.sort()
        self.assertEqual(expected_files, created_files)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment