#!/usr/bin/env python

"""Rename scanned PDFs assuming scan order equal to alphabetical order of
students in Moodle grading sheet.

Renames scans accordingly to info in Moodle grading sheet, such that the file
name starts with the matriculation number. This only works if exams were
scanned in alphabetical order. Optionally, each scanned PDF is searched for
barcodes/QRs containing the matriculation number to double check.

Attention: Contents in output folder will be overwritten in the following!
"""

__author__ = "Amrita Deb (deb@itc.rwth-aachen.de), " +\
    "Christian Rohlfing (rohlfing@ient.rwth-aachen.de)"


import os
import time
import shutil  # copyfile, make_archive
import argparse
import sys

import utils.moodle as moodle
import utils.matnum as matnum_utils
import utils.qr as qr_utils


def _make_parser():
    """Build the command-line parser of this script.

    The parser inherits from the parser returned by
    ``moodle.get_moodle_csv_parser()``; the ``csvdelim``, ``csvquote`` and
    ``csvenc`` attributes read in ``main`` are presumably defined there
    (NOTE(review): confirm against ``utils.moodle``).

    Returns:
        argparse.ArgumentParser: parser with the positional arguments
        ``infolder``, ``csv``, ``outfolder`` and the options
        ``--filenameformat``, ``-q/--checkqr`` and ``-d/--dry``.
    """
    csv_parser = moodle.get_moodle_csv_parser()
    parser = argparse.ArgumentParser(
        parents=[csv_parser],
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument(
        "infolder", help="Input folder with PDFs.")
    parser.add_argument(
        "csv", help="Moodle grading sheet.")
    parser.add_argument(
        "outfolder", help="Output folder with renamed scans.")
    parser.add_argument(
        "--filenameformat", default="{matnum}_{fullname[0]}",
        help="File name format. Available keywords: " +
        "{matnum}, {fullname}, {lastname}, {firstname}. " +
        "Default: '{matnum}_{fullname[0]}'")
    parser.add_argument(
        "-q", "--checkqr", action='store_true',
        help="Flag for additional QR code match.")
    parser.add_argument(
        "-d", "--dry", action='store_true', help="Flag for dry run.")

    return parser


# Create argument parser with default values
_parser = _make_parser()
# Append the generated usage/help text to the module docstring
__doc__ += _parser.format_help()


def main(args):
    """Main routine

    Copies every PDF found in the input folder to the output folder under a
    new name built from the grading-sheet entry at the same position: the
    n-th PDF (file names sorted alphabetically) is paired with the n-th
    entry returned by ``moodle.extract_info``.

    Args:
        args: list of command-line argument strings (without the program
            name), parsed with the module-level ``_parser``.

    Raises:
        Exception: if the number of PDFs differs from the number of students
            reported for the grading sheet, or, with ``--checkqr``, if a QR
            code was read whose content is malformed, holds an invalid
            matriculation number, or holds one that differs from the CSV.
    """

    # Parse input arguments
    args = _parser.parse_args(args)
    infolder = args.infolder
    sheet_csv = args.csv
    outfolder = args.outfolder
    file_format = args.filenameformat
    dry = args.dry
    csv_delim = args.csvdelim
    csv_quote = args.csvquote
    csv_enc = args.csvenc
    check_qr = args.checkqr

    # Print status with total number of lines
    starttime = time.time()
    dry_lines = []  # one "\n<source> -> <destination>" entry per PDF
    if dry:
        print("Dry run")

    print("Preparing renaming of scans")

    # Only PDF files are considered
    pdf_folder = os.listdir(infolder)
    pdf_files = [_ for _ in pdf_folder if _.lower().endswith(".pdf")]

    # Sort list alphabetically
    # Most scanners are putting timestamps in the file names
    # This information is more important than the OS time stamp
    pdf_files.sort()

    # Get number of CSV entries
    num_students = moodle.get_student_number(sheet_csv=sheet_csv,
                                             csv_enc=csv_enc)

    if len(pdf_files) != num_students:
        raise Exception("Error: Not as many CSV lines as scans!")

    # Parse grading infos from CSV file
    infos = moodle.extract_info(sheet_csv=sheet_csv, csv_delim=csv_delim,
                                csv_quote=csv_quote, csv_enc=csv_enc)

    # Make sure the copy target exists; a dry run must not touch the disk
    if not dry:
        os.makedirs(outfolder, exist_ok=True)

    # Print roughly ten progress dots, at least one step per file
    progress_step = max(1, round(num_students/10))

    # Loop over grading infos
    pdfs_no_qrs = []
    print("Renaming", sep=' ', end='', flush=True)
    for cnt, pdf_file in enumerate(pdf_files):
        # Extract matriculation number and lastname from grading info
        info = infos[cnt]
        matnum_csv = info['matnum']

        # Destination PDF file name
        dest_pdf = file_format.format(
            matnum=matnum_csv, fullname=info['fullname'],
            lastname=info['lastname'], firstname=info['firstname'])

        # Add extension
        _, ext = os.path.splitext(pdf_file)
        dest_pdf = dest_pdf + ext
        in_pdf_full = os.path.join(infolder, pdf_file)

        # Sanity check
        if check_qr:
            # Search for first QR code in PDF
            qr = qr_utils.first_qr_from_first_pdf_page(pdf_file=in_pdf_full)

            # Extract matnum from QR code
            if qr:
                # Assumed QR format:
                # "something-before-the-matnum-{matnum}-{pagenum}"
                qr_fields = qr.split('-')
                if len(qr_fields) < 2:
                    # Without this guard the [-2] lookup below would fail
                    # with an IndexError that names neither file nor payload
                    raise Exception("{}: unexpected QR content '{}'"
                                    .format(pdf_file, qr))
                matnum_qr = qr_fields[-2]
                if not matnum_utils.check_matnum(matnum_qr):
                    raise Exception("{} no valid matnum!".format(matnum_qr))

                # Halt if matnum of QR and CSV differ
                if matnum_qr != matnum_csv:
                    raise Exception("{}: QR with {} but CSV with matnum {}"
                                    .format(pdf_file, matnum_qr, matnum_csv))
            else:
                # No QR code could be read; reported after the loop
                pdfs_no_qrs.append(pdf_file)

        # Copy
        if not dry:
            dest_pdf_full = os.path.join(outfolder, dest_pdf)
            shutil.copyfile(in_pdf_full, dest_pdf_full)
        else:
            dry_lines.append("\n{} -> {}".format(pdf_file, dest_pdf))

        # Print for-loop progress
        if not (cnt % progress_step):
            print(".", sep=' ', end='', flush=True)

    # Print results
    print("done.")

    # Dry run
    if dry:
        print("\nDry run results:{}".format("".join(dry_lines)))

    if check_qr and pdfs_no_qrs:
        print("\nCouldn't read QRs in the following PDFs\n- {}"
              .format("\n- ".join(pdfs_no_qrs)))

    # Print time
    endtime = time.time()
    print("Time taken: {:.2f}".format(endtime-starttime))


# Main routine
if __name__ == '__main__':
    main(sys.argv[1:])