Christian Rohlfing · ad5d1dbc · ad5d1dbc · ad5d1dbc
--- a/preparemoodle.py

+ 181

− 177
+++ b/preparemoodle.py

+ 181

− 177
 #!/usr/bin/env python

 import csv
-import os,time
+import os
+import time
 import shutil  # copyfile, make_archive
-import argparse, sys
-
-
-def find_file(pattern, path):
-  if os.name == "posix":
-    import subprocess
-
-    result = [line[2:] for line in subprocess.check_output(
-        "find " + path + " -type f -name " + pattern, 
-        shell=True).splitlines()]
-    result = [tmp.decode("utf-8") for tmp in result]
-
-  else: 
-    import fnmatch
+import argparse
+import sys
+import utils.matnum as utils
+
+
+def find_unmatched_pdfs(infolder, matnums, nowarn):
+    """Report matriculation numbers that have PDFs but no CSV entry.
+
+    Walks ``infolder`` recursively, derives a matriculation number from the
+    name of every ``*.pdf`` file via ``utils.get_matnum`` and prints those
+    numbers that are not contained in ``matnums``. Every missing number is
+    warned about, listed and counted once, even if several PDFs share it.
+
+    Args:
+        infolder (str): path to input folder
+        matnums (list): matriculation numbers known from the CSV
+        nowarn (bool): if true, the per-number warnings are suppressed;
+            the final summary is printed regardless
+    """
+
+    print("\nSearching for matnumbers not present in CSV but in PDF folder:")
+
+    # Build the lookup set once so each PDF costs a constant-time check
+    known = set(matnums)
+
+    # Loop over all PDFs:
+    notfoundmatnums = []  # keeps first-seen order for the report
+    reported = set()
+    for _root, _dirs, files in os.walk(infolder):
+        for pdffile in files:
+            if not pdffile.endswith(".pdf"):
+                continue
+
+            # Get matriculation number from file
+            matnum = utils.get_matnum(pdffile)
+
+            # A student may own several PDFs (e.g. sheet and exam);
+            # report each unknown number only once
+            if matnum in known or matnum in reported:
+                continue
+            reported.add(matnum)
+            notfoundmatnums.append(matnum)
+            if not nowarn:
+                print("Warning: {} not in CSV".format(matnum))
+
+    # Report back
+    if notfoundmatnums:
+        print('''Could not find following {} matnumbers in CSV:
+            {}'''.format(len(notfoundmatnums), ", ".join(notfoundmatnums)))
+
+    print("Done.\n")
+
+
+def main(args):
+    """Sort feedback PDFs into Moodle submission folders and zip them.
+
+    For every participant line of the Moodle grading CSV, all PDFs found by
+    ``utils.find_file`` for the pattern ``<matnum>*.pdf`` in the input
+    folder are copied to a folder named
+    ``<name>_<moodleid>_assignsubmission_file_`` inside a temporary
+    directory. That directory is zipped and then deleted. With ``--dry``
+    nothing is written; the planned archive paths are printed instead.
+
+    Args:
+        args (list): command line arguments without the program name
+    """
+
+    # Parse input arguments
+    parser = argparse.ArgumentParser(description='''
+    prepares batch upload to Moodle via assignment module.
+    PDFs in folder 'in' are moved to folder 'tmp' with a certain folder structure and finally zipped to 'out'.
+    Attention: zip-archive 'out' will be overwritten in the following!
+
+    ''')
+    parser.add_argument("-i", "--infolder", default="./pdfs_encrypted",
+                        help="Input folder with PDFs. Default: ./pdfs_encrypted")
+    parser.add_argument("-c", "--csv", default="./Bewertungen.csv",
+                        help="Moodle grading CSV file, needed to construct the folder names. Default: ./Bewertungen.csv")
+    parser.add_argument("-o", "--outzip", default="./moodle_feedbacks.zip",
+                        help="Output zip archive. Default: ./moodle_feedbacks.zip")
+    parser.add_argument("-d", "--dry", action='store_true',
+                        help="Flag for dry run, displays only the folder structure inside the archive moodle_feedbacks.zip")
+    parser.add_argument("-t", "--tmp", default="./tmp",
+                        help="tmp folder. Default: ./tmp")
+    parser.add_argument("--nowarn", action='store_true',
+                        help="Disables warnings")
+
+    args = parser.parse_args(args)
+    infolder = args.infolder
+    csvfilename = args.csv
+    outzip = args.outzip
+    # Work in a dedicated sub-folder so the user's tmp folder is never
+    # zipped or deleted as a whole
+    tmpfolder = os.path.join(args.tmp, "to_be_zipped_for_moodle")
+    dry = args.dry
+    nowarn = args.nowarn
+
+    starttime = time.time()
+
+    # Print status with total number of lines (header line included);
+    # the count is also used to scale the progress dots below
+    numlines = 0
+    with open(csvfilename, newline='') as csvfile:
+        numlines = sum(1 for _ in csvfile)
+
+    print('''Preparing for moodle upload
+Processing {} lines
+  '''.format(numlines))

-    result = []
-    for root, _, files in os.walk(path):
-      for name in files:
-        if fnmatch.fnmatch(name, pattern):
-          result.append(os.path.join(root, name))
+    dryout = []  # planned archive paths, only filled on dry runs
+    if dry:
+        print("Dry run\n")
+    else:
+        # Remove zip file
+        if os.path.exists(outzip):
+            os.remove(outzip)
+
+        # Start from an empty temporary folder: leftovers of an aborted
+        # run would make os.mkdir() below fail or end up in the archive
+        if os.path.isdir(tmpfolder):
+            shutil.rmtree(tmpfolder)
+
+        # makedirs also creates the parent given via --tmp if it is missing
+        os.makedirs(tmpfolder)
+
+    # Open CSV file
+    with open(csvfilename, newline='') as csvfile:
+
+        numfoundpdfs = 0  # participants with at least one PDF
+        matnums = []
+        line_cnt = 0  # data lines seen so far (header excluded)
+        progress_step = max(1, round(numlines/10))
+        print("Start iterating...", sep='', end='', flush=True)
+
+        # Loop over all lines in CSV file
+        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
+        next(reader, None)  # skip header CSV line (tolerates an empty file)
+        for row in reader:
+            # Print progress (roughly ten dots over the whole file)
+            if line_cnt % progress_step == 0:
+                print(".", sep=' ', end='', flush=True)
+            line_cnt += 1
+
+            # Skip blank or truncated lines and participants without a
+            # matriculation number. An empty number would turn the search
+            # pattern into '*.pdf', which presumably matches every
+            # student's PDF -- verify against utils.find_file
+            if len(row) < 3 or not row[2].strip():
+                if not nowarn:
+                    print("Warning: skipping CSV line without matnumber: {}".format(row))
+                continue
+
+            # Parse required fields from CSV line
+            # Moodle has its own internal ID per participant alongside
+            # matriculation number
+            moodleid = row[0]
+            moodleid = moodleid.replace("Teilnehmer/in", "")  # German
+            moodleid = moodleid.replace("Participant ", "")  # English
+            name = row[1]  # Lastname, Firstname
+            matnum = row[2]  # matriculation number (6-digit)
+            matnums.append(matnum)  # save matriculation number for later
+
+            # Copy PDF files
+            # Find all PDFs starting with matriculation number, e.g.
+            # '123456_Lastname_sheet.pdf' and '123456_Lastname_exam.pdf'
+            # If pdf files for current student exists, create a directory and
+            # copy the pdf files to it. The resulting directories can be
+            # uploaded to Moodle
+            longpdffiles = utils.find_file(matnum + "*.pdf", infolder)
+            if not longpdffiles:
+                if not nowarn:
+                    print("Warning: PDF corresponding to matnumber {} (moodleid={}, name={}) not available.".format(
+                        matnum, moodleid, name
+                    ))
+                continue
+
+            numfoundpdfs += 1
+
+            # Prepare folder
+            # For upload, Moodle accepts submission files per participant
+            folder = "{}_{}_assignsubmission_file_".format(name, moodleid)
+            longfolder = os.path.join(tmpfolder, folder)
+
+            # Create folder
+            if not dry:
+                os.mkdir(longfolder)
+
+            # Copy all files to folder
+            for longpdffile in longpdffiles:
+                pdffile = os.path.basename(longpdffile)
+
+                if not dry:
+                    shutil.copyfile(longpdffile,
+                                    os.path.join(longfolder, pdffile))
+                else:
+                    dryout.append(os.path.join(folder, pdffile))
+
+    # Print results; line_cnt counts data lines only, whereas numlines
+    # would also include the header line
+    print("Found {} PDFs (CSV had {} entries)".format(numfoundpdfs, line_cnt))
+    print("done.")
+
+    # Sanity check:
+    # Check for PDFs not reflected in CSV (student not registered in Moodle)
+    find_unmatched_pdfs(infolder, matnums, nowarn)
+
+    # Zipping
+    if not dry:
+        # Zip; make_archive appends '.zip' itself, hence the stripped suffix
+        print("Zipping")
+        shutil.make_archive(os.path.splitext(outzip)[0], 'zip', tmpfolder)
+        print('The Zip archive is available at: '+outzip)
+
+        # Delete temporary folder
+        shutil.rmtree(tmpfolder)
+
+    else:
+        print("\nDry run results:\n{}".format(
+            "".join("\n{}".format(path) for path in dryout)))
+
+    endtime = time.time()
+
+    print("""Done.
+Time taken: {:.2f}""".format(endtime-starttime))

-  return result

 if __name__ == '__main__':
-
-  parser = argparse.ArgumentParser(description='''
-  prepares batch upload to Moodle via assignment module. 
-  PDFs in folder 'in' are moved to folder 'tmp' with a certain folder structure and finally zipped to 'out'. 
-  Attention: zip-archive 'out' will be overwritten in the following!
-
-  ''')
-  parser.add_argument("-i", "--infolder", default="./pdfs_encrypted", 
-    help="Input folder with PDFs. Default: ./pdfs_encrypted")
-  parser.add_argument("-c", "--csv", default="./Bewertungen.csv",
-    help="Moodle grading CSV file, needed to construct the folder names. Default: ./Bewertungen.csv")
-  parser.add_argument("-o", "--outzip", default="./moodle_feedbacks.zip",
-    help="Output zip archive. Default: ./moodle_feedbacks.zip")
-  parser.add_argument("-d", "--dry", action='store_true', 
-    help="Flag for dry run, displays only the folder structure inside the archive moodle_feedbacks.zip")
-  parser.add_argument("-t", "--tmp", default="./tmp",
-    help="tmp folder. Default: ./tmp")
-  parser.add_argument("--nowarn", action='store_true', 
-    help="Disables warnings")
-  parser.add_argument("-b","--batch", default="0", 
-    help="Check whether it runs through batch script or not. Default: 0")
-
-  args = parser.parse_args()
-  infolder = args.infolder
-  csvfilename = args.csv
-  outzip = args.outzip
-  tmpfolder = args.tmp
-  dry = args.dry
-  nowarn = args.nowarn
-  batch_process = int(args.batch)
-
-  numlines = 0
-  starttime = time.time()
-  with open(csvfilename, newline='') as csvfile:
-    numlines = sum(1 for line in csvfile)
-
-  print('''Preparing for moodle upload
-  Processing {} lines
-  '''.format(numlines))
-
-  if dry:
-    print("Dry run\n")
-    dryoutput=""
-  else:
-    if batch_process == 0:
-        for root, dirs, files in os.walk(tmpfolder):
-          for f in files:
-              os.unlink(os.path.join(root, f))
-          for d in dirs:
-              shutil.rmtree(os.path.join(root, d))
-        
-
-    if os.path.exists(outzip): os.remove(outzip)
-
-
-  with open(csvfilename, newline='') as csvfile:
-    
-    # Loop over all lines in CSV file
-    numfoundpdfs = 0
-    cnt = 0
-    print("Start iterating...", sep='', end='', flush=True)
-
-    reader = csv.reader(csvfile, delimiter=',', quotechar='"')
-    next(reader)  # skip first row in CSV file since this should be the header
-    for row in reader:
-      # parse the required fields from the csv file
-      id = row[0]
-      id = id.replace("Teilnehmer/in", "")
-      id = id.replace("Participant ", "")
-      name = row[1]
-      matnum = row[2]
-
-      # if a pdf file for current student exists, create a directory and copy
-      # the pdf file to it. The resulting directories can be uploaded to moodle
-      longpdffile = ''
-      paths = find_file(matnum + "*.pdf", infolder)
-
-      if len(paths) > 0:
-        longpdffile = paths[0]
-      if len(paths) > 1:  # TODO: implement second loop for enabling distribution of multiple files
-        raise Exception("More than one PDFs starting with matnum {} found!".format(matnum)) 
-      
-      
-      if os.path.isfile(longpdffile):
-        numfoundpdfs += 1
-        pdffile = os.path.basename(longpdffile)
-        folder = "{}_{}_assignsubmission_file_".format(name, id)
-        longfolder = os.path.join(tmpfolder, folder)
-
-        if not dry:
-          os.mkdir(longfolder)
-          shutil.copyfile(longpdffile, os.path.join(longfolder, pdffile))
-        else:
-          dryoutput += "\n{}".format(os.path.join(folder, pdffile))
-      else:
-          if not nowarn:
-            print("Warning: PDF corresponding to matriculation number {} (id={}, name={}) not available.".format(
-              matnum, id, name
-            ))
-      
-      # Progress
-      if not (cnt % max(1,round(numlines/10))): 
-        print(".", sep=' ', end='', flush=True)
-      cnt += 1
-      
-  print("done.\n")
-
-  print("Found {} PDFs (CSV had {} entries)\n".format(numfoundpdfs, numlines))
-
-  print("Searching for matriculation numbers not present in CSV but in PDF input folder:")
-
-  # Check for PDFs which are not reflected in CSV (student not registered in Moodle)
-  numnotfoundmatnums = 0
-  notfoundmatnums = ""
-
-  for root, dirs, files in os.walk(infolder):
-      for pdffile in files:
-          if pdffile.endswith(".pdf"):
-            # Get matriculation number from file
-            matnum = pdffile[0:6]
-            
-            # Search in CSV
-            with open(csvfilename, 'r') as csvfile:
-              notfound = True
-              for line in csvfile:
-                  if matnum in line:
-                    notfound = False
-
-            if notfound:
-              numnotfoundmatnums += 1
-              notfoundmatnums += matnum + ", "
-              if not nowarn:
-                print("Warning: Could not find {} in CSV".format(matnum))
-
-  if numnotfoundmatnums > 0:
-    print('''I could not find the following {} matriculation numbers in CSV:
-      {}'''.format(numnotfoundmatnums, notfoundmatnums))
-
-  print("Done.")
-  
-
-  # Zipping
-  if not dry:
-    print("Zipping")
-
-    shutil.make_archive(os.path.splitext(outzip)[0], 'zip', tmpfolder)
-
-  else:
-    print("\nResults from dry ryn:\n{}".format(dryoutput))
-
-
-  print("\nDone.\n")
-
-  endtime = time.time()
-  print('\n The Zip archive is available at: '+outzip)
-  print(f'\nTime taken: {endtime-starttime:.2f}s\n')
 \ No newline at end of file
+    main(sys.argv[1:])