Commit fc93772e authored by Christian Rohlfing's avatar Christian Rohlfing
Browse files

Merge branch 'ad_iss8' of git.rwth-aachen.de:rwthmoodle/exam-scan into ad_iss8

parents 41c22a29 3fff639c
......@@ -5,27 +5,102 @@ import time
import shutil # copyfile, make_archive
import argparse # argument parsing
import sys
import zipfile
import utils.moodle as moodle
import utils.matnum as matnum_utils
import zipfile
def zip_folders(base_folder, zip_file, size_limit):
    """Zip folders in base folder into one or more size-limited zip files.

    The entries of ``base_folder`` are processed in sorted order so that the
    assignment of folders to archives is reproducible. The files directly
    inside each entry are stored as ``<entry>/<file>``. Archives are named
    ``<zip_file without extension>_<n>.zip`` with ``n`` starting at 1.

    A new archive is started when the compressed size accumulated in the
    current archive plus the *uncompressed* size of the next folder would
    exceed ``size_limit``. A folder is never split across archives, so a
    single folder larger than the limit still ends up in one archive.

    Args:
        base_folder (str): path of base folder
        zip_file (str): path of zip file (only the part before the
            extension is used to build the archive names)
        size_limit (int): size limit per archive in MiB

    Returns:
        int: number of zip files (0 if the base folder has no entries)
    """

    # Initialize
    bytes_per_mib = 1024**2
    zip_file_base = os.path.splitext(zip_file)[0]
    total_compress_size = 0
    zip_cnt = 0
    zf = None

    # Iterate over folders
    all_folders = sorted(os.listdir(base_folder))
    num_folders = len(all_folders)
    progress_step = max(1, round(num_folders / 10))

    try:
        for cnt, folder in enumerate(all_folders):
            # Measure uncompressed folder size in MiB
            folder_path = os.path.join(base_folder, folder)
            folder_size = get_folder_size(folder_path) / bytes_per_mib

            # If size_limit reached (or no archive open yet), create new
            # zip file
            if zf is None or total_compress_size + folder_size > size_limit:
                if zf is not None:
                    zf.close()  # Close previous zip file

                zip_cnt += 1
                zip_file = "{zip}_{cnt}.zip".format(
                    zip=zip_file_base, cnt=zip_cnt)
                total_compress_size = 0

                # Open (new) zip file
                zf = zipfile.ZipFile(
                    zip_file, mode='w', compression=zipfile.ZIP_DEFLATED)

            # Remember where this folder's entries start in the archive
            first_new_entry = len(zf.filelist)

            # Add all files of the current folder to the zip file
            for f in os.listdir(folder_path):
                zf.write(
                    os.path.join(folder_path, f),
                    arcname=os.path.join(folder, f))

            # Get compressed size of folder in MiB
            folder_compress_size = sum(
                info.compress_size for info in zf.filelist[first_new_entry:])
            total_compress_size += folder_compress_size / bytes_per_mib

            # Print for-loop progress (roughly ten dots in total)
            if not cnt % progress_step:
                print(".", sep=' ', end='', flush=True)
    finally:
        # Clean up: zf stays None when the base folder has no entries
        if zf is not None:
            zf.close()

    print("done.")

    return zip_cnt
def bytesto(bytes, to, bsize=1024):
    """Convert bytes to kilobytes, megabytes, etc.

    Args:
        bytes (int or float): number of bytes
        to (str): target unit, one of 'k', 'm', 'g', 't', 'p', 'e'
        bsize (int): size of one unit step. Default: 1024

    Returns:
        float: converted value, rounded to two decimals

    Raises:
        KeyError: if ``to`` is not one of the supported units

    Sample code:
        print('mb= ' + str(bytesto(314575262000000, 'm')))
    Sample output:
        mb= 300002347.95
    """

    # Number of divisions by bsize needed to reach the target unit
    a = {'k': 1, 'm': 2, 'g': 3, 't': 4, 'p': 5, 'e': 6}
    r = float(bytes)
    for _ in range(a[to]):
        r = r / bsize

    return round(r, 2)


def get_folder_size(path):
    """Get size in bytes of folder

    Sums the sizes of all files in the folder and, recursively, in its
    sub-folders. Entries that are neither a file nor a directory (e.g.
    dangling symbolic links) do not contribute to the total.

    Args:
        path (str): path of folder

    Returns:
        int: number of bytes
    """

    total = 0
    # The context manager releases the directory iterator promptly
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_file():
                total += entry.stat().st_size
            elif entry.is_dir():
                total += get_folder_size(entry.path)

    return total
def sanity_check(matnums_csv, matnums_folder):
"""Check two cases for sanity:
......@@ -69,8 +144,11 @@ def main(args):
Attention: zip-archive 'out' will be overwritten in the following!
''')
parser.add_argument("infolder", help="Input folder with PDFs.")
parser.add_argument("csv", help="Moodle grading sheet.")
parser.add_argument(
"infolder", help="Input folder with PDFs.")
parser.add_argument(
"-c", "--csv", default="./Bewertungen.csv",
help="Moodle grading sheet. Default: ./Bewertungen.csv")
parser.add_argument(
"--csvdelim", default=",", help="CSV delimiter. Default: ','")
parser.add_argument(
......@@ -79,16 +157,17 @@ def main(args):
"--csvenc", default="utf-8", help="CSV encoding scheme. " +
"Typical encodings:'utf-8', 'utf-8-sig', or 'cp1252' (Windows). " +
"Default: 'utf-8'")
parser.add_argument("outzip", help="Zip archive.")
parser.add_argument(
"outzip", help="Zip archive.")
parser.add_argument(
"-d", "--dry", action='store_true', help="Flag for dry run.")
parser.add_argument(
"-t", "--tmp", default="./tmp", help="Temporary folder. Default:./tmp")
parser.add_argument(
"--nowarn", action='store_true', help="Disables warnings")
parser.add_argument(
"--moodlefilesize", default="250",
help="Moodle upload file size in MiB. Default: 250")
parser.add_argument(
"--nowarn", action='store_true', help="Disables warnings")
args = parser.parse_args(args)
infolder = args.infolder
......@@ -120,8 +199,9 @@ Processing {} students'''.format(num_students))
os.remove(outzip)
# Create temporary folder within given temporary directory
if not os.path.isdir(tmp_folder):
os.mkdir(tmp_folder)
if os.path.isdir(tmp_folder):
shutil.rmtree(tmp_folder)
os.mkdir(tmp_folder)
# Parse input folder
# Only PDF files are considered with first digits
......@@ -130,9 +210,6 @@ Processing {} students'''.format(num_students))
allfiles = os.listdir(infolder)
allfiles.sort()
allpdfs = []
if (len(allfiles) == 0):
print(""" There are no PDFs in the given directory. Exiting now.""")
return
for f in allfiles:
if f.lower().endswith('.pdf') and matnum_utils.starts_with_matnum(f):
allpdfs.append(f)
......@@ -170,7 +247,7 @@ Processing {} students'''.format(num_students))
# Prepare submission folder
folder = moodle.submission_folder_name(info)
longfolder = os.path.join(tmp_folder, folder)
# Create folder
if not dry:
os.mkdir(longfolder)
......@@ -184,7 +261,8 @@ Processing {} students'''.format(num_students))
else:
dryout.append(
"- {old} -> {new}"
.format(old=pdffile, new=os.path.join(folder, pdffile)))
.format(
old=pdffile, new=os.path.join(folder, pdffile)))
elif not no_warn: # No PDF found
print("Warning: PDF for {matnum} (id={id}, name={name}) not found."
......@@ -198,7 +276,6 @@ Processing {} students'''.format(num_students))
print("done.")
print("Found {num_pdf} PDFs (CSV had {num_csv} entries)"
.format(num_pdf=num_found_pdfs, num_csv=num_students))
# Sanity check:
# Check for PDFs not reflected in CSV (student not registered in Moodle)
......@@ -206,31 +283,17 @@ Processing {} students'''.format(num_students))
# Zip
if not dry:
foldersize = 0
count = 1
z = zipfile.ZipFile(os.path.splitext(outzip)[0]+str(count)+".zip", "w")
for dirpath, dirnames, filenames in os.walk(tmp_folder):
for file in filenames:
file_path = os.path.join(dirpath, file)
if not os.path.islink(file_path):
foldersize += os.path.getsize(file_path)
if bytesto(foldersize,'m') < size_limit:
z.write(file_path,os.path.join(os.path.relpath(file_path, tmp_folder),file))
os.remove(file_path)
else:
print("Preparing zip file "+str(count))
z.close()
print('Zip archive is stored at {}'.format(os.path.splitext(outzip)[0]+str(count)+".zip"))
count+=1
foldersize = 0
z = zipfile.ZipFile(os.path.splitext(outzip)[0]+str(count)+".zip", "w")
z.write(file_path,os.path.join(os.path.relpath(file_path, tmp_folder),file))
os.remove(file_path)
# Delete temporary folder
print("Zipping")
zip_cnt = zip_folders(
base_folder=tmp_folder, zip_file=outzip, size_limit=size_limit)
# Remove temporary folder
shutil.rmtree(tmp_folder)
# Print status
print("{cnt} zip archives are stored ({zip}*)"
.format(cnt=zip_cnt, zip=os.path.splitext(outzip)[0]))
# Print dry run results
else:
dryout.sort()
......@@ -244,4 +307,4 @@ Time taken: {:.2f}""".format(endtime-starttime))
# Main routine
if __name__ == '__main__':
    # Run once, forwarding the command-line arguments without the program name
    main(sys.argv[1:])
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment