Commit 0ad778ef authored by Arndt Heilmann's avatar Arndt Heilmann
Browse files

Fix script unix

parent 5688d9cb
Manual Re-Alignment of Keystrokes to Tokens. In some cases, keystrokes are not mapped to the correct token. The manual re-alignment procedure below helps rectify this issue:
NOTE: You will have to install the Python library "lxml" for the scripts to work.
1) Download the following repository from GitLab: https://git.rwth-aachen.de/arndt.heilmann/kdfixer
2) Download the Event.xml-files from your study from the Management Tool: https://critt.as.kent.edu/cgi-bin/yawat/yawat.cgi
3) Move the Event.xml-files to the folder "EventFiles"
......
......@@ -7,36 +7,37 @@ Created on Wed Nov 8 17:15:55 2017
import os
import numpy as np
from lxml import etree
tokenidentifierdict={}
for dirs,subdirs,files in os.walk('..\\EventFiles\\'):
for dirs,subdirs,files in os.walk('../EventFiles/'):
for file in files:
if file.endswith("Event.xml") :
print(file)
with open(dirs+"//"+file,"r",encoding="utf-8") as myfile:
mytext=myfile.read()
# Read the whole Event.xml file as text so it can be patched before parsing.
with open(dirs+"/"+file,"r",encoding="utf-8") as myfile:
    # A bare double quote stored as an attribute value shows up as three
    # consecutive quotes, which is not well-formed XML; escape the middle one.
    # NOTE(review): the replacement literal was garbled in this listing (an
    # HTML entity appears to have been decoded); "&quot;" is the reconstructed
    # XML escape - confirm against the repository.
    mytext=myfile.read().replace("\"\"\"","\"&quot;\"")
mytext=mytext.replace("LogFile","logfile")
root = etree.fromstring(bytes(mytext,"utf-8"))
file=file.replace(".Event.xml","")
for Token in root.iter("Token"):
# print(file,Token.attrib)
tokenidentifierdict.setdefault(file,{}).setdefault("FinalText",{}).update({int(Token.attrib["id"]):Token.attrib["tok"]})
#%%
print("Collecting Keystrokes")
KDdict={}
TokKDdict={}
for dirs,subdirs,files in os.walk('..\\EventFiles\\'):
for dirs,subdirs,files in os.walk('../EventFiles/'):
for file in files:
if file.endswith("Event.xml") :
# print(file)
filename=file.replace(".Event.xml","")
with open(dirs+"//"+file,"r",encoding="utf-8") as infile:
mytext=infile.read()
# Read the whole Event.xml file as text so it can be patched before parsing.
# Single "/" separator, consistent with the token-collection pass.
with open(dirs+"/"+file,"r",encoding="utf-8") as myfile:
    # A bare double quote stored as an attribute value shows up as three
    # consecutive quotes, which is not well-formed XML; escape the middle one.
    # NOTE(review): the replacement literal was garbled in this listing (an
    # HTML entity appears to have been decoded); "&quot;" is the reconstructed
    # XML escape - confirm against the repository.
    mytext=myfile.read().replace("\"\"\"","\"&quot;\"")
mytext=mytext.replace("LogFile","logfile")
root = etree.fromstring(bytes(mytext,"utf-8"))
file=file.replace(".Event.xml","")
# file=file.replace(".Event.xml","")
for i,Mod in enumerate(root.iter("Mod")):
Mod.attrib["chr"]=Mod.attrib["chr"].replace("\n","_")
Mod.attrib["chr"]=Mod.attrib["chr"].replace(" ","_")
......@@ -49,11 +50,11 @@ for dirs,subdirs,files in os.walk('..\\EventFiles\\'):
TokKDdict.setdefault(filename,{}).setdefault(int(Mod.attrib["tid"]),{}).setdefault(key,[]).append(value)
#%%
print("Writing .pzl-files")
for translation in TokKDdict:
writefile=False
try:
open("..\\ManualRealignment\\"+translation+".pzl")
open("../ManualRealignment/"+translation+".pzl")
answer=input("Do you really want to overwrite "+translation+".pzl (y/n)")
if answer=="y":
writefile=True
......@@ -61,10 +62,10 @@ for translation in TokKDdict:
except FileNotFoundError:
writefile=True
if writefile==True:
with open("..\\ManualRealignment\\"+translation+".pzl","w",encoding="utf-8") as outfile:
with open("../ManualRealignment/"+translation+".pzl","w",encoding="utf-8") as outfile:
for i,token in enumerate(sorted(list(TokKDdict[translation].keys()))):
# token=str(token)
if token in tokenidentifierdict:
if token in tokenidentifierdict[translation]["FinalText"]:
line=[]
header=[]
if i==0:
......@@ -78,13 +79,13 @@ for translation in TokKDdict:
line.append("TT")
line.append("Chars")
line.append(token)
line.append(tokenidentifierdict[translation]["FinalText"][token].replace("\"","\\\""))
line.append(tokenidentifierdict[translation]["FinalText"][token].replace("\"","/\""))
for ids in TokKDdict[translation][token]["Id"]:
# if KDdict[translation][ids].setdefault("Value","")=="[Return]":
# char="/"
if KDdict[translation][ids].setdefault("Value","")=="\"":
char="\\\""
if KDdict[translation][ids].setdefault("chr","")=="\"":
char="/\""
# print("HEHAHDH")
else:
char=KDdict[translation][ids]["chr"]
......@@ -96,7 +97,7 @@ for translation in TokKDdict:
line.append("TT")
line.append("Ids")
line.append(token)
line.append(tokenidentifierdict[translation]["FinalText"][token].replace("\"","\\\""))
line.append(tokenidentifierdict[translation]["FinalText"][token].replace("\"","/\""))
for ids in TokKDdict[translation][token]["Id"]:
line.append(ids)
outfile.write("\t".join([str(x) for x in line])+"\n")
......
......@@ -10,7 +10,7 @@ Created on Wed Apr 28 11:42:17 2021
import os
from lxml import etree
for dirs,subdirs,files in os.walk('..\\EventFiles\\'):
for dirs,subdirs,files in os.walk('../EventFiles/'):
for file in files:
if file.endswith("Event.xml") :
KDdict={}
......@@ -41,7 +41,7 @@ for dirs,subdirs,files in os.walk('..\\EventFiles\\'):
#%%
FixedKDdict=KDdict.copy()
with open("..\ManualRealignment\\"+filename+".pzl","r",encoding="utf-8") as infile:
with open("..\ManualRealignment/"+filename+".pzl","r",encoding="utf-8") as infile:
for i,line in enumerate(infile):
line=line.replace("\n","").split("\t")
if line[1]=="TT" and line[2]=="Ids":
......@@ -74,5 +74,5 @@ for dirs,subdirs,files in os.walk('..\\EventFiles\\'):
et = etree.ElementTree(root)
out=etree.tostring(et,encoding="utf-8", pretty_print=True).decode('utf-8')
out=out.replace("
","
")
with open("..\\FixedEventFiles\\"+file,"w",encoding="utf-8") as outfile:
with open("../FixedEventFiles/"+file,"w",encoding="utf-8") as outfile:
outfile.write(out)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment