# -*- coding: utf-8 -*-
"""
2_CreateFixedEventFile.py

Created on Wed Apr 28 11:42:17 2021

@author: Arndt

For every file below ``../EventFiles/`` whose name ends in ``Event.xml``:

1. parse the XML (after two textual repairs on the raw text),
2. replace newlines and spaces in each ``Mod`` element's ``chr`` attribute
   with ``_``,
3. overwrite the ``tid`` of the ``Mod`` elements listed in the matching
   ``../ManualRealignment/<name>.pzl`` file,
4. rebuild each ``Mod`` element's ``sid`` from the file's ``Align`` elements,
5. write the result to ``../FixedEventFiles/`` under the same file name.
"""

import os

from lxml import etree

EVENT_DIR = "../EventFiles/"
REALIGNMENT_DIR = "../ManualRealignment/"
OUTPUT_DIR = "../FixedEventFiles/"


def _parse_event_file(path):
    """Read the event file at *path*, repair its text and return the XML root."""
    with open(path, "r", encoding="utf-8") as infile:
        text = infile.read()

    text = text.replace("LogFile", "logfile")
    # An attribute written as chr= followed by three double quotes is not
    # well-formed XML; escape the inner quote so the parser accepts it.
    # NOTE(review): the replacement literal was damaged in the reviewed copy
    # (the entity had been decoded and the attribute was spelled "char").
    # "chr" is used here because every Mod's "chr" attribute is read
    # unconditionally afterwards - confirm against the repository.
    text = text.replace('chr="""', 'chr="&quot;"')

    parser = etree.XMLParser(remove_blank_text=True)
    return etree.fromstring(text, parser=parser)


def _normalise_chr(mods):
    """Replace newlines and spaces in every Mod's ``chr`` attribute with ``_``.

    Raises KeyError if a Mod element has no ``chr`` attribute.
    """
    for mod in mods:
        mod.attrib["chr"] = mod.attrib["chr"].replace("\n", "_").replace(" ", "_")
        # NOTE(review): the earlier version also wrapped the chr of
        # type == "Mdel" elements in [..], but only on a tree that was thrown
        # away; the written file never contained the brackets. That output is
        # preserved here - confirm whether brackets were actually intended.


def _collect_alignment(root):
    """Return a dict mapping each Align ``tid`` to the list of its ``sid`` values."""
    alignment = {}
    for align in root.iter("Align"):
        alignment.setdefault(align.attrib["tid"], []).append(align.attrib["sid"])
    return alignment


def _apply_manual_realignment(mods, pzl_path):
    """Overwrite ``tid`` on the Mod elements listed in the .pzl file.

    Only tab-separated lines whose second field is ``TT`` and third field is
    ``Ids`` are used: the fourth field is the new ``tid`` and every non-empty
    field from the sixth onwards is the position of a Mod element in document
    order.

    Raises ValueError if a listed position does not refer to a Mod element.
    """
    with open(pzl_path, "r", encoding="utf-8") as infile:
        for line_no, raw_line in enumerate(infile, start=1):
            fields = raw_line.replace("\n", "").split("\t")
            # Blank or short lines carry no realignment information.
            if len(fields) < 4 or fields[1] != "TT" or fields[2] != "Ids":
                continue
            new_tid = fields[3]
            for item in fields[5:]:
                if item == "":
                    continue
                index = int(item)
                # Reject negative values explicitly: list indexing would
                # otherwise silently count from the end.
                if not 0 <= index < len(mods):
                    raise ValueError(
                        f"{pzl_path}, line {line_no}: "
                        f"no Mod element at position {index}"
                    )
                mods[index].attrib["tid"] = new_tid


def _update_sids(mods, alignment):
    """Set each Mod's ``sid`` to the '+'-joined sids aligned to its ``tid``.

    Mod elements without a ``tid``, or whose ``tid`` has no Align entry, keep
    whatever ``sid`` they already have.
    """
    for mod in mods:
        sids = alignment.get(mod.attrib.get("tid"))
        if sids is not None:
            mod.attrib["sid"] = "+".join(sids)


def _fix_event_file(directory, file):
    """Create the fixed copy of one event file in OUTPUT_DIR."""
    name = file.replace(".Event.xml", "")

    root = _parse_event_file(os.path.join(directory, file))
    mods = list(root.iter("Mod"))

    _normalise_chr(mods)
    # The alignment table is taken from the file itself, before realignment.
    alignment = _collect_alignment(root)
    _apply_manual_realignment(mods, REALIGNMENT_DIR + name + ".pzl")
    _update_sids(mods, alignment)

    tree = etree.ElementTree(root)
    out = etree.tostring(tree, encoding="utf-8", pretty_print=True).decode("utf-8")
    # NOTE(review): the earlier version called out.replace(...) here, but both
    # arguments were lost in the reviewed copy (each appeared as a raw line
    # break, presumably a decoded character reference). As visible it was a
    # no-op, so it is omitted - restore the real arguments from the repository.

    with open(OUTPUT_DIR + file, "w", encoding="utf-8") as outfile:
        outfile.write(out)


def main():
    """Fix every ``*Event.xml`` file found below EVENT_DIR."""
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    for directory, _subdirs, files in os.walk(EVENT_DIR):
        for file in files:
            if file.endswith("Event.xml"):
                print(file)
                _fix_event_file(directory, file)


if __name__ == "__main__":
    main()