cimimport.py 20.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
from lxml import etree
from time import time
import importlib
import logging
import os
import cimpy

logger = logging.getLogger(__name__)

10
11

def cim_import(xml_files, cgmes_version, start_dict=None):
    """Read CGMES RDF/XML files and instantiate the corresponding cimpy classes.

    The files are processed in two passes. In the first pass all classes found in the files are instantiated
    with default values; in the second pass the attributes and references contained in the files are set. The
    origin of all classes and attributes is stored in the attribute serializationProfile of each object.

    :param xml_files: sequence of CIM RDF/XML files. It is iterated once per pass and the XML namespaces are
        taken from its first entry only.
    :param cgmes_version: cgmes version, e.g. "cgmes_v2_4_15". It is appended to "cimpy." to build the package
        from which the class modules are imported.
    :param start_dict: optional dictionary that is used (and modified in place) as the import result. Its
        'meta_info' entry is replaced and a missing 'topology' entry is created.
        * If start_dict=None a new, empty result dictionary is created.

    :return: import_result: a dictionary containing the topology and meta information. The topology can be
        extracted via import_result['topology']; it contains all objects accessible via their rdf:ID. The meta
        information can be extracted via import_result['meta_info'], a dictionary with the keys 'namespaces',
        'urls' and, if one was found in the files, 'author'. 'urls' maps an attribute name to a dictionary
        between the extracted value and the complete URL, e.g. 'absoluteValue':
        'http://iec.ch/TC57/2012/CIM-schema-cim16#OperationalLimitDirectionKind.absoluteValue'. These mappings
        are accessible via the name of the attribute, i.e. import_result['meta_info']['urls'][attr_name].
        'namespaces' is a dictionary containing the namespaces declared in the first xml file.
    """

    # Python package that contains one module per CIM class of the requested version
    cgmes_version_path = "cimpy." + cgmes_version

    # Start the clock.
    t0 = time()

    # map used to group errors and infos (message -> number of occurrences)
    logger_grouped = dict(errors={}, info={})

    # create a dict which will contain meta information and the topology
    import_result = start_dict if start_dict is not None else dict(meta_info={}, topology={})
    # a caller supplied dictionary may lack the topology; both passes rely on it
    if 'topology' not in import_result:
        import_result['topology'] = {}

    # create sub-dictionaries
    import_result['meta_info'] = dict(namespaces=_get_namespaces(xml_files[0]), urls={})
    namespace_rdf = _get_rdf_namespace(import_result['meta_info']['namespaces'])

    # CIM element tag base (e.g. {http://iec.ch/TC57/2012/CIM-schema-cim16#} )
    base = "{" + import_result['meta_info']['namespaces']["cim"] + "}"

    import_result, logger_grouped = _instantiate_classes(import_result, xml_files, cgmes_version_path,
                                                         namespace_rdf, base, logger_grouped)

    import_result, logger_grouped = _set_attributes(import_result, xml_files, namespace_rdf, base, logger_grouped)

    # report every distinct problem once, together with its number of occurrences
    for error, count in logger_grouped['errors'].items():
        logger.warning('{} : {} times'.format(error, count))

    for info, count in logger_grouped['info'].items():
        logging_message = '{} : {} times'.format(info, count)
        logger.info(logging_message)

        # print info which classes and how many were instantiated
        print(logging_message)

    elapsed_time = time() - t0
    logger.info('Created totally {} CIM objects in {}s\n\n'.format(len(import_result['topology']), elapsed_time))
    # print info of how many classes in total were instantiated to terminal
    print('Created totally {} CIM objects in {}s'.format(len(import_result['topology']), elapsed_time))

    return import_result
77
78


79
80
81
82
# This function instantiates the classes defined in all RDF files. All attributes are set to default values.
# The only exception is the mRID which is set for all classes that have this attribute. The attributes of a class
# are set in the _set_attributes function because some attributes might be stored in one package and the class in
# another. Since after this function all classes are instantiated, there should be no problem in setting the attributes.
83
# Also the information from which package file a class was read is stored in the serializationProfile dictionary.
84
85
86
87
88
89
def _instantiate_classes(import_result, xml_files, cgmes_version_path, namespace_rdf, base,
                         logger_grouped):
    """First import pass: create one default-initialised object per CIM class element.

    Every element in the CIM namespace that carries an rdf:ID is looked up as module
    ``<cgmes_version_path>.<tag>``, instantiated without arguments and stored in
    import_result['topology'] under its rdf:ID. Attributes are not set here, except for mRID on classes that
    have this attribute. The abbreviation of the profile the class was read from is stored in
    serializationProfile['class'].

    :param import_result: result dictionary; its 'topology' and 'meta_info' entries are filled in place.
    :param xml_files: the RDF/XML files to parse.
    :param cgmes_version_path: dotted path of the package containing the CIM class modules.
    :param namespace_rdf: RDF namespace used to build the rdf:ID attribute name.
    :param base: CIM element tag base, e.g. "{http://iec.ch/TC57/2012/CIM-schema-cim16#}".
    :param logger_grouped: dictionary with the keys 'errors' and 'info', each mapping a message to the number
        of times it occurred.
    :return: tuple (import_result, logger_grouped)
    """

    # extract topology from import_result
    topology = import_result['topology']
    meta_info = import_result['meta_info']
    errors = logger_grouped['errors']
    infos = logger_grouped['info']

    rdf_id = "{%s}ID" % namespace_rdf
    # length of element tag base
    m = len(base)

    # first step: create the dict topology[uuid] = instance_of_the_cim_class
    for xml_file in xml_files:

        logger.info('START of parsing file "%s"', xml_file)

        # get an iterable and turn it into an iterator
        context = iter(etree.iterparse(xml_file, ("start", "end")))

        # Get the root element ({http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF).
        _, root = next(context)

        # key of short_package_name for the profile of this file, '' while unknown
        package = ''

        for event, elem in context:

            # Process 'end' elements in the CGMES namespace.
            if event == "end" and elem.tag[:m] == base:

                # check if the element has the attribute "rdf:ID" --> CGMES class located
                uuid = elem.get(rdf_id)
                if uuid is not None:  # cim class
                    # Element tag without namespace (e.g. VoltageLevel).
                    tag = elem.tag[m:]
                    module_name = cgmes_version_path + '.' + tag
                    try:
                        # Import the module for the CGMES object.
                        module = importlib.import_module(module_name)
                    except ModuleNotFoundError:
                        error_msg = 'Module {} not implemented'.format(tag)
                        errors[error_msg] = errors.get(error_msg, 0) + 1

                        root.clear()
                        continue

                    # Get the CGMES class from the module.
                    klass = getattr(module, tag)
                    # Instantiate the class and map it to the uuid.
                    instance = klass()
                    topology[uuid] = instance

                    info_msg = 'CIM object {} created'.format(module_name.split('.')[-1])
                    infos[info_msg] = infos.get(info_msg, 0) + 1

                    # check if the class has the attribute mRID and set the mRID to the read in UUID. If the class
                    # does not have this attribute, the UUID is only stored in the topology dictionary.
                    if hasattr(instance, 'mRID'):
                        instance.mRID = uuid

                    # compare by value; an identity test against a literal is not reliable
                    if package != '':
                        instance.serializationProfile['class'] = short_package_name[package]
                    else:
                        # name the CIM class itself; klass.__class__ would be its metaclass
                        error_msg = 'Package information not found for class {}'.format(
                            type(instance).__name__
                        )
                        errors[error_msg] = errors.get(error_msg, 0) + 1

            elif event == "end":
                # Check which package is read
                if 'Model.profile' in elem.tag:
                    # an empty profile element has no text
                    profile_text = elem.text or ''
                    for package_key in short_package_name:
                        if package_key in profile_text:
                            package = package_key
                            break
                # the author of all imported files should be the same, avoid multiple entries
                elif 'author' not in meta_info and (
                        'Model.createdBy' in elem.tag or 'Model.modelingAuthoritySet' in elem.tag):
                    # extract author
                    meta_info['author'] = elem.text

            # Clear children of the root element to minimise memory usage.
            root.clear()

    return import_result, logger_grouped
180
181


182
183
# This function sets all attributes after the classes are instantiated by _instantiate_classes. Cyclic attributes like
# PowerTransformerEnd <-> PowerTransformer are set. This function also stores the information from which package file
184
# the attributes are read in the serializationProfile dictionary.
185
def _set_attributes(import_result, xml_files, namespace_rdf, base, logger_grouped):
    """Second import pass: set the attributes and references of the already instantiated objects.

    For every element in the CIM namespace that carries an rdf:ID or rdf:about, the matching object is looked up
    in import_result['topology'] and its child elements are applied to it:

    * a child without rdf:resource is a plain attribute; its text is converted with the type of the current
      (default) value of the attribute,
    * a child whose rdf:resource starts with '#' is a reference to another object of the topology; the
      reference is set on the object and, if the referenced object has an attribute named like the class of the
      object, also in the opposite direction,
    * any other rdf:resource is treated as an enum; the part after the last '.' is stored and, for values
      containing 'http', the complete URL is recorded in import_result['meta_info']['urls'].

    The abbreviation of the profile an attribute was read from is stored in the serializationProfile dictionary
    of the object.

    :param import_result: result dictionary with the keys 'topology' and 'meta_info'; modified in place.
    :param xml_files: the RDF/XML files to parse.
    :param namespace_rdf: RDF namespace used to build the rdf:ID, rdf:about and rdf:resource attribute names.
    :param base: CIM element tag base, e.g. "{http://iec.ch/TC57/2012/CIM-schema-cim16#}".
    :param logger_grouped: dictionary with the keys 'errors' and 'info', each mapping a message to the number
        of times it occurred.
    :return: tuple (import_result, logger_grouped)
    """

    topology = import_result['topology']
    urls = import_result['meta_info']['urls']
    errors = logger_grouped['errors']

    rdf_id = "{%s}ID" % namespace_rdf
    rdf_about = "{%s}about" % namespace_rdf
    rdf_resource = "{%s}resource" % namespace_rdf
    m = len(base)

    # Second step pass sets attributes and references.
    for xml_file in xml_files:

        # get an iterable and turn it into an iterator
        context = iter(etree.iterparse(xml_file, ("start", "end")))

        # Get the root element ({http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF).
        _, root = next(context)

        # key of short_package_name for the profile of this file, '' while unknown
        package = ''

        for event, elem in context:

            # Process 'start' elements in the CGMES namespace.
            if event == "start" and elem.tag[:m] == base:
                uuid = elem.get(rdf_id)
                if uuid is None:
                    uuid = elem.get(rdf_about)
                    if uuid is not None:
                        uuid = uuid[1:]  # remove '#' prefix
                if uuid is not None:
                    # Locate the CGMES object using the uuid.
                    try:
                        obj = topology[uuid]
                    except KeyError:
                        error_msg = 'Missing {} object with uuid: {}'.format(elem.tag[m:], uuid)
                        errors[error_msg] = errors.get(error_msg, 0) + 1
                        root.clear()
                        continue

                    obj_class = obj.__class__.__name__

                    # Iterate over attributes/references. This consumes events from the same iterator as
                    # the outer loop, which continues after the class closing element.
                    for event, elem in context:
                        # Process end events with elements in the CIM namespace only.
                        if event != "end" or elem.tag[:m] != base:
                            continue

                        # Break if class closing element (e.g. </cim:Terminal>).
                        if elem.get(rdf_id) is not None or elem.get(rdf_about) is not None:
                            # Finished setting object attributes.
                            break

                        # Get the attribute/reference name.
                        attr = elem.tag[m:].rsplit(".")[-1]

                        if not hasattr(obj, attr):
                            error_msg = "'%s' has not attribute '%s'" % (obj_class, attr)
                            errors[error_msg] = errors.get(error_msg, 0) + 1
                            continue

                        # Use the rdf:resource attribute to distinguish between attributes and references/enums.
                        uuid2 = elem.get(rdf_resource)

                        if uuid2 is None:  # attribute
                            # Convert value type using the default value.
                            try:
                                default = getattr(obj, attr)
                                if isinstance(default, bool):
                                    # bool("false") returns True because the string is not empty, so
                                    # booleans are compared with the text instead of being converted.
                                    setattr(obj, attr, str.title(elem.text) == 'True')
                                else:
                                    setattr(obj, attr, type(default)(elem.text))
                            except TypeError:
                                # no usable default type (e.g. None): store the text unchanged
                                try:
                                    setattr(obj, attr, elem.text)
                                except TypeError:
                                    # best effort: the attribute keeps its previous value
                                    pass

                        elif uuid2.startswith("#"):  # reference
                            ref_id = uuid2[1:]  # remove '#' prefix
                            try:
                                val = topology[ref_id]
                            except KeyError:
                                error_msg = 'Referenced {} [{}] object missing.'.format(obj_class, ref_id)
                                errors[error_msg] = errors.get(error_msg, 0) + 1
                                continue

                            _link_reference(obj, attr, val, uuid, errors)
                            # set the opposite direction if the referenced object has an attribute
                            # named like the class of the current object
                            if hasattr(val, obj_class):
                                _link_reference(val, obj_class, obj, ref_id, errors)

                        else:  # enum
                            enum_value = uuid2.rsplit(".", 1)[1]
                            # if http in uuid2 reference to URL, create mapping
                            if 'http' in uuid2:
                                urls.setdefault(attr, {}).setdefault(enum_value, uuid2)
                            setattr(obj, attr, enum_value)

                        # compare by value; an identity test against a literal is not reliable
                        if package != '':
                            obj.serializationProfile[attr] = short_package_name[package]
                        else:
                            error_msg = 'Package information not found for class {}, attribute {}'.format(
                                obj_class, attr
                            )
                            errors[error_msg] = errors.get(error_msg, 0) + 1

            # Check which package is read
            elif event == "end" and 'Model.profile' in elem.tag:
                # an empty profile element has no text
                profile_text = elem.text or ''
                for package_key in short_package_name:
                    if package_key in profile_text:
                        package = package_key
                        break

            # Clear children of the root element to minimise memory usage.
            root.clear()

        logger.info('END of parsing file "{}"'.format(xml_file))
    return import_result, logger_grouped


# Stores target in the attribute attr of holder, respecting the multiplicity indicated by the current value.
def _link_reference(holder, attr, target, holder_id, errors):
    """Set ``holder.attr`` to reference ``target``.

    The current value of the attribute decides how the reference is stored: None is replaced by the target,
    the string 'list' is replaced by a new list containing the target, and an existing list gets the target
    appended unless it is already contained. A value equal to the target is left untouched. Any other value
    is counted as a multiplicity error in ``errors`` (message -> number of occurrences).

    :param holder: object whose attribute is set.
    :param attr: name of the attribute of holder.
    :param target: object to be referenced.
    :param holder_id: id of holder, only used in the error message.
    :param errors: dictionary collecting the grouped error messages.
    """
    current = getattr(holder, attr)
    if current is None:  # 1..1 or 0..1
        # Rely on properties to set any bi-directional references.
        setattr(holder, attr, target)
    elif current == 'list':  # many, nothing stored yet
        setattr(holder, attr, [target])
    elif isinstance(current, list):  # many
        if target not in current:
            current.append(target)
            setattr(holder, attr, current)
    elif current == target:
        # attribute reference already resolved
        pass
    else:
        error_msg = 'Multiplicity Error for class {} [{}], attribute {}. Multiplicity should be 1..1 or 0..1'.format(
            holder.__class__.__name__, holder_id, attr)
        errors[error_msg] = errors.get(error_msg, 0) + 1
365
366


367
# Returns a map of prefix to namespace for the given XML file.
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
def _get_namespaces(source):
    namespaces = {}
    events = ("end", "start-ns", "end-ns")
    for (event, elem) in etree.iterparse(source, events):
        if event == "start-ns":
            prefix, ns = elem
            namespaces[prefix] = ns
        elif event == "end":
            break

    # Reset stream
    if hasattr(source, "seek"):
        source.seek(0)

    return namespaces


385
# Returns the RDF Namespace from the namespaces dictionary
386
387
388
389
390
391
392
393
def _get_rdf_namespace(namespaces):
    try:
        namespace = namespaces['rdf']
    except KeyError:
        ns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
        logger.warning('No rdf namespace found. Using %s' % ns)

    return namespace
394
395
396
397
398
399
400
401
402
403
404
405


# Maps the name of a CGMES profile to its abbreviation according to the CGMES standard. The keys are
# searched for in the text of the Model.profile element to detect the profile of a file.
short_package_name = {
    "DiagramLayout": "DI",
    "Dynamics": "DY",
    "Equipment": "EQ",
    "GeographicalLocation": "GL",
    "StateVariables": "SV",
    "SteadyStateHypothesis": "SSH",
    "Topology": "TP",
}