blob: dc8cc4fda2e07fb75bb2c85d11970e9faa26a47f [file] [log] [blame]
Harald Weltebb3b5df2021-05-24 23:15:54 +02001"""object-oriented TLV parser/encoder library."""
2
3# (C) 2021 by Harald Welte <laforge@osmocom.org>
4# All Rights Reserved
5#
6# This program is free software: you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation, either version 2 of the License, or
9# (at your option) any later version.
10#
11# This program is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program. If not, see <http://www.gnu.org/licenses/>.
18
19
20from typing import Optional, List, Dict, Any, Tuple
21from bidict import bidict
22from construct import *
23
24from pySim.utils import bertlv_encode_len, bertlv_parse_len, bertlv_encode_tag, bertlv_parse_tag
25from pySim.utils import comprehensiontlv_encode_tag, comprehensiontlv_parse_tag
26from pySim.utils import bertlv_parse_one, comprehensiontlv_parse_one
27from pySim.utils import bertlv_parse_tag_raw, comprehensiontlv_parse_tag_raw
28
29from pySim.construct import parse_construct, LV, HexAdapter, BcdAdapter, BitsRFU, GsmStringAdapter
30from pySim.exceptions import *
31
32import inspect
33import abc
Harald Weltee8d177d2022-02-11 17:08:45 +010034import re
Harald Weltebb3b5df2021-05-24 23:15:54 +020035
def camel_to_snake(name):
    """Convert a CamelCase identifier into its snake_case equivalent.

    Two regular-expression passes are applied before lower-casing:
    the first inserts '_' in front of every capitalised word (an upper-case
    letter followed by lower-case letters) that is preceded by any character,
    the second inserts '_' between a lower-case letter or digit and a
    directly following upper-case letter.
    """
    words_separated = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    fully_separated = re.sub('([a-z0-9])([A-Z])', r'\1_\2', words_separated)
    return fully_separated.lower()
Harald Weltec91085e2022-02-10 18:05:45 +010039
class TlvMeta(abc.ABCMeta):
    """Metaclass used to populate class-level attributes when an IE subclass is defined.

    Each TLV/IE type is modelled as a class: the class carries the fixed parameters
    (tag, description, permitted nested IEs) while its instances carry the actual data.
    """

    def __new__(metacls, name, bases, namespace, **kwargs):
        def setting(key):
            # a value from the class body wins over a class keyword argument;
            # None if neither is present
            return namespace.get(key, kwargs.get(key, None))

        new_cls = super().__new__(metacls, name, bases, namespace)
        # these become _class_ variables, not instance variables
        new_cls.tag = setting('tag')
        new_cls.desc = setting('desc')

        nested = setting('nested')
        is_collection_cls = inspect.isclass(nested) and issubclass(nested, TLV_IE_Collection)
        if nested is not None and not is_collection_cls:
            # caller passed a list of TLV classes that may appear inside this IE:
            # wrap them in a dynamically created TLV_IE_Collection sub-class
            collection_name = 'auto_collection_%s' % (name)
            nested = type(collection_name, (TLV_IE_Collection,), {'nested': nested})
        # either None, the TLV_IE_Collection sub-class given by the caller, or the
        # automatically generated one from above
        new_cls.nested_collection_cls = nested
        return new_cls
61
Harald Weltec91085e2022-02-10 18:05:45 +010062
class TlvCollectionMeta(abc.ABCMeta):
    """Metaclass used to populate class-level attributes when a collection subclass is defined.

    Each collection type is modelled as a class: the class carries the fixed list of
    nested IE classes while its instances carry the actual TLV data.
    """

    def __new__(metacls, name, bases, namespace, **kwargs):
        new_cls = super().__new__(metacls, name, bases, namespace)
        # 'nested' from the class body wins over a 'nested' class keyword argument;
        # the result (None if neither is given) becomes a _class_ variable
        from_keyword = kwargs.get('nested', None)
        new_cls.possible_nested = namespace.get('nested', from_keyword)
        return new_cls
73
74
class Transcodable(abc.ABC):
    """Base class for something that can be decoded + encoded.  Decoding and Encoding happens either
     * via a 'construct' object stored in a derived class' _construct variable, or
     * via a 'construct' object stored in an instance _construct variable, or
     * via a derived class' _{to,from}_bytes() methods."""
    # class-level 'construct' used when the instance does not carry its own
    _construct = None

    def __init__(self):
        # most recent binary representation (set by to_bytes() / from_bytes())
        self.encoded = None
        # most recent decoded representation (set by from_bytes() or by the user)
        self.decoded = None
        # optional per-instance 'construct'; takes precedence over the class-level one
        self._construct = None

    def to_bytes(self) -> bytes:
        """Convert from internal representation to binary bytes.  Store the binary result
        in the internal state and return it.

        A falsy decoded value (e.g. None) is encoded as zero-length bytes without
        consulting any encoder.

        Raises:
            NotImplementedError: if neither a 'construct' nor a _to_bytes() override exists
        """
        if not self.decoded:
            do = b''
        elif self._construct:
            do = self._construct.build(self.decoded, total_len=None)
        elif self.__class__._construct:
            do = self.__class__._construct.build(self.decoded, total_len=None)
        else:
            do = self._to_bytes()
        self.encoded = do
        return do

    # not an abstractmethod, as it is only required if no _construct exists
    def _to_bytes(self):
        """Encode self.decoded to bytes; to be overridden if no 'construct' is used."""
        raise NotImplementedError

    def from_bytes(self, do: bytes):
        """Convert from binary bytes to internal representation.  Store the decoded result
        in the internal state and return it.

        Zero-length input is decoded as None without consulting any decoder.

        Raises:
            NotImplementedError: if neither a 'construct' nor a _from_bytes() override exists
        """
        self.encoded = do
        if self.encoded == b'':
            self.decoded = None
        elif self._construct:
            self.decoded = parse_construct(self._construct, do)
        elif self.__class__._construct:
            self.decoded = parse_construct(self.__class__._construct, do)
        else:
            self.decoded = self._from_bytes(do)
        return self.decoded

    # not an abstractmethod, as it is only required if no _construct exists
    def _from_bytes(self, do: bytes):
        """Decode bytes to the internal representation; to be overridden if no 'construct' is used."""
        raise NotImplementedError
122
Harald Weltec91085e2022-02-10 18:05:45 +0100123
class IE(Transcodable, metaclass=TlvMeta):
    """Base class for various Information Elements.  On top of the Transcodable class it
    adds the notion of a hierarchy of IEs: an IE either carries a decoded value of its
    own or an ordered list of child IEs."""
    # the metaclass is specified here so that all downstream subclasses use it automatically;
    # both attributes below are overridden by TlvMeta whenever it creates a subclass
    nested_collection_cls = None
    tag = None

    def __init__(self, **kwargs):
        """Create an IE; optional keyword arguments 'children' and 'decoded' pre-populate it."""
        super().__init__()
        collection_cls = self.nested_collection_cls
        # instance of the collection describing which IEs may be nested inside us (if any)
        self.nested_collection = collection_cls() if collection_cls else None
        # if we are a constructed IE, [ordered] list of actual child-IE instances
        self.children = kwargs.get('children', [])
        self.decoded = kwargs.get('decoded', None)

    def __repr__(self):
        """Return a string representing the [nested] IE data (for print)."""
        own_name = type(self).__name__
        if len(self.children) == 0:
            return '%s(%s)' % (own_name, self.decoded)
        rendered_children = ','.join(repr(child) for child in self.children)
        return '%s(%s)' % (own_name, rendered_children)

    def to_dict(self):
        """Return a JSON-serializable dict representing the [nested] IE data.
        The single key is the snake_case form of the class name."""
        if len(self.children):
            payload = [child.to_dict() for child in self.children]
        else:
            payload = self.decoded
        key = camel_to_snake(type(self).__name__)
        return {key: payload}

    def from_dict(self, decoded: dict):
        """Set the IE internal decoded representation to data from the argument.
        If this is a nested IE, the child IE instance list is re-created."""
        if not self.nested_collection:
            self.children = []
            self.decoded = decoded
            return
        self.children = self.nested_collection.from_dict(decoded)

    def is_constructed(self):
        """Is this IE constructed by further nested IEs?"""
        return len(self.children) > 0

    @abc.abstractmethod
    def to_ie(self) -> bytes:
        """Convert the internal representation to entire IE including IE header."""

    def to_bytes(self) -> bytes:
        """Convert the internal representation _of the value part_ to binary bytes."""
        if not self.is_constructed():
            return super().to_bytes()
        # the value part is the concatenation of the encoded IEs of all children
        return b''.join(child.to_ie() for child in self.children)

    def from_bytes(self, do: bytes):
        """Parse _the value part_ from binary bytes to internal representation.

        With a nested collection the child list is rebuilt from the data and None is
        returned; otherwise the value decoded by Transcodable.from_bytes() is returned."""
        if not self.nested_collection:
            self.children = []
            return super().from_bytes(do)
        self.children = self.nested_collection.from_bytes(do)
        return None
195
196
class TLV_IE(IE):
    """Abstract base class for various TLV type Information Elements.  The concrete
    tag and length encodings are supplied by format-specific derived classes."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _compute_tag(self) -> int:
        """Compute the tag (sometimes the tag encodes part of the value)."""
        return self.tag

    @classmethod
    @abc.abstractmethod
    def _parse_tag_raw(cls, do: bytes) -> Tuple[int, bytes]:
        """Obtain the raw TAG at the start of the bytes provided by the user."""

    @classmethod
    @abc.abstractmethod
    def _parse_len(cls, do: bytes) -> Tuple[int, bytes]:
        """Obtain the length encoded at the start of the bytes provided by the user."""

    @abc.abstractmethod
    def _encode_tag(self) -> bytes:
        """Encode the tag part. Must be provided by derived (TLV format specific) class."""

    @abc.abstractmethod
    def _encode_len(self, val: bytes) -> bytes:
        """Encode the length part assuming a certain binary value. Must be provided by
        derived (TLV format specific) class."""

    def to_ie(self):
        """Convert the internal representation to the entire IE; for a TLV that is the TLV."""
        return self.to_tlv()

    def to_tlv(self):
        """Convert the internal representation to binary TLV bytes."""
        value = self.to_bytes()
        header = self._encode_tag() + self._encode_len(value)
        return header + value

    def from_tlv(self, do: bytes):
        """Parse one TLV from the start of the given bytes.

        Returns:
            tuple of (result of from_bytes() on the value part, bytes remaining after this TLV)
        Raises:
            ValueError: if the tag found in the data differs from the tag of this class
        """
        (rawtag, after_tag) = self.__class__._parse_tag_raw(do)
        if not rawtag:
            # no (or zero) tag could be obtained: treat the whole input as the value
            value, remainder = do, b''
        else:
            if rawtag != self.tag:
                raise ValueError("%s: Encountered tag %s doesn't match our supported tag %s" %
                                 (self, rawtag, self.tag))
            (length, after_len) = self.__class__._parse_len(after_tag)
            value = after_len[:length]
            remainder = after_len[length:]
        return self.from_bytes(value), remainder
248
249
class BER_TLV_IE(TLV_IE):
    """TLV_IE formatted as ASN.1 BER described in ITU-T X.690 8.1.2."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    # --- encoding: delegate to the bertlv_encode_* helpers ---

    def _encode_tag(self) -> bytes:
        """Encode the tag returned by _compute_tag() using bertlv_encode_tag()."""
        return bertlv_encode_tag(self._compute_tag())

    def _encode_len(self, val: bytes) -> bytes:
        """Encode the length of the given value bytes using bertlv_encode_len()."""
        return bertlv_encode_len(len(val))

    # --- decoding: delegate to the bertlv_parse_* helpers ---

    @classmethod
    def _parse_tag_raw(cls, do: bytes) -> Tuple[int, bytes]:
        """Obtain the raw tag at the start of the given bytes via bertlv_parse_tag_raw()."""
        return bertlv_parse_tag_raw(do)

    @classmethod
    def _decode_tag(cls, do: bytes) -> Tuple[dict, bytes]:
        """Decode the tag at the start of the given bytes via bertlv_parse_tag()."""
        return bertlv_parse_tag(do)

    @classmethod
    def _parse_len(cls, do: bytes) -> Tuple[int, bytes]:
        """Obtain the length at the start of the given bytes via bertlv_parse_len()."""
        return bertlv_parse_len(do)
273
274
class COMPR_TLV_IE(TLV_IE):
    """TLV_IE formatted as COMPREHENSION-TLV as described in ETSI TS 101 220."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # comprehension flag of this IE; starts out as False
        self.comprehension = False

    # --- encoding ---

    def _encode_tag(self) -> bytes:
        """Encode the tag returned by _compute_tag() using comprehensiontlv_encode_tag()."""
        return comprehensiontlv_encode_tag(self._compute_tag())

    def _encode_len(self, val: bytes) -> bytes:
        """Encode the length of the given value bytes; the BER-TLV length encoder is used."""
        return bertlv_encode_len(len(val))

    # --- decoding ---

    @classmethod
    def _parse_tag_raw(cls, do: bytes) -> Tuple[int, bytes]:
        """Obtain the raw tag at the start of the given bytes via comprehensiontlv_parse_tag_raw()."""
        return comprehensiontlv_parse_tag_raw(do)

    @classmethod
    def _decode_tag(cls, do: bytes) -> Tuple[dict, bytes]:
        """Decode the tag at the start of the given bytes via comprehensiontlv_parse_tag()."""
        return comprehensiontlv_parse_tag(do)

    @classmethod
    def _parse_len(cls, do: bytes) -> Tuple[int, bytes]:
        """Obtain the length at the start of the given bytes; the BER-TLV length parser is used."""
        return bertlv_parse_len(do)
299
300
class TLV_IE_Collection(metaclass=TlvCollectionMeta):
    # we specify the metaclass so any downstream subclasses will automatically use it
    """A TLV_IE_Collection consists of multiple TLV_IE classes identified by their tags.
    A given encoded DO may contain any of them in any order, and may contain multiple instances
    of each DO."""
    # this is overridden by the TlvCollectionMeta metaclass, if it is used to create subclasses
    possible_nested = []

    def __init__(self, desc=None, **kwargs):
        """Create a collection.

        Args:
            desc : optional description of the collection
            nested : (keyword) list of TLV_IE classes that may appear; defaults to the
                     class-level possible_nested
            children : (keyword) initial list of child IE instances
        """
        self.desc = desc
        members = kwargs.get('nested', self.possible_nested)
        # TlvCollectionMeta stores None if a class declares no 'nested' list; treat
        # that as an empty collection instead of failing on iteration below
        self.members = members if members is not None else []
        self.members_by_tag = {m.tag: m for m in self.members}
        self.members_by_name = {m.__name__: m for m in self.members}
        # if we are a constructed IE, [ordered] list of actual child-IE instances
        self.children = kwargs.get('children', [])
        self.encoded = None

    def __str__(self):
        member_strs = [str(x) for x in self.members]
        return '%s(%s)' % (type(self).__name__, ','.join(member_strs))

    def __repr__(self):
        member_strs = [repr(x) for x in self.members]
        return '%s(%s)' % (self.__class__, ','.join(member_strs))

    def __add__(self, other):
        """Extending TLV_IE_Collections with other TLV_IE_Collections or TLV_IEs.

        Returns a new TLV_IE_Collection; neither operand is modified.

        Raises:
            TypeError: if other is neither a TLV_IE_Collection instance nor a TLV_IE class
        """
        if isinstance(other, TLV_IE_Collection):
            # adding one collection to another
            members = self.members + other.members
            return TLV_IE_Collection(self.desc, nested=members)
        elif inspect.isclass(other) and issubclass(other, TLV_IE):
            # adding a member to a collection
            return TLV_IE_Collection(self.desc, nested=self.members + [other])
        else:
            raise TypeError('cannot add %r to a TLV_IE_Collection' % (other,))

    @staticmethod
    def _unknown_ie_class(first, tag):
        """Create, on the fly, an IE class for an unknown tag using the same base class
        as the given member class.  Its value is represented as {'raw': <hex string>}."""
        name = 'unknown_%s_%X' % (first.__base__.__name__, tag)
        cls = type(name, (first.__base__,), {'tag': tag, 'possible_nested': [],
                                             'nested_collection_cls': None})
        cls._from_bytes = lambda s, a: {'raw': a.hex()}
        cls._to_bytes = lambda s: bytes.fromhex(s.decoded['raw'])
        return cls

    def from_bytes(self, binary: bytes) -> List[TLV_IE]:
        """Create a list of TLV_IEs from the collection based on binary input data.
        Args:
            binary : binary bytes of encoded data
        Returns:
            list of instances of TLV_IE sub-classes containing parsed data
        Raises:
            ValueError: if there is data to parse but the collection has no member classes
        """
        self.encoded = binary
        # list of instances of TLV_IE collection member classes appearing in the data
        res = []
        remainder = binary
        # the first member class is used to parse tags and as template for unknown tags
        first = next(iter(self.members_by_tag.values()), None)
        # iterate until no binary trailer is left
        while len(remainder):
            if first is None:
                raise ValueError('%s: cannot parse data, collection has no member classes' % self)
            # obtain the tag at the start of the remainder
            tag, _rest = first._parse_tag_raw(remainder)
            if tag is None:
                # no further tag: stop, but still record what was parsed so far below
                break
            cls = self.members_by_tag.get(tag)
            if cls is None:
                # unknown tag; create the related class on-the-fly using the same base class
                cls = self._unknown_ie_class(first, tag)
            # create an instance and parse accordingly
            inst = cls()
            _dec, remainder = inst.from_tlv(remainder)
            res.append(inst)
        self.children = res
        return res

    def from_dict(self, decoded: List[dict]) -> List[TLV_IE]:
        """Create a list of TLV_IE instances from the collection based on an array
        of dicts, where the key indicates the name of the TLV_IE subclass to use.

        A key may be either the class name itself or its snake_case form (the form
        emitted by IE.to_dict()); the exact class name takes precedence.

        Raises:
            ValueError: if a key does not match any member class of this collection
        """
        # IE.to_dict() uses camel_to_snake(class name) as key, so accept that form as well
        members_by_snake_name = {camel_to_snake(m.__name__): m for m in self.members}
        # list of instances of TLV_IE collection member classes appearing in the data
        res = []
        for i in decoded:
            for k in i.keys():
                cls = self.members_by_name.get(k, members_by_snake_name.get(k))
                if cls is None:
                    raise ValueError('%s: Unknown TLV Class %s in %s; expected %s' %
                                     (self, k, decoded, self.members_by_name.keys()))
                inst = cls()
                inst.from_dict(i[k])
                res.append(inst)
        self.children = res
        return res

    def to_dict(self):
        """Return a list with the to_dict() result of every child IE."""
        return [x.to_dict() for x in self.children]

    def to_bytes(self):
        """Return the concatenation of the encoded TLVs of all child IEs."""
        return b''.join(c.to_tlv() for c in self.children)

    def from_tlv(self, do):
        """Same as from_bytes(); a collection has no tag/length header of its own."""
        return self.from_bytes(do)

    def to_tlv(self):
        """Same as to_bytes(); a collection has no tag/length header of its own."""
        return self.to_bytes()
Harald Welte9a2a6692022-02-11 15:44:28 +0100411
412
def flatten_dict_lists(inp):
    """hierarchically flatten each list-of-dicts into a single dict. This is useful to
    make the output of hierarchical TLV decoder structures flatter and more easy to read.

    A list is merged into one dict only if all of its elements are dicts and no key
    occurs more than once across them; otherwise (e.g. the same IE appearing several
    times) merging would silently drop data, so the list is kept and only its
    elements are processed recursively.  An empty list becomes an empty dict.

    Args:
        inp : arbitrarily nested structure of lists, dicts and other values
    Returns:
        new structure with mergeable lists-of-dicts replaced by dicts; values that
        are neither list nor dict are returned as they are
    """
    def can_merge(items):
        """True if all items are dicts and every key is unique across all of them."""
        seen = set()
        for e in items:
            if not isinstance(e, dict):
                return False
            for k in e:
                if k in seen:
                    return False
                seen.add(k)
        return True

    if isinstance(inp, list):
        if can_merge(inp):
            # flatten into one shared dict, keeping every key of every element
            merged = {}
            for e in inp:
                merged.update(e)
            # process result as any native dict
            return {k: flatten_dict_lists(v) for k, v in merged.items()}
        return [flatten_dict_lists(x) for x in inp]
    if isinstance(inp, dict):
        return {k: flatten_dict_lists(v) for k, v in inp.items()}
    return inp
437 return inp