blob: 827972f65aa61d1d36035c7af4fe41300fcd158d [file] [log] [blame]
Harald Weltebb3b5df2021-05-24 23:15:54 +02001"""object-oriented TLV parser/encoder library."""
2
3# (C) 2021 by Harald Welte <laforge@osmocom.org>
4# All Rights Reserved
5#
6# This program is free software: you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation, either version 2 of the License, or
9# (at your option) any later version.
10#
11# This program is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program. If not, see <http://www.gnu.org/licenses/>.
18
19
20from typing import Optional, List, Dict, Any, Tuple
21from bidict import bidict
22from construct import *
23
24from pySim.utils import bertlv_encode_len, bertlv_parse_len, bertlv_encode_tag, bertlv_parse_tag
25from pySim.utils import comprehensiontlv_encode_tag, comprehensiontlv_parse_tag
26from pySim.utils import bertlv_parse_one, comprehensiontlv_parse_one
27from pySim.utils import bertlv_parse_tag_raw, comprehensiontlv_parse_tag_raw
28
29from pySim.construct import parse_construct, LV, HexAdapter, BcdAdapter, BitsRFU, GsmStringAdapter
30from pySim.exceptions import *
31
32import inspect
33import abc
Harald Weltee8d177d2022-02-11 17:08:45 +010034import re
Harald Weltebb3b5df2021-05-24 23:15:54 +020035
def camel_to_snake(name):
    """Convert a CamelCase identifier into its snake_case equivalent.

    Args:
        name : identifier in CamelCase notation (e.g. a class name)
    Returns:
        the same identifier in lower-case, with underscores at the word boundaries
    """
    # first pass: put an underscore in front of every capitalized word ("FooBar" -> "Foo_Bar")
    split_at_words = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    # second pass: separate a lower-case letter or digit from a directly following capital
    split_at_capitals = re.sub('([a-z0-9])([A-Z])', r'\1_\2', split_at_words)
    return split_at_capitals.lower()
Harald Weltec91085e2022-02-10 18:05:45 +010039
class TlvMeta(abc.ABCMeta):
    """Metaclass which sets some class variables at the time a subclass is defined.

    This allows creating one subclass per TLV/IE type, where the class represents the fixed
    parameters (like the tag/type) and instances of it represent the actual TLV data.

    Each of 'tag', 'desc' and 'nested' is looked up in the class body first and in the keyword
    arguments of the class statement second; if it is found in neither, None is used."""

    def __new__(metacls, name, bases, namespace, **kwargs):
        def class_param(key):
            # an attribute of the class body takes precedence over a class keyword argument
            if key in namespace:
                return namespace[key]
            return kwargs.get(key, None)

        new_cls = super().__new__(metacls, name, bases, namespace)
        # these become _class_ variables, not instance variables
        new_cls.tag = class_param('tag')
        new_cls.desc = class_param('desc')
        collection_cls = class_param('nested')
        is_ready_made = inspect.isclass(collection_cls) and issubclass(collection_cls, TLV_IE_Collection)
        if collection_cls is not None and not is_ready_made:
            # caller passed a list of other TLV classes that might possibly appear within us:
            # build a dynamically-created TLV_IE_Collection sub-class wrapping that list
            auto_name = 'auto_collection_%s' % (name)
            collection_cls = type(auto_name, (TLV_IE_Collection,), {'nested': collection_cls})
        # either None (nothing nested), the TLV_IE_Collection sub-class specified by the caller,
        # or the collection class auto-generated above
        new_cls.nested_collection_cls = collection_cls
        return new_cls
61
Harald Weltec91085e2022-02-10 18:05:45 +010062
class TlvCollectionMeta(abc.ABCMeta):
    """Metaclass which sets some class variables at the time a subclass is defined.

    This allows creating one subclass per Collection type, where the class represents the fixed
    parameters (like the nested IE classes) and instances of it represent the actual TLV data.

    The 'nested' parameter is looked up in the class body first and in the keyword arguments
    of the class statement second; if it is found in neither, None is used."""

    def __new__(metacls, name, bases, namespace, **kwargs):
        new_cls = super().__new__(metacls, name, bases, namespace)
        # an attribute of the class body takes precedence over a class keyword argument
        if 'nested' in namespace:
            nested = namespace['nested']
        else:
            nested = kwargs.get('nested', None)
        # this becomes a _class_ variable, not an instance variable
        new_cls.possible_nested = nested
        return new_cls
73
74
class Transcodable(abc.ABC):
    """Base class for something that can be encoded + decoded. Decoding and Encoding happens either
     * via a 'construct' object stored in a derived class' _construct variable, or
     * via a 'construct' object stored in an instance _construct variable, or
     * via a derived class' _{to,from}_bytes() methods."""
    # class-level 'construct' object; used whenever the instance-level one is not set
    _construct = None

    def __init__(self):
        self.encoded = None
        self.decoded = None
        self._construct = None

    def to_bytes(self) -> bytes:
        """Convert from internal representation to binary bytes. Store the binary result
        in the internal state and return it.

        Returns:
            the encoded value; empty bytes if there is no decoded value (None)
        Raises:
            NotImplementedError: neither a 'construct' object nor _to_bytes() is available
        """
        # Only None means "no value". Falsy but legitimate values (the integer 0, False, ...)
        # must still be encoded instead of being turned into an empty value.
        if self.decoded is None:
            do = b''
        elif self._construct:
            do = self._construct.build(self.decoded, total_len=None)
        elif self.__class__._construct:
            do = self.__class__._construct.build(self.decoded, total_len=None)
        else:
            do = self._to_bytes()
        self.encoded = do
        return do

    # not an abstractmethod, as it is only required if no _construct exists
    def _to_bytes(self):
        raise NotImplementedError('%s._to_bytes' % type(self).__name__)

    def from_bytes(self, do: bytes):
        """Convert from binary bytes to internal representation. Store the decoded result
        in the internal state and return it.

        Args:
            do : binary bytes of the encoded value
        Returns:
            the decoded value; None if the input is empty
        Raises:
            NotImplementedError: neither a 'construct' object nor _from_bytes() is available
        """
        self.encoded = do
        if self.encoded == b'':
            self.decoded = None
        elif self._construct:
            self.decoded = parse_construct(self._construct, do)
        elif self.__class__._construct:
            self.decoded = parse_construct(self.__class__._construct, do)
        else:
            self.decoded = self._from_bytes(do)
        return self.decoded

    # not an abstractmethod, as it is only required if no _construct exists
    def _from_bytes(self, do: bytes):
        raise NotImplementedError('%s._from_bytes' % type(self).__name__)
Harald Weltebb3b5df2021-05-24 23:15:54 +0200122
Harald Weltec91085e2022-02-10 18:05:45 +0100123
class IE(Transcodable, metaclass=TlvMeta):
    """Base class for various Information Elements. We understand the notion of a hierarchy
    of IEs on top of the Transcodable class."""
    # we specify the metaclass so any downstream subclasses will automatically use it

    # this is overridden by the TlvMeta metaclass, if it is used to create subclasses
    nested_collection_cls = None
    tag = None

    def __init__(self, **kwargs):
        """Create an IE instance.

        Keyword Args:
            children : [ordered] list of child-IE instances (default: empty list)
            decoded : initial decoded value (default: None)
        """
        super().__init__()
        collection_cls = self.nested_collection_cls
        # only IE classes which declared nested IEs get a collection instance
        self.nested_collection = collection_cls() if collection_cls else None
        # if we are a constructed IE, [ordered] list of actual child-IE instances
        self.children = kwargs.get('children', [])
        self.decoded = kwargs.get('decoded', None)

    def __repr__(self):
        """Return a string representing the [nested] IE data (for print)."""
        own_name = type(self).__name__
        if len(self.children) == 0:
            return '%s(%s)' % (own_name, self.decoded)
        return '%s(%s)' % (own_name, ','.join(repr(child) for child in self.children))

    def to_dict(self):
        """Return a JSON-serializable dict representing the [nested] IE data.

        The only key is the snake_case form of the class name; the value is the list of the
        children's dicts if there are children, and the decoded value otherwise."""
        value = [child.to_dict() for child in self.children] if len(self.children) else self.decoded
        return {camel_to_snake(type(self).__name__): value}

    def from_dict(self, decoded: dict):
        """Set the IE internal decoded representation to data from the argument.
        If this is a nested IE, the child IE instance list is re-created.

        Raises:
            ValueError: a non-nested IE was given a dict lacking the key named after its class
        """
        if self.nested_collection:
            # the nested collection gets the argument as-is and creates the child instances
            self.children = self.nested_collection.from_dict(decoded)
            return
        self.children = []
        expected_key_name = camel_to_snake(type(self).__name__)
        if expected_key_name not in decoded:
            raise ValueError("Dict %s doesn't contain expected key %s" % (decoded, expected_key_name))
        self.decoded = decoded[expected_key_name]

    def is_constructed(self):
        """Is this IE constructed by further nested IEs?"""
        return len(self.children) > 0

    @abc.abstractmethod
    def to_ie(self) -> bytes:
        """Convert the internal representation to entire IE including IE header."""

    def to_bytes(self) -> bytes:
        """Convert the internal representation _of the value part_ to binary bytes."""
        if not self.is_constructed():
            return super().to_bytes()
        # concatenate the encoded IE of all children to form the value part
        return b''.join(child.to_ie() for child in self.children)

    def from_bytes(self, do: bytes):
        """Parse _the value part_ from binary bytes to internal representation.

        Returns:
            the decoded value for an IE without nested collection. For an IE with a nested
            collection only the children are re-created and nothing (None) is returned.
        """
        if self.nested_collection:
            self.children = self.nested_collection.from_bytes(do)
            return None
        self.children = []
        return super().from_bytes(do)
198
199
class TLV_IE(IE):
    """Abstract base class for various TLV type Information Elements."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _compute_tag(self) -> int:
        """Compute the tag (sometimes the tag encodes part of the value)."""
        return self.tag

    @classmethod
    @abc.abstractmethod
    def _parse_tag_raw(cls, do: bytes) -> Tuple[int, bytes]:
        """Obtain the raw TAG at the start of the bytes provided by the user."""

    @classmethod
    @abc.abstractmethod
    def _parse_len(cls, do: bytes) -> Tuple[int, bytes]:
        """Obtain the length encoded at the start of the bytes provided by the user."""

    @abc.abstractmethod
    def _encode_tag(self) -> bytes:
        """Encode the tag part. Must be provided by derived (TLV format specific) class."""

    @abc.abstractmethod
    def _encode_len(self, val: bytes) -> bytes:
        """Encode the length part assuming a certain binary value. Must be provided by
        derived (TLV format specific) class."""

    def to_ie(self):
        """Convert the internal representation to the entire IE; for a TLV this is the TLV."""
        return self.to_tlv()

    def to_tlv(self):
        """Convert the internal representation to binary TLV bytes."""
        val = self.to_bytes()
        return self._encode_tag() + self._encode_len(val) + val

    def from_tlv(self, do: bytes):
        """Parse one binary TLV from the start of the given bytes into this instance.

        Args:
            do : binary bytes starting with the encoded TLV
        Returns:
            tuple of (result of from_bytes() on the value part, remaining bytes after the TLV);
            ({}, b'') if the input is empty
        Raises:
            ValueError: the tag found in the data is not the tag of this IE
        """
        if len(do) == 0:
            return {}, b''
        (rawtag, remainder) = self.__class__._parse_tag_raw(do)
        if rawtag:
            expected_tag = self._compute_tag()
            if rawtag != expected_tag:
                # report the tag we actually compared against, which may differ from the
                # static class-level tag if a subclass computes its tag
                raise ValueError("%s: Encountered tag %s doesn't match our supported tag %s" %
                                 (self, rawtag, expected_tag))
            (length, remainder) = self.__class__._parse_len(remainder)
            value = remainder[:length]
            remainder = remainder[length:]
        else:
            # no usable tag reported by the parser: treat all of the input as the value part
            value = do
            remainder = b''
        dec = self.from_bytes(value)
        return dec, remainder
253
254
class BER_TLV_IE(TLV_IE):
    """TLV_IE formatted as ASN.1 BER described in ITU-T X.690 8.1.2."""

    # no __init__ of our own: construction is entirely handled by the inherited one,
    # which accepts the very same keyword arguments

    @classmethod
    def _decode_tag(cls, do: bytes) -> Tuple[dict, bytes]:
        """Parse the tag at the start of the given bytes using bertlv_parse_tag()."""
        return bertlv_parse_tag(do)

    @classmethod
    def _parse_tag_raw(cls, do: bytes) -> Tuple[int, bytes]:
        """Obtain the raw tag at the start of the given bytes using bertlv_parse_tag_raw()."""
        return bertlv_parse_tag_raw(do)

    @classmethod
    def _parse_len(cls, do: bytes) -> Tuple[int, bytes]:
        """Obtain the length at the start of the given bytes using bertlv_parse_len()."""
        return bertlv_parse_len(do)

    def _encode_tag(self) -> bytes:
        """Encode the (computed) tag of this IE using bertlv_encode_tag()."""
        tag = self._compute_tag()
        return bertlv_encode_tag(tag)

    def _encode_len(self, val: bytes) -> bytes:
        """Encode the length of the given binary value using bertlv_encode_len()."""
        value_len = len(val)
        return bertlv_encode_len(value_len)
278
279
class COMPR_TLV_IE(TLV_IE):
    """TLV_IE formatted as COMPREHENSION-TLV as described in ETSI TS 101 220."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # initialized to False for every new instance; nothing in this class changes it
        self.comprehension = False

    @classmethod
    def _decode_tag(cls, do: bytes) -> Tuple[dict, bytes]:
        """Parse the tag at the start of the given bytes using comprehensiontlv_parse_tag()."""
        return comprehensiontlv_parse_tag(do)

    @classmethod
    def _parse_tag_raw(cls, do: bytes) -> Tuple[int, bytes]:
        """Obtain the raw tag at the start of the given bytes using
        comprehensiontlv_parse_tag_raw()."""
        return comprehensiontlv_parse_tag_raw(do)

    @classmethod
    def _parse_len(cls, do: bytes) -> Tuple[int, bytes]:
        """Obtain the length at the start of the given bytes. The BER-TLV length parser
        bertlv_parse_len() is used for this."""
        return bertlv_parse_len(do)

    def _encode_tag(self) -> bytes:
        """Encode the (computed) tag of this IE using comprehensiontlv_encode_tag()."""
        tag = self._compute_tag()
        return comprehensiontlv_encode_tag(tag)

    def _encode_len(self, val: bytes) -> bytes:
        """Encode the length of the given binary value. The BER-TLV length encoder
        bertlv_encode_len() is used for this."""
        value_len = len(val)
        return bertlv_encode_len(value_len)
304
305
class TLV_IE_Collection(metaclass=TlvCollectionMeta):
    """A TLV_IE_Collection consists of multiple TLV_IE classes identified by their tags.
    A given encoded DO may contain any of them in any order, and may contain multiple instances
    of each DO."""
    # we specify the metaclass so any downstream subclasses will automatically use it

    # this is overridden by the TlvCollectionMeta metaclass, if it is used to create subclasses
    possible_nested = []

    def __init__(self, desc=None, **kwargs):
        """Create a collection instance.

        Args:
            desc : optional description of the collection
        Keyword Args:
            nested : list of member TLV_IE classes (default: the class-level possible_nested)
            children : [ordered] list of child-IE instances (default: empty list)
        """
        self.desc = desc
        members = kwargs.get('nested', self.possible_nested)
        # TlvCollectionMeta sets possible_nested to None for classes declared without any
        # 'nested' parameter; treat that as "no members" instead of failing on iteration
        self.members = [] if members is None else members
        self.members_by_tag = {m.tag: m for m in self.members}
        self.members_by_name = {camel_to_snake(m.__name__): m for m in self.members}
        # if we are a constructed IE, [ordered] list of actual child-IE instances
        self.children = kwargs.get('children', [])
        self.encoded = None

    def __str__(self):
        member_strs = [str(x) for x in self.members]
        return '%s(%s)' % (type(self).__name__, ','.join(member_strs))

    def __repr__(self):
        member_strs = [repr(x) for x in self.members]
        return '%s(%s)' % (self.__class__, ','.join(member_strs))

    def __add__(self, other):
        """Extending TLV_IE_Collections with other TLV_IE_Collections or TLV_IEs.

        Returns:
            a new plain TLV_IE_Collection with our description and the combined members
        Raises:
            TypeError: 'other' is neither a TLV_IE_Collection instance nor a TLV_IE sub-class
        """
        if isinstance(other, TLV_IE_Collection):
            # adding one collection to another
            return TLV_IE_Collection(self.desc, nested=self.members + other.members)
        if inspect.isclass(other) and issubclass(other, TLV_IE):
            # adding a member to a collection
            return TLV_IE_Collection(self.desc, nested=self.members + [other])
        raise TypeError("cannot add %r to a TLV_IE_Collection" % (other,))

    @staticmethod
    def _unknown_ie_class(base, tag):
        """Create an IE class on-the-fly for a tag which is not a member of the collection.
        It uses the given base class and represents its value as {'raw': <hex-string>}."""
        name = 'unknown_%s_%X' % (base.__name__, tag)
        return type(name, (base,), {'tag': tag, 'possible_nested': [],
                                    'nested_collection_cls': None,
                                    '_from_bytes': lambda s, a: {'raw': a.hex()},
                                    '_to_bytes': lambda s: bytes.fromhex(s.decoded['raw'])})

    def from_bytes(self, binary: bytes) -> List[TLV_IE]:
        """Create a list of TLV_IEs from the collection based on binary input data.
        Args:
            binary : binary bytes of encoded data
        Returns:
            list of instances of TLV_IE sub-classes containing parsed data
        Raises:
            ValueError: there is data to parse, but the collection has no member classes
        """
        self.encoded = binary
        # list of instances of TLV_IE collection member classes appearing in the data
        res = []
        remainder = binary
        if len(remainder) and not self.members_by_tag:
            # without any member we don't know which TLV flavour to parse the data with
            raise ValueError('%s: Cannot parse TLV data without any member classes' % self)
        # all members are expected to share one TLV flavour; use the first one for tag parsing
        first = next(iter(self.members_by_tag.values()), None)
        # iterate until no binary trailer is left
        while len(remainder):
            # obtain the tag at the start of the remainder
            tag, _ = first._parse_tag_raw(remainder)
            if tag is None:
                # the tag parser found no further tag: stop, but still record what we parsed
                break
            cls = self.members_by_tag.get(tag)
            if cls is None:
                # unknown tag; create the related class on-the-fly using the same base class
                cls = self._unknown_ie_class(first.__base__, tag)
            # create an instance and parse accordingly
            inst = cls()
            _, remainder = inst.from_tlv(remainder)
            res.append(inst)
        self.children = res
        return res

    def from_dict(self, decoded: List[dict]) -> List[TLV_IE]:
        """Create a list of TLV_IE instances from the collection based on an array
        of dicts, where the key indicates the name of the TLV_IE subclass to use.

        Raises:
            ValueError: a dict key does not name any member class of the collection
        """
        # list of instances of TLV_IE collection member classes appearing in the data
        res = []
        # iterate over members of the list passed into "decoded"
        for item in decoded:
            # iterate over all the keys (typically one!) within the current list item dict
            for key, value in item.items():
                # check if we have a member identified by the dict key
                if key not in self.members_by_name:
                    raise ValueError('%s: Unknown TLV Class %s in %s; expected %s' %
                                     (self, key, decoded, self.members_by_name.keys()))
                # resolve the class for that name; create an instance of it
                cls = self.members_by_name[key]
                inst = cls()
                if cls.nested_collection_cls:
                    # in case of collections, we want to pass the raw "value" portion to from_dict,
                    # as to_dict() below intentionally omits the collection-class-name as key
                    inst.from_dict(value)
                else:
                    inst.from_dict({key: value})
                res.append(inst)
        self.children = res
        return res

    def to_dict(self):
        """Return a JSON-serializable list of the dicts of all children."""
        # we intentionally return not a dict, but a list of dicts. We could prefix by
        # self.__class__.__name__, but that is usually some meaningless auto-generated collection name.
        return [x.to_dict() for x in self.children]

    def to_bytes(self):
        """Return the concatenation of the binary TLVs of all children."""
        return b''.join(c.to_tlv() for c in self.children)

    def from_tlv(self, do):
        """Same as from_bytes(): a collection has no tag/length header of its own."""
        return self.from_bytes(do)

    def to_tlv(self):
        """Same as to_bytes(): a collection has no tag/length header of its own."""
        return self.to_bytes()
Harald Welte9a2a6692022-02-11 15:44:28 +0100427
428
def flatten_dict_lists(inp):
    """hierarchically flatten each list-of-dicts into a single dict. This is useful to
    make the output of hierarchical TLV decoder structures flatter and more easy to read.

    A list is only flattened if all of its elements are dicts and no key appears more than
    once among them; otherwise flattening would silently drop data (e.g. several IEs of the
    same type), so such a list is kept as a list. An empty list becomes an empty dict.

    Args:
        inp : arbitrarily nested structure of lists, dicts and other values
    Returns:
        new structure with the lists-of-dicts flattened; other values are passed through
    """
    if isinstance(inp, dict):
        return {k: flatten_dict_lists(v) for k, v in inp.items()}
    if not isinstance(inp, list):
        return inp
    if all(isinstance(e, dict) for e in inp):
        # merge into one shared dict
        merged = {}
        for e in inp:
            merged.update(e)
        # fewer merged keys than original keys means that some key was overwritten
        if len(merged) == sum(len(e) for e in inp):
            # process result as any native dict
            return {k: flatten_dict_lists(v) for k, v in merged.items()}
    return [flatten_dict_lists(x) for x in inp]