blob: a5baa234ab34a88e9cf16d5bc5eaf6f4ebe24f16 [file] [log] [blame]
"""object-oriented TLV parser/encoder library."""

# (C) 2021 by Harald Welte <laforge@osmocom.org>
# All Rights Reserved
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
18
19
20from typing import Optional, List, Dict, Any, Tuple
21from bidict import bidict
22from construct import *
23
24from pySim.utils import bertlv_encode_len, bertlv_parse_len, bertlv_encode_tag, bertlv_parse_tag
25from pySim.utils import comprehensiontlv_encode_tag, comprehensiontlv_parse_tag
26from pySim.utils import bertlv_parse_one, comprehensiontlv_parse_one
27from pySim.utils import bertlv_parse_tag_raw, comprehensiontlv_parse_tag_raw
28
29from pySim.construct import parse_construct, LV, HexAdapter, BcdAdapter, BitsRFU, GsmStringAdapter
30from pySim.exceptions import *
31
32import inspect
33import abc
Harald Weltee8d177d2022-02-11 17:08:45 +010034import re
Harald Weltebb3b5df2021-05-24 23:15:54 +020035
def camel_to_snake(name):
    """Convert a CamelCase identifier into its snake_case equivalent.

    Args:
        name : identifier in CamelCase notation
    Returns:
        lower-case string with underscores inserted at the word boundaries
    """
    # first pass: put an underscore in front of every capitalized word
    # (an upper-case letter followed by lower-case letters) that has a predecessor
    with_word_breaks = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    # second pass: separate a lower-case letter or digit from a directly following
    # upper-case letter, which catches boundaries the first pass did not split
    fully_split = re.sub('([a-z0-9])([A-Z])', r'\1_\2', with_word_breaks)
    return fully_split.lower()
Harald Weltec91085e2022-02-10 18:05:45 +010039
class TlvMeta(abc.ABCMeta):
    """Metaclass which fixes per-class TLV parameters at the time a subclass is defined.

    Every class created through this metaclass receives the class attributes 'tag',
    'desc' and 'nested_collection_cls'. Each value is taken from the class body, or
    else from the keyword arguments of the class statement, or else is None. The class
    thereby represents fixed parameters like the tag/type, while instances of it
    represent the actual TLV data."""

    def __new__(metacls, name, bases, namespace, **kwargs):
        def lookup(key):
            # a definition in the class body wins over a class-statement keyword argument
            return namespace.get(key, kwargs.get(key, None))

        # the keyword arguments are consumed here and not forwarded to the parent metaclass
        new_cls = super().__new__(metacls, name, bases, namespace)
        # these become _class_ variables, not instance variables
        new_cls.tag = lookup('tag')
        new_cls.desc = lookup('desc')
        nested = lookup('nested')
        if nested is not None and not (inspect.isclass(nested) and issubclass(nested, TLV_IE_Collection)):
            # caller passed a list of other TLV classes that might possibly appear within us:
            # wrap it into a dynamically-created TLV_IE_Collection sub-class
            nested = type('auto_collection_%s' % (name), (TLV_IE_Collection,), {'nested': nested})
        # at this point 'nested' is either None or a TLV_IE_Collection sub-class
        new_cls.nested_collection_cls = nested
        return new_cls
61
Harald Weltec91085e2022-02-10 18:05:45 +010062
class TlvCollectionMeta(abc.ABCMeta):
    """Metaclass which fixes the member classes of a collection when a subclass is defined.

    Every class created through this metaclass receives the class attribute
    'possible_nested', taken from 'nested' in the class body, or else from the 'nested'
    keyword argument of the class statement, or else None. The class thereby represents
    fixed parameters like the nested IE classes, while instances of it represent the
    actual TLV data."""

    def __new__(metacls, name, bases, namespace, **kwargs):
        collection_cls = super().__new__(metacls, name, bases, namespace)
        if 'nested' in namespace:
            members = namespace['nested']
        else:
            members = kwargs.get('nested', None)
        # this becomes a _class_ variable, not an instance variable
        collection_cls.possible_nested = members
        return collection_cls
73
74
class Transcodable(abc.ABC):
    """Base class for something that can be encoded + decoded. Decoding and Encoding happens either
     * via a 'construct' object stored in an instance _construct variable, or
     * via a 'construct' object stored in a derived class' _construct variable, or
     * via a derived class' _{to,from}_bytes() methods.
    The three options are tried in the order listed above."""
    # class-level default; a derived class may override it with a 'construct' object
    _construct = None

    def __init__(self):
        # binary representation as last produced by to_bytes() / passed to from_bytes()
        self.encoded = None
        # decoded (internal) representation
        self.decoded = None
        # per-instance 'construct' object; takes precedence over the class-level one
        self._construct = None

    def to_bytes(self) -> bytes:
        """Convert from internal representation to binary bytes. Store the binary result
        in the internal state and return it.

        Any false-ish decoded value (None, but also 0 or an empty container) is encoded
        as empty bytes without consulting a construct or _to_bytes().

        Returns:
            the encoded binary bytes
        Raises:
            NotImplementedError : neither a construct nor a _to_bytes() override exists
        """
        if not self.decoded:
            do = b''
        elif self._construct:
            do = self._construct.build(self.decoded, total_len=None)
        elif self.__class__._construct:
            do = self.__class__._construct.build(self.decoded, total_len=None)
        else:
            do = self._to_bytes()
        self.encoded = do
        return do

    # not an abstractmethod, as it is only required if no _construct exists
    def _to_bytes(self):
        """Encode self.decoded to bytes; to be overridden if no _construct is used."""
        raise NotImplementedError('%s._to_bytes' % type(self).__name__)

    def from_bytes(self, do: bytes):
        """Convert from binary bytes to internal representation. Store the decoded result
        in the internal state and return it.

        Empty input is decoded as None without consulting a construct or _from_bytes().

        Args:
            do : binary bytes to decode
        Returns:
            the decoded representation
        Raises:
            NotImplementedError : neither a construct nor a _from_bytes() override exists
        """
        self.encoded = do
        if self.encoded == b'':
            self.decoded = None
        elif self._construct:
            self.decoded = parse_construct(self._construct, do)
        elif self.__class__._construct:
            self.decoded = parse_construct(self.__class__._construct, do)
        else:
            self.decoded = self._from_bytes(do)
        return self.decoded

    # not an abstractmethod, as it is only required if no _construct exists
    def _from_bytes(self, do: bytes):
        """Decode bytes to the internal representation; to be overridden if no _construct is used."""
        raise NotImplementedError('%s._from_bytes' % type(self).__name__)
Harald Weltebb3b5df2021-05-24 23:15:54 +0200122
Harald Weltec91085e2022-02-10 18:05:45 +0100123
class IE(Transcodable, metaclass=TlvMeta):
    """Base class for various Information Elements. On top of the Transcodable class it
    adds the notion of a hierarchy of IEs: an IE is either a leaf holding a decoded value,
    or it is constructed from an ordered list of child IEs."""
    # we specify the metaclass so any downstream subclasses will automatically use it

    # this is overridden by the TlvMeta metaclass, if it is used to create subclasses
    nested_collection_cls = None
    tag = None

    def __init__(self, **kwargs):
        """Create an IE instance.

        Keyword Args:
            children : initial list of child IE instances (default: empty list)
            decoded : initial decoded value (default: None)
        """
        super().__init__()
        # instance of the collection describing the possible child IEs, if the class has one
        self.nested_collection = self.nested_collection_cls() if self.nested_collection_cls else None
        # if we are a constructed IE, [ordered] list of actual child-IE instances
        self.children = kwargs.get('children', [])
        self.decoded = kwargs.get('decoded', None)

    def __repr__(self):
        """Return a string representing the [nested] IE data (for print)."""
        cls_name = type(self).__name__
        if not len(self.children):
            return '%s(%s)' % (cls_name, self.decoded)
        return '%s(%s)' % (cls_name, ','.join([repr(child) for child in self.children]))

    def to_dict(self):
        """Return a JSON-serializable dict representing the [nested] IE data.

        The dict has a single key, the snake_case form of the class name; its value is
        the list of child dicts for a constructed IE, the decoded value otherwise."""
        value = [child.to_dict() for child in self.children] if len(self.children) else self.decoded
        return {camel_to_snake(type(self).__name__): value}

    def from_dict(self, decoded: dict):
        """Set the IE internal decoded representation to data from the argument.
        If this is a nested IE, the child IE instance list is re-created."""
        if self.nested_collection:
            # the nested collection builds the child instances; self.decoded is left untouched
            self.children = self.nested_collection.from_dict(decoded)
            return
        self.children = []
        self.decoded = decoded

    def is_constructed(self):
        """Is this IE constructed by further nested IEs?"""
        return bool(len(self.children))

    @abc.abstractmethod
    def to_ie(self) -> bytes:
        """Convert the internal representation to entire IE including IE header."""

    def to_bytes(self) -> bytes:
        """Convert the internal representation _of the value part_ to binary bytes."""
        if not self.is_constructed():
            return super().to_bytes()
        # concatenate the encoded IE of all children to form the value part
        value_part = b''
        for child in self.children:
            value_part += child.to_ie()
        return value_part

    def from_bytes(self, do: bytes):
        """Parse _the value part_ from binary bytes to internal representation.

        For an IE with a nested collection the children are re-created from the data and
        None is returned; otherwise the decoded value of the leaf IE is returned."""
        if self.nested_collection:
            self.children = self.nested_collection.from_bytes(do)
            return None
        self.children = []
        return super().from_bytes(do)
195
196
class TLV_IE(IE):
    """Abstract base class for various TLV type Information Elements.

    Derived classes provide the format-specific parsing and encoding of the tag and
    length parts; this class combines them with the value part handled by IE."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _compute_tag(self) -> int:
        """Compute the tag (sometimes the tag encodes part of the value)."""
        return self.tag

    @classmethod
    @abc.abstractmethod
    def _parse_tag_raw(cls, do: bytes) -> Tuple[int, bytes]:
        """Obtain the raw TAG at the start of the bytes provided by the user."""

    @classmethod
    @abc.abstractmethod
    def _parse_len(cls, do: bytes) -> Tuple[int, bytes]:
        """Obtain the length encoded at the start of the bytes provided by the user."""

    @abc.abstractmethod
    def _encode_tag(self) -> bytes:
        """Encode the tag part. Must be provided by derived (TLV format specific) class."""

    @abc.abstractmethod
    def _encode_len(self, val: bytes) -> bytes:
        """Encode the length part assuming a certain binary value. Must be provided by
        derived (TLV format specific) class."""

    def to_ie(self):
        """Convert the internal representation to the entire IE, which is the TLV."""
        return self.to_tlv()

    def to_tlv(self):
        """Convert the internal representation to binary TLV bytes."""
        value = self.to_bytes()
        tag_part = self._encode_tag()
        len_part = self._encode_len(value)
        return tag_part + len_part + value

    def from_tlv(self, do: bytes):
        """Parse one TLV from the start of the given bytes into this instance.

        Args:
            do : binary bytes starting with the encoded TLV
        Returns:
            tuple of (decoded value, remaining bytes after this TLV); empty input
            yields ({}, b'') without touching the instance
        Raises:
            ValueError : the tag found in the data differs from the tag of this IE
        """
        if not len(do):
            return {}, b''
        ie_cls = self.__class__
        rawtag, rest = ie_cls._parse_tag_raw(do)
        if not rawtag:
            # no (non-zero) tag was obtained: treat the complete input as the value part
            value, rest = do, b''
        else:
            if rawtag != self._compute_tag():
                raise ValueError("%s: Encountered tag %s doesn't match our supported tag %s" %
                                 (self, rawtag, self.tag))
            length, rest = ie_cls._parse_len(rest)
            # split off the value; whatever follows belongs to the next TLV
            value, rest = rest[:length], rest[length:]
        return self.from_bytes(value), rest
250
251
class BER_TLV_IE(TLV_IE):
    """TLV_IE formatted as ASN.1 BER described in ITU-T X.690 8.1.2.

    All tag and length handling is delegated to the bertlv_* helper functions."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    @classmethod
    def _decode_tag(cls, do: bytes) -> Tuple[dict, bytes]:
        """Decode the tag at the start of the given bytes using bertlv_parse_tag()."""
        return bertlv_parse_tag(do)

    @classmethod
    def _parse_tag_raw(cls, do: bytes) -> Tuple[int, bytes]:
        """Obtain the raw tag at the start of the given bytes using bertlv_parse_tag_raw()."""
        return bertlv_parse_tag_raw(do)

    @classmethod
    def _parse_len(cls, do: bytes) -> Tuple[int, bytes]:
        """Obtain the length at the start of the given bytes using bertlv_parse_len()."""
        return bertlv_parse_len(do)

    def _encode_tag(self) -> bytes:
        """Encode the computed tag of this IE using bertlv_encode_tag()."""
        tag = self._compute_tag()
        return bertlv_encode_tag(tag)

    def _encode_len(self, val: bytes) -> bytes:
        """Encode the length of the given value part using bertlv_encode_len()."""
        num_bytes = len(val)
        return bertlv_encode_len(num_bytes)
275
276
class COMPR_TLV_IE(TLV_IE):
    """TLV_IE formatted as COMPREHENSION-TLV as described in ETSI TS 101 220.

    Tags are handled by the comprehensiontlv_* helper functions, whereas the length
    part is parsed and encoded with the bertlv_* length helpers."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # every new instance starts out with the comprehension flag cleared
        self.comprehension = False

    @classmethod
    def _decode_tag(cls, do: bytes) -> Tuple[dict, bytes]:
        """Decode the tag at the start of the given bytes using comprehensiontlv_parse_tag()."""
        return comprehensiontlv_parse_tag(do)

    @classmethod
    def _parse_tag_raw(cls, do: bytes) -> Tuple[int, bytes]:
        """Obtain the raw tag at the start of the given bytes using
        comprehensiontlv_parse_tag_raw()."""
        return comprehensiontlv_parse_tag_raw(do)

    @classmethod
    def _parse_len(cls, do: bytes) -> Tuple[int, bytes]:
        """Obtain the length at the start of the given bytes using bertlv_parse_len()."""
        return bertlv_parse_len(do)

    def _encode_tag(self) -> bytes:
        """Encode the computed tag of this IE using comprehensiontlv_encode_tag()."""
        tag = self._compute_tag()
        return comprehensiontlv_encode_tag(tag)

    def _encode_len(self, val: bytes) -> bytes:
        """Encode the length of the given value part using bertlv_encode_len()."""
        num_bytes = len(val)
        return bertlv_encode_len(num_bytes)
301
302
class TLV_IE_Collection(metaclass=TlvCollectionMeta):
    """A TLV_IE_Collection consists of multiple TLV_IE classes identified by their tags.
    A given encoded DO may contain any of them in any order, and may contain multiple instances
    of each DO."""
    # we specify the metaclass so any downstream subclasses will automatically use it

    # this is overridden by the TlvCollectionMeta metaclass, if it is used to create subclasses
    possible_nested = []

    def __init__(self, desc=None, **kwargs):
        """Create a collection instance.

        Args:
            desc : optional description of the collection
        Keyword Args:
            nested : list of member TLV_IE classes (default: the class' possible_nested)
            children : initial list of child IE instances (default: empty list)
        """
        self.desc = desc
        members = kwargs.get('nested', self.possible_nested)
        # the metaclass sets possible_nested to None for classes declaring no 'nested';
        # treat that as "no members" instead of failing on iteration below
        self.members = [] if members is None else members
        self.members_by_tag = {m.tag: m for m in self.members}
        self.members_by_name = {m.__name__: m for m in self.members}
        # if we are a constructed IE, [ordered] list of actual child-IE instances
        self.children = kwargs.get('children', [])
        self.encoded = None

    def __str__(self):
        member_strs = [str(x) for x in self.members]
        return '%s(%s)' % (type(self).__name__, ','.join(member_strs))

    def __repr__(self):
        member_strs = [repr(x) for x in self.members]
        return '%s(%s)' % (self.__class__, ','.join(member_strs))

    def __add__(self, other):
        """Extending TLV_IE_Collections with other TLV_IE_Collections or TLV_IEs.

        Returns:
            a new plain TLV_IE_Collection carrying our description and the combined members
        Raises:
            TypeError : other is neither a TLV_IE_Collection instance nor a TLV_IE sub-class
        """
        if isinstance(other, TLV_IE_Collection):
            # adding one collection to another
            return TLV_IE_Collection(self.desc, nested=self.members + other.members)
        if inspect.isclass(other) and issubclass(other, TLV_IE):
            # adding a member to a collection
            return TLV_IE_Collection(self.desc, nested=self.members + [other])
        raise TypeError('cannot add %r to a TLV_IE_Collection' % (other,))

    def from_bytes(self, binary: bytes) -> List[TLV_IE]:
        """Create a list of TLV_IEs from the collection based on binary input data.
        Args:
            binary : binary bytes of encoded data
        Returns:
            list of instances of TLV_IE sub-classes containing parsed data
        Raises:
            ValueError : there is data to parse but the collection has no member classes
        """
        self.encoded = binary
        # list of instances of TLV_IE collection member classes appearing in the data
        res = []
        remainder = binary
        # any member class serves as reference for how tags are parsed in this collection
        first = next(iter(self.members_by_tag.values()), None)
        # iterate until no binary trailer is left
        while len(remainder):
            if first is None:
                raise ValueError('%s: cannot parse TLV data without any member classes' % self)
            # obtain the tag at the start of the remainder
            tag, _ = first._parse_tag_raw(remainder)
            if tag is None:
                # no further tag in the data; stop parsing, but still record what was
                # parsed so far in self.children below
                break
            if tag in self.members_by_tag:
                cls = self.members_by_tag[tag]
            else:
                # unknown tag; create the related class on-the-fly using the same base class
                name = 'unknown_%s_%X' % (first.__base__.__name__, tag)
                cls = type(name, (first.__base__,), {'tag': tag, 'possible_nested': [],
                                                    'nested_collection_cls': None})
                # the value of an unknown IE is represented as hex-string under the key 'raw'
                cls._from_bytes = lambda s, a: {'raw': a.hex()}
                cls._to_bytes = lambda s: bytes.fromhex(s.decoded['raw'])
            # create an instance and parse accordingly
            inst = cls()
            _, remainder = inst.from_tlv(remainder)
            res.append(inst)
        self.children = res
        return res

    def from_dict(self, decoded: List[dict]) -> List[TLV_IE]:
        """Create a list of TLV_IE instances from the collection based on an array
        of dicts, where the key indicates the name of the TLV_IE subclass to use.

        Raises:
            ValueError : a key does not name any member class of this collection
        """
        # NOTE(review): keys are matched against the unmodified class __name__ of the
        # members, whereas IE.to_dict() emits snake_case keys - confirm this is intended.
        # list of instances of TLV_IE collection member classes appearing in the data
        res = []
        for i in decoded:
            for k in i.keys():
                if k not in self.members_by_name:
                    raise ValueError('%s: Unknown TLV Class %s in %s; expected %s' %
                                     (self, k, decoded, self.members_by_name.keys()))
                inst = self.members_by_name[k]()
                inst.from_dict(i[k])
                res.append(inst)
        self.children = res
        return res

    def to_dict(self):
        """Return the list of dicts representing the current children."""
        return [x.to_dict() for x in self.children]

    def to_bytes(self):
        """Return the concatenated TLV encoding of all current children."""
        return b''.join(c.to_tlv() for c in self.children)

    def from_tlv(self, do):
        """Same as from_bytes(); a collection has no tag/length header of its own."""
        return self.from_bytes(do)

    def to_tlv(self):
        """Same as to_bytes(); a collection has no tag/length header of its own."""
        return self.to_bytes()
Harald Welte9a2a6692022-02-11 15:44:28 +0100413
414
def flatten_dict_lists(inp):
    """hierarchically flatten each list-of-dicts into a single dict. This is useful to
    make the output of hierarchical TLV decoder structures flatter and more easy to read.

    A list is only flattened if all its elements are dicts and merging them loses no
    data, i.e. no key occurs more than once across the elements. Otherwise (e.g. several
    instances of the same element) the list is kept and only its elements are processed.

    Args:
        inp : arbitrarily nested structure of lists, dicts and other values
    Returns:
        new structure of the same content with flattened list-of-dicts; values that are
        neither list nor dict are returned unchanged
    """
    if isinstance(inp, dict):
        return {k: flatten_dict_lists(v) for k, v in inp.items()}
    if not isinstance(inp, list):
        return inp
    if all(isinstance(e, dict) for e in inp):
        # merge into one shared dict
        merged = {}
        for e in inp:
            merged.update(e)
        # a key collision would have overwritten an earlier entry: only flatten if
        # every key of every element survived the merge
        if len(merged) == sum(len(e) for e in inp):
            # process result as any native dict
            return {k: flatten_dict_lists(v) for k, v in merged.items()}
    return [flatten_dict_lists(x) for x in inp]
439 return inp