Harald Welte | bb3b5df | 2021-05-24 23:15:54 +0200 | [diff] [blame] | 1 | """object-oriented TLV parser/encoder library.""" |
| 2 | |
| 3 | # (C) 2021 by Harald Welte <laforge@osmocom.org> |
| 4 | # All Rights Reserved |
| 5 | # |
| 6 | # This program is free software: you can redistribute it and/or modify |
| 7 | # it under the terms of the GNU General Public License as published by |
| 8 | # the Free Software Foundation, either version 2 of the License, or |
| 9 | # (at your option) any later version. |
| 10 | # |
| 11 | # This program is distributed in the hope that it will be useful, |
| 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | # GNU General Public License for more details. |
| 15 | # |
| 16 | # You should have received a copy of the GNU General Public License |
| 17 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 18 | |
| 19 | |
| 20 | from typing import Optional, List, Dict, Any, Tuple |
| 21 | from bidict import bidict |
| 22 | from construct import * |
| 23 | |
| 24 | from pySim.utils import bertlv_encode_len, bertlv_parse_len, bertlv_encode_tag, bertlv_parse_tag |
| 25 | from pySim.utils import comprehensiontlv_encode_tag, comprehensiontlv_parse_tag |
| 26 | from pySim.utils import bertlv_parse_one, comprehensiontlv_parse_one |
| 27 | from pySim.utils import bertlv_parse_tag_raw, comprehensiontlv_parse_tag_raw |
| 28 | |
| 29 | from pySim.construct import parse_construct, LV, HexAdapter, BcdAdapter, BitsRFU, GsmStringAdapter |
| 30 | from pySim.exceptions import * |
| 31 | |
| 32 | import inspect |
| 33 | import abc |
| 34 | |
class TlvMeta(abc.ABCMeta):
    """Metaclass fixing the per-class TLV parameters at the time a subclass is defined.

    Every class created through this metaclass receives the class-level attributes
    'tag', 'desc' and 'nested_collection_cls'.  The values for 'tag', 'desc' and 'nested'
    are looked up in the class body first and in the keyword arguments of the class
    statement second; they are None if given in neither place.  This way the class
    represents fixed parameters like the tag/type, while instances of it represent the
    actual TLV data."""
    def __new__(metacls, name, bases, namespace, **kwargs):
        def lookup(key):
            # a definition in the class body wins over a class-statement keyword argument
            return namespace[key] if key in namespace else kwargs.get(key, None)

        new_cls = super().__new__(metacls, name, bases, namespace)
        # these become _class_ variables, not instance variables
        new_cls.tag = lookup('tag')
        new_cls.desc = lookup('desc')
        nested = lookup('nested')
        if nested is not None and not (inspect.isclass(nested) and issubclass(nested, TLV_IE_Collection)):
            # caller passed a list of other TLV classes that might possibly appear within us:
            # wrap them in a dynamically-created TLV_IE_Collection sub-class
            collection_name = 'auto_collection_%s' % (name)
            nested = type(collection_name, (TLV_IE_Collection,), {'nested': nested})
        # either None, the TLV_IE_Collection sub-class specified by the caller, or the
        # collection class that was just generated above
        new_cls.nested_collection_cls = nested
        return new_cls
| 56 | |
class TlvCollectionMeta(abc.ABCMeta):
    """Metaclass recording the possible member IE classes when a collection subclass is defined.

    The 'nested' value is taken from the class body if present there, otherwise from the
    keyword arguments of the class statement, otherwise it is None.  It is stored as the
    class attribute 'possible_nested', so the class represents the fixed parameters (the
    nested IE classes) while instances of it represent the actual TLV data."""
    def __new__(metacls, name, bases, namespace, **kwargs):
        collection_cls = super().__new__(metacls, name, bases, namespace)
        if 'nested' in namespace:
            members = namespace['nested']
        else:
            members = kwargs.get('nested', None)
        # stored on the class itself, i.e. a _class_ variable and not an instance variable
        collection_cls.possible_nested = members
        return collection_cls
| 67 | |
| 68 | |
class Transcodable(abc.ABC):
    """Base class for something that can be encoded + decoded.  Decoding and Encoding happens either
     * via a 'construct' object stored in a derived class' _construct variable, or
     * via a 'construct' object stored in an instance _construct variable, or
     * via a derived class' _{to,from}_bytes() methods.

    An instance-level _construct takes precedence over a class-level one; the
    _{to,from}_bytes() methods are only used if neither of them is set."""
    # class-level 'construct' object; None means "not provided by this class"
    _construct = None

    def __init__(self):
        # most recent binary representation (set by to_bytes() / from_bytes())
        self.encoded = None
        # most recent decoded representation (set by from_bytes())
        self.decoded = None
        # instance-level 'construct' object; shadows the class-level one when set
        self._construct = None

    def to_bytes(self) -> bytes:
        """Convert from internal representation to binary bytes.  Store the binary result
        in the internal state and return it.

        Raises:
            NotImplementedError : no 'construct' is available and _to_bytes() is not overridden
        """
        if self._construct:
            do = self._construct.build(self.decoded, total_len=None)
        elif self.__class__._construct:
            do = self.__class__._construct.build(self.decoded, total_len=None)
        else:
            do = self._to_bytes()
        self.encoded = do
        return do

    # not an abstractmethod, as it is only required if no _construct exists
    def _to_bytes(self):
        """Encode self.decoded to bytes; to be overridden if no 'construct' is used."""
        raise NotImplementedError

    def from_bytes(self, do:bytes):
        """Convert from binary bytes to internal representation.  Store the decoded result
        in the internal state and return it.

        Args:
            do : binary bytes of the encoded data; also stored in self.encoded
        Raises:
            NotImplementedError : no 'construct' is available and _from_bytes() is not overridden
        """
        self.encoded = do
        if self._construct:
            self.decoded = parse_construct(self._construct, do)
        elif self.__class__._construct:
            self.decoded = parse_construct(self.__class__._construct, do)
        else:
            self.decoded = self._from_bytes(do)
        return self.decoded

    # not an abstractmethod, as it is only required if no _construct exists
    def _from_bytes(self, do:bytes):
        """Decode 'do' and return the result; to be overridden if no 'construct' is used."""
        raise NotImplementedError
| 111 | |
class IE(Transcodable, metaclass=TlvMeta):
    """Base class for various Information Elements.  We understand the notion of a hierarchy
    of IEs on top of the Transcodable class."""
    # we specify the metaclass so any downstream subclasses will automatically use it

    # both are overridden by the TlvMeta metaclass, if it is used to create subclasses
    nested_collection_cls = None
    tag = None

    def __init__(self, **kwargs):
        """Create an IE instance.

        Keyword Args:
            children : [ordered] list of child-IE instances (default: empty list)
            decoded : initial decoded representation (default: None)
        """
        super().__init__()
        collection_cls = self.nested_collection_cls
        # only IE classes with a nested collection class get a collection instance
        self.nested_collection = collection_cls() if collection_cls else None
        # if we are a constructed IE, [ordered] list of actual child-IE instances
        self.children = kwargs.get('children', [])
        self.decoded = kwargs.get('decoded', None)

    def __repr__(self):
        """Return a string representing the [nested] IE data (for print)."""
        if len(self.children) > 0:
            payload = ','.join(repr(child) for child in self.children)
        else:
            payload = self.decoded
        return '%s(%s)' % (type(self).__name__, payload)

    def to_dict(self):
        """Return a JSON-serializable dict representing the [nested] IE data.
        The only key is the class name; the value is the list of child dicts if there are
        children, the decoded value otherwise."""
        has_children = len(self.children) > 0
        value = [child.to_dict() for child in self.children] if has_children else self.decoded
        return {type(self).__name__: value}

    def from_dict(self, decoded:dict):
        """Set the IE internal decoded representation to data from the argument.
        If this is a nested IE, the child IE instance list is re-created."""
        if not self.nested_collection:
            self.children = []
            self.decoded = decoded
            return
        self.children = self.nested_collection.from_dict(decoded)

    def is_constructed(self):
        """Is this IE constructed by further nested IEs?"""
        return len(self.children) > 0

    @abc.abstractmethod
    def to_ie(self) -> bytes:
        """Convert the internal representation to entire IE including IE header."""

    def to_bytes(self) -> bytes:
        """Convert the internal representation _of the value part_ to binary bytes."""
        if not self.is_constructed():
            return super().to_bytes()
        # the value part is the concatenation of the encoded IE of all children
        return b''.join(child.to_ie() for child in self.children)

    def from_bytes(self, do:bytes):
        """Parse _the value part_ from binary bytes to internal representation.
        For an IE with a nested collection only the child list is re-created and None is
        returned; otherwise the decoded value is returned."""
        if self.nested_collection:
            self.children = self.nested_collection.from_bytes(do)
            return None
        self.children = []
        return super().from_bytes(do)
| 183 | |
| 184 | |
class TLV_IE(IE):
    """Abstract base class for various TLV type Information Elements.

    Derived (TLV format specific) classes supply the tag and length parsing/encoding
    primitives; this class combines them into to_tlv() and from_tlv()."""
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _compute_tag(self) -> int:
        """Compute the tag (sometimes the tag encodes part of the value).
        This default implementation returns the class' tag unmodified."""
        return self.tag

    @classmethod
    @abc.abstractmethod
    def _parse_tag_raw(cls, do:bytes) -> Tuple[int, bytes]:
        """Obtain the raw TAG at the start of the bytes provided by the user."""

    @classmethod
    @abc.abstractmethod
    def _parse_len(cls, do:bytes) -> Tuple[int, bytes]:
        """Obtain the length encoded at the start of the bytes provided by the user."""

    @abc.abstractmethod
    def _encode_tag(self) -> bytes:
        """Encode the tag part. Must be provided by derived (TLV format specific) class."""

    @abc.abstractmethod
    def _encode_len(self, val:bytes) -> bytes:
        """Encode the length part assuming a certain binary value. Must be provided by
        derived (TLV format specific) class."""

    def to_ie(self):
        """Convert the internal representation to the entire IE, which is the TLV."""
        return self.to_tlv()

    def to_tlv(self):
        """Convert the internal representation to binary TLV bytes."""
        value = self.to_bytes()
        header = self._encode_tag() + self._encode_len(value)
        return header + value

    def from_tlv(self, do:bytes):
        """Parse one TLV from the start of the given bytes into the internal representation.

        Args:
            do : binary bytes starting with the encoded TLV
        Returns:
            tuple of (result of from_bytes() on the value part, bytes remaining after this TLV)
        Raises:
            ValueError : the tag found in the data differs from the tag of this class
        """
        ie_cls = self.__class__
        rawtag, rest = ie_cls._parse_tag_raw(do)
        if not rawtag:
            # the tag parser yielded no (or a zero) tag: all of the input is the value part
            return self.from_bytes(do), b''
        if rawtag != self.tag:
            raise ValueError("%s: Encountered tag %s doesn't match our supported tag %s" %
                             (self, rawtag, self.tag))
        length, rest = ie_cls._parse_len(rest)
        # split what follows the length field into our value and the trailing bytes
        value, rest = rest[:length], rest[length:]
        return self.from_bytes(value), rest
| 235 | |
| 236 | |
class BER_TLV_IE(TLV_IE):
    """TLV_IE formatted as ASN.1 BER described in ITU-T X.690 8.1.2.

    All tag and length handling is delegated to the bertlv_* helpers of pySim.utils."""
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    @classmethod
    def _decode_tag(cls, do:bytes) -> Tuple[dict, bytes]:
        """Return the result of bertlv_parse_tag() for the given bytes."""
        result = bertlv_parse_tag(do)
        return result

    @classmethod
    def _parse_tag_raw(cls, do:bytes) -> Tuple[int, bytes]:
        """Return the result of bertlv_parse_tag_raw() for the given bytes."""
        result = bertlv_parse_tag_raw(do)
        return result

    @classmethod
    def _parse_len(cls, do:bytes) -> Tuple[int, bytes]:
        """Return the result of bertlv_parse_len() for the given bytes."""
        result = bertlv_parse_len(do)
        return result

    def _encode_tag(self) -> bytes:
        """Encode the computed tag of this IE using bertlv_encode_tag()."""
        tag = self._compute_tag()
        return bertlv_encode_tag(tag)

    def _encode_len(self, val:bytes) -> bytes:
        """Encode the length of the given value part using bertlv_encode_len()."""
        value_len = len(val)
        return bertlv_encode_len(value_len)
| 259 | |
| 260 | |
class COMPR_TLV_IE(TLV_IE):
    """TLV_IE formatted as COMPREHENSION-TLV as described in ETSI TS 101 220.

    Tag handling is delegated to the comprehensiontlv_* helpers of pySim.utils, while the
    length field is parsed and encoded with the same bertlv_* helpers as BER-TLV."""
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # every new instance starts out with the comprehension flag cleared
        self.comprehension = False

    @classmethod
    def _decode_tag(cls, do:bytes) -> Tuple[dict, bytes]:
        """Return the result of comprehensiontlv_parse_tag() for the given bytes."""
        result = comprehensiontlv_parse_tag(do)
        return result

    @classmethod
    def _parse_tag_raw(cls, do:bytes) -> Tuple[int, bytes]:
        """Return the result of comprehensiontlv_parse_tag_raw() for the given bytes."""
        result = comprehensiontlv_parse_tag_raw(do)
        return result

    @classmethod
    def _parse_len(cls, do:bytes) -> Tuple[int, bytes]:
        """Return the result of bertlv_parse_len() for the given bytes."""
        result = bertlv_parse_len(do)
        return result

    def _encode_tag(self) -> bytes:
        """Encode the computed tag of this IE using comprehensiontlv_encode_tag()."""
        tag = self._compute_tag()
        return comprehensiontlv_encode_tag(tag)

    def _encode_len(self, val:bytes) -> bytes:
        """Encode the length of the given value part using bertlv_encode_len()."""
        value_len = len(val)
        return bertlv_encode_len(value_len)
| 284 | |
| 285 | |
class TLV_IE_Collection(metaclass=TlvCollectionMeta):
    # we specify the metaclass so any downstream subclasses will automatically use it
    """A TLV_IE_Collection consists of multiple TLV_IE classes identified by their tags.
    A given encoded DO may contain any of them in any order, and may contain multiple instances
    of each DO."""
    # this is overridden by the TlvCollectionMeta metaclass, if it is used to create subclasses
    possible_nested = []

    def __init__(self, desc=None, **kwargs):
        """Create a collection instance.

        Args:
            desc : free-form description of the collection
        Keyword Args:
            nested : list of member TLV_IE classes (default: the class' possible_nested)
            children : [ordered] list of child-IE instances (default: empty list)
        """
        self.desc = desc
        members = kwargs.get('nested', self.possible_nested)
        # TlvCollectionMeta stores None in possible_nested for classes that declare no
        # 'nested' members; treat that as an empty collection instead of failing below
        self.members = [] if members is None else members
        self.members_by_tag = {m.tag: m for m in self.members}
        self.members_by_name = {m.__name__: m for m in self.members}
        # if we are a constructed IE, [ordered] list of actual child-IE instances
        self.children = kwargs.get('children', [])
        self.encoded = None

    def __str__(self):
        member_strs = [str(x) for x in self.members]
        return '%s(%s)' % (type(self).__name__, ','.join(member_strs))

    def __repr__(self):
        member_strs = [repr(x) for x in self.members]
        return '%s(%s)' % (self.__class__, ','.join(member_strs))

    def __add__(self, other):
        """Extending TLV_IE_Collections with other TLV_IE_Collections or TLV_IEs.

        Returns:
            new TLV_IE_Collection with our description and the combined member classes
        Raises:
            TypeError : 'other' is neither a TLV_IE_Collection instance nor a TLV_IE sub-class
        """
        if isinstance(other, TLV_IE_Collection):
            # adding one collection to another
            return TLV_IE_Collection(self.desc, nested=list(self.members) + list(other.members))
        if inspect.isclass(other) and issubclass(other, TLV_IE):
            # adding a member to a collection
            return TLV_IE_Collection(self.desc, nested=list(self.members) + [other])
        raise TypeError("cannot add %r to a TLV_IE_Collection" % (other,))

    @staticmethod
    def _unknown_ie_cls(base, tag):
        """Create an IE class on-the-fly for a tag that is not a member of the collection.
        Its decoded representation is a dict with the hex-string of the value under 'raw'."""
        name = 'unknown_%s_%X' % (base.__name__, tag)
        cls = type(name, (base,), {'tag': tag, 'possible_nested': [],
                                   'nested_collection_cls': None})
        cls._from_bytes = lambda s, a: {'raw': a.hex()}
        cls._to_bytes = lambda s: bytes.fromhex(s.decoded['raw'])
        return cls

    def from_bytes(self, binary:bytes) -> List[TLV_IE]:
        """Create a list of TLV_IEs from the collection based on binary input data.
        The result is also stored in self.children.

        Args:
            binary : binary bytes of encoded data
        Returns:
            list of instances of TLV_IE sub-classes containing parsed data
        Raises:
            ValueError : there is data to parse but the collection has no member classes
        """
        self.encoded = binary
        # list of instances of TLV_IE collection member classes appearing in the data
        res = []
        remainder = binary
        # the first member class provides the tag parser and the base class for unknown tags
        first = next(iter(self.members_by_tag.values()), None)
        if first is None and len(remainder):
            raise ValueError('%s: cannot parse TLV data without any member classes' % (self,))
        # iterate until no binary trailer is left
        while len(remainder):
            # obtain the tag at the start of the remainder
            tag, _ = first._parse_tag_raw(remainder)
            if tag is None:
                # the tag parser found no tag; stop, but keep what was parsed so far
                break
            cls = self.members_by_tag.get(tag)
            if cls is None:
                # unknown tag; create the related class on-the-fly using the same base class
                cls = self._unknown_ie_cls(first.__base__, tag)
            # create an instance and parse accordingly
            inst = cls()
            _, remainder = inst.from_tlv(remainder)
            res.append(inst)
        self.children = res
        return res

    def from_dict(self, decoded:List[dict]) -> List[TLV_IE]:
        """Create a list of TLV_IE instances from the collection based on an array
        of dicts, where the key indicates the name of the TLV_IE subclass to use.
        The result is also stored in self.children.  None is treated like an empty list.

        Raises:
            ValueError : a key does not name any member class of this collection
        """
        # list of instances of TLV_IE collection member classes appearing in the data
        res = []
        for i in decoded or []:
            for k in i.keys():
                if k not in self.members_by_name:
                    raise ValueError('%s: Unknown TLV Class %s in %s; expected %s' %
                                     (self, k, decoded, self.members_by_name.keys()))
                inst = self.members_by_name[k]()
                # let the instance interpret the value, so that a nested IE re-creates
                # its children instead of merely storing the raw list of dicts
                inst.from_dict(i[k])
                res.append(inst)
        self.children = res
        return res

    def to_dict(self):
        """Return a JSON-serializable list with the dict representation of each child."""
        return [x.to_dict() for x in self.children]

    def to_bytes(self):
        """Return the concatenation of the encoded TLV of all children."""
        return b''.join(c.to_tlv() for c in self.children)

    def from_tlv(self, do):
        """Same as from_bytes(), as a collection has no tag/length header of its own."""
        return self.from_bytes(do)

    def to_tlv(self):
        """Same as to_bytes(), as a collection has no tag/length header of its own."""
        return self.to_bytes()