Blame - pySim/construct.py - pysim

2022-02-10 18:05:45 +0100

[diff] [blame]

1

from construct.lib.containers import Container, ListContainer

2

from construct.core import EnumIntegerString

Harald Welte

2021-05-28 22:01:29 +0200

[diff] [blame]

3

import typing

Harald Welte

2021-04-10 17:22:35 +0200

[diff] [blame]

4

from construct import *

Vadim Yanitskiy

05d30eb

2022-08-29 20:24:44 +0700

[diff] [blame]

5

from construct.core import evaluate, BitwisableString

Harald Welte

2022-02-11 18:05:48 +0100

[diff] [blame]

6

from construct.lib import integertypes

Harald Welte

2021-04-10 19:05:37 +0200

[diff] [blame]

7

from pySim.utils import b2h, h2b, swap_nibbles

Robert Falkenberg

2021-05-07 15:23:20 +0200

[diff] [blame]

8

import gsm0338

Philipp Maier

791f80a

2023-07-26 17:01:37 +0200

[diff] [blame]

9

import codecs

Harald Welte

6e9ae8a

2023-12-08 14:57:19 +0100

[diff] [blame]

10

import ipaddress

Harald Welte

2021-04-10 17:22:35 +0200

[diff] [blame]

11

12

"""Utility code related to the integration of the 'construct' declarative parser."""

13

Harald Welte

2022-02-11 18:05:48 +0100

[diff] [blame]

14

Harald Welte

2021-04-10 17:22:35 +0200

[diff] [blame]

15

#

16

# This program is free software: you can redistribute it and/or modify

17

# it under the terms of the GNU General Public License as published by

18

# the Free Software Foundation, either version 2 of the License, or

19

# (at your option) any later version.

20

#

21

# This program is distributed in the hope that it will be useful,

22

# but WITHOUT ANY WARRANTY; without even the implied warranty of

23

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

24

# GNU General Public License for more details.

25

#

26

# You should have received a copy of the GNU General Public License

27

# along with this program. If not, see <http://www.gnu.org/licenses/>.

28

29

30

class HexAdapter(Adapter):
    """Adapter translating between a bytes() value and its hex-nibble string.

    Parsing hands the bytes of the sub-construct to b2h(); building hands the
    hex string supplied by the caller to h2b().
    """

    def _decode(self, obj, context, path):
        # raw bytes -> string of hex nibbles
        hex_nibbles = b2h(obj)
        return hex_nibbles

    def _encode(self, obj, context, path):
        # string of hex nibbles -> raw bytes
        raw_bytes = h2b(obj)
        return raw_bytes

38

Philipp Maier

791f80a

2023-07-26 17:01:37 +0200

[diff] [blame]

39

class Utf8Adapter(Adapter):
    """Adapter between UTF-8 encoded bytes() and human readable text."""

    def _decode(self, obj, context, path):
        # A value made up exclusively of 0xff bytes (this includes the empty
        # value) is interpreted as an empty string.
        all_ff = b'\xff' * len(obj)
        if obj != all_ff:
            return codecs.decode(obj, "utf-8")
        return ""

    def _encode(self, obj, context, path):
        encoded = codecs.encode(obj, "utf-8")
        return encoded

50

Harald Welte

6e6caa8

2023-12-27 22:04:50 +0100

[diff] [blame]

51

class GsmOrUcs2Adapter(Adapter):
    """Try to encode into a GSM 03.38 string; if that fails, fall back to UCS-2 as described
    in TS 102 221 Annex A.

    When decoding, the first byte selects the codec: 0x80, 0x81 and 0x82 are the
    magic bytes of TS 102 221 Annex A (handled by Ucs2Adapter); everything else
    is decoded as GSM 03.38.
    """

    # first-byte values that mark a TS 102 221 Annex A UCS-2 coding
    _UCS2_MAGIC = (0x80, 0x81, 0x82)

    def _decode(self, obj, context, path):
        # In case the string contains only 0xff bytes we interpret it as an empty string.
        # This also covers empty input, so obj[0] below is always available.
        if obj == b'\xff' * len(obj):
            return ""
        if obj[0] in self._UCS2_MAGIC:
            ad = Ucs2Adapter(GreedyBytes)
        else:
            ad = GsmString(GreedyBytes)
        return ad._decode(obj, context, path)

    def _encode(self, obj, context, path):
        # first try GSM 03.38; then fall back to TS 102 221 Annex A UCS-2
        try:
            return GsmString(GreedyBytes)._encode(obj, context, path)
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt and
            # SystemExit must propagate instead of being silently swallowed.
            return Ucs2Adapter(GreedyBytes)._encode(obj, context, path)

73

Harald Welte

f6fceb8

2023-12-27 21:44:56 +0100

[diff] [blame]

74

class Ucs2Adapter(Adapter):
    """convert a bytes() type that contains UCS2 encoded characters encoded as defined in TS 102 221
    Annex A to normal python string representation (and back).

    Decoding dispatches on the first byte (0x80, 0x81 or 0x82 for variants 1, 2
    and 3). Encoding picks variant 2 if possible, then variant 3, and falls back
    to variant 1 otherwise.
    """

    @staticmethod
    def _decode_half_page(payload, base_ptr):
        """Decode the character bytes shared by variants 2 and 3.

        If bit 8 of a byte is zero, the remaining 7 bits contain a GSM Default
        Alphabet character; if bit 8 is one, the remaining seven bits are an
        offset added to 'base_ptr' and the resulting 16 bit value is a UCS2
        code point.
        """
        pieces = []
        for ch in payload:
            if ch & 0x80:
                codepoint = (ch & 0x7f) + base_ptr
                pieces.append(codecs.decode(codepoint.to_bytes(2, byteorder='big'), 'utf_16_be'))
            else:
                pieces.append(codecs.decode(bytes([ch]), 'gsm03.38'))
        return "".join(pieces)

    def _decode(self, obj, context, path):
        # In case the string contains only 0xff bytes we interpret it as an empty string
        if obj == b'\xff' * len(obj):
            return ""
        variant = obj[0]
        if variant == 0x80:
            # TS 102 221 Annex A Variant 1
            return codecs.decode(obj[1:], 'utf_16_be')
        if variant == 0x81:
            # TS 102 221 Annex A Variant 2
            # second byte contains a value indicating the number of characters
            num_of_chars = obj[1]
            # the third byte contains an 8 bit number which defines bits 15 to 8 of a 16 bit base
            # pointer, where bit 16 is set to zero, and bits 7 to 1 are also set to zero. These
            # sixteen bits constitute a base pointer to a "half-page" in the UCS2 code space
            base_ptr = obj[2] << 7
            return self._decode_half_page(obj[3:3 + num_of_chars], base_ptr)
        if variant == 0x82:
            # TS 102 221 Annex A Variant 3
            # second byte contains a value indicating the number of characters
            num_of_chars = obj[1]
            # third and fourth bytes contain a 16 bit number which defines the complete 16 bit base
            # pointer to a half-page in the UCS2 code space, for use with some or all of the
            # remaining bytes in the string
            base_ptr = obj[2] << 8 | obj[3]
            return self._decode_half_page(obj[4:4 + num_of_chars], base_ptr)
        raise ValueError('First byte of TS 102 221 UCS-2 must be 0x80, 0x81 or 0x82')

    def _encode(self, obj, context, path):
        def encodable_in_gsm338(instr: str) -> bool:
            """Determine if given input string is encodable in gsm03.38."""
            try:
                # TODO: figure out if/how we can constrain to default alphabet. The gsm0338
                # library seems to include the spanish lock/shift table
                codecs.encode(instr, 'gsm03.38')
            except ValueError:
                return False
            return True

        def codepoint_of(c: str) -> int:
            """Return the UTF-16-BE encoding of 'c' as an integer. Characters outside
            the BMP encode to four bytes and hence yield a value above 0xffff."""
            return int.from_bytes(codecs.encode(c, 'utf_16_be'), byteorder='big')

        def codepoints_not_in_gsm338(instr: str) -> typing.List[int]:
            """Return an integer list of UCS2 codepoints for all characters of 'instr'
            which are not representable in the GSM 03.38 default alphabet."""
            return [codepoint_of(c) for c in instr if not encodable_in_gsm338(c)]

        def encodable_in_variant2(instr: str) -> bool:
            # the number of characters must fit into the single length byte
            if len(instr) > 0xff:
                return False
            codepoints = codepoints_not_in_gsm338(instr)
            # bit 16 of the base pointer is always zero in variant 2
            if any(cp >= 0x8000 for cp in codepoints):
                return False
            # all non-GSM characters must share one 128-codepoint half-page
            return len({cp >> 7 for cp in codepoints}) <= 1

        def encodable_in_variant3(instr: str) -> bool:
            # the number of characters must fit into the single length byte
            if len(instr) > 0xff:
                return False
            codepoints = codepoints_not_in_gsm338(instr)
            if not codepoints:
                return True
            # the base pointer is only 16 bits wide; this also rejects non-BMP characters
            if max(codepoints) > 0xffff:
                return False
            # compute delta between max and min; check if it's encodable in 7 bits
            return max(codepoints) - min(codepoints) < 0x80

        def _encode_variant1(instr: str) -> bytes:
            """Encode according to TS 102 221 Annex A Variant 1"""
            return b'\x80' + codecs.encode(instr, 'utf_16_be')

        def _encode_variant2(instr: str) -> bytes:
            """Encode according to TS 102 221 Annex A Variant 2"""
            codepoint_prefix = None
            # second byte contains a value indicating the number of characters
            hdr = b'\x81' + len(instr).to_bytes(1, byteorder='big')
            chars = []
            for c in instr:
                try:
                    enc = codecs.encode(c, 'gsm03.38')
                except ValueError:
                    c_codepoint = codepoint_of(c)
                    c_prefix = c_codepoint >> 7
                    if codepoint_prefix is None:
                        codepoint_prefix = c_prefix
                    # invariant guaranteed by encodable_in_variant2()
                    assert codepoint_prefix == c_prefix
                    enc = (0x80 + (c_codepoint & 0x7f)).to_bytes(1, byteorder='big')
                chars.append(enc)
            if codepoint_prefix is None:
                # no character needed the base pointer; any value will do
                codepoint_prefix = 0
            return hdr + codepoint_prefix.to_bytes(1, byteorder='big') + b''.join(chars)

        def _encode_variant3(instr: str) -> bytes:
            """Encode according to TS 102 221 Annex A Variant 3"""
            # second byte contains a value indicating the number of characters
            hdr = b'\x82' + len(instr).to_bytes(1, byteorder='big')
            chars = []
            codepoint_base = min(codepoints_not_in_gsm338(instr), default=0)
            for c in instr:
                try:
                    # if bit 8 of the byte is set to zero, the remaining 7 bits of the byte
                    # contain a GSM Default Alphabet character
                    enc = codecs.encode(c, 'gsm03.38')
                except ValueError:
                    # if bit 8 of the byte is set to one, the remaining seven bits are an offset
                    # value added to the base pointer defined in bytes three and four, and the
                    # resultant 16 bit value is a UCS2 code point
                    c_codepoint_delta = codepoint_of(c) - codepoint_base
                    # invariant guaranteed by encodable_in_variant3()
                    assert c_codepoint_delta < 0x80
                    enc = (0x80 + c_codepoint_delta).to_bytes(1, byteorder='big')
                chars.append(enc)
            # third and fourth bytes contain a 16 bit number which defines the complete 16 bit base
            # pointer to a half-page in the UCS2 code space
            return hdr + codepoint_base.to_bytes(2, byteorder='big') + b''.join(chars)

        if encodable_in_variant2(obj):
            return _encode_variant2(obj)
        if encodable_in_variant3(obj):
            return _encode_variant3(obj)
        return _encode_variant1(obj)

Harald Welte

2022-02-10 18:05:45 +0100

[diff] [blame]

232

Harald Welte

2021-04-10 19:05:37 +0200

[diff] [blame]

233

class BcdAdapter(Adapter):
    """Adapter translating between a bytes() value and a string of BCD nibbles.

    Both directions go through the hex representation and swap_nibbles().
    """

    def _decode(self, obj, context, path):
        # bytes -> hex string -> nibble-swapped string
        as_hex = b2h(obj)
        return swap_nibbles(as_hex)

    def _encode(self, obj, context, path):
        # nibble-swapped string -> hex string -> bytes
        as_hex = swap_nibbles(obj)
        return h2b(as_hex)

241

Harald Welte

842fbdb

2023-12-27 17:06:58 +0100

[diff] [blame]

242

class PlmnAdapter(BcdAdapter):
    """Adapter between a bytes(3) value and a BCD string like 262-02 or 262-002."""

    def _decode(self, obj, context, path):
        digits = super()._decode(obj, context, path)
        # An 'f' in the fourth position is a filler: the part after the dash
        # then has only two digits.
        tail = digits[4:] if digits[3] == 'f' else digits[3:]
        return '-'.join([digits[:3], tail])

    def _encode(self, obj, context, path):
        parts = obj.split('-')
        # Re-insert the 'f' filler in front of a two-digit second part.
        filler = 'f' if len(parts[1]) == 2 else ''
        return super()._encode(parts[0] + filler + parts[1], context, path)

258

Harald Welte

bc0e209

2022-02-13 10:54:58 +0100

[diff] [blame]

259

class InvertAdapter(Adapter):
    """Adapter applying inverse logic (false->true, true->false) to a mapping."""

    @staticmethod
    def _invert_bool_in_obj(obj):
        """Flip every boolean value of 'obj' in place and return 'obj'.

        Entries whose key starts with '_' are private and left untouched.
        """
        for key, value in obj.items():
            if not key.startswith('_'):
                # Equality (not identity) comparison on purpose: values that
                # compare equal to False/True are flipped as well.
                if value == False:
                    obj[key] = True
                elif value == True:
                    obj[key] = False
        return obj

    def _decode(self, obj, context, path):
        inverted = self._invert_bool_in_obj(obj)
        return inverted

    def _encode(self, obj, context, path):
        inverted = self._invert_bool_in_obj(obj)
        return inverted

Harald Welte

2022-02-10 18:05:45 +0100

[diff] [blame]

278

Robert Falkenberg

2021-05-07 15:23:20 +0200

[diff] [blame]

279

class Rpad(Adapter):
    """
    Right-padding adapter.

    When building, the padding pattern (bytes or characters) is appended until
    the target size is reached; when parsing, trailing padding is stripped.

    Parameters:
        subcon: Subconstruct as defined by construct library
        pattern: set padding pattern (default: b'\\xff')
        num_per_byte: number of 'elements' per byte. E.g. for hex nibbles: 2
    """

    def __init__(self, subcon, pattern=b'\xff', num_per_byte=1):
        super().__init__(subcon)
        self.num_per_byte = num_per_byte
        self.pattern = pattern

    def _decode(self, obj, context, path):
        stripped = obj.rstrip(self.pattern)
        return stripped

    def _encode(self, obj, context, path):
        # size of the sub-construct in bytes, scaled to the element unit of 'obj'
        target_size = self.sizeof() * self.num_per_byte
        missing = target_size - len(obj)
        if missing < 0:
            raise SizeofError("Input ({}) exceeds target size ({})".format(
                len(obj), target_size))
        return obj + self.pattern * missing

Robert Falkenberg

2021-05-07 15:23:20 +0200

[diff] [blame]

304

Harald Welte

954ce95

2023-05-27 20:08:09 +0200

[diff] [blame]

305

class MultiplyAdapter(Adapter):
    """
    Decoder multiplies by multiplicator
    Encoder divides by multiplicator

    Parameters:
        subcon: Subconstruct as defined by construct library
        multiplicator: Multiplier to apply to raw encoded value
    """

    def __init__(self, subcon, multiplicator):
        super().__init__(subcon)
        self.multiplicator = multiplicator

    def _decode(self, obj, context, path):
        # use the configured factor rather than a hard-coded 8
        return obj * self.multiplicator

    def _encode(self, obj, context, path):
        # floor division: values that are no exact multiple are rounded down
        return obj // self.multiplicator

324

Harald Welte

2022-02-10 18:05:45 +0100

[diff] [blame]

325

Robert Falkenberg

2021-05-07 15:23:20 +0200

[diff] [blame]

326

class GsmStringAdapter(Adapter):
    """Adapter between GSM 03.38 encoded bytes and a python string.

    Parameters:
        subcon: Subconstruct as defined by construct library
        codec: name of the codec to use (default: 'gsm03.38')
        err: error handling scheme handed to str.encode() (default: 'strict')
    """

    def __init__(self, subcon, codec='gsm03.38', err='strict'):
        super().__init__(subcon)
        self.err = err
        self.codec = codec

    def _decode(self, obj, context, path):
        # note: decoding uses the codec's default error handling, not self.err
        text = obj.decode(self.codec)
        return text

    def _encode(self, obj, context, path):
        raw = obj.encode(self.codec, self.err)
        return raw

339

Harald Welte

6e9ae8a

2023-12-08 14:57:19 +0100

[diff] [blame]

340

class Ipv4Adapter(Adapter):
    """
    Decoder converts from 4 bytes to string representation (A.B.C.D).
    Encoder converts from string representation (A.B.C.D) to four bytes.
    """

    def _decode(self, obj, context, path):
        # packed address -> textual form
        return ipaddress.IPv4Address(obj).compressed

    def _encode(self, obj, context, path):
        # textual form -> packed address
        return ipaddress.IPv4Address(obj).packed

352

353

class Ipv6Adapter(Adapter):
    """
    Decoder converts from 16 bytes to string representation.
    Encoder converts from string representation to 16 bytes.
    """

    def _decode(self, obj, context, path):
        # packed address -> compressed textual form
        return ipaddress.IPv6Address(obj).compressed

    def _encode(self, obj, context, path):
        # textual form -> packed address
        return ipaddress.IPv6Address(obj).packed

365

Harald Welte

2022-02-10 18:05:45 +0100

[diff] [blame]

366

Harald Welte

2021-04-10 17:22:35 +0200

[diff] [blame]

367

def filter_dict(d, exclude_prefix='_'):
    """filter the input dict to ensure no keys starting with 'exclude_prefix' remain.

    Args:
        d: the object to filter. Anything that is not a dict is returned unchanged.
        exclude_prefix: keys starting with this prefix are dropped.

    Returns:
        A new dict without the excluded keys. Values that are plain dicts are
        filtered recursively using the same 'exclude_prefix'.
    """
    if not isinstance(d, dict):
        return d
    res = {}
    for key, value in d.items():
        if key.startswith(exclude_prefix):
            continue
        # Only exact dicts are recursed into; dict subclasses are kept as they
        # are so that their type is preserved for the caller.
        if type(value) is dict:
            value = filter_dict(value, exclude_prefix)
        res[key] = value
    return res

Harald Welte

2021-05-28 22:01:29 +0200

[diff] [blame]

381

382

def normalize_construct(c):
    """Recursively turn construct specific types into related base types.

    Containers and dicts become plain dicts, list containers and lists become
    plain lists, enum integer strings become str; anything else is returned
    as it is. This is mostly useful so the result can be serialized.
    """
    # we need to include the filter_dict as we otherwise get elements like this
    # in the dict: '_io': <_io.BytesIO object at 0x7fdb64e05860> which we cannot json-serialize
    c = filter_dict(c)
    if isinstance(c, (Container, dict)):
        return {key: normalize_construct(val) for (key, val) in c.items()}
    if isinstance(c, (ListContainer, list)):
        return [normalize_construct(item) for item in c]
    if isinstance(c, EnumIntegerString):
        return str(c)
    return c

Harald Welte

2022-02-10 18:05:45 +0100

[diff] [blame]

400

Harald Welte

caef0df

2023-12-17 10:07:01 +0100

[diff] [blame]

401

def parse_construct(c, raw_bin_data: bytes, length: typing.Optional[int] = None, exclude_prefix: str = '_',
                    context: typing.Optional[dict] = None):
    """Helper function to wrap around normalize_construct() and filter_dict().

    Args:
        c: the construct used for parsing
        raw_bin_data: the binary data to parse
        length: value handed to the construct as 'total_len'; if it is None or 0,
            the length of 'raw_bin_data' is used
        exclude_prefix: accepted for API compatibility; not used by this function
        context: extra keyword arguments handed to c.parse()

    Returns:
        The normalized parse result, or None if parsing hit a StreamError and
        the input consists only of 0xff bytes.

    Raises:
        StreamError: if parsing fails on input that is not all-0xff
    """
    if context is None:
        context = {}
    if not length:
        length = len(raw_bin_data)
    try:
        parsed = c.parse(raw_bin_data, total_len=length, **context)
    except StreamError:
        # if the input is all-ff, this means the content is undefined. Let's avoid passing StreamError
        # exceptions in those situations (which might occur if a length field 0xff is 255 but then
        # there's actually less bytes in the remainder of the file).
        if all(v == 0xff for v in raw_bin_data):
            return None
        raise
    return normalize_construct(parsed)

416

Harald Welte

caef0df

2023-12-17 10:07:01 +0100

[diff] [blame]

417

def build_construct(c, decoded_data, context=None):
    """Helper function to handle total_len.

    Args:
        c: the construct used for building
        decoded_data: the decoded representation to be built
        context: optional dict of extra keyword arguments handed to c.build()

    Returns:
        Whatever c.build() returns when called with total_len=None.
    """
    if context is None:
        # avoid a shared mutable default argument
        context = {}
    return c.build(decoded_data, total_len=None, **context)

Harald Welte

2022-02-10 18:05:45 +0100

[diff] [blame]

420

Harald Welte

2021-04-10 17:22:35 +0200

[diff] [blame]

421

# Shared / common definitions of data types are collected below.

# One length byte (Int8ub) followed by that many bytes, presented as hex string
LV = Prefixed(Int8ub, HexAdapter(GreedyBytes))

# Default value for Reserved for Future Use (RFU) bits/bytes
# See TS 31.101 Sec. "3.4 Coding Conventions"
__RFU_VALUE = 0

# Reserved for Future Use (RFU) field occupying a single bit
FlagRFU = Default(Flag, __RFU_VALUE)

# Reserved for Future Use (RFU) field occupying a single byte
ByteRFU = Default(Byte, __RFU_VALUE)

# Reserved for Future Use (RFU) field occupying all remaining bytes
GreedyBytesRFU = Default(GreedyBytes, b'')

436

Harald Welte

2022-02-10 18:05:45 +0100

[diff] [blame]

437

Robert Falkenberg

9d16fbc

2021-04-12 11:43:22 +0200

[diff] [blame]

438

def BitsRFU(n=1):
    """
    Create a field packing Reserved for Future Use (RFU) bit(s)
    as defined in TS 31.101 Sec. "3.4 Coding Conventions"

    Intended for (currently) unused/reserved bits: their contents
    are initialized automatically, but (unlike padding) must not be
    cleared in the future or when restoring read data.

    Parameters:
        n (Integer): Number of bits (default: 1)
    """
    rfu_bits = BitsInteger(n)
    return Default(rfu_bits, __RFU_VALUE)

451

Harald Welte

2022-02-10 18:05:45 +0100

[diff] [blame]

452

Robert Falkenberg

9d16fbc

2021-04-12 11:43:22 +0200

[diff] [blame]

453

def BytesRFU(n=1):
    """
    Create a field packing Reserved for Future Use (RFU) byte(s)
    as defined in TS 31.101 Sec. "3.4 Coding Conventions"

    Intended for (currently) unused/reserved bytes: their contents
    are initialized automatically, but (unlike padding) must not be
    cleared in the future or when restoring read data.

    Parameters:
        n (Integer): Number of bytes (default: 1)
    """
    rfu_bytes = Bytes(n)
    return Default(rfu_bytes, __RFU_VALUE)

Robert Falkenberg

2021-05-07 15:23:20 +0200

[diff] [blame]

466

Harald Welte

2022-02-10 18:05:45 +0100

[diff] [blame]

467

Robert Falkenberg

2021-05-07 15:23:20 +0200

[diff] [blame]

468

def GsmString(n):
    """
    Create a GSM 03.38 encoded byte string field of fixed length n.

    On encoding, padding bytes (b'\\xff') are appended to maintain the
    length; on decoding, those trailing bytes are removed again.

    Exceptions are raised for invalid characters
    and length excess.

    Parameters:
        n (Integer): Fixed length of the encoded byte string
    """
    padded = Rpad(Bytes(n), pattern=b'\xff')
    return GsmStringAdapter(padded, codec='gsm03.38')

Harald Welte

2022-02-11 18:05:48 +0100

[diff] [blame]

481

Harald Welte

6e6caa8

2023-12-27 22:04:50 +0100

[diff] [blame]

482

def GsmOrUcs2String(n):
    """
    Create a GSM 03.38 or UCS-2 (TS 102 221 Annex A) encoded byte string
    field of fixed length n.

    On encoding, padding bytes (b'\\xff') are appended to maintain the
    length; on decoding, those trailing bytes are removed again.

    Exceptions are raised for invalid characters
    and length excess.

    Parameters:
        n (Integer): Fixed length of the encoded byte string
    """
    padded = Rpad(Bytes(n), pattern=b'\xff')
    return GsmOrUcs2Adapter(padded)

495

Harald Welte

2022-02-11 18:05:48 +0100

[diff] [blame]

496

class GreedyInteger(Construct):

497

"""A variable-length integer implementation, think of combining GrredyBytes with BytesInteger."""

Philipp Maier

2022-06-01 18:21:17 +0200

[diff] [blame]

498

def __init__(self, signed=False, swapped=False, minlen=0):

Harald Welte

2022-02-11 18:05:48 +0100

[diff] [blame]

499

super().__init__()

500

self.signed = signed

501

self.swapped = swapped

Philipp Maier

2022-06-01 18:21:17 +0200

[diff] [blame]

502

self.minlen = minlen

Harald Welte

2022-02-11 18:05:48 +0100

[diff] [blame]

503

504

def _parse(self, stream, context, path):

505

data = stream_read_entire(stream, path)

506

if evaluate(self.swapped, context):

507

data = swapbytes(data)

508

try:

Vadim Yanitskiy

05d30eb

2022-08-29 20:24:44 +0700

[diff] [blame]

509

return int.from_bytes(data, byteorder='big', signed=self.signed)

Harald Welte

2022-02-11 18:05:48 +0100

[diff] [blame]

510

except ValueError as e:

511

raise IntegerError(str(e), path=path)

512

Philipp Maier

2022-06-01 18:21:17 +0200

[diff] [blame]

513

def __bytes_required(self, i, minlen=0):

Harald Welte

2022-02-11 18:05:48 +0100

[diff] [blame]

514

if self.signed:

515

raise NotImplementedError("FIXME: Implement support for encoding signed integer")

Philipp Maier

2022-06-01 18:21:17 +0200

[diff] [blame]

516

517

# compute how many bytes we need

Harald Welte

2022-02-11 18:05:48 +0100

[diff] [blame]

nbytes = 1

while True:

i = i >> 8

if i == 0:

Philipp Maier

2022-06-01 18:21:17 +0200

[diff] [blame]

522

break

Harald Welte

2022-02-11 18:05:48 +0100

[diff] [blame]

523

else:

524

nbytes = nbytes + 1

Philipp Maier

2022-06-01 18:21:17 +0200

[diff] [blame]

525

526

# round up to the minimum number

527

# of bytes we anticipate

if nbytes < minlen:

nbytes = minlen

return nbytes

Harald Welte

2022-02-11 18:05:48 +0100

[diff] [blame]

532

533

def _build(self, obj, stream, context, path):

534

if not isinstance(obj, integertypes):

535

raise IntegerError(f"value {obj} is not an integer", path=path)

Philipp Maier

2022-06-01 18:21:17 +0200

[diff] [blame]

536

length = self.__bytes_required(obj, self.minlen)

Harald Welte

2022-02-11 18:05:48 +0100

[diff] [blame]

537

try:

Vadim Yanitskiy

05d30eb

2022-08-29 20:24:44 +0700

[diff] [blame]

538

data = obj.to_bytes(length, byteorder='big', signed=self.signed)

Harald Welte