Blame - contrib/struct_endianness.py - libosmocore

blob: e6cbe00b66cafd50d7b34b9da6d4b83ff30e6727 [file] [log] [blame]

Neels Hofmeyr	7ab5fc1	2018-11-15 23:29:56 +0100	[diff] [blame]	1	#!/usr/bin/env python3
				2
'''Using mad regexes, automatically make sure that all structs with sub-byte
integers have matching big-endian definitions. The idea is to save a lot of
manual effort, and to automatically verify that there are no errors.
This script most certainly has numerous holes and shortcomings, but actually,
if you hit problems with it, rather adjust your coding style so that this
script can deal with it...'''
				9
				10	import re
				11	import sys
				12	import codecs
				13	import os.path
				14
				15	re_struct_start = re.compile(r'^struct\s[a-zA-Z_][a-zA-Z_0-9]\s{\s$')
				16	re_struct_end = re.compile(r'^}[^;];\s$')
				17
				18	re_substruct_start = re.compile(r'^\s+struct\s{\s$')
				19	re_substruct_end = re.compile(r'^\s+}\s([^;]\s)[a-zA-Z_][a-zA-Z_0-9]\s;\s*$')
Neels Hofmeyr	bd58697	2020-05-14 17:27:59 +0200	[diff] [blame]	20	re_unnamed_substruct_end = re.compile(r'^\s+}\s;\s$')
Neels Hofmeyr	7ab5fc1	2018-11-15 23:29:56 +0100	[diff] [blame]	21
				22	re_int_def = re.compile(r'(^\s((const\|unsigned\|signed\|char\|int\|long\|int[0-9]+_t\|uint[0-9]_t)\s+)+\s)([^;]*;)',
				23	re.DOTALL \| re.MULTILINE)
				24	re_int_members = re.compile(r'([a-zA-Z_][a-zA-Z_0-9]\|[a-zA-Z_][a-zA-Z_0-9]\s:\s[0-9]+)\s[,;]\s', re.DOTALL \| re.MULTILINE)
				25
				26	re_little_endian_ifdef = re.compile(r'#\s(if\|elif)\s+OSMO_IS_LITTLE_ENDIAN\s(==\s1\s\|)');
				27	re_big_endian_ifdef = re.compile(r'#\s(if\|elif)\s+OSMO_IS_BIG_ENDIAN\s');
				28	re_else = re.compile(r'#\selse\s');
				29	re_endif = re.compile(r'#\sendif\s');
				30
				31	re_c_comment = re.compile(r'(/\[^]+\*/\|//.?$)')
				32
				33	def remove_c_comments(code_str):
				34	return ''.join(re_c_comment.split(code_str)[::2])
				35
				36	def section_struct_body(struct_body_lines):
				37	'''divide a top-level-struct body into sections of
				38	['arbitrary string', ['body;\n', 'lines;\n'], 'arbitrary string', ...]
				39	Aim: handle each sub-struct on its own, and if there already are ifdefs for
				40	little and big endian, keep just the little endian bit and derive big
				41	endian from it.
				42	An arbitrary string is anything other than struct member definitions, like
				43	a 'struct {', '} sub_name;', ...
				44	"body lines" are lines that define struct members (possibly with comments).
				45	Return: list of alternate arbitrary strings and variable definitions.
				46	'''
				47
				48	# these globals are needed so that end_def() can change them from inside
				49	# the function. Not very nice style, but easiest implementation.
				50	global struct_body_parts
				51	global arbitrary_part
				52	global def_part
				53
				54	struct_body_parts = []
				55	arbitrary_part = []
				56	def_part = []
				57
				58	def end_def():
				59	'''if there is any content, flush out recorded parts (def_part,
				60	arbitrary_part) and start a new part. In short, cut a section
				61	boundary.'''
				62	global struct_body_parts
				63	global arbitrary_part
				64	global def_part
				65
				66	if def_part:
				67	struct_body_parts.append(arbitrary_part)
				68	arbitrary_part = []
				69	struct_body_parts.append(def_part)
				70	def_part = []
				71
				72	j = 0
				73	while j < len(struct_body_lines):
				74	line = struct_body_lines[j]
				75
				76	if (re_substruct_start.fullmatch(line)
Neels Hofmeyr	bd58697	2020-05-14 17:27:59 +0200	[diff] [blame]	77	or re_substruct_end.fullmatch(line)
				78	or re_unnamed_substruct_end.fullmatch(line)):
Neels Hofmeyr	7ab5fc1	2018-11-15 23:29:56 +0100	[diff] [blame]	79	end_def()
				80	arbitrary_part.append(line)
				81	j += 1
				82	continue
				83
				84	if re_big_endian_ifdef.fullmatch(line):
				85	end_def()
				86	# discard big endian section
				87	j += 1
				88	while j < len(struct_body_lines):
				89	line = struct_body_lines[j]
				90	if re_endif.fullmatch(line):
				91	end_def()
				92	j += 1
				93	break;
				94	if re_little_endian_ifdef.fullmatch(line):
				95	end_def()
				96	# keep that start of little endian section, not j++
				97	break;
				98	if re_else.fullmatch(line):
				99	# there's an '#else' after big-endian. Shim a little-endian header in just for the loop.
				100	struct_body_lines[j] = '#if OSMO_IS_LITTLE_ENDIAN\n'
				101	break;
				102	j += 1
				103	continue
				104
				105	if re_little_endian_ifdef.fullmatch(line):
				106	end_def()
				107	j += 1
				108	while j < len(struct_body_lines):
				109	line = struct_body_lines[j]
				110	if re_endif.fullmatch(line):
				111	end_def()
				112	j += 1
				113	break;
				114	if re_big_endian_ifdef.fullmatch(line):
				115	end_def()
				116	# keep that start of big endian section, not j++
				117	break;
				118	if re_else.fullmatch(line):
				119	# there's an '#else' after little-endian. Shim a big-endian header in just for the loop.
				120	struct_body_lines[j] = '#if OSMO_IS_BIG_ENDIAN\n'
				121	break;
				122	def_part.append(line)
				123	j += 1
				124
				125	continue
				126
				127	def_part.append(line)
				128	j += 1
				129
				130	# flush the last section remaining that didn't see an explicit end
				131	end_def()
				132	# end_def() only flushes arbitrary_part if there was a def_part, so:
				133	if arbitrary_part:
				134	struct_body_parts.append(arbitrary_part)
				135
				136	return struct_body_parts
				137
				138	def struct_body_to_big_endian(body_str):
				139	'''Input: a multi-line string containing the body of a struct, i.e. without
				140	sub-structs and without #if OSMO_IS_BIG_ENDIAN. like
				141
				142	'\tconst char foo;\n\tuint8_t moo:3, goo:2;\n\tuint8_t loo:3;\n\tvoid baz;\n'
				143
				144	Return None to indicate that there is no little/big endian split
				145	required, or return a multi-line string of the big-endian version of this
				146	same struct body, where sub-byte ints are reversed at byte boundaries, and
				147	all others are copied 1:1. If there are no sub-byte integers, return None,
				148	to indicate that there is no little/big endian split required.'''
				149
				150	# kick comments out of the code analysis. They will end up being stripped
				151	# from big-endian only.
				152	body_str = remove_c_comments(body_str)
				153
				154	def_strs = body_str.split(';')
				155	def_strs = ('%s;' % def_str for def_str in def_strs if def_str.strip())
				156
				157	# classify defs as containing sub-byte members or not
				158	# defs = [ (true, 'uint8_t ', ('foo:3', 'bar:5')),
				159	# (false, 'int baz;'),...]
				160	defs = []
				161	any_sub_byte_ints = False
				162	for one_def in def_strs:
				163
				164	# does it have sub-string integers?
				165	int_def = re_int_def.fullmatch(one_def)
				166	if not int_def:
				167	# not even a number, same for big and little endian
				168	defs.append((False, one_def))
				169	continue
				170
				171	int_type = int_def.group(1)
				172	members_str = int_def.groups()[-1]
				173	has_sub_byte_ints = False
				174
				175	members = []
				176	for int_member in re_int_members.finditer(members_str):
				177	member = int_member.group(1)
				178	members.append(member)
				179	if ':' in member:
				180	has_sub_byte_ints = True
				181
				182	if not has_sub_byte_ints:
				183	defs.append((False, one_def))
				184	else:
				185	defs.append((True, one_def, int_type, members))
				186	any_sub_byte_ints = True
				187
				188	if not any_sub_byte_ints:
				189	return None
				190
				191	# now the interesting part, go over the defs, and reverse the sub-byte ints
				192	# at byte boundaries.
				193
				194	i = 0
				195	got_bits = 0
				196	byte_type = None
				197	members_within_a_byte = []
				198	big_endian_defs = []
				199
				200	big_defs = []
				201	for classified_def in defs:
				202	has_sub_byte_ints = classified_def[0]
				203
				204	# now the big endian part
				205	if has_sub_byte_ints:
				206	_, one_def, int_type, members = classified_def
				207
				208	if byte_type and byte_type.strip() != int_type.strip():
				209	raise Exception('mismatching type continuation after incomplete byte: %r %r to %r'
				210	% (byte_type, members_within_a_byte, int_type))
				211	byte_type = int_type
				212
				213	for member in members:
				214	member_name, bits_str = member.split(':')
				215	member_name = member_name.strip()
				216	bits = int(bits_str)
				217	member = '%s:%d' % (member_name, bits)
				218	members_within_a_byte.append(member)
				219	got_bits += bits
				220
				221	if got_bits == 8:
				222	# reverse these.
				223	big_endian_defs.append('%s%s;' % (byte_type, ', '.join(reversed(members_within_a_byte))))
				224	members_within_a_byte = []
				225	byte_type = None
				226	got_bits = 0
				227
				228	elif got_bits > 8:
				229	raise Exception('sub-byte int breaks clean byte bounds: %s -- %d + %d = %d bits'
				230	% (member, got_bits - bits, bits, got_bits))
				231
				232	elif not has_sub_byte_ints:
				233	if got_bits:
				234	raise Exception('sub-byte members do not add up to clean byte bounds: %r' % members_within_a_byte)
				235
				236	big_endian_defs.append(classified_def[1])
				237
				238	# strip empty lines
				239	lines = [l for l in (''.join(big_endian_defs).split('\n')) if l.strip()]
				240	# clean lines' whitespace errors we might have taken in with the type names
				241	for i in range(len(lines)):
				242	line = lines[i]
				243	while len(line) and line[-1] in ' \t':
				244	line = line[:-1]
				245	lines[i] = line
				246	return '\n'.join(lines)
				247
				248	def handle_struct_body(body_str):
				249
				250	big_endian_body_str = struct_body_to_big_endian(body_str)
				251
				252	if big_endian_body_str:
				253	new_lines = ['#if OSMO_IS_LITTLE_ENDIAN\n']
				254	new_lines.append(body_str)
				255	new_lines.append('#elif OSMO_IS_BIG_ENDIAN\n'
Oliver Smith	0b5c09b	2023-02-17 10:35:38 +0100	[diff] [blame]	256	'/* auto-generated from the little endian part above (libosmocore/contrib/struct_endianness.py) */\n')
Neels Hofmeyr	7ab5fc1	2018-11-15 23:29:56 +0100	[diff] [blame]	257	new_lines.append(big_endian_body_str)
				258	new_lines.append('\n#endif\n')
				259	return ''.join(new_lines)
				260	else:
				261	return body_str
				262
				263	def _check_file(f):
Vadim Yanitskiy	ed501d3	2023-02-26 16:29:31 +0700	[diff] [blame]	264	if not f.endswith(('.h', '.c', '.cpp')):
Neels Hofmeyr	7ab5fc1	2018-11-15 23:29:56 +0100	[diff] [blame]	265	return
				266
				267	# section the file into
				268	# [ ["no struct def"], ["struct {...};"], ["no struct def"], ... ]
				269	sections = []
				270	in_struct = False
				271	buf = []
				272	for line in codecs.open(f, "r", "utf-8").readlines():
				273
				274	if not in_struct and re_struct_start.fullmatch(line):
				275	# flush whatever might still be in buf from before
				276	sections.append(buf)
				277	# start an in_struct section
				278	buf = [line]
				279	in_struct = True
				280	elif in_struct and re_struct_end.fullmatch(line):
				281	# add this end to the in_struct section and then start a non-struct section
				282	buf.append(line)
				283	sections.append(buf)
				284	in_struct = False
				285	buf = []
				286	else:
				287	buf.append(line)
				288	# flush any leftovers in buf
				289	if buf:
				290	sections.append(buf)
				291
				292	# examine each struct, i.e. every second item in 'sections'
				293	for i in range(len(sections)):
				294	if not (i & 1):
				295	continue
				296
				297	struct = sections[i]
				298
				299	# If the struct isn't packed, we need not bother.
				300	# The practical use of this: in some structs we have booleans in the
				301	# form of
				302	# integer flag:1;
				303	# and these don't add up to bytes, and cause errors. So let's skip all
				304	# non-packed structs, then all of those are out of the picture.
				305	if not 'packed' in struct[-1]:
				306	continue
				307
				308	try:
				309
				310	# assume the 'struct foo {' is on the first line, the closing brace
				311	# '} __attribute...;' on the last, and the rest are individual
				312	# definitions split by ';'.
				313	struct_body_lines = struct[1:-1]
				314	struct_body_parts = section_struct_body(struct_body_lines)
				315
				316	new_struct_body_parts = []
				317	for j in range(len(struct_body_parts)):
				318	part = ''.join(struct_body_parts[j])
				319	if not (j & 1):
				320	new_struct_body_parts.append(part)
				321	else:
				322	new_struct_body_parts.append(handle_struct_body(part))
				323
				324	new_struct = [struct[0], ''.join(new_struct_body_parts), struct[-1]]
				325	sections[i] = new_struct
				326	except Exception as e:
				327	raise Exception('ERROR in struct %r' % struct[0])
				328
				329	# phew. result.
				330	result = ''.join((''.join(s) for s in sections))
				331
				332	# see if osmocom/core/endian.h is needed and included.
				333	if (not f.endswith('endian.h')
				334	and 'OSMO_IS_LITTLE_ENDIAN' in result
				335	and '#include <osmocom/core/endian.h>' not in result):
				336	# add the include after the last 'osmocom/core' include
				337	last_include_start = result.rfind('#include <osmocom/core/')
				338	if last_include_start < 0:
				339	last_include_start = result.rfind('#include <osmocom/')
				340	if last_include_start < 0:
				341	last_include_start = result.rfind('#include')
				342
				343	if last_include_start < 0:
				344	raise Exception('do not know where to include osmocom/core/endian.h in %r' % f)
				345
				346	insert_at = result.find('\n', last_include_start)
				347
				348	result = result[:insert_at] + '\n#include <osmocom/core/endian.h>' + result[insert_at:]
				349
				350	with codecs.open(f, "w", "utf-8") as fd:
				351	fd.write(result)
				352
				353	def check_file(f):
				354	try:
				355	_check_file(f)
				356	except Exception as e:
				357	raise Exception('ERROR IN FILE %r' % f)
				358
				359	args = sys.argv[1:]
				360	if not args:
				361	args = ['.']
				362
				363	for f in args:
				364	if os.path.isdir(f):
				365	for parent_path, subdirs, files in os.walk(f, None, None):
				366	for ff in files:
				367	check_file(os.path.join(parent_path, ff))
				368	else:
				369	check_file(f)
				370
				371	# vim: tabstop=4 shiftwidth=4 expandtab