Blame - contrib/struct_endianess.py - libosmocore

blob: be73fbe22839a4939951b58187a2c152a6f825bc [file] [log] [blame]

Neels Hofmeyr	7ab5fc1	2018-11-15 23:29:56 +0100	[diff] [blame]	1	#!/usr/bin/env python3
				2
				3	'''Using mad regexes, automatically make sure that all structs with sub-byte
				4	integers have matching big-endian definitions. The idea is to save a lot of
				5	manual effort, and to automatically verify that there are no errors.
				6	This script most certainly has numerous holes and shortcomings, but actually,
				7	if you hit problems with it, rather adjust your coding style so that this
				8	script can deal with it...'''
				9
				10	import re
				11	import sys
				12	import codecs
				13	import os.path
				14
				15	re_struct_start = re.compile(r'^struct\s[a-zA-Z_][a-zA-Z_0-9]\s{\s$')
				16	re_struct_end = re.compile(r'^}[^;];\s$')
				17
				18	re_substruct_start = re.compile(r'^\s+struct\s{\s$')
				19	re_substruct_end = re.compile(r'^\s+}\s([^;]\s)[a-zA-Z_][a-zA-Z_0-9]\s;\s*$')
				20
				21	re_int_def = re.compile(r'(^\s((const\|unsigned\|signed\|char\|int\|long\|int[0-9]+_t\|uint[0-9]_t)\s+)+\s)([^;]*;)',
				22	re.DOTALL \| re.MULTILINE)
				23	re_int_members = re.compile(r'([a-zA-Z_][a-zA-Z_0-9]\|[a-zA-Z_][a-zA-Z_0-9]\s:\s[0-9]+)\s[,;]\s', re.DOTALL \| re.MULTILINE)
				24
				25	re_little_endian_ifdef = re.compile(r'#\s(if\|elif)\s+OSMO_IS_LITTLE_ENDIAN\s(==\s1\s\|)');
				26	re_big_endian_ifdef = re.compile(r'#\s(if\|elif)\s+OSMO_IS_BIG_ENDIAN\s');
				27	re_else = re.compile(r'#\selse\s');
				28	re_endif = re.compile(r'#\sendif\s');
				29
				30	re_c_comment = re.compile(r'(/\[^]+\*/\|//.?$)')
				31
				32	def remove_c_comments(code_str):
				33	return ''.join(re_c_comment.split(code_str)[::2])
				34
				35	def section_struct_body(struct_body_lines):
				36	'''divide a top-level-struct body into sections of
				37	['arbitrary string', ['body;\n', 'lines;\n'], 'arbitrary string', ...]
				38	Aim: handle each sub-struct on its own, and if there already are ifdefs for
				39	little and big endian, keep just the little endian bit and derive big
				40	endian from it.
				41	An arbitrary string is anything other than struct member definitions, like
				42	a 'struct {', '} sub_name;', ...
				43	"body lines" are lines that define struct members (possibly with comments).
				44	Return: list of alternate arbitrary strings and variable definitions.
				45	'''
				46
				47	# these globals are needed so that end_def() can change them from inside
				48	# the function. Not very nice style, but easiest implementation.
				49	global struct_body_parts
				50	global arbitrary_part
				51	global def_part
				52
				53	struct_body_parts = []
				54	arbitrary_part = []
				55	def_part = []
				56
				57	def end_def():
				58	'''if there is any content, flush out recorded parts (def_part,
				59	arbitrary_part) and start a new part. In short, cut a section
				60	boundary.'''
				61	global struct_body_parts
				62	global arbitrary_part
				63	global def_part
				64
				65	if def_part:
				66	struct_body_parts.append(arbitrary_part)
				67	arbitrary_part = []
				68	struct_body_parts.append(def_part)
				69	def_part = []
				70
				71	j = 0
				72	while j < len(struct_body_lines):
				73	line = struct_body_lines[j]
				74
				75	if (re_substruct_start.fullmatch(line)
				76	or re_substruct_end.fullmatch(line)):
				77	end_def()
				78	arbitrary_part.append(line)
				79	j += 1
				80	continue
				81
				82	if re_big_endian_ifdef.fullmatch(line):
				83	end_def()
				84	# discard big endian section
				85	j += 1
				86	while j < len(struct_body_lines):
				87	line = struct_body_lines[j]
				88	if re_endif.fullmatch(line):
				89	end_def()
				90	j += 1
				91	break;
				92	if re_little_endian_ifdef.fullmatch(line):
				93	end_def()
				94	# keep that start of little endian section, not j++
				95	break;
				96	if re_else.fullmatch(line):
				97	# there's an '#else' after big-endian. Shim a little-endian header in just for the loop.
				98	struct_body_lines[j] = '#if OSMO_IS_LITTLE_ENDIAN\n'
				99	break;
				100	j += 1
				101	continue
				102
				103	if re_little_endian_ifdef.fullmatch(line):
				104	end_def()
				105	j += 1
				106	while j < len(struct_body_lines):
				107	line = struct_body_lines[j]
				108	if re_endif.fullmatch(line):
				109	end_def()
				110	j += 1
				111	break;
				112	if re_big_endian_ifdef.fullmatch(line):
				113	end_def()
				114	# keep that start of big endian section, not j++
				115	break;
				116	if re_else.fullmatch(line):
				117	# there's an '#else' after little-endian. Shim a big-endian header in just for the loop.
				118	struct_body_lines[j] = '#if OSMO_IS_BIG_ENDIAN\n'
				119	break;
				120	def_part.append(line)
				121	j += 1
				122
				123	continue
				124
				125	def_part.append(line)
				126	j += 1
				127
				128	# flush the last section remaining that didn't see an explicit end
				129	end_def()
				130	# end_def() only flushes arbitrary_part if there was a def_part, so:
				131	if arbitrary_part:
				132	struct_body_parts.append(arbitrary_part)
				133
				134	return struct_body_parts
				135
				136	def struct_body_to_big_endian(body_str):
				137	'''Input: a multi-line string containing the body of a struct, i.e. without
				138	sub-structs and without #if OSMO_IS_BIG_ENDIAN. like
				139
				140	'\tconst char foo;\n\tuint8_t moo:3, goo:2;\n\tuint8_t loo:3;\n\tvoid baz;\n'
				141
				142	Return None to indicate that there is no little/big endian split
				143	required, or return a multi-line string of the big-endian version of this
				144	same struct body, where sub-byte ints are reversed at byte boundaries, and
				145	all others are copied 1:1. If there are no sub-byte integers, return None,
				146	to indicate that there is no little/big endian split required.'''
				147
				148	# kick comments out of the code analysis. They will end up being stripped
				149	# from big-endian only.
				150	body_str = remove_c_comments(body_str)
				151
				152	def_strs = body_str.split(';')
				153	def_strs = ('%s;' % def_str for def_str in def_strs if def_str.strip())
				154
				155	# classify defs as containing sub-byte members or not
				156	# defs = [ (true, 'uint8_t ', ('foo:3', 'bar:5')),
				157	# (false, 'int baz;'),...]
				158	defs = []
				159	any_sub_byte_ints = False
				160	for one_def in def_strs:
				161
				162	# does it have sub-string integers?
				163	int_def = re_int_def.fullmatch(one_def)
				164	if not int_def:
				165	# not even a number, same for big and little endian
				166	defs.append((False, one_def))
				167	continue
				168
				169	int_type = int_def.group(1)
				170	members_str = int_def.groups()[-1]
				171	has_sub_byte_ints = False
				172
				173	members = []
				174	for int_member in re_int_members.finditer(members_str):
				175	member = int_member.group(1)
				176	members.append(member)
				177	if ':' in member:
				178	has_sub_byte_ints = True
				179
				180	if not has_sub_byte_ints:
				181	defs.append((False, one_def))
				182	else:
				183	defs.append((True, one_def, int_type, members))
				184	any_sub_byte_ints = True
				185
				186	if not any_sub_byte_ints:
				187	return None
				188
				189	# now the interesting part, go over the defs, and reverse the sub-byte ints
				190	# at byte boundaries.
				191
				192	i = 0
				193	got_bits = 0
				194	byte_type = None
				195	members_within_a_byte = []
				196	big_endian_defs = []
				197
				198	big_defs = []
				199	for classified_def in defs:
				200	has_sub_byte_ints = classified_def[0]
				201
				202	# now the big endian part
				203	if has_sub_byte_ints:
				204	_, one_def, int_type, members = classified_def
				205
				206	if byte_type and byte_type.strip() != int_type.strip():
				207	raise Exception('mismatching type continuation after incomplete byte: %r %r to %r'
				208	% (byte_type, members_within_a_byte, int_type))
				209	byte_type = int_type
				210
				211	for member in members:
				212	member_name, bits_str = member.split(':')
				213	member_name = member_name.strip()
				214	bits = int(bits_str)
				215	member = '%s:%d' % (member_name, bits)
				216	members_within_a_byte.append(member)
				217	got_bits += bits
				218
				219	if got_bits == 8:
				220	# reverse these.
				221	big_endian_defs.append('%s%s;' % (byte_type, ', '.join(reversed(members_within_a_byte))))
				222	members_within_a_byte = []
				223	byte_type = None
				224	got_bits = 0
				225
				226	elif got_bits > 8:
				227	raise Exception('sub-byte int breaks clean byte bounds: %s -- %d + %d = %d bits'
				228	% (member, got_bits - bits, bits, got_bits))
				229
				230	elif not has_sub_byte_ints:
				231	if got_bits:
				232	raise Exception('sub-byte members do not add up to clean byte bounds: %r' % members_within_a_byte)
				233
				234	big_endian_defs.append(classified_def[1])
				235
				236	# strip empty lines
				237	lines = [l for l in (''.join(big_endian_defs).split('\n')) if l.strip()]
				238	# clean lines' whitespace errors we might have taken in with the type names
				239	for i in range(len(lines)):
				240	line = lines[i]
				241	while len(line) and line[-1] in ' \t':
				242	line = line[:-1]
				243	lines[i] = line
				244	return '\n'.join(lines)
				245
				246	def handle_struct_body(body_str):
				247
				248	big_endian_body_str = struct_body_to_big_endian(body_str)
				249
				250	if big_endian_body_str:
				251	new_lines = ['#if OSMO_IS_LITTLE_ENDIAN\n']
				252	new_lines.append(body_str)
				253	new_lines.append('#elif OSMO_IS_BIG_ENDIAN\n'
				254	'/* auto-generated from the little endian part above (libosmocore/contrib/struct_endianess.py) */\n')
				255	new_lines.append(big_endian_body_str)
				256	new_lines.append('\n#endif\n')
				257	return ''.join(new_lines)
				258	else:
				259	return body_str
				260
				261	def _check_file(f):
				262	if not (f.endswith('.h') or f.endswith('.c') or f.endswith('.cpp')):
				263	return
				264
				265	# section the file into
				266	# [ ["no struct def"], ["struct {...};"], ["no struct def"], ... ]
				267	sections = []
				268	in_struct = False
				269	buf = []
				270	for line in codecs.open(f, "r", "utf-8").readlines():
				271
				272	if not in_struct and re_struct_start.fullmatch(line):
				273	# flush whatever might still be in buf from before
				274	sections.append(buf)
				275	# start an in_struct section
				276	buf = [line]
				277	in_struct = True
				278	elif in_struct and re_struct_end.fullmatch(line):
				279	# add this end to the in_struct section and then start a non-struct section
				280	buf.append(line)
				281	sections.append(buf)
				282	in_struct = False
				283	buf = []
				284	else:
				285	buf.append(line)
				286	# flush any leftovers in buf
				287	if buf:
				288	sections.append(buf)
				289
				290	# examine each struct, i.e. every second item in 'sections'
				291	for i in range(len(sections)):
				292	if not (i & 1):
				293	continue
				294
				295	struct = sections[i]
				296
				297	# If the struct isn't packed, we need not bother.
				298	# The practical use of this: in some structs we have booleans in the
				299	# form of
				300	# integer flag:1;
				301	# and these don't add up to bytes, and cause errors. So let's skip all
				302	# non-packed structs, then all of those are out of the picture.
				303	if not 'packed' in struct[-1]:
				304	continue
				305
				306	try:
				307
				308	# assume the 'struct foo {' is on the first line, the closing brace
				309	# '} __attribute...;' on the last, and the rest are individual
				310	# definitions split by ';'.
				311	struct_body_lines = struct[1:-1]
				312	struct_body_parts = section_struct_body(struct_body_lines)
				313
				314	new_struct_body_parts = []
				315	for j in range(len(struct_body_parts)):
				316	part = ''.join(struct_body_parts[j])
				317	if not (j & 1):
				318	new_struct_body_parts.append(part)
				319	else:
				320	new_struct_body_parts.append(handle_struct_body(part))
				321
				322	new_struct = [struct[0], ''.join(new_struct_body_parts), struct[-1]]
				323	sections[i] = new_struct
				324	except Exception as e:
				325	raise Exception('ERROR in struct %r' % struct[0])
				326
				327	# phew. result.
				328	result = ''.join((''.join(s) for s in sections))
				329
				330	# see if osmocom/core/endian.h is needed and included.
				331	if (not f.endswith('endian.h')
				332	and 'OSMO_IS_LITTLE_ENDIAN' in result
				333	and '#include <osmocom/core/endian.h>' not in result):
				334	# add the include after the last 'osmocom/core' include
				335	last_include_start = result.rfind('#include <osmocom/core/')
				336	if last_include_start < 0:
				337	last_include_start = result.rfind('#include <osmocom/')
				338	if last_include_start < 0:
				339	last_include_start = result.rfind('#include')
				340
				341	if last_include_start < 0:
				342	raise Exception('do not know where to include osmocom/core/endian.h in %r' % f)
				343
				344	insert_at = result.find('\n', last_include_start)
				345
				346	result = result[:insert_at] + '\n#include <osmocom/core/endian.h>' + result[insert_at:]
				347
				348	with codecs.open(f, "w", "utf-8") as fd:
				349	fd.write(result)
				350
				351	def check_file(f):
				352	try:
				353	_check_file(f)
				354	except Exception as e:
				355	raise Exception('ERROR IN FILE %r' % f)
				356
				357	args = sys.argv[1:]
				358	if not args:
				359	args = ['.']
				360
				361	for f in args:
				362	if os.path.isdir(f):
				363	for parent_path, subdirs, files in os.walk(f, None, None):
				364	for ff in files:
				365	check_file(os.path.join(parent_path, ff))
				366	else:
				367	check_file(f)
				368
				369	# vim: tabstop=4 shiftwidth=4 expandtab