blob: e6cbe00b66cafd50d7b34b9da6d4b83ff30e6727 [file] [log] [blame]
Neels Hofmeyr7ab5fc12018-11-15 23:29:56 +01001#!/usr/bin/env python3
2
3'''Using mad regexes, automatically make sure that all structs with sub-byte
4integers have matching big-endian definitions. The idea is to save a lot of
5manual effort, and to automatically verify that there are no errors.
6This script most certainly has numerous holes and shortcomings, but actually,
7if you hit problems with it, rather adjust your coding style so that this
8script can deal with it...'''
9
10import re
11import sys
12import codecs
13import os.path
14
# Top-level 'struct name {' opener and the matching '} ...;' closing line.
re_struct_start = re.compile(r'^struct\s*[a-zA-Z_][a-zA-Z_0-9]*\s*{\s*$')
re_struct_end = re.compile(r'^}[^;]*;\s*$')

# Indented anonymous 'struct {' opener, and its named / unnamed closing lines.
re_substruct_start = re.compile(r'^\s+struct\s*{\s*$')
re_substruct_end = re.compile(r'^\s+}\s*([^;]*\s)[a-zA-Z_][a-zA-Z_0-9]*\s*;\s*$')
re_unnamed_substruct_end = re.compile(r'^\s+}\s*;\s*$')

# One integer definition up to and including its ';'.
# group(1) is the leading whitespace plus the type words, the last group is
# the member list.
# NOTE(review): 'uint[0-9]_t' only matches single-digit widths (e.g. uint8_t),
# unlike 'int[0-9]+_t'. Left unchanged because widening it changes which
# definitions get reordered -- confirm whether that is intended.
re_int_def = re.compile(r'(^\s*((const|unsigned|signed|char|int|long|int[0-9]+_t|uint[0-9]_t)\s+)+\s*)([^;]*;)',
                        re.DOTALL | re.MULTILINE)
# One member of an integer definition: either 'name' or 'name:bits'.
re_int_members = re.compile(r'([a-zA-Z_][a-zA-Z_0-9]*|[a-zA-Z_][a-zA-Z_0-9]*\s*:\s*[0-9]+)\s*[,;]\s*', re.DOTALL | re.MULTILINE)

# Preprocessor lines that delimit the endian-specific sections.
re_little_endian_ifdef = re.compile(r'#\s*(if|elif)\s+OSMO_IS_LITTLE_ENDIAN\s*(==\s*1\s*|)')
re_big_endian_ifdef = re.compile(r'#\s*(if|elif)\s+OSMO_IS_BIG_ENDIAN\s*')
re_else = re.compile(r'#\s*else\s*')
re_endif = re.compile(r'#\s*endif\s*')

# A C comment: either a '/* ... */' block (non-greedy, may span lines and may
# contain '*'), or a '//' comment running to the end of its line.
# The single capturing group is kept so that re.split() yields the comments at
# the odd indices of its result.
re_c_comment = re.compile(r'(/\*.*?\*/|//[^\n]*)', re.DOTALL)
32
def remove_c_comments(code_str):
    '''Return code_str with every match of re_c_comment cut out.'''
    return re_c_comment.sub('', code_str)
35
def section_struct_body(struct_body_lines):
    '''Divide a top-level-struct body into alternating sections.

    Aim: handle each sub-struct on its own, and if there already are ifdefs
    for little and big endian, keep just the little endian bit so that big
    endian can be derived from it.

    An "arbitrary" section is anything other than struct member definitions,
    i.e. the lines matching a sub-struct opener ('struct {') or closer
    ('} sub_name;', '};').
    A "definition" section holds the lines that define struct members
    (possibly with comments).
    The '#if' / '#elif' / '#else' / '#endif' lines of endian sections, and all
    lines inside a big-endian section, are dropped.

    struct_body_lines: list of lines (with line endings) between the struct's
        opening and closing line. It is not modified.
    Return: a list of lists of lines; even indexes are arbitrary sections
        (possibly empty), odd indexes are definition sections.
    '''

    # Work on a copy: the '#else' handling below overwrites entries, and that
    # must not leak into the caller's list.
    lines = list(struct_body_lines)

    struct_body_parts = []
    arbitrary_part = []
    def_part = []

    def end_def():
        '''If any definitions were recorded, flush the pending arbitrary and
        definition sections and start new ones. In short, cut a section
        boundary.'''
        nonlocal arbitrary_part, def_part

        if def_part:
            struct_body_parts.append(arbitrary_part)
            arbitrary_part = []
            struct_body_parts.append(def_part)
            def_part = []

    j = 0
    while j < len(lines):
        line = lines[j]

        if (re_substruct_start.fullmatch(line)
                or re_substruct_end.fullmatch(line)
                or re_unnamed_substruct_end.fullmatch(line)):
            end_def()
            arbitrary_part.append(line)
            j += 1
            continue

        if re_big_endian_ifdef.fullmatch(line):
            end_def()
            # discard the big endian section
            j += 1
            while j < len(lines):
                line = lines[j]
                if re_endif.fullmatch(line):
                    end_def()
                    j += 1
                    break
                if re_little_endian_ifdef.fullmatch(line):
                    end_def()
                    # leave j on the start of the little endian section, so
                    # that the outer loop handles it
                    break
                if re_else.fullmatch(line):
                    # there's an '#else' after big-endian. Shim a
                    # little-endian header in just for the outer loop.
                    lines[j] = '#if OSMO_IS_LITTLE_ENDIAN\n'
                    break
                j += 1
            continue

        if re_little_endian_ifdef.fullmatch(line):
            end_def()
            j += 1
            while j < len(lines):
                line = lines[j]
                if re_endif.fullmatch(line):
                    end_def()
                    j += 1
                    break
                if re_big_endian_ifdef.fullmatch(line):
                    end_def()
                    # leave j on the start of the big endian section, so that
                    # the outer loop handles (discards) it
                    break
                if re_else.fullmatch(line):
                    # there's an '#else' after little-endian. Shim a
                    # big-endian header in just for the outer loop.
                    lines[j] = '#if OSMO_IS_BIG_ENDIAN\n'
                    break
                # keep the little endian definitions
                def_part.append(line)
                j += 1
            continue

        def_part.append(line)
        j += 1

    # flush the last section remaining that didn't see an explicit end
    end_def()
    # end_def() only flushes arbitrary_part if there was a def_part, so:
    if arbitrary_part:
        struct_body_parts.append(arbitrary_part)

    return struct_body_parts
137
def struct_body_to_big_endian(body_str):
    r'''Derive the big-endian variant of a struct body.

    Input: a multi-line string containing the body of a struct, i.e. without
    sub-structs and without #if OSMO_IS_BIG_ENDIAN, like

      '\tconst char *foo;\n\tuint8_t moo:3, goo:2;\n\tuint8_t loo:3;\n\tvoid *baz;\n'

    Return None if there are no sub-byte integers, to indicate that there is
    no little/big endian split required. Otherwise return a multi-line string
    of the big-endian version of this same struct body, where sub-byte ints
    are reversed at byte boundaries, and all others are copied 1:1.

    Raise Exception if sub-byte members do not add up to whole bytes (in the
    middle or at the end of the body), or if the integer type changes in the
    middle of a byte.'''

    # kick comments out of the code analysis. They will end up being stripped
    # from big-endian only.
    body_str = remove_c_comments(body_str)

    # re-attach the ';' that split() removed, skipping blank fragments
    def_strs = ('%s;' % def_str for def_str in body_str.split(';') if def_str.strip())

    # classify defs as containing sub-byte members or not
    # defs = [ (True, 'uint8_t foo:3, bar:5;', 'uint8_t ', ['foo:3', 'bar:5']),
    #          (False, 'int baz;'),...]
    defs = []
    any_sub_byte_ints = False
    for one_def in def_strs:

        # does it have sub-byte integers?
        int_def = re_int_def.fullmatch(one_def)
        if not int_def:
            # not even a number, same for big and little endian
            defs.append((False, one_def))
            continue

        int_type = int_def.group(1)
        members_str = int_def.group(4)
        members = [found.group(1) for found in re_int_members.finditer(members_str)]

        if any(':' in member for member in members):
            defs.append((True, one_def, int_type, members))
            any_sub_byte_ints = True
        else:
            defs.append((False, one_def))

    if not any_sub_byte_ints:
        return None

    # now the interesting part, go over the defs, and reverse the sub-byte ints
    # at byte boundaries.

    got_bits = 0
    byte_type = None
    members_within_a_byte = []
    big_endian_defs = []

    for classified_def in defs:
        has_sub_byte_ints = classified_def[0]

        if has_sub_byte_ints:
            _, one_def, int_type, members = classified_def

            if byte_type and byte_type.strip() != int_type.strip():
                raise Exception('mismatching type continuation after incomplete byte: %r %r to %r'
                                % (byte_type, members_within_a_byte, int_type))
            byte_type = int_type

            for member in members:
                member_name, bits_str = member.split(':')
                member_name = member_name.strip()
                bits = int(bits_str)
                member = '%s:%d' % (member_name, bits)
                members_within_a_byte.append(member)
                got_bits += bits

                if got_bits == 8:
                    # a full byte is collected: emit its members reversed.
                    big_endian_defs.append('%s%s;' % (byte_type, ', '.join(reversed(members_within_a_byte))))
                    members_within_a_byte = []
                    byte_type = None
                    got_bits = 0

                elif got_bits > 8:
                    raise Exception('sub-byte int breaks clean byte bounds: %s -- %d + %d = %d bits'
                                    % (member, got_bits - bits, bits, got_bits))

        else:
            if got_bits:
                raise Exception('sub-byte members do not add up to clean byte bounds: %r' % members_within_a_byte)

            big_endian_defs.append(classified_def[1])

    # Members still pending here were never emitted above; returning now would
    # silently drop them from the big-endian variant, so reject the body.
    if got_bits:
        raise Exception('sub-byte members do not add up to clean byte bounds: %r' % members_within_a_byte)

    # strip empty lines, and clean the trailing whitespace we might have taken
    # in with the type names
    lines = [line.rstrip(' \t')
             for line in ''.join(big_endian_defs).split('\n')
             if line.strip()]
    return '\n'.join(lines)
247
def handle_struct_body(body_str):
    '''Wrap a struct body in little/big endian preprocessor sections.

    If struct_body_to_big_endian() yields a big-endian variant for body_str,
    return body_str as the OSMO_IS_LITTLE_ENDIAN section followed by that
    variant as the OSMO_IS_BIG_ENDIAN section. Otherwise return body_str
    unchanged.'''

    big_endian_variant = struct_body_to_big_endian(body_str)

    if not big_endian_variant:
        return body_str

    pieces = (
        '#if OSMO_IS_LITTLE_ENDIAN\n',
        body_str,
        '#elif OSMO_IS_BIG_ENDIAN\n'
        '/* auto-generated from the little endian part above (libosmocore/contrib/struct_endianness.py) */\n',
        big_endian_variant,
        '\n#endif\n',
    )
    return ''.join(pieces)
262
def _check_file(f):
    '''Rewrite the file at path f so that packed structs with sub-byte
    integers carry matching little and big endian definitions.

    Files not ending in '.h', '.c' or '.cpp' are left alone. If the result
    uses OSMO_IS_LITTLE_ENDIAN, an include of osmocom/core/endian.h is added
    when missing (except for files named '...endian.h').

    Raise Exception if a struct cannot be handled, or if no place for the
    endian.h include can be found.'''

    if not f.endswith(('.h', '.c', '.cpp')):
        return

    # read everything up front, so that the file is closed again before it
    # gets rewritten below
    with codecs.open(f, "r", "utf-8") as fd:
        lines = fd.readlines()

    # section the file into
    # [ ["no struct def"], ["struct {...};"], ["no struct def"], ... ]
    sections = []
    in_struct = False
    buf = []
    for line in lines:

        if not in_struct and re_struct_start.fullmatch(line):
            # flush whatever might still be in buf from before
            sections.append(buf)
            # start an in_struct section
            buf = [line]
            in_struct = True
        elif in_struct and re_struct_end.fullmatch(line):
            # add this end to the in_struct section and then start a non-struct section
            buf.append(line)
            sections.append(buf)
            in_struct = False
            buf = []
        else:
            buf.append(line)
    # flush any leftovers in buf
    if buf:
        sections.append(buf)

    # examine each struct, i.e. every second item in 'sections'
    for i in range(1, len(sections), 2):
        struct = sections[i]

        # If the struct isn't packed, we need not bother.
        # The practical use of this: in some structs we have booleans in the
        # form of
        #     integer flag:1;
        # and these don't add up to bytes, and cause errors. So let's skip all
        # non-packed structs, then all of those are out of the picture.
        if 'packed' not in struct[-1]:
            continue

        try:
            # assume the 'struct foo {' is on the first line, the closing brace
            # '} __attribute...;' on the last, and the rest are individual
            # definitions split by ';'.
            struct_body_lines = struct[1:-1]
            struct_body_parts = section_struct_body(struct_body_lines)

            # even parts are copied verbatim, odd parts are member definitions
            new_struct_body_parts = []
            for j, part_lines in enumerate(struct_body_parts):
                part = ''.join(part_lines)
                if j & 1:
                    part = handle_struct_body(part)
                new_struct_body_parts.append(part)

            sections[i] = [struct[0], ''.join(new_struct_body_parts), struct[-1]]
        except Exception as e:
            raise Exception('ERROR in struct %r' % struct[0]) from e

    # phew. result.
    result = ''.join(''.join(s) for s in sections)

    # see if osmocom/core/endian.h is needed and included.
    if (not f.endswith('endian.h')
            and 'OSMO_IS_LITTLE_ENDIAN' in result
            and '#include <osmocom/core/endian.h>' not in result):
        # add the include after the last 'osmocom/core' include
        last_include_start = result.rfind('#include <osmocom/core/')
        if last_include_start < 0:
            last_include_start = result.rfind('#include <osmocom/')
        if last_include_start < 0:
            last_include_start = result.rfind('#include')

        if last_include_start < 0:
            raise Exception('do not know where to include osmocom/core/endian.h in %r' % f)

        insert_at = result.find('\n', last_include_start)
        if insert_at < 0:
            # the last include is on the final line without a line ending:
            # append, instead of splicing in before the last character.
            insert_at = len(result)

        result = result[:insert_at] + '\n#include <osmocom/core/endian.h>' + result[insert_at:]

    with codecs.open(f, "w", "utf-8") as fd:
        fd.write(result)
352
def check_file(f):
    '''Run _check_file() on path f. Any Exception it raises is re-raised as an
    Exception naming the file, with the original attached as its cause.'''
    try:
        _check_file(f)
    except Exception as e:
        raise Exception('ERROR IN FILE %r' % f) from e
358
def main(args):
    '''Run check_file() on every path in args; directories are walked
    recursively. Without any args, process the current directory.'''
    if not args:
        args = ['.']

    for f in args:
        if os.path.isdir(f):
            # bottom-up walk; the order does not matter, since each file is
            # handled on its own.
            for parent_path, subdirs, files in os.walk(f, topdown=False):
                for ff in files:
                    check_file(os.path.join(parent_path, ff))
        else:
            check_file(f)


# Only touch files when run as a script, not when merely imported.
if __name__ == '__main__':
    main(sys.argv[1:])
370
371# vim: tabstop=4 shiftwidth=4 expandtab