blob: be73fbe22839a4939951b58187a2c152a6f825bc [file] [log] [blame]
Neels Hofmeyr7ab5fc12018-11-15 23:29:56 +01001#!/usr/bin/env python3
2
3'''Using mad regexes, automatically make sure that all structs with sub-byte
4integers have matching big-endian definitions. The idea is to save a lot of
5manual effort, and to automatically verify that there are no errors.
6This script most certainly has numerous holes and shortcomings, but actually,
7if you hit problems with it, rather adjust your coding style so that this
8script can deal with it...'''
9
10import re
11import sys
12import codecs
13import os.path
14
# A top-level struct starts with 'struct <name> {' at column 0 on a line of
# its own, and ends with a line starting with '}' at column 0, followed by
# anything but ';' (e.g. attributes) and a terminating ';'.
re_struct_start = re.compile(r'^struct\s*[a-zA-Z_][a-zA-Z_0-9]*\s*{\s*$')
re_struct_end = re.compile(r'^}[^;]*;\s*$')

# Anonymous sub-structs are indented: 'struct {' ... '} <name>;'.
re_substruct_start = re.compile(r'^\s+struct\s*{\s*$')
re_substruct_end = re.compile(r'^\s+}\s*([^;]*\s)[a-zA-Z_][a-zA-Z_0-9]*\s*;\s*$')

# One integer definition up to and including its ';'.
# group(1) is the leading whitespace plus the type keywords (with trailing
# whitespace), the last group is the member list including the ';'.
# Fixed-width types of any width are accepted for both intN_t and uintN_t
# (uint16_t, uint32_t, ... used to be missed because of a missing '+').
re_int_def = re.compile(r'(^\s*((const|unsigned|signed|char|int|long|int[0-9]+_t|uint[0-9]+_t)\s+)+\s*)([^;]*;)',
                        re.DOTALL | re.MULTILINE)
# A single member of such a definition: either 'name' or 'name:bits',
# followed by ',' or ';'. group(1) is the member without the separator.
re_int_members = re.compile(r'([a-zA-Z_][a-zA-Z_0-9]*|[a-zA-Z_][a-zA-Z_0-9]*\s*:\s*[0-9]+)\s*[,;]\s*', re.DOTALL | re.MULTILINE)

# Preprocessor lines that delimit the little/big endian variants.
re_little_endian_ifdef = re.compile(r'#\s*(if|elif)\s+OSMO_IS_LITTLE_ENDIAN\s*(==\s*1\s*|)')
re_big_endian_ifdef = re.compile(r'#\s*(if|elif)\s+OSMO_IS_BIG_ENDIAN\s*')
re_else = re.compile(r'#\s*else\s*')
re_endif = re.compile(r'#\s*endif\s*')
29
# Matches one C comment: either a '/* ... */' block comment (non-greedy, may
# span lines and may contain '*' or be empty) or a '// ...' comment up to,
# but not including, the end of its line.
re_c_comment = re.compile(r'(/\*.*?\*/|//[^\n]*)', re.DOTALL)


def remove_c_comments(code_str):
    '''Return code_str with all C block and line comments removed.

    Only the comment text itself is cut out; surrounding whitespace and
    newlines are kept as they are.'''
    return re_c_comment.sub('', code_str)
34
def section_struct_body(struct_body_lines):
    '''Divide a top-level struct body into alternating sections.

    The result has the form
      [['arbitrary', 'lines'], ['body;\n', 'lines;\n'], ['arbitrary'], ...]
    i.e. even indexes hold "arbitrary" lines and odd indexes hold "body
    lines".

    Arbitrary lines are the sub-struct delimiters ('struct {' and
    '} sub_name;'). Body lines are all other lines, i.e. the member
    definitions (possibly with comments).

    Aim: handle each sub-struct on its own, and if there already are ifdefs
    for little and big endian, keep just the little endian bit so that big
    endian can be derived from it. The endian '#if' / '#elif' / '#else' /
    '#endif' lines themselves are dropped, as is everything inside a big
    endian section.

    struct_body_lines: list of lines of the struct body, without the opening
    'struct foo {' and closing '};' lines. The list is not modified.
    Return: list of alternating arbitrary and body line lists.
    '''

    # Work on a copy: an '#else' line gets overwritten below, and that must
    # not leak into the caller's list.
    lines = list(struct_body_lines)

    struct_body_parts = []
    arbitrary_part = []
    def_part = []

    def end_def():
        '''If any body lines were recorded, flush out the recorded parts
        (arbitrary_part, then def_part) and start new ones. In short, cut a
        section boundary.'''
        nonlocal arbitrary_part, def_part
        if def_part:
            struct_body_parts.append(arbitrary_part)
            arbitrary_part = []
            struct_body_parts.append(def_part)
            def_part = []

    def consume_endian_section(j, other_endian_ifdef, shim_line, keep):
        '''Walk the inside of an endian ifdef section starting at index j,
        and return the index at which the outer loop should continue.
        Lines are recorded as body lines if keep is True, else discarded.'''
        while j < len(lines):
            line = lines[j]
            if re_endif.fullmatch(line):
                end_def()
                # the '#endif' itself is consumed
                return j + 1
            if other_endian_ifdef.fullmatch(line):
                end_def()
                # leave the start of the other section for the outer loop
                return j
            if re_else.fullmatch(line):
                # An '#else' implies the other endianness: shim the matching
                # header in, so that the outer loop treats it as such.
                lines[j] = shim_line
                return j
            if keep:
                def_part.append(line)
            j += 1
        return j

    j = 0
    while j < len(lines):
        line = lines[j]

        if (re_substruct_start.fullmatch(line)
            or re_substruct_end.fullmatch(line)):
            end_def()
            arbitrary_part.append(line)
            j += 1
            continue

        if re_big_endian_ifdef.fullmatch(line):
            end_def()
            # discard the big endian section
            j = consume_endian_section(j + 1, re_little_endian_ifdef,
                                       '#if OSMO_IS_LITTLE_ENDIAN\n', keep=False)
            continue

        if re_little_endian_ifdef.fullmatch(line):
            end_def()
            # keep the little endian section as body lines
            j = consume_endian_section(j + 1, re_big_endian_ifdef,
                                       '#if OSMO_IS_BIG_ENDIAN\n', keep=True)
            continue

        def_part.append(line)
        j += 1

    # flush the last section remaining that didn't see an explicit end
    end_def()
    # end_def() only flushes arbitrary_part if there was a def_part, so:
    if arbitrary_part:
        struct_body_parts.append(arbitrary_part)

    return struct_body_parts
135
def struct_body_to_big_endian(body_str):
    '''Derive the big-endian variant of a little-endian struct body.

    Input: a multi-line string containing the body of a struct, i.e. without
    sub-structs and without #if OSMO_IS_BIG_ENDIAN. like

      '\tconst char *foo;\n\tuint8_t moo:3, goo:2;\n\tuint8_t loo:3;\n\tvoid *baz;\n'

    Return None if there are no sub-byte integers, to indicate that there is
    no little/big endian split required. Otherwise return a multi-line string
    of the big-endian version of this same struct body, where sub-byte ints
    are reversed at byte boundaries, and all others are copied 1:1. Comments
    and empty lines are not carried over to the returned string.

    Raise Exception if the sub-byte integers do not add up to whole bytes
    (in the middle or at the end of the body), or if the integer type changes
    in the middle of a byte.'''

    # kick comments out of the code analysis. They will end up being stripped
    # from big-endian only.
    body_str = remove_c_comments(body_str)

    def_strs = ['%s;' % def_str for def_str in body_str.split(';') if def_str.strip()]

    # classify defs as containing sub-byte members or not
    # defs = [ (True, 'uint8_t foo:3, bar:5;', 'uint8_t ', ['foo:3', 'bar:5']),
    #          (False, 'int baz;'),...]
    defs = []
    any_sub_byte_ints = False
    for one_def in def_strs:

        # does it have sub-byte integers?
        int_def = re_int_def.fullmatch(one_def)
        if not int_def:
            # not even a number, same for big and little endian
            defs.append((False, one_def))
            continue

        int_type = int_def.group(1)
        members_str = int_def.groups()[-1]
        members = [int_member.group(1)
                   for int_member in re_int_members.finditer(members_str)]

        if any(':' in member for member in members):
            defs.append((True, one_def, int_type, members))
            any_sub_byte_ints = True
        else:
            defs.append((False, one_def))

    if not any_sub_byte_ints:
        return None

    # now the interesting part, go over the defs, and reverse the sub-byte ints
    # at byte boundaries.

    got_bits = 0
    byte_type = None
    members_within_a_byte = []
    big_endian_defs = []

    for classified_def in defs:
        if not classified_def[0]:
            # a plain definition must not interrupt an incomplete byte
            if got_bits:
                raise Exception('sub-byte members do not add up to clean byte bounds: %r' % members_within_a_byte)
            big_endian_defs.append(classified_def[1])
            continue

        _, one_def, int_type, members = classified_def

        if byte_type and byte_type.strip() != int_type.strip():
            raise Exception('mismatching type continuation after incomplete byte: %r %r to %r'
                            % (byte_type, members_within_a_byte, int_type))
        byte_type = int_type

        for member in members:
            member_name, bits_str = member.split(':')
            member_name = member_name.strip()
            bits = int(bits_str)
            member = '%s:%d' % (member_name, bits)
            members_within_a_byte.append(member)
            got_bits += bits

            if got_bits == 8:
                # a byte is complete: emit its members in reverse order.
                big_endian_defs.append('%s%s;' % (byte_type, ', '.join(reversed(members_within_a_byte))))
                members_within_a_byte = []
                byte_type = None
                got_bits = 0

            elif got_bits > 8:
                raise Exception('sub-byte int breaks clean byte bounds: %s -- %d + %d = %d bits'
                                % (member, got_bits - bits, bits, got_bits))

    # An incomplete byte at the very end would otherwise silently vanish from
    # the big-endian variant.
    if got_bits:
        raise Exception('sub-byte members do not add up to clean byte bounds: %r' % members_within_a_byte)

    # strip empty lines, and clean lines' trailing whitespace that we might
    # have taken in with the type names
    lines = [l.rstrip(' \t') for l in ''.join(big_endian_defs).split('\n') if l.strip()]
    return '\n'.join(lines)
245
def handle_struct_body(body_str):
    '''Wrap a struct body section in little/big endian ifdefs, if needed.

    body_str is taken to be the little-endian variant. If
    struct_body_to_big_endian() yields a big-endian counterpart for it,
    return both variants enclosed in '#if OSMO_IS_LITTLE_ENDIAN' /
    '#elif OSMO_IS_BIG_ENDIAN' / '#endif'. Otherwise return body_str
    unchanged.'''

    big_endian_body_str = struct_body_to_big_endian(body_str)

    if not big_endian_body_str:
        # no sub-byte integers in here, nothing to split
        return body_str

    pieces = (
        '#if OSMO_IS_LITTLE_ENDIAN\n',
        body_str,
        '#elif OSMO_IS_BIG_ENDIAN\n'
        '/* auto-generated from the little endian part above (libosmocore/contrib/struct_endianess.py) */\n',
        big_endian_body_str,
        '\n#endif\n',
    )
    return ''.join(pieces)
260
def _check_file(f):
    '''Rewrite file f in place so that packed structs with sub-byte integers
    have both a little-endian and a derived big-endian definition.

    Files not ending in '.h', '.c' or '.cpp' are ignored. If the result uses
    OSMO_IS_LITTLE_ENDIAN and does not include osmocom/core/endian.h yet
    (and is not itself named '*endian.h'), that include is added after the
    last suitable '#include' line.

    Raise Exception if a struct cannot be handled, or if there is no
    '#include' line to place the endian.h include after.'''

    if not f.endswith(('.h', '.c', '.cpp')):
        return

    # the read handle is closed before the same path gets written below
    with codecs.open(f, "r", "utf-8") as fd:
        file_lines = fd.readlines()

    # section the file into
    # [ ["no struct def"], ["struct {...};"], ["no struct def"], ... ]
    sections = []
    in_struct = False
    buf = []
    for line in file_lines:

        if not in_struct and re_struct_start.fullmatch(line):
            # flush whatever might still be in buf from before
            sections.append(buf)
            # start an in_struct section
            buf = [line]
            in_struct = True
        elif in_struct and re_struct_end.fullmatch(line):
            # add this end to the in_struct section and then start a non-struct section
            buf.append(line)
            sections.append(buf)
            in_struct = False
            buf = []
        else:
            buf.append(line)
    # flush any leftovers in buf
    if buf:
        sections.append(buf)

    # examine each struct, i.e. every second item in 'sections'
    for i in range(1, len(sections), 2):
        struct = sections[i]

        # If the struct isn't packed, we need not bother.
        # The practical use of this: in some structs we have booleans in the
        # form of
        #     integer flag:1;
        # and these don't add up to bytes, and cause errors. So let's skip all
        # non-packed structs, then all of those are out of the picture.
        if 'packed' not in struct[-1]:
            continue

        try:

            # assume the 'struct foo {' is on the first line, the closing brace
            # '} __attribute...;' on the last, and the rest are individual
            # definitions split by ';'.
            struct_body_lines = struct[1:-1]
            struct_body_parts = section_struct_body(struct_body_lines)

            # even parts are arbitrary lines to copy, odd parts are member
            # definitions to examine
            new_struct_body_parts = []
            for j, part_lines in enumerate(struct_body_parts):
                part = ''.join(part_lines)
                if not (j & 1):
                    new_struct_body_parts.append(part)
                else:
                    new_struct_body_parts.append(handle_struct_body(part))

            new_struct = [struct[0], ''.join(new_struct_body_parts), struct[-1]]
            sections[i] = new_struct
        except Exception as e:
            # chain explicitly, so that the actual problem stays visible
            raise Exception('ERROR in struct %r' % struct[0]) from e

    # phew. result.
    result = ''.join((''.join(s) for s in sections))

    # see if osmocom/core/endian.h is needed and included.
    if (not f.endswith('endian.h')
        and 'OSMO_IS_LITTLE_ENDIAN' in result
        and '#include <osmocom/core/endian.h>' not in result):
        # add the include after the last 'osmocom/core' include
        last_include_start = result.rfind('#include <osmocom/core/')
        if last_include_start < 0:
            last_include_start = result.rfind('#include <osmocom/')
        if last_include_start < 0:
            last_include_start = result.rfind('#include')

        if last_include_start < 0:
            raise Exception('do not know where to include osmocom/core/endian.h in %r' % f)

        insert_at = result.find('\n', last_include_start)
        if insert_at < 0:
            # the include is on the last line and has no trailing newline
            insert_at = len(result)

        result = result[:insert_at] + '\n#include <osmocom/core/endian.h>' + result[insert_at:]

    with codecs.open(f, "w", "utf-8") as fd:
        fd.write(result)
350
def check_file(f):
    '''Run _check_file() on f, naming the file in any error.

    Raise Exception('ERROR IN FILE ...') if anything goes wrong, with the
    original exception chained as its cause.'''
    try:
        _check_file(f)
    except Exception as e:
        raise Exception('ERROR IN FILE %r' % f) from e
356
# Paths to handle are taken from the command line; without any, handle the
# current directory.
args = sys.argv[1:] or ['.']

for f in args:
    if not os.path.isdir(f):
        # a single file
        check_file(f)
        continue

    # a directory: handle every file found anywhere below it
    for parent_path, subdirs, files in os.walk(f, None, None):
        for ff in files:
            check_file(os.path.join(parent_path, ff))
368
369# vim: tabstop=4 shiftwidth=4 expandtab