Skip to content

Commit 7755806

Browse files
committed
Rearchitect var args support (making it actually work)
Previously all the logic to access values of variable argument lists was put into our va_arg() implementation. This required guessing what the parameters could have been, and did not take into account the initial value passed to va_start. Now va_start triggers constructing an array of pointers to parameters, the first of which is the original argument to va_start. The va_list is then just a pointer to the first element of the array. va_arg just increments the pointer. goto_program conversion places a side_effect_exprt where va_start was, and goto-symex populates the array with the actual values applicable for a particular invocation of the function.
1 parent 09c3c43 commit 7755806

File tree

12 files changed

+130
-105
lines changed

12 files changed

+130
-105
lines changed

regression/cbmc/Variadic1/test.desc

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FUTURE
1+
CORE
22
main.c
33

44
^EXIT=0$

regression/cbmc/va_list3/test.desc

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
KNOWNBUG
1+
CORE broken-smt-backend
22
main.c
33

44
^EXIT=0$

regression/cbmc/va_list3/windows-preprocessed.desc

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
KNOWNBUG
1+
CORE broken-smt-backend
22
windows-preprocessed.i
33
--winx64
44
^EXIT=0$

src/ansi-c/expr2c.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -3630,8 +3630,8 @@ std::string expr2ct::convert_with_precedence(
36303630
return convert_prob_uniform(src, precedence=16);
36313631
else if(statement==ID_statement_expression)
36323632
return convert_statement_expression(src, precedence=15);
3633-
else if(statement==ID_gcc_builtin_va_arg_next)
3634-
return convert_function(src, "gcc_builtin_va_arg_next", precedence=16);
3633+
else if(statement == ID_va_start)
3634+
return convert_function(src, CPROVER_PREFIX "va_start", precedence = 16);
36353635
else
36363636
return convert_norep(src, precedence);
36373637
}

src/goto-instrument/goto_program2code.cpp

+10-8
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,10 @@ void goto_program2codet::scan_for_varargs()
104104
const exprt &l = target->get_assign().lhs();
105105
const exprt &r = target->get_assign().rhs();
106106

107-
// find va_arg_next
108-
if(r.id()==ID_side_effect &&
109-
to_side_effect_expr(r).get_statement()==ID_gcc_builtin_va_arg_next)
107+
// find va_start
108+
if(
109+
r.id() == ID_side_effect &&
110+
to_side_effect_expr(r).get_statement() == ID_va_start)
110111
{
111112
assert(r.has_operands());
112113
va_list_expr.insert(r.op0());
@@ -308,16 +309,17 @@ goto_programt::const_targett goto_program2codet::convert_assign_varargs(
308309

309310
dest.add(std::move(f));
310311
}
311-
else if(r.id()==ID_address_of)
312+
else if(
313+
r.id() == ID_side_effect &&
314+
to_side_effect_expr(r).get_statement() == ID_va_start)
312315
{
313316
code_function_callt f(
314-
symbol_exprt("va_start", code_typet({}, empty_typet())),
315-
{this_va_list_expr, to_address_of_expr(r).object()});
317+
symbol_exprt(ID_va_start, code_typet({}, empty_typet())),
318+
{this_va_list_expr, to_address_of_expr(skip_typecast(r.op0())).object()});
316319

317320
dest.add(std::move(f));
318321
}
319-
else if(r.id()==ID_side_effect &&
320-
to_side_effect_expr(r).get_statement()==ID_gcc_builtin_va_arg_next)
322+
else if(r.id() == ID_plus)
321323
{
322324
code_function_callt f(
323325
symbol_exprt("va_arg", code_typet({}, empty_typet())),

src/goto-programs/builtin_functions.cpp

+18-19
Original file line numberDiff line numberDiff line change
@@ -1081,10 +1081,10 @@ void goto_convertt::do_function_call_symbol(
10811081
else if(identifier==ID_gcc_builtin_va_arg)
10821082
{
10831083
// This does two things.
1084-
// 1) Move list pointer to next argument.
1085-
// Done by gcc_builtin_va_arg_next.
1086-
// 2) Return value of argument.
1084+
// 1) Return value of argument.
10871085
// This is just dereferencing.
1086+
// 2) Move list pointer to next argument.
1087+
// This is just an increment.
10881088

10891089
if(arguments.size()!=1)
10901090
{
@@ -1096,25 +1096,21 @@ void goto_convertt::do_function_call_symbol(
10961096

10971097
exprt list_arg=make_va_list(arguments[0]);
10981098

1099-
{
1100-
side_effect_exprt rhs(
1101-
ID_gcc_builtin_va_arg_next,
1102-
list_arg.type(),
1103-
function.source_location());
1104-
rhs.copy_to_operands(list_arg);
1105-
rhs.set(ID_C_va_arg_type, to_code_type(function.type()).return_type());
1106-
dest.add(goto_programt::make_assignment(
1107-
list_arg, rhs, function.source_location()));
1108-
}
1109-
11101099
if(lhs.is_not_nil())
11111100
{
11121101
typet t=pointer_type(lhs.type());
1113-
dereference_exprt rhs(typecast_exprt(list_arg, t), lhs.type());
1102+
dereference_exprt rhs{typecast_exprt(dereference_exprt{list_arg}, t)};
11141103
rhs.add_source_location()=function.source_location();
11151104
dest.add(
11161105
goto_programt::make_assignment(lhs, rhs, function.source_location()));
11171106
}
1107+
1108+
code_assignt assign{
1109+
list_arg, plus_exprt{list_arg, from_integer(1, pointer_diff_type())}};
1110+
assign.rhs().set(
1111+
ID_C_va_arg_type, to_code_type(function.type()).return_type());
1112+
dest.add(goto_programt::make_assignment(
1113+
std::move(assign), function.source_location()));
11181114
}
11191115
else if(identifier=="__builtin_va_copy")
11201116
{
@@ -1139,7 +1135,7 @@ void goto_convertt::do_function_call_symbol(
11391135
dest.add(goto_programt::make_assignment(
11401136
dest_expr, src_expr, function.source_location()));
11411137
}
1142-
else if(identifier=="__builtin_va_start")
1138+
else if(identifier == "__builtin_va_start" || identifier == "__va_start")
11431139
{
11441140
// Set the list argument to be the address of the
11451141
// parameter argument.
@@ -1152,8 +1148,6 @@ void goto_convertt::do_function_call_symbol(
11521148
}
11531149

11541150
exprt dest_expr=make_va_list(arguments[0]);
1155-
const typecast_exprt src_expr(
1156-
address_of_exprt(arguments[1]), dest_expr.type());
11571151

11581152
if(!is_lvalue(dest_expr))
11591153
{
@@ -1162,8 +1156,13 @@ void goto_convertt::do_function_call_symbol(
11621156
throw 0;
11631157
}
11641158

1159+
side_effect_exprt rhs{
1160+
ID_va_start, dest_expr.type(), function.source_location()};
1161+
rhs.add_to_operands(
1162+
typecast_exprt{address_of_exprt{arguments[1]}, dest_expr.type()});
1163+
11651164
dest.add(goto_programt::make_assignment(
1166-
dest_expr, src_expr, function.source_location()));
1165+
std::move(dest_expr), std::move(rhs), function.source_location()));
11671166
}
11681167
else if(identifier=="__builtin_va_end")
11691168
{

src/goto-symex/goto_symex.h

+3-4
Original file line numberDiff line numberDiff line change
@@ -498,10 +498,9 @@ class goto_symext
498498
/// \return The resulting expression
499499
static exprt add_to_lhs(const exprt &lhs, const exprt &what);
500500

501-
virtual void symex_gcc_builtin_va_arg_next(
502-
statet &state,
503-
const exprt &lhs,
504-
const side_effect_exprt &code);
501+
virtual void
502+
symex_va_start(statet &, const exprt &lhs, const side_effect_exprt &);
503+
505504
/// Symbolically execute an assignment instruction that has an `allocate` on
506505
/// the right hand side
507506
/// \param state: Symbolic execution state for current instruction

src/goto-symex/goto_symex_state.h

+1
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ struct framet
8888
irep_idt function_identifier;
8989
std::map<goto_programt::const_targett, goto_state_listt> goto_state_map;
9090
symex_targett::sourcet calling_location;
91+
std::vector<irep_idt> parameter_names;
9192

9293
goto_programt::const_targett end_of_function;
9394
exprt return_value = nil_exprt();

src/goto-symex/symex_assign.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ void goto_symext::symex_assign(statet &state, const code_assignt &code)
4949
PRECONDITION(lhs.is_nil());
5050
symex_printf(state, side_effect_expr);
5151
}
52-
else if(statement==ID_gcc_builtin_va_arg_next)
53-
symex_gcc_builtin_va_arg_next(state, lhs, side_effect_expr);
52+
else if(statement == ID_va_start)
53+
symex_va_start(state, lhs, side_effect_expr);
5454
else
5555
UNREACHABLE;
5656
}

src/goto-symex/symex_builtin_functions.cpp

+75-43
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ Author: Daniel Kroening, [email protected]
1414
#include <util/arith_tools.h>
1515
#include <util/c_types.h>
1616
#include <util/expr_initializer.h>
17+
#include <util/expr_util.h>
18+
#include <util/fresh_symbol.h>
1719
#include <util/invariant_utils.h>
1820
#include <util/optional.h>
1921
#include <util/pointer_offset_size.h>
@@ -206,24 +208,44 @@ void goto_symext::symex_allocate(
206208
code_assignt(lhs, typecast_exprt::conditional_cast(rhs, lhs.type())));
207209
}
208210

209-
irep_idt get_symbol(const exprt &src)
211+
/// Construct an entry for the var args array. Visual Studio complicates this as
212+
/// we need to put immediate values or pointers in there, depending on the size
213+
/// of the parameter.
214+
static exprt va_list_entry(
215+
const irep_idt &parameter,
216+
const pointer_typet &lhs_type,
217+
const namespacet &ns)
210218
{
211-
if(src.id()==ID_typecast)
212-
return get_symbol(to_typecast_expr(src).op());
213-
else if(src.id()==ID_address_of)
219+
static const pointer_typet void_ptr_type = pointer_type(empty_typet{});
220+
221+
symbol_exprt parameter_expr = ns.lookup(parameter).symbol_expr();
222+
223+
// Visual Studio has va_list == char* and stores parameters of size no
224+
// greater than the size of a pointer as immediate values
225+
if(lhs_type.subtype().id() != ID_pointer)
214226
{
215-
exprt op=to_address_of_expr(src).object();
216-
if(op.id()==ID_symbol &&
217-
op.get_bool(ID_C_SSA_symbol))
218-
return to_ssa_expr(op).get_object_name();
219-
else
220-
return irep_idt();
227+
auto parameter_size = size_of_expr(parameter_expr.type(), ns);
228+
CHECK_RETURN(parameter_size.has_value());
229+
230+
binary_predicate_exprt fits_slot{
231+
*parameter_size,
232+
ID_le,
233+
from_integer(lhs_type.get_width(), parameter_size->type())};
234+
235+
return if_exprt{
236+
fits_slot,
237+
typecast_exprt::conditional_cast(parameter_expr, void_ptr_type),
238+
typecast_exprt::conditional_cast(
239+
address_of_exprt{parameter_expr}, void_ptr_type)};
221240
}
222241
else
223-
return irep_idt();
242+
{
243+
return typecast_exprt::conditional_cast(
244+
address_of_exprt{std::move(parameter_expr)}, void_ptr_type);
245+
}
224246
}
225247

226-
void goto_symext::symex_gcc_builtin_va_arg_next(
248+
void goto_symext::symex_va_start(
227249
statet &state,
228250
const exprt &lhs,
229251
const side_effect_exprt &code)
@@ -233,42 +255,52 @@ void goto_symext::symex_gcc_builtin_va_arg_next(
233255
if(lhs.is_nil())
234256
return; // ignore
235257

236-
// to allow constant propagation
237-
exprt tmp = state.rename(code.op0(), ns);
238-
do_simplify(tmp);
239-
irep_idt id=get_symbol(tmp);
240-
241-
const auto zero = zero_initializer(lhs.type(), code.source_location(), ns);
242-
CHECK_RETURN(zero.has_value());
243-
exprt rhs(*zero);
258+
// create an array holding pointers to the parameters, starting after the
259+
// parameter that the operand points to
260+
const exprt &op = skip_typecast(code.op0());
261+
// this must be the address of a symbol
262+
const irep_idt start_parameter =
263+
to_ssa_expr(to_address_of_expr(op).object()).get_object_name();
244264

245-
if(!id.empty())
265+
exprt::operandst va_arg_operands;
266+
bool parameter_id_found = false;
267+
for(auto &parameter : state.top().parameter_names)
246268
{
247-
// strip last name off id to get function name
248-
std::size_t pos=id2string(id).rfind("::");
249-
if(pos!=std::string::npos)
269+
if(!parameter_id_found)
250270
{
251-
irep_idt function_identifier=std::string(id2string(id), 0, pos);
252-
std::string base=id2string(function_identifier)+"::va_arg";
253-
254-
if(has_prefix(id2string(id), base))
255-
id=base+std::to_string(
256-
safe_string2unsigned(
257-
std::string(id2string(id), base.size(), std::string::npos))+1);
258-
else
259-
id=base+"0";
260-
261-
const symbolt *symbol;
262-
if(!ns.lookup(id, symbol))
263-
{
264-
const symbol_exprt symbol_expr(symbol->name, symbol->type);
265-
rhs = typecast_exprt::conditional_cast(
266-
address_of_exprt(symbol_expr), lhs.type());
267-
}
271+
parameter_id_found = parameter == start_parameter;
272+
continue;
268273
}
269-
}
270274

271-
symex_assign(state, code_assignt(lhs, rhs));
275+
va_arg_operands.push_back(
276+
va_list_entry(parameter, to_pointer_type(lhs.type()), ns));
277+
}
278+
const std::size_t va_arg_size = va_arg_operands.size();
279+
exprt array =
280+
array_exprt{std::move(va_arg_operands),
281+
array_typet{pointer_type(empty_typet{}),
282+
from_integer(va_arg_size, size_type())}};
283+
284+
symbolt &va_array = get_fresh_aux_symbol(
285+
array.type(),
286+
id2string(state.source.function_id),
287+
"va_args",
288+
state.source.pc->source_location,
289+
ns.lookup(state.source.function_id).mode,
290+
state.symbol_table);
291+
va_array.value = array;
292+
293+
clean_expr(array, state, false);
294+
array = state.rename(std::move(array), ns);
295+
do_simplify(array);
296+
symex_assign(state, code_assignt{va_array.symbol_expr(), std::move(array)});
297+
298+
address_of_exprt rhs{
299+
index_exprt{va_array.symbol_expr(), from_integer(0, index_type())}};
300+
state.rename(rhs, ns);
301+
symex_assign(
302+
state,
303+
code_assignt{lhs, typecast_exprt::conditional_cast(rhs, lhs.type())});
272304
}
273305

274306
irep_idt get_string_argument_rec(const exprt &src)

src/goto-symex/symex_function_call.cpp

+15-23
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Author: Daniel Kroening, [email protected]
1616
#include <util/byte_operators.h>
1717
#include <util/c_types.h>
1818
#include <util/exception_utils.h>
19+
#include <util/fresh_symbol.h>
1920
#include <util/invariant.h>
2021
#include <util/prefix.h>
2122

@@ -46,6 +47,7 @@ void goto_symext::parameter_assignments(
4647
{
4748
INVARIANT(
4849
!identifier.empty(), "function parameter must have an identifier");
50+
state.top().parameter_names.push_back(identifier);
4951

5052
const symbolt &symbol=ns.lookup(identifier);
5153
symbol_exprt lhs=symbol.symbol_expr();
@@ -145,30 +147,20 @@ void goto_symext::parameter_assignments(
145147
if(function_type.has_ellipsis())
146148
{
147149
// These are va_arg arguments; their types may differ from call to call
148-
std::size_t va_count=0;
149-
const symbolt *va_sym=nullptr;
150-
while(!ns.lookup(
151-
id2string(function_identifier)+"::va_arg"+std::to_string(va_count),
152-
va_sym))
153-
++va_count;
154-
155-
for( ; it1!=arguments.end(); it1++, va_count++)
150+
for(; it1 != arguments.end(); it1++)
156151
{
157-
irep_idt id=
158-
id2string(function_identifier)+"::va_arg"+std::to_string(va_count);
159-
160-
// add to symbol table
161-
symbolt symbol;
162-
symbol.name=id;
163-
symbol.base_name="va_arg"+std::to_string(va_count);
164-
symbol.mode=ID_C;
165-
symbol.type=it1->type();
166-
167-
state.symbol_table.insert(std::move(symbol));
168-
169-
symbol_exprt lhs=symbol_exprt(id, it1->type());
170-
171-
symex_assign(state, code_assignt(lhs, *it1));
152+
symbolt &va_arg = get_fresh_aux_symbol(
153+
it1->type(),
154+
id2string(function_identifier),
155+
"va_arg",
156+
state.source.pc->source_location,
157+
ns.lookup(function_identifier).mode,
158+
state.symbol_table);
159+
va_arg.is_parameter = true;
160+
161+
state.top().parameter_names.push_back(va_arg.name);
162+
163+
symex_assign(state, code_assignt{va_arg.symbol_expr(), *it1});
172164
}
173165
}
174166
else if(it1!=arguments.end())

src/util/irep_ids.def

+1-1
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ IREP_ID_ONE(alignof)
303303
IREP_ID_ONE(clang_builtin_convertvector)
304304
IREP_ID_ONE(gcc_builtin_va_arg)
305305
IREP_ID_ONE(gcc_builtin_types_compatible_p)
306-
IREP_ID_ONE(gcc_builtin_va_arg_next)
306+
IREP_ID_ONE(va_start)
307307
IREP_ID_ONE(gcc_float16)
308308
IREP_ID_ONE(gcc_float32)
309309
IREP_ID_ONE(gcc_float32x)

0 commit comments

Comments
 (0)