
Commit

Merge pull request #2468 from PrincetonUniversity/devel
Devel
kmantel authored Aug 18, 2022
2 parents dbdba04 + ab6e911 commit 86c70a9
Showing 9 changed files with 100 additions and 96 deletions.
@@ -769,18 +769,22 @@ def _is_static(it:SampleIterator):
return False

assert all(_is_static(sample_iterator) for sample_iterator in self.search_space)

assert ocm is ocm.agent_rep.controller
# Compiled evaluate expects the same variable as mech function
variable = [input_port.parameters.value.get(context) for input_port in ocm.input_ports]

# Compiled evaluate expects the same variable as composition
state_features = ocm.parameters.state_feature_values._get(context)
inputs, num_inputs_sets = ocm.agent_rep._parse_run_inputs(state_features, context)

num_evals = np.prod([d.num for d in self.search_space])

# Map allocations to values
comp_exec = pnlvm.execution.CompExecution(ocm.agent_rep, [context.execution_id])
execution_mode = ocm.parameters.comp_execution_mode._get(context)
if execution_mode == "PTX":
outcomes = comp_exec.cuda_evaluate(variable, num_evals)
outcomes = comp_exec.cuda_evaluate(inputs, num_inputs_sets, num_evals)
elif execution_mode == "LLVM":
outcomes = comp_exec.thread_evaluate(variable, num_evals)
outcomes = comp_exec.thread_evaluate(inputs, num_inputs_sets, num_evals)
else:
assert False, f"Unknown execution mode for {ocm.name}: {execution_mode}."
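This hunk switches the compiled evaluate call from the controller's variable to run-style inputs plus num_inputs_sets, parsed from the state feature values by _parse_run_inputs; num_evals is still the full size of the sample grid. A minimal sketch of that grid-size computation, using a hypothetical stand-in for SampleIterator:

import numpy as np

# Hypothetical stand-in for SampleIterator: only the .num attribute used above matters here.
class FakeSampleIterator:
    def __init__(self, num):
        self.num = num

search_space = [FakeSampleIterator(3), FakeSampleIterator(5), FakeSampleIterator(2)]

# Grid search evaluates every combination of allocation samples once:
num_evals = np.prod([d.num for d in search_space])
print(num_evals)  # 30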

@@ -1744,14 +1748,46 @@ def _gen_llvm_select_min_function(self, *, ctx:pnlvm.LLVMBuilderContext, tags:fr
return builder.function

def _gen_llvm_function_body(self, ctx, builder, params, state_features, arg_in, arg_out, *, tags:frozenset):
ocm = self._get_optimized_controller()
if ocm is not None:
assert ocm.function is self
obj_func = ctx.import_llvm_function(ocm, tags=tags.union({"evaluate"}))
controller = self._get_optimized_controller()
if controller is not None:
assert controller.function is self
obj_func = ctx.import_llvm_function(controller, tags=tags.union({"evaluate"}))
comp_args = builder.function.args[-3:]
obj_param_ptr = comp_args[0]
obj_state_ptr = comp_args[1]
extra_args = [arg_in, comp_args[2]]

# Construct input
comp_input = builder.alloca(obj_func.args[4].type.pointee, name="sim_input")

input_initialized = [False] * len(comp_input.type.pointee)
for src_idx, ip in enumerate(controller.input_ports):
if ip.shadow_inputs is None:
continue

# shadow inputs point to an input port of a node.
# If that node takes direct input, it will have an associated
# (input_port, output_port) in the input_CIM.
# Take the former as an index to composition input variable.
cim_in_port = controller.agent_rep.input_CIM_ports[ip.shadow_inputs][0]
dst_idx = controller.agent_rep.input_CIM.input_ports.index(cim_in_port)

# Check that all inputs are unique
assert not input_initialized[dst_idx], "Double initialization of input {}".format(dst_idx)
input_initialized[dst_idx] = True

src = builder.gep(arg_in, [ctx.int32_ty(0), ctx.int32_ty(src_idx)])
# Destination is a struct of 2d arrays
dst = builder.gep(comp_input, [ctx.int32_ty(0),
ctx.int32_ty(dst_idx),
ctx.int32_ty(0)])
builder.store(builder.load(src), dst)

# Assert that we have populated all inputs
assert all(input_initialized), \
"Not all inputs to the simulated composition are initialized: {}".format(input_initialized)

# Extra args: input and data
extra_args = [comp_input, comp_args[2]]
else:
obj_func = ctx.import_llvm_function(self.objective_function)
obj_state_ptr = pnlvm.helpers.get_state_ptr(builder, self, state_features,
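The new input-construction block in _gen_llvm_function_body maps each controller InputPort that shadows a node's InputPort onto the slot of the simulated composition's input that feeds that node (via the corresponding input_CIM InputPort index), and asserts that every slot is filled exactly once. A rough pure-Python analogue of that mapping, with hypothetical names, is:

# Hypothetical, simplified analogue of the LLVM-IR input construction above.
def build_sim_input(controller_input, shadow_targets, cim_index_of, num_cim_inputs):
    """controller_input: one value per controller InputPort.
    shadow_targets:   the shadowed InputPort (or None) for each controller InputPort.
    cim_index_of:     maps a shadowed InputPort to its input_CIM slot index.
    num_cim_inputs:   number of InputPorts on the simulated composition's input_CIM."""
    sim_input = [None] * num_cim_inputs
    initialized = [False] * num_cim_inputs
    for src_idx, target in enumerate(shadow_targets):
        if target is None:
            continue
        dst_idx = cim_index_of[target]
        # Check that all inputs are unique
        assert not initialized[dst_idx], "Double initialization of input {}".format(dst_idx)
        initialized[dst_idx] = True
        sim_input[dst_idx] = controller_input[src_idx]
    # Assert that we have populated all inputs
    assert all(initialized), "Not all inputs are initialized: {}".format(initialized)
    return sim_input

print(build_sim_input([0.5, 1.5], ["A_in", "B_in"], {"A_in": 1, "B_in": 0}, 2))  # [1.5, 0.5]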
5 changes: 3 additions & 2 deletions psyneulink/core/components/functions/userdefinedfunction.py
@@ -9,6 +9,7 @@
#
# ***************************************** USER-DEFINED FUNCTION ****************************************************

import builtins
import numpy as np
import typecheck as tc
from inspect import signature, _empty, getsourcelines, getsourcefile, getclosurevars
@@ -34,7 +35,7 @@ def __init__(self, *args, **kwargs):
self.functions = set()

def visit_Name(self, node):
if node.id not in __builtins__:
if node.id not in dir(builtins):
self.vars.add(node.id)

def visit_Call(self, node):
@@ -44,7 +45,7 @@ def visit_Call(self, node):
except AttributeError:
func_id = node.func.id

if func_id not in __builtins__:
if func_id not in dir(builtins):
self.functions.add(func_id)

for c in ast.iter_child_nodes(node):
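The switch from __builtins__ to dir(builtins) matters because the old check depends on a CPython detail: __builtins__ is the builtins module in __main__ but the module's dict elsewhere, so membership tests against it are fragile; dir(builtins) always yields the list of builtin names. A self-contained sketch of the same filtering idea:

import ast
import builtins

# Minimal sketch: collect names referenced in a code string that are not Python builtins.
class FreeNameCollector(ast.NodeVisitor):
    def __init__(self):
        self.names = set()

    def visit_Name(self, node):
        if node.id not in dir(builtins):
            self.names.add(node.id)
        self.generic_visit(node)

collector = FreeNameCollector()
collector.visit(ast.parse("y = len(x) + custom_helper(x)"))
print(collector.names)  # {'y', 'x', 'custom_helper'} -- 'len' is filtered out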
@@ -3196,10 +3196,6 @@ def evaluate_agent_rep(self, control_allocation, context=None, return_results=Fa
context=context
)

def _get_evaluate_input_struct_type(self, ctx):
# We construct input from optimization function input
return ctx.get_input_struct_type(self.function)

def _get_evaluate_output_struct_type(self, ctx):
# Returns a scalar that is the predicted net_outcome
return ctx.float_ty
@@ -3326,15 +3322,15 @@ def _gen_llvm_evaluate_function(self, *, ctx:pnlvm.LLVMBuilderContext, tags=froz
ctx.get_state_struct_type(self.agent_rep).as_pointer(),
self._get_evaluate_alloc_struct_type(ctx).as_pointer(),
self._get_evaluate_output_struct_type(ctx).as_pointer(),
self._get_evaluate_input_struct_type(ctx).as_pointer(),
ctx.get_input_struct_type(self.agent_rep).as_pointer(),
ctx.get_data_struct_type(self.agent_rep).as_pointer()]

builder = ctx.create_llvm_function(args, self, str(self) + "_evaluate")
llvm_func = builder.function
for p in llvm_func.args:
p.attributes.add('nonnull')

comp_params, base_comp_state, allocation_sample, arg_out, arg_in, base_comp_data = llvm_func.args
comp_params, base_comp_state, allocation_sample, arg_out, comp_input, base_comp_data = llvm_func.args

if "const_params" in debug_env:
comp_params = builder.alloca(comp_params.type.pointee, name="const_params_loc")
@@ -3390,37 +3386,8 @@ def _gen_llvm_evaluate_function(self, *, ctx:pnlvm.LLVMBuilderContext, tags=froz
ctx.int32_ty(0)])
builder.store(builder.load(sample_ptr), sample_dst)

# Construct input
comp_input = builder.alloca(sim_f.args[3].type.pointee, name="sim_input")

input_initialized = [False] * len(comp_input.type.pointee)
for src_idx, ip in enumerate(self.input_ports):
if ip.shadow_inputs is None:
continue

# shadow inputs point to an input port of a node.
# If that node takes direct input, it will have an associated
# (input_port, output_port) in the input_CIM.
# Take the former as an index to composition input variable.
cim_in_port = self.agent_rep.input_CIM_ports[ip.shadow_inputs][0]
dst_idx = self.agent_rep.input_CIM.input_ports.index(cim_in_port)

# Check that all inputs are unique
assert not input_initialized[dst_idx], "Double initialization of input {}".format(dst_idx)
input_initialized[dst_idx] = True

src = builder.gep(arg_in, [ctx.int32_ty(0), ctx.int32_ty(src_idx)])
# Destination is a struct of 2d arrays
dst = builder.gep(comp_input, [ctx.int32_ty(0),
ctx.int32_ty(dst_idx),
ctx.int32_ty(0)])
builder.store(builder.load(src), dst)

# Assert that we have populated all inputs
assert all(input_initialized), \
"Not all inputs to the simulated composition are initialized: {}".format(input_initialized)

if "const_input" in debug_env:
comp_input = builder.alloca(sim_f.args[3].type.pointee, name="sim_input")
if not debug_env["const_input"]:
input_init = [[os.defaults.variable.tolist()] for os in self.agent_rep.input_CIM.input_ports]
print("Setting default input: ", input_init)
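Taken together, these hunks remove _get_evaluate_input_struct_type and the per-port input construction from _gen_llvm_evaluate_function: the compiled evaluate entry point now receives the agent_rep's own input struct (the same layout a compiled composition run uses), and that input is built once by the grid-search caller shown earlier. A hypothetical summary of the resulting argument order, following the unpacking of llvm_func.args above:

# Hypothetical summary (names follow the unpacking above); every argument is passed by pointer.
EVALUATE_ARGS = (
    "comp_params",        # agent_rep parameter struct
    "base_comp_state",    # agent_rep state struct
    "allocation_sample",  # candidate control_allocation
    "arg_out",            # predicted net_outcome (a single float)
    "comp_input",         # agent_rep input struct, same layout as a compiled run
    "base_comp_data",     # agent_rep data struct
)
print(EVALUATE_ARGS[4])  # comp_input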
38 changes: 20 additions & 18 deletions psyneulink/core/llvm/execution.py
@@ -149,7 +149,7 @@ def upload_ctype(self, data, name='other'):
# 0-sized structures fail to upload
# provide a small device buffer instead
return jit_engine.pycuda.driver.mem_alloc(4)
return jit_engine.pycuda.driver.to_device(bytearray(data))
return jit_engine.pycuda.driver.to_device(bytes(data))

def download_ctype(self, source, ty, name='other'):
self._downloaded_bytes[name] += ctypes.sizeof(ty)
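upload_ctype now serializes the ctypes data with bytes(...) rather than bytearray(...) before handing it to pycuda.driver.to_device; either way the structure's raw memory is copied into a host buffer of ctypes.sizeof bytes. A small, self-contained illustration of that serialization (the Params structure is hypothetical):

import ctypes

# Hypothetical parameter structure standing in for a compiled param/state struct.
class Params(ctypes.Structure):
    _fields_ = [("gain", ctypes.c_double), ("size", ctypes.c_int)]

p = Params(gain=1.5, size=4)
raw = bytes(p)                          # copy of the structure's underlying memory
print(len(raw) == ctypes.sizeof(p))     # True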
@@ -563,8 +563,11 @@ def cuda_execute(self, inputs):

# Methods used to accelerate "Run"

def _get_run_input_struct(self, inputs, num_input_sets):
input_type = self._bin_run_func.byref_arg_types[3]
def _get_run_input_struct(self, inputs, num_input_sets, arg=3):
# Callers that override the input arg should ensure that _bin_func is not None
bin_f = self._bin_run_func if arg == 3 else self._bin_func

input_type = bin_f.byref_arg_types[arg]
c_input = (input_type * num_input_sets) * len(self._execution_contexts)
if len(self._execution_contexts) == 1:
inputs = [inputs]
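_get_run_input_struct builds a nested ctypes array type: one input struct per input set, and one such array per execution context; the new arg parameter lets the evaluate path reuse it with the evaluate function's input argument (index 5) instead of the run function's (index 3). A minimal sketch of the nested type construction, with a stand-in element type:

import ctypes

# Stand-in for bin_f.byref_arg_types[arg]: here, a trivial two-element input struct.
InputStruct = ctypes.c_double * 2
num_input_sets, num_contexts = 3, 1

CInput = (InputStruct * num_input_sets) * num_contexts   # same shape as c_input above
c_input = CInput()
c_input[0][1][0] = 4.2                  # context 0, input set 1, element 0
print(ctypes.sizeof(c_input))           # 2 doubles * 3 sets * 1 context = 48 bytes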
@@ -676,7 +679,7 @@ def cuda_run(self, inputs, runs, num_input_sets):
assert runs_np[0] <= runs, "Composition ran more times than allowed!"
return _convert_ctype_to_python(ct_out)[0:runs_np[0]]

def _prepare_evaluate(self, variable, num_evaluations):
def _prepare_evaluate(self, inputs, num_input_sets, num_evaluations):
ocm = self._composition.controller
assert len(self._execution_contexts) == 1

@@ -694,26 +697,24 @@ def _prepare_evaluate(self, variable, num_evaluations):
ct_comp_state = self._get_compilation_param('_eval_state', '_get_state_initializer', 1)
ct_comp_data = self._get_compilation_param('_eval_data', '_get_data_initializer', 6)

# Construct input variable
var_dty = _element_dtype(bin_func.byref_arg_types[5])
converted_variable = np.concatenate(variable, dtype=var_dty)
# Construct input variable, the 5th parameter of the evaluate function
ct_inputs = self._get_run_input_struct(inputs, num_input_sets, 5)

# Output ctype
out_ty = bin_func.byref_arg_types[4] * num_evaluations

# return variable as numpy array. pycuda can use it directly
return ct_comp_param, ct_comp_state, ct_comp_data, converted_variable, out_ty
return ct_comp_param, ct_comp_state, ct_comp_data, ct_inputs, out_ty

def cuda_evaluate(self, variable, num_evaluations):
ct_comp_param, ct_comp_state, ct_comp_data, converted_variable, out_ty = \
self._prepare_evaluate(variable, num_evaluations)
self._uploaded_bytes['input'] += converted_variable.nbytes
def cuda_evaluate(self, inputs, num_input_sets, num_evaluations):
ct_comp_param, ct_comp_state, ct_comp_data, ct_inputs, out_ty = \
self._prepare_evaluate(inputs, num_input_sets, num_evaluations)

# Output is allocated on device, but we need the ctype (out_ty).
cuda_args = (self.upload_ctype(ct_comp_param, 'params'),
self.upload_ctype(ct_comp_state, 'state'),
jit_engine.pycuda.driver.mem_alloc(ctypes.sizeof(out_ty)),
jit_engine.pycuda.driver.In(converted_variable),
self.upload_ctype(ct_inputs, 'input'),
self.upload_ctype(ct_comp_data, 'data'),
)

@@ -722,12 +723,11 @@ def cuda_evaluate(self, variable, num_evaluations):

return ct_results

def thread_evaluate(self, variable, num_evaluations):
ct_param, ct_state, ct_data, converted_variale, out_ty = \
self._prepare_evaluate(variable, num_evaluations)
def thread_evaluate(self, inputs, num_input_sets, num_evaluations):
ct_param, ct_state, ct_data, ct_inputs, out_ty = \
self._prepare_evaluate(inputs, num_input_sets, num_evaluations)

ct_results = out_ty()
ct_variable = converted_variale.ctypes.data_as(self.__bin_func.c_func.argtypes[5])
jobs = min(os.cpu_count(), num_evaluations)
evals_per_job = (num_evaluations + jobs - 1) // jobs

@@ -738,7 +738,9 @@ def cuda_run(self, inputs, runs, num_input_sets):
results = [ex.submit(self.__bin_func, ct_param, ct_state,
int(i * evals_per_job),
min((i + 1) * evals_per_job, num_evaluations),
ct_results, ct_variable, ct_data)
ct_results,
ctypes.cast(ctypes.byref(ct_inputs), self.__bin_func.c_func.argtypes[5]),
ct_data)
for i in range(jobs)]

parallel_stop = time.time()
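thread_evaluate now passes the shared ctypes input struct to every worker by casting ctypes.byref(ct_inputs) to the compiled function's input pointer type, and splits the num_evaluations grid into contiguous chunks, one per thread. A rough sketch of that chunking scheme (evaluate_range is a hypothetical stand-in for the compiled evaluate-range call):

import os
from concurrent.futures import ThreadPoolExecutor

# Minimal sketch of the chunking used above: split num_evaluations into contiguous
# ranges, one per worker thread.
def run_chunked(evaluate_range, num_evaluations):
    jobs = min(os.cpu_count(), num_evaluations)
    evals_per_job = (num_evaluations + jobs - 1) // jobs   # ceiling division
    with ThreadPoolExecutor(max_workers=jobs) as ex:
        futures = [ex.submit(evaluate_range,
                             i * evals_per_job,
                             min((i + 1) * evals_per_job, num_evaluations))
                   for i in range(jobs)]
        return [f.result() for f in futures]

results = run_chunked(lambda start, stop: list(range(start, stop)), 10)
print(sum(len(r) for r in results))  # 10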
4 changes: 2 additions & 2 deletions psyneulink/core/llvm/helpers.py
@@ -443,15 +443,15 @@ def printf_float_array(builder, array, prefix="", suffix="\n", override_debug=Fa
printf(builder, prefix, override_debug=override_debug)

with array_ptr_loop(builder, array, "print_array_loop") as (b1, i):
printf(b1, "%lf ", b1.load(b1.gep(array, [ir.IntType(32)(0), i])), override_debug=override_debug)
printf(b1, "%lf ", b1.load(b1.gep(array, [i.type(0), i])), override_debug=override_debug)

printf(builder, suffix, override_debug=override_debug)


def printf_float_matrix(builder, matrix, prefix="", suffix="\n", override_debug=False):
printf(builder, prefix, override_debug=override_debug)
with array_ptr_loop(builder, matrix, "print_row_loop") as (b1, i):
row = b1.gep(matrix, [ir.IntType(32)(0), i])
row = b1.gep(matrix, [i.type(0), i])
printf_float_array(b1, row, suffix="\n", override_debug=override_debug)
printf(builder, suffix, override_debug=override_debug)

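In printf_float_array and printf_float_matrix, the leading zero index for the GEP is now created from the loop counter's own type (i.type(0)) instead of a hard-coded ir.IntType(32), so both indices keep the same width whatever integer type the loop helper uses for its counter. A small llvmlite sketch of that pattern (function and names are hypothetical):

import llvmlite.ir as ir

# Hypothetical function that stores into one element of a [4 x double] array.
module = ir.Module()
fnty = ir.FunctionType(ir.VoidType(), [ir.ArrayType(ir.DoubleType(), 4).as_pointer()])
fn = ir.Function(module, fnty, name="touch_array")
builder = ir.IRBuilder(fn.append_basic_block("entry"))

array = fn.args[0]
i = ir.IntType(64)(2)                        # a counter that happens to be i64
elem = builder.gep(array, [i.type(0), i])    # both indices share the counter's type
builder.store(ir.DoubleType()(1.0), elem)
builder.ret_void()
print(module)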
5 changes: 2 additions & 3 deletions requirements.txt
@@ -6,9 +6,8 @@ graphviz<0.21.0
grpcio<1.43.0
grpcio-tools<1.43.0
llvmlite<0.40
matplotlib<3.5.3
modeci_mdf>=0.3.4, <0.4.2
modelspec<0.2.7
matplotlib<3.5.4
modeci_mdf<0.5, >=0.3.4
networkx<2.9
numpy<1.21.7, >=1.17.0
pillow<9.3.0
4 changes: 2 additions & 2 deletions tests/functions/test_transfer.py
@@ -76,15 +76,15 @@ def test_execute(func, variable, params, expected, benchmark, func_mode):
benchmark(ex, variable)


relu_derivative_helper = lambda x : RAND1 if x > 0 else RAND1 * RAND3
logistic_helper = RAND4 / (1 + np.exp(-(RAND1 * (test_var - RAND2)) + RAND3))
tanh_derivative_helper = (RAND1 * (test_var + RAND2) + RAND3)
tanh_derivative_helper = (1 - np.tanh(tanh_derivative_helper)**2) * RAND4 * RAND1

derivative_test_data = [
(Functions.Linear, test_var, {'slope':RAND1, 'intercept':RAND2}, RAND1),
(Functions.Exponential, test_var, {'scale':RAND1, 'rate':RAND2}, RAND1 * RAND2 * np.exp(RAND2 * test_var)),
(Functions.Logistic, test_var, {'gain':RAND1, 'x_0':RAND2, 'offset':RAND3, 'scale':RAND4}, RAND1 * RAND4 * logistic_helper * (1 - logistic_helper)),
(Functions.ReLU, test_var, {'gain':RAND1, 'bias':RAND2, 'leak':RAND3}, list(map(relu_derivative_helper, test_var))),
(Functions.ReLU, test_var, {'gain':RAND1, 'bias':RAND2, 'leak':RAND3}, np.where(test_var > 0, RAND1, RAND1 * RAND3)),
(Functions.Tanh, test_var, {'gain':RAND1, 'bias':RAND2, 'offset':RAND3, 'scale':RAND4}, tanh_derivative_helper),
]

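The ReLU derivative expectation is rewritten from an element-wise lambda applied with map to a vectorized np.where, which produces the same values (gain where the input is positive, gain*leak otherwise). A quick check of that equivalence with stand-in constants for RAND1 and RAND3:

import numpy as np

# Stand-ins for RAND1 (gain) and RAND3 (leak); test_var is an arbitrary sample vector.
gain, leak = 1.7, 0.3
test_var = np.array([-1.0, 0.0, 2.5])

old = np.array(list(map(lambda x: gain if x > 0 else gain * leak, test_var)))
new = np.where(test_var > 0, gain, gain * leak)
assert np.allclose(old, new)
print(new)  # [0.51 0.51 1.7]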