
Commit

include partitioner module in repo to avoid segmentation fault
samdporter committed Sep 30, 2024
1 parent 9f8dbee commit 74d8929
Showing 2 changed files with 278 additions and 11 deletions.
25 changes: 14 additions & 11 deletions main.py
@@ -8,12 +8,12 @@
>>> algorithm.run(np.inf, callbacks=metrics + submission_callbacks)
"""
#%%
import partitioner
from cil.optimisation.algorithms import ISTA, Algorithm
from cil.optimisation.functions import IndicatorBox, SAGAFunction
from cil.optimisation.utilities import (Preconditioner, Sampler,
StepSizeRule)
from petric import Dataset
from sirf.contrib.partitioner import partitioner
import numpy as np

assert issubclass(ISTA, Algorithm)
@@ -55,21 +55,22 @@ class BSREMPreconditioner(Preconditioner):
Preconditioner for BSREM
'''

def __init__(self, acq_models, freeze_iter = np.inf, epsilon=1e-6):
def __init__(self, obj_funs, freeze_iter = np.inf, epsilon=1e-6):

self.epsilon = epsilon
self.freeze_iter = freeze_iter
self.freeze = None

for i,el in enumerate(acq_models):
if i == 0:
self.s_sum_inv = el.domain_geometry().get_uniform_copy(0.)
ones = el.range_geometry().allocate(1.)
s_inv = el.adjoint(ones)
for i,el in enumerate(obj_funs):
s_inv = el.get_subset_sensitivity(0)
s_inv.maximum(0, out=s_inv)
arr = s_inv.as_array()
np.reciprocal(arr, out=arr, where=arr!=0)
s_inv.fill(arr)
self.s_sum_inv += s_inv
if i == 0:
self.s_sum_inv = s_inv
else:
self.s_sum_inv += s_inv
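As an aside, the masked reciprocal used above leaves voxels with zero sensitivity at zero instead of producing infinities. A minimal NumPy sketch of just that step, with a plain array standing in for the SIRF image data (illustrative only, not part of the commit):

import numpy as np

s = np.array([0.0, 2.0, 4.0])          # subset sensitivities; zeros occur e.g. outside the FOV
np.reciprocal(s, out=s, where=s != 0)  # invert only the non-zero entries
print(s)                               # [0.   0.5  0.25] -- zeros preserved, no inf/NaN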

def apply(self, algorithm, gradient, out=None):
if algorithm.iteration < self.freeze_iter:
@@ -227,11 +228,13 @@ def __init__(self, data: Dataset, update_objective_interval=10):
decay_perc = 0.1
decay = (1/(1-decay_perc) - 1)/update_interval
beta = 0.5

print(f"Using {self.num_subsets} subsets")

data_subs, acq_models, obj_funs = partitioner.data_partition(data.acquired_data, data.additive_term,
_, _, obj_funs = partitioner.data_partition(data.acquired_data, data.additive_term,
data.mult_factors, self.num_subsets, mode='staggered',
initial_image=data.OSEM_image)

print("made it past partitioner")

data.prior.set_penalisation_factor(data.prior.get_penalisation_factor() / len(obj_funs))
data.prior.set_up(data.OSEM_image)
@@ -242,7 +245,7 @@ def __init__(self, data: Dataset, update_objective_interval=10):
sampler = Sampler.random_without_replacement(len(obj_funs))
f = -FullGradientInitialiserFunction(obj_funs, sampler=sampler, init_steps=5)

preconditioner = BSREMPreconditioner(acq_models, epsilon=data.OSEM_image.max()/1e6, freeze_iter=10*update_interval+5)
preconditioner = BSREMPreconditioner(obj_funs, epsilon=data.OSEM_image.max()/1e6, freeze_iter=10*update_interval+5)
g = IndicatorBox(lower=0, accelerated=True) # non-negativity constraint

step_size_rule = ArmijoStepSearchRule(0.08, beta, decay, max_iter=100, tol=0.2, init_steps=5, update_interval=10*update_interval+5)
264 changes: 264 additions & 0 deletions partitioner.py
@@ -0,0 +1,264 @@
#
# SPDX-License-Identifier: Apache-2.0
#
# Functions for partitioning sirf.STIR.AcquisitionData into subsets
#
# Authors: Margaret Duff and Kris Thielemans
#
# Copyright 2024 Science and Technology Facilities Council
# Copyright 2024 University College London

import numpy
import math
import sirf.STIR as pet
pet.AcquisitionData.set_storage_scheme('memory')

def create_RayTracingAcqModel(num_LORs=5):
'''
Factory that creates a ray-tracing acquisition model
'''
acq_model = pet.AcquisitionModelUsingRayTracingMatrix()
acq_model.set_num_tangential_LORs(num_LORs)
return acq_model

def create_ParallelprojModel():
'''
Factory that creates a Parallelproj acquisition model
'''
acq_model = pet.AcquisitionModelUsingParallelproj()
return acq_model

def _default_acq_model():
'''
Create the default acquisition model.
Uses the Parallelproj model if it is available, otherwise falls back to a ray-tracing matrix model with 5 LORs.
'''
try:
return create_ParallelprojModel()
except AttributeError:
return create_RayTracingAcqModel()
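The create_acq_model argument below is a factory, so callers can swap the projector without touching the partitioning code. A hypothetical sketch (the lambda and LOR count are illustrative, not part of the commit):

# e.g. request a ray-tracing model with more tangential LORs than the default
my_acq_model_factory = lambda: create_RayTracingAcqModel(num_LORs=10)
# later: data_partition(..., create_acq_model=my_acq_model_factory)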

def data_partition( prompts, additive_term, multiplicative_factors, num_batches, mode="sequential", seed=None, initial_image=None, create_acq_model = _default_acq_model):
'''Partition the data into ``num_batches`` batches using the specified ``mode`` and creates an acquisition model for every batch.
The modes are
1. ``sequential`` - The data will be partitioned into ``num_batches`` batches of sequential indices.
2. ``staggered`` - The data will be partitioned into ``num_batches`` batches of sequential indices, with stride equal to ``num_batches``.
3. ``random_permutation`` - The data will be partitioned into ``num_batches`` batches of random indices.
Parameters
----------
prompts:
Noisy data
additive_term:
additive term ("corrected" scatter+randoms)
multiplicative_factors:
bin efficiencies
num_batches : int
The number of batches to partition the data into.
mode : str
The mode to use for partitioning. Must be one of "sequential", "staggered" or "random_permutation".
seed : int, optional
The seed to use for the random permutation. If not specified, the random number
generator will not be seeded.
initial_image: Optional, ImageData
If passed, the returned objectives and acquisition models will be set up. If not, you will have to do this yourself.
create_acq_model: Optional, AcquisitionModel "factory"
The argument needs to be a function that returns a new AcquisitionModel, such as create_RayTracingAcqModel.
Returns
-------
List of data subsets
List of acquisition models
List of objective functions
Example
-------
Partitioning a list of ints [0, 1, 2, 3, 4, 5, 6, 7, 8] into 4 batches will return:
1. [[0, 1, 2], [3, 4], [5, 6], [7, 8]] with ``sequential``
2. [[0, 4, 8], [1, 5], [2, 6], [3, 7]] with ``staggered``
3. [[8, 2, 6], [7, 1], [0, 4], [3, 5]] with ``random_permutation`` and seed 1
'''
if mode == "sequential":
return _partition_deterministic( prompts, additive_term, multiplicative_factors, num_batches, stagger=False, initial_image=initial_image, create_acq_model=create_acq_model)
elif mode == "staggered":
return _partition_deterministic( prompts, additive_term, multiplicative_factors, num_batches, stagger=True, initial_image=initial_image, create_acq_model=create_acq_model)
elif mode == "random":
return _partition_random_permutation( prompts, additive_term, multiplicative_factors, num_batches, seed=seed, initial_image=initial_image, create_acq_model=create_acq_model)
else:
raise ValueError('Unknown partition mode {}'.format(mode))
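A minimal usage sketch, assuming prompts, additive, mult and init_img are SIRF AcquisitionData/ImageData objects loaded elsewhere (the names are illustrative):

# split the data into 7 staggered subsets and get set-up objective functions back
data_subs, acq_models, obj_funs = data_partition(
    prompts, additive, mult, num_batches=7,
    mode='staggered', initial_image=init_img)

# each objective can then be evaluated or differentiated on its own subset
grad0 = obj_funs[0].gradient(init_img)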

def _partition_deterministic( prompts, additive_term, multiplicative_factors, num_batches, stagger=False, indices=None, initial_image=None, create_acq_model = _default_acq_model):
'''Partition the data into ``num_batches`` batches.
Parameters
----------
prompts:
Noisy data
additive_term:
additive term ("corrected" scatter+randoms)
multiplicative_factors:
bin efficiencies
num_batches : int
The number of batches to partition the data into.
stagger : bool, optional
If ``True``, the batches will be staggered. Default is ``False``.
indices : list of int, optional
The indices to partition. If not specified, the indices will be generated from the number of projections.
initial_image: Optional, ImageData
If passed, the returned objectives and acquisition models will be set up with this image. If not, they will be returned uninitialised.
create_acq_model: Optional, AcquisitionModel "factory"
The argument needs to be a function that returns a new AcquisitionModel, such as create_RayTracingAcqModel.
Returns
-------
List of data subsets
List of acquisition models
List of objective functions
'''
acquisition_models=[]
prompts_subsets=[]
objectives=[]

views=prompts.dimensions()[2]
if indices is None:
indices = list(range(views))
partitions_idxs = partition_indices(num_batches, indices, stagger)

for i in range(len(partitions_idxs)):
if len(partitions_idxs) > 1:
prompts_subset = prompts.get_subset(partitions_idxs[i])
additive_term_subset = additive_term.get_subset(partitions_idxs[i])
multiplicative_factors_subset = multiplicative_factors.get_subset(partitions_idxs[i])
else:
prompts_subset = prompts
additive_term_subset = additive_term
multiplicative_factors_subset = multiplicative_factors

sensitivity_factors = pet.AcquisitionSensitivityModel(multiplicative_factors_subset)
sensitivity_factors.set_up(multiplicative_factors_subset)
acquisition_model = create_acq_model()
acquisition_model.set_acquisition_sensitivity(sensitivity_factors)
acquisition_model.set_additive_term(additive_term_subset)

if initial_image is not None:
acquisition_model.set_up(prompts_subset, initial_image)

acquisition_models.append(acquisition_model)
prompts_subsets.append(prompts_subset)

objective_sirf = pet.make_Poisson_loglikelihood(prompts_subset, acq_model = acquisition_model)
if initial_image is not None:
objective_sirf.set_up(initial_image)
objectives.append(objective_sirf)


return prompts_subsets, acquisition_models, objectives


def _partition_random_permutation( prompts, additive_term, multiplicative_factors, num_batches, seed=None, initial_image=None, create_acq_model = _default_acq_model):
'''Partition the data into ``num_batches`` batches using a random permutation and creates an acquisition model for each batch.
Parameters
----------
prompts:
Noisy data
additive_term:
additive term ("corrected" scatter+randoms)
multiplicative_factors:
bin efficiencies
num_batches : int
The number of batches to partition the data into.
seed : int, optional
The seed to use for the random permutation. If not specified, the random number generator
will not be seeded.
initial_image: Optional, ImageData
If passed, the returned objectives and acquisition models will be set up with this image. If not, they will be returned uninitialised.
create_acq_model: Optional, AcquisitionModel "factory"
The argument needs to be a function that returns a new AcquisitionModel, such as create_RayTracingAcqModel.
Returns
-------
List of data subsets
List of acquisition models
List of objective functions
'''
views=prompts.dimensions()[2]
if seed is not None:
numpy.random.seed(seed)

indices = numpy.arange(views)
numpy.random.shuffle(indices)

indices = list(indices)

return _partition_deterministic(prompts, additive_term, multiplicative_factors, num_batches, stagger=False, indices=indices, initial_image=initial_image, create_acq_model=create_acq_model)

def partition_indices(num_batches, indices, stagger=False):
"""Partition a list of indices into num_batches of indices.
Parameters
----------
num_batches : int
The number of batches to partition the indices into.
indices : list of int, or int
The indices to partition. If passed a list, this list will be partitioned into ``num_batches``
partitions. If passed an int, the indices will be generated automatically using ``range(indices)``.
stagger : bool, default False
If True, the indices will be staggered across the batches.
Returns
--------
list of list of int
A list of batches of indices.
"""
# Partition the indices into batches.
if isinstance(indices, int):
indices = list(range(indices))

num_indices = len(indices)
# sanity check
if num_indices < num_batches:
raise ValueError(
'The number of batches must be less than or equal to the number of indices.'
)

if stagger:
batches = [indices[i::num_batches] for i in range(num_batches)]

else:
# we split the indices with floor(N/M)+1 indices in N%M groups
# and floor(N/M) indices in the remaining M - N%M groups.

# rename num_indices to N for brevity
N = num_indices
# rename num_batches to M for brevity
M = num_batches
batches = [
indices[j:j + math.floor(N / M) + 1] for j in range(N % M)
]
offset = N % M * (math.floor(N / M) + 1)
for i in range(M - N % M):
start = offset + i * math.floor(N / M)
end = start + math.floor(N / M)
batches.append(indices[start:end])

return batches
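The docstring example above can be checked directly; a quick sketch of the two deterministic modes:

print(partition_indices(4, list(range(9))))
# [[0, 1, 2], [3, 4], [5, 6], [7, 8]]           (sequential split)
print(partition_indices(4, list(range(9)), stagger=True))
# [[0, 4, 8], [1, 5], [2, 6], [3, 7]]           (staggered split)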
