Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix and improve: VAE tiling #372

Merged
merged 3 commits into from
Aug 27, 2024
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 30 additions & 9 deletions ggml_extend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,16 @@ __STATIC_INLINE__ void ggml_split_tensor_2d(struct ggml_tensor* input,
}
}

// Linear interpolation between a and b with blend factor x:
// yields a for x == 0 and b for x == 1. x is used as given (no clamping).
__STATIC_INLINE__ float ggml_lerp_f32(const float a, const float b, const float x) {
    const float weight_a = 1 - x;  // share of a in the blend; x is the share of b
    return weight_a * a + x * b;
}

Green-Sky marked this conversation as resolved.
Show resolved Hide resolved
// Smootherstep polynomial 6x^5 - 15x^4 + 10x^3, written in factored form
// as x^3 * (x * (6x - 15) + 10).
// The input is NOT clamped here: callers must pass x within [0, 1],
// which is enforced by the assertion below.
__STATIC_INLINE__ float ggml_smootherstep_f32(const float x) {
    GGML_ASSERT(x >= 0.f && x <= 1.f);
    const float cubed = x * x * x;
    const float inner = x * (6.0f * x - 15.0f) + 10.0f;
    return cubed * inner;
}

__STATIC_INLINE__ void ggml_merge_tensor_2d(struct ggml_tensor* input,
struct ggml_tensor* output,
int x,
Expand All @@ -357,23 +367,34 @@ __STATIC_INLINE__ void ggml_merge_tensor_2d(struct ggml_tensor* input,
int64_t width = input->ne[0];
int64_t height = input->ne[1];
int64_t channels = input->ne[2];

int64_t img_width = output->ne[0];
int64_t img_height = output->ne[1];

GGML_ASSERT(input->type == GGML_TYPE_F32 && output->type == GGML_TYPE_F32);
for (int iy = 0; iy < height; iy++) {
for (int ix = 0; ix < width; ix++) {
for (int k = 0; k < channels; k++) {
float new_value = ggml_tensor_get_f32(input, ix, iy, k);
if (overlap > 0) { // blend colors in overlapped area
float old_value = ggml_tensor_get_f32(output, x + ix, y + iy, k);
if (x > 0 && ix < overlap) { // in overlapped horizontal
ggml_tensor_set_f32(output, old_value + (new_value - old_value) * (ix / (1.0f * overlap)), x + ix, y + iy, k);
continue;
}
if (y > 0 && iy < overlap) { // in overlapped vertical
ggml_tensor_set_f32(output, old_value + (new_value - old_value) * (iy / (1.0f * overlap)), x + ix, y + iy, k);
continue;
}

const float x_f_0 = (x > 0) ? ix / float(overlap) : 1;
const float x_f_1 = (x < (img_width - width)) ? (width - ix) / float(overlap) : 1 ;
const float y_f_0 = (y > 0) ? iy / float(overlap) : 1;
const float y_f_1 = (y < (img_height - height)) ? (height - iy) / float(overlap) : 1;

const float x_f = std::min(std::min(x_f_0, x_f_1), 1.f);
const float y_f = std::min(std::min(y_f_0, y_f_1), 1.f);

ggml_tensor_set_f32(
output,
old_value + new_value * ggml_smootherstep_f32(y_f) * ggml_smootherstep_f32(x_f),
x + ix, y + iy, k
);
} else {
ggml_tensor_set_f32(output, new_value, x + ix, y + iy, k);
}
ggml_tensor_set_f32(output, new_value, x + ix, y + iy, k);
}
}
}
Expand Down
Loading