SIMD для наложения буферов
This commit is contained in:
@@ -162,7 +162,62 @@ pub const DrawContext = struct {
|
||||
pub fn compositeDrawerContext(self: *DrawContext, other: *const DrawContext, opacity: f32) void {
|
||||
if (self.buf_width != other.buf_width or self.buf_height != other.buf_height) return;
|
||||
const n = self.buf_width * self.buf_height;
|
||||
for (0..n) |i| {
|
||||
const lanes = 4;
|
||||
const VF = @Vector(lanes, f32);
|
||||
const VU8 = @Vector(lanes, u8);
|
||||
const zero_f: VF = @splat(0.0);
|
||||
const max_f: VF = @splat(255.0);
|
||||
const opacity_v: VF = @splat(opacity);
|
||||
const inv_255_v: VF = @splat(1.0 / 255.0);
|
||||
|
||||
var i: usize = 0;
|
||||
while (i + lanes <= n) : (i += lanes) {
|
||||
const src0 = other.pixels[i + 0];
|
||||
const src1 = other.pixels[i + 1];
|
||||
const src2 = other.pixels[i + 2];
|
||||
const src3 = other.pixels[i + 3];
|
||||
|
||||
const src_a_u8: VU8 = .{ src0.a, src1.a, src2.a, src3.a };
|
||||
const nonzero_mask = src_a_u8 != @as(VU8, @splat(0));
|
||||
if (!@reduce(.Or, nonzero_mask)) continue;
|
||||
|
||||
const src_r_f: VF = @as(VF, @floatFromInt(@as(VU8, .{ src0.r, src1.r, src2.r, src3.r }))) * opacity_v;
|
||||
const src_g_f: VF = @as(VF, @floatFromInt(@as(VU8, .{ src0.g, src1.g, src2.g, src3.g }))) * opacity_v;
|
||||
const src_b_f: VF = @as(VF, @floatFromInt(@as(VU8, .{ src0.b, src1.b, src2.b, src3.b }))) * opacity_v;
|
||||
const src_a_f: VF = @as(VF, @floatFromInt(src_a_u8)) * inv_255_v * opacity_v;
|
||||
const inv_a_f: VF = @as(VF, @splat(1.0)) - src_a_f;
|
||||
|
||||
const dst_r_u8: VU8 = .{ self.pixels[i + 0].r, self.pixels[i + 1].r, self.pixels[i + 2].r, self.pixels[i + 3].r };
|
||||
const dst_g_u8: VU8 = .{ self.pixels[i + 0].g, self.pixels[i + 1].g, self.pixels[i + 2].g, self.pixels[i + 3].g };
|
||||
const dst_b_u8: VU8 = .{ self.pixels[i + 0].b, self.pixels[i + 1].b, self.pixels[i + 2].b, self.pixels[i + 3].b };
|
||||
const dst_a_u8: VU8 = .{ self.pixels[i + 0].a, self.pixels[i + 1].a, self.pixels[i + 2].a, self.pixels[i + 3].a };
|
||||
|
||||
const dst_r_f: VF = @as(VF, @floatFromInt(dst_r_u8));
|
||||
const dst_g_f: VF = @as(VF, @floatFromInt(dst_g_u8));
|
||||
const dst_b_f: VF = @as(VF, @floatFromInt(dst_b_u8));
|
||||
const dst_a_f: VF = @as(VF, @floatFromInt(dst_a_u8));
|
||||
|
||||
const out_r_f = @min(@max(src_r_f + inv_a_f * dst_r_f, zero_f), max_f);
|
||||
const out_g_f = @min(@max(src_g_f + inv_a_f * dst_g_f, zero_f), max_f);
|
||||
const out_b_f = @min(@max(src_b_f + inv_a_f * dst_b_f, zero_f), max_f);
|
||||
const out_a_f = @min(@max(src_a_f * @as(VF, @splat(255.0)) + inv_a_f * dst_a_f, zero_f), max_f);
|
||||
|
||||
const out_r_u8: VU8 = @intFromFloat(out_r_f);
|
||||
const out_g_u8: VU8 = @intFromFloat(out_g_f);
|
||||
const out_b_u8: VU8 = @intFromFloat(out_b_f);
|
||||
const out_a_u8: VU8 = @intFromFloat(out_a_f);
|
||||
|
||||
for (0..lanes) |lane| {
|
||||
if (!nonzero_mask[lane]) continue;
|
||||
const dst = &self.pixels[i + lane];
|
||||
dst.r = out_r_u8[lane];
|
||||
dst.g = out_g_u8[lane];
|
||||
dst.b = out_b_u8[lane];
|
||||
dst.a = out_a_u8[lane];
|
||||
}
|
||||
}
|
||||
|
||||
while (i < n) : (i += 1) {
|
||||
const src = other.pixels[i];
|
||||
if (src.a == 0) continue;
|
||||
const dst = &self.pixels[i];
|
||||
|
||||
Reference in New Issue
Block a user