Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JpegEncoder - Optimize Some Low Hanging Fruit #1533

Merged
merged 15 commits into from
Feb 4, 2021
Merged
87 changes: 55 additions & 32 deletions src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// <summary>
/// Represents a Jpeg block with <see cref="float"/> coefficients.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
internal partial struct Block8x8F : IEquatable<Block8x8F>
{
/// <summary>
Expand Down Expand Up @@ -51,9 +52,6 @@ internal partial struct Block8x8F : IEquatable<Block8x8F>
public Vector4 V7R;
#pragma warning restore SA1600 // ElementsMustBeDocumented

private static readonly Vector4 NegativeOne = new Vector4(-1);
private static readonly Vector4 Offset = new Vector4(.5F);

/// <summary>
/// Get/Set scalar elements at a given index
/// </summary>
Expand Down Expand Up @@ -556,22 +554,59 @@ private static unsafe void Scale16X16To8X8Scalar(ref Block8x8F destination, Read
/// <summary>
/// Divides every element of <paramref name="a"/> by the matching element of <paramref name="b"/>,
/// adds <c>0.5 * clamp(dividend, -1, 1)</c> to each quotient and stores the result back into
/// <paramref name="a"/>. <paramref name="b"/> is only read.
/// </summary>
/// <param name="a">The block holding the dividends; overwritten with the results.</param>
/// <param name="b">The block holding the divisors.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b)
{
a.V0L = DivideRound(a.V0L, b.V0L);
a.V0R = DivideRound(a.V0R, b.V0R);
a.V1L = DivideRound(a.V1L, b.V1L);
a.V1R = DivideRound(a.V1R, b.V1R);
a.V2L = DivideRound(a.V2L, b.V2L);
a.V2R = DivideRound(a.V2R, b.V2R);
a.V3L = DivideRound(a.V3L, b.V3L);
a.V3R = DivideRound(a.V3R, b.V3R);
a.V4L = DivideRound(a.V4L, b.V4L);
a.V4R = DivideRound(a.V4R, b.V4R);
a.V5L = DivideRound(a.V5L, b.V5L);
a.V5R = DivideRound(a.V5R, b.V5R);
a.V6L = DivideRound(a.V6L, b.V6L);
a.V6R = DivideRound(a.V6R, b.V6R);
a.V7L = DivideRound(a.V7L, b.V7L);
a.V7R = DivideRound(a.V7R, b.V7R);
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
{
// Broadcast constants: clamp bounds (-1, 1) and the 0.5 rounding offset.
var vnegOne = Vector256.Create(-1f);
var vadd = Vector256.Create(.5F);
var vone = Vector256.Create(1f);

// Reinterpret the blocks, starting at V0L, as Vector256<float> values.
// This relies on the Vector4 fields being laid out sequentially; aEnd sits 8 vectors past the start.
ref Vector256<float> aBase = ref Unsafe.AsRef(Unsafe.As<Vector4, Vector256<float>>(ref a.V0L));
ref Vector256<float> bBase = ref Unsafe.AsRef(Unsafe.As<Vector4, Vector256<float>>(ref b.V0L));
ref Vector256<float> aEnd = ref Unsafe.Add(ref aBase, 8);

// Handle one Vector256<float> of a and b per iteration until aBase reaches aEnd.
while (Unsafe.IsAddressLessThan(ref aBase, ref aEnd))
{
// voff = 0.5 * clamp(a, -1, 1)
Vector256<float> voff = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, aBase), vone), vadd);
// a = a / b + voff, written back in place.
Unsafe.Add(ref aBase, 0) = Avx.Add(Avx.Divide(aBase, bBase), voff);

// Advance both references to the next vector.
aBase = ref Unsafe.Add(ref aBase, 1);
bBase = ref Unsafe.Add(ref bBase, 1);
}
}
else
#endif
{
// Fallback: apply DivideRound to each Vector4 field in turn.
a.V0L = DivideRound(a.V0L, b.V0L);
a.V0R = DivideRound(a.V0R, b.V0R);
a.V1L = DivideRound(a.V1L, b.V1L);
a.V1R = DivideRound(a.V1R, b.V1R);
a.V2L = DivideRound(a.V2L, b.V2L);
a.V2R = DivideRound(a.V2R, b.V2R);
a.V3L = DivideRound(a.V3L, b.V3L);
a.V3R = DivideRound(a.V3R, b.V3R);
a.V4L = DivideRound(a.V4L, b.V4L);
a.V4R = DivideRound(a.V4R, b.V4R);
a.V5L = DivideRound(a.V5L, b.V5L);
a.V5R = DivideRound(a.V5R, b.V5R);
a.V6L = DivideRound(a.V6L, b.V6L);
a.V6R = DivideRound(a.V6R, b.V6R);
a.V7L = DivideRound(a.V7L, b.V7L);
a.V7R = DivideRound(a.V7R, b.V7R);
}
}

/// <summary>
/// Divides <paramref name="dividend"/> by <paramref name="divisor"/> element-wise and adds
/// half of the dividend clamped to [-1, 1] to each quotient ("almost round").
/// </summary>
/// <param name="dividend">The values to divide.</param>
/// <param name="divisor">The values to divide by.</param>
/// <returns><c>dividend / divisor + 0.5 * clamp(dividend, -1, 1)</c> for each element.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector4 DivideRound(Vector4 dividend, Vector4 divisor)
{
    // sign(dividend) is approximated by max(min(dividend, 1), -1).
    Vector4 clampedSign = Numerics.Clamp(dividend, new Vector4(-1), Vector4.One);

    Vector4 quotient = dividend / divisor;
    Vector4 halfStep = clampedSign * new Vector4(.5F);

    // AlmostRound(dividend / divisor) = dividend / divisor + 0.5 * sign(dividend)
    return quotient + halfStep;
}

public void RoundInto(ref Block8x8 dest)
Expand Down Expand Up @@ -673,8 +708,7 @@ public void LoadFromInt16ExtendedAvx2(ref Block8x8 source)

/// <inheritdoc />
public bool Equals(Block8x8F other)
{
return this.V0L == other.V0L
=> this.V0L == other.V0L
&& this.V0R == other.V0R
&& this.V1L == other.V1L
&& this.V1R == other.V1R
Expand All @@ -690,7 +724,6 @@ public bool Equals(Block8x8F other)
&& this.V6R == other.V6R
&& this.V7L == other.V7L
&& this.V7R == other.V7R;
}

/// <inheritdoc />
public override string ToString()
Expand Down Expand Up @@ -718,16 +751,6 @@ private static Vector<float> NormalizeAndRound(Vector<float> row, Vector<float>
return row.FastRound();
}

[MethodImpl(InliningOptions.ShortMethod)]
private static Vector4 DivideRound(Vector4 dividend, Vector4 divisor)
{
// sign(dividend) = max(min(dividend, 1), -1)
Vector4 sign = Numerics.Clamp(dividend, NegativeOne, Vector4.One);

// AlmostRound(dividend/divisor) = dividend/divisor + 0.5*sign(dividend)
return (dividend / divisor) + (sign * Offset);
}

[Conditional("DEBUG")]
private static void GuardBlockIndex(int idx)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ public static YCbCrForwardConverter<TPixel> Create()
/// <summary>
/// Converts an 8x8 image area inside 'pixels' at position (x,y), placing the result into the members of the structure (<see cref="Y"/>, <see cref="Cb"/>, <see cref="Cr"/>)
/// </summary>
public void Convert(ImageFrame<TPixel> frame, int x, int y, in RowOctet<TPixel> currentRows)
public void Convert(ImageFrame<TPixel> frame, int x, int y, ref RowOctet<TPixel> currentRows)
{
this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, currentRows);
this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, ref currentRows);

Span<Rgb24> rgbSpan = this.rgbBlock.AsSpanUnsafe();
PixelOperations<TPixel>.Instance.ToRgb24(frame.GetConfiguration(), this.pixelBlock.AsSpanUnsafe(), rgbSpan);
Expand All @@ -76,7 +76,7 @@ public void Convert(ImageFrame<TPixel> frame, int x, int y, in RowOctet<TPixel>
}
else
{
this.colorTables.Convert(rgbSpan, ref yBlock, ref cbBlock, ref crBlock);
this.colorTables.Convert(rgbSpan, ref yBlock, ref cbBlock, ref crBlock);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/ImageSharp/Formats/Jpeg/Components/GenericBlock8x8.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public T this[int idx]
/// Load an 8x8 region of an image into the block.
/// The "outlying" area of the block will be stretched out with pixels on the right and bottom edge of the image.
/// </summary>
public void LoadAndStretchEdges(Buffer2D<T> source, int sourceX, int sourceY, in RowOctet<T> currentRows)
public void LoadAndStretchEdges(Buffer2D<T> source, int sourceX, int sourceY, ref RowOctet<T> currentRows)
{
int width = Math.Min(8, source.Width - sourceX);
int height = Math.Min(8, source.Height - sourceY);
Expand Down
92 changes: 60 additions & 32 deletions src/ImageSharp/Formats/Jpeg/Components/RowOctet.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.

using System;
Expand All @@ -12,39 +12,24 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// Cache 8 pixel rows on the stack, which may originate from different buffers of a <see cref="MemoryGroup{T}"/>.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
internal readonly ref struct RowOctet<T>
internal ref struct RowOctet<T>
where T : struct
{
private readonly Span<T> row0;
private readonly Span<T> row1;
private readonly Span<T> row2;
private readonly Span<T> row3;
private readonly Span<T> row4;
private readonly Span<T> row5;
private readonly Span<T> row6;
private readonly Span<T> row7;

public RowOctet(Buffer2D<T> buffer, int startY)
{
int y = startY;
int height = buffer.Height;
this.row0 = y < height ? buffer.GetRowSpan(y++) : default;
this.row1 = y < height ? buffer.GetRowSpan(y++) : default;
this.row2 = y < height ? buffer.GetRowSpan(y++) : default;
this.row3 = y < height ? buffer.GetRowSpan(y++) : default;
this.row4 = y < height ? buffer.GetRowSpan(y++) : default;
this.row5 = y < height ? buffer.GetRowSpan(y++) : default;
this.row6 = y < height ? buffer.GetRowSpan(y++) : default;
this.row7 = y < height ? buffer.GetRowSpan(y) : default;
}
private Span<T> row0;
private Span<T> row1;
private Span<T> row2;
private Span<T> row3;
private Span<T> row4;
private Span<T> row5;
private Span<T> row6;
private Span<T> row7;

// No unsafe tricks, since Span<T> can't be used as a generic argument
public Span<T> this[int y]
{
[MethodImpl(InliningOptions.ShortMethod)]
get
{
// No unsafe tricks, since Span<T> can't be used as a generic argument
return y switch
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get =>
y switch
{
0 => this.row0,
1 => this.row1,
Expand All @@ -56,13 +41,56 @@ public Span<T> this[int y]
7 => this.row7,
_ => ThrowIndexOutOfRangeException()
};

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private set
{
switch (y)
{
case 0:
this.row0 = value;
break;
case 1:
this.row1 = value;
break;
case 2:
this.row2 = value;
break;
case 3:
this.row3 = value;
break;
case 4:
this.row4 = value;
break;
case 5:
this.row5 = value;
break;
case 6:
this.row6 = value;
break;
default:
this.row7 = value;
break;
}
}
}

[MethodImpl(InliningOptions.ColdPath)]
private static Span<T> ThrowIndexOutOfRangeException()
/// <summary>
/// Re-points the cached rows at consecutive rows of <paramref name="buffer"/>,
/// beginning with row <paramref name="startY"/>. At most 8 rows are assigned, and
/// assignment stops early once the row index reaches <c>buffer.Height</c>.
/// </summary>
/// <param name="buffer">The buffer whose row spans are cached.</param>
/// <param name="startY">The index of the first buffer row to cache.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Update(Buffer2D<T> buffer, int startY)
{
throw new IndexOutOfRangeException();
int y = startY;
int height = buffer.Height;

// We don't actually have to assign values outside of the
// frame pixel buffer since they are never requested.
for (int i = 0; i < 8 && y < height; i++)
{
// Stores the span through the private indexer setter.
this[i] = buffer.GetRowSpan(y++);
}
}

/// <summary>
/// Always throws <see cref="IndexOutOfRangeException"/>. The <see cref="Span{T}"/> return type
/// lets the call be used as an arm of the indexer's switch expression; no value is ever returned.
/// </summary>
/// <remarks>
/// Marked <see cref="MethodImplOptions.NoInlining"/>, presumably to keep the throw path out of
/// the aggressively inlined indexer getter.
/// </remarks>
/// <returns>Never returns.</returns>
/// <exception cref="IndexOutOfRangeException">Thrown on every call.</exception>
[MethodImpl(MethodImplOptions.NoInlining)]
private static Span<T> ThrowIndexOutOfRangeException()
{
    throw new IndexOutOfRangeException();
}
}
}
Loading