Djdefrag
diff --git a/‎.github/workflows/build.yml‎
Lines changed: 5 additions & 0 deletions b/‎.github/workflows/build.yml‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎DEPLOY/Deploy.bat‎
Lines changed: 0 additions & 1 deletion b/‎DEPLOY/Deploy.bat‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎Effects/ACNet.hlsl‎
Lines changed: 494 additions & 372 deletions b/‎Effects/ACNet.hlsl‎
Lines changed: 494 additions & 372 deletions
diff --git a/‎Effects/AdaptiveSharpen.hlsl‎
Lines changed: 173 additions & 255 deletions b/‎Effects/AdaptiveSharpen.hlsl‎
Lines changed: 173 additions & 255 deletions
diff --git a/‎Effects/Anime4K_3D_AA_Upscale_US.hlsl‎
Lines changed: 185 additions & 85 deletions b/‎Effects/Anime4K_3D_AA_Upscale_US.hlsl‎
Lines changed: 185 additions & 85 deletions
diff --git a/‎Effects/Anime4K_3D_Upscale_US.hlsl‎
Lines changed: 185 additions & 85 deletions b/‎Effects/Anime4K_3D_Upscale_US.hlsl‎
Lines changed: 185 additions & 85 deletions
diff --git a/‎Effects/Anime4K_Denoise_Bilateral_Mean.hlsl‎
Lines changed: 78 additions & 30 deletions b/‎Effects/Anime4K_Denoise_Bilateral_Mean.hlsl‎
Lines changed: 78 additions & 30 deletions
diff --git a/‎Effects/Anime4K_Denoise_Bilateral_Median.hlsl‎
Lines changed: 37 additions & 40 deletions b/‎Effects/Anime4K_Denoise_Bilateral_Median.hlsl‎
Lines changed: 37 additions & 40 deletions
@@ -18,6 +18,11 @@ jobs:
 
     steps:
     - uses: actions/checkout@v2.4.0
+
+    - name: Setup dotnet
+      uses: actions/setup-dotnet@v2
+      with:
+        dotnet-version: '6.0.202' # exact SDK version requested from setup-dotnet
 
     - name: Load Conan cache
       uses: actions/cache@v2
 
@@ -10,6 +10,7 @@
 *.userosscache
 *.sln.docstates
 .conan/
+my_*/
 
 # User-specific files (MonoDevelop/Xamarin Studio)
 *.userprefs
 
@@ -30,4 +30,3 @@ cd ../publish/
 del *.pdb
 del *lib
 del *.exp
-del xaudio2_9redist.dll
@@ -1,20 +1,14 @@
+// Anime4K_Denoise_Bilateral_Mean
 // 移植自 https://github.com/bloc97/Anime4K/blob/master/glsl/Denoise/Anime4K_Denoise_Bilateral_Mean.glsl
 
 
 //!MAGPIE EFFECT
-//!VERSION 1
+//!VERSION 2
 //!OUTPUT_WIDTH INPUT_WIDTH
 //!OUTPUT_HEIGHT INPUT_HEIGHT
 
-//!CONSTANT
-//!VALUE INPUT_PT_X
-float inputPtX;
 
-//!CONSTANT
-//!VALUE INPUT_PT_Y
-float inputPtY;
-
-//!CONSTANT
+//!PARAMETER
 //!MIN 1e-5
 //!DEFAULT 0.1
 float intensitySigma;
@@ -28,46 +22,100 @@ SamplerState sam;
 
 
 //!PASS 1
-//!BIND INPUT
+//!IN INPUT
+//!BLOCK_SIZE 16
+//!NUM_THREADS 64
 
 #define INTENSITY_SIGMA intensitySigma //Intensity window size, higher is stronger denoise, must be a positive real number
 #define SPATIAL_SIGMA 1.0 //Spatial window size, higher is stronger denoise, must be a positive real number.
 
 #define INTENSITY_POWER_CURVE 1.0 //Intensity window power curve. Setting it to 0 will make the intensity window treat all intensities equally, while increasing it will make the window narrower in darker intensities and wider in brighter intensities.
 
 #define KERNELSIZE (max(uint(ceil(SPATIAL_SIGMA * 2.0)), 1) * 2 + 1) //Kernel size, must be an positive odd integer.
-#define KERNELHALFSIZE (int(KERNELSIZE/2)) //Half of the kernel size without remainder. Must be equal to trunc(KERNELSIZE/2).
+#define KERNELHALFSIZE (uint(KERNELSIZE/2)) //Half of the kernel size without remainder. Must be equal to trunc(KERNELSIZE/2).
 #define KERNELLEN (KERNELSIZE * KERNELSIZE) //Total area of kernel. Must be equal to KERNELSIZE * KERNELSIZE.
 
-#define GETOFFSET(i) int2(int(i % KERNELSIZE) - KERNELHALFSIZE, int(i / KERNELSIZE) - KERNELHALFSIZE)
 
-float3 gaussian_vec(float3 x, float3 s, float3 m) {
-	float3 scaled = (x - m) / s;
+float3 gaussian_vec(float3 x, float3 rcpS, float3 m) { // Per-component exp(-0.5 * ((x - m) * rcpS)^2); no normalization factor is applied.
+	float3 scaled = (x - m) * rcpS; // rcpS multiplies the offset, so callers pass the reciprocal of sigma rather than sigma itself.
 	return exp(-0.5 * scaled * scaled);
 }
 
-float gaussian(float x, float s, float m) {
-	float scaled = (x - m) / s;
+float gaussian(float x, float rcpS, float m) { // Scalar exp(-0.5 * ((x - m) * rcpS)^2); no normalization factor is applied.
+	float scaled = (x - m) * rcpS; // rcpS multiplies the offset, so callers pass the reciprocal of sigma rather than sigma itself.
 	return exp(-0.5 * scaled * scaled);
 }
 
 
-float4 Pass1(float2 pos) {
-	float3 sum = 0;
-	float3 n = 0;
-
-	float3 vc = INPUT.Sample(sam, pos).rgb;
+void Pass1(uint2 blockStart, uint3 threadId) { // Bilateral-mean denoise: each invocation filters the 2x2 output quad whose top-left pixel is gxy.
+	uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; // NOTE(review): Rmp8x8 is defined elsewhere; presumably maps the thread index to an 8x8 tile position - confirm.
+	if (!CheckViewport(gxy)) {
+		return;
+	}
 
-	float3 is = pow(vc + 0.0001, INTENSITY_POWER_CURVE) * INTENSITY_SIGMA;
-	float ss = SPATIAL_SIGMA;
+	float2 inputPt = GetInputPt();
+	uint i, j;
+
+	float3 src[KERNELSIZE + 1][KERNELSIZE + 1]; // One extra row/column so the same cache serves all four pixels of the quad.
+	[unroll]
+	for (i = 0; i <= KERNELSIZE - 1; i += 2) {
+		[unroll]
+		for (j = 0; j <= KERNELSIZE - 1; j += 2) {
+			float2 tpos = (int2(gxy) + int2(i, j) - (int)KERNELHALFSIZE + 1) * inputPt; // Signed math: KERNELHALFSIZE is a uint, so the unsigned form wraps to a huge coordinate when gxy is 0.
+			const float4 sr = INPUT.GatherRed(sam, tpos);
+			const float4 sg = INPUT.GatherGreen(sam, tpos);
+			const float4 sb = INPUT.GatherBlue(sam, tpos);
+
+			// Gather component layout:  w z   (upper row of the 2x2 footprint)
+			//                           x y   (lower row of the 2x2 footprint)
+			src[i][j] = float3(sr.w, sg.w, sb.w);
+			src[i][j + 1] = float3(sr.x, sg.x, sb.x);
+			src[i + 1][j] = float3(sr.z, sg.z, sb.z);
+			src[i + 1][j + 1] = float3(sr.y, sg.y, sb.y);
+		}
+	}
 
-	for (uint i = 0; i < KERNELLEN; i++) {
-		float2 ipos = pos + GETOFFSET(i) * float2(inputPtX, inputPtY);
-		float3 v = INPUT.Sample(sam, ipos).rgb;
-		float3 d = gaussian_vec(v, is, vc) * gaussian(length(ipos), ss, 0.0);
-		sum += d * v;
-		n += d;
+	float len[KERNELSIZE][KERNELSIZE]; // Distance of every kernel tap from the kernel centre; shared by all four pixels.
+	[unroll]
+	for (i = 0; i < KERNELSIZE; ++i) {
+		[unroll]
+		for (j = 0; j < KERNELSIZE; ++j) {
+			len[i][j] = length(float2((int)i - (int)KERNELHALFSIZE, (int)j - (int)KERNELHALFSIZE)); // Cast the uint half size: int - uint is unsigned and wraps for taps left of / above the centre.
+		}
 	}
 
-	return float4(sum / n, 1);
+	[unroll]
+	for (i = 0; i <= 1; ++i) {
+		[unroll]
+		for (j = 0; j <= 1; ++j) {
+			uint2 destPos = gxy + uint2(i, j);
+
+			if (i != 0 || j != 0) { // (0, 0) was already validated by the early-out above.
+				if (!CheckViewport(destPos)) { // Must test this pixel, not gxy, or the other three quad pixels are never bounds-checked.
+					continue;
+				}
+			}
+
+			float3 sum = 0;
+			float3 n = 0;
+
+			float3 vc = src[KERNELHALFSIZE + i][KERNELHALFSIZE + j].rgb; // Centre sample of this pixel's window inside the shared cache.
+
+			float3 rcpIs = rcp(pow(vc + 0.0001, INTENSITY_POWER_CURVE) * INTENSITY_SIGMA); // Reciprocals are taken once here so the tap loop only multiplies.
+			float rcpSs = rcp(SPATIAL_SIGMA);
+
+			[unroll]
+			for (uint k = 0; k < KERNELSIZE; ++k) {
+				[unroll]
+				for (uint m = 0; m < KERNELSIZE; ++m) {
+					float3 v = src[k + i][m + j]; // Window is shifted by (i, j) for the current quad pixel.
+					float3 d = gaussian_vec(v, rcpIs, vc) * gaussian(len[k][m], rcpSs, 0); // Intensity weight times spatial weight.
+					sum += d * v;
+					n += d;
+				}
+			}
+
+			WriteToOutput(destPos, sum / n); // Weighted mean of the window.
+		}
+	}
 }
@@ -2,54 +2,29 @@
 
 
 //!MAGPIE EFFECT
-//!VERSION 1
+//!VERSION 2
 //!OUTPUT_WIDTH INPUT_WIDTH
 //!OUTPUT_HEIGHT INPUT_HEIGHT
 
-//!CONSTANT
-//!VALUE INPUT_PT_X
-float inputPtX;
 
-//!CONSTANT
-//!VALUE INPUT_PT_Y
-float inputPtY;
-
-//!CONSTANT
+//!PARAMETER
 //!MIN 1e-5
 //!DEFAULT 0.1
 float intensitySigma;
 
 //!TEXTURE
 Texture2D INPUT;
 
-//!TEXTURE
-//!WIDTH INPUT_WIDTH
-//!HEIGHT INPUT_HEIGHT
-//!FORMAT R16_FLOAT
-Texture2D lumaTex;
-
 //!SAMPLER
 //!FILTER POINT
 SamplerState sam;
 
 
-//!PASS 1
-//!BIND INPUT
-//!SAVE lumaTex
-
-float get_luma(float3 rgba) {
-	return dot(float3(0.299, 0.587, 0.114), rgba);
-}
-
-float4 Pass1(float2 pos) {
-	return float4(get_luma(INPUT.Sample(sam, pos).rgb), 0.0, 0.0, 0.0);
-}
-
 
-//!PASS 2
-//!BIND INPUT, lumaTex
-
-#pragma warning(disable: 3557)
+//!PASS 1
+//!IN INPUT
+//!BLOCK_SIZE 8
+//!NUM_THREADS 64
 
 #define INTENSITY_SIGMA intensitySigma //Intensity window size, higher is stronger denoise, must be a positive real number
 #define SPATIAL_SIGMA 1.0 //Spatial window size, higher is stronger denoise, must be a positive real number.
@@ -63,15 +38,23 @@ float4 Pass1(float2 pos) {
 
 #define GETOFFSET(i) int2(int(i % KERNELSIZE) - KERNELHALFSIZE, int(i / KERNELSIZE) - KERNELHALFSIZE)
 
+float get_luma(float3 rgb) { // Returns a single luminance value for an RGB colour.
+	return dot(float3(0.299, 0.587, 0.114), rgb); // Weighted sum: 0.299 * r + 0.587 * g + 0.114 * b.
+}
+
 float gaussian(float x, float s, float m) {
 	float scaled = (x - m) / s;
 	return exp(-0.5 * scaled * scaled);
 }
 
 float3 getMedian(float3 v[KERNELLEN], float w[KERNELLEN], float n) {
+	float3 result = float3(0, 0, 0);
+
+	[unroll]
 	for (uint i = 0; i < KERNELLEN; i++) {
 		float w_above = 0.0;
 		float w_below = 0.0;
+		[unroll]
 		for (uint j = 0; j < KERNELLEN; j++) {
 			if (v[j].x > v[i].x) {
 				w_above += w[j];
@@ -81,30 +64,40 @@ float3 getMedian(float3 v[KERNELLEN], float w[KERNELLEN], float n) {
 		}
 
 		if ((n - w_above) / n >= 0.5 && w_below / n <= 0.5) {
-			return v[i];
+			result = v[i];
+			break;
 		}
 	}
 
-	return float3(0, 0, 0);
+	return result;
 }
 
-float4 Pass2(float2 pos) {
+void Pass1(uint2 blockStart, uint3 threadId) {
+	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
+	if (!CheckViewport(gxy)) {
+		return;
+	}
+
+	float2 inputPt = GetInputPt();
+	float2 pos = (gxy + 0.5f) * inputPt;
+
 	float3 histogram_v[KERNELLEN];
 	float histogram_l[KERNELLEN];
 	float histogram_w[KERNELLEN];
 	float n = 0.0;
 
-	float vc = lumaTex.Sample(sam, pos).x;
+	float vc = get_luma(INPUT.SampleLevel(sam, pos, 0).rgb);
 
 	float is = pow(vc + 0.0001, INTENSITY_POWER_CURVE) * INTENSITY_SIGMA;
 	float ss = SPATIAL_SIGMA;
 
 	uint i;
 
+	[unroll]
 	for (i = 0; i < KERNELLEN; i++) {
-		float2 ipos = pos + GETOFFSET(i) * float2(inputPtX, inputPtY);
-		histogram_v[i] = INPUT.Sample(sam, ipos).rgb;
-		histogram_l[i] = lumaTex.Sample(sam, ipos).x;
+		int2 ipos = GETOFFSET(i);
+		histogram_v[i] = INPUT.SampleLevel(sam, pos + ipos * inputPt, 0).rgb;
+		histogram_l[i] = get_luma(histogram_v[i]);
 		histogram_w[i] = gaussian(histogram_l[i], is, vc) * gaussian(length(ipos), ss, 0.0);
 		n += histogram_w[i];
 	}
@@ -113,12 +106,15 @@ float4 Pass2(float2 pos) {
 		float histogram_wn[KERNELLEN];
 		n = 0.0;
 
+		[unroll]
 		for (i = 0; i < KERNELLEN; i++) {
 			histogram_wn[i] = 0.0;
 		}
 
+		[unroll]
 		for (i = 0; i < KERNELLEN; i++) {
 			histogram_wn[i] += gaussian(0.0, HISTOGRAM_REGULARIZATION, 0.0) * histogram_w[i];
+			[unroll]
 			for (uint j = (i + 1); j < KERNELLEN; j++) {
 				float d = gaussian(histogram_l[j], HISTOGRAM_REGULARIZATION, histogram_l[i]);
 				histogram_wn[j] += d * histogram_w[i];
@@ -127,8 +123,9 @@ float4 Pass2(float2 pos) {
 			n += histogram_wn[i];
 		}
 
-		return float4(getMedian(histogram_v, histogram_wn, n), 1);
+		WriteToOutput(gxy, getMedian(histogram_v, histogram_wn, n));
+		return;
 	}
 
-	return float4(getMedian(histogram_v, histogram_w, n), 1);
+	WriteToOutput(gxy, getMedian(histogram_v, histogram_w, n));
 }