From 9ec298305a9575266576442e1a775aa47d67772c Mon Sep 17 00:00:00 2001
From: Yuxuan Shui <yshuiv7@gmail.com>
Date: Wed, 20 Feb 2019 16:43:42 +0000
Subject: [PATCH] Cache converted blur kernel

After converting a blur kernel from struct conv to the X Render filter
format, cache the result to save CPU time.

Also remove an unused function, normalize_conv_kern().

Signed-off-by: Yuxuan Shui <yshuiv7@gmail.com>
---
 src/backend/xrender.c | 25 +++++++++++++++++++------
 src/kernel.c          | 20 --------------------
 src/kernel.h          |  7 -------
 src/render.c          | 25 +++++--------------------
 src/x.c               | 41 ++++++++++++++++++++++++++++-------------
 src/x.h               | 17 +++++++++--------
 6 files changed, 61 insertions(+), 74 deletions(-)

diff --git a/src/backend/xrender.c b/src/backend/xrender.c
index 9f81978..490036f 100644
--- a/src/backend/xrender.c
+++ b/src/backend/xrender.c
@@ -51,6 +51,11 @@ typedef struct _xrender_data {
 
 	/// 1x1 picture of the shadow color
 	xcb_render_picture_t shadow_pixel;
+
+	/// Blur kernels converted to X format
+	xcb_render_fixed_t *x_blur_kern[MAX_BLUR_PASS];
+	/// Number of elements in each blur kernel
+	size_t x_blur_kern_size[MAX_BLUR_PASS];
 } xrender_data;
 
 #if 0
@@ -118,7 +123,8 @@ static void compose(void *backend_data, session_t *ps, win *w, void *win_data, i
 		// content, and destroying it.
 		pixman_region32_intersect(&reg_tmp, &reg_tmp, (region_t *)reg_paint);
 
-		if (ps->o.xinerama_shadow_crop && w->xinerama_scr >= 0 && w->xinerama_scr < ps->xinerama_nscrs)
+		if (ps->o.xinerama_shadow_crop && w->xinerama_scr >= 0 &&
+		    w->xinerama_scr < ps->xinerama_nscrs)
 			// There can be a window where number of screens is updated,
 			// but the screen number attached to the windows have not.
 			//
@@ -160,7 +166,8 @@ blur(void *backend_data, session_t *ps, double opacity, const region_t *reg_pain
 	const pixman_box32_t *reg = pixman_region32_extents((region_t *)reg_paint);
 	const int height = reg->y2 - reg->y1;
 	const int width = reg->x2 - reg->x1;
-	static const char *default_filter = "Nearest";
+	static const char *filter0 = "Nearest";        // The "null" filter
+	static const char *filter = "convolution";
 
 	// Create a buffer for storing blurred picture, make it just big enough
 	// for the blur region
@@ -194,14 +201,15 @@ blur(void *backend_data, session_t *ps, double opacity, const region_t *reg_pain
 	// For 1 pass, we do
 	//   back -(pass 1)-> tmp0 -(copy)-> target_buffer
 	int i;
-	for (i = 0; ps->o.blur_kerns[i]; i++) {
+	for (i = 0; xd->x_blur_kern[i]; i++) {
 		assert(i < MAX_BLUR_PASS - 1);
 
 		// Copy from source picture to destination. The filter must
 		// be applied on source picture, to get the nearby pixels outside the
 		// window.
 		// TODO cache converted blur_kerns
-		x_set_picture_convolution_kernel(ps->c, src_pict, ps->o.blur_kerns[i]);
+		xcb_render_set_picture_filter(ps->c, src_pict, strlen(filter), filter,
+		                              xd->x_blur_kern_size[i], xd->x_blur_kern[i]);
 
 		if (ps->o.blur_kerns[i + 1] || i == 0) {
 			// This is not the last pass, or this is the first pass
@@ -216,8 +224,8 @@ blur(void *backend_data, session_t *ps, double opacity, const region_t *reg_pain
 		}
 
 		// reset filter
-		xcb_render_set_picture_filter(ps->c, src_pict, strlen(default_filter),
-		                              default_filter, 0, NULL);
+		xcb_render_set_picture_filter(ps->c, src_pict, strlen(filter0), filter0,
+		                              0, NULL);
 
 		src_pict = tmp_picture[current];
 		dst_pict = tmp_picture[!current];
@@ -434,6 +442,11 @@ static void *init(session_t *ps) {
 			free(e);
 		}
 	}
+	for (int i = 0; ps->o.blur_kerns[i]; i++) {
+		assert(i < MAX_BLUR_PASS - 1);
+		xd->x_blur_kern_size[i] = x_picture_filter_from_conv(
+		    ps->o.blur_kerns[i], 1, &xd->x_blur_kern[i], (size_t[]){0});
+	}
 	return xd;
 }
 
diff --git a/src/kernel.c b/src/kernel.c
index b5a4948..07d810c 100644
--- a/src/kernel.c
+++ b/src/kernel.c
@@ -121,24 +121,4 @@ void sum_kernel_preprocess(conv *map) {
 	}
 }
 
-/**
- * Normalize a convolution kernel.
- *
- * @param[in,out] kern the kernel
- */
-void normalize_conv_kern(conv *kern) {
-	double sum = 0.0;
-	for (int i = 0; i < kern->w * kern->h; i++) {
-		sum += kern->data[i];
-	}
-	double factor = 1.0 / sum;
-	for (int i = 0; i < kern->w * kern->h; i++) {
-		kern->data[i] *= factor;
-	}
-	if (kern->rsum) {
-		free(kern->rsum);
-		kern->rsum = NULL;
-	}
-}
-
 // vim: set noet sw=8 ts=8 :
diff --git a/src/kernel.h b/src/kernel.h
index 23dc38d..daf60d6 100644
--- a/src/kernel.h
+++ b/src/kernel.h
@@ -25,13 +25,6 @@ conv *gaussian_kernel(double r);
 /// shadow_sum[x*d+y] is the sum of the kernel from (0, 0) to (x, y), inclusive
 void sum_kernel_preprocess(conv *map);
 
-/**
- * Normalize a convolution kernel.
- *
- * @param[in,out] kern the kernel
- */
-void normalize_conv_kern(conv *kern);
-
 static inline void free_conv(conv *k) {
 	free(k->rsum);
 	free(k);
diff --git a/src/render.c b/src/render.c
index 23a1ace..a122eb7 100644
--- a/src/render.c
+++ b/src/render.c
@@ -688,29 +688,14 @@ static inline void win_blur_background(session_t *ps, win *w, xcb_render_picture
 			                     kern_src->h == kern_dst[1] / 65536));
 
 			// Skip for fixed factor_center if the cache exists already
-			if (ps->o.blur_background_fixed && kern_dst)
+			if (ps->o.blur_background_fixed && kern_dst) {
 				continue;
-
-			// Allocate cache space if needed
-			if (!kern_dst) {
-				kern_dst = ccalloc(kern_src->w * kern_src->h + 2,
-				                   xcb_render_fixed_t);
-				ps->blur_kerns_cache[i] = kern_dst;
 			}
 
-			double sum = factor_center;
-			for (int j = 0; j < kern_src->w * kern_src->h; j++) {
-				sum += kern_src->data[j];
-			}
-			// Copy src to dst, normalizing in the process
-			for (int j = 0; j < kern_src->w * kern_src->h; j++) {
-				kern_dst[j + 2] = kern_src->data[j] / sum * 65536;
-			}
-			// Modify the factor of the center pixel
-			kern_dst[2 + (kern_src->h / 2) * kern_src->w + kern_src->w / 2] =
-			    factor_center / sum * 65536;
-			kern_dst[0] = kern_src->w * 65536;
-			kern_dst[1] = kern_src->h * 65536;
+			// If kern_dst is allocated, it's always allocated to the right size
+			size_t size = kern_dst ? kern_src->w * kern_src->h + 2 : 0;
+			x_picture_filter_from_conv(kern_src, factor_center, &kern_dst, &size);
+			ps->blur_kerns_cache[i] = kern_dst;
 		}
 
 		// Minimize the region we try to blur, if the window itself is not
diff --git a/src/x.c b/src/x.c
index 333e0b1..e862e6d 100644
--- a/src/x.c
+++ b/src/x.c
@@ -520,23 +520,38 @@ bool x_fence_sync(xcb_connection_t *c, xcb_sync_fence_t f) {
 #define DOUBLE_TO_XFIXED(value) ((xcb_render_fixed_t) (((double) (value)) * 65536))
 
 /**
- * Set the picture filter of a xrender picture to a convolution
- * kernel.
+ * Convert a struct conv to an X picture convolution filter, normalizing the kernel
+ * in the process. Allow the caller to specify the element at the center of the kernel,
+ * for compatibility with legacy code.
  *
- * @param c   xcb connection
- * @param pict the picture
- * @param kern the convolution kernel
+ * @param[in] kernel the convolution kernel
+ * @param[in] center the element to put at the center of the matrix
+ * @param[inout] ret pointer to an array of `*size` elements. If `*size` is too small,
+ *                   more space will be allocated, and `*ret` and `*size` will be updated
+ * @param[inout] size size of the array pointed to by `ret`, in number of elements
+ * @return number of elements filled into `*ret`
  */
-void
-x_set_picture_convolution_kernel(xcb_connection_t *c,
-                                 xcb_render_picture_t pict, conv *kernel) {
-  auto buf = ccalloc(kernel->w * kernel->h + 2, xcb_render_fixed_t);
-  static const char *filter = "convolution";
+size_t x_picture_filter_from_conv(const conv *kernel, double center, xcb_render_fixed_t **ret,
+                                size_t *size) {
+  if (*size < (size_t)(kernel->w * kernel->h + 2)) {
+    *size = kernel->w * kernel->h + 2;
+    *ret = crealloc(*ret, *size);
+  }
+  auto buf = *ret;
   buf[0] = DOUBLE_TO_XFIXED(kernel->w);
   buf[1] = DOUBLE_TO_XFIXED(kernel->h);
+  double sum = center;
   for (int i = 0; i < kernel->w * kernel->h; i++) {
-    buf[i + 2] = DOUBLE_TO_XFIXED(kernel->data[i]);
+    sum += kernel->data[i];
   }
-  xcb_render_set_picture_filter(c, pict, sizeof(filter), filter, kernel->w * kernel->h + 2, buf);
-  free(buf);
+
+  // Note: in floating point, a / b may differ from a * (1 / b), but the difference
+  // shouldn't have any real impact on the result
+  double factor = sum != 0 ? 1.0 / sum : 1;
+  for (int i = 0; i < kernel->w * kernel->h; i++) {
+    buf[i + 2] = DOUBLE_TO_XFIXED(kernel->data[i] * factor);
+  }
+
+  buf[kernel->h / 2 * kernel->w + kernel->w / 2 + 2] = DOUBLE_TO_XFIXED(center * factor);
+  return kernel->w * kernel->h + 2;
 }
diff --git a/src/x.h b/src/x.h
index a3303fa..f912703 100644
--- a/src/x.h
+++ b/src/x.h
@@ -170,13 +170,14 @@ bool x_is_root_back_pixmap_atom(session_t *ps, xcb_atom_t atom);
 bool x_fence_sync(xcb_connection_t *, xcb_sync_fence_t);
 
 /**
- * Set the picture filter of a xrender picture to a convolution
- * kernel.
+ * Convert a struct conv to an X picture convolution filter, normalizing the kernel
+ * in the process. Allow the caller to specify the element at the center of the kernel,
+ * for compatibility with legacy code.
  *
- * @param c   xcb connection
- * @param pict the picture
- * @param kern the convolution kernel
+ * @param[in] kernel the convolution kernel
+ * @param[in] center the element to put at the center of the matrix
+ * @param[inout] ret pointer to an array of `*size` elements. If `*size` is too small,
+ *                   more space will be allocated, and `*ret` and `*size` will be updated.
+ * @param[inout] size size of the array pointed to by `ret`, in number of elements.
  */
-void
-x_set_picture_convolution_kernel(xcb_connection_t *c,
-                                 xcb_render_picture_t pict, conv *kernel);
+size_t x_picture_filter_from_conv(const conv *kernel, double center, xcb_render_fixed_t **ret, size_t *size);