Imp: Fix GL_TEXTURE_RECTANGLE & Enhance --glx-copy-from-front

- Fix GL_TEXTURE_RECTANGLE support. Thanks to amonakov for guidance.
  (#107)

- Enhance --glx-copy-from-front to improve performance and make it work
  with --glx-swap-method, copied from kwin patch. Thanks to bwat47 for
  info. (#107)

- Add texture2Doffset() support in blur GLSL shader. Thanks to amonakov
  for advice. No visible benefit here, though. (#107)

- Only limited tests are done and I'm super sleepy. Bugs expected.
This commit is contained in:
Richard Grenville 2013-05-08 22:44:36 +08:00
parent 555131101f
commit 1c66237f99
3 changed files with 160 additions and 70 deletions

View File

@ -392,6 +392,8 @@ typedef struct {
bool glx_no_rebind_pixmap; bool glx_no_rebind_pixmap;
/// GLX swap method we assume OpenGL uses. /// GLX swap method we assume OpenGL uses.
int glx_swap_method; int glx_swap_method;
/// Whether to use GL_EXT_gpu_shader4 to (hopefully) accelerate blurring.
bool glx_use_gpushader4;
/// Whether to try to detect WM windows and mark them as focused. /// Whether to try to detect WM windows and mark them as focused.
bool mark_wmwin_focused; bool mark_wmwin_focused;
/// Whether to mark override-redirect windows as focused. /// Whether to mark override-redirect windows as focused.
@ -1234,7 +1236,7 @@ mstrncpy(const char *src, unsigned len) {
/** /**
* Allocate the space and join two strings. * Allocate the space and join two strings.
*/ */
static inline char * __attribute__((const)) static inline char *
mstrjoin(const char *src1, const char *src2) { mstrjoin(const char *src1, const char *src2) {
char *str = malloc(sizeof(char) * (strlen(src1) + strlen(src2) + 1)); char *str = malloc(sizeof(char) * (strlen(src1) + strlen(src2) + 1));
@ -1247,7 +1249,7 @@ mstrjoin(const char *src1, const char *src2) {
/** /**
* Allocate the space and join two strings; * Allocate the space and join two strings;
*/ */
static inline char * __attribute__((const)) static inline char *
mstrjoin3(const char *src1, const char *src2, const char *src3) { mstrjoin3(const char *src1, const char *src2, const char *src3) {
char *str = malloc(sizeof(char) * (strlen(src1) + strlen(src2) char *str = malloc(sizeof(char) * (strlen(src1) + strlen(src2)
+ strlen(src3) + 1)); + strlen(src3) + 1));
@ -1259,6 +1261,16 @@ mstrjoin3(const char *src1, const char *src2, const char *src3) {
return str; return str;
} }
/**
 * Concatenate a string on heap with another string.
 *
 * @param psrc1 address of a pointer to a heap-allocated string; the pointer
 *              may be NULL, in which case it is treated as an empty string.
 *              On success it is updated to point to the reallocated,
 *              extended string, which the caller must eventually free().
 * @param src2  NUL-terminated string to append
 *
 * If the reallocation fails, *psrc1 is left untouched (still valid, still
 * owned by the caller) and nothing is appended.
 */
static inline void
mstrextend(char **psrc1, const char *src2) {
  const size_t len1 = (*psrc1 ? strlen(*psrc1) : 0);
  const size_t len2 = strlen(src2);

  // Keep the old pointer until realloc() is known to have succeeded, so the
  // original buffer is neither leaked nor replaced by NULL on failure.
  char *str = realloc(*psrc1, len1 + len2 + 1);
  if (!str)
    return;

  // Copy at a known offset instead of strcat(): when *psrc1 was NULL the
  // fresh buffer is uninitialized and has no terminator for strcat() to
  // find. The "+ 1" copies src2's terminating NUL as well.
  memcpy(str + len1, src2, len2 + 1);
  *psrc1 = str;
}
/** /**
* Normalize an int value to a specific range. * Normalize an int value to a specific range.
* *

View File

@ -4238,9 +4238,10 @@ usage(void) {
" boost.\n" " boost.\n"
"--glx-copy-from-front\n" "--glx-copy-from-front\n"
" GLX backend: Copy unmodified regions from front buffer instead of\n" " GLX backend: Copy unmodified regions from front buffer instead of\n"
" redrawing them all. My tests with nvidia-drivers show a 10% decrease\n" " redrawing them all. My tests with nvidia-drivers show a 5% decrease\n"
" in performance when the whole screen is modified, but a 20% increase\n" " in performance when the whole screen is modified, but a 30% increase\n"
" when only 1/4 is. My tests on nouveau show terrible slowdown.\n" " when only 1/4 is. My tests on nouveau show terrible slowdown. Could\n"
" work with --glx-swap-method but not --glx-use-copysubbuffermesa.\n"
"--glx-use-copysubbuffermesa\n" "--glx-use-copysubbuffermesa\n"
" GLX backend: Use MESA_copy_sub_buffer to do partial screen update.\n" " GLX backend: Use MESA_copy_sub_buffer to do partial screen update.\n"
" My tests on nouveau shows a 200% performance boost when only 1/4 of\n" " My tests on nouveau shows a 200% performance boost when only 1/4 of\n"
@ -4258,6 +4259,9 @@ usage(void) {
" but safer (6 is still faster than 0). -1 means auto-detect using\n" " but safer (6 is still faster than 0). -1 means auto-detect using\n"
" GLX_EXT_buffer_age, supported by some drivers. Useless with\n" " GLX_EXT_buffer_age, supported by some drivers. Useless with\n"
" --glx-use-copysubbuffermesa.\n" " --glx-use-copysubbuffermesa.\n"
"--glx-use-gpushader4\n"
" GLX backend: Use GL_EXT_gpu_shader4 for some optimization on blur\n"
" GLSL code. My tests on GTX 670 show no noticeable effect.\n"
#undef WARNING #undef WARNING
#ifndef CONFIG_DBUS #ifndef CONFIG_DBUS
#define WARNING WARNING_DISABLED #define WARNING WARNING_DISABLED
@ -4879,6 +4883,7 @@ get_cfg(session_t *ps, int argc, char *const *argv, bool first_pass) {
{ "fade-exclude", required_argument, NULL, 300 }, { "fade-exclude", required_argument, NULL, 300 },
{ "blur-kern", required_argument, NULL, 301 }, { "blur-kern", required_argument, NULL, 301 },
{ "resize-damage", required_argument, NULL, 302 }, { "resize-damage", required_argument, NULL, 302 },
{ "glx-use-gpushader4", no_argument, NULL, 303 },
// Must terminate with a NULL entry // Must terminate with a NULL entry
{ NULL, 0, NULL, 0 }, { NULL, 0, NULL, 0 },
}; };
@ -5111,6 +5116,7 @@ get_cfg(session_t *ps, int argc, char *const *argv, bool first_pass) {
// --resize-damage // --resize-damage
ps->o.resize_damage = atoi(optarg); ps->o.resize_damage = atoi(optarg);
break; break;
P_CASEBOOL(303, glx_use_gpushader4);
default: default:
usage(); usage();
break; break;

View File

@ -197,23 +197,41 @@ glx_init_blur(session_t *ps) {
{ {
static const char *FRAG_SHADER_BLUR_PREFIX = static const char *FRAG_SHADER_BLUR_PREFIX =
"#version 110\n" "#version 110\n"
"%s"
"uniform float offset_x;\n" "uniform float offset_x;\n"
"uniform float offset_y;\n" "uniform float offset_y;\n"
"uniform float factor_center;\n" "uniform float factor_center;\n"
"uniform sampler2D tex_scr;\n" "uniform %s tex_scr;\n"
"\n" "\n"
"void main() {\n" "void main() {\n"
" vec4 sum = vec4(0.0, 0.0, 0.0, 0.0);\n"; " vec4 sum = vec4(0.0, 0.0, 0.0, 0.0);\n";
static const char *FRAG_SHADER_BLUR_ADD = static const char *FRAG_SHADER_BLUR_ADD =
" sum += float(%.7g) * texture2D(tex_scr, vec2(gl_TexCoord[0].x + offset_x * float(%d), gl_TexCoord[0].y + offset_y * float(%d)));\n"; " sum += float(%.7g) * %s(tex_scr, vec2(gl_TexCoord[0].x + offset_x * float(%d), gl_TexCoord[0].y + offset_y * float(%d)));\n";
static const char *FRAG_SHADER_BLUR_ADD_GPUSHADER4 =
" sum += float(%.7g) * %sOffset(tex_scr, vec2(gl_TexCoord[0].x, gl_TexCoord[0].y), ivec2(%d, %d));\n";
static const char *FRAG_SHADER_BLUR_SUFFIX = static const char *FRAG_SHADER_BLUR_SUFFIX =
" sum += texture2D(tex_scr, vec2(gl_TexCoord[0].x, gl_TexCoord[0].y)) * factor_center;\n" " sum += %s(tex_scr, vec2(gl_TexCoord[0].x, gl_TexCoord[0].y)) * factor_center;\n"
" gl_FragColor = sum / (factor_center + float(%.7g));\n" " gl_FragColor = sum / (factor_center + float(%.7g));\n"
"}\n"; "}\n";
int wid = XFixedToDouble(ps->o.blur_kern[0]), hei = XFixedToDouble(ps->o.blur_kern[1]);
const bool use_texture_rect = !ps->glx_has_texture_non_power_of_two;
const char *sampler_type = (use_texture_rect ?
"sampler2DRect": "sampler2D");
const char *texture_func = (use_texture_rect ?
"texture2DRect": "texture2D");
const char *shader_add = FRAG_SHADER_BLUR_ADD;
char *extension = mstrcpy("");
if (use_texture_rect)
mstrextend(&extension, "#extension GL_ARB_texture_rectangle : require\n");
if (ps->o.glx_use_gpushader4) {
mstrextend(&extension, "#extension GL_EXT_gpu_shader4 : require\n");
shader_add = FRAG_SHADER_BLUR_ADD_GPUSHADER4;
}
int wid = XFixedToDouble(ps->o.blur_kern[0]),
hei = XFixedToDouble(ps->o.blur_kern[1]);
int nele = wid * hei - 1; int nele = wid * hei - 1;
int len = strlen(FRAG_SHADER_BLUR_PREFIX) + (strlen(FRAG_SHADER_BLUR_ADD) + 42) * nele int len = strlen(FRAG_SHADER_BLUR_PREFIX) + strlen(sampler_type) + strlen(extension) + (strlen(shader_add) + strlen(texture_func) + 42) * nele + strlen(FRAG_SHADER_BLUR_SUFFIX) + strlen(texture_func) + 12 + 1;
+ strlen(FRAG_SHADER_BLUR_SUFFIX) + 12 + 1;
char *shader_str = calloc(len, sizeof(char)); char *shader_str = calloc(len, sizeof(char));
if (!shader_str) { if (!shader_str) {
printf_errf("(): Failed to allocate %d bytes for shader string.", len); printf_errf("(): Failed to allocate %d bytes for shader string.", len);
@ -221,8 +239,8 @@ glx_init_blur(session_t *ps) {
} }
{ {
char *pc = shader_str; char *pc = shader_str;
strcpy(pc, FRAG_SHADER_BLUR_PREFIX); sprintf(pc, FRAG_SHADER_BLUR_PREFIX, extension, sampler_type);
pc += strlen(FRAG_SHADER_BLUR_PREFIX); pc += strlen(pc);
assert(strlen(shader_str) < len); assert(strlen(shader_str) < len);
double sum = 0.0; double sum = 0.0;
@ -232,13 +250,13 @@ glx_init_blur(session_t *ps) {
continue; continue;
double val = XFixedToDouble(ps->o.blur_kern[2 + i * wid + j]); double val = XFixedToDouble(ps->o.blur_kern[2 + i * wid + j]);
sum += val; sum += val;
sprintf(pc, FRAG_SHADER_BLUR_ADD, val, j - wid / 2, i - hei / 2); sprintf(pc, shader_add, val, texture_func, j - wid / 2, i - hei / 2);
pc += strlen(pc); pc += strlen(pc);
assert(strlen(shader_str) < len); assert(strlen(shader_str) < len);
} }
} }
sprintf(pc, FRAG_SHADER_BLUR_SUFFIX, sum); sprintf(pc, FRAG_SHADER_BLUR_SUFFIX, texture_func, sum);
assert(strlen(shader_str) < len); assert(strlen(shader_str) < len);
#ifdef DEBUG_GLX_GLSL #ifdef DEBUG_GLX_GLSL
fputs(shader_str, stdout); fputs(shader_str, stdout);
@ -246,6 +264,7 @@ glx_init_blur(session_t *ps) {
#endif #endif
} }
ps->glx_frag_shader_blur = glx_create_shader(GL_FRAGMENT_SHADER, shader_str); ps->glx_frag_shader_blur = glx_create_shader(GL_FRAGMENT_SHADER, shader_str);
free(extension);
free(shader_str); free(shader_str);
} }
@ -268,8 +287,10 @@ glx_init_blur(session_t *ps) {
} }
P_GET_UNIFM_LOC("factor_center", glx_prog_blur_unifm_factor_center); P_GET_UNIFM_LOC("factor_center", glx_prog_blur_unifm_factor_center);
if (!ps->o.glx_use_gpushader4) {
P_GET_UNIFM_LOC("offset_x", glx_prog_blur_unifm_offset_x); P_GET_UNIFM_LOC("offset_x", glx_prog_blur_unifm_offset_x);
P_GET_UNIFM_LOC("offset_y", glx_prog_blur_unifm_offset_y); P_GET_UNIFM_LOC("offset_y", glx_prog_blur_unifm_offset_y);
}
#undef P_GET_UNIFM_LOC #undef P_GET_UNIFM_LOC
@ -593,9 +614,23 @@ glx_paint_pre(session_t *ps, XserverRegion *preg) {
// glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
// Get buffer age // Get buffer age
int buffer_age = ps->o.glx_swap_method;
bool trace_damage = (ps->o.glx_swap_method < 0 || ps->o.glx_swap_method > 1); bool trace_damage = (ps->o.glx_swap_method < 0 || ps->o.glx_swap_method > 1);
// Trace raw damage regions
XserverRegion newdamage = None;
if (trace_damage && *preg)
newdamage = copy_region(ps, *preg);
// OpenGL doesn't support partial repaint without GLX_MESA_copy_sub_buffer,
// we could redraw the whole screen or copy unmodified pixels from
// front buffer with --glx-copy-from-front.
if (ps->o.glx_use_copysubbuffermesa || !*preg) {
}
else {
int buffer_age = ps->o.glx_swap_method;
// Getting buffer age
{
// Query GLX_EXT_buffer_age for buffer age // Query GLX_EXT_buffer_age for buffer age
if (SWAPM_BUFFER_AGE == buffer_age) { if (SWAPM_BUFFER_AGE == buffer_age) {
unsigned val = 0; unsigned val = 0;
@ -611,46 +646,68 @@ glx_paint_pre(session_t *ps, XserverRegion *preg) {
// Make sure buffer age >= 0 // Make sure buffer age >= 0
buffer_age = max_i(buffer_age, 0); buffer_age = max_i(buffer_age, 0);
// Trace raw damage regions // Check if we have empty regions
XserverRegion newdamage = None; if (buffer_age > 1) {
if (trace_damage && *preg) for (int i = 0; i < buffer_age - 1; ++i)
newdamage = copy_region(ps, *preg); if (!ps->all_damage_last[i]) { buffer_age = 0; break; }
}
}
// OpenGL doesn't support partial repaint without GLX_MESA_copy_sub_buffer, // Do nothing for buffer_age 1 (copy)
// we could redraw the whole screen or copy unmodified pixels from if (1 != buffer_age) {
// front buffer with --glx-copy-from-front. // Copy pixels
if (ps->o.glx_use_copysubbuffermesa || 1 == buffer_age || !*preg) { if (ps->o.glx_copy_from_front) {
} // Determine copy area
else if (buffer_age > 1) { XserverRegion reg_copy = XFixesCreateRegion(ps->dpy, NULL, 0);
for (int i = 0; i < buffer_age - 1; ++i) { if (!buffer_age) {
XserverRegion dmg = ps->all_damage_last[i]; XFixesSubtractRegion(ps->dpy, reg_copy, ps->screen_reg, *preg);
if (!dmg) {
free_region(ps, preg);
break;
}
XFixesUnionRegion(ps->dpy, *preg, *preg, dmg);
}
}
else if (!ps->o.glx_copy_from_front) {
free_region(ps, preg);
} }
else { else {
{ for (int i = 0; i < buffer_age - 1; ++i)
XserverRegion reg_copy = XFixesCreateRegion(ps->dpy, NULL, 0); XFixesUnionRegion(ps->dpy, reg_copy, reg_copy,
XFixesSubtractRegion(ps->dpy, reg_copy, ps->screen_reg, *preg); ps->all_damage_last[i]);
glx_set_clip(ps, reg_copy, NULL); XFixesSubtractRegion(ps->dpy, reg_copy, reg_copy, *preg);
free_region(ps, &reg_copy);
} }
// Actually copy pixels
{ {
GLfloat raster_pos[4]; GLfloat raster_pos[4];
GLfloat curx = 0.0f, cury = 0.0f;
glGetFloatv(GL_CURRENT_RASTER_POSITION, raster_pos); glGetFloatv(GL_CURRENT_RASTER_POSITION, raster_pos);
glReadBuffer(GL_FRONT); glReadBuffer(GL_FRONT);
glRasterPos2f(0.0, 0.0); glRasterPos2f(0.0, 0.0);
glCopyPixels(0, 0, ps->root_width, ps->root_height, GL_COLOR); {
int nrects = 0;
XRectangle *rects = XFixesFetchRegion(ps->dpy, reg_copy, &nrects);
for (int i = 0; i < nrects; ++i) {
const int x = rects[i].x;
const int y = ps->root_height - rects[i].y - rects[i].height;
// Kwin patch says glRasterPos2f() causes artifacts on bottom
// screen edge with some drivers
glBitmap(0, 0, 0, 0, x - curx, y - cury, NULL);
curx = x;
cury = y;
glCopyPixels(x, y, rects[i].width, rects[i].height, GL_COLOR);
}
cxfree(rects);
}
glReadBuffer(GL_BACK); glReadBuffer(GL_BACK);
glRasterPos4fv(raster_pos); glRasterPos4fv(raster_pos);
} }
free_region(ps, &reg_copy);
}
// Determine paint area
if (ps->o.glx_copy_from_front) { }
else if (buffer_age) {
for (int i = 0; i < buffer_age - 1; ++i)
XFixesUnionRegion(ps->dpy, *preg, *preg, ps->all_damage_last[i]);
}
else {
free_region(ps, preg);
}
}
} }
if (trace_damage) { if (trace_damage) {
@ -819,13 +876,20 @@ glx_blur_dst(session_t *ps, int dx, int dy, int width, int height, float z,
glTexParameteri(tex_tgt, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(tex_tgt, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(tex_tgt, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(tex_tgt, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(tex_tgt, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glTexParameteri(tex_tgt, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage2D(tex_tgt, 0, GL_RGB, mwidth, mheight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL); glTexImage2D(tex_tgt, 0, GL_RGB, mwidth, mheight, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
glCopyTexSubImage2D(tex_tgt, 0, 0, 0, mdx, ps->root_height - mdy - mheight, mwidth, mheight); glCopyTexSubImage2D(tex_tgt, 0, 0, 0, mdx, ps->root_height - mdy - mheight, mwidth, mheight);
#ifdef DEBUG_GLX #ifdef DEBUG_GLX
printf_dbgf("(): %d, %d, %d, %d\n", mdx, ps->root_height - mdy - mheight, mwidth, mheight); printf_dbgf("(): %d, %d, %d, %d\n", mdx, ps->root_height - mdy - mheight, mwidth, mheight);
#endif #endif
// Texture scaling factor
GLfloat texfac_x = 1.0f, texfac_y = 1.0f;
if (GL_TEXTURE_2D == tex_tgt) {
texfac_x /= mwidth;
texfac_y /= mheight;
}
// Paint it back // Paint it back
// Color negation for testing... // Color negation for testing...
// glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_COMBINE); // glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_COMBINE);
@ -836,9 +900,9 @@ glx_blur_dst(session_t *ps, int dx, int dy, int width, int height, float z,
#ifdef CONFIG_VSYNC_OPENGL_GLSL #ifdef CONFIG_VSYNC_OPENGL_GLSL
glUseProgram(ps->glx_prog_blur); glUseProgram(ps->glx_prog_blur);
if (ps->glx_prog_blur_unifm_offset_x >= 0) if (ps->glx_prog_blur_unifm_offset_x >= 0)
glUniform1f(ps->glx_prog_blur_unifm_offset_x, 1.0f / mwidth); glUniform1f(ps->glx_prog_blur_unifm_offset_x, texfac_x);
if (ps->glx_prog_blur_unifm_offset_y >= 0) if (ps->glx_prog_blur_unifm_offset_y >= 0)
glUniform1f(ps->glx_prog_blur_unifm_offset_y, 1.0f / mheight); glUniform1f(ps->glx_prog_blur_unifm_offset_y, texfac_y);
if (ps->glx_prog_blur_unifm_factor_center >= 0) if (ps->glx_prog_blur_unifm_factor_center >= 0)
glUniform1f(ps->glx_prog_blur_unifm_factor_center, factor_center); glUniform1f(ps->glx_prog_blur_unifm_factor_center, factor_center);
#endif #endif
@ -846,10 +910,10 @@ glx_blur_dst(session_t *ps, int dx, int dy, int width, int height, float z,
{ {
P_PAINTREG_START(); P_PAINTREG_START();
{ {
const GLfloat rx = (double) (crect.x - mdx) / mwidth; const GLfloat rx = (crect.x - mdx) * texfac_x;
const GLfloat ry = 1.0 - (double) (crect.y - mdy) / mheight; const GLfloat ry = (mheight - (crect.y - mdy)) * texfac_y;
const GLfloat rxe = rx + (double) crect.width / mwidth; const GLfloat rxe = rx + crect.width * texfac_x;
const GLfloat rye = ry - (double) crect.height / mheight; const GLfloat rye = ry - crect.height * texfac_y;
const GLfloat rdx = crect.x; const GLfloat rdx = crect.x;
const GLfloat rdy = ps->root_height - crect.y; const GLfloat rdy = ps->root_height - crect.y;
const GLfloat rdxe = rdx + crect.width; const GLfloat rdxe = rdx + crect.width;
@ -1049,10 +1113,18 @@ glx_render(session_t *ps, const glx_texture_t *ptex,
{ {
P_PAINTREG_START(); P_PAINTREG_START();
{ {
GLfloat rx = (double) (crect.x - dx + x) / ptex->width; GLfloat rx = (double) (crect.x - dx + x);
GLfloat ry = (double) (crect.y - dy + y) / ptex->height; GLfloat ry = (double) (crect.y - dy + y);
GLfloat rxe = rx + (double) crect.width / ptex->width; GLfloat rxe = rx + (double) crect.width;
GLfloat rye = ry + (double) crect.height / ptex->height; GLfloat rye = ry + (double) crect.height;
// Rectangle textures have [0-w] [0-h] while 2D texture has [0-1] [0-1]
// Thanks to amonakov for pointing out!
if (GL_TEXTURE_2D == ptex->target) {
rx = rx / ptex->width;
ry = ry / ptex->height;
rxe = rxe / ptex->width;
rye = rye / ptex->height;
}
GLint rdx = crect.x; GLint rdx = crect.x;
GLint rdy = ps->root_height - crect.y; GLint rdy = ps->root_height - crect.y;
GLint rdxe = rdx + crect.width; GLint rdxe = rdx + crect.width;