Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

opengl performance improvements #1410

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 94 additions & 70 deletions src/accelerator/ogl/image/image_kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
* Author: Robert Nagy, [email protected]
*/
#include "image_kernel.h"

#include "core/mixer/image/blend_modes.h"
#include "image_shader.h"

#include "../util/device.h"
Expand Down Expand Up @@ -110,16 +110,34 @@ struct image_kernel::impl
{
spl::shared_ptr<device> ogl_;
spl::shared_ptr<shader> shader_;
spl::shared_ptr<shader> shader_fast_;
GLuint vao_;
GLuint vbo_;

/// Acquires the regular and "fast" image shaders and creates the vertex
/// array / vertex buffer pair stored in vao_ and vbo_.
///
/// All GL calls are issued through ogl_->dispatch_sync (presumably so they run
/// on the device's GL context thread -- confirm against device).
///
/// @param ogl Device used both to obtain the shaders and to dispatch GL work.
explicit impl(const spl::shared_ptr<device>& ogl)
    : ogl_(ogl)
    , shader_(ogl_->dispatch_sync([&] { return get_image_shader(ogl); }))
    , shader_fast_(ogl_->dispatch_sync([&] { return get_fast_image_shader(ogl); }))
{
    ogl_->dispatch_sync([&] {
        // The direct-state-access calls below (glNamed* / glVertexArray*) need
        // fully created objects, which only glCreate* provides. Calling glGen*
        // first would just leak the generated names once glCreate* overwrites
        // vao_ / vbo_.
        GL(glCreateVertexArrays(1, &vao_));
        GL(glCreateBuffers(1, &vbo_));

        // Immutable storage; GL_DYNAMIC_STORAGE_BIT allows later updates via
        // glNamedBufferSubData.
        constexpr GLsizeiptr vbo_capacity = 1024 * 1024 * 4;
        GL(glNamedBufferStorage(vbo_, vbo_capacity, nullptr, GL_DYNAMIC_STORAGE_BIT));

        // NOTE(review): locations are queried from shader_ only; this assumes
        // shader_fast_ exposes "Position"/"TexCoordIn" at the same locations
        // (both are built from the same vertex shader source) -- confirm.
        auto vtx_loc = shader_->get_attrib_location("Position");
        auto tex_loc = shader_->get_attrib_location("TexCoordIn");

        GL(glEnableVertexArrayAttrib(vao_, vtx_loc));
        GL(glEnableVertexArrayAttrib(vao_, tex_loc));

        // Position: 2 doubles at offset 0; TexCoordIn: 4 doubles right after it.
        GL(glVertexArrayAttribFormat(vao_, vtx_loc, 2, GL_DOUBLE, GL_FALSE, 0));
        GL(glVertexArrayAttribFormat(vao_, tex_loc, 4, GL_DOUBLE, GL_FALSE, (2 * sizeof(GLdouble))));

        // Both attributes read from vertex buffer binding point 0.
        GL(glVertexArrayAttribBinding(vao_, vtx_loc, 0));
        GL(glVertexArrayAttribBinding(vao_, tex_loc, 0));

        // One vertex per core::frame_geometry::coord element.
        auto stride = static_cast<GLsizei>(sizeof(core::frame_geometry::coord));
        GL(glVertexArrayVertexBuffer(vao_, 0, vbo_, 0, stride));
    });
}

Expand Down Expand Up @@ -235,34 +253,61 @@ struct image_kernel::impl
params.layer_key->bind(static_cast<int>(texture_id::layer_key));
}

bool fast = false;
// Setup shader

shader_->use();
bool levels = (params.transform.levels.min_input > epsilon || params.transform.levels.max_input < 1.0 - epsilon ||
params.transform.levels.min_output > epsilon || params.transform.levels.max_output < 1.0 - epsilon ||
std::abs(params.transform.levels.gamma - 1.0) > epsilon);

shader_->set("plane[0]", texture_id::plane0);
shader_->set("plane[1]", texture_id::plane1);
shader_->set("plane[2]", texture_id::plane2);
shader_->set("plane[3]", texture_id::plane3);
shader_->set("local_key", texture_id::local_key);
shader_->set("layer_key", texture_id::layer_key);
shader_->set("is_hd", params.pix_desc.planes.at(0).height > 700 ? 1 : 0);
shader_->set("has_local_key", static_cast<bool>(params.local_key));
shader_->set("has_layer_key", static_cast<bool>(params.layer_key));
shader_->set("pixel_format", params.pix_desc.format);
shader_->set("opacity", params.transform.is_key ? 1.0 : params.transform.opacity);
bool csb = (std::abs(params.transform.brightness - 1.0) > epsilon ||
std::abs(params.transform.saturation - 1.0) > epsilon ||
std::abs(params.transform.contrast - 1.0) > epsilon);

auto shader = shader_;
if (params.blend_mode <= core::blend_mode::normal &&
!params.local_key &&
!params.layer_key &&
!params.transform.is_key &&
!params.transform.chroma.enable &&
!levels &&
!csb &&
!params.transform.invert) {
shader = shader_fast_;
fast = true;
GL(glEnable(GL_BLEND));
if (params.keyer == ogl::keyer::additive) {
GL(glBlendFunc(GL_SRC_ALPHA, GL_ONE));
} else {
GL(glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA));
}
}
shader->use();

shader->set("plane[0]", texture_id::plane0);
shader->set("plane[1]", texture_id::plane1);
shader->set("plane[2]", texture_id::plane2);
shader->set("plane[3]", texture_id::plane3);
shader->set("local_key", texture_id::local_key);
shader->set("layer_key", texture_id::layer_key);
shader->set("is_hd", params.pix_desc.planes.at(0).height > 700 ? 1 : 0);
shader->set("has_local_key", static_cast<bool>(params.local_key));
shader->set("has_layer_key", static_cast<bool>(params.layer_key));
shader->set("pixel_format", params.pix_desc.format);
shader->set("opacity", params.transform.is_key ? 1.0 : params.transform.opacity);

if (params.transform.chroma.enable) {
shader_->set("chroma", true);
shader_->set("chroma_show_mask", params.transform.chroma.show_mask);
shader_->set("chroma_target_hue", params.transform.chroma.target_hue / 360.0);
shader_->set("chroma_hue_width", params.transform.chroma.hue_width);
shader_->set("chroma_min_saturation", params.transform.chroma.min_saturation);
shader_->set("chroma_min_brightness", params.transform.chroma.min_brightness);
shader_->set("chroma_softness", 1.0 + params.transform.chroma.softness);
shader_->set("chroma_spill_suppress", params.transform.chroma.spill_suppress / 360.0);
shader_->set("chroma_spill_suppress_saturation", params.transform.chroma.spill_suppress_saturation);
shader->set("chroma", true);
shader->set("chroma_show_mask", params.transform.chroma.show_mask);
shader->set("chroma_target_hue", params.transform.chroma.target_hue / 360.0);
shader->set("chroma_hue_width", params.transform.chroma.hue_width);
shader->set("chroma_min_saturation", params.transform.chroma.min_saturation);
shader->set("chroma_min_brightness", params.transform.chroma.min_brightness);
shader->set("chroma_softness", 1.0 + params.transform.chroma.softness);
shader->set("chroma_spill_suppress", params.transform.chroma.spill_suppress / 360.0);
shader->set("chroma_spill_suppress_saturation", params.transform.chroma.spill_suppress_saturation);
} else {
shader_->set("chroma", false);
shader->set("chroma", false);
}

// Setup blend_func
Expand All @@ -272,36 +317,32 @@ struct image_kernel::impl
}

params.background->bind(static_cast<int>(texture_id::background));
shader_->set("background", texture_id::background);
shader_->set("blend_mode", params.blend_mode);
shader_->set("keyer", params.keyer);
shader->set("background", texture_id::background);
shader->set("blend_mode", params.blend_mode);
shader->set("keyer", params.keyer);

// Setup image-adjustments
shader_->set("invert", params.transform.invert);

if (params.transform.levels.min_input > epsilon || params.transform.levels.max_input < 1.0 - epsilon ||
params.transform.levels.min_output > epsilon || params.transform.levels.max_output < 1.0 - epsilon ||
std::abs(params.transform.levels.gamma - 1.0) > epsilon) {
shader_->set("levels", true);
shader_->set("min_input", params.transform.levels.min_input);
shader_->set("max_input", params.transform.levels.max_input);
shader_->set("min_output", params.transform.levels.min_output);
shader_->set("max_output", params.transform.levels.max_output);
shader_->set("gamma", params.transform.levels.gamma);
shader->set("invert", params.transform.invert);

if (levels) {
shader->set("levels", true);
shader->set("min_input", params.transform.levels.min_input);
shader->set("max_input", params.transform.levels.max_input);
shader->set("min_output", params.transform.levels.min_output);
shader->set("max_output", params.transform.levels.max_output);
shader->set("gamma", params.transform.levels.gamma);
} else {
shader_->set("levels", false);
shader->set("levels", false);
}

if (std::abs(params.transform.brightness - 1.0) > epsilon ||
std::abs(params.transform.saturation - 1.0) > epsilon ||
std::abs(params.transform.contrast - 1.0) > epsilon) {
shader_->set("csb", true);
if (csb) {
shader->set("csb", true);

shader_->set("brt", params.transform.brightness);
shader_->set("sat", params.transform.saturation);
shader_->set("con", params.transform.contrast);
shader->set("brt", params.transform.brightness);
shader->set("sat", params.transform.saturation);
shader->set("con", params.transform.contrast);
} else {
shader_->set("csb", false);
shader->set("csb", false);
}

// Setup drawing area
Expand Down Expand Up @@ -377,36 +418,19 @@ struct image_kernel::impl
// Draw
switch (params.geometry.type()) {
case core::frame_geometry::geometry_type::quad: {
GL(glBindVertexArray(vao_));
GL(glBindBuffer(GL_ARRAY_BUFFER, vbo_));

std::vector<core::frame_geometry::coord> coords_triangles{
coords[0], coords[1], coords[2], coords[0], coords[2], coords[3]};

GL(glBufferData(GL_ARRAY_BUFFER,
static_cast<GLsizeiptr>(sizeof(core::frame_geometry::coord)) * coords_triangles.size(),
coords_triangles.data(),
GL_STATIC_DRAW));

auto stride = static_cast<GLsizei>(sizeof(core::frame_geometry::coord));

auto vtx_loc = shader_->get_attrib_location("Position");
auto tex_loc = shader_->get_attrib_location("TexCoordIn");

GL(glEnableVertexAttribArray(vtx_loc));
GL(glEnableVertexAttribArray(tex_loc));

GL(glVertexAttribPointer(vtx_loc, 2, GL_DOUBLE, GL_FALSE, stride, nullptr));
GL(glVertexAttribPointer(tex_loc, 4, GL_DOUBLE, GL_FALSE, stride, (GLvoid*)(2 * sizeof(GLdouble))));
GL(glNamedBufferSubData(vbo_, 0, stride * coords_triangles.size(), coords_triangles.data()));
GL(glBindVertexArray( vao_ ));

GL(glDrawArrays(GL_TRIANGLES, 0, static_cast<GLsizei>(coords_triangles.size())));
GL(glTextureBarrier());

GL(glDisableVertexAttribArray(vtx_loc));
GL(glDisableVertexAttribArray(tex_loc));

if (!fast) {
GL(glTextureBarrier());
}
GL(glBindVertexArray(0));
GL(glBindBuffer(GL_ARRAY_BUFFER, 0));

break;
}
Expand Down
10 changes: 9 additions & 1 deletion src/accelerator/ogl/image/image_mixer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include <common/array.h>
#include <common/future.h>
#include <common/log.h>
#include <common/gl/gl_check.h>

#include <core/frame/frame.h>
#include <core/frame/frame_transform.h>
Expand Down Expand Up @@ -90,7 +91,14 @@ class image_renderer
static const std::vector<uint8_t> buffer(8192 * 8192 * 8, 0);
return make_ready_future(array<const std::uint8_t>(buffer.data(), format_desc.size, true));
}

ogl_->dispatch_async([=]() {
GL(glClear(GL_COLOR_BUFFER_BIT));
#ifdef _DEBUG
if (glFrameTerminatorGREMEDY != nullptr) {
GL(glFrameTerminatorGREMEDY());
}
#endif
}).wait();
return flatten(ogl_->dispatch_async([=]() mutable -> std::shared_future<array<const std::uint8_t>> {
auto target_texture = ogl_->create_texture(format_desc.width, format_desc.height, 4);

Expand Down
47 changes: 46 additions & 1 deletion src/accelerator/ogl/image/image_shader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,13 @@

#include "ogl_image_fragment.h"
#include "ogl_image_vertex.h"
#include <string>
#include <vector>

namespace caspar { namespace accelerator { namespace ogl {

// Weak reference to the shader created by get_image_shader(); being weak, it
// does not by itself keep the shader alive.
std::weak_ptr<shader> g_shader;
// Weak reference consulted by get_fast_image_shader() before it builds a new
// FRAGMENT_FAST shader.
std::weak_ptr<shader> g_shader_fast;
// Held by get_fast_image_shader() for its whole lookup/creation sequence
// (presumably by get_image_shader() as well -- its prologue is not visible here).
std::mutex g_shader_mutex;

std::shared_ptr<shader> get_image_shader(const spl::shared_ptr<device>& ogl)
Expand All @@ -51,8 +54,50 @@ std::shared_ptr<shader> get_image_shader(const spl::shared_ptr<device>& ogl)
ogl->dispatch_async([=] { delete p; });
}
};
std::vector<std::string> fragment_shader_sources;

existing_shader.reset(new shader(std::string(vertex_shader), std::string(fragment_shader)), deleter);
std::string prepend = "#version 450\n";

fragment_shader_sources.push_back(prepend);
fragment_shader_sources.push_back(std::string(fragment_shader));

existing_shader.reset(new shader(std::string(vertex_shader), fragment_shader_sources), deleter);

g_shader = existing_shader;

return existing_shader;
}

/// Returns the "fast path" image shader, creating it on first use.
///
/// The shader is built from the same vertex/fragment sources as the regular
/// image shader, with `#define FRAGMENT_FAST` prepended to the fragment source.
/// A weak reference is kept in g_shader_fast so later callers share the
/// instance for as long as somebody still owns it.
///
/// @param ogl Device whose dispatch_async is used to delete the shader.
/// @return The cached shader if still alive, otherwise a newly created one.
std::shared_ptr<shader> get_fast_image_shader(const spl::shared_ptr<device>& ogl)
{
    std::lock_guard<std::mutex> lock(g_shader_mutex);
    auto existing_shader = g_shader_fast.lock();

    if (existing_shader) {
        return existing_shader;
    }

    // The deleter is alive until the weak pointer is destroyed, so we have
    // to weakly reference ogl, to not keep it alive until atexit
    std::weak_ptr<device> weak_ogl = ogl;

    auto deleter = [weak_ogl](shader* p) {
        auto ogl = weak_ogl.lock();

        if (ogl) {
            ogl->dispatch_async([=] { delete p; });
        }
    };

    // The version directive and the FRAGMENT_FAST switch go in front of the
    // shared fragment source as a separate source string.
    std::vector<std::string> fragment_shader_sources;
    std::string              prepend = "#version 450\n#define FRAGMENT_FAST\n";

    fragment_shader_sources.push_back(prepend);
    fragment_shader_sources.push_back(std::string(fragment_shader));

    existing_shader.reset(new shader(std::string(vertex_shader), fragment_shader_sources), deleter);

    // Cache in the fast-shader slot. Storing into g_shader here would make a
    // later get_image_shader() hand out the FRAGMENT_FAST shader and would
    // leave g_shader_fast permanently empty.
    g_shader_fast = existing_shader;

Expand Down
1 change: 1 addition & 0 deletions src/accelerator/ogl/image/image_shader.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,6 @@ enum class texture_id
};

/// Returns the image shader built from the vertex shader and the full
/// fragment shader source.
std::shared_ptr<shader> get_image_shader(const spl::shared_ptr<device>& ogl);
/// Returns the image shader built from the same sources with
/// `#define FRAGMENT_FAST` prepended to the fragment shader.
std::shared_ptr<shader> get_fast_image_shader(const spl::shared_ptr<device>& ogl);

}}} // namespace caspar::accelerator::ogl
21 changes: 14 additions & 7 deletions src/accelerator/ogl/image/shader.frag
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
// No #version directive in this file: the host code prepends "#version 450"
// (optionally followed by "#define FRAGMENT_FAST") as a separate source
// string, and a second #version after that would not compile.
in vec4 TexCoord;
in vec4 TexCoord2;
out vec4 fragColor;
Expand Down Expand Up @@ -468,7 +467,7 @@ vec4 ycbcra_to_rgba(float y, float cb, float cr, float a)

// Samples `sampler` at `coords`.
// Uses the overloaded texture() built-in; the legacy texture2D() is not
// provided by core-profile GLSL 4.50.
vec4 get_sample(sampler2D sampler, vec2 coords)
{
    return texture(sampler, coords);
}

vec4 get_rgba_color()
Expand Down Expand Up @@ -513,7 +512,7 @@ vec4 get_rgba_color()
{
float y = get_sample(plane[0], TexCoord.st / TexCoord.q).g;
float cb = get_sample(plane[1], TexCoord.st / TexCoord.q).b;
float cr = get_sample(plane[1], TexCoord.st / TexCoord.q).r;
float cr = get_sample(plane[1], TexCoord.st / TexCoord.q).r;
return ycbcra_to_rgba(y, cb, cr, 1.0);
}
}
Expand All @@ -523,20 +522,28 @@ vec4 get_rgba_color()
// Fragment entry point.
// With FRAGMENT_FAST defined only the colour fetch, the opacity multiply and
// the BGRA swizzle remain; otherwise the full chain (alpha discard, chroma
// key, levels, contrast/saturation/brightness, local/layer key, invert,
// blend) is applied.
void main()
{
    vec4 color = get_rgba_color();
#ifndef FRAGMENT_FAST
    if (color.a < 0.01)
        discard;
    if (chroma)
        color = chroma_key(color);
    if (levels)
        color.rgb = LevelsControl(color.rgb, min_input, gamma, max_input, min_output, max_output);
    if (csb)
        color.rgb = ContrastSaturationBrightness(color, brt, sat, con);
    // Each key is applied exactly once. The locals get their own names so they
    // do not shadow the `local_key` / `layer_key` sampler uniforms.
    float local_key_value = texture(local_key, TexCoord2.st).r;
    if (has_local_key)
        color *= local_key_value;
    float layer_key_value = texture(layer_key, TexCoord2.st).r;
    if (has_layer_key)
        color *= layer_key_value;
#endif
    color *= opacity;
#ifndef FRAGMENT_FAST
    if (invert)
        color = 1.0 - color;
    if (blend_mode >= 0)
        color = blend(color);
#endif
    fragColor = color.bgra;
}
Loading