Skip to content

Commit

Permalink
Fix for long running messages (#30)
Browse files Browse the repository at this point in the history
* Make sure *_poll on the main queue/consumer queue is called at least once in the max.poll.interval.ms interval

* Bug fixing

* Update version
  • Loading branch information
silviucpp authored Jul 5, 2021
1 parent 1ca7ec7 commit bb95f53
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 26 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
### Changelog:

#### v2.0.4

- Fix for processing messages that takes longer than max_poll_interval_ms

#### v2.0.3

- Upgrade to lager v3.9.2 (works on OTP 24)
Expand Down
121 changes: 97 additions & 24 deletions c_src/queuecallbacksdispatcher.cc
Original file line number Diff line number Diff line change
@@ -1,16 +1,25 @@
#include "queuecallbacksdispatcher.h"
#include "rdkafka.h"

#include <chrono>
#include <limits>

namespace {
void consumers_event_callback(rd_kafka_t *rk, void *qev_opaque)
{
static_cast<QueueCallbacksDispatcher*>(qev_opaque)->signal(rk);
}

inline uint64_t now_ms()
{
return std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
}
}

QueueCallbacksDispatcher::QueueCallbacksDispatcher()
QueueCallbacksDispatcher::QueueCallbacksDispatcher():
running_(true),
poll_timeout_us_(-1)
{
running_ = true;
thread_callbacks_ = std::thread(&QueueCallbacksDispatcher::process_callbacks, this);
}

Expand All @@ -24,9 +33,27 @@ QueueCallbacksDispatcher::~QueueCallbacksDispatcher()

void QueueCallbacksDispatcher::watch(rd_kafka_t* instance, bool is_consumer)
{
char buffer[256];
size_t buffer_size = sizeof(buffer);

const rd_kafka_conf_t* conf = rd_kafka_conf(instance);
rd_kafka_conf_get(conf, "max.poll.interval.ms", buffer, &buffer_size);
ASSERT(buffer_size > 0);
uint64_t max_poll_interval_ms = static_cast<uint64_t>(std::stoull(buffer)/2);

{
CritScope ss(&crt_);
objects_[instance] = is_consumer;
objects_[instance] = item(is_consumer, max_poll_interval_ms, now_ms());

// poll timeout is the minimum value of all instances

for(auto& it: objects_)
{
if(it.second.max_poll_interval_ms < max_poll_interval_ms)
max_poll_interval_ms = it.second.max_poll_interval_ms;
}

poll_timeout_us_ = static_cast<int64_t>(max_poll_interval_ms*1000);
}

rd_kafka_queue_t* queue = is_consumer ? rd_kafka_queue_get_consumer(instance): rd_kafka_queue_get_main(instance);
Expand All @@ -46,8 +73,27 @@ bool QueueCallbacksDispatcher::remove(rd_kafka_t* instance)
if(it == objects_.end())
return false;

is_consumer = it->second;
is_consumer = it->second.is_consumer;
objects_.erase(it);

// recalculate the minimum polling time.

if(objects_.empty() == false)
{
uint64_t max_poll_interval_ms = std::numeric_limits<int64_t>::max();

for(auto& it: objects_)
{
if(it.second.max_poll_interval_ms < max_poll_interval_ms)
max_poll_interval_ms = it.second.max_poll_interval_ms;
}

poll_timeout_us_ = static_cast<int64_t>(max_poll_interval_ms*1000);
}
else
{
poll_timeout_us_ = -1;
}
}

rd_kafka_queue_t* queue = is_consumer ? rd_kafka_queue_get_consumer(instance): rd_kafka_queue_get_main(instance);
Expand All @@ -67,37 +113,64 @@ void QueueCallbacksDispatcher::process_callbacks()

while (running_)
{
events_.wait_dequeue(obj);
if(events_.wait_dequeue_timed(obj, poll_timeout_us_) == false)
{
uint64_t now = now_ms();
CritScope ss(&crt_);
check_max_poll_interval_ms(now);
continue;
}

if(obj == nullptr)
continue;

uint64_t now = now_ms();

CritScope ss(&crt_);

auto it = objects_.find(obj);

if(it != objects_.end())
{
if(it->second)
{
//consumer polling

rd_kafka_message_t* msg = nullptr;
while(running_ && ((msg = rd_kafka_consumer_poll(obj, 0)) != nullptr))
{
// because communication between nif and erlang it's based on async messages might be a small window
// between starting of revoking partitions (queued are forwarded back on the main queue) and when actual we revoked them
// when we get the messages here. we drop all this messages as time they have no impact because offset is not changed.
// we are sleeping here as well to not consume lot of cpu
rd_kafka_message_destroy(msg);
}
}
else
{
//producer polling
rd_kafka_poll(obj, 0);
}
it->second.last_poll_ms = now;
do_poll(obj, it->second.is_consumer);
}

check_max_poll_interval_ms(now);
}
}

void QueueCallbacksDispatcher::check_max_poll_interval_ms(uint64_t now)
{
for(auto& it: objects_)
{
if(now - it.second.last_poll_ms >= it.second.max_poll_interval_ms)
{
it.second.last_poll_ms = now;
do_poll(it.first, it.second.is_consumer);
}
}
}

void QueueCallbacksDispatcher::do_poll(rd_kafka_t* obj, bool is_consumer)
{
if(is_consumer)
{
//consumer polling

rd_kafka_message_t* msg = nullptr;
while(running_ && ((msg = rd_kafka_consumer_poll(obj, 0)) != nullptr))
{
// because communication between nif and erlang it's based on async messages might be a small window
// between starting of revoking partitions (queued are forwarded back on the main queue) and when actual we revoked them
// when we get the messages here. we drop all this messages as time they have no impact because offset is not changed.
// we are sleeping here as well to not consume lot of cpu
rd_kafka_message_destroy(msg);
}
}
else
{
//producer polling
rd_kafka_poll(obj, 0);
}
}
15 changes: 14 additions & 1 deletion c_src/queuecallbacksdispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,27 @@ class QueueCallbacksDispatcher

private:

void check_max_poll_interval_ms(uint64_t now);
void do_poll(rd_kafka_t* obj, bool is_consumer);

struct item {
item() {}
item(bool v, uint64_t m, uint64_t t): is_consumer(v), max_poll_interval_ms(m), last_poll_ms(t) {}

bool is_consumer = false;
uint64_t max_poll_interval_ms = 0;
uint64_t last_poll_ms = 0;
};

void process_callbacks();

CriticalSection crt_;
std::thread thread_callbacks_;

bool running_;
int64_t poll_timeout_us_;
moodycamel::BlockingConcurrentQueue<rd_kafka_t*> events_;
std::unordered_map<rd_kafka_t*, bool> objects_;
std::unordered_map<rd_kafka_t*, item> objects_;

DISALLOW_COPY_AND_ASSIGN(QueueCallbacksDispatcher);
};
Expand Down
2 changes: 1 addition & 1 deletion src/erlkaf.app.src
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
{description, "erlkaf - Erlang Kafka library based on librdkafka"},
{licenses, ["MIT"]},
{links,[{"Github","https://github.com/silviucpp/erlkaf"}]},
{vsn, "2.0.3"},
{vsn, "2.0.4"},
{registered, []},
{applications, [
kernel,
Expand Down

0 comments on commit bb95f53

Please sign in to comment.