From 87a1e7e0f89b2fd9f0ac331ad13aa846f022a66e Mon Sep 17 00:00:00 2001
From: sajibreadd
Date: Tue, 10 Sep 2024 10:46:26 +0600
Subject: [PATCH 1/5] Notification interface added in mds

Signed-off-by: Md Mahamudur Rahaman Sajib
---
 CMakeLists.txt                              |   1 +
 src/common/options/mds.yaml.in              |  33 ++
 src/include/ceph_fs.h                       |  20 +
 src/include/config-h.in.cmake               |   3 +
 src/mds/CMakeLists.txt                      |  27 +-
 src/mds/MDSDaemon.cc                        |  39 ++
 src/mds/MDSKafka.cc                         | 560 ++++++++++++++++++++
 src/mds/MDSKafka.h                          | 111 ++++
 src/mds/MDSNotificationManager.cc           | 109 ++++
 src/mds/MDSNotificationManager.h            |  50 ++
 src/mds/MDSNotificationMessage.cc           |  80 +++
 src/mds/MDSNotificationMessage.h            |  24 +
 src/mds/MDSRank.cc                          | 105 +++-
 src/mds/MDSRank.h                           |  14 +
 src/mds/MDSUDPEndpoint.cc                   |  97 ++++
 src/mds/MDSUDPEndpoint.h                    |  43 ++
 src/mds/Server.cc                           |  98 +++-
 src/mds/Server.h                            |  14 +
 src/messages/MNotificationInfoKafkaTopic.h  |  75 +++
 src/messages/MNotificationInfoUDPEndpoint.h |  60 +++
 src/msg/Message.cc                          |  15 +
 src/msg/Message.h                           |   5 +
 22 files changed, 1572 insertions(+), 11 deletions(-)
 create mode 100644 src/mds/MDSKafka.cc
 create mode 100644 src/mds/MDSKafka.h
 create mode 100644 src/mds/MDSNotificationManager.cc
 create mode 100644 src/mds/MDSNotificationManager.h
 create mode 100644 src/mds/MDSNotificationMessage.cc
 create mode 100644 src/mds/MDSNotificationMessage.h
 create mode 100644 src/mds/MDSUDPEndpoint.cc
 create mode 100644 src/mds/MDSUDPEndpoint.h
 create mode 100644 src/messages/MNotificationInfoKafkaTopic.h
 create mode 100644 src/messages/MNotificationInfoUDPEndpoint.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 161a363f129a9..bb7b44c433886 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -584,6 +584,7 @@ endif (WITH_RADOSGW)

 #option for CephFS
 option(WITH_CEPHFS "CephFS is enabled" ON)
+option(WITH_CEPHFS_NOTIFICATION "CephFS notification is enabled" OFF)

 if(NOT WIN32)
 # Please specify 3.x if you want to build with a certain version of python3.
diff --git a/src/common/options/mds.yaml.in b/src/common/options/mds.yaml.in
index dcf3eaac0d683..0b422a91bffd8 100644
--- a/src/common/options/mds.yaml.in
+++ b/src/common/options/mds.yaml.in
@@ -1730,3 +1730,36 @@ options:
   - mds
   flags:
   - runtime
+- name: mds_allow_notification_secrets_in_cleartext
+  type: bool
+  level: advanced
+  desc: Allows sending secrets (e.g. passwords) over unencrypted connections.
+  long_desc: When a notification endpoint requires secrets (e.g. passwords)
+    but the connection is not encrypted, topic creation is normally rejected.
+    This parameter can be set to "true" to bypass that check. Use it only if
+    the MDS is on a trusted private network and the message broker cannot be
+    configured without password authentication. Otherwise, this will leak the
+    credentials of your message broker and compromise its security.
+  default: false
+  services:
+  - mds
+- name: mds_kafka_sleep_timeout
+  type: uint
+  level: advanced
+  desc: Time in milliseconds to sleep while polling for kafka replies
+  long_desc: This is used to avoid busy waiting for kafka replies, as well as
+    for the cases where the broker is down and we try to reconnect. Three
+    times this value is used as the sleep interval when no messages were sent
+    or received across all kafka connections.
+  default: 10
+  services:
+  - mds
+- name: mds_kafka_message_timeout
+  type: uint
+  level: advanced
+  desc: The maximum time in milliseconds to deliver a message (including retries)
+  long_desc: A delivery error occurs when the message timeout is exceeded.
+    The value must be greater than zero; if set to zero, a value of 1
+    millisecond is used.
+ default: 5000 + services: + - mds diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 57eb18b0d3e5a..6a8de2b7ca79d 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -442,6 +442,26 @@ enum { CEPH_MDS_OP_LOCK_PATH = 0x0150a, }; +enum { + CEPH_MDS_NOTIFY_ACCESS = 0x0000000000000001, + CEPH_MDS_NOTIFY_ATTRIB = 0x0000000000000002, + CEPH_MDS_NOTIFY_CLOSE_WRITE = 0x0000000000000004, + CEPH_MDS_NOTIFY_CLOSE_NOWRITE = 0x0000000000000008, + CEPH_MDS_NOTIFY_CREATE = 0x0000000000000010, + CEPH_MDS_NOTIFY_DELETE = 0x0000000000000020, + CEPH_MDS_NOTIFY_DELETE_SELF = 0x0000000000000040, + CEPH_MDS_NOTIFY_MODIFY = 0x0000000000000080, + CEPH_MDS_NOTIFY_MOVE_SELF = 0x0000000000000100, + CEPH_MDS_NOTIFY_MOVED_FROM = 0x0000000000000200, + CEPH_MDS_NOTIFY_MOVED_TO = 0x0000000000000400, + CEPH_MDS_NOTIFY_OPEN = 0x0000000000000800, + CEPH_MDS_NOTIFY_CLOSE = 0x0000000000001000, + CEPH_MDS_NOTIFY_MOVE = 0x0000000000002000, + CEPH_MDS_NOTIFY_ONESHOT = 0x0000000000004000, + CEPH_MDS_NOTIFY_IGNORED = 0x0000000000008000, + CEPH_MDS_NOTIFY_ONLYDIR = 0x0000000000010000 +}; + #define IS_CEPH_MDS_OP_NEWINODE(op) (op == CEPH_MDS_OP_CREATE || \ op == CEPH_MDS_OP_MKNOD || \ op == CEPH_MDS_OP_MKDIR || \ diff --git a/src/include/config-h.in.cmake b/src/include/config-h.in.cmake index b10ea7c27cbcd..7aba8634f1468 100644 --- a/src/include/config-h.in.cmake +++ b/src/include/config-h.in.cmake @@ -133,6 +133,9 @@ /* define if cephfs enabled */ #cmakedefine WITH_CEPHFS +/* define if cephfs notification enabled */ +#cmakedefine WITH_CEPHFS_NOTIFICATION + /* define if systemed is enabled */ #cmakedefine WITH_SYSTEMD diff --git a/src/mds/CMakeLists.txt b/src/mds/CMakeLists.txt index f3980c7e04b50..071df618a6bfb 100644 --- a/src/mds/CMakeLists.txt +++ b/src/mds/CMakeLists.txt @@ -1,3 +1,8 @@ + +if (WITH_CEPHFS_NOTIFICATION) + find_package(RDKafka 0.9.2 REQUIRED) +endif() + set(mds_srcs BatchOp.cc Capability.cc @@ -48,9 +53,25 @@ set(mds_srcs ${CMAKE_SOURCE_DIR}/src/common/TrackedOp.cc ${CMAKE_SOURCE_DIR}/src/common/MemoryModel.cc ${CMAKE_SOURCE_DIR}/src/osdc/Journaler.cc - ${CMAKE_SOURCE_DIR}/src/mgr/MDSPerfMetricTypes.cc) + ${CMAKE_SOURCE_DIR}/src/mgr/MDSPerfMetricTypes.cc + MDSNotificationManager.cc) + +if (WITH_CEPHFS_NOTIFICATION) + list(APPEND mds_srcs MDSKafka.cc MDSUDPEndpoint.cc MDSNotificationMessage.cc) +endif() + add_library(mds STATIC ${mds_srcs}) target_link_libraries(mds PRIVATE - legacy-option-headers Boost::url - heap_profiler cpu_profiler osdc ${LUA_LIBRARIES}) + legacy-option-headers Boost::url + heap_profiler cpu_profiler osdc ${LUA_LIBRARIES} + ${Boost_LIBRARIES}) + +if (WITH_CEPHFS_NOTIFICATION) + target_link_libraries(mds PRIVATE RDKafka::RDKafka) +endif() + target_include_directories(mds PRIVATE "${LUA_INCLUDE_DIR}") + +if (WITH_CEPHFS_NOTIFICATION) + target_include_directories(mds PRIVATE ${Boost_INCLUDE_DIRS}) +endif() \ No newline at end of file diff --git a/src/mds/MDSDaemon.cc b/src/mds/MDSDaemon.cc index b31d9c95220cc..25aae6deef37f 100644 --- a/src/mds/MDSDaemon.cc +++ b/src/mds/MDSDaemon.cc @@ -532,6 +532,45 @@ void MDSDaemon::set_up_admin_socket() asok_hook, "run cpu profiling on daemon"); ceph_assert(r == 0); + + #ifdef WITH_CEPHFS_NOTIFICATION + r = admin_socket->register_command( + "add_topic " + "name=topic_name,type=CephString,req=true " + "name=broker,type=CephString,req=true " + "name=use_ssl,type=CephBool,req=false " + "name=username,type=CephString,req=false " + "name=password,type=CephString,req=false " + "name=ca_location,type=CephString,req=false " + 
"name=mechanism,type=CephString,req=false", + asok_hook, + "add topic for notification" + ); + ceph_assert(r == 0); + r = admin_socket->register_command( + "remove_topic " + "name=topic_name,type=CephString,req=true", + asok_hook, + "remove kafka topic" + ); + ceph_assert(r == 0); + r = admin_socket->register_command( + "add_udp_endpoint " + "name=entity,type=CephString,req=true " + "name=ip,type=CephString,req=true " + "name=port,type=CephInt,req=true", + asok_hook, + "add udp endpoint for notification" + ); + ceph_assert(r == 0); + r = admin_socket->register_command( + "remove_udp_endpoint " + "name=entity,type=CephString,req=true", + asok_hook, + "remove UDP endpoint" + ); + ceph_assert(r == 0); + #endif } void MDSDaemon::clean_up_admin_socket() diff --git a/src/mds/MDSKafka.cc b/src/mds/MDSKafka.cc new file mode 100644 index 0000000000000..61370f2b502c1 --- /dev/null +++ b/src/mds/MDSKafka.cc @@ -0,0 +1,560 @@ + +#include "MDSKafka.h" +#include "include/fs_types.h" + +#define dout_subsys ceph_subsys_mds + +CephContext *MDSKafka::cct = nullptr; +CephContext *MDSKafkaTopic::cct = nullptr; + +connection_t::connection_t(const std::string &broker, bool use_ssl, + const std::string &user, const std::string &password, + const std::optional &ca_location, + const std::optional &mechanism) + : broker(broker), use_ssl(use_ssl), user(user), password(password), + ca_location(ca_location), mechanism(mechanism) { + combine_hash(); +} + +MDSKafkaManager::MDSKafkaManager(CephContext *cct) + : cct(cct), stop_flag(true) {} + +int MDSKafkaManager::remove_topic(const std::string &topic_name) { + std::unique_lock lock(endpoint_mutex); + std::shared_ptr kafka_from; + for (auto &[hash_key, endpoint] : endpoints) { + if (endpoint->has_topic(topic_name)) { + kafka_from = endpoint; + break; + } + } + if (kafka_from) { + kafka_from->remove_topic(topic_name); + if (kafka_from->topics.size() == 0) { + endpoints.erase(kafka_from->connection.hash_key); + } + ldout(cct, 1) << "Kafka topic with topic name '" << topic_name + << "' is removed successfully" << dendl; + if (endpoints.empty() && !stop_flag) { + lock.unlock(); + stop_worker(); + } + return 0; + } + ldout(cct, 1) << "No kafka topic exist with topic name '" << topic_name << "'" + << dendl; + return -CEPHFS_EINVAL; +} + +int MDSKafkaManager::add_topic(const std::string &topic_name, + const connection_t &connection) { + std::unique_lock lock(endpoint_mutex); + std::shared_ptr kafka_from, kafka_to; + for (auto &[hash_key, endpoint] : endpoints) { + if (endpoint->has_topic(topic_name)) { + kafka_from = endpoint; + break; + } + } + auto it = endpoints.find(connection.hash_key); + if (it != endpoints.end()) { + kafka_to = it->second; + } + if (kafka_from && kafka_from == kafka_to) { + ldout(cct, 1) << "Kafka topic with topic name '" << topic_name + << "' is added successfully" << dendl; + return 0; + } + bool created = false; + if (!kafka_to) { + if (endpoints.size() >= MAX_CONNECTIONS_DEFAULT) { + ldout(cct, 1) << "Kafka connect: max connections exceeded" << dendl; + return -CEPHFS_ENOMEM; + } + kafka_to = MDSKafka::create(cct, connection); + if (!kafka_to) { + return -CEPHFS_ECANCELED; + } + created = true; + } + std::shared_ptr topic = + MDSKafkaTopic::create(cct, topic_name, kafka_to); + if (!topic) { + return -CEPHFS_ECANCELED; + } + kafka_to->add_topic(topic_name, topic); + if (created) { + endpoints[connection.hash_key] = kafka_to; + } + if (kafka_from) { + kafka_from->remove_topic(topic_name); + if (kafka_from->topics.size() == 0) { + 
endpoints.erase(kafka_from->connection.hash_key); + } + } + ldout(cct, 1) << "Kafka topic with topic name '" << topic_name + << "' is added successfully" << dendl; + if (stop_flag) { + lock.unlock(); + start_worker(); + } + return 0; +} + +void MDSKafkaManager::start_worker() { + ceph_assert(stop_flag); + stop_flag = false; + worker = std::thread(&MDSKafkaManager::run, this); + ldout(cct, 1) + << "started worker thread of kafka manager to send notifications" + << dendl; +} + +void MDSKafkaManager::stop_worker() { + ceph_assert(!stop_flag); + stop_flag = true; + if (worker.joinable()) { + worker.join(); + ldout(cct, 1) << "stopped the worker thread of kafka manager as there is " + "no kafka topics to send notifications" + << dendl; + } +} + +int MDSKafkaManager::send( + const std::shared_ptr &message) { + std::unique_lock lock(queue_mutex); + if (message_queue.size() >= MAX_QUEUE_DEFAULT) { + ldout(cct, 1) << "Notification message for kafka with seq_id=" + << message->seq_id << " is dropped as queue is full" << dendl; + return -CEPHFS_EBUSY; + } + message_queue.push(message); + return 0; +} + +uint64_t MDSKafkaManager::publish( + const std::shared_ptr &message) { + std::shared_lock lock(endpoint_mutex); + uint64_t reply_count = 0; + for (auto &[key, endpoint] : endpoints) { + reply_count += endpoint->publish_internal(message); + } + return reply_count; +} + +uint64_t MDSKafkaManager::polling(int read_timeout) { + std::shared_lock lock(endpoint_mutex); + uint64_t reply_count = 0; + for (auto &[key, endpoint] : endpoints) { + reply_count += endpoint->poll(read_timeout); + } + return reply_count; +} + +void MDSKafkaManager::run() { + while (!stop_flag) { + int send_count = 0, reply_count = 0; + while (true) { + std::unique_lock lock(queue_mutex); + if (message_queue.empty()) { + break; + } + std::shared_ptr message = message_queue.front(); + message_queue.pop(); + ++send_count; + lock.unlock(); + reply_count += publish(message); + } + reply_count += polling(READ_TIMEOUT_MS_DEFAULT); + } +} + +void connection_t::combine_hash() { + hash_key = 0; + boost::hash_combine(hash_key, broker); + boost::hash_combine(hash_key, use_ssl); + boost::hash_combine(hash_key, user); + boost::hash_combine(hash_key, password); + if (ca_location.has_value()) { + boost::hash_combine(hash_key, ca_location.value()); + } + if (mechanism.has_value()) { + boost::hash_combine(hash_key, mechanism.value()); + } +} + +void MDSKafkaTopic::kafka_topic_deleter(rd_kafka_topic_t *topic_ptr) { + if (topic_ptr) { + rd_kafka_topic_destroy(topic_ptr); + } +} + +MDSKafkaTopic::MDSKafkaTopic(const std::string &topic_name) + : topic_name(topic_name), head(0), tail(0), inflight_count(0) {} + +std::shared_ptr +MDSKafkaTopic::create(CephContext *_cct, const std::string &topic_name, + const std::shared_ptr &kafka_endpoint) { + try { + if (!MDSKafkaTopic::cct && _cct) { + MDSKafkaTopic::cct = _cct; + } + + std::shared_ptr topic_ptr = + std::make_shared(topic_name); + topic_ptr->kafka_topic_ptr.reset(rd_kafka_topic_new( + kafka_endpoint->producer.get(), topic_name.c_str(), nullptr)); + if (!topic_ptr->kafka_topic_ptr) { + return nullptr; + } + topic_ptr->delivery_ring = std::vector(MAX_INFLIGHT_DEFAULT, false); + return topic_ptr; + } catch (...) 
{
+    // topic allocation failed (rd_kafka_topic_new error or bad_alloc);
+    // the caller treats nullptr as failure
+  }
+  return nullptr;
+}
+
+int MDSKafkaTopic::push_unack_event() {
+  std::unique_lock lock(ring_mutex);
+  if (inflight_count >= (int)MAX_INFLIGHT_DEFAULT) {
+    return -1;
+  }
+  delivery_ring[tail] = true;
+  int idx = tail;
+  tail = (tail + 1) % MAX_INFLIGHT_DEFAULT;
+  ++inflight_count;
+  return idx;
+}
+
+void MDSKafkaTopic::acknowledge_event(int idx) {
+  if (!(idx >= 0 && idx < (int)MAX_INFLIGHT_DEFAULT)) {
+    ldout(cct, 10) << "Kafka run: unsolicited n/ack received with tag=" << idx
+                   << dendl;
+    return;
+  }
+  std::unique_lock lock(ring_mutex);
+  delivery_ring[idx] = false;
+  while (inflight_count > 0 && !delivery_ring[head]) {
+    head = (head + 1) % MAX_INFLIGHT_DEFAULT;
+    --inflight_count;
+  }
+}
+
+void MDSKafkaTopic::drop_last_event() {
+  std::unique_lock lock(ring_mutex);
+  // step tail back to the most recently pushed slot before clearing it
+  tail = (tail - 1 + MAX_INFLIGHT_DEFAULT) % MAX_INFLIGHT_DEFAULT;
+  delivery_ring[tail] = false;
+  --inflight_count;
+}
+
+void MDSKafka::kafka_producer_deleter(rd_kafka_t *producer_ptr) {
+  if (producer_ptr) {
+    rd_kafka_flush(producer_ptr,
+                   10 * 1000); // Wait for max 10 seconds to flush.
+    rd_kafka_destroy(producer_ptr); // Destroy producer instance.
+  }
+}
+
+MDSKafka::MDSKafka(const connection_t &connection) : connection(connection) {}
+
+std::shared_ptr<MDSKafka> MDSKafka::create(CephContext *_cct,
+                                           const connection_t &connection) {
+  try {
+    if (!MDSKafka::cct && _cct) {
+      MDSKafka::cct = _cct;
+    }
+    // validation before creating kafka interface
+    if (connection.user.empty() != connection.password.empty()) {
+      return nullptr;
+    }
+    if (!connection.user.empty() && !connection.use_ssl &&
+        !g_conf().get_val<bool>(
+            "mds_allow_notification_secrets_in_cleartext")) {
+      ldout(cct, 1) << "Kafka connect: user/password are only allowed over "
+                       "secure connection"
+                    << dendl;
+      return nullptr;
+    }
+    std::shared_ptr<MDSKafka> kafka_ptr =
+        std::make_shared<MDSKafka>(connection);
+    char errstr[512] = {0};
+    auto kafka_conf_deleter = [](rd_kafka_conf_t *conf) {
+      rd_kafka_conf_destroy(conf);
+    };
+    std::unique_ptr<rd_kafka_conf_t, decltype(kafka_conf_deleter)> conf(
+        rd_kafka_conf_new(), kafka_conf_deleter);
+    if (!conf) {
+      ldout(cct, 1) << "Kafka connect: failed to allocate configuration"
+                    << dendl;
+      return nullptr;
+    }
+    constexpr std::uint64_t min_message_timeout = 1;
+    const auto message_timeout =
+        std::max(min_message_timeout,
+                 cct->_conf.get_val<uint64_t>("mds_kafka_message_timeout"));
+    if (rd_kafka_conf_set(conf.get(), "message.timeout.ms",
+                          std::to_string(message_timeout).c_str(), errstr,
+                          sizeof(errstr)) != RD_KAFKA_CONF_OK) {
+      goto conf_error;
+    }
+    if (rd_kafka_conf_set(conf.get(), "bootstrap.servers",
+                          connection.broker.c_str(), errstr,
+                          sizeof(errstr)) != RD_KAFKA_CONF_OK) {
+      goto conf_error;
+    }
+
+    if (connection.use_ssl) {
+      if (!connection.user.empty()) {
+        // use SSL+SASL
+        if (rd_kafka_conf_set(conf.get(), "security.protocol", "SASL_SSL",
+                              errstr, sizeof(errstr)) != RD_KAFKA_CONF_OK ||
+            rd_kafka_conf_set(conf.get(), "sasl.username",
+                              connection.user.c_str(), errstr,
+                              sizeof(errstr)) != RD_KAFKA_CONF_OK ||
+            rd_kafka_conf_set(conf.get(), "sasl.password",
+                              connection.password.c_str(), errstr,
+                              sizeof(errstr)) != RD_KAFKA_CONF_OK) {
+          goto conf_error;
+        }
+        ldout(cct, 20)
+            << "Kafka connect: successfully configured SSL+SASL security"
+            << dendl;
+
+        if (connection.mechanism) {
+          if (rd_kafka_conf_set(conf.get(), "sasl.mechanism",
+                                connection.mechanism->c_str(), errstr,
+                                sizeof(errstr)) != RD_KAFKA_CONF_OK) {
+            goto conf_error;
+          }
+          ldout(cct, 20)
+              << "Kafka connect: successfully configured SASL mechanism"
+              << dendl;
+        } else {
+          if (rd_kafka_conf_set(conf.get(),
"sasl.mechanism", "PLAIN", errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK) { + goto conf_error; + } + ldout(cct, 20) << "Kafka connect: using default SASL mechanism" + << dendl; + } + } else { + // use only SSL + if (rd_kafka_conf_set(conf.get(), "security.protocol", "SSL", errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK) { + goto conf_error; + } + ldout(cct, 20) << "Kafka connect: successfully configured SSL security" + << dendl; + } + if (connection.ca_location) { + if (rd_kafka_conf_set(conf.get(), "ssl.ca.location", + connection.ca_location->c_str(), errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK) { + goto conf_error; + } + ldout(cct, 20) << "Kafka connect: successfully configured CA location" + << dendl; + } else { + ldout(cct, 20) << "Kafka connect: using default CA location" << dendl; + } + ldout(cct, 20) << "Kafka connect: successfully configured security" + << dendl; + } else if (!connection.user.empty()) { + // use SASL+PLAINTEXT + if (rd_kafka_conf_set(conf.get(), "security.protocol", "SASL_PLAINTEXT", + errstr, sizeof(errstr)) != RD_KAFKA_CONF_OK || + rd_kafka_conf_set(conf.get(), "sasl.username", + connection.user.c_str(), errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK || + rd_kafka_conf_set(conf.get(), "sasl.password", + connection.password.c_str(), errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK) { + goto conf_error; + } + ldout(cct, 20) << "Kafka connect: successfully configured SASL_PLAINTEXT" + << dendl; + + if (connection.mechanism) { + if (rd_kafka_conf_set(conf.get(), "sasl.mechanism", + connection.mechanism->c_str(), errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK) { + goto conf_error; + } + ldout(cct, 20) + << "Kafka connect: successfully configured SASL mechanism" << dendl; + } else { + if (rd_kafka_conf_set(conf.get(), "sasl.mechanism", "PLAIN", errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK) { + goto conf_error; + } + ldout(cct, 20) << "Kafka connect: using default SASL mechanism" + << dendl; + } + } + rd_kafka_conf_set_dr_msg_cb(conf.get(), message_callback); + rd_kafka_conf_set_opaque(conf.get(), kafka_ptr.get()); + rd_kafka_conf_set_log_cb(conf.get(), log_callback); + rd_kafka_conf_set_error_cb(conf.get(), poll_err_callback); + { + rd_kafka_t *prod = rd_kafka_new(RD_KAFKA_PRODUCER, conf.release(), errstr, + sizeof(errstr)); + if (!prod) { + ldout(cct, 1) << "Kafka connect: failed to create producer: " << errstr + << dendl; + return nullptr; + } + kafka_ptr->producer.reset(prod); + } + ldout(cct, 1) << "Kafka connect: successfully created new producer" + << dendl; + { + const auto log_level = cct->_conf->subsys.get_log_level(ceph_subsys_mds); + if (log_level <= 1) { + rd_kafka_set_log_level(kafka_ptr->producer.get(), 3); + } else if (log_level <= 2) { + rd_kafka_set_log_level(kafka_ptr->producer.get(), 5); + } else if (log_level <= 10) { + rd_kafka_set_log_level(kafka_ptr->producer.get(), 5); + } else { + rd_kafka_set_log_level(kafka_ptr->producer.get(), 5); + } + } + return kafka_ptr; + + conf_error: + ldout(cct, 1) << "Kafka connect: configuration failed: " << errstr << dendl; + return nullptr; + } catch (...) 
{
+    // producer creation failed (librdkafka or allocation error); the
+    // manager treats nullptr as a failed connection
+  }
+  return nullptr;
+}
+
+bool MDSKafka::has_topic(const std::string &topic_name) {
+  std::unique_lock lock(topic_mutex);
+  return (topics.find(topic_name) != topics.end());
+}
+
+void MDSKafka::add_topic(const std::string &topic_name,
+                         const std::shared_ptr<MDSKafkaTopic> &topic) {
+  std::unique_lock lock(topic_mutex);
+  topics[topic_name] = topic;
+}
+
+void MDSKafka::remove_topic(const std::string &topic_name) {
+  std::unique_lock lock(topic_mutex);
+  auto it = topics.find(topic_name);
+  if (it != topics.end()) {
+    topics.erase(it);
+  }
+}
+
+void MDSKafka::log_callback(const rd_kafka_t *rk, int level, const char *fac,
+                            const char *buf) {
+  if (!cct) {
+    return;
+  }
+  if (level <= 3) {
+    ldout(cct, 1) << "RDKAFKA-" << level << "-" << fac << ": "
+                  << rd_kafka_name(rk) << ": " << buf << dendl;
+  } else if (level <= 5) {
+    ldout(cct, 2) << "RDKAFKA-" << level << "-" << fac << ": "
+                  << rd_kafka_name(rk) << ": " << buf << dendl;
+  } else if (level <= 6) {
+    ldout(cct, 10) << "RDKAFKA-" << level << "-" << fac << ": "
+                   << rd_kafka_name(rk) << ": " << buf << dendl;
+  } else {
+    ldout(cct, 20) << "RDKAFKA-" << level << "-" << fac << ": "
+                   << rd_kafka_name(rk) << ": " << buf << dendl;
+  }
+}
+
+void MDSKafka::poll_err_callback(rd_kafka_t *rk, int err, const char *reason,
+                                 void *opaque) {
+  if (!cct) {
+    return;
+  }
+  ldout(cct, 10) << "Kafka run: poll error(" << err << "): " << reason
+                 << dendl;
+}
+
+uint64_t MDSKafka::publish_internal(
+    const std::shared_ptr<MDSNotificationMessage> &message) {
+  uint64_t reply_count = 0;
+  std::shared_lock lock(topic_mutex);
+  uint64_t read_timeout =
+      cct->_conf.get_val<uint64_t>("mds_kafka_sleep_timeout");
+  for (auto [topic_name, topic_ptr] : topics) {
+    int idx = topic_ptr->push_unack_event();
+    if (idx == -1) {
+      ldout(cct, 1) << "Kafka publish (with callback): failed with error: "
+                       "callback queue full, trying to poll again"
+                    << dendl;
+      reply_count += rd_kafka_poll(producer.get(), 3 * read_timeout);
+      idx = topic_ptr->push_unack_event();
+      if (idx == -1) {
+        ldout(cct, 1)
+            << "Kafka publish (with callback): failed with error: "
+               "message dropped, callback queue full even after polling for "
+            << 3 * read_timeout << "ms" << dendl;
+        continue;
+      }
+    }
+    int *tag = new int(idx);
+    const auto response = rd_kafka_produce(
+        topic_ptr->kafka_topic_ptr.get(), RD_KAFKA_PARTITION_UA,
+        RD_KAFKA_MSG_F_COPY, const_cast<char *>(message->message.c_str()),
+        message->message.length(), nullptr, 0, tag);
+    if (response == -1) {
+      const auto err = rd_kafka_last_error();
+      ldout(cct, 1) << "Kafka publish: failed to produce for topic: "
+                    << topic_name << " with error: " << rd_kafka_err2str(err)
+                    << dendl;
+      delete tag;
+      topic_ptr->drop_last_event();
+      continue;
+    }
+    reply_count += rd_kafka_poll(producer.get(), 0);
+  }
+  return reply_count;
+}
+
+uint64_t MDSKafka::poll(int read_timeout) {
+  return rd_kafka_poll(producer.get(), read_timeout);
+}
+
+void MDSKafka::message_callback(rd_kafka_t *rk,
+                                const rd_kafka_message_t *rkmessage,
+                                void *opaque) {
+  const auto kafka_ptr = reinterpret_cast<MDSKafka *>(opaque);
+  const auto result = rkmessage->err;
+  if (result == 0) {
+    ldout(cct, 20) << "Kafka run: ack received with result="
+                   << rd_kafka_err2str(result) << dendl;
+  } else {
+    ldout(cct, 1) << "Kafka run: nack received with result="
+                  << rd_kafka_err2str(result)
+                  << " for broker: " << kafka_ptr->connection.broker << dendl;
+  }
+  if (!rkmessage->_private) {
+    ldout(cct, 20) << "Kafka run: n/ack received without a callback" << dendl;
+    return;
+  }
+  int *tag = reinterpret_cast<int *>(rkmessage->_private);
+  std::string topic_name = std::string(rd_kafka_topic_name(rkmessage->rkt));
+  std::shared_lock lock(kafka_ptr->topic_mutex);
+  if (kafka_ptr->topics.find(topic_name) == kafka_ptr->topics.end()) {
+    ldout(cct, 20) << "Kafka run: topic=" << topic_name
+                   << " is removed before ack" << dendl;
+    delete tag;
+    return;
+  }
+  std::shared_ptr<MDSKafkaTopic> topic_ptr = kafka_ptr->topics[topic_name];
+  lock.unlock();
+  topic_ptr->acknowledge_event(*tag);
+  delete tag;
+}
\ No newline at end of file
diff --git a/src/mds/MDSKafka.h b/src/mds/MDSKafka.h
new file mode 100644
index 0000000000000..755e00d00fce1
--- /dev/null
+++ b/src/mds/MDSKafka.h
@@ -0,0 +1,111 @@
+#pragma once
+
+#include "MDSNotificationMessage.h"
+#include "common/ceph_context.h"
+#include "include/buffer.h"
+#include <boost/functional/hash.hpp>
+#include <librdkafka/rdkafka.h>
+#include <atomic>
+#include <memory>
+#include <optional>
+#include <queue>
+#include <shared_mutex>
+#include <string>
+#include <thread>
+#include <unordered_map>
+#include <vector>
+
+class MDSKafka;
+
+struct connection_t {
+  std::string broker;
+  bool use_ssl;
+  std::string user;
+  std::string password;
+  std::optional<std::string> ca_location;
+  std::optional<std::string> mechanism;
+  uint64_t hash_key;
+  connection_t() = default;
+  connection_t(const std::string &broker, bool use_ssl,
+               const std::string &user, const std::string &password,
+               const std::optional<std::string> &ca_location,
+               const std::optional<std::string> &mechanism);
+  void combine_hash();
+  std::string to_string() const { return broker + ":" + user; }
+};
+
+class MDSKafkaManager {
+public:
+  MDSKafkaManager(CephContext *cct);
+  int add_topic(const std::string &topic_name, const connection_t &connection);
+  int remove_topic(const std::string &topic_name);
+  int send(const std::shared_ptr<MDSNotificationMessage> &message);
+  CephContext *cct;
+
+private:
+  void run();
+  void start_worker();
+  void stop_worker();
+  uint64_t publish(const std::shared_ptr<MDSNotificationMessage> &message);
+  uint64_t polling(int read_timeout);
+  static const size_t MAX_CONNECTIONS_DEFAULT = 32;
+  static const size_t MAX_QUEUE_DEFAULT = 32768;
+  static const unsigned IDLE_TIME_MS = 100;
+  static const int READ_TIMEOUT_MS_DEFAULT = 100;
+  std::shared_mutex endpoint_mutex;
+  std::unordered_map<uint64_t, std::shared_ptr<MDSKafka>> endpoints;
+  std::mutex queue_mutex;
+  std::queue<std::shared_ptr<MDSNotificationMessage>> message_queue;
+  std::thread worker;
+  std::atomic<bool> stop_flag;
+};
+
+class MDSKafkaTopic {
+public:
+  MDSKafkaTopic() = delete;
+  MDSKafkaTopic(const std::string &topic_name);
+  int push_unack_event();
+  void acknowledge_event(int idx);
+  void drop_last_event();
+  static std::shared_ptr<MDSKafkaTopic>
+  create(CephContext *_cct, const std::string &topic_name,
+         const std::shared_ptr<MDSKafka> &kafka_endpoint);
+  static void kafka_topic_deleter(rd_kafka_topic_t *topic_ptr);
+  std::unique_ptr<rd_kafka_topic_t, decltype(&kafka_topic_deleter)>
+      kafka_topic_ptr{nullptr, kafka_topic_deleter};
+  friend class MDSKafka;
+
+private:
+  
std::string topic_name; + static CephContext *cct; + std::vector delivery_ring; + std::mutex ring_mutex; + int head, tail, inflight_count; + static const size_t MAX_INFLIGHT_DEFAULT = 32768; +}; + +class MDSKafka { +public: + MDSKafka() = delete; + MDSKafka(const connection_t &connection); + static std::shared_ptr create(CephContext *_cct, + const connection_t &connection); + uint64_t + publish_internal(const std::shared_ptr &message); + uint64_t poll(int read_timeout); + void add_topic(const std::string &topic_name, + const std::shared_ptr &topic); + bool has_topic(const std::string &topic_name); + void remove_topic(const std::string &topic_name); + static void kafka_producer_deleter(rd_kafka_t *producer_ptr); + friend class MDSKafkaManager; + friend class MDSKafkaTopic; + +private: + std::unique_ptr producer{ + nullptr, kafka_producer_deleter}; + std::shared_mutex topic_mutex; + std::unordered_map> topics; + static CephContext *cct; + connection_t connection; + static void message_callback(rd_kafka_t *rk, + const rd_kafka_message_t *rkmessage, + void *opaque); + static void log_callback(const rd_kafka_t *rk, int level, const char *fac, + const char *buf); + static void poll_err_callback(rd_kafka_t *rk, int err, const char *reason, + void *opaque); +}; diff --git a/src/mds/MDSNotificationManager.cc b/src/mds/MDSNotificationManager.cc new file mode 100644 index 0000000000000..0ee56621fc5f3 --- /dev/null +++ b/src/mds/MDSNotificationManager.cc @@ -0,0 +1,109 @@ +#include "MDSNotificationManager.h" +#include "include/uuid.h" +#define dout_subsys ceph_subsys_mds + +MDSNotificationManager::MDSNotificationManager(CephContext *cct) + : cct(cct), cur_notification_seq_id(0) { +#ifdef WITH_CEPHFS_NOTIFICATION + uuid_d uid; + uid.generate_random(); + session_id = uid.to_string(); + kafka_manager = std::make_unique(cct); + udp_manager = std::make_unique(cct); +#endif +} + +#ifdef WITH_CEPHFS_NOTIFICATION +int MDSNotificationManager::add_kafka_topic(const std::string &topic_name, + const connection_t &connection) { + return kafka_manager->add_topic(topic_name, connection); +} + +int MDSNotificationManager::remove_kafka_topic(const std::string &topic_name) { + return kafka_manager->remove_topic(topic_name); +} + +int MDSNotificationManager::add_udp_endpoint(const std::string &name, + const std::string &ip, int port) { + return udp_manager->add_endpoint(name, ip, port); +} + +int MDSNotificationManager::remove_udp_endpoint(const std::string &name) { + return udp_manager->remove_endpoint(name); +} + +void MDSNotificationManager::push_notification( + const std::shared_ptr &message) { + kafka_manager->send(message); + udp_manager->send(message); +} +#endif + +void MDSNotificationManager::push_notification(int32_t whoami, CInode *in, + uint64_t notify_mask) { +#ifdef WITH_CEPHFS_NOTIFICATION + std::string path; + in->make_path_string(path, true, nullptr); + std::shared_ptr message = + std::make_shared( + cur_notification_seq_id.fetch_add(1)); + message->create_message(whoami, session_id, notify_mask, path); + push_notification(message); +#endif +} + +void MDSNotificationManager::push_notification_link( + int32_t whoami, CInode *targeti, CDentry *destdn, + uint64_t notify_mask_for_target, uint64_t notify_mask_for_link) { +#ifdef WITH_CEPHFS_NOTIFICATION + std::string target_path; + targeti->make_path_string(target_path, true, nullptr); + std::string link_path; + destdn->make_path_string(link_path, true); + std::shared_ptr message = + std::make_shared( + cur_notification_seq_id.fetch_add(1)); + if 
(target_path == link_path) { + message->create_message(whoami, session_id, notify_mask_for_link, + target_path); + push_notification(message); + return; + } + message->create_link_message(whoami, session_id, notify_mask_for_target, + notify_mask_for_link, target_path, link_path); + push_notification(message); +#endif +} + +void MDSNotificationManager::push_notification_move(int32_t whoami, + CDentry *srcdn, + CDentry *destdn) { +#ifdef WITH_CEPHFS_NOTIFICATION + std::string dest_path, src_path; + srcdn->make_path_string(src_path, true); + destdn->make_path_string(dest_path, true); + uint64_t src_mask = CEPH_MDS_NOTIFY_MOVED_FROM, + dest_mask = CEPH_MDS_NOTIFY_MOVED_TO; + std::shared_ptr message = + std::make_shared( + cur_notification_seq_id.fetch_add(1)); + message->create_move_message(whoami, session_id, src_mask, dest_mask, + src_path, dest_path); + push_notification(message); +#endif +} + +void MDSNotificationManager::push_notification_snap(int32_t whoami, CInode *in, + const std::string &snapname, + uint64_t notify_mask) { +#ifdef WITH_CEPHFS_NOTIFICATION + std::string path; + in->make_path_string(path, true, nullptr); + std::shared_ptr message = + std::make_shared( + cur_notification_seq_id.fetch_add(1)); + message->create_snap_message(whoami, session_id, notify_mask, path, + std::string(snapname)); + push_notification(message); +#endif +} diff --git a/src/mds/MDSNotificationManager.h b/src/mds/MDSNotificationManager.h new file mode 100644 index 0000000000000..ac0f51ed282df --- /dev/null +++ b/src/mds/MDSNotificationManager.h @@ -0,0 +1,50 @@ +#pragma once + +#include "CDentry.h" +#include "CInode.h" + +#ifdef WITH_CEPHFS_NOTIFICATION +#include "MDSKafka.h" +#include "MDSNotificationMessage.h" +#include "MDSUDPEndpoint.h" +#endif + +#include "common/ceph_context.h" +#include "include/buffer.h" +#include + +class MDSNotificationManager { +public: + MDSNotificationManager(CephContext *cct); + +#ifdef WITH_CEPHFS_NOTIFICATION + int add_kafka_topic(const std::string &topic_name, + const connection_t &connection); + int remove_kafka_topic(const std::string &topic_name); + int add_udp_endpoint(const std::string &name, const std::string &ip, + int port); + int remove_udp_endpoint(const std::string &name); +#endif + + void push_notification(int32_t whoami, CInode *in, uint64_t notify_mask); + void push_notification_link(int32_t whoami, CInode *targeti, CDentry *destdn, + uint64_t notify_mask_for_target, + uint64_t notify_mask_for_link); + void push_notification_move(int32_t whoami, CDentry *srcdn, CDentry *destdn); + void push_notification_snap(int32_t whoami, CInode *in, + const std::string &snapname, + uint64_t notify_mask); + +private: + +#ifdef WITH_CEPHFS_NOTIFICATION + std::unique_ptr kafka_manager; + std::unique_ptr udp_manager; + void + push_notification(const std::shared_ptr &message); +#endif + + CephContext *cct; + std::atomic cur_notification_seq_id; + std::string session_id; +}; \ No newline at end of file diff --git a/src/mds/MDSNotificationMessage.cc b/src/mds/MDSNotificationMessage.cc new file mode 100644 index 0000000000000..ca1ed540f749e --- /dev/null +++ b/src/mds/MDSNotificationMessage.cc @@ -0,0 +1,80 @@ +#include "MDSNotificationMessage.h" +#include "common/Clock.h" +#include "common/ceph_json.h" + +#define dout_subsys ceph_subsys_mds + +MDSNotificationMessage::MDSNotificationMessage(uint64_t seq_id) + : seq_id(seq_id) {} + +void MDSNotificationMessage::create_message(int32_t whoami, + const std::string &session_id, + const uint64_t mask, + const std::string &path) { + 
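// every notification is serialized as one flat JSON object; all the
+  // create_*_message variants below share the timestamp/mds_id/session_id/
+  // seq_id preamble
+  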
JSONFormatter f; + f.open_object_section(""); + ceph_clock_now().gmtime_nsec(f.dump_stream("timestamp")); + f.dump_int("mds_id", (int64_t)whoami); + f.dump_string("session_id", session_id); + f.dump_unsigned("seq_id", seq_id); + f.dump_unsigned("mask", mask); + f.dump_string("path", path); + f.close_section(); + f.flush(message); +} + +void MDSNotificationMessage::create_move_message(int32_t whoami, + const std::string &session_id, + uint64_t src_mask, + uint64_t dest_mask, + const std::string &src_path, + const std::string &dest_path) { + JSONFormatter f; + f.open_object_section(""); + ceph_clock_now().gmtime_nsec(f.dump_stream("timestamp")); + f.dump_int("mds_id", (int64_t)whoami); + f.dump_string("session_id", session_id); + f.dump_unsigned("seq_id", seq_id); + f.dump_unsigned("src_mask", src_mask); + f.dump_unsigned("dest_mask", dest_mask); + f.dump_string("src_path", src_path); + f.dump_string("dest_path", dest_path); + f.close_section(); + f.flush(message); +} + +void MDSNotificationMessage::create_link_message(int32_t whoami, + const std::string &session_id, + uint64_t target_mask, + uint64_t link_mask, + const std::string &target_path, + const std::string &link_path) { + JSONFormatter f; + f.open_object_section(""); + ceph_clock_now().gmtime_nsec(f.dump_stream("timestamp")); + f.dump_int("mds_id", (int64_t)whoami); + f.dump_string("session_id", session_id); + f.dump_unsigned("seq_id", seq_id); + f.dump_unsigned("target_mask", target_mask); + f.dump_unsigned("link_mask", link_mask); + f.dump_string("target_path", target_path); + f.dump_string("link_path", link_path); + f.close_section(); + f.flush(message); +} + +void MDSNotificationMessage::create_snap_message( + int32_t whoami, const std::string &session_id, uint64_t mask, + const std::string &path, const std::string &snapshot_name) { + JSONFormatter f; + f.open_object_section(""); + ceph_clock_now().gmtime_nsec(f.dump_stream("timestamp")); + f.dump_int("mds_id", (int64_t)whoami); + f.dump_string("session_id", session_id); + f.dump_unsigned("seq_id", seq_id); + f.dump_unsigned("mask", mask); + f.dump_string("path", path); + f.dump_string("snapshot_name", snapshot_name); + f.close_section(); + f.flush(message); +} diff --git a/src/mds/MDSNotificationMessage.h b/src/mds/MDSNotificationMessage.h new file mode 100644 index 0000000000000..a203127791863 --- /dev/null +++ b/src/mds/MDSNotificationMessage.h @@ -0,0 +1,24 @@ +#pragma once +#include "common/ceph_context.h" +#include "include/Context.h" +#include "include/buffer.h" +#include + +struct MDSNotificationMessage { + bufferlist message; + uint64_t seq_id; + MDSNotificationMessage(uint64_t seq_id); + void create_message(int32_t whoami, const std::string &session_id, + uint64_t mask, const std::string &path); + void create_move_message(int32_t whoami, const std::string &session_id, + uint64_t src_mask, uint64_t dest_mask, + const std::string &src_path, + const std::string &dest_path); + void create_link_message(int32_t whoami, const std::string &session_id, + uint64_t target_mask, uint64_t link_mask, + const std::string &target_path, + const std::string &link_path); + void create_snap_message(int32_t whoami, const std::string &session_id, + uint64_t mask, const std::string &path, + const std::string &snapshot_name); +}; \ No newline at end of file diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 1cd742423e641..c5c30b2a9fb6d 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -26,6 +26,11 @@ #include "messages/MMDSTableRequest.h" #include "messages/MMDSMetrics.h" 
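+// MDS-to-MDS messages used to replicate notification endpoint changes
+// to the other up ranks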
+#ifdef WITH_CEPHFS_NOTIFICATION +#include "messages/MNotificationInfoKafkaTopic.h" +#include "messages/MNotificationInfoUDPEndpoint.h" +#endif + #include "mgr/MgrClient.h" #include "MDSDaemon.h" @@ -1066,10 +1071,15 @@ bool MDSRank::_dispatch(const cref_t &m, bool new_msg) if (quiesce_dispatch(m)) { return true; } - if (is_stale_message(m)) { return true; } +#ifdef WITH_CEPHFS_NOTIFICATION + if (is_notification_info(m)) { + return true; + } +#endif + // do not proceed if this message cannot be handled if (!is_valid_message(m)) { return false; @@ -1175,6 +1185,32 @@ bool MDSRank::_dispatch(const cref_t &m, bool new_msg) return true; } +#ifdef WITH_CEPHFS_NOTIFICATION +bool MDSRank::is_notification_info(const cref_t &m) { + if (m->get_type() == MSG_MDS_NOTIFICATION_INFO_KAFKA_TOPIC) { + const auto &req = ref_cast(m); + if (!req->is_remove) { + server->add_kafka_topic(req->topic_name, + connection_t(req->broker, req->use_ssl, req->user, + req->password, req->ca_location, + req->mechanism)); + } else { + server->remove_kafka_topic(req->topic_name); + } + return true; + } else if (m->get_type() == MSG_MDS_NOTIFICATION_INFO_UDP_ENDPOINT) { + const auto &req = ref_cast(m); + if (!req->is_remove) { + server->add_udp_endpoint(req->name, req->ip, req->port); + } else { + server->remove_udp_endpoint(req->name); + } + return true; + } + return false; +} +#endif + void MDSRank::update_mlogger() { if (mlogger) { @@ -1474,6 +1510,18 @@ class C_MDS_RetrySendMessageMDS : public MDSInternalContext { ref_t m; }; +#ifdef WITH_CEPHFS_NOTIFICATION +void MDSRank::send_notification_info_to_peers(const ref_t &m) { + set up; + get_mds_map()->get_up_mds_set(up); + for (const auto &r : up) { + if (r == get_nodeid()) { + continue; + } + send_message_mds(m, r); + } +} +#endif int MDSRank::send_message_mds(const ref_t& m, mds_rank_t mds) { @@ -3098,7 +3146,60 @@ void MDSRankDispatcher::handle_asok_command( } else if (command == "quiesce db") { command_quiesce_db(cmdmap, on_finish); return; - } else { + } +#ifdef WITH_CEPHFS_NOTIFICATION + else if (command == "add_topic") { + std::string topic_name, broker, username; + std::string password; + bool use_ssl; + std::optional ca_location, mechanism; + cmd_getval(cmdmap, "topic_name", topic_name); + cmd_getval(cmdmap, "broker", broker); + if (!cmd_getval(cmdmap, "use_ssl", use_ssl)) { + use_ssl = false; + } + cmd_getval(cmdmap, "username", username); + cmd_getval(cmdmap, "password", password); + std::string ca, mch; + if (cmd_getval(cmdmap, "ca_location", ca)) { + ca_location = ca; + } + if (cmd_getval(cmdmap, "mechanism", mch)) { + mechanism = mch; + } + auto m = make_message(topic_name, broker, use_ssl, username, password, + ca_location, mechanism); + send_notification_info_to_peers(m); + server->add_kafka_topic(topic_name, connection_t(broker, use_ssl, username, password, + ca_location, mechanism)); + r = 0; + } else if (command == "remove_topic") { + std::string topic_name; + cmd_getval(cmdmap, "topic_name", topic_name); + auto m = make_message (topic_name, true); + send_notification_info_to_peers(m); + server->remove_kafka_topic(topic_name); + r = 0; + } else if (command == "add_udp_endpoint") { + std::string ip, name; + int64_t port; + cmd_getval(cmdmap, "entity", name); + cmd_getval(cmdmap, "ip", ip); + cmd_getval(cmdmap, "port", port); + auto m = make_message(name, ip, port); + send_notification_info_to_peers(m); + server->add_udp_endpoint(name, ip, (int)port); + r = 0; + } else if (command == "remove_udp_endpoint") { + std::string name; + cmd_getval(cmdmap, 
"entity", name); + auto m = make_message (name, true); + send_notification_info_to_peers(m); + server->remove_udp_endpoint(name); + r = 0; + } +#endif + else { r = -CEPHFS_ENOSYS; } out: diff --git a/src/mds/MDSRank.h b/src/mds/MDSRank.h index c4a8809b6e1a0..d915f9406d706 100644 --- a/src/mds/MDSRank.h +++ b/src/mds/MDSRank.h @@ -46,6 +46,10 @@ #include "osdc/Journaler.h" #include "MDSMetaRequest.h" +#ifdef WITH_CEPHFS_NOTIFICATION +#include "MDSKafka.h" +#endif + // Full .h import instead of forward declaration for PerfCounter, for the // benefit of those including this header and using MDSRank::logger #include "common/perf_counters.h" @@ -649,6 +653,12 @@ class MDSRank { bool standby_replaying = false; // true if current replay pass is in standby-replay mode uint64_t extraordinary_events_dump_interval = 0; double inject_journal_corrupt_dentry_first = 0.0; +protected: + +#ifdef WITH_CEPHFS_NOTIFICATION + void send_notification_info_to_peers(const ref_t& m); +#endif + private: bool send_status = true; @@ -661,6 +671,10 @@ class MDSRank { bool client_eviction_dump = false; +#ifdef WITH_CEPHFS_NOTIFICATION + bool is_notification_info(const cref_t& m); +#endif + void get_task_status(std::map *status); void schedule_update_timer_task(); void send_task_status(); diff --git a/src/mds/MDSUDPEndpoint.cc b/src/mds/MDSUDPEndpoint.cc new file mode 100644 index 0000000000000..a82be52747c09 --- /dev/null +++ b/src/mds/MDSUDPEndpoint.cc @@ -0,0 +1,97 @@ +#include "MDSUDPEndpoint.h" +#include "include/fs_types.h" + +#define dout_subsys ceph_subsys_mds + +int MDSUDPManager::send( + const std::shared_ptr &message) { + std::shared_lock lock(endpoint_mutex); + std::vector buf(2); + for (auto &[key, endpoint] : endpoints) { + uint64_t len = message->message.length(); + buf[0] = boost::asio::buffer(&len, sizeof(len)); + buf[1] = boost::asio::buffer(message->message.c_str(), + message->message.length()); + endpoint->publish_internal(buf, message->seq_id); + } + return 0; +} + +int MDSUDPManager::add_endpoint(const std::string &name, const std::string &ip, + int port) { + std::unique_lock lock(endpoint_mutex); + auto it = endpoints.find(name); + if (it == endpoints.end() && endpoints.size() >= MAX_CONNECTIONS_DEFAULT) { + ldout(cct, 1) << "UDP connect: max connections exceeded" << dendl; + return -CEPHFS_ENOMEM; + } + std::shared_ptr new_endpoint = + MDSUDPEndpoint::create(cct, name, ip, port); + if (!new_endpoint) { + ldout(cct, 1) << "UDP connect: udp endpoint creation failed" << dendl; + return -CEPHFS_ECANCELED; + } + endpoints[name] = new_endpoint; + ldout(cct, 1) << "UDP endpoint with entity name '" << name + << "' is added successfully" << dendl; + return 0; +} + +int MDSUDPManager::remove_endpoint(const std::string &name) { + std::unique_lock lock(endpoint_mutex); + auto it = endpoints.find(name); + if (it != endpoints.end()) { + endpoints.erase(it); + ldout(cct, 1) << "UDP endpoint with entity name '" << name + << "' is removed successfully" << dendl; + return 0; + } + ldout(cct, 1) << "No UDP endpoint exist with entity name '" << name << "'" + << dendl; + return -CEPHFS_EINVAL; +} + +MDSUDPEndpoint::MDSUDPEndpoint(CephContext *cct, const std::string &name, + const std::string &ip, int port) + : cct(cct), name(name), socket(io_context), ip(ip), port(port), + endpoint(boost::asio::ip::address::from_string(ip), port) { + try { + boost::system::error_code ec; + socket.open(boost::asio::ip::udp::v4(), ec); + if (ec) { + throw std::runtime_error(ec.message()); + } + } catch (const std::exception &e) { + 
lderr(cct) << "Error occurred while opening UDP socket with error: "
+               << e.what() << dendl;
+    throw;
+  }
+}
+
+std::shared_ptr<MDSUDPEndpoint> MDSUDPEndpoint::create(CephContext *cct,
+                                                       const std::string &name,
+                                                       const std::string &ip,
+                                                       int port) {
+  try {
+    std::shared_ptr<MDSUDPEndpoint> endpoint =
+        std::make_shared<MDSUDPEndpoint>(cct, name, ip, port);
+    return endpoint;
+  } catch (...) {
+    // constructor already logged the failure; report it as nullptr
+  }
+  return nullptr;
+}
+
+int MDSUDPEndpoint::publish_internal(
+    std::vector<boost::asio::const_buffer> &buf, uint64_t seq_id) {
+  boost::system::error_code ec;
+  socket.send_to(buf, endpoint, 0, ec);
+  if (ec) {
+    ldout(cct, 1) << "Error occurred while sending notification having seq_id="
+                  << seq_id << ": " << ec.message() << dendl;
+    return -ec.value();
+  } else {
+    ldout(cct, 20) << "Notification having seq_id=" << seq_id << " delivered"
+                   << dendl;
+  }
+  return 0;
+}
diff --git a/src/mds/MDSUDPEndpoint.h b/src/mds/MDSUDPEndpoint.h
new file mode 100644
index 0000000000000..9a7504bd7b2e4
--- /dev/null
+++ b/src/mds/MDSUDPEndpoint.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include "MDSNotificationMessage.h"
+#include <boost/asio.hpp>
+#include <memory>
+#include <shared_mutex>
+#include <unordered_map>
+
+class MDSUDPEndpoint;
+
+class MDSUDPManager {
+public:
+  MDSUDPManager(CephContext *cct) : cct(cct) {}
+  int send(const std::shared_ptr<MDSNotificationMessage> &message);
+  int add_endpoint(const std::string &name, const std::string &ip, int port);
+  int remove_endpoint(const std::string &name);
+
+private:
+  CephContext *cct;
+  std::shared_mutex endpoint_mutex;
+  std::unordered_map<std::string, std::shared_ptr<MDSUDPEndpoint>> endpoints;
+  static const size_t MAX_CONNECTIONS_DEFAULT = 256;
+};
+
+class MDSUDPEndpoint {
+public:
+  MDSUDPEndpoint() = delete;
+  MDSUDPEndpoint(CephContext *cct, const std::string &name,
+                 const std::string &ip, int port);
+  int publish_internal(std::vector<boost::asio::const_buffer> &buf,
+                       uint64_t seq_id);
+  static std::shared_ptr<MDSUDPEndpoint> create(CephContext *cct,
+                                                const std::string &name,
+                                                const std::string &ip,
+                                                int port);
+  friend class MDSUDPManager;
+
+private:
+  std::string name;
+  std::string ip;
+  int port;
+  boost::asio::io_context io_context;
+  boost::asio::ip::udp::socket socket;
+  boost::asio::ip::udp::endpoint endpoint;
+  CephContext *cct;
+};
\ No newline at end of file
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index f26625bfd3c7b..06eaea164aabd 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -277,6 +277,19 @@ Server::Server(MDSRank *m, MetricsHandler *metrics_handler) :
   dispatch_killpoint_random = g_conf().get_val<double>("mds_server_dispatch_killpoint_random");
   supported_features = feature_bitset_t(CEPHFS_FEATURES_MDS_SUPPORTED);
   supported_metric_spec = feature_bitset_t(CEPHFS_METRIC_FEATURES_ALL);
+  notification_manager = std::make_unique<MDSNotificationManager>(mds->cct);
 }
 
 void Server::dispatch(const cref_t<Message> &m)
@@ -4845,6 +4858,10 @@ void Server::handle_client_openc(const MDRequestRef& mdr)
 
   set_reply_extra_bl(req, _inode->ino, mdr->reply_extra_bl);
 
+  notification_manager->push_notification(mds->get_nodeid(), newi,
+                                          CEPH_MDS_NOTIFY_CREATE |
+                                          CEPH_MDS_NOTIFY_OPEN);
+
   journal_and_reply(mdr, newi, dn, le, fin);
 
   // We hit_dir (via hit_inode) in our finish callback, but by then we might
@@ 
-5522,7 +5539,10 @@ void Server::handle_client_setattr(const MDRequestRef& mdr) le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY); mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur); - + + notification_manager->push_notification(mds->get_nodeid(), cur, + CEPH_MDS_NOTIFY_ATTRIB); + journal_and_reply(mdr, cur, 0, le, new C_MDS_inode_update_finish(this, mdr, cur, truncating_smaller, changed_ranges)); @@ -5587,6 +5607,11 @@ void Server::do_open_truncate(const MDRequestRef& mdr, int cmode) dn = mdr->dn[0].back(); } + notification_manager->push_notification(mds->get_nodeid(), in, + CEPH_MDS_NOTIFY_MODIFY | + CEPH_MDS_NOTIFY_ACCESS | + CEPH_MDS_NOTIFY_OPEN); + journal_and_reply(mdr, in, dn, le, new C_MDS_inode_update_finish(this, mdr, in, old_size > 0, changed_ranges)); // Although the `open` part can give an early reply, the truncation won't @@ -5675,6 +5700,9 @@ void Server::handle_client_setlayout(const MDRequestRef& mdr) le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY); mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur); + + notification_manager->push_notification(mds->get_nodeid(), cur, + CEPH_MDS_NOTIFY_ATTRIB); journal_and_reply(mdr, cur, 0, le, new C_MDS_inode_update_finish(this, mdr, cur)); } @@ -5792,6 +5820,10 @@ void Server::handle_client_setdirlayout(const MDRequestRef& mdr) mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur); mdr->no_early_reply = true; + + notification_manager->push_notification(mds->get_nodeid(), cur, + CEPH_MDS_NOTIFY_ATTRIB); + journal_and_reply(mdr, cur, 0, le, new C_MDS_inode_update_finish(this, mdr, cur)); } @@ -6495,6 +6527,9 @@ void Server::handle_client_setvxattr(const MDRequestRef& mdr, CInode *cur) mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY); mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur); + notification_manager->push_notification(mds->get_nodeid(), cur, + CEPH_MDS_NOTIFY_ATTRIB); + journal_and_reply(mdr, cur, 0, le, new C_MDS_inode_update_finish(this, mdr, cur, false, false, adjust_realm)); return; @@ -6777,6 +6812,9 @@ void Server::handle_client_setxattr(const MDRequestRef& mdr) mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY); mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur); + notification_manager->push_notification(mds->get_nodeid(), cur, + CEPH_MDS_NOTIFY_ATTRIB); + journal_and_reply(mdr, cur, 0, le, new C_MDS_inode_update_finish(this, mdr, cur)); } @@ -6846,6 +6884,9 @@ void Server::handle_client_removexattr(const MDRequestRef& mdr) mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY); mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur); + notification_manager->push_notification(mds->get_nodeid(), cur, + CEPH_MDS_NOTIFY_ATTRIB); + journal_and_reply(mdr, cur, 0, le, new C_MDS_inode_update_finish(this, mdr, cur)); } @@ -7162,6 +7203,10 @@ void Server::handle_client_mknod(const MDRequestRef& mdr) PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); le->metablob.add_primary_dentry(dn, newi, true, true, true); + notification_manager->push_notification(mds->get_nodeid(), newi, + CEPH_MDS_NOTIFY_CREATE | + CEPH_MDS_NOTIFY_ATTRIB); + journal_and_reply(mdr, newi, dn, le, new C_MDS_mknod_finish(this, mdr, dn, newi)); mds->balancer->maybe_fragment(dn->get_dir(), false); } @@ -7173,7 +7218,6 @@ void 
Server::handle_client_mknod(const MDRequestRef& mdr) void Server::handle_client_mkdir(const MDRequestRef& mdr) { const cref_t &req = mdr->client_request; - mdr->disable_lock_cache(); CDentry *dn = rdlock_path_xlock_dentry(mdr, true); if (!dn) @@ -7253,6 +7297,9 @@ void Server::handle_client_mkdir(const MDRequestRef& mdr) // make sure this inode gets into the journal le->metablob.add_opened_ino(newi->ino()); + notification_manager->push_notification(mds->get_nodeid(), newi, + CEPH_MDS_NOTIFY_CREATE); + journal_and_reply(mdr, newi, dn, le, new C_MDS_mknod_finish(this, mdr, dn, newi)); // We hit_dir (via hit_inode) in our finish callback, but by then we might @@ -7316,6 +7363,9 @@ void Server::handle_client_symlink(const MDRequestRef& mdr) mdcache->predirty_journal_parents(mdr, &le->metablob, newi, dn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); le->metablob.add_primary_dentry(dn, newi, true, true); + notification_manager->push_notification(mds->get_nodeid(), newi, + CEPH_MDS_NOTIFY_CREATE); + journal_and_reply(mdr, newi, dn, le, new C_MDS_mknod_finish(this, mdr, dn, newi)); mds->balancer->maybe_fragment(dir, false); @@ -7447,6 +7497,10 @@ void Server::handle_client_link(const MDRequestRef& mdr) // go! ceph_assert(g_conf()->mds_kill_link_at != 1); + notification_manager->push_notification_link(mds->get_nodeid(), targeti, destdn, + CEPH_MDS_NOTIFY_ATTRIB, + CEPH_MDS_NOTIFY_CREATE); + // local or remote? if (targeti->is_auth()) _link_local(mdr, destdn, targeti, target_realm); @@ -7511,7 +7565,6 @@ void Server::_link_local(const MDRequestRef& mdr, CDentry *dn, CInode *targeti, // do this after predirty_*, to avoid funky extra dnl arg dn->push_projected_linkage(targeti->ino(), targeti->d_type()); - journal_and_reply(mdr, targeti, dn, le, new C_MDS_link_local_finish(this, mdr, dn, targeti, dnpv, tipv, adjust_realm)); } @@ -7633,7 +7686,6 @@ void Server::_link_remote(const MDRequestRef& mdr, bool inc, CDentry *dn, CInode le->metablob.add_null_dentry(dn, true); dn->push_projected_linkage(); } - journal_and_reply(mdr, (inc ? targeti : nullptr), dn, le, new C_MDS_link_remote_finish(this, mdr, inc, dn, targeti)); } @@ -8035,7 +8087,6 @@ void Server::handle_client_unlink(const MDRequestRef& mdr) { const cref_t &req = mdr->client_request; client_t client = mdr->get_client(); - // rmdir or unlink? bool rmdir = (req->get_op() == CEPH_MDS_OP_RMDIR); @@ -8176,11 +8227,16 @@ void Server::handle_client_unlink(const MDRequestRef& mdr) if (!rmdir && dnl->is_primary() && mdr->dn[0].size() == 1) mds->locker->create_lock_cache(mdr, diri); + notification_manager->push_notification_link(mds->get_nodeid(), in, dn, + CEPH_MDS_NOTIFY_ATTRIB, + CEPH_MDS_NOTIFY_DELETE); + // ok! 
if (dnl->is_remote() && !dnl->get_inode()->is_auth()) _link_remote(mdr, false, dn, dnl->get_inode()); else _unlink_local(mdr, dn, straydn); + } class C_MDS_unlink_local_finish : public ServerLogContext { @@ -8283,7 +8339,6 @@ void Server::_unlink_local(const MDRequestRef& mdr, CDentry *dn, CDentry *strayd ceph_assert(straydn); mdcache->project_subtree_rename(in, dn->get_dir(), straydn->get_dir()); } - journal_and_reply(mdr, 0, dn, le, new C_MDS_unlink_local_finish(this, mdr, dn, straydn)); } @@ -9206,6 +9261,9 @@ void Server::handle_client_rename(const MDRequestRef& mdr) // -- commit locally -- C_MDS_rename_finish *fin = new C_MDS_rename_finish(this, mdr, srcdn, destdn, straydn); + notification_manager->push_notification_move(mds->get_nodeid(), srcdn, + destdn); + journal_and_reply(mdr, srci, destdn, le, fin); mds->balancer->maybe_fragment(destdn->get_dir(), false); } @@ -11175,6 +11233,10 @@ void Server::handle_client_mksnap(const MDRequestRef& mdr) le->metablob.add_table_transaction(TABLE_SNAP, stid); mdcache->predirty_journal_parents(mdr, &le->metablob, diri, 0, PREDIRTY_PRIMARY, false); mdcache->journal_dirty_inode(mdr.get(), &le->metablob, diri); + + notification_manager->push_notification_snap( + mds->get_nodeid(), diri, std::string(snapname), + CEPH_MDS_NOTIFY_CREATE | CEPH_MDS_NOTIFY_ATTRIB); // journal the snaprealm changes submit_mdlog_entry(le, new C_MDS_mksnap_finish(this, mdr, diri, info), @@ -11309,6 +11371,10 @@ void Server::handle_client_rmsnap(const MDRequestRef& mdr) mdcache->predirty_journal_parents(mdr, &le->metablob, diri, 0, PREDIRTY_PRIMARY, false); mdcache->journal_dirty_inode(mdr.get(), &le->metablob, diri); + notification_manager->push_notification_snap( + mds->get_nodeid(), diri, std::string(snapname), + CEPH_MDS_NOTIFY_DELETE | CEPH_MDS_NOTIFY_ATTRIB); + submit_mdlog_entry(le, new C_MDS_rmsnap_finish(this, mdr, diri, snapid), mdr, __func__); mdlog->flush(); @@ -11926,3 +11992,23 @@ bool Server::build_snap_diff( } return it == dir->end(); } + +#ifdef WITH_CEPHFS_NOTIFICATION +int Server::add_kafka_topic(const std::string &topic_name, + const connection_t &connection) { + return notification_manager->add_kafka_topic(topic_name, connection); +} + +int Server::remove_kafka_topic(const std::string& topic_name) { + return notification_manager->remove_kafka_topic(topic_name); +} + +int Server::add_udp_endpoint(const std::string &name, const std::string &ip, + int port) { + return notification_manager->add_udp_endpoint(name, ip, port); +} + +int Server::remove_udp_endpoint(const std::string& name) { + return notification_manager->remove_udp_endpoint(name); +} +#endif diff --git a/src/mds/Server.h b/src/mds/Server.h index 68842ea01cbeb..e17da02db2a7a 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -37,6 +37,11 @@ using namespace std::literals::string_view_literals; #include "Mutation.h" #include "MDSContext.h" +#ifdef WITH_CEPHFS_NOTIFICATION +#include "MDSKafka.h" +#endif +#include "MDSNotificationManager.h" + class OSDMap; class LogEvent; class EMetaBlob; @@ -122,6 +127,14 @@ class Server { return last_recall_state; } +#ifdef WITH_CEPHFS_NOTIFICATION + int add_kafka_topic(const std::string &topic_name, + const connection_t &connection); + int remove_kafka_topic(const std::string& topic_name); + int add_udp_endpoint(const std::string& name, const std::string& ip, int port); + int remove_udp_endpoint(const std::string& name); +#endif + void handle_client_session(const cref_t &m); void _session_logged(Session *session, uint64_t state_seq, bool open, version_t pv, 
const interval_set& inos_to_free, version_t piv, @@ -525,6 +538,7 @@ class Server { MDCache *mdcache; MDLog *mdlog; PerfCounters *logger = nullptr; + std::unique_ptr notification_manager; // OSDMap full status, used to generate CEPHFS_ENOSPC on some operations bool is_full = false; diff --git a/src/messages/MNotificationInfoKafkaTopic.h b/src/messages/MNotificationInfoKafkaTopic.h new file mode 100644 index 0000000000000..b77468cfcf291 --- /dev/null +++ b/src/messages/MNotificationInfoKafkaTopic.h @@ -0,0 +1,75 @@ +#pragma once +#include "messages/MMDSOp.h" + +class MNotificationInfoKafkaTopic : public MMDSOp { + static constexpr int HEAD_VERSION = 1; + static constexpr int COMPAT_VERSION = 1; + +public: + std::string topic_name; + std::string broker; + bool use_ssl; + std::string user, password; + std::optional ca_location; + std::optional mechanism; + bool is_remove; + +protected: + MNotificationInfoKafkaTopic() + : MMDSOp(MSG_MDS_NOTIFICATION_INFO_KAFKA_TOPIC, HEAD_VERSION, + COMPAT_VERSION) {} + MNotificationInfoKafkaTopic(const std::string &topic_name, + bool is_remove = true) + : MMDSOp(MSG_MDS_NOTIFICATION_INFO_KAFKA_TOPIC, HEAD_VERSION, + COMPAT_VERSION), + topic_name(topic_name), is_remove(is_remove) {} + MNotificationInfoKafkaTopic(const std::string &topic_name, + const std::string &broker, bool use_ssl, + const std::string &user, + const std::string &password, + const std::optional &ca_location, + const std::optional &mechanism, + bool is_remove = false) + : MMDSOp(MSG_MDS_NOTIFICATION_INFO_KAFKA_TOPIC, HEAD_VERSION, + COMPAT_VERSION), + topic_name(topic_name), broker(broker), use_ssl(use_ssl), user(user), + password(password), ca_location(ca_location), mechanism(mechanism), + is_remove(is_remove) {} + ~MNotificationInfoKafkaTopic() final {} + +public: + std::string_view get_type_name() const override { return "mdskafka_topic"; } + + void print(std::ostream &out) const override { out << "mdskafka_topic"; } + + void encode_payload(uint64_t features) override { + using ceph::encode; + encode(topic_name, payload); + encode(broker, payload); + encode(use_ssl, payload); + encode(user, payload); + encode(password, payload); + encode(ca_location, payload); + encode(mechanism, payload); + encode(is_remove, payload); + } + + void decode_payload() override { + using ceph::decode; + auto p = payload.cbegin(); + decode(topic_name, p); + decode(broker, p); + decode(use_ssl, p); + decode(user, p); + decode(password, p); + decode(ca_location, p); + decode(mechanism, p); + decode(is_remove, p); + } + +private: + template + friend boost::intrusive_ptr ceph::make_message(Args &&...args); + template + friend MURef crimson::make_message(Args &&...args); +}; diff --git a/src/messages/MNotificationInfoUDPEndpoint.h b/src/messages/MNotificationInfoUDPEndpoint.h new file mode 100644 index 0000000000000..46e6fcf0979d3 --- /dev/null +++ b/src/messages/MNotificationInfoUDPEndpoint.h @@ -0,0 +1,60 @@ +#pragma once +#include "messages/MMDSOp.h" + +class MNotificationInfoUDPEndpoint : public MMDSOp { + static constexpr int HEAD_VERSION = 1; + static constexpr int COMPAT_VERSION = 1; + +public: + std::string name; + std::string ip; + int port; + bool is_remove; + +protected: + MNotificationInfoUDPEndpoint() + : MMDSOp(MSG_MDS_NOTIFICATION_INFO_UDP_ENDPOINT, HEAD_VERSION, + COMPAT_VERSION) {} + MNotificationInfoUDPEndpoint(const std::string &name, bool is_remove = true) + : MMDSOp(MSG_MDS_NOTIFICATION_INFO_UDP_ENDPOINT, HEAD_VERSION, + COMPAT_VERSION), + name(name), is_remove(is_remove) {} + 
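+// The two constructors mirror the two wire forms: the name-only overload with
+// is_remove=true asks peers to drop an endpoint, while the full form below
+// re-creates it. A hedged sketch of the sending side (argument values are
+// invented; send_notification_info_to_peers() is the MDSRank helper added
+// elsewhere in this series):
+//
+//   auto add = make_message<MNotificationInfoUDPEndpoint>("ep1", "10.0.0.5", 31337);
+//   auto rm  = make_message<MNotificationInfoUDPEndpoint>("ep1");
+//   send_notification_info_to_peers(add);  // fans out to every other up rank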
MNotificationInfoUDPEndpoint(const std::string &name, const std::string &ip, + int port, bool is_remove = false) + : MMDSOp(MSG_MDS_NOTIFICATION_INFO_UDP_ENDPOINT, HEAD_VERSION, + COMPAT_VERSION), + name(name), ip(ip), port(port), is_remove(is_remove) {} + ~MNotificationInfoUDPEndpoint() final {} + +public: + std::string_view get_type_name() const override { + return "mdsudp_notification_client"; + } + + void print(std::ostream &out) const override { + out << "mdsudp_notification_client"; + } + + void encode_payload(uint64_t features) override { + using ceph::encode; + encode(name, payload); + encode(ip, payload); + encode(port, payload); + encode(is_remove, payload); + } + + void decode_payload() override { + using ceph::decode; + auto p = payload.cbegin(); + decode(name, p); + decode(ip, p); + decode(port, p); + decode(is_remove, p); + } + +private: + template + friend boost::intrusive_ptr ceph::make_message(Args &&...args); + template + friend MURef crimson::make_message(Args &&...args); +}; diff --git a/src/msg/Message.cc b/src/msg/Message.cc index f649e0f3d3ee2..5833d5b490428 100644 --- a/src/msg/Message.cc +++ b/src/msg/Message.cc @@ -157,6 +157,11 @@ #include "messages/MMDSFragmentNotify.h" #include "messages/MMDSFragmentNotifyAck.h" +#ifdef WITH_CEPHFS_NOTIFICATION +#include "messages/MNotificationInfoKafkaTopic.h" +#include "messages/MNotificationInfoUDPEndpoint.h" +#endif + #include "messages/MExportDirDiscover.h" #include "messages/MExportDirDiscoverAck.h" #include "messages/MExportDirCancel.h" @@ -884,6 +889,16 @@ Message *decode_message(CephContext *cct, m = make_message(); break; +#ifdef WITH_CEPHFS_NOTIFICATION + case MSG_MDS_NOTIFICATION_INFO_KAFKA_TOPIC: + m = make_message (); + break; + + case MSG_MDS_NOTIFICATION_INFO_UDP_ENDPOINT: + m = make_message (); + break; +#endif + case MSG_MGR_BEACON: m = make_message(); break; diff --git a/src/msg/Message.h b/src/msg/Message.h index 3e5c58ec376b4..ec066b0f4742d 100644 --- a/src/msg/Message.h +++ b/src/msg/Message.h @@ -202,6 +202,11 @@ #define MSG_MDS_QUIESCE_DB_LISTING 0x505 // quiesce db replication #define MSG_MDS_QUIESCE_DB_ACK 0x506 // quiesce agent ack back to the db +#ifdef WITH_CEPHFS_NOTIFICATION +#define MSG_MDS_NOTIFICATION_INFO_KAFKA_TOPIC 0x507 +#define MSG_MDS_NOTIFICATION_INFO_UDP_ENDPOINT 0x508 +#endif + // *** generic *** #define MSG_TIMECHECK 0x600 #define MSG_MON_HEALTH 0x601 From e3b9fde24184a24f70a3421abecf21fb85647b59 Mon Sep 17 00:00:00 2001 From: sajibreadd Date: Fri, 18 Oct 2024 13:19:30 +0200 Subject: [PATCH 2/5] Storage mechanism added for kafka and udp endpoints Signed-off-by: Md Mahamudur Rahaman Sajib --- src/mds/MDSKafka.cc | 258 +++++++++++++++++++++++++----- src/mds/MDSKafka.h | 57 +++++-- src/mds/MDSNotificationManager.cc | 86 ++++++++-- src/mds/MDSNotificationManager.h | 32 ++-- src/mds/MDSRank.cc | 95 ++++++----- src/mds/MDSRank.h | 16 +- src/mds/MDSUDPEndpoint.cc | 189 +++++++++++++++++++--- src/mds/MDSUDPEndpoint.h | 51 ++++-- src/mds/Server.cc | 54 ++----- src/mds/Server.h | 16 +- 10 files changed, 641 insertions(+), 213 deletions(-) diff --git a/src/mds/MDSKafka.cc b/src/mds/MDSKafka.cc index 61370f2b502c1..5495b4a58b9d7 100644 --- a/src/mds/MDSKafka.cc +++ b/src/mds/MDSKafka.cc @@ -1,5 +1,7 @@ #include "MDSKafka.h" +#include "common/Cond.h" +#include "common/errno.h" #include "include/fs_types.h" #define dout_subsys ceph_subsys_mds @@ -7,20 +9,158 @@ CephContext *MDSKafka::cct = nullptr; CephContext *MDSKafkaTopic::cct = nullptr; -connection_t::connection_t(const std::string &broker, 
bool use_ssl, - const std::string &user, const std::string &password, - const std::optional &ca_location, - const std::optional &mechanism) +MDSKafkaConnection::MDSKafkaConnection( + const std::string &broker, bool use_ssl, const std::string &user, + const std::string &password, const std::optional &ca_location, + const std::optional &mechanism) : broker(broker), use_ssl(use_ssl), user(user), password(password), ca_location(ca_location), mechanism(mechanism) { combine_hash(); } -MDSKafkaManager::MDSKafkaManager(CephContext *cct) - : cct(cct), stop_flag(true) {} +void MDSKafkaConnection::encode(ceph::buffer::list &bl) const { + ENCODE_START(1, 1, bl); + encode(broker, bl); + encode(use_ssl, bl); + encode(user, bl); + encode(password, bl); + encode(ca_location, bl); + encode(mechanism, bl); + ENCODE_FINISH(bl); +} + +void MDSKafkaConnection::decode(ceph::buffer::list::const_iterator &iter) { + DECODE_START(1, iter); + decode(broker, iter); + decode(use_ssl, iter); + decode(user, iter); + decode(password, iter); + decode(ca_location, iter); + decode(mechanism, iter); + DECODE_FINISH(iter); +} -int MDSKafkaManager::remove_topic(const std::string &topic_name) { +void MDSKafkaConnection::dump(ceph::Formatter *f) const { + f->dump_string("broker", broker); + f->dump_bool("use_ssl", use_ssl); + f->dump_string("user", user); + f->dump_string("password", password); + if (ca_location.has_value()) { + f->dump_string("ca_location", ca_location.value()); + } + if (mechanism.has_value()) { + f->dump_string("mechanism", mechanism.value()); + } +} + +void MDSKafkaConnection::generate_test_instances( + std::list &o) { + o.push_back(new MDSKafkaConnection); +} + +MDSKafkaDriver::MDSKafkaDriver(MDSRank *mds, const std::string &object_name) + : mds(mds), object_name(object_name) {} + +MDSKafkaManager::MDSKafkaManager(MDSRank *mds) : cct(mds->cct), paused(true) { + driver = std::make_unique(mds, "mds_kafka_topics"); +} + +int MDSKafkaDriver::load_data(std::map &mp) { + int r = update_omap(std::map()); + if (r < 0) { + return r; + } + C_SaferCond sync_finisher; + ObjectOperation op; + op.omap_get_vals("", "", UINT_MAX, &mp, NULL, NULL); + mds->objecter->read(object_t(object_name), + object_locator_t(mds->get_metadata_pool()), op, + CEPH_NOSNAP, NULL, 0, &sync_finisher); + r = sync_finisher.wait(); + if (r < 0) { + lderr(mds->cct) << "Error reading omap values from object '" << object_name + << "':" << cpp_strerror(r) << dendl; + } + return r; +} + +int MDSKafkaDriver::update_omap(const std::map &mp) { + C_SaferCond sync_finisher; + ObjectOperation op; + op.omap_set(mp); + mds->objecter->mutate( + object_t(object_name), object_locator_t(mds->get_metadata_pool()), op, + SnapContext(), ceph::real_clock::now(), 0, &sync_finisher); + int r = sync_finisher.wait(); + if (r < 0) { + lderr(mds->cct) << "Error updating omap of object '" << object_name + << "':" << cpp_strerror(r) << dendl; + } + return r; +} + +int MDSKafkaDriver::remove_keys(const std::set &st) { + C_SaferCond sync_finisher; + ObjectOperation op; + op.omap_rm_keys(st); + mds->objecter->mutate( + object_t(object_name), object_locator_t(mds->get_metadata_pool()), op, + SnapContext(), ceph::real_clock::now(), 0, &sync_finisher); + int r = sync_finisher.wait(); + if (r < 0) { + lderr(mds->cct) << "Error removing keys from omap of object '" + << object_name << "':" << cpp_strerror(r) << dendl; + } + return r; +} + +int MDSKafkaDriver::add_topic(const std::string &topic_name, + const MDSKafkaConnection &connection) { + std::map mp; + bufferlist bl; + 
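+  // Persistence note: the connection settings are encoded into this
+  // bufferlist and stored as the omap value under key <topic_name> on the
+  // metadata-pool object "mds_kafka_topics"; MDSKafkaManager::init() later
+  // replays each surviving key through load_data() and decode().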
encode(connection, bl);
+  mp[topic_name] = std::move(bl);
+  int r = update_omap(mp);
+  return r;
+}
+
+int MDSKafkaDriver::remove_topic(const std::string &topic_name) {
+  std::set<std::string> st;
+  st.insert(topic_name);
+  int r = remove_keys(st);
+  return r;
+}
+
+int MDSKafkaManager::init() {
+  std::map<std::string, bufferlist> mp;
+  int r = driver->load_data(mp);
+  if (r < 0) {
+    lderr(cct) << "Error occurred while initializing kafka topics" << dendl;
+  }
+  for (auto &[key, val] : mp) {
+    try {
+      MDSKafkaConnection connection;
+      auto iter = val.cbegin();
+      decode(connection, iter);
+      add_topic(key, connection, false);
+    } catch (const ceph::buffer::error &e) {
+      ldout(cct, 1)
+          << "No value exists in the omap of object 'mds_kafka_topics' "
+             "for kafka topic '"
+          << key << "'" << dendl;
+    }
+  }
+  if (endpoints.empty()) {
+    pause();
+  } else {
+    activate();
+  }
+  return r;
+}
+
+int MDSKafkaManager::remove_topic(const std::string &topic_name, bool write_into_disk) {
   std::unique_lock<std::shared_mutex> lock(endpoint_mutex);
+  int r = 0;
   std::shared_ptr<MDSKafka> kafka_from;
   for (auto &[hash_key, endpoint] : endpoints) {
     if (endpoint->has_topic(topic_name)) {
@@ -33,21 +173,33 @@ int MDSKafkaManager::remove_topic(const std::string &topic_name) {
     if (kafka_from->topics.size() == 0) {
       endpoints.erase(kafka_from->connection.hash_key);
     }
-    ldout(cct, 1) << "Kafka topic with topic name '" << topic_name
-                  << "' is removed successfully" << dendl;
-    if (endpoints.empty() && !stop_flag) {
+    if (write_into_disk) {
+      r = driver->remove_topic(topic_name);
+    }
+    if (r == 0) {
+      ldout(cct, 1) << "Kafka topic with topic name '" << topic_name
+                    << "' is removed successfully" << dendl;
+    } else {
+      lderr(cct) << "Kafka topic '" << topic_name
+                 << "' cannot be removed, failed with an error:"
+                 << cpp_strerror(r) << dendl;
+      return r;
+    }
+    if (endpoints.empty()) {
       lock.unlock();
-      stop_worker();
+      pause();
     }
-    return 0;
+    return r;
   }
   ldout(cct, 1) << "No kafka topic exists with topic name '" << topic_name
                 << "'" << dendl;
   return -CEPHFS_EINVAL;
 }

 int MDSKafkaManager::add_topic(const std::string &topic_name,
-                               const connection_t &connection) {
+                               const MDSKafkaConnection &connection,
+                               bool write_into_disk) {
   std::unique_lock<std::shared_mutex> lock(endpoint_mutex);
   std::shared_ptr<MDSKafka> kafka_from, kafka_to;
   for (auto &[hash_key, endpoint] : endpoints) {
@@ -65,23 +217,28 @@ int MDSKafkaManager::add_topic(const std::string &topic_name,
                   << "' is added successfully" << dendl;
     return 0;
   }
+  std::shared_ptr<MDSKafkaTopic> topic;
   bool created = false;
+  int r = 0;
   if (!kafka_to) {
     if (endpoints.size() >= MAX_CONNECTIONS_DEFAULT) {
       ldout(cct, 1) << "Kafka connect: max connections exceeded" << dendl;
-      return -CEPHFS_ENOMEM;
+      r = -CEPHFS_ENOMEM;
+      goto error_occurred;
     }
     kafka_to = MDSKafka::create(cct, connection);
     if (!kafka_to) {
-      return -CEPHFS_ECANCELED;
+      r = -CEPHFS_ECANCELED;
+      goto error_occurred;
     }
     created = true;
   }
-  std::shared_ptr<MDSKafkaTopic> topic =
-      MDSKafkaTopic::create(cct, topic_name, kafka_to);
+  topic = MDSKafkaTopic::create(cct, topic_name, kafka_to);
   if (!topic) {
-    return -CEPHFS_ECANCELED;
+    r = -CEPHFS_ECANCELED;
+    goto error_occurred;
   }
+
   kafka_to->add_topic(topic_name, topic);
   if (created) {
     endpoints[connection.hash_key] = kafka_to;
@@ -92,37 +249,51 @@ int MDSKafkaManager::add_topic(const std::string &topic_name,
       endpoints.erase(kafka_from->connection.hash_key);
     }
   }
+  if (write_into_disk) {
+    r = driver->add_topic(topic_name, connection);
+    if (r < 0) {
+      goto error_occurred;
+    }
+  }
   ldout(cct, 1) << "Kafka topic with topic name '" << topic_name
                 << "' is added successfully" << dendl;
-  if
(stop_flag) {
-    lock.unlock();
-    start_worker();
-  }
-  return 0;
+  lock.unlock();
+  activate();
+  return r;
+error_occurred:
+  lderr(cct) << "Kafka topic '" << topic_name
+             << "' cannot be added, failed with an error:" << cpp_strerror(r)
+             << dendl;
+  return r;
 }

-void MDSKafkaManager::start_worker() {
-  ceph_assert(stop_flag);
-  stop_flag = false;
+void MDSKafkaManager::activate() {
+  if (!paused) {
+    return;
+  }
   worker = std::thread(&MDSKafkaManager::run, this);
-  ldout(cct, 1)
-      << "started worker thread of kafka manager to send notifications"
-      << dendl;
+  paused = false;
+  ldout(cct, 1) << "worker thread of kafka manager started." << dendl;
 }

-void MDSKafkaManager::stop_worker() {
-  ceph_assert(!stop_flag);
-  stop_flag = true;
+void MDSKafkaManager::pause() {
+  if (paused) {
+    return;
+  }
+  paused = true;
   if (worker.joinable()) {
     worker.join();
-    ldout(cct, 1) << "stopped the worker thread of kafka manager as there is "
-                     "no kafka topics to send notifications"
-                  << dendl;
   }
+  ldout(cct, 1) << "paused worker thread of kafka manager as there are no "
+                   "endpoints for sending notifications"
+                << dendl;
 }

 int MDSKafkaManager::send(
     const std::shared_ptr<MDSNotificationMessage> &message) {
+  if (paused) {
+    return -CEPHFS_ECANCELED;
+  }
   std::unique_lock<std::mutex> lock(queue_mutex);
   if (message_queue.size() >= MAX_QUEUE_DEFAULT) {
     ldout(cct, 1) << "Notification message for kafka with seq_id="
@@ -153,9 +324,9 @@ uint64_t MDSKafkaManager::polling(int read_timeout) {
 }

 void MDSKafkaManager::run() {
-  while (!stop_flag) {
+  while (!paused) {
     int send_count = 0, reply_count = 0;
-    while (true) {
+    while (!paused) {
       std::unique_lock<std::mutex> lock(queue_mutex);
       if (message_queue.empty()) {
         break;
@@ -170,7 +341,7 @@ void MDSKafkaManager::run() {
   }
 }

-void connection_t::combine_hash() {
+void MDSKafkaConnection::combine_hash() {
   hash_key = 0;
   boost::hash_combine(hash_key, broker);
   boost::hash_combine(hash_key, use_ssl);
@@ -256,10 +427,11 @@ void MDSKafka::kafka_producer_deleter(rd_kafka_t *producer_ptr) {
   }
 }

-MDSKafka::MDSKafka(const connection_t &connection) : connection(connection) {}
+MDSKafka::MDSKafka(const MDSKafkaConnection &connection)
+    : connection(connection) {}

-std::shared_ptr<MDSKafka> MDSKafka::create(CephContext *_cct,
-                                           const connection_t &connection) {
+std::shared_ptr<MDSKafka>
+MDSKafka::create(CephContext *_cct, const MDSKafkaConnection &connection) {
   try {
     if (!MDSKafka::cct && _cct) {
       MDSKafka::cct = _cct;
@@ -502,8 +674,8 @@ uint64_t MDSKafka::publish_internal(
   int *tag = new int(idx);
   // RdKafka::ErrorCode response = producer->produce(
   //     topic_name, RdKafka::Topic::PARTITION_UA,
-  //     RdKafka::Producer::RK_MSG_COPY, const_cast<char *>(message->c_str()),
-  //     message->length(), nullptr, 0, 0, tag);
+  //     RdKafka::Producer::RK_MSG_COPY, const_cast<char *>(message->c_str()), message->length(), nullptr, 0, 0, tag);
   const auto response = rd_kafka_produce(
       topic_ptr->kafka_topic_ptr.get(), RD_KAFKA_PARTITION_UA,
       RD_KAFKA_MSG_F_COPY, const_cast<char *>(message->message.c_str()),
diff --git a/src/mds/MDSKafka.h b/src/mds/MDSKafka.h
index 755e00d00fce1..01884608fa2e2 100644
--- a/src/mds/MDSKafka.h
+++ b/src/mds/MDSKafka.h
@@ -1,6 +1,7 @@
 #pragma once

 #include "MDSNotificationMessage.h"
+#include "MDSRank.h"
 #include "common/ceph_context.h"
 #include "include/buffer.h"
 #include
@@ -9,8 +10,9 @@
 #include

 class MDSKafka;
+class MDSKafkaTopic;

-struct connection_t {
+struct MDSKafkaConnection {
   std::string broker;
   bool use_ssl;
   std::string user;
@@ -18,27 +20,50 @@ struct connection_t {
   std::optional<std::string> ca_location;
   std::optional<std::string> mechanism;
   uint64_t hash_key;
-  connection_t()
= default; - connection_t(const std::string &broker, bool use_ssl, const std::string &user, - const std::string &password, - const std::optional &ca_location, - const std::optional &mechanism); + MDSKafkaConnection() = default; + MDSKafkaConnection(const std::string &broker, bool use_ssl, + const std::string &user, const std::string &password, + const std::optional &ca_location, + const std::optional &mechanism); void combine_hash(); std::string to_string() const { return broker + ":" + user; } + void encode(ceph::buffer::list &bl) const; + void decode(ceph::buffer::list::const_iterator &iter); + void dump(ceph::Formatter *f) const; + static void generate_test_instances(std::list &o); }; -class MDSKafkaManager { +WRITE_CLASS_ENCODER(MDSKafkaConnection) + +class MDSKafkaDriver { public: - MDSKafkaManager(CephContext *cct); - int add_topic(const std::string &topic_name, const connection_t &connection); + MDSKafkaDriver(MDSRank *mds, const std::string &object_name); + int load_data(std::map &mp); + int add_topic(const std::string &topic_name, + const MDSKafkaConnection &connection); int remove_topic(const std::string &topic_name); + +private: + MDSRank *mds; + std::string object_name; + int update_omap(const std::map &mp); + int remove_keys(const std::set &st); +}; + +class MDSKafkaManager { +public: + MDSKafkaManager(MDSRank *mds); + int init(); + void activate(); + void pause(); + int add_topic(const std::string &topic_name, + const MDSKafkaConnection &connection, bool write_into_disk); + int remove_topic(const std::string &topic_name, bool write_into_disk); int send(const std::shared_ptr &message); CephContext *cct; private: void run(); - void start_worker(); - void stop_worker(); uint64_t publish(const std::shared_ptr &message); uint64_t polling(int read_timeout); static const size_t MAX_CONNECTIONS_DEFAULT = 32; @@ -50,7 +75,8 @@ class MDSKafkaManager { std::mutex queue_mutex; std::queue> message_queue; std::thread worker; - std::atomic stop_flag; + std::unique_ptr driver; + std::atomic paused; }; class MDSKafkaTopic { @@ -66,6 +92,7 @@ class MDSKafkaTopic { static void kafka_topic_deleter(rd_kafka_topic_t *topic_ptr); std::unique_ptr kafka_topic_ptr{nullptr, kafka_topic_deleter}; + friend class MDSKafkaManager; friend class MDSKafka; private: @@ -80,9 +107,9 @@ class MDSKafkaTopic { class MDSKafka { public: MDSKafka() = delete; - MDSKafka(const connection_t &connection); + MDSKafka(const MDSKafkaConnection &connection); static std::shared_ptr create(CephContext *_cct, - const connection_t &connection); + const MDSKafkaConnection &connection); uint64_t publish_internal(const std::shared_ptr &message); uint64_t poll(int read_timeout); @@ -100,7 +127,7 @@ class MDSKafka { std::shared_mutex topic_mutex; std::unordered_map> topics; static CephContext *cct; - connection_t connection; + MDSKafkaConnection connection; static void message_callback(rd_kafka_t *rk, const rd_kafka_message_t *rkmessage, void *opaque); diff --git a/src/mds/MDSNotificationManager.cc b/src/mds/MDSNotificationManager.cc index 0ee56621fc5f3..43ab44f3cb12f 100644 --- a/src/mds/MDSNotificationManager.cc +++ b/src/mds/MDSNotificationManager.cc @@ -2,40 +2,96 @@ #include "include/uuid.h" #define dout_subsys ceph_subsys_mds -MDSNotificationManager::MDSNotificationManager(CephContext *cct) - : cct(cct), cur_notification_seq_id(0) { +MDSNotificationManager::MDSNotificationManager(MDSRank *mds) + : cct(mds->cct), cur_notification_seq_id(0) { #ifdef WITH_CEPHFS_NOTIFICATION uuid_d uid; uid.generate_random(); session_id = 
uid.to_string(); - kafka_manager = std::make_unique(cct); - udp_manager = std::make_unique(cct); + kafka_manager = std::make_unique(mds); + udp_manager = std::make_unique(mds); #endif } +void MDSNotificationManager::init() { #ifdef WITH_CEPHFS_NOTIFICATION -int MDSNotificationManager::add_kafka_topic(const std::string &topic_name, - const connection_t &connection) { - return kafka_manager->add_topic(topic_name, connection); + int r = kafka_manager->init(); + if (r < 0) { + kafka_manager = nullptr; + } + r = udp_manager->init(); + if (r < 0) { + udp_manager = nullptr; + } +#endif +} + +#ifdef WITH_CEPHFS_NOTIFICATION +int MDSNotificationManager::add_kafka_topic( + const std::string &topic_name, const std::string &broker, bool use_ssl, + const std::string &user, const std::string &password, + const std::optional &ca_location, + const std::optional &mechanism, bool write_into_disk) { + if (!kafka_manager) { + ldout(cct, 1) + << "Kafka topic '" << topic_name + << "' creation failed as kafka manager is not initialized correctly" + << dendl; + return -CEPHFS_EFAULT; + } + return kafka_manager->add_topic(topic_name, + MDSKafkaConnection(broker, use_ssl, user, + password, ca_location, + mechanism), + write_into_disk); } -int MDSNotificationManager::remove_kafka_topic(const std::string &topic_name) { - return kafka_manager->remove_topic(topic_name); +int MDSNotificationManager::remove_kafka_topic(const std::string &topic_name, + bool write_into_disk) { + if (!kafka_manager) { + ldout(cct, 1) + << "Kafka topic '" << topic_name + << "' removal failed as kafka manager is not initialized correctly" + << dendl; + return -CEPHFS_EFAULT; + } + return kafka_manager->remove_topic(topic_name, write_into_disk); } int MDSNotificationManager::add_udp_endpoint(const std::string &name, - const std::string &ip, int port) { - return udp_manager->add_endpoint(name, ip, port); + const std::string &ip, int port, + bool write_into_disk) { + if (!udp_manager) { + ldout(cct, 1) + << "UDP endpoint '" << name + << "' creation failed as udp manager is not initialized correctly" + << dendl; + return -CEPHFS_EFAULT; + } + return udp_manager->add_endpoint(name, MDSUDPConnection(ip, port), + write_into_disk); } -int MDSNotificationManager::remove_udp_endpoint(const std::string &name) { - return udp_manager->remove_endpoint(name); +int MDSNotificationManager::remove_udp_endpoint(const std::string &name, + bool write_into_disk) { + if (!udp_manager) { + ldout(cct, 1) + << "UDP endpoint '" << name + << "' removal failed as udp manager is not initialized correctly" + << dendl; + return -CEPHFS_EFAULT; + } + return udp_manager->remove_endpoint(name, write_into_disk); } void MDSNotificationManager::push_notification( const std::shared_ptr &message) { - kafka_manager->send(message); - udp_manager->send(message); + if (kafka_manager) { + kafka_manager->send(message); + } + if (udp_manager) { + udp_manager->send(message); + } } #endif diff --git a/src/mds/MDSNotificationManager.h b/src/mds/MDSNotificationManager.h index ac0f51ed282df..5cc5a8188da8c 100644 --- a/src/mds/MDSNotificationManager.h +++ b/src/mds/MDSNotificationManager.h @@ -1,29 +1,36 @@ #pragma once - #include "CDentry.h" #include "CInode.h" +#include "MDSRank.h" +#include "common/ceph_context.h" +#include "include/buffer.h" +#include #ifdef WITH_CEPHFS_NOTIFICATION #include "MDSKafka.h" #include "MDSNotificationMessage.h" #include "MDSUDPEndpoint.h" -#endif -#include "common/ceph_context.h" -#include "include/buffer.h" -#include +class MDSKafkaManager; +class 
MDSUDPManager; +#endif class MDSNotificationManager { public: - MDSNotificationManager(CephContext *cct); + MDSNotificationManager(MDSRank *mds); + void init(); #ifdef WITH_CEPHFS_NOTIFICATION - int add_kafka_topic(const std::string &topic_name, - const connection_t &connection); - int remove_kafka_topic(const std::string &topic_name); - int add_udp_endpoint(const std::string &name, const std::string &ip, - int port); - int remove_udp_endpoint(const std::string &name); + int add_kafka_topic(const std::string &topic_name, const std::string &broker, + bool use_ssl, const std::string &user, + const std::string &password, + const std::optional &ca_location, + const std::optional &mechanism, + bool write_into_disk); + int remove_kafka_topic(const std::string &topic_name, bool write_into_disk); + int add_udp_endpoint(const std::string &name, const std::string &ip, int port, + bool write_into_disk); + int remove_udp_endpoint(const std::string &name, bool write_into_disk); #endif void push_notification(int32_t whoami, CInode *in, uint64_t notify_mask); @@ -36,7 +43,6 @@ class MDSNotificationManager { uint64_t notify_mask); private: - #ifdef WITH_CEPHFS_NOTIFICATION std::unique_ptr kafka_manager; std::unique_ptr udp_manager; diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index c5c30b2a9fb6d..a8607e2c3a4ae 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -557,9 +557,10 @@ MDSRank::MDSRank( server = new Server(this, &metrics_handler); locker = new Locker(this, mdcache); - + notification_manager = std::make_unique(this); quiesce_db_manager.reset(new QuiesceDbManager()); + _heartbeat_reset_grace = g_conf().get_val("mds_heartbeat_reset_grace"); heartbeat_grace = g_conf().get_val("mds_heartbeat_grace"); op_tracker.set_complaint_and_threshold(cct->_conf->mds_op_complaint_time, @@ -1186,24 +1187,36 @@ bool MDSRank::_dispatch(const cref_t &m, bool new_msg) } #ifdef WITH_CEPHFS_NOTIFICATION + +void MDSRank::send_notification_info_to_peers(const ref_t &m) { + set up; + get_mds_map()->get_up_mds_set(up); + for (const auto &r : up) { + if (r == get_nodeid()) { + continue; + } + send_message_mds(m, r); + } +} + bool MDSRank::is_notification_info(const cref_t &m) { if (m->get_type() == MSG_MDS_NOTIFICATION_INFO_KAFKA_TOPIC) { const auto &req = ref_cast(m); if (!req->is_remove) { - server->add_kafka_topic(req->topic_name, - connection_t(req->broker, req->use_ssl, req->user, - req->password, req->ca_location, - req->mechanism)); + notification_manager->add_kafka_topic( + req->topic_name, req->broker, req->use_ssl, req->user, req->password, + req->ca_location, req->mechanism, false); } else { - server->remove_kafka_topic(req->topic_name); + notification_manager->remove_kafka_topic(req->topic_name, false); } return true; } else if (m->get_type() == MSG_MDS_NOTIFICATION_INFO_UDP_ENDPOINT) { const auto &req = ref_cast(m); if (!req->is_remove) { - server->add_udp_endpoint(req->name, req->ip, req->port); + notification_manager->add_udp_endpoint(req->name, req->ip, req->port, + false); } else { - server->remove_udp_endpoint(req->name); + notification_manager->remove_udp_endpoint(req->name, false); } return true; } @@ -1510,19 +1523,6 @@ class C_MDS_RetrySendMessageMDS : public MDSInternalContext { ref_t m; }; -#ifdef WITH_CEPHFS_NOTIFICATION -void MDSRank::send_notification_info_to_peers(const ref_t &m) { - set up; - get_mds_map()->get_up_mds_set(up); - for (const auto &r : up) { - if (r == get_nodeid()) { - continue; - } - send_message_mds(m, r); - } -} -#endif - int MDSRank::send_message_mds(const 
ref_t& m, mds_rank_t mds) { if (!mdsmap->is_up(mds)) { @@ -2208,6 +2208,7 @@ void MDSRank::active_start() finish_contexts(g_ceph_context, waiting_for_active); // kick waiters quiesce_agent_setup(); + notification_manager->init(); } void MDSRank::recovery_done(int oldstate) @@ -2501,6 +2502,7 @@ void MDSRankDispatcher::handle_mds_map( ceph_assert(oldstate == MDSMap::STATE_ACTIVE); stopping_start(); } + } } @@ -3152,7 +3154,7 @@ void MDSRankDispatcher::handle_asok_command( std::string topic_name, broker, username; std::string password; bool use_ssl; - std::optional ca_location, mechanism; + std::optional ca_location, mechanism; cmd_getval(cmdmap, "topic_name", topic_name); cmd_getval(cmdmap, "broker", broker); if (!cmd_getval(cmdmap, "use_ssl", use_ssl)) { @@ -3167,36 +3169,45 @@ void MDSRankDispatcher::handle_asok_command( if (cmd_getval(cmdmap, "mechanism", mch)) { mechanism = mch; } - auto m = make_message(topic_name, broker, use_ssl, username, password, - ca_location, mechanism); - send_notification_info_to_peers(m); - server->add_kafka_topic(topic_name, connection_t(broker, use_ssl, username, password, - ca_location, mechanism)); - r = 0; - } else if (command == "remove_topic") { + auto m = make_message( + topic_name, broker, use_ssl, username, password, ca_location, + mechanism); + r = notification_manager->add_kafka_topic(topic_name, broker, use_ssl, + username, password, ca_location, + mechanism, true); + if (r == 0) { + send_notification_info_to_peers(m); + } + } + else if (command == "remove_topic") { std::string topic_name; cmd_getval(cmdmap, "topic_name", topic_name); - auto m = make_message (topic_name, true); - send_notification_info_to_peers(m); - server->remove_kafka_topic(topic_name); - r = 0; - } else if (command == "add_udp_endpoint") { + auto m = make_message(topic_name, true); + r = notification_manager->remove_kafka_topic(topic_name, true); + if (r == 0) { + send_notification_info_to_peers(m); + } + } + else if (command == "add_udp_endpoint") { std::string ip, name; int64_t port; cmd_getval(cmdmap, "entity", name); cmd_getval(cmdmap, "ip", ip); cmd_getval(cmdmap, "port", port); auto m = make_message(name, ip, port); - send_notification_info_to_peers(m); - server->add_udp_endpoint(name, ip, (int)port); - r = 0; - } else if (command == "remove_udp_endpoint") { + r = notification_manager->add_udp_endpoint(name, ip, (int)port, true); + if (r == 0) { + send_notification_info_to_peers(m); + } + } + else if (command == "remove_udp_endpoint") { std::string name; cmd_getval(cmdmap, "entity", name); - auto m = make_message (name, true); - send_notification_info_to_peers(m); - server->remove_udp_endpoint(name); - r = 0; + auto m = make_message(name, true); + r = notification_manager->remove_udp_endpoint(name, true); + if (r == 0) { + send_notification_info_to_peers(m); + } } #endif else { diff --git a/src/mds/MDSRank.h b/src/mds/MDSRank.h index d915f9406d706..ca443d19d8f0d 100644 --- a/src/mds/MDSRank.h +++ b/src/mds/MDSRank.h @@ -45,10 +45,8 @@ #include "MetricsHandler.h" #include "osdc/Journaler.h" #include "MDSMetaRequest.h" +#include "MDSNotificationManager.h" -#ifdef WITH_CEPHFS_NOTIFICATION -#include "MDSKafka.h" -#endif // Full .h import instead of forward declaration for PerfCounter, for the // benefit of those including this header and using MDSRank::logger @@ -158,6 +156,7 @@ class ScrubStack; class C_ExecAndReply; class QuiesceDbManager; class QuiesceAgent; +class MDSNotificationManager; /** * The public part of this class's interface is what's exposed to all @@ -399,6 
+398,16 @@ class MDSRank {
   double get_inject_journal_corrupt_dentry_first() const {
     return inject_journal_corrupt_dentry_first;
   }
+#ifdef WITH_CEPHFS_NOTIFICATION
+  int add_kafka_topic(const std::string &topic_name, const std::string &broker,
+                      bool use_ssl, const std::string &user,
+                      const std::string &password,
+                      const std::optional<std::string> &ca_location,
+                      const std::optional<std::string> &mechanism);
+  int remove_kafka_topic(const std::string& topic_name);
+  int add_udp_endpoint(const std::string& name, const std::string& ip, int port);
+  int remove_udp_endpoint(const std::string& name);
+#endif

   // Reference to global MDS::mds_lock, so that users of MDSRank don't
   // carry around references to the outer MDS, and we can substitute
@@ -431,6 +440,7 @@ class MDSRank {
   SnapServer *snapserver = nullptr;
   SnapClient *snapclient = nullptr;
+  std::unique_ptr<MDSNotificationManager> notification_manager;

   SessionMap sessionmap;
diff --git a/src/mds/MDSUDPEndpoint.cc b/src/mds/MDSUDPEndpoint.cc
index a82be52747c09..27dfaaf96bdb8 100644
--- a/src/mds/MDSUDPEndpoint.cc
+++ b/src/mds/MDSUDPEndpoint.cc
@@ -3,6 +3,129 @@

 #define dout_subsys ceph_subsys_mds

+MDSUDPConnection::MDSUDPConnection(const std::string &ip, int port)
+    : ip(ip), port(port) {}
+
+void MDSUDPConnection::encode(ceph::buffer::list &bl) const {
+  ENCODE_START(1, 1, bl);
+  encode(ip, bl);
+  encode(port, bl);
+  ENCODE_FINISH(bl);
+}
+
+void MDSUDPConnection::dump(ceph::Formatter *f) const {
+  f->dump_string("ip", ip);
+  f->dump_int("port", port);
+}
+
+void MDSUDPConnection::generate_test_instances(
+    std::list<MDSUDPConnection *> &o) {
+  o.push_back(new MDSUDPConnection);
+}
+
+void MDSUDPConnection::decode(ceph::buffer::list::const_iterator &iter) {
+  DECODE_START(1, iter);
+  decode(ip, iter);
+  decode(port, iter);
+  DECODE_FINISH(iter);
+}
+
+MDSUDPDriver::MDSUDPDriver(MDSRank *mds, const std::string &object_name)
+    : mds(mds), object_name(object_name) {}
+
+int MDSUDPDriver::load_data(std::map<std::string, bufferlist> &mp) {
+  int r = update_omap(std::map<std::string, bufferlist>());
+  if (r < 0) {
+    return r;
+  }
+  C_SaferCond sync_finisher;
+  ObjectOperation op;
+  op.omap_get_vals("", "", UINT_MAX, &mp, NULL, NULL);
+  mds->objecter->read(object_t(object_name),
+                      object_locator_t(mds->get_metadata_pool()), op,
+                      CEPH_NOSNAP, NULL, 0, &sync_finisher);
+  r = sync_finisher.wait();
+  if (r < 0) {
+    lderr(mds->cct) << "Error reading omap values from object '" << object_name
+                    << "':" << cpp_strerror(r) << dendl;
+  }
+  return r;
+}
+
+int MDSUDPDriver::update_omap(const std::map<std::string, bufferlist> &mp) {
+  C_SaferCond sync_finisher;
+  ObjectOperation op;
+  op.omap_set(mp);
+  mds->objecter->mutate(
+      object_t(object_name), object_locator_t(mds->get_metadata_pool()), op,
+      SnapContext(), ceph::real_clock::now(), 0, &sync_finisher);
+  int r = sync_finisher.wait();
+  if (r < 0) {
+    lderr(mds->cct) << "Error updating omap of object '" << object_name
+                    << "':" << cpp_strerror(r) << dendl;
+  }
+  return r;
+}
+
+int MDSUDPDriver::remove_keys(const std::set<std::string> &st) {
+  C_SaferCond sync_finisher;
+  ObjectOperation op;
+  op.omap_rm_keys(st);
+  mds->objecter->mutate(
+      object_t(object_name), object_locator_t(mds->get_metadata_pool()), op,
+      SnapContext(), ceph::real_clock::now(), 0, &sync_finisher);
+  int r = sync_finisher.wait();
+  if (r < 0) {
+    lderr(mds->cct) << "Error removing keys from omap of object '"
+                    << object_name << "':" << cpp_strerror(r) << dendl;
+  }
+  return r;
+}
+
+int MDSUDPDriver::add_endpoint(const std::string &name,
+                               const MDSUDPConnection &connection) {
+  std::map<std::string, bufferlist> mp;
+  bufferlist bl;
+  encode(connection, bl);
+  mp[name] = std::move(bl);
+  int r =
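+  // omap_set() merges the given keys into the object's existing omap, so
+  // re-adding an endpoint under an existing name overwrites only that one
+  // record; other persisted endpoints are left untouched.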
update_omap(mp);
+  return r;
+}
+
+int MDSUDPDriver::remove_endpoint(const std::string &name) {
+  std::set<std::string> st;
+  st.insert(name);
+  int r = remove_keys(st);
+  return r;
+}
+
+MDSUDPManager::MDSUDPManager(MDSRank *mds) : cct(mds->cct) {
+  driver = std::make_unique<MDSUDPDriver>(mds, "mds_udp_endpoints");
+}
+
+int MDSUDPManager::init() {
+  std::map<std::string, bufferlist> mp;
+  int r = driver->load_data(mp);
+  if (r < 0) {
+    lderr(cct) << "Error occurred while initializing UDP endpoints" << dendl;
+    return r;
+  }
+  for (auto &[key, val] : mp) {
+    try {
+      MDSUDPConnection connection;
+      auto iter = val.cbegin();
+      decode(connection, iter);
+      add_endpoint(key, connection, false);
+    } catch (const ceph::buffer::error &e) {
+      ldout(cct, 1)
+          << "No value exists in the omap of object 'mds_udp_endpoints' "
+             "for udp entity name '"
+          << key << "'" << dendl;
+    }
+  }
+  return r;
+}
+
 int MDSUDPManager::send(
     const std::shared_ptr<MDSNotificationMessage> &message) {
   std::shared_lock<std::shared_mutex> lock(endpoint_mutex);
@@ -17,34 +140,60 @@ int MDSUDPManager::send(
   return 0;
 }

-int MDSUDPManager::add_endpoint(const std::string &name, const std::string &ip,
-                                int port) {
+int MDSUDPManager::add_endpoint(const std::string &name,
+                                const MDSUDPConnection &connection,
+                                bool write_into_disk) {
   std::unique_lock<std::shared_mutex> lock(endpoint_mutex);
+  std::shared_ptr<MDSUDPEndpoint> new_endpoint;
   auto it = endpoints.find(name);
+  int r = 0;
   if (it == endpoints.end() && endpoints.size() >= MAX_CONNECTIONS_DEFAULT) {
     ldout(cct, 1) << "UDP connect: max connections exceeded" << dendl;
-    return -CEPHFS_ENOMEM;
+    r = -CEPHFS_ENOMEM;
+    goto error_occurred;
   }
-  std::shared_ptr<MDSUDPEndpoint> new_endpoint =
-      MDSUDPEndpoint::create(cct, name, ip, port);
+  new_endpoint = MDSUDPEndpoint::create(cct, name, connection);
   if (!new_endpoint) {
     ldout(cct, 1) << "UDP connect: udp endpoint creation failed" << dendl;
-    return -CEPHFS_ECANCELED;
+    r = -CEPHFS_ECANCELED;
+    goto error_occurred;
   }
   endpoints[name] = new_endpoint;
+  if (write_into_disk) {
+    r = driver->add_endpoint(name, connection);
+    if (r < 0) {
+      goto error_occurred;
+    }
+  }
   ldout(cct, 1) << "UDP endpoint with entity name '" << name
-                << "' is added successfully" << dendl;
-  return 0;
+                << "' is added successfully" << dendl;
+  return r;
+error_occurred:
+  lderr(cct) << "UDP endpoint with entity name '" << name
+             << "' cannot be added, failed with an error:" << cpp_strerror(r)
+             << dendl;
+  return r;
 }

-int MDSUDPManager::remove_endpoint(const std::string &name) {
+int MDSUDPManager::remove_endpoint(const std::string &name,
+                                   bool write_into_disk) {
   std::unique_lock<std::shared_mutex> lock(endpoint_mutex);
+  int r = 0;
   auto it = endpoints.find(name);
   if (it != endpoints.end()) {
     endpoints.erase(it);
-    ldout(cct, 1) << "UDP endpoint with entity name '" << name
-                  << "' is removed successfully" << dendl;
-    return 0;
+    if (write_into_disk) {
+      r = driver->remove_endpoint(name);
+    }
+    if (r == 0) {
+      ldout(cct, 1) << "UDP endpoint with entity name '" << name
+                    << "' is removed successfully" << dendl;
+    } else {
+      lderr(cct) << "UDP endpoint '" << name
+                 << "' cannot be removed, failed with an error:"
+                 << cpp_strerror(r) << dendl;
+    }
+    return r;
   }
   ldout(cct, 1) << "No UDP endpoint exists with entity name '" << name << "'"
                 << dendl;
@@ -52,9 +201,10 @@ int MDSUDPManager::remove_endpoint(const std::string &name) {
 }

 MDSUDPEndpoint::MDSUDPEndpoint(CephContext *cct, const std::string &name,
-                               const std::string &ip, int port)
-    : cct(cct),
name(name), socket(io_context), connection(connection), + endpoint(boost::asio::ip::address::from_string(connection.ip), + connection.port) { try { boost::system::error_code ec; socket.open(boost::asio::ip::udp::v4(), ec); @@ -68,13 +218,12 @@ MDSUDPEndpoint::MDSUDPEndpoint(CephContext *cct, const std::string &name, } } -std::shared_ptr MDSUDPEndpoint::create(CephContext *cct, - const std::string &name, - const std::string &ip, - int port) { +std::shared_ptr +MDSUDPEndpoint::create(CephContext *cct, const std::string &name, + const MDSUDPConnection &connection) { try { std::shared_ptr endpoint = - std::make_shared(cct, name, ip, port); + std::make_shared(cct, name, connection); return endpoint; } catch (...) { } diff --git a/src/mds/MDSUDPEndpoint.h b/src/mds/MDSUDPEndpoint.h index 9a7504bd7b2e4..d3885717c29b3 100644 --- a/src/mds/MDSUDPEndpoint.h +++ b/src/mds/MDSUDPEndpoint.h @@ -1,21 +1,52 @@ +#pragma once + #include "MDSNotificationMessage.h" +#include "MDSRank.h" #include #include -#include class MDSUDPEndpoint; +struct MDSUDPConnection { + std::string ip; + int port; + MDSUDPConnection() = default; + MDSUDPConnection(const std::string &ip, int port); + void encode(ceph::buffer::list &bl) const; + void decode(ceph::buffer::list::const_iterator &iter); + void dump(ceph::Formatter *f) const; + static void generate_test_instances(std::list &o); +}; +WRITE_CLASS_ENCODER(MDSUDPConnection) + +class MDSUDPDriver { +public: + MDSUDPDriver(MDSRank *mds, const std::string &object_name); + int load_data(std::map &mp); + int add_endpoint(const std::string &name, const MDSUDPConnection &connection); + int remove_endpoint(const std::string &name); + +private: + MDSRank *mds; + std::string object_name; + int update_omap(const std::map &mp); + int remove_keys(const std::set &st); +}; + class MDSUDPManager { public: - MDSUDPManager(CephContext *cct) : cct(cct) {} + MDSUDPManager(MDSRank *mds); + int init(); int send(const std::shared_ptr &message); - int add_endpoint(const std::string &name, const std::string &ip, int port); - int remove_endpoint(const std::string &name); + int add_endpoint(const std::string &name, const MDSUDPConnection &connection, + bool write_into_disk); + int remove_endpoint(const std::string &name, bool write_into_disk); private: CephContext *cct; std::shared_mutex endpoint_mutex; std::unordered_map> endpoints; + std::unique_ptr driver; static const size_t MAX_CONNECTIONS_DEFAULT = 256; }; @@ -23,19 +54,17 @@ class MDSUDPEndpoint { public: MDSUDPEndpoint() = delete; MDSUDPEndpoint(CephContext *cct, const std::string &name, - const std::string &ip, int port); + const MDSUDPConnection &connection); int publish_internal(std::vector &buf, uint64_t seq_id); - static std::shared_ptr create(CephContext *cct, - const std::string &name, - const std::string &ip, - int port); + static std::shared_ptr + create(CephContext *cct, const std::string &name, + const MDSUDPConnection &connection); friend class MDSUDPManager; private: std::string name; - std::string ip; - int port; + MDSUDPConnection connection; boost::asio::io_context io_context; boost::asio::ip::udp::socket socket; boost::asio::ip::udp::endpoint endpoint; diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 06eaea164aabd..835372ad0a395 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -281,7 +281,7 @@ Server::Server(MDSRank *m, MetricsHandler *metrics_handler) : // std::nullopt, std::nullopt); // MDSAsyncNotificationManager::create(mds->cct); // MDSSyncNotificationManager::create(mds->cct); - notification_manager = 
std::make_unique(mds->cct); + // notification_manager = std::make_unique(mds); // topic_ptr = MDSKafkaTopic::create( // "my-topic", mds->cct, // connection_t("localhost:9093", true, "admin", "admin-secret", @@ -4858,7 +4858,7 @@ void Server::handle_client_openc(const MDRequestRef& mdr) set_reply_extra_bl(req, _inode->ino, mdr->reply_extra_bl); - notification_manager->push_notification(mds->get_nodeid(), newi, + mds->notification_manager->push_notification(mds->get_nodeid(), newi, CEPH_MDS_NOTIFY_CREATE | CEPH_MDS_NOTIFY_OPEN); @@ -5540,7 +5540,7 @@ void Server::handle_client_setattr(const MDRequestRef& mdr) mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY); mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur); - notification_manager->push_notification(mds->get_nodeid(), cur, + mds->notification_manager->push_notification(mds->get_nodeid(), cur, CEPH_MDS_NOTIFY_ATTRIB); journal_and_reply(mdr, cur, 0, le, new C_MDS_inode_update_finish(this, mdr, cur, @@ -5607,7 +5607,7 @@ void Server::do_open_truncate(const MDRequestRef& mdr, int cmode) dn = mdr->dn[0].back(); } - notification_manager->push_notification(mds->get_nodeid(), in, + mds->notification_manager->push_notification(mds->get_nodeid(), in, CEPH_MDS_NOTIFY_MODIFY | CEPH_MDS_NOTIFY_ACCESS | CEPH_MDS_NOTIFY_OPEN); @@ -5701,7 +5701,7 @@ void Server::handle_client_setlayout(const MDRequestRef& mdr) mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY); mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur); - notification_manager->push_notification(mds->get_nodeid(), cur, + mds->notification_manager->push_notification(mds->get_nodeid(), cur, CEPH_MDS_NOTIFY_ATTRIB); journal_and_reply(mdr, cur, 0, le, new C_MDS_inode_update_finish(this, mdr, cur)); @@ -5821,7 +5821,7 @@ void Server::handle_client_setdirlayout(const MDRequestRef& mdr) mdr->no_early_reply = true; - notification_manager->push_notification(mds->get_nodeid(), cur, + mds->notification_manager->push_notification(mds->get_nodeid(), cur, CEPH_MDS_NOTIFY_ATTRIB); journal_and_reply(mdr, cur, 0, le, new C_MDS_inode_update_finish(this, mdr, cur)); @@ -6527,7 +6527,7 @@ void Server::handle_client_setvxattr(const MDRequestRef& mdr, CInode *cur) mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY); mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur); - notification_manager->push_notification(mds->get_nodeid(), cur, + mds->notification_manager->push_notification(mds->get_nodeid(), cur, CEPH_MDS_NOTIFY_ATTRIB); journal_and_reply(mdr, cur, 0, le, new C_MDS_inode_update_finish(this, mdr, cur, @@ -6812,7 +6812,7 @@ void Server::handle_client_setxattr(const MDRequestRef& mdr) mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY); mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur); - notification_manager->push_notification(mds->get_nodeid(), cur, + mds->notification_manager->push_notification(mds->get_nodeid(), cur, CEPH_MDS_NOTIFY_ATTRIB); journal_and_reply(mdr, cur, 0, le, new C_MDS_inode_update_finish(this, mdr, cur)); @@ -6884,7 +6884,7 @@ void Server::handle_client_removexattr(const MDRequestRef& mdr) mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY); mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur); - notification_manager->push_notification(mds->get_nodeid(), cur, + mds->notification_manager->push_notification(mds->get_nodeid(), cur, CEPH_MDS_NOTIFY_ATTRIB); journal_and_reply(mdr, cur, 0, le, new 
C_MDS_inode_update_finish(this, mdr, cur)); @@ -7203,7 +7203,7 @@ void Server::handle_client_mknod(const MDRequestRef& mdr) PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); le->metablob.add_primary_dentry(dn, newi, true, true, true); - notification_manager->push_notification(mds->get_nodeid(), newi, + mds->notification_manager->push_notification(mds->get_nodeid(), newi, CEPH_MDS_NOTIFY_CREATE | CEPH_MDS_NOTIFY_ATTRIB); @@ -7297,7 +7297,7 @@ void Server::handle_client_mkdir(const MDRequestRef& mdr) // make sure this inode gets into the journal le->metablob.add_opened_ino(newi->ino()); - notification_manager->push_notification(mds->get_nodeid(), newi, + mds->notification_manager->push_notification(mds->get_nodeid(), newi, CEPH_MDS_NOTIFY_CREATE); journal_and_reply(mdr, newi, dn, le, new C_MDS_mknod_finish(this, mdr, dn, newi)); @@ -7363,7 +7363,7 @@ void Server::handle_client_symlink(const MDRequestRef& mdr) mdcache->predirty_journal_parents(mdr, &le->metablob, newi, dn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); le->metablob.add_primary_dentry(dn, newi, true, true); - notification_manager->push_notification(mds->get_nodeid(), newi, + mds->notification_manager->push_notification(mds->get_nodeid(), newi, CEPH_MDS_NOTIFY_CREATE); journal_and_reply(mdr, newi, dn, le, new C_MDS_mknod_finish(this, mdr, dn, newi)); @@ -7497,7 +7497,7 @@ void Server::handle_client_link(const MDRequestRef& mdr) // go! ceph_assert(g_conf()->mds_kill_link_at != 1); - notification_manager->push_notification_link(mds->get_nodeid(), targeti, destdn, + mds->notification_manager->push_notification_link(mds->get_nodeid(), targeti, destdn, CEPH_MDS_NOTIFY_ATTRIB, CEPH_MDS_NOTIFY_CREATE); @@ -8227,7 +8227,7 @@ void Server::handle_client_unlink(const MDRequestRef& mdr) if (!rmdir && dnl->is_primary() && mdr->dn[0].size() == 1) mds->locker->create_lock_cache(mdr, diri); - notification_manager->push_notification_link(mds->get_nodeid(), in, dn, + mds->notification_manager->push_notification_link(mds->get_nodeid(), in, dn, CEPH_MDS_NOTIFY_ATTRIB, CEPH_MDS_NOTIFY_DELETE); @@ -9261,7 +9261,7 @@ void Server::handle_client_rename(const MDRequestRef& mdr) // -- commit locally -- C_MDS_rename_finish *fin = new C_MDS_rename_finish(this, mdr, srcdn, destdn, straydn); - notification_manager->push_notification_move(mds->get_nodeid(), srcdn, + mds->notification_manager->push_notification_move(mds->get_nodeid(), srcdn, destdn); journal_and_reply(mdr, srci, destdn, le, fin); @@ -11234,7 +11234,7 @@ void Server::handle_client_mksnap(const MDRequestRef& mdr) mdcache->predirty_journal_parents(mdr, &le->metablob, diri, 0, PREDIRTY_PRIMARY, false); mdcache->journal_dirty_inode(mdr.get(), &le->metablob, diri); - notification_manager->push_notification_snap( + mds->notification_manager->push_notification_snap( mds->get_nodeid(), diri, std::string(snapname), CEPH_MDS_NOTIFY_CREATE | CEPH_MDS_NOTIFY_ATTRIB); @@ -11371,7 +11371,7 @@ void Server::handle_client_rmsnap(const MDRequestRef& mdr) mdcache->predirty_journal_parents(mdr, &le->metablob, diri, 0, PREDIRTY_PRIMARY, false); mdcache->journal_dirty_inode(mdr.get(), &le->metablob, diri); - notification_manager->push_notification_snap( + mds->notification_manager->push_notification_snap( mds->get_nodeid(), diri, std::string(snapname), CEPH_MDS_NOTIFY_DELETE | CEPH_MDS_NOTIFY_ATTRIB); @@ -11992,23 +11992,3 @@ bool Server::build_snap_diff( } return it == dir->end(); } - -#ifdef WITH_CEPHFS_NOTIFICATION -int Server::add_kafka_topic(const std::string &topic_name, - const connection_t &connection) { - return 
notification_manager->add_kafka_topic(topic_name, connection); -} - -int Server::remove_kafka_topic(const std::string& topic_name) { - return notification_manager->remove_kafka_topic(topic_name); -} - -int Server::add_udp_endpoint(const std::string &name, const std::string &ip, - int port) { - return notification_manager->add_udp_endpoint(name, ip, port); -} - -int Server::remove_udp_endpoint(const std::string& name) { - return notification_manager->remove_udp_endpoint(name); -} -#endif diff --git a/src/mds/Server.h b/src/mds/Server.h index e17da02db2a7a..d2788f447b71b 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -36,12 +36,9 @@ using namespace std::literals::string_view_literals; #include "MDSRank.h" #include "Mutation.h" #include "MDSContext.h" +// #include "MDSNotificationManager.h" -#ifdef WITH_CEPHFS_NOTIFICATION -#include "MDSKafka.h" -#endif -#include "MDSNotificationManager.h" - +// class MDSNotificationManager; class OSDMap; class LogEvent; class EMetaBlob; @@ -127,14 +124,6 @@ class Server { return last_recall_state; } -#ifdef WITH_CEPHFS_NOTIFICATION - int add_kafka_topic(const std::string &topic_name, - const connection_t &connection); - int remove_kafka_topic(const std::string& topic_name); - int add_udp_endpoint(const std::string& name, const std::string& ip, int port); - int remove_udp_endpoint(const std::string& name); -#endif - void handle_client_session(const cref_t &m); void _session_logged(Session *session, uint64_t state_seq, bool open, version_t pv, const interval_set& inos_to_free, version_t piv, @@ -538,7 +527,6 @@ class Server { MDCache *mdcache; MDLog *mdlog; PerfCounters *logger = nullptr; - std::unique_ptr notification_manager; // OSDMap full status, used to generate CEPHFS_ENOSPC on some operations bool is_full = false; From bf62eb712e8ecf3805a780c6a99742354ed2e22f Mon Sep 17 00:00:00 2001 From: sajibreadd Date: Tue, 10 Sep 2024 10:46:26 +0600 Subject: [PATCH 3/5] Notification interface added in mds Signed-off-by: Md Mahamudur Rahaman Sajib --- src/mds/MDSRank.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index a8607e2c3a4ae..2fbdbfbca1cca 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -1210,7 +1210,11 @@ bool MDSRank::is_notification_info(const cref_t &m) { notification_manager->remove_kafka_topic(req->topic_name, false); } return true; +<<<<<<< HEAD } else if (m->get_type() == MSG_MDS_NOTIFICATION_INFO_UDP_ENDPOINT) { +======= + } else if (m->get_type() == MSG_MDS_NOTIFICATION_INFO_KAFKA_TOPIC) { +>>>>>>> a2cbf540416 (Notification interface added in mds) const auto &req = ref_cast(m); if (!req->is_remove) { notification_manager->add_udp_endpoint(req->name, req->ip, req->port, From b2ae4a12ab105cec203f9019ebfffdf40916b250 Mon Sep 17 00:00:00 2001 From: sajibreadd Date: Mon, 7 Oct 2024 05:51:28 +0200 Subject: [PATCH 4/5] libcephfs api is adjusted for add/remove notification endpoints Signed-off-by: Md Mahamudur Rahaman Sajib --- src/client/Client.cc | 73 ++++++++- src/client/Client.h | 13 ++ src/include/ceph_fs.h | 6 +- src/include/cephfs/libcephfs.h | 51 ++++++ src/libcephfs.cc | 40 +++++ src/mds/MDSRank.cc | 4 - src/mds/MDSRank.h | 9 +- src/mds/Server.cc | 147 ++++++++++++++++++ src/mds/Server.h | 15 ++ src/messages/MClientRequest.h | 100 ++++++++++++ src/test/libcephfs/CMakeLists.txt | 3 + .../libcephfs/notification_endpoint_api.cc | 25 +++ src/tools/ceph-dencoder/common_types.h | 2 + 13 files changed, 477 insertions(+), 11 deletions(-) create mode 100644 
src/test/libcephfs/notification_endpoint_api.cc diff --git a/src/client/Client.cc b/src/client/Client.cc index 8af860634a69c..386b3f23f0870 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -1709,6 +1709,10 @@ mds_rank_t Client::choose_target_mds(MetaRequest *req, Inode** phash_diri) Inode *in = NULL; Dentry *de = NULL; + bool is_notification_op = (req->get_op() == CEPH_MDS_OP_ADD_KAFKA_TOPIC || + req->get_op() == CEPH_MDS_OP_REMOVE_KAFKA_TOPIC || + req->get_op() == CEPH_MDS_OP_ADD_UDP_ENDPOINT || + req->get_op() == CEPH_MDS_OP_REMOVE_UDP_ENDPOINT); if (req->resend_mds >= 0) { mds = req->resend_mds; @@ -1719,6 +1723,10 @@ mds_rank_t Client::choose_target_mds(MetaRequest *req, Inode** phash_diri) if (cct->_conf->client_use_random_mds) goto random_mds; + if (is_notification_op) { + mds = 0; + } + in = req->inode(); de = req->dentry(); if (in) { @@ -7737,7 +7745,6 @@ int Client::rename(const char *relfrom, const char *relto, const UserPerm& perm, out: return r; } - // dirs int Client::mkdir(const char *relpath, mode_t mode, const UserPerm& perm, std::string alternate_name) @@ -17487,3 +17494,67 @@ void StandaloneClient::shutdown() objecter->shutdown(); monclient->shutdown(); } + +#ifdef WITH_CEPHFS_NOTIFICATION +// notifications +int Client::add_kafka_topic(const char *topic_name, const char *broker, + bool use_ssl, const char *user, + const char *password, const char *ca_location, + const char *mechanism, const UserPerm &perm) { + ldout(cct, 10) << __func__ << ": " + << "topic_name=" << topic_name << ", broker=" << broker + << dendl; + MetaRequest *req = new MetaRequest(CEPH_MDS_OP_ADD_KAFKA_TOPIC); + + KafkaTopicPayload payload(topic_name, broker, use_ssl, + (user == nullptr ? "" : user), + (password == nullptr ? "" : password), + (ca_location == nullptr || strlen(ca_location) == 0) + ? std::optional(std::nullopt) + : ca_location, + (mechanism == nullptr || strlen(mechanism) == 0) + ? 
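+      // (an unset or empty C string collapses to std::nullopt here, exactly
+      //  as for ca_location above, so the payload only carries values the
+      //  caller actually supplied)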
std::optional(std::nullopt) + : mechanism); + bufferlist bl; + encode(payload, bl); + req->set_data(bl); + std::scoped_lock lock(client_lock); + int res = make_request(req, perm); + return res; +} + +int Client::remove_kafka_topic(const char* topic_name, + const UserPerm &perm) { + MetaRequest *req = new MetaRequest(CEPH_MDS_OP_REMOVE_KAFKA_TOPIC); + KafkaTopicPayload payload(topic_name); + bufferlist bl; + encode(payload, bl); + req->set_data(bl); + std::scoped_lock lock(client_lock); + int res = make_request(req, perm); + return res; +} + +int Client::add_udp_endpoint(const char* name, const char* ip, + int port, const UserPerm &perm) { + MetaRequest *req = new MetaRequest(CEPH_MDS_OP_ADD_UDP_ENDPOINT); + UDPEndpointPayload payload(name, ip, port); + bufferlist bl; + encode(payload, bl); + req->set_data(bl); + std::scoped_lock lock(client_lock); + int res = make_request(req, perm); + return res; +} + +int Client::remove_udp_endpoint(const char* name, const UserPerm &perm) { + MetaRequest *req = new MetaRequest(CEPH_MDS_OP_REMOVE_UDP_ENDPOINT); + UDPEndpointPayload payload(name); + bufferlist bl; + encode(payload, bl); + req->set_data(bl); + std::scoped_lock lock(client_lock); + int res = make_request(req, perm); + return res; +} +#endif diff --git a/src/client/Client.h b/src/client/Client.h index 5a1e69394d02a..8a230d58dee6c 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -400,6 +400,19 @@ class Client : public Dispatcher, public md_config_obs_t { int unlinkat(int dirfd, const char *relpath, int flags, const UserPerm& perm); int rename(const char *from, const char *to, const UserPerm& perm, std::string alternate_name=""); +#ifdef WITH_CEPHFS_NOTIFICATION + // notifications + int add_kafka_topic(const char *topic_name, const char *broker, + bool use_ssl, const char *user, + const char *password, const char *ca_location, + const char *mechanism, const UserPerm &perm); + int remove_kafka_topic(const char* topic_name, + const UserPerm &perm); + int add_udp_endpoint(const char* name, const char* ip, + int port, const UserPerm &perm); + int remove_udp_endpoint(const char* name, const UserPerm &perm); +#endif + // dirs int mkdir(const char *path, mode_t mode, const UserPerm& perm, std::string alternate_name=""); int mkdirat(int dirfd, const char *relpath, mode_t mode, const UserPerm& perm, diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 6a8de2b7ca79d..37210989fc4a1 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -427,7 +427,11 @@ enum { CEPH_MDS_OP_RMSNAP = 0x01401, CEPH_MDS_OP_LSSNAP = 0x00402, CEPH_MDS_OP_RENAMESNAP = 0x01403, - CEPH_MDS_OP_READDIR_SNAPDIFF = 0x01404, + CEPH_MDS_OP_READDIR_SNAPDIFF = 0x01404, + CEPH_MDS_OP_ADD_KAFKA_TOPIC = 0x01405, + CEPH_MDS_OP_REMOVE_KAFKA_TOPIC = 0x01406, + CEPH_MDS_OP_ADD_UDP_ENDPOINT = 0x01407, + CEPH_MDS_OP_REMOVE_UDP_ENDPOINT = 0x01408, // internal op CEPH_MDS_OP_FRAGMENTDIR= 0x01500, diff --git a/src/include/cephfs/libcephfs.h b/src/include/cephfs/libcephfs.h index ba0b76e072b57..1bc6d9f8dce5d 100644 --- a/src/include/cephfs/libcephfs.h +++ b/src/include/cephfs/libcephfs.h @@ -731,6 +731,57 @@ int64_t ceph_telldir(struct ceph_mount_info *cmount, struct ceph_dir_result *dir */ void ceph_seekdir(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp, int64_t offset); +#ifdef WITH_CEPHFS_NOTIFICATION +/** + * Create/replace kafka topic for notification. + * + * @param cmount the ceph mount handle. + * @param topic_name kafka topic name to create. + * @param broker address of kafka endpoint. 
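+ *        Illustrative call shape (placeholder values, not defaults):
+ *          ceph_add_kafka_topic(cmount, "cephfs-events", "localhost:9092",
+ *                               false, NULL, NULL, NULL, NULL);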
+ * @param use_ssl whether SSL should be used for the connection.
+ * @param user username for authentication.
+ * @param password password for authentication.
+ * @param ca_location path to the CA certificate(s) used to verify the broker.
+ * @param mechanism the SASL mechanism to use when connecting to a kafka
+ *   broker that requires authentication.
+ * @returns 0 on success or a negative return code on error.
+ */
+int ceph_add_kafka_topic(struct ceph_mount_info *cmount, const char *topic_name,
+                         const char *broker, bool use_ssl, const char *user,
+                         const char *password, const char *ca_location,
+                         const char *mechanism);
+
+/**
+ * Remove a kafka topic.
+ *
+ * @param cmount the ceph mount handle.
+ * @param topic_name kafka topic name to remove.
+ * @returns 0 on success or a negative return code on error.
+ */
+int ceph_remove_kafka_topic(struct ceph_mount_info *cmount, const char *topic_name);
+
+/**
+ * Create/replace a UDP endpoint.
+ *
+ * @param cmount the ceph mount handle.
+ * @param name udp endpoint name to create.
+ * @param ip IP address of the udp endpoint.
+ * @param port port of the udp endpoint to connect to.
+ * @returns 0 on success or a negative return code on error.
+ */
+int ceph_add_udp_endpoint(struct ceph_mount_info *cmount, const char *name,
+                          const char *ip, int port);
+
+/**
+ * Remove a UDP endpoint.
+ *
+ * @param cmount the ceph mount handle.
+ * @param name udp endpoint name to remove.
+ * @returns 0 on success or a negative return code on error.
+ */
+int ceph_remove_udp_endpoint(struct ceph_mount_info *cmount, const char *name);
+#endif
+
 /**
  * Create a directory.
  *
diff --git a/src/libcephfs.cc b/src/libcephfs.cc
index 7eea6665f6145..88d9cac2c7be8 100644
--- a/src/libcephfs.cc
+++ b/src/libcephfs.cc
@@ -882,6 +882,46 @@ extern "C" int ceph_rename(struct ceph_mount_info *cmount, const char *from,
   return cmount->get_client()->rename(from, to, cmount->default_perms);
 }
 
+#ifdef WITH_CEPHFS_NOTIFICATION
+// notifications
+extern "C" int ceph_add_kafka_topic(struct ceph_mount_info *cmount,
+                                    const char *topic_name, const char *broker,
+                                    bool use_ssl, const char *user,
+                                    const char *password,
+                                    const char *ca_location,
+                                    const char *mechanism) {
+  if (!cmount->is_mounted())
+    return -CEPHFS_ENOTCONN;
+  return cmount->get_client()->add_kafka_topic(
+      topic_name, broker, use_ssl, user, password, ca_location, mechanism,
+      cmount->default_perms);
+}
+
+extern "C" int ceph_remove_kafka_topic(struct ceph_mount_info *cmount,
+                                       const char *topic_name) {
+  if (!cmount->is_mounted())
+    return -CEPHFS_ENOTCONN;
+  return cmount->get_client()->remove_kafka_topic(topic_name,
+                                                  cmount->default_perms);
+}
+
+extern "C" int ceph_add_udp_endpoint(struct ceph_mount_info *cmount,
+                                     const char *name, const char *ip,
+                                     int port) {
+  if (!cmount->is_mounted())
+    return -CEPHFS_ENOTCONN;
+  return cmount->get_client()->add_udp_endpoint(name, ip, port,
+                                                cmount->default_perms);
+}
+
+extern "C" int ceph_remove_udp_endpoint(struct ceph_mount_info *cmount,
+                                        const char *name) {
+  if (!cmount->is_mounted())
+    return -CEPHFS_ENOTCONN;
+  return cmount->get_client()->remove_udp_endpoint(name, cmount->default_perms);
+}
+#endif
+
 // dirs
 extern "C" int ceph_mkdir(struct ceph_mount_info *cmount, const char *path, mode_t mode)
 {
diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc
index 2fbdbfbca1cca..a8607e2c3a4ae 100644
--- a/src/mds/MDSRank.cc
+++ b/src/mds/MDSRank.cc
@@ -1210,11 +1210,7 @@ bool MDSRank::is_notification_info(const cref_t &m) {
       notification_manager->remove_kafka_topic(req->topic_name, false);
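      // reviewer note (inferred from the Server.cc call sites, which pass
      // write_into_disk=true): the 'false' here means a peer applying a
      // broadcast endpoint update does not persist it again; only the rank
      // that served the original client request stores it durably.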
} return true; -<<<<<<< HEAD } else if (m->get_type() == MSG_MDS_NOTIFICATION_INFO_UDP_ENDPOINT) { -======= - } else if (m->get_type() == MSG_MDS_NOTIFICATION_INFO_KAFKA_TOPIC) { ->>>>>>> a2cbf540416 (Notification interface added in mds) const auto &req = ref_cast(m); if (!req->is_remove) { notification_manager->add_udp_endpoint(req->name, req->ip, req->port, diff --git a/src/mds/MDSRank.h b/src/mds/MDSRank.h index ca443d19d8f0d..16ed5cf64ac82 100644 --- a/src/mds/MDSRank.h +++ b/src/mds/MDSRank.h @@ -265,6 +265,10 @@ class MDSRank { uint64_t get_global_id() const { return monc->get_global_id(); } + +#ifdef WITH_CEPHFS_NOTIFICATION + void send_notification_info_to_peers(const ref_t& m); +#endif // Daemon lifetime functions: these guys break the abstraction // and call up into the parent MDSDaemon instance. It's kind @@ -663,11 +667,6 @@ class MDSRank { bool standby_replaying = false; // true if current replay pass is in standby-replay mode uint64_t extraordinary_events_dump_interval = 0; double inject_journal_corrupt_dentry_first = 0.0; -protected: - -#ifdef WITH_CEPHFS_NOTIFICATION - void send_notification_info_to_peers(const ref_t& m); -#endif private: bool send_status = true; diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 835372ad0a395..9e5dc6c282f52 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -33,6 +33,11 @@ #include "cephfs_features.h" #include "MDSContext.h" +#ifdef WITH_CEPHFS_NOTIFICATION +#include "messages/MNotificationInfoKafkaTopic.h" +#include "messages/MNotificationInfoUDPEndpoint.h" +#endif + #include "msg/Messenger.h" #include "osdc/Objecter.h" @@ -245,6 +250,21 @@ void Server::create_logger() plb.add_time_avg(l_mdss_req_snapdiff_latency, "req_snapdiff_latency", "Request type snapshot difference latency"); +#ifdef WITH_CEPHFS_NOTIFICATION + plb.add_time_avg(l_mdss_req_add_kafka_topic_latency, + "req_add_kafka_topic_latency", + "Request type add kafka topic latency"); + plb.add_time_avg(l_mdss_req_remove_kafka_topic_latency, + "req_remove_kafka_topic_latency", + "Request type remove kafka topic latency"); + plb.add_time_avg(l_mdss_req_add_udp_endpoint_latency, + "req_add_udp_endpoint_latency", + "Request type add udp endpoint latency"); + plb.add_time_avg(l_mdss_req_remove_udp_endpoint_latency, + "req_remove_udp_endpoint_latency", + "Request type remove udp endpoint latency"); +#endif + plb.set_prio_default(PerfCountersBuilder::PRIO_DEBUGONLY); plb.add_u64_counter(l_mdss_dispatch_client_request, "dispatch_client_request", "Client requests dispatched"); @@ -2207,6 +2227,22 @@ void Server::perf_gather_op_latency(const cref_t &req, utime_t l case CEPH_MDS_OP_READDIR_SNAPDIFF: code = l_mdss_req_snapdiff_latency; break; + +#ifdef WITH_CEPHFS_NOTIFICATION + case CEPH_MDS_OP_ADD_KAFKA_TOPIC: + code = l_mdss_req_add_kafka_topic_latency; + break; + case CEPH_MDS_OP_REMOVE_KAFKA_TOPIC: + code = l_mdss_req_remove_kafka_topic_latency; + break; + case CEPH_MDS_OP_ADD_UDP_ENDPOINT: + code = l_mdss_req_add_udp_endpoint_latency; + break; + case CEPH_MDS_OP_REMOVE_UDP_ENDPOINT: + code = l_mdss_req_remove_udp_endpoint_latency; + break; +#endif + default: dout(1) << ": unknown client op" << dendl; return; @@ -2863,6 +2899,21 @@ void Server::dispatch_client_request(const MDRequestRef& mdr) case CEPH_MDS_OP_READDIR_SNAPDIFF: handle_client_readdir_snapdiff(mdr); break; +#ifdef WITH_CEPHFS_NOTIFICATION + // notifications + case CEPH_MDS_OP_ADD_KAFKA_TOPIC: + handle_client_add_kafka_topic(mdr); + break; + case CEPH_MDS_OP_REMOVE_KAFKA_TOPIC: + 
handle_client_remove_kafka_topic(mdr);
+    break;
+  case CEPH_MDS_OP_ADD_UDP_ENDPOINT:
+    handle_client_add_udp_endpoint(mdr);
+    break;
+  case CEPH_MDS_OP_REMOVE_UDP_ENDPOINT:
+    handle_client_remove_udp_endpoint(mdr);
+    break;
+#endif
 
   default:
     dout(1) << " unknown client op " << req->get_op() << dendl;
@@ -11992,3 +12043,99 @@ bool Server::build_snap_diff(
   }
   return it == dir->end();
 }
+
+#ifdef WITH_CEPHFS_NOTIFICATION
+
+// FIXME handling user rights
+void Server::handle_client_add_kafka_topic(const MDRequestRef &mdr) {
+  const cref_t<MClientRequest> &req = mdr->client_request;
+  KafkaTopicPayload payload;
+  if (req->get_data().length()) {
+    try {
+      auto iter = req->get_data().cbegin();
+      decode(payload, iter);
+    } catch (const ceph::buffer::error &e) {
+      dout(1) << ": failed to decode kafka topic payload" << dendl;
+      respond_to_request(mdr, -CEPHFS_EINVAL);
+      return;
+    }
+  }
+  int r = mds->notification_manager->add_kafka_topic(
+      payload.topic_name, payload.broker, payload.use_ssl, payload.user,
+      payload.password, payload.ca_location, payload.mechanism, true);
+  if (r == 0) {
+    auto m = make_message<MNotificationInfoKafkaTopic>(
+        payload.topic_name, payload.broker, payload.use_ssl, payload.user,
+        payload.password, payload.ca_location, payload.mechanism);
+    mds->send_notification_info_to_peers(m);
+  }
+  respond_to_request(mdr, r);
+}
+
+void Server::handle_client_remove_kafka_topic(const MDRequestRef &mdr) {
+  const cref_t<MClientRequest> &req = mdr->client_request;
+  KafkaTopicPayload payload;
+  if (req->get_data().length()) {
+    try {
+      auto iter = req->get_data().cbegin();
+      decode(payload, iter);
+    } catch (const ceph::buffer::error &e) {
+      dout(1) << ": failed to decode kafka topic payload" << dendl;
+      respond_to_request(mdr, -CEPHFS_EINVAL);
+      return;
+    }
+  }
+  int r =
+      mds->notification_manager->remove_kafka_topic(payload.topic_name, true);
+  if (r == 0) {
+    auto m =
+        make_message<MNotificationInfoKafkaTopic>(payload.topic_name, true);
+    mds->send_notification_info_to_peers(m);
+  }
+  respond_to_request(mdr, r);
+}
+
+void Server::handle_client_add_udp_endpoint(const MDRequestRef &mdr) {
+  const cref_t<MClientRequest> &req = mdr->client_request;
+  UDPEndpointPayload payload;
+  if (req->get_data().length()) {
+    try {
+      auto iter = req->get_data().cbegin();
+      decode(payload, iter);
+    } catch (const ceph::buffer::error &e) {
+      dout(1) << ": failed to decode udp endpoint payload" << dendl;
+      respond_to_request(mdr, -CEPHFS_EINVAL);
+      return;
+    }
+  }
+  int r = mds->notification_manager->add_udp_endpoint(payload.name, payload.ip,
+                                                      payload.port, true);
+  if (r == 0) {
+    auto m = make_message<MNotificationInfoUDPEndpoint>(
+        payload.name, payload.ip, payload.port);
+    mds->send_notification_info_to_peers(m);
+  }
+  respond_to_request(mdr, r);
+}
+
+void Server::handle_client_remove_udp_endpoint(const MDRequestRef &mdr) {
+  const cref_t<MClientRequest> &req = mdr->client_request;
+  UDPEndpointPayload payload;
+  if (req->get_data().length()) {
+    try {
+      auto iter = req->get_data().cbegin();
+      decode(payload, iter);
+    } catch (const ceph::buffer::error &e) {
+      dout(1) << ": failed to decode udp endpoint payload" << dendl;
+      respond_to_request(mdr, -CEPHFS_EINVAL);
+      return;
+    }
+  }
+  int r = mds->notification_manager->remove_udp_endpoint(payload.name, true);
+  if (r == 0) {
+    auto m = make_message<MNotificationInfoUDPEndpoint>(payload.name, true);
+    mds->send_notification_info_to_peers(m);
+  }
+  respond_to_request(mdr, r);
+}
+#endif
diff --git a/src/mds/Server.h b/src/mds/Server.h
index d2788f447b71b..b1f6fe9f5f0d0 100644
--- a/src/mds/Server.h
+++ b/src/mds/Server.h
@@ -86,6 +86,14 @@ enum {
   l_mdss_cap_revoke_eviction,
   l_mdss_cap_acquisition_throttle,
   l_mdss_req_getvxattr_latency,
+
+#ifdef
WITH_CEPHFS_NOTIFICATION + l_mdss_req_add_kafka_topic_latency, + l_mdss_req_remove_kafka_topic_latency, + l_mdss_req_add_udp_endpoint_latency, + l_mdss_req_remove_udp_endpoint_latency, +#endif + l_mdss_last, }; @@ -124,6 +132,13 @@ class Server { return last_recall_state; } +#ifdef WITH_CEPHFS_NOTIFICATION + void handle_client_add_kafka_topic(const MDRequestRef& mdr); + void handle_client_remove_kafka_topic(const MDRequestRef& mdr); + void handle_client_add_udp_endpoint(const MDRequestRef& mdr); + void handle_client_remove_udp_endpoint(const MDRequestRef& mdr); +#endif + void handle_client_session(const cref_t &m); void _session_logged(Session *session, uint64_t state_seq, bool open, version_t pv, const interval_set& inos_to_free, version_t piv, diff --git a/src/messages/MClientRequest.h b/src/messages/MClientRequest.h index cb9a888b0bed5..a04400e398ff6 100644 --- a/src/messages/MClientRequest.h +++ b/src/messages/MClientRequest.h @@ -75,6 +75,106 @@ struct SnapPayload { WRITE_CLASS_ENCODER(SnapPayload) +struct NotificationEndpointPayload { + virtual void encode(ceph::buffer::list &bl) const = 0; + virtual void decode(ceph::buffer::list::const_iterator &iter) = 0; + virtual void dump(ceph::Formatter *f) const = 0; + virtual ~NotificationEndpointPayload() {} +}; + +struct KafkaTopicPayload final : public NotificationEndpointPayload { + std::string topic_name; + std::string broker; + bool use_ssl = false; + std::string user, password; + std::optional ca_location; + std::optional mechanism; + KafkaTopicPayload() {} + KafkaTopicPayload(const std::string &topic_name, const std::string &broker, + bool use_ssl, const std::string &user, + const std::string &password, + const std::optional &ca_location, + const std::optional &mechanism) + : topic_name(topic_name), broker(broker), use_ssl(use_ssl), user(user), + password(password), ca_location(ca_location), mechanism(mechanism) {} + KafkaTopicPayload(const std::string &topic_name) : topic_name(topic_name) {} + void encode(ceph::buffer::list &bl) const { + ENCODE_START(1, 1, bl); + encode(topic_name, bl); + encode(broker, bl); + encode(use_ssl, bl); + encode(user, bl); + encode(password, bl); + encode(ca_location, bl); + encode(mechanism, bl); + ENCODE_FINISH(bl); + } + void decode(ceph::buffer::list::const_iterator &iter) { + DECODE_START(1, iter); + decode(topic_name, iter); + decode(broker, iter); + decode(use_ssl, iter); + decode(user, iter); + decode(password, iter); + decode(ca_location, iter); + decode(mechanism, iter); + DECODE_FINISH(iter); + } + void dump(ceph::Formatter *f) const { + f->dump_string("topic_name", topic_name); + f->dump_string("broker", broker); + f->dump_bool("use_ssl", use_ssl); + f->dump_string("user", user); + f->dump_string("password", password); + if (ca_location.has_value()) { + f->dump_string("ca_location", ca_location.value()); + } + if (mechanism.has_value()) { + f->dump_string("mechanism", mechanism.value()); + } + } + static void generate_test_instances(std::list &o) { + o.push_back(new KafkaTopicPayload); + } +}; + +WRITE_CLASS_ENCODER(KafkaTopicPayload) + +struct UDPEndpointPayload final : public NotificationEndpointPayload { + std::string name; + std::string ip; + int port = -1; + UDPEndpointPayload() {} + UDPEndpointPayload(const std::string &name, const std::string &ip, int port) + : name(name), ip(ip), port(port) { + } + UDPEndpointPayload(const std::string &name) : name(name) {} + void encode(ceph::buffer::list &bl) const { + ENCODE_START(1, 1, bl); + encode(name, bl); + encode(ip, bl); + encode(port, bl); 
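+    // note added in review: ENCODE_START(1, 1, bl) above stamps this payload
+    // as struct version 1 (compat 1); if fields are ever appended, bump the
+    // version so older decoders can skip the extra bytes at DECODE_FINISH.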
+ ENCODE_FINISH(bl); + } + void decode(ceph::buffer::list::const_iterator &iter) { + DECODE_START(1, iter); + decode(name, iter); + decode(ip, iter); + decode(port, iter); + DECODE_FINISH(iter); + } + void dump(ceph::Formatter *f) const { + f->dump_string("name", name); + f->dump_string("ip", ip); + f->dump_int("port", port); + } + static void generate_test_instances(std::list &o) { + o.push_back(new UDPEndpointPayload); + } +}; + +WRITE_CLASS_ENCODER(UDPEndpointPayload) + // metadata ops. class MClientRequest final : public MMDSOp { diff --git a/src/test/libcephfs/CMakeLists.txt b/src/test/libcephfs/CMakeLists.txt index 6cbbbe246a5e2..8d4e356ce3d12 100644 --- a/src/test/libcephfs/CMakeLists.txt +++ b/src/test/libcephfs/CMakeLists.txt @@ -11,6 +11,9 @@ if(WITH_LIBCEPHFS) deleg.cc monconfig.cc ) + if (WITH_CEPHFS_NOTIFICATION) + target_sources(ceph_test_libcephfs PRIVATE notification_endpoint_api.cc) + endif() target_link_libraries(ceph_test_libcephfs ceph-common cephfs diff --git a/src/test/libcephfs/notification_endpoint_api.cc b/src/test/libcephfs/notification_endpoint_api.cc new file mode 100644 index 0000000000000..225ef2aa5520f --- /dev/null +++ b/src/test/libcephfs/notification_endpoint_api.cc @@ -0,0 +1,25 @@ + +#include "gtest/gtest.h" +#include "include/compat.h" +#include "include/cephfs/libcephfs.h" +#include "include/fs_types.h" +#include +#include + +TEST(LibCephFS, AddUDPEndpoint) { + struct ceph_mount_info *cmount; + ASSERT_EQ(0, ceph_create(&cmount, NULL)); + ASSERT_EQ(0, ceph_conf_read_file(cmount, NULL)); + ASSERT_EQ(0, ceph_mount(cmount, "/")); + ASSERT_EQ(0, ceph_add_udp_endpoint(cmount, "udp", "127.0.0.1", 8080)); + ASSERT_EQ(0, ceph_add_kafka_topic(cmount, "my-topic", "localhost:9092", false, + nullptr, nullptr, nullptr, nullptr)); + ASSERT_EQ(0, ceph_mkdirs(cmount, "/dir1", 0777)); + ASSERT_EQ(0, ceph_mkdirs(cmount, "/dir2", 0777)); + ASSERT_EQ(0, ceph_mkdirs(cmount, "/dir3", 0777)); + ASSERT_EQ(0, ceph_mkdirs(cmount, "/dir4", 0777)); + ASSERT_EQ(0, ceph_remove_udp_endpoint(cmount, "udp")); + ASSERT_EQ(0, ceph_remove_kafka_topic(cmount, "my-topic")); + ASSERT_EQ(0, ceph_unmount(cmount)); + ASSERT_EQ(0, ceph_release(cmount)); +} diff --git a/src/tools/ceph-dencoder/common_types.h b/src/tools/ceph-dencoder/common_types.h index e853321645ba2..6233e89e1a8ca 100644 --- a/src/tools/ceph-dencoder/common_types.h +++ b/src/tools/ceph-dencoder/common_types.h @@ -214,6 +214,8 @@ TYPE(openc_response_t) #include "messages/MClientRequest.h" MESSAGE(MClientRequest) TYPE(SnapPayload) +TYPE(KafkaTopicPayload) +TYPE(UDPEndpointPayload) TYPE(MClientRequest::Release) #include "messages/MClientRequestForward.h" From 5243e16e80068666e507e5a6f03ad58ad559be90 Mon Sep 17 00:00:00 2001 From: sajibreadd Date: Tue, 10 Sep 2024 10:46:26 +0600 Subject: [PATCH 5/5] Notification interface added in mds Signed-off-by: Md Mahamudur Rahaman Sajib Storage mechanism added for kafka and udp endpoints Signed-off-by: Md Mahamudur Rahaman Sajib --- CMakeLists.txt | 1 + src/common/options/mds.yaml.in | 51 ++ src/include/ceph_fs.h | 20 + src/include/config-h.in.cmake | 3 + src/mds/CMakeLists.txt | 27 +- src/mds/Locker.cc | 7 +- src/mds/MDSDaemon.cc | 41 ++ src/mds/MDSKafka.cc | 750 ++++++++++++++++++++ src/mds/MDSKafka.h | 137 ++++ src/mds/MDSNotificationManager.cc | 268 +++++++ src/mds/MDSNotificationManager.h | 64 ++ src/mds/MDSNotificationMessage.cc | 80 +++ src/mds/MDSNotificationMessage.h | 24 + src/mds/MDSRank.cc | 74 +- src/mds/MDSRank.h | 8 + src/mds/MDSUDPEndpoint.cc | 242 +++++++ 
 src/mds/MDSUDPEndpoint.h                    |  65 ++
 src/mds/Server.cc                           |  80 ++-
 src/mds/Server.h                            |   1 +
 src/messages/MNotificationInfoKafkaTopic.h  |  80 +++
 src/messages/MNotificationInfoUDPEndpoint.h |  60 ++
 src/msg/Message.cc                          |  15 +
 src/msg/Message.h                           |   5 +
 23 files changed, 2088 insertions(+), 15 deletions(-)
 create mode 100644 src/mds/MDSKafka.cc
 create mode 100644 src/mds/MDSKafka.h
 create mode 100644 src/mds/MDSNotificationManager.cc
 create mode 100644 src/mds/MDSNotificationManager.h
 create mode 100644 src/mds/MDSNotificationMessage.cc
 create mode 100644 src/mds/MDSNotificationMessage.h
 create mode 100644 src/mds/MDSUDPEndpoint.cc
 create mode 100644 src/mds/MDSUDPEndpoint.h
 create mode 100644 src/messages/MNotificationInfoKafkaTopic.h
 create mode 100644 src/messages/MNotificationInfoUDPEndpoint.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 161a363f129a9..bb7b44c433886 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -584,6 +584,7 @@ endif (WITH_RADOSGW)
 
 #option for CephFS
 option(WITH_CEPHFS "CephFS is enabled" ON)
+option(WITH_CEPHFS_NOTIFICATION "Enable CephFS notification" OFF)
 
 if(NOT WIN32)
 # Please specify 3.x if you want to build with a certain version of python3.
diff --git a/src/common/options/mds.yaml.in b/src/common/options/mds.yaml.in
index dcf3eaac0d683..1e39788cd6ead 100644
--- a/src/common/options/mds.yaml.in
+++ b/src/common/options/mds.yaml.in
@@ -1730,3 +1730,54 @@ options:
   - mds
   flags:
   - runtime
+- name: mds_allow_notification_secrets_in_cleartext
+  type: bool
+  level: advanced
+  desc: Allows sending secrets (e.g. passwords) over unencrypted connections.
+  long_desc: When a notification endpoint requires secrets (e.g. passwords),
+    topic creation over an insecure connection is normally rejected. This
+    parameter can be set to "true" to bypass that check. Use this only if the
+    mds is on a trusted private network, and the message broker cannot be
+    configured without password authentication. Otherwise, this will leak the
+    credentials of your message broker and compromise its security.
+  default: false
+  services:
+  - mds
+- name: mds_kafka_sleep_timeout
+  type: uint
+  level: advanced
+  desc: Time in milliseconds to sleep while polling for kafka replies
+  long_desc: This is used to prevent busy waiting for kafka replies, as well
+    as for the cases where the broker is down and we try to reconnect. Three
+    times this value is used as the sleep period when no messages were sent
+    or received across all kafka connections.
+  default: 10
+  services:
+  - mds
+- name: mds_kafka_message_timeout
+  type: uint
+  level: advanced
+  desc: This is the maximum time in milliseconds to deliver a message (including retries)
+  long_desc: A delivery error occurs when the message timeout is exceeded.
+    The value must be greater than zero; if set to zero, a value of 1
+    millisecond is used.
+  default: 5000
+  services:
+  - mds
+- name: mds_notification_dir_mask
+  type: uint
+  level: advanced
+  desc: This indicates which events are subscribed to for operations on directories
+  long_desc: Bitwise OR of CEPH_MDS_NOTIFY_* event flags; events not present
+    in this mask are filtered out for directories. The default (130943, i.e.
+    0x1ff7f) enables every event except CEPH_MDS_NOTIFY_MODIFY.
+  default: 130943
+  services:
+  - mds
+- name: mds_notification_file_mask
+  type: uint
+  level: advanced
+  desc: This indicates which events are subscribed to for operations on files
+  long_desc: Bitwise OR of CEPH_MDS_NOTIFY_* event flags; events not present
+    in this mask are filtered out for files. The default (65535, i.e. 0xffff)
+    enables every event except CEPH_MDS_NOTIFY_ONLYDIR.
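+  # illustrative example (not part of the option schema): to additionally
+  # drop open/close events for files, clear CEPH_MDS_NOTIFY_OPEN (0x0800),
+  # CEPH_MDS_NOTIFY_CLOSE (0x1000), CEPH_MDS_NOTIFY_CLOSE_WRITE (0x0004) and
+  # CEPH_MDS_NOTIFY_CLOSE_NOWRITE (0x0008): 65535 & ~0x180c = 59379.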
+ default: 65535 + services: + - mds \ No newline at end of file diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 57eb18b0d3e5a..6a8de2b7ca79d 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -442,6 +442,26 @@ enum { CEPH_MDS_OP_LOCK_PATH = 0x0150a, }; +enum { + CEPH_MDS_NOTIFY_ACCESS = 0x0000000000000001, + CEPH_MDS_NOTIFY_ATTRIB = 0x0000000000000002, + CEPH_MDS_NOTIFY_CLOSE_WRITE = 0x0000000000000004, + CEPH_MDS_NOTIFY_CLOSE_NOWRITE = 0x0000000000000008, + CEPH_MDS_NOTIFY_CREATE = 0x0000000000000010, + CEPH_MDS_NOTIFY_DELETE = 0x0000000000000020, + CEPH_MDS_NOTIFY_DELETE_SELF = 0x0000000000000040, + CEPH_MDS_NOTIFY_MODIFY = 0x0000000000000080, + CEPH_MDS_NOTIFY_MOVE_SELF = 0x0000000000000100, + CEPH_MDS_NOTIFY_MOVED_FROM = 0x0000000000000200, + CEPH_MDS_NOTIFY_MOVED_TO = 0x0000000000000400, + CEPH_MDS_NOTIFY_OPEN = 0x0000000000000800, + CEPH_MDS_NOTIFY_CLOSE = 0x0000000000001000, + CEPH_MDS_NOTIFY_MOVE = 0x0000000000002000, + CEPH_MDS_NOTIFY_ONESHOT = 0x0000000000004000, + CEPH_MDS_NOTIFY_IGNORED = 0x0000000000008000, + CEPH_MDS_NOTIFY_ONLYDIR = 0x0000000000010000 +}; + #define IS_CEPH_MDS_OP_NEWINODE(op) (op == CEPH_MDS_OP_CREATE || \ op == CEPH_MDS_OP_MKNOD || \ op == CEPH_MDS_OP_MKDIR || \ diff --git a/src/include/config-h.in.cmake b/src/include/config-h.in.cmake index b10ea7c27cbcd..7aba8634f1468 100644 --- a/src/include/config-h.in.cmake +++ b/src/include/config-h.in.cmake @@ -133,6 +133,9 @@ /* define if cephfs enabled */ #cmakedefine WITH_CEPHFS +/* define if cephfs notification enabled */ +#cmakedefine WITH_CEPHFS_NOTIFICATION + /* define if systemed is enabled */ #cmakedefine WITH_SYSTEMD diff --git a/src/mds/CMakeLists.txt b/src/mds/CMakeLists.txt index f3980c7e04b50..071df618a6bfb 100644 --- a/src/mds/CMakeLists.txt +++ b/src/mds/CMakeLists.txt @@ -1,3 +1,8 @@ + +if (WITH_CEPHFS_NOTIFICATION) + find_package(RDKafka 0.9.2 REQUIRED) +endif() + set(mds_srcs BatchOp.cc Capability.cc @@ -48,9 +53,25 @@ set(mds_srcs ${CMAKE_SOURCE_DIR}/src/common/TrackedOp.cc ${CMAKE_SOURCE_DIR}/src/common/MemoryModel.cc ${CMAKE_SOURCE_DIR}/src/osdc/Journaler.cc - ${CMAKE_SOURCE_DIR}/src/mgr/MDSPerfMetricTypes.cc) + ${CMAKE_SOURCE_DIR}/src/mgr/MDSPerfMetricTypes.cc + MDSNotificationManager.cc) + +if (WITH_CEPHFS_NOTIFICATION) + list(APPEND mds_srcs MDSKafka.cc MDSUDPEndpoint.cc MDSNotificationMessage.cc) +endif() + add_library(mds STATIC ${mds_srcs}) target_link_libraries(mds PRIVATE - legacy-option-headers Boost::url - heap_profiler cpu_profiler osdc ${LUA_LIBRARIES}) + legacy-option-headers Boost::url + heap_profiler cpu_profiler osdc ${LUA_LIBRARIES} + ${Boost_LIBRARIES}) + +if (WITH_CEPHFS_NOTIFICATION) + target_link_libraries(mds PRIVATE RDKafka::RDKafka) +endif() + target_include_directories(mds PRIVATE "${LUA_INCLUDE_DIR}") + +if (WITH_CEPHFS_NOTIFICATION) + target_include_directories(mds PRIVATE ${Boost_INCLUDE_DIRS}) +endif() \ No newline at end of file diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index f7c320ddd85b2..7616ae3d51424 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -93,7 +93,7 @@ void Locker::dispatch(const cref_t &m) case CEPH_MSG_CLIENT_CAPS: handle_client_caps(ref_cast(m)); break; - case CEPH_MSG_CLIENT_CAPRELEASE: + case CEPH_MSG_CLIENT_CAPRELEASE: handle_client_cap_release(ref_cast(m)); break; case CEPH_MSG_CLIENT_LEASE: @@ -4027,11 +4027,13 @@ bool Locker::_do_cap_update(CInode *in, Capability *cap, const cref_t &m, const ref_t &ack, bool *need_flush) { - dout(10) << "_do_cap_update dirty " << ccap_string(dirty) + dout(1) 
<< "_do_cap_update dirty " << ccap_string(dirty) << " issued " << ccap_string(cap ? cap->issued() : 0) << " wanted " << ccap_string(cap ? cap->wanted() : 0) << " on " << *in << dendl; ceph_assert(in->is_auth()); + mds->notification_manager->push_notification(mds->get_nodeid(), in, + CEPH_MDS_NOTIFY_MODIFY, false); client_t client = m->get_source().num(); const auto& latest = in->get_projected_inode(); @@ -4040,7 +4042,6 @@ bool Locker::_do_cap_update(CInode *in, Capability *cap, bool change_max = false; uint64_t old_max = latest->get_client_range(client); uint64_t new_max = old_max; - if (in->is_file()) { bool forced_change_max = false; dout(20) << "inode is file" << dendl; diff --git a/src/mds/MDSDaemon.cc b/src/mds/MDSDaemon.cc index b31d9c95220cc..200acc7eab40b 100644 --- a/src/mds/MDSDaemon.cc +++ b/src/mds/MDSDaemon.cc @@ -532,6 +532,47 @@ void MDSDaemon::set_up_admin_socket() asok_hook, "run cpu profiling on daemon"); ceph_assert(r == 0); + + #ifdef WITH_CEPHFS_NOTIFICATION + r = admin_socket->register_command( + "add_topic " + "name=topic_name,type=CephString,req=true " + "name=endpoint_name,type=CephString,req=true " + "name=broker,type=CephString,req=false " + "name=use_ssl,type=CephBool,req=false " + "name=username,type=CephString,req=false " + "name=password,type=CephString,req=false " + "name=ca_location,type=CephString,req=false " + "name=mechanism,type=CephString,req=false", + asok_hook, + "add topic for notification" + ); + ceph_assert(r == 0); + r = admin_socket->register_command( + "remove_topic " + "name=topic_name,type=CephString,req=true " + "name=endpoint_name,type=CephString,req=true", + asok_hook, + "remove kafka topic" + ); + ceph_assert(r == 0); + r = admin_socket->register_command( + "add_udp_endpoint " + "name=entity,type=CephString,req=true " + "name=ip,type=CephString,req=true " + "name=port,type=CephInt,req=true", + asok_hook, + "add udp endpoint for notification" + ); + ceph_assert(r == 0); + r = admin_socket->register_command( + "remove_udp_endpoint " + "name=entity,type=CephString,req=true", + asok_hook, + "remove UDP endpoint" + ); + ceph_assert(r == 0); + #endif } void MDSDaemon::clean_up_admin_socket() diff --git a/src/mds/MDSKafka.cc b/src/mds/MDSKafka.cc new file mode 100644 index 0000000000000..5e5139a592dc3 --- /dev/null +++ b/src/mds/MDSKafka.cc @@ -0,0 +1,750 @@ + +#include "MDSKafka.h" +#include "common/Cond.h" +#include "common/errno.h" +#include "include/fs_types.h" + +#define dout_subsys ceph_subsys_mds + +CephContext *MDSKafka::cct = nullptr; +CephContext *MDSKafkaTopic::cct = nullptr; + +MDSKafkaConnection::MDSKafkaConnection( + const std::string &broker, bool use_ssl, const std::string &user, + const std::string &password, const std::optional &ca_location, + const std::optional &mechanism) + : broker(broker), use_ssl(use_ssl), user(user), password(password), + ca_location(ca_location), mechanism(mechanism) { + combine_hash(); +} + +void MDSKafkaConnection::encode(ceph::buffer::list &bl) const { + ENCODE_START(1, 1, bl); + encode(broker, bl); + encode(use_ssl, bl); + encode(user, bl); + encode(password, bl); + encode(ca_location, bl); + encode(mechanism, bl); + ENCODE_FINISH(bl); +} + +void MDSKafkaConnection::decode(ceph::buffer::list::const_iterator &iter) { + DECODE_START(1, iter); + decode(broker, iter); + decode(use_ssl, iter); + decode(user, iter); + decode(password, iter); + decode(ca_location, iter); + decode(mechanism, iter); + DECODE_FINISH(iter); +} + +void MDSKafkaConnection::dump(ceph::Formatter *f) const { + 
f->dump_string("broker", broker);
+  f->dump_bool("use_ssl", use_ssl);
+  f->dump_string("user", user);
+  f->dump_string("password", password);
+  if (ca_location.has_value()) {
+    f->dump_string("ca_location", ca_location.value());
+  }
+  if (mechanism.has_value()) {
+    f->dump_string("mechanism", mechanism.value());
+  }
+}
+
+void MDSKafkaConnection::generate_test_instances(
+    std::list<MDSKafkaConnection *> &o) {
+  o.push_back(new MDSKafkaConnection);
+}
+
+bool MDSKafkaConnection::is_empty() const {
+  return broker.empty() && !use_ssl && user.empty() && password.empty() &&
+         !ca_location.has_value() && !mechanism.has_value();
+}
+
+MDSKafkaManager::MDSKafkaManager(MDSRank *mds)
+    : mds(mds), cct(mds->cct), paused(true), object_name("mds_kafka_topics"),
+      endpoints_epoch(0), prev_endpoints_epoch(0) {}
+
+int MDSKafkaManager::load_data(std::map<std::string, bufferlist> &mp) {
+  int r = update_omap(std::map<std::string, bufferlist>());
+  if (r < 0) {
+    return r;
+  }
+  C_SaferCond sync_finisher;
+  ObjectOperation op;
+  op.omap_get_vals("", "", UINT_MAX, &mp, NULL, NULL);
+  mds->objecter->read(object_t(object_name),
+                      object_locator_t(mds->get_metadata_pool()), op,
+                      CEPH_NOSNAP, NULL, 0, &sync_finisher);
+  r = sync_finisher.wait();
+  if (r < 0) {
+    lderr(mds->cct) << "Error reading omap values from object '" << object_name
+                    << "':" << cpp_strerror(r) << dendl;
+  }
+  return r;
+}
+
+int MDSKafkaManager::update_omap(const std::map<std::string, bufferlist> &mp) {
+  C_SaferCond sync_finisher;
+  ObjectOperation op;
+  op.omap_set(mp);
+  mds->objecter->mutate(
+      object_t(object_name), object_locator_t(mds->get_metadata_pool()), op,
+      SnapContext(), ceph::real_clock::now(), 0, &sync_finisher);
+  int r = sync_finisher.wait();
+  if (r < 0) {
+    lderr(mds->cct) << "Error updating omap of object '" << object_name
+                    << "':" << cpp_strerror(r) << dendl;
+  }
+  return r;
+}
+
+int MDSKafkaManager::remove_keys(const std::set<std::string> &st) {
+  C_SaferCond sync_finisher;
+  ObjectOperation op;
+  op.omap_rm_keys(st);
+  mds->objecter->mutate(
+      object_t(object_name), object_locator_t(mds->get_metadata_pool()), op,
+      SnapContext(), ceph::real_clock::now(), 0, &sync_finisher);
+  int r = sync_finisher.wait();
+  if (r < 0) {
+    lderr(mds->cct) << "Error removing keys from omap of object '"
+                    << object_name << "':" << cpp_strerror(r) << dendl;
+  }
+  return r;
+}
+
+int MDSKafkaManager::add_topic_into_disk(const std::string &topic_name,
+                                         const std::string &endpoint_name,
+                                         const MDSKafkaConnection &connection) {
+  std::map<std::string, bufferlist> mp;
+  std::string key = topic_name + "," + endpoint_name;
+  bufferlist bl;
+  encode(connection, bl);
+  mp[key] = std::move(bl);
+  int r = update_omap(mp);
+  return r;
+}
+
+int MDSKafkaManager::remove_topic_from_disk(const std::string &topic_name,
+                                            const std::string &endpoint_name) {
+  std::set<std::string> st;
+  std::string key = topic_name + "," + endpoint_name;
+  st.insert(key);
+  int r = remove_keys(st);
+  return r;
+}
+
+int MDSKafkaManager::init() {
+  std::map<std::string, bufferlist> mp;
+  int r = load_data(mp);
+  if (r < 0) {
+    lderr(cct) << "Error occurred while initializing kafka topics" << dendl;
+  }
+  for (auto &[key, val] : mp) {
+    try {
+      MDSKafkaConnection connection;
+      auto iter = val.cbegin();
+      decode(connection, iter);
+      size_t pos = key.find(',');
+      std::string topic_name = key.substr(0, pos);
+      std::string endpoint_name = key.substr(pos + 1);
+      add_topic(topic_name, endpoint_name, connection, false);
+      endpoints_epoch++;
+    } catch (const ceph::buffer::error &e) {
+      ldout(cct, 1) << "Undecodable kafka topic found:" << e.what() << dendl;
+    }
+  }
+  if (candidate_endpoints.empty()) {
+    pause();
+  } else {
+    activate();
+  }
+  return r;
+}
+
+int MDSKafkaManager::remove_topic(const std::string &topic_name,
+                                  const std::string &endpoint_name,
+                                  bool write_into_disk) {
+  std::unique_lock lock(endpoint_mutex);
+  int r = 0;
+  bool is_empty = false;
+  auto it = candidate_endpoints.find(endpoint_name);
+  if (it == candidate_endpoints.end()) {
+    ldout(cct, 1) << "No kafka endpoint exists with name '" << endpoint_name
+                  << "'" << dendl;
+    r = -CEPHFS_EINVAL;
+    goto error_occurred;
+  }
+  r = it->second->remove_topic(topic_name, is_empty);
+  if (r < 0) {
+    ldout(cct, 1) << "No kafka topic exists with topic name '" << topic_name
+                  << "' for endpoint '" << endpoint_name << "'" << dendl;
+    goto error_occurred;
+  }
+  if (is_empty) {
+    candidate_endpoints.erase(it);
+    endpoints_epoch++;
+  }
+  if (write_into_disk) {
+    r = remove_topic_from_disk(topic_name, endpoint_name);
+    if (r < 0) {
+      goto error_occurred;
+    }
+  }
+  ldout(cct, 1) << "Kafka topic '" << topic_name << "' with endpoint '"
+                << endpoint_name << "' removed successfully" << dendl;
+  if (candidate_endpoints.empty()) {
+    lock.unlock();
+    pause();
+  }
+  return r;
+
+error_occurred:
+  lderr(cct) << "Kafka topic '" << topic_name << "' with endpoint '"
+             << endpoint_name
+             << "' could not be removed, failed with error: " << cpp_strerror(r)
+             << dendl;
+  return r;
+}
+
+int MDSKafkaManager::add_topic(const std::string &topic_name,
+                               const std::string &endpoint_name,
+                               const MDSKafkaConnection &connection,
+                               bool write_into_disk) {
+  std::unique_lock lock(endpoint_mutex);
+  auto it = candidate_endpoints.find(endpoint_name);
+  std::shared_ptr<MDSKafka> kafka;
+  std::shared_ptr<MDSKafkaTopic> topic;
+  bool created = false;
+  int r = 0;
+  if (it == candidate_endpoints.end()) {
+    if (candidate_endpoints.size() >= MAX_CONNECTIONS_DEFAULT) {
+      ldout(cct, 1) << "Kafka connect: max connections exceeded" << dendl;
+      r = -CEPHFS_ENOMEM;
+      goto error_occurred;
+    }
+    kafka = MDSKafka::create(cct, connection);
+    if (!kafka) {
+      r = -CEPHFS_ECANCELED;
+      goto error_occurred;
+    }
+    created = true;
+  } else {
+    if (!connection.is_empty() &&
+        connection.hash_key != it->second->connection.hash_key) {
+      ldout(cct, 1)
+          << "Kafka endpoint name already exists with different endpoint "
+             "information"
+          << dendl;
+      r = -CEPHFS_EINVAL;
+      goto error_occurred;
+    }
+    kafka = it->second;
+  }
+  topic = MDSKafkaTopic::create(cct, topic_name, kafka);
+  if (!topic) {
+    r = -CEPHFS_ECANCELED;
+    goto error_occurred;
+  }
+  kafka->add_topic(topic_name, topic);
+  if (created) {
+    candidate_endpoints[endpoint_name] = kafka;
+    endpoints_epoch++;
+  }
+  if (write_into_disk) {
+    r = add_topic_into_disk(topic_name, endpoint_name, connection);
+    if (r < 0) {
+      goto error_occurred;
+    }
+  }
+  ldout(cct, 1) << "Kafka topic '" << topic_name << "' with endpoint '"
+                << endpoint_name << "' added successfully" << dendl;
+  lock.unlock();
+  activate();
+  return r;
+
+error_occurred:
+  lderr(cct) << "Kafka topic '" << topic_name << "' with endpoint '"
+             << endpoint_name
+             << "' could not be added, failed with error: " << cpp_strerror(r)
+             << dendl;
+  return r;
+}
+
+void MDSKafkaManager::activate() {
+  if (!paused) {
+    return;
+  }
+  worker = std::thread(&MDSKafkaManager::run, this);
+  paused = false;
+  ldout(cct, 1) << "KafkaManager worker thread started" << dendl;
+}
+
+void MDSKafkaManager::pause() {
+  if (paused) {
+    return;
+  }
+  paused = true;
+  if (worker.joinable()) {
+    worker.join();
+  }
+  ldout(cct, 1) << "KafkaManager worker thread paused" << dendl;
+}
+
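+// Illustrative usage sketch (added for review; 'next_seq_id' and 'session_id'
+// are placeholders, not symbols introduced by this patch): callers hand a
+// prepared message to the manager, which queues it for the worker thread
+// started by activate():
+//
+//   auto msg = std::make_shared<MDSNotificationMessage>(next_seq_id++);
+//   msg->create_message(mds->get_nodeid(), session_id,
+//                       CEPH_MDS_NOTIFY_CREATE, "/dir1/file");
+//   kafka_manager->send(msg); // returns -CEPHFS_EBUSY if the queue is full
+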
+int MDSKafkaManager::send( + const std::shared_ptr &message) { + if (paused) { + return -CEPHFS_ECANCELED; + } + std::unique_lock lock(queue_mutex); + if (message_queue.size() >= MAX_QUEUE_DEFAULT) { + ldout(cct, 1) << "Notification message for kafka with seq_id=" + << message->seq_id << " is dropped as queue is full" << dendl; + return -CEPHFS_EBUSY; + } + message_queue.push(message); + return 0; +} + +void MDSKafkaManager::sync_endpoints() { + uint64_t current_epoch = endpoints_epoch.load(); + if (prev_endpoints_epoch != current_epoch) { + effective_endpoints = candidate_endpoints; + prev_endpoints_epoch = current_epoch; + } +} + +uint64_t MDSKafkaManager::publish( + const std::shared_ptr &message) { + sync_endpoints(); + uint64_t reply_count = 0; + for (auto &[key, endpoint] : effective_endpoints) { + reply_count += endpoint->publish_internal(message); + } + return reply_count; +} + +uint64_t MDSKafkaManager::polling(int read_timeout) { + sync_endpoints(); + uint64_t reply_count = 0; + for (auto &[key, endpoint] : effective_endpoints) { + reply_count += endpoint->poll(read_timeout); + } + return reply_count; +} + +void MDSKafkaManager::run() { + while (!paused) { + int send_count = 0, reply_count = 0; + std::unique_lock queue_lock(queue_mutex); + std::queue> local_message_queue; + swap(local_message_queue, message_queue); + ceph_assert(message_queue.empty()); + queue_lock.unlock(); + while (!local_message_queue.empty() && !paused) { + std::shared_ptr message = + local_message_queue.front(); + local_message_queue.pop(); + ++send_count; + reply_count += publish(message); + } + reply_count += polling(READ_TIMEOUT_MS_DEFAULT); + } +} + +void MDSKafkaConnection::combine_hash() { + hash_key = 0; + boost::hash_combine(hash_key, broker); + boost::hash_combine(hash_key, use_ssl); + boost::hash_combine(hash_key, user); + boost::hash_combine(hash_key, password); + if (ca_location.has_value()) { + boost::hash_combine(hash_key, ca_location.value()); + } + if (mechanism.has_value()) { + boost::hash_combine(hash_key, mechanism.value()); + } +} + +void MDSKafkaTopic::kafka_topic_deleter(rd_kafka_topic_t *topic_ptr) { + if (topic_ptr) { + rd_kafka_topic_destroy(topic_ptr); + } +} + +MDSKafkaTopic::MDSKafkaTopic(const std::string &topic_name) + : topic_name(topic_name), head(0), tail(0), inflight_count(0) {} + +std::shared_ptr +MDSKafkaTopic::create(CephContext *_cct, const std::string &topic_name, + const std::shared_ptr &kafka_endpoint) { + try { + if (!MDSKafkaTopic::cct && _cct) { + MDSKafkaTopic::cct = _cct; + } + + std::shared_ptr topic_ptr = + std::make_shared(topic_name); + topic_ptr->kafka_topic_ptr.reset(rd_kafka_topic_new( + kafka_endpoint->producer.get(), topic_name.c_str(), nullptr)); + if (!topic_ptr->kafka_topic_ptr) { + return nullptr; + } + topic_ptr->delivery_ring = std::vector(MAX_INFLIGHT_DEFAULT, false); + return topic_ptr; + } catch (...) 
{ + } + return nullptr; +} + +int MDSKafkaTopic::push_unack_event() { + std::unique_lock lock(ring_mutex); + if (inflight_count >= (int)MAX_INFLIGHT_DEFAULT) { + return -1; + } + delivery_ring[tail] = true; + int idx = tail; + tail = (tail + 1) % MAX_INFLIGHT_DEFAULT; + ++inflight_count; + return idx; +} + +void MDSKafkaTopic::acknowledge_event(int idx) { + if (!(idx >= 0 && idx < (int)MAX_INFLIGHT_DEFAULT)) { + ldout(cct, 10) << "Kafka run: unsolicited n/ack received with tag=" << idx + << dendl; + return; + } + std::unique_lock lock(ring_mutex); + delivery_ring[idx] = false; + while (inflight_count > 0 && !delivery_ring[head]) { + head = (head + 1) % MAX_INFLIGHT_DEFAULT; + --inflight_count; + } +} + +void MDSKafkaTopic::drop_last_event() { + std::unique_lock lock(ring_mutex); + delivery_ring[tail] = false; + tail = (tail - 1 + MAX_INFLIGHT_DEFAULT) % MAX_INFLIGHT_DEFAULT; + --inflight_count; +} + +void MDSKafka::kafka_producer_deleter(rd_kafka_t *producer_ptr) { + if (producer_ptr) { + rd_kafka_flush(producer_ptr, + 10 * 1000); // Wait for max 10 seconds to flush. + rd_kafka_destroy(producer_ptr); // Destroy producer instance. + } +} + +MDSKafka::MDSKafka(const MDSKafkaConnection &connection) + : connection(connection) {} + +std::shared_ptr +MDSKafka::create(CephContext *_cct, const MDSKafkaConnection &connection) { + try { + if (!MDSKafka::cct && _cct) { + MDSKafka::cct = _cct; + } + // validation before creating kafka interface + if (connection.broker.empty()) { + return nullptr; + } + if (connection.user.empty() != connection.password.empty()) { + return nullptr; + } + if (!connection.user.empty() && !connection.use_ssl && + !g_conf().get_val( + "mds_allow_notification_secrets_in_cleartext")) { + ldout(cct, 1) << "Kafka connect: user/password are only allowed over " + "secure connection" + << dendl; + return nullptr; + } + std::shared_ptr kafka_ptr = + std::make_shared(connection); + char errstr[512] = {0}; + auto kafka_conf_deleter = [](rd_kafka_conf_t *conf) { + rd_kafka_conf_destroy(conf); + }; + std::unique_ptr conf( + rd_kafka_conf_new(), kafka_conf_deleter); + if (!conf) { + ldout(cct, 1) << "Kafka connect: failed to allocate configuration" + << dendl; + return nullptr; + } + constexpr std::uint64_t min_message_timeout = 1; + const auto message_timeout = + std::max(min_message_timeout, + cct->_conf.get_val("mds_kafka_message_timeout")); + if (rd_kafka_conf_set(conf.get(), "message.timeout.ms", + std::to_string(message_timeout).c_str(), errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK) { + goto conf_error; + } + if (rd_kafka_conf_set(conf.get(), "bootstrap.servers", + connection.broker.c_str(), errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK) { + goto conf_error; + } + + if (connection.use_ssl) { + if (!connection.user.empty()) { + // use SSL+SASL + if (rd_kafka_conf_set(conf.get(), "security.protocol", "SASL_SSL", + errstr, sizeof(errstr)) != RD_KAFKA_CONF_OK || + rd_kafka_conf_set(conf.get(), "sasl.username", + connection.user.c_str(), errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK || + rd_kafka_conf_set(conf.get(), "sasl.password", + connection.password.c_str(), errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK) { + goto conf_error; + } + ldout(cct, 20) + << "Kafka connect: successfully configured SSL+SASL security" + << dendl; + + if (connection.mechanism) { + if (rd_kafka_conf_set(conf.get(), "sasl.mechanism", + connection.mechanism->c_str(), errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK) { + goto conf_error; + } + ldout(cct, 20) + << "Kafka connect: successfully configured SASL 
mechanism" + << dendl; + } else { + if (rd_kafka_conf_set(conf.get(), "sasl.mechanism", "PLAIN", errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK) { + goto conf_error; + } + ldout(cct, 20) << "Kafka connect: using default SASL mechanism" + << dendl; + } + } else { + // use only SSL + if (rd_kafka_conf_set(conf.get(), "security.protocol", "SSL", errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK) { + goto conf_error; + } + ldout(cct, 20) << "Kafka connect: successfully configured SSL security" + << dendl; + } + if (connection.ca_location) { + if (rd_kafka_conf_set(conf.get(), "ssl.ca.location", + connection.ca_location->c_str(), errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK) { + goto conf_error; + } + ldout(cct, 20) << "Kafka connect: successfully configured CA location" + << dendl; + } else { + ldout(cct, 20) << "Kafka connect: using default CA location" << dendl; + } + ldout(cct, 20) << "Kafka connect: successfully configured security" + << dendl; + } else if (!connection.user.empty()) { + // use SASL+PLAINTEXT + if (rd_kafka_conf_set(conf.get(), "security.protocol", "SASL_PLAINTEXT", + errstr, sizeof(errstr)) != RD_KAFKA_CONF_OK || + rd_kafka_conf_set(conf.get(), "sasl.username", + connection.user.c_str(), errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK || + rd_kafka_conf_set(conf.get(), "sasl.password", + connection.password.c_str(), errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK) { + goto conf_error; + } + ldout(cct, 20) << "Kafka connect: successfully configured SASL_PLAINTEXT" + << dendl; + + if (connection.mechanism) { + if (rd_kafka_conf_set(conf.get(), "sasl.mechanism", + connection.mechanism->c_str(), errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK) { + goto conf_error; + } + ldout(cct, 20) + << "Kafka connect: successfully configured SASL mechanism" << dendl; + } else { + if (rd_kafka_conf_set(conf.get(), "sasl.mechanism", "PLAIN", errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK) { + goto conf_error; + } + ldout(cct, 20) << "Kafka connect: using default SASL mechanism" + << dendl; + } + } + rd_kafka_conf_set_dr_msg_cb(conf.get(), message_callback); + rd_kafka_conf_set_opaque(conf.get(), kafka_ptr.get()); + rd_kafka_conf_set_log_cb(conf.get(), log_callback); + rd_kafka_conf_set_error_cb(conf.get(), poll_err_callback); + { + rd_kafka_t *prod = rd_kafka_new(RD_KAFKA_PRODUCER, conf.release(), errstr, + sizeof(errstr)); + if (!prod) { + ldout(cct, 1) << "Kafka connect: failed to create producer: " << errstr + << dendl; + return nullptr; + } + kafka_ptr->producer.reset(prod); + } + ldout(cct, 1) << "Kafka connect: successfully created new producer" + << dendl; + { + const auto log_level = cct->_conf->subsys.get_log_level(ceph_subsys_mds); + if (log_level <= 1) { + rd_kafka_set_log_level(kafka_ptr->producer.get(), 3); + } else if (log_level <= 2) { + rd_kafka_set_log_level(kafka_ptr->producer.get(), 5); + } else if (log_level <= 10) { + rd_kafka_set_log_level(kafka_ptr->producer.get(), 5); + } else { + rd_kafka_set_log_level(kafka_ptr->producer.get(), 5); + } + } + return kafka_ptr; + + conf_error: + ldout(cct, 1) << "Kafka connect: configuration failed: " << errstr << dendl; + return nullptr; + } catch (...) 
{
+  }
+  return nullptr;
+}
+
+void MDSKafka::add_topic(const std::string &topic_name,
+                         const std::shared_ptr<MDSKafkaTopic> &topic) {
+  std::unique_lock lock(topic_mutex);
+  topics[topic_name] = topic;
+}
+
+int MDSKafka::remove_topic(const std::string &topic_name, bool &is_empty) {
+  std::unique_lock lock(topic_mutex);
+  auto it = topics.find(topic_name);
+  if (it == topics.end()) {
+    return -CEPHFS_EINVAL;
+  }
+  topics.erase(it);
+  is_empty = topics.empty();
+  return 0;
+}
+
+void MDSKafka::log_callback(const rd_kafka_t *rk, int level, const char *fac,
+                            const char *buf) {
+  if (!cct) {
+    return;
+  }
+  if (level <= 3) {
+    ldout(cct, 1) << "RDKAFKA-" << level << "-" << fac << ": "
+                  << rd_kafka_name(rk) << ": " << buf << dendl;
+  } else if (level <= 5) {
+    ldout(cct, 2) << "RDKAFKA-" << level << "-" << fac << ": "
+                  << rd_kafka_name(rk) << ": " << buf << dendl;
+  } else if (level <= 6) {
+    ldout(cct, 10) << "RDKAFKA-" << level << "-" << fac << ": "
+                   << rd_kafka_name(rk) << ": " << buf << dendl;
+  } else {
+    ldout(cct, 20) << "RDKAFKA-" << level << "-" << fac << ": "
+                   << rd_kafka_name(rk) << ": " << buf << dendl;
+  }
+}
+
+void MDSKafka::poll_err_callback(rd_kafka_t *rk, int err, const char *reason,
+                                 void *opaque) {
+  if (!cct) {
+    return;
+  }
+  ldout(cct, 10) << "Kafka run: poll error(" << err << "): " << reason << dendl;
+}
+
+uint64_t MDSKafka::publish_internal(
+    const std::shared_ptr<MDSNotificationMessage> &message) {
+  uint64_t reply_count = 0;
+  std::shared_lock lock(topic_mutex);
+  uint64_t read_timeout =
+      cct->_conf.get_val<uint64_t>("mds_kafka_sleep_timeout");
+  for (auto [topic_name, topic_ptr] : topics) {
+    int idx = topic_ptr->push_unack_event();
+    if (idx == -1) {
+      ldout(cct, 1) << "Kafka publish (with callback): failed with error: "
+                       "callback queue full, trying to poll again"
+                    << dendl;
+      reply_count += rd_kafka_poll(producer.get(), 3 * read_timeout);
+      idx = topic_ptr->push_unack_event();
+      if (idx == -1) {
+        ldout(cct, 1)
+            << "Kafka publish (with callback): failed with error: "
+               "message dropped, callback queue still full even after polling for "
+            << 3 * read_timeout << "ms" << dendl;
+        continue;
+      }
+    }
+    int *tag = new int(idx);
+    const auto response = rd_kafka_produce(
+        topic_ptr->kafka_topic_ptr.get(), RD_KAFKA_PARTITION_UA,
+        RD_KAFKA_MSG_F_COPY, const_cast<char *>(message->message.c_str()),
+        message->message.length(), nullptr, 0, tag);
+    if (response == -1) {
+      const auto err = rd_kafka_last_error();
+      ldout(cct, 1) << "Kafka publish: failed to produce for topic: "
+                    << topic_name << ". 
with error: " << rd_kafka_err2str(err) + << dendl; + + delete tag; + topic_ptr->drop_last_event(); + continue; + } + reply_count += rd_kafka_poll(producer.get(), 0); + } + return reply_count; +} + +uint64_t MDSKafka::poll(int read_timeout) { + return rd_kafka_poll(producer.get(), read_timeout); +} + +void MDSKafka::message_callback(rd_kafka_t *rk, + const rd_kafka_message_t *rkmessage, + void *opaque) { + const auto kafka_ptr = reinterpret_cast(opaque); + const auto result = rkmessage->err; + if (result == 0) { + ldout(cct, 20) << "Kafka run: ack received with result=" + << rd_kafka_err2str(result) << dendl; + } else { + ldout(cct, 1) << "Kafka run: nack received with result=" + << rd_kafka_err2str(result) + << " for broker: " << kafka_ptr->connection.broker << dendl; + } + if (!rkmessage->_private) { + ldout(cct, 20) << "Kafka run: n/ack received without a callback" << dendl; + return; + } + int *tag = reinterpret_cast(rkmessage->_private); + std::string topic_name = std::string(rd_kafka_topic_name(rkmessage->rkt)); + std::shared_lock lock(kafka_ptr->topic_mutex); + if (kafka_ptr->topics.find(topic_name) == kafka_ptr->topics.end()) { + ldout(cct, 20) << "Kafka run: topic=" << topic_name + << " is removed before ack" << dendl; + delete tag; + return; + } + std::shared_ptr topic_ptr = kafka_ptr->topics[topic_name]; + lock.unlock(); + topic_ptr->acknowledge_event(*tag); + delete tag; +} \ No newline at end of file diff --git a/src/mds/MDSKafka.h b/src/mds/MDSKafka.h new file mode 100644 index 0000000000000..cffa9854480d5 --- /dev/null +++ b/src/mds/MDSKafka.h @@ -0,0 +1,137 @@ +#pragma once + +#include "MDSNotificationMessage.h" +#include "MDSRank.h" +#include "common/ceph_context.h" +#include "include/buffer.h" +#include +#include +#include +#include + +class MDSKafka; +class MDSKafkaTopic; + +struct MDSKafkaConnection { + std::string broker; + bool use_ssl; + std::string user; + std::string password; + std::optional ca_location; + std::optional mechanism; + uint64_t hash_key; + MDSKafkaConnection() = default; + MDSKafkaConnection(const std::string &broker, bool use_ssl, + const std::string &user, const std::string &password, + const std::optional &ca_location, + const std::optional &mechanism); + void combine_hash(); + bool is_empty() const; + std::string to_string() const { return broker + ":" + user; } + void encode(ceph::buffer::list &bl) const; + void decode(ceph::buffer::list::const_iterator &iter); + void dump(ceph::Formatter *f) const; + static void generate_test_instances(std::list &o); +}; + +WRITE_CLASS_ENCODER(MDSKafkaConnection) + +class MDSKafkaManager { +public: + MDSKafkaManager(MDSRank *mds); + int init(); + void activate(); + void pause(); + int add_topic(const std::string &topic_name, const std::string &endpoint_name, + const MDSKafkaConnection &connection, bool write_into_disk); + int remove_topic(const std::string &topic_name, + const std::string &endpoint_name, bool write_into_disk); + int send(const std::shared_ptr &message); + CephContext *cct; + +private: + void run(); + uint64_t publish(const std::shared_ptr &message); + uint64_t polling(int read_timeout); + int load_data(std::map &mp); + int add_topic_into_disk(const std::string &topic_name, + const std::string &endpoint_name, + const MDSKafkaConnection &connection); + int remove_topic_from_disk(const std::string &topic_name, + const std::string &endpoint_name); + int update_omap(const std::map &mp); + int remove_keys(const std::set &st); + void sync_endpoints(); + static const size_t MAX_CONNECTIONS_DEFAULT = 32; 
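+  // annotation added in review: these compile-time defaults bound resource
+  // usage; MAX_CONNECTIONS_DEFAULT caps the number of distinct kafka
+  // endpoints, while MAX_QUEUE_DEFAULT below caps notifications queued for
+  // the worker thread (send() drops with -CEPHFS_EBUSY once it is full).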
+ static const size_t MAX_QUEUE_DEFAULT = 32768; + static const unsigned IDLE_TIME_MS = 100; + static const int READ_TIMEOUT_MS_DEFAULT = 100; + std::shared_mutex endpoint_mutex; + std::unordered_map> + candidate_endpoints, effective_endpoints; + std::mutex queue_mutex; + std::queue> message_queue; + std::thread worker; + std::atomic paused; + MDSRank *mds; + std::string object_name; + std::atomic endpoints_epoch = 0; + uint64_t prev_endpoints_epoch = 0; +}; + +class MDSKafkaTopic { +public: + MDSKafkaTopic() = delete; + MDSKafkaTopic(const std::string &topic_name); + int push_unack_event(); + void acknowledge_event(int idx); + void drop_last_event(); + static std::shared_ptr + create(CephContext *_cct, const std::string &topic_name, + const std::shared_ptr &kafka_endpoint); + static void kafka_topic_deleter(rd_kafka_topic_t *topic_ptr); + std::unique_ptr + kafka_topic_ptr{nullptr, kafka_topic_deleter}; + friend class MDSKafkaManager; + friend class MDSKafka; + +private: + std::string topic_name; + static CephContext *cct; + std::vector delivery_ring; + std::mutex ring_mutex; + int head, tail, inflight_count; + static const size_t MAX_INFLIGHT_DEFAULT = 32768; +}; + +class MDSKafka { +public: + MDSKafka() = delete; + MDSKafka(const MDSKafkaConnection &connection); + static std::shared_ptr create(CephContext *_cct, + const MDSKafkaConnection &connection); + uint64_t + publish_internal(const std::shared_ptr &message); + uint64_t poll(int read_timeout); + void add_topic(const std::string &topic_name, + const std::shared_ptr &topic); + int remove_topic(const std::string &topic_name, bool &is_empty); + static void kafka_producer_deleter(rd_kafka_t *producer_ptr); + friend class MDSKafkaManager; + friend class MDSKafkaTopic; + +private: + std::unique_ptr producer{ + nullptr, kafka_producer_deleter}; + std::shared_mutex topic_mutex; + std::unordered_map> topics; + static CephContext *cct; + MDSKafkaConnection connection; + static void message_callback(rd_kafka_t *rk, + const rd_kafka_message_t *rkmessage, + void *opaque); + static void log_callback(const rd_kafka_t *rk, int level, const char *fac, + const char *buf); + static void poll_err_callback(rd_kafka_t *rk, int err, const char *reason, + void *opaque); +}; diff --git a/src/mds/MDSNotificationManager.cc b/src/mds/MDSNotificationManager.cc new file mode 100644 index 0000000000000..1967e90a7e3e9 --- /dev/null +++ b/src/mds/MDSNotificationManager.cc @@ -0,0 +1,268 @@ +#include "MDSNotificationManager.h" +#include "include/uuid.h" +#define dout_subsys ceph_subsys_mds + +MDSNotificationManager::MDSNotificationManager(MDSRank *mds) + : cct(mds->cct), mds(mds), cur_notification_seq_id(0) { +#ifdef WITH_CEPHFS_NOTIFICATION + uuid_d uid; + uid.generate_random(); + session_id = uid.to_string(); + kafka_manager = std::make_unique(mds); + udp_manager = std::make_unique(mds); +#endif +} + +void MDSNotificationManager::init() { +#ifdef WITH_CEPHFS_NOTIFICATION + int r = kafka_manager->init(); + if (r < 0) { + kafka_manager = nullptr; + } + r = udp_manager->init(); + if (r < 0) { + udp_manager = nullptr; + } +#endif +} + +void MDSNotificationManager::dispatch(const cref_t &m) { +#ifdef WITH_CEPHFS_NOTIFICATION + if (m->get_type() == MSG_MDS_NOTIFICATION_INFO_KAFKA_TOPIC) { + const auto &req = ref_cast(m); + if (!req->is_remove) { + add_kafka_topic(req->topic_name, req->endpoint_name, req->broker, + req->use_ssl, req->user, req->password, req->ca_location, + req->mechanism, false, false); + } else { + remove_kafka_topic(req->topic_name, req->endpoint_name, 
false, false); + } + } else if (m->get_type() == MSG_MDS_NOTIFICATION_INFO_UDP_ENDPOINT) { + const auto &req = ref_cast(m); + if (!req->is_remove) { + add_udp_endpoint(req->name, req->ip, req->port, false, false); + } else { + remove_udp_endpoint(req->name, false, false); + } + } +#endif +} + +#ifdef WITH_CEPHFS_NOTIFICATION +int MDSNotificationManager::add_kafka_topic( + const std::string &topic_name, const std::string &endpoint_name, + const std::string &broker, bool use_ssl, const std::string &user, + const std::string &password, const std::optional &ca_location, + const std::optional &mechanism, bool write_into_disk, + bool send_peers) { + if (!kafka_manager) { + ldout(cct, 1) + << "Kafka topic '" << topic_name + << "' creation failed as kafka manager is not initialized correctly" + << dendl; + return -CEPHFS_EFAULT; + } + int r = kafka_manager->add_topic(topic_name, endpoint_name, + MDSKafkaConnection(broker, use_ssl, user, + password, ca_location, + mechanism), + write_into_disk); + if (send_peers && r == 0) { + auto m = make_message( + topic_name, endpoint_name, broker, use_ssl, user, password, ca_location, + mechanism, false); + mds->send_to_peers(m); + } + return r; +} + +int MDSNotificationManager::remove_kafka_topic(const std::string &topic_name, + const std::string &endpoint_name, + bool write_into_disk, + bool send_peers) { + if (!kafka_manager) { + ldout(cct, 1) + << "Kafka topic '" << topic_name + << "' removal failed as kafka manager is not initialized correctly" + << dendl; + return -CEPHFS_EFAULT; + } + int r = + kafka_manager->remove_topic(topic_name, endpoint_name, write_into_disk); + if (send_peers && r == 0) { + auto m = make_message(topic_name, + endpoint_name, true); + mds->send_to_peers(m); + } + return r; +} + +int MDSNotificationManager::add_udp_endpoint(const std::string &name, + const std::string &ip, int port, + bool write_into_disk, + bool send_peers) { + if (!udp_manager) { + ldout(cct, 1) + << "UDP endpoint '" << name + << "' creation failed as udp manager is not initialized correctly" + << dendl; + return -CEPHFS_EFAULT; + } + int r = udp_manager->add_endpoint(name, MDSUDPConnection(ip, port), + write_into_disk); + if (send_peers && r == 0) { + auto m = make_message(name, ip, port, false); + mds->send_to_peers(m); + } + return r; +} + +int MDSNotificationManager::remove_udp_endpoint(const std::string &name, + bool write_into_disk, + bool send_peers) { + if (!udp_manager) { + ldout(cct, 1) + << "UDP endpoint '" << name + << "' removal failed as udp manager is not initialized correctly" + << dendl; + return -CEPHFS_EFAULT; + } + int r = udp_manager->remove_endpoint(name, write_into_disk); + if (send_peers && r == 0) { + auto m = make_message(name, true); + mds->send_to_peers(m); + } + return r; +} + +void MDSNotificationManager::push_notification( + const std::shared_ptr &message) { + if (kafka_manager) { + kafka_manager->send(message); + } + if (udp_manager) { + udp_manager->send(message); + } +} +#endif + +void MDSNotificationManager::push_notification(int32_t whoami, CInode *in, + uint64_t notify_mask, + bool projected) { +#ifdef WITH_CEPHFS_NOTIFICATION + std::string path; + in->make_path_string(path, projected); + std::shared_ptr message = + std::make_shared( + cur_notification_seq_id.fetch_add(1)); + uint64_t filter_mask = (1 << 17) - 1; + if (in->is_dir()) { + filter_mask = cct->_conf.get_val("mds_notification_dir_mask"); + notify_mask |= CEPH_MDS_NOTIFY_ONLYDIR; + } else { + filter_mask = cct->_conf.get_val("mds_notification_file_mask"); + } + 
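+  // explanatory note (added in review): filter_mask keeps only the
+  // CEPH_MDS_NOTIFY_* events that the configured mask subscribes to. Bits
+  // 0-15 are the actual event bits; bit 16 (CEPH_MDS_NOTIFY_ONLYDIR) only
+  // annotates directory events, which is why check_mask below masks it out
+  // before deciding whether anything should be sent.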
+  notify_mask &= filter_mask;
+  uint64_t check_mask = notify_mask & ((1 << 16) - 1);
+  if (check_mask) {
+    message->create_message(whoami, session_id, notify_mask, path);
+    push_notification(message);
+  }
+#endif
+}
+
+void MDSNotificationManager::push_notification_link(
+    int32_t whoami, CInode *targeti, CDentry *destdn,
+    uint64_t notify_mask_for_target, uint64_t notify_mask_for_link) {
+#ifdef WITH_CEPHFS_NOTIFICATION
+  std::string target_path;
+  targeti->make_path_string(target_path, true, nullptr);
+  std::string link_path;
+  destdn->make_path_string(link_path, true);
+  std::shared_ptr<MDSNotificationMessage> message =
+      std::make_shared<MDSNotificationMessage>(
+          cur_notification_seq_id.fetch_add(1));
+  uint64_t filter_mask = (1 << 17) - 1;
+  if (targeti->is_dir()) {
+    filter_mask = cct->_conf.get_val<uint64_t>("mds_notification_dir_mask");
+    notify_mask_for_target |= CEPH_MDS_NOTIFY_ONLYDIR;
+    notify_mask_for_link |= CEPH_MDS_NOTIFY_ONLYDIR;
+  } else {
+    filter_mask = cct->_conf.get_val<uint64_t>("mds_notification_file_mask");
+  }
+  notify_mask_for_target &= filter_mask;
+  notify_mask_for_link &= filter_mask;
+  uint64_t check_mask =
+      (notify_mask_for_target | notify_mask_for_link) & ((1 << 16) - 1);
+  if (check_mask) {
+    if (target_path == link_path) {
+      message->create_message(whoami, session_id, notify_mask_for_link,
+                              target_path);
+      push_notification(message);
+      return;
+    }
+    message->create_link_message(whoami, session_id, notify_mask_for_target,
+                                 notify_mask_for_link, target_path, link_path);
+    push_notification(message);
+  }
+#endif
+}
+
+void MDSNotificationManager::push_notification_move(int32_t whoami,
+                                                    CDentry *srcdn,
+                                                    CDentry *destdn) {
+#ifdef WITH_CEPHFS_NOTIFICATION
+  std::string dest_path, src_path;
+  srcdn->make_path_string(src_path, true);
+  destdn->make_path_string(dest_path, true);
+  uint64_t src_mask = CEPH_MDS_NOTIFY_MOVED_FROM,
+           dest_mask = CEPH_MDS_NOTIFY_MOVED_TO;
+  CInode *srci = srcdn->get_projected_linkage()->get_inode();
+  CInode *desti = destdn->get_projected_linkage()->get_inode();
+  uint64_t filter_mask = (1 << 17) - 1;
+  if (srci && srci->is_dir()) {
+    src_mask |= CEPH_MDS_NOTIFY_ONLYDIR;
+    dest_mask |= CEPH_MDS_NOTIFY_ONLYDIR;
+    filter_mask = cct->_conf.get_val<uint64_t>("mds_notification_dir_mask");
+  } else {
+    filter_mask = cct->_conf.get_val<uint64_t>("mds_notification_file_mask");
+  }
+  src_mask &= filter_mask;
+  dest_mask &= filter_mask;
+  uint64_t check_mask = (src_mask | dest_mask) & ((1 << 16) - 1);
+  if (check_mask) {
+    std::shared_ptr<MDSNotificationMessage> message =
+        std::make_shared<MDSNotificationMessage>(
+            cur_notification_seq_id.fetch_add(1));
+    message->create_move_message(whoami, session_id, src_mask, dest_mask,
+                                 src_path, dest_path);
+    push_notification(message);
+  }
+#endif
+}
+
+void MDSNotificationManager::push_notification_snap(int32_t whoami, CInode *in,
+                                                    const std::string &snapname,
+                                                    uint64_t notify_mask) {
+#ifdef WITH_CEPHFS_NOTIFICATION
+  std::string path;
+  in->make_path_string(path, true, nullptr);
+  std::shared_ptr<MDSNotificationMessage> message =
+      std::make_shared<MDSNotificationMessage>(
+          cur_notification_seq_id.fetch_add(1));
+  uint64_t filter_mask = (1 << 17) - 1;
+  if (in->is_dir()) {
+    notify_mask |= CEPH_MDS_NOTIFY_ONLYDIR;
+    filter_mask = cct->_conf.get_val<uint64_t>("mds_notification_dir_mask");
+  } else {
+    filter_mask = cct->_conf.get_val<uint64_t>("mds_notification_file_mask");
+  }
+  notify_mask &= filter_mask;
+  uint64_t check_mask = notify_mask & ((1 << 16) - 1);
+  if (check_mask) {
+    message->create_snap_message(whoami, session_id, notify_mask, path,
+                                 std::string(snapname));
+    push_notification(message);
+  }
+#endif
+}
diff --git a/src/mds/MDSNotificationManager.h b/src/mds/MDSNotificationManager.h
new file mode 100644
index 0000000000000..34f33be050a3c
--- /dev/null
+++ b/src/mds/MDSNotificationManager.h
@@ -0,0 +1,64 @@
+#pragma once
+#include "CDentry.h"
+#include "CInode.h"
+#include "MDSRank.h"
+#include "common/ceph_context.h"
+#include "include/buffer.h"
+#include <memory>
+
+#ifdef WITH_CEPHFS_NOTIFICATION
+#include "MDSKafka.h"
+#include "MDSNotificationMessage.h"
+#include "MDSUDPEndpoint.h"
+#include "messages/MNotificationInfoKafkaTopic.h"
+#include "messages/MNotificationInfoUDPEndpoint.h"
+
+class MDSKafkaManager;
+class MDSUDPManager;
+#endif
+
+class MDSNotificationManager {
+public:
+  MDSNotificationManager(MDSRank *mds);
+  void init();
+
+  // incoming notification endpoints
+  void dispatch(const cref_t<Message> &m);
+  int add_kafka_topic(const std::string &topic_name,
+                      const std::string &endpoint_name,
+                      const std::string &broker, bool use_ssl,
+                      const std::string &user, const std::string &password,
+                      const std::optional<std::string> &ca_location,
+                      const std::optional<std::string> &mechanism,
+                      bool write_into_disk, bool send_peers);
+  int remove_kafka_topic(const std::string &topic_name,
+                         const std::string &endpoint_name,
+                         bool write_into_disk, bool send_peers);
+  int add_udp_endpoint(const std::string &name, const std::string &ip,
+                       int port, bool write_into_disk, bool send_peers);
+  int remove_udp_endpoint(const std::string &name, bool write_into_disk,
+                          bool send_peers);
+
+  void push_notification(int32_t whoami, CInode *in, uint64_t notify_mask,
+                         bool projected = true);
+  void push_notification_link(int32_t whoami, CInode *targeti, CDentry *destdn,
+                              uint64_t notify_mask_for_target,
+                              uint64_t notify_mask_for_link);
+  void push_notification_move(int32_t whoami, CDentry *srcdn, CDentry *destdn);
+  void push_notification_snap(int32_t whoami, CInode *in,
+                              const std::string &snapname,
+                              uint64_t notify_mask);
+
+private:
+#ifdef WITH_CEPHFS_NOTIFICATION
+  std::unique_ptr<MDSKafkaManager> kafka_manager;
+  std::unique_ptr<MDSUDPManager> udp_manager;
+  void
+  push_notification(const std::shared_ptr<MDSNotificationMessage> &message);
+#endif
+
+  CephContext *cct;
+  std::atomic<uint64_t> cur_notification_seq_id;
+  std::string session_id;
+  MDSRank *mds;
+};
\ No newline at end of file
diff --git a/src/mds/MDSNotificationMessage.cc b/src/mds/MDSNotificationMessage.cc
new file mode 100644
index 0000000000000..ca1ed540f749e
--- /dev/null
+++ b/src/mds/MDSNotificationMessage.cc
@@ -0,0 +1,80 @@
+#include "MDSNotificationMessage.h"
+#include "common/Clock.h"
+#include "common/ceph_json.h"
+
+#define dout_subsys ceph_subsys_mds
+
+MDSNotificationMessage::MDSNotificationMessage(uint64_t seq_id)
+    : seq_id(seq_id) {}
+
+void MDSNotificationMessage::create_message(int32_t whoami,
+                                            const std::string &session_id,
+                                            const uint64_t mask,
+                                            const std::string &path) {
+  JSONFormatter f;
+  f.open_object_section("");
+  ceph_clock_now().gmtime_nsec(f.dump_stream("timestamp"));
+  f.dump_int("mds_id", (int64_t)whoami);
+  f.dump_string("session_id", session_id);
+  f.dump_unsigned("seq_id", seq_id);
+  f.dump_unsigned("mask", mask);
+  f.dump_string("path", path);
+  f.close_section();
+  f.flush(message);
+}
+
+void MDSNotificationMessage::create_move_message(int32_t whoami,
+                                                 const std::string &session_id,
+                                                 uint64_t src_mask,
+                                                 uint64_t dest_mask,
+                                                 const std::string &src_path,
+                                                 const std::string &dest_path) {
+  JSONFormatter f;
+  f.open_object_section("");
+  ceph_clock_now().gmtime_nsec(f.dump_stream("timestamp"));
+  f.dump_int("mds_id", (int64_t)whoami);
+  f.dump_string("session_id", session_id);
+  f.dump_unsigned("seq_id", seq_id);
f.dump_unsigned("src_mask", src_mask); + f.dump_unsigned("dest_mask", dest_mask); + f.dump_string("src_path", src_path); + f.dump_string("dest_path", dest_path); + f.close_section(); + f.flush(message); +} + +void MDSNotificationMessage::create_link_message(int32_t whoami, + const std::string &session_id, + uint64_t target_mask, + uint64_t link_mask, + const std::string &target_path, + const std::string &link_path) { + JSONFormatter f; + f.open_object_section(""); + ceph_clock_now().gmtime_nsec(f.dump_stream("timestamp")); + f.dump_int("mds_id", (int64_t)whoami); + f.dump_string("session_id", session_id); + f.dump_unsigned("seq_id", seq_id); + f.dump_unsigned("target_mask", target_mask); + f.dump_unsigned("link_mask", link_mask); + f.dump_string("target_path", target_path); + f.dump_string("link_path", link_path); + f.close_section(); + f.flush(message); +} + +void MDSNotificationMessage::create_snap_message( + int32_t whoami, const std::string &session_id, uint64_t mask, + const std::string &path, const std::string &snapshot_name) { + JSONFormatter f; + f.open_object_section(""); + ceph_clock_now().gmtime_nsec(f.dump_stream("timestamp")); + f.dump_int("mds_id", (int64_t)whoami); + f.dump_string("session_id", session_id); + f.dump_unsigned("seq_id", seq_id); + f.dump_unsigned("mask", mask); + f.dump_string("path", path); + f.dump_string("snapshot_name", snapshot_name); + f.close_section(); + f.flush(message); +} diff --git a/src/mds/MDSNotificationMessage.h b/src/mds/MDSNotificationMessage.h new file mode 100644 index 0000000000000..a203127791863 --- /dev/null +++ b/src/mds/MDSNotificationMessage.h @@ -0,0 +1,24 @@ +#pragma once +#include "common/ceph_context.h" +#include "include/Context.h" +#include "include/buffer.h" +#include + +struct MDSNotificationMessage { + bufferlist message; + uint64_t seq_id; + MDSNotificationMessage(uint64_t seq_id); + void create_message(int32_t whoami, const std::string &session_id, + uint64_t mask, const std::string &path); + void create_move_message(int32_t whoami, const std::string &session_id, + uint64_t src_mask, uint64_t dest_mask, + const std::string &src_path, + const std::string &dest_path); + void create_link_message(int32_t whoami, const std::string &session_id, + uint64_t target_mask, uint64_t link_mask, + const std::string &target_path, + const std::string &link_path); + void create_snap_message(int32_t whoami, const std::string &session_id, + uint64_t mask, const std::string &path, + const std::string &snapshot_name); +}; \ No newline at end of file diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 1cd742423e641..c8751b2182c93 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -552,9 +552,10 @@ MDSRank::MDSRank( server = new Server(this, &metrics_handler); locker = new Locker(this, mdcache); - + notification_manager = std::make_unique(this); quiesce_db_manager.reset(new QuiesceDbManager()); + _heartbeat_reset_grace = g_conf().get_val("mds_heartbeat_reset_grace"); heartbeat_grace = g_conf().get_val("mds_heartbeat_grace"); op_tracker.set_complaint_and_threshold(cct->_conf->mds_op_complaint_time, @@ -1066,10 +1067,13 @@ bool MDSRank::_dispatch(const cref_t &m, bool new_msg) if (quiesce_dispatch(m)) { return true; } - if (is_stale_message(m)) { return true; } + if (is_notification_info(m)) { + return true; + } + // do not proceed if this message cannot be handled if (!is_valid_message(m)) { return false; @@ -1175,6 +1179,26 @@ bool MDSRank::_dispatch(const cref_t &m, bool new_msg) return true; } +void MDSRank::send_to_peers(const 
+  std::set<mds_rank_t> up;
+  get_mds_map()->get_up_mds_set(up);
+  for (const auto &r : up) {
+    if (r == get_nodeid()) {
+      continue;
+    }
+    send_message_mds(m, r);
+  }
+}
+
+bool MDSRank::is_notification_info(const cref_t<Message> &m) {
+  if (m->get_type() == MSG_MDS_NOTIFICATION_INFO_KAFKA_TOPIC ||
+      m->get_type() == MSG_MDS_NOTIFICATION_INFO_UDP_ENDPOINT) {
+    notification_manager->dispatch(m);
+    return true;
+  }
+  return false;
+}
+
 void MDSRank::update_mlogger()
 {
   if (mlogger) {
@@ -1474,7 +1498,6 @@ class C_MDS_RetrySendMessageMDS : public MDSInternalContext {
   ref_t<Message> m;
 };
-
 int MDSRank::send_message_mds(const ref_t<Message>& m, mds_rank_t mds)
 {
   if (!mdsmap->is_up(mds)) {
@@ -2160,6 +2183,7 @@ void MDSRank::active_start()
   finish_contexts(g_ceph_context, waiting_for_active);  // kick waiters
   quiesce_agent_setup();
+  notification_manager->init();
 }
 
 void MDSRank::recovery_done(int oldstate)
@@ -2453,6 +2477,7 @@ void MDSRankDispatcher::handle_mds_map(
       ceph_assert(oldstate == MDSMap::STATE_ACTIVE);
       stopping_start();
     }
+
   }
 }
@@ -3098,6 +3123,49 @@ void MDSRankDispatcher::handle_asok_command(
   } else if (command == "quiesce db") {
     command_quiesce_db(cmdmap, on_finish);
     return;
+  } else if (command == "add_topic") {
+    std::string endpoint_name;
+    std::string topic_name, broker, username;
+    std::string password;
+    bool use_ssl;
+    std::optional<std::string> ca_location, mechanism;
+    cmd_getval(cmdmap, "topic_name", topic_name);
+    cmd_getval(cmdmap, "endpoint_name", endpoint_name);
+    cmd_getval(cmdmap, "broker", broker);
+    if (!cmd_getval(cmdmap, "use_ssl", use_ssl)) {
+      use_ssl = false;
+    }
+    cmd_getval(cmdmap, "username", username);
+    cmd_getval(cmdmap, "password", password);
+    std::string ca, mch;
+    if (cmd_getval(cmdmap, "ca_location", ca)) {
+      ca_location = ca;
+    }
+    if (cmd_getval(cmdmap, "mechanism", mch)) {
+      mechanism = mch;
+    }
+    r = notification_manager->add_kafka_topic(
+        topic_name, endpoint_name, broker, use_ssl, username, password,
+        ca_location, mechanism, true, true);
+  } else if (command == "remove_topic") {
+    std::string topic_name, endpoint_name;
+    cmd_getval(cmdmap, "topic_name", topic_name);
+    cmd_getval(cmdmap, "endpoint_name", endpoint_name);
+    r = notification_manager->remove_kafka_topic(topic_name, endpoint_name,
+                                                 true, true);
+  } else if (command == "add_udp_endpoint") {
+    std::string ip, name;
+    int64_t port;
+    cmd_getval(cmdmap, "entity", name);
+    cmd_getval(cmdmap, "ip", ip);
+    cmd_getval(cmdmap, "port", port);
+    r = notification_manager->add_udp_endpoint(name, ip, (int)port, true, true);
+  } else if (command == "remove_udp_endpoint") {
+    std::string name;
+    cmd_getval(cmdmap, "entity", name);
+    r = notification_manager->remove_udp_endpoint(name, true, true);
   } else {
     r = -CEPHFS_ENOSYS;
   }
diff --git a/src/mds/MDSRank.h b/src/mds/MDSRank.h
index c4a8809b6e1a0..793c7791c2292 100644
--- a/src/mds/MDSRank.h
+++ b/src/mds/MDSRank.h
@@ -45,6 +45,8 @@
 #include "MetricsHandler.h"
 #include "osdc/Journaler.h"
 #include "MDSMetaRequest.h"
+#include "MDSNotificationManager.h"
+
 
 // Full .h import instead of forward declaration for PerfCounter, for the
 // benefit of those including this header and using MDSRank::logger
@@ -154,6 +156,7 @@ class ScrubStack;
 class C_ExecAndReply;
 class QuiesceDbManager;
 class QuiesceAgent;
+class MDSNotificationManager;
 
 /**
  * The public part of this class's interface is what's exposed to all
@@ -199,6 +202,7 @@ class MDSRank {
   }
 
   bool is_daemon_stopping() const;
+  void send_to_peers(const ref_t<Message>& m);
 
   MDSTableClient *get_table_client(int t);
   MDSTableServer *get_table_server(int t);
@@ -427,6 +431,7 @@ class MDSRank {
   SnapServer *snapserver = nullptr;
   SnapClient *snapclient = nullptr;
+  std::unique_ptr<MDSNotificationManager> notification_manager;
 
   SessionMap sessionmap;
@@ -649,6 +654,7 @@ class MDSRank {
   bool standby_replaying = false;  // true if current replay pass is in standby-replay mode
   uint64_t extraordinary_events_dump_interval = 0;
   double inject_journal_corrupt_dentry_first = 0.0;
+
 private:
   bool send_status = true;
@@ -661,6 +667,8 @@ class MDSRank {
   bool client_eviction_dump = false;
 
+  bool is_notification_info(const cref_t<Message>& m);
+
   void get_task_status(std::map<std::string, std::string> *status);
   void schedule_update_timer_task();
   void send_task_status();
diff --git a/src/mds/MDSUDPEndpoint.cc b/src/mds/MDSUDPEndpoint.cc
new file mode 100644
index 0000000000000..6f7a8cb17f8b6
--- /dev/null
+++ b/src/mds/MDSUDPEndpoint.cc
@@ -0,0 +1,242 @@
+#include "MDSUDPEndpoint.h"
+#include "include/fs_types.h"
+
+#define dout_subsys ceph_subsys_mds
+
+MDSUDPConnection::MDSUDPConnection(const std::string &ip, int port)
+    : ip(ip), port(port) {}
+
+void MDSUDPConnection::encode(ceph::buffer::list &bl) const {
+  ENCODE_START(1, 1, bl);
+  encode(ip, bl);
+  encode(port, bl);
+  ENCODE_FINISH(bl);
+}
+
+void MDSUDPConnection::dump(ceph::Formatter *f) const {
+  f->dump_string("ip", ip);
+  f->dump_int("port", port);
+}
+
+void MDSUDPConnection::generate_test_instances(
+    std::list<MDSUDPConnection *> &o) {
+  o.push_back(new MDSUDPConnection);
+}
+
+void MDSUDPConnection::decode(ceph::buffer::list::const_iterator &iter) {
+  DECODE_START(1, iter);
+  decode(ip, iter);
+  decode(port, iter);
+  DECODE_FINISH(iter);
+}
+
+int MDSUDPManager::load_data(std::map<std::string, bufferlist> &mp) {
+  int r = update_omap(std::map<std::string, bufferlist>());
+  if (r < 0) {
+    return r;
+  }
+  C_SaferCond sync_finisher;
+  ObjectOperation op;
+  op.omap_get_vals("", "", UINT_MAX, &mp, NULL, NULL);
+  mds->objecter->read(object_t(object_name),
+                      object_locator_t(mds->get_metadata_pool()), op,
+                      CEPH_NOSNAP, NULL, 0, &sync_finisher);
+  r = sync_finisher.wait();
+  if (r < 0) {
+    lderr(mds->cct) << "Error reading omap values from object '" << object_name
+                    << "':" << cpp_strerror(r) << dendl;
+  }
+  return r;
+}
+
+int MDSUDPManager::update_omap(const std::map<std::string, bufferlist> &mp) {
+  C_SaferCond sync_finisher;
+  ObjectOperation op;
+  op.omap_set(mp);
+  mds->objecter->mutate(
+      object_t(object_name), object_locator_t(mds->get_metadata_pool()), op,
+      SnapContext(), ceph::real_clock::now(), 0, &sync_finisher);
+  int r = sync_finisher.wait();
+  if (r < 0) {
+    lderr(mds->cct) << "Error updating omap of object '" << object_name
+                    << "':" << cpp_strerror(r) << dendl;
+  }
+  return r;
+}
+
+int MDSUDPManager::remove_keys(const std::set<std::string> &st) {
+  C_SaferCond sync_finisher;
+  ObjectOperation op;
+  op.omap_rm_keys(st);
+  mds->objecter->mutate(
+      object_t(object_name), object_locator_t(mds->get_metadata_pool()), op,
+      SnapContext(), ceph::real_clock::now(), 0, &sync_finisher);
+  int r = sync_finisher.wait();
+  if (r < 0) {
+    lderr(mds->cct) << "Error removing keys from omap of object '"
+                    << object_name << "':" << cpp_strerror(r) << dendl;
+  }
+  return r;
+}
+
+int MDSUDPManager::add_endpoint_into_disk(const std::string &name,
+                                          const MDSUDPConnection &connection) {
+  std::map<std::string, bufferlist> mp;
+  bufferlist bl;
+  encode(connection, bl);
+  mp[name] = std::move(bl);
+  int r = update_omap(mp);
+  return r;
+}
+
+int MDSUDPManager::remove_endpoint_from_disk(const std::string &name) {
+  std::set<std::string> st;
+  st.insert(name);
+  int r = remove_keys(st);
+  return r;
+}
+
+MDSUDPManager::MDSUDPManager(MDSRank *mds)
+    : mds(mds), cct(mds->cct), object_name("mds_udp_endpoints") {}
+
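+// init() reloads the UDP endpoints persisted in the omap of the
+// "mds_udp_endpoints" object in the metadata pool and re-registers each
+// of them in memory (without writing them back to disk).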
+int MDSUDPManager::init() {
+  std::map<std::string, bufferlist> mp;
+  int r = load_data(mp);
+  if (r < 0) {
+    lderr(cct) << "Error occurred while initializing UDP endpoints" << dendl;
+    return r;
+  }
+  for (auto &[key, val] : mp) {
+    try {
+      MDSUDPConnection connection;
+      auto iter = val.cbegin();
+      decode(connection, iter);
+      add_endpoint(key, connection, false);
+    } catch (const ceph::buffer::error &e) {
+      ldout(cct, 1)
+          << "No value exists in the omap of object 'mds_udp_endpoints' "
+             "for udp entity name '"
+          << key << "'" << dendl;
+    }
+  }
+  return r;
+}
+
+int MDSUDPManager::send(
+    const std::shared_ptr<MDSNotificationMessage> &message) {
+  std::shared_lock lock(endpoint_mutex);
+  std::vector<boost::asio::const_buffer> buf(2);
+  for (auto &[key, endpoint] : endpoints) {
+    uint64_t len = message->message.length();
+    buf[0] = boost::asio::buffer(&len, sizeof(len));
+    buf[1] = boost::asio::buffer(message->message.c_str(),
+                                 message->message.length());
+    endpoint->publish_internal(buf, message->seq_id);
+  }
+  return 0;
+}
+
+int MDSUDPManager::add_endpoint(const std::string &name,
+                                const MDSUDPConnection &connection,
+                                bool write_into_disk) {
+  std::unique_lock lock(endpoint_mutex);
+  std::shared_ptr<MDSUDPEndpoint> new_endpoint;
+  auto it = endpoints.find(name);
+  int r = 0;
+  if (it == endpoints.end() && endpoints.size() >= MAX_CONNECTIONS_DEFAULT) {
+    ldout(cct, 1) << "UDP connect: max connections exceeded" << dendl;
+    r = -CEPHFS_ENOMEM;
+    goto error_occurred;
+  }
+  new_endpoint = MDSUDPEndpoint::create(cct, name, connection);
+  if (!new_endpoint) {
+    ldout(cct, 1) << "UDP connect: udp endpoint creation failed" << dendl;
+    r = -CEPHFS_ECANCELED;
+    goto error_occurred;
+  }
+  endpoints[name] = new_endpoint;
+  if (write_into_disk) {
+    r = add_endpoint_into_disk(name, connection);
+    if (r < 0) {
+      goto error_occurred;
+    }
+  }
+  ldout(cct, 1) << "UDP endpoint with entity name '" << name
+                << "' is added successfully" << dendl;
+  return r;
+error_occurred:
+  lderr(cct) << "UDP endpoint with entity name '" << name
+             << "' cannot be added, failed with an error:" << cpp_strerror(r)
+             << dendl;
+  return r;
+}
+
+int MDSUDPManager::remove_endpoint(const std::string &name,
+                                   bool write_into_disk) {
+  std::unique_lock lock(endpoint_mutex);
+  int r = 0;
+  auto it = endpoints.find(name);
+  if (it != endpoints.end()) {
+    endpoints.erase(it);
+    if (write_into_disk) {
+      r = remove_endpoint_from_disk(name);
+    }
+    if (r == 0) {
+      ldout(cct, 1) << "UDP endpoint with entity name '" << name
+                    << "' is removed successfully" << dendl;
+    } else {
+      lderr(cct) << "UDP endpoint '" << name
+                 << "' cannot be removed, failed with an error:"
+                 << cpp_strerror(r) << dendl;
+    }
+    return r;
+  }
+  ldout(cct, 1) << "No UDP endpoint exists with entity name '" << name << "'"
+                << dendl;
+  return -CEPHFS_EINVAL;
+}
+
+MDSUDPEndpoint::MDSUDPEndpoint(CephContext *cct, const std::string &name,
+                               const MDSUDPConnection &connection)
+    : cct(cct), name(name), socket(io_context), connection(connection),
+      endpoint(boost::asio::ip::address::from_string(connection.ip),
+               connection.port) {
+  try {
+    boost::system::error_code ec;
+    socket.open(boost::asio::ip::udp::v4(), ec);
+    if (ec) {
+      throw std::runtime_error(ec.message());
+    }
+  } catch (const std::exception &e) {
+    lderr(cct) << "Error occurred while opening UDP socket with error:"
+               << e.what() << dendl;
+    throw;
+  }
+}
+
+std::shared_ptr<MDSUDPEndpoint>
+MDSUDPEndpoint::create(CephContext *cct, const std::string &name,
+                       const MDSUDPConnection &connection) {
+  try {
+    std::shared_ptr<MDSUDPEndpoint> endpoint =
+        std::make_shared<MDSUDPEndpoint>(cct, name, connection);
+    return endpoint;
+  } catch (...) {
+  }
+  return nullptr;
+}
+
+int MDSUDPEndpoint::publish_internal(
+    std::vector<boost::asio::const_buffer> &buf, uint64_t seq_id) {
+  boost::system::error_code ec;
+  socket.send_to(buf, endpoint, 0, ec);
+  if (ec) {
+    ldout(cct, 1) << "Error occurred while sending notification having seq_id="
+                  << seq_id << ":" << ec.message() << dendl;
+    return -ec.value();
+  } else {
+    ldout(cct, 20) << "Notification having seq_id=" << seq_id << " delivered"
+                   << dendl;
+  }
+  return 0;
+}
diff --git a/src/mds/MDSUDPEndpoint.h b/src/mds/MDSUDPEndpoint.h
new file mode 100644
index 0000000000000..90cfb7f8ea76e
--- /dev/null
+++ b/src/mds/MDSUDPEndpoint.h
@@ -0,0 +1,65 @@
+#pragma once
+
+#include "MDSNotificationMessage.h"
+#include "MDSRank.h"
+#include <boost/asio.hpp>
+#include <memory>
+
+class MDSUDPEndpoint;
+
+struct MDSUDPConnection {
+  std::string ip;
+  int port;
+  MDSUDPConnection() = default;
+  MDSUDPConnection(const std::string &ip, int port);
+  void encode(ceph::buffer::list &bl) const;
+  void decode(ceph::buffer::list::const_iterator &iter);
+  void dump(ceph::Formatter *f) const;
+  static void generate_test_instances(std::list<MDSUDPConnection *> &o);
+};
+WRITE_CLASS_ENCODER(MDSUDPConnection)
+
+class MDSUDPManager {
+public:
+  MDSUDPManager(MDSRank *mds);
+  int init();
+  int send(const std::shared_ptr<MDSNotificationMessage> &message);
+  int add_endpoint(const std::string &name, const MDSUDPConnection &connection,
+                   bool write_into_disk);
+  int remove_endpoint(const std::string &name, bool write_into_disk);
+
+private:
+  int load_data(std::map<std::string, bufferlist> &mp);
+  int add_endpoint_into_disk(const std::string &name,
+                             const MDSUDPConnection &connection);
+  int remove_endpoint_from_disk(const std::string &name);
+  int update_omap(const std::map<std::string, bufferlist> &mp);
+  int remove_keys(const std::set<std::string> &st);
+  CephContext *cct;
+  std::shared_mutex endpoint_mutex;
+  std::unordered_map<std::string, std::shared_ptr<MDSUDPEndpoint>> endpoints;
+  static const size_t MAX_CONNECTIONS_DEFAULT = 256;
+  MDSRank *mds;
+  std::string object_name;
+};
+
+class MDSUDPEndpoint {
+public:
+  MDSUDPEndpoint() = delete;
+  MDSUDPEndpoint(CephContext *cct, const std::string &name,
+                 const MDSUDPConnection &connection);
+  int publish_internal(std::vector<boost::asio::const_buffer> &buf,
+                       uint64_t seq_id);
+  static std::shared_ptr<MDSUDPEndpoint>
+  create(CephContext *cct, const std::string &name,
+         const MDSUDPConnection &connection);
+  friend class MDSUDPManager;
+
+private:
+  std::string name;
+  MDSUDPConnection connection;
+  boost::asio::io_context io_context;
+  boost::asio::ip::udp::socket socket;
+  boost::asio::ip::udp::endpoint endpoint;
+  CephContext *cct;
+};
\ No newline at end of file
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index f26625bfd3c7b..1ea0c8295b229 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -4631,6 +4644,8 @@ void Server::handle_client_open(const MDRequestRef& mdr)
     mds->locker->check_inode_max_size(cur);
 
   // make sure this inode gets into the journal
+  mds->notification_manager->push_notification(mds->get_nodeid(), cur,
+                                               CEPH_MDS_NOTIFY_OPEN);
   if (cur->is_auth() && cur->last == CEPH_NOSNAP &&
       mdcache->open_file_table.should_log_open(cur)) {
     EOpen *le = new EOpen(mds->mdlog);
@@ -4845,6 +4860,10 @@ void Server::handle_client_openc(const MDRequestRef& mdr)
 
   set_reply_extra_bl(req, _inode->ino, mdr->reply_extra_bl);
 
+  mds->notification_manager->push_notification(mds->get_nodeid(), newi,
+                                               CEPH_MDS_NOTIFY_CREATE |
+                                                   CEPH_MDS_NOTIFY_OPEN);
+
   journal_and_reply(mdr, newi, dn, le, fin);
 
   // We hit_dir (via hit_inode) in our finish callback, but by then we might
@@ -5522,7 +5541,10 @@ void Server::handle_client_setattr(const MDRequestRef& mdr)
   le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid());
   mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY);
   mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur);
-
+
+  mds->notification_manager->push_notification(mds->get_nodeid(), cur,
+                                               CEPH_MDS_NOTIFY_ATTRIB);
+
   journal_and_reply(mdr, cur, 0, le,
 		    new C_MDS_inode_update_finish(this, mdr, cur,
 						  truncating_smaller, changed_ranges));
@@ -5587,6 +5609,11 @@ void Server::do_open_truncate(const MDRequestRef& mdr, int cmode)
     dn = mdr->dn[0].back();
   }
 
+  mds->notification_manager->push_notification(mds->get_nodeid(), in,
+                                               CEPH_MDS_NOTIFY_MODIFY |
+                                                   CEPH_MDS_NOTIFY_ACCESS |
+                                                   CEPH_MDS_NOTIFY_OPEN);
+
   journal_and_reply(mdr, in, dn, le,
                     new C_MDS_inode_update_finish(this, mdr, in, old_size > 0,
                                                   changed_ranges));
   // Although the `open` part can give an early reply, the truncation won't
@@ -5675,6 +5702,9 @@ void Server::handle_client_setlayout(const MDRequestRef& mdr)
   le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid());
   mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY);
   mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur);
+
+  mds->notification_manager->push_notification(mds->get_nodeid(), cur,
+                                               CEPH_MDS_NOTIFY_ATTRIB);
 
   journal_and_reply(mdr, cur, 0, le, new C_MDS_inode_update_finish(this, mdr, cur));
 }
@@ -5792,6 +5822,10 @@ void Server::handle_client_setdirlayout(const MDRequestRef& mdr)
   mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur);
 
   mdr->no_early_reply = true;
+
+  mds->notification_manager->push_notification(mds->get_nodeid(), cur,
+                                               CEPH_MDS_NOTIFY_ATTRIB);
+
   journal_and_reply(mdr, cur, 0, le, new C_MDS_inode_update_finish(this, mdr, cur));
 }
@@ -6495,6 +6529,9 @@ void Server::handle_client_setvxattr(const MDRequestRef& mdr, CInode *cur)
   mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY);
   mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur);
 
+  mds->notification_manager->push_notification(mds->get_nodeid(), cur,
+                                               CEPH_MDS_NOTIFY_ATTRIB);
+
   journal_and_reply(mdr, cur, 0, le, new C_MDS_inode_update_finish(this, mdr, cur,
 								   false, false, adjust_realm));
   return;
@@ -6777,6 +6814,9 @@ void Server::handle_client_setxattr(const MDRequestRef& mdr)
   mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY);
   mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur);
 
+  mds->notification_manager->push_notification(mds->get_nodeid(), cur,
+                                               CEPH_MDS_NOTIFY_ATTRIB);
+
   journal_and_reply(mdr, cur, 0, le, new C_MDS_inode_update_finish(this, mdr, cur));
 }
@@ -6846,6 +6886,9 @@ void Server::handle_client_removexattr(const MDRequestRef& mdr)
   mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY);
   mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur);
 
+  mds->notification_manager->push_notification(mds->get_nodeid(), cur,
+                                               CEPH_MDS_NOTIFY_ATTRIB);
+
   journal_and_reply(mdr, cur, 0, le, new C_MDS_inode_update_finish(this, mdr, cur));
 }
@@ -7162,6 +7205,10 @@ void Server::handle_client_mknod(const MDRequestRef& mdr)
 				  PREDIRTY_PRIMARY|PREDIRTY_DIR, 1);
   le->metablob.add_primary_dentry(dn, newi, true, true, true);
 
+  mds->notification_manager->push_notification(mds->get_nodeid(), newi,
+                                               CEPH_MDS_NOTIFY_CREATE |
+                                                   CEPH_MDS_NOTIFY_ATTRIB);
+
   journal_and_reply(mdr, newi, dn, le, new C_MDS_mknod_finish(this, mdr, dn, newi));
   mds->balancer->maybe_fragment(dn->get_dir(), false);
 }
@@ -7173,7 +7220,6 @@ void Server::handle_client_mknod(const MDRequestRef& mdr)
 void Server::handle_client_mkdir(const MDRequestRef& mdr)
 {
   const cref_t<MClientRequest> &req = mdr->client_request;
-
   mdr->disable_lock_cache();
   CDentry *dn = rdlock_path_xlock_dentry(mdr, true);
   if (!dn)
@@ -7253,6 +7299,9 @@ void Server::handle_client_mkdir(const MDRequestRef& mdr)
   // make sure this inode gets into the journal
   le->metablob.add_opened_ino(newi->ino());
 
+  mds->notification_manager->push_notification(mds->get_nodeid(), newi,
+                                               CEPH_MDS_NOTIFY_CREATE);
+
   journal_and_reply(mdr, newi, dn, le, new C_MDS_mknod_finish(this, mdr, dn, newi));
 
   // We hit_dir (via hit_inode) in our finish callback, but by then we might
@@ -7316,6 +7365,9 @@ void Server::handle_client_symlink(const MDRequestRef& mdr)
   mdcache->predirty_journal_parents(mdr, &le->metablob, newi, dn->get_dir(),
                                     PREDIRTY_PRIMARY|PREDIRTY_DIR, 1);
   le->metablob.add_primary_dentry(dn, newi, true, true);
+  mds->notification_manager->push_notification(mds->get_nodeid(), newi,
+                                               CEPH_MDS_NOTIFY_CREATE);
+
   journal_and_reply(mdr, newi, dn, le, new C_MDS_mknod_finish(this, mdr, dn, newi));
 
   mds->balancer->maybe_fragment(dir, false);
@@ -7447,6 +7499,10 @@ void Server::handle_client_link(const MDRequestRef& mdr)
   // go!
   ceph_assert(g_conf()->mds_kill_link_at != 1);
 
+  mds->notification_manager->push_notification_link(mds->get_nodeid(), targeti, destdn,
+                                                    CEPH_MDS_NOTIFY_ATTRIB,
+                                                    CEPH_MDS_NOTIFY_CREATE);
+
   // local or remote?
   if (targeti->is_auth())
     _link_local(mdr, destdn, targeti, target_realm);
@@ -7511,7 +7567,6 @@ void Server::_link_local(const MDRequestRef& mdr, CDentry *dn, CInode *targeti,
 
   // do this after predirty_*, to avoid funky extra dnl arg
   dn->push_projected_linkage(targeti->ino(), targeti->d_type());
-
   journal_and_reply(mdr, targeti, dn, le,
 		    new C_MDS_link_local_finish(this, mdr, dn, targeti, dnpv, tipv, adjust_realm));
 }
@@ -7633,7 +7688,6 @@ void Server::_link_remote(const MDRequestRef& mdr, bool inc, CDentry *dn, CInode
     le->metablob.add_null_dentry(dn, true);
     dn->push_projected_linkage();
   }
-
   journal_and_reply(mdr, (inc ? targeti : nullptr), dn, le,
 		    new C_MDS_link_remote_finish(this, mdr, inc, dn, targeti));
 }
@@ -8035,7 +8089,6 @@ void Server::handle_client_unlink(const MDRequestRef& mdr)
 {
   const cref_t<MClientRequest> &req = mdr->client_request;
   client_t client = mdr->get_client();
-
   // rmdir or unlink?
   bool rmdir = (req->get_op() == CEPH_MDS_OP_RMDIR);
@@ -8176,11 +8229,16 @@ void Server::handle_client_unlink(const MDRequestRef& mdr)
   if (!rmdir && dnl->is_primary() && mdr->dn[0].size() == 1)
     mds->locker->create_lock_cache(mdr, diri);
 
+  mds->notification_manager->push_notification_link(mds->get_nodeid(), in, dn,
+                                                    CEPH_MDS_NOTIFY_ATTRIB,
+                                                    CEPH_MDS_NOTIFY_DELETE);
+
   // ok!
   if (dnl->is_remote() && !dnl->get_inode()->is_auth())
     _link_remote(mdr, false, dn, dnl->get_inode());
   else
     _unlink_local(mdr, dn, straydn);
+
 }
 
 class C_MDS_unlink_local_finish : public ServerLogContext {
@@ -8283,7 +8341,6 @@ void Server::_unlink_local(const MDRequestRef& mdr, CDentry *dn, CDentry *strayd
     ceph_assert(straydn);
     mdcache->project_subtree_rename(in, dn->get_dir(), straydn->get_dir());
   }
-
   journal_and_reply(mdr, 0, dn, le, new C_MDS_unlink_local_finish(this, mdr, dn, straydn));
 }
@@ -9206,6 +9263,9 @@ void Server::handle_client_rename(const MDRequestRef& mdr)
   // -- commit locally --
   C_MDS_rename_finish *fin = new C_MDS_rename_finish(this, mdr, srcdn, destdn, straydn);
 
+  mds->notification_manager->push_notification_move(mds->get_nodeid(), srcdn,
+                                                    destdn);
+
   journal_and_reply(mdr, srci, destdn, le, fin);
   mds->balancer->maybe_fragment(destdn->get_dir(), false);
 }
@@ -11175,6 +11235,10 @@ void Server::handle_client_mksnap(const MDRequestRef& mdr)
   le->metablob.add_table_transaction(TABLE_SNAP, stid);
   mdcache->predirty_journal_parents(mdr, &le->metablob, diri, 0, PREDIRTY_PRIMARY, false);
   mdcache->journal_dirty_inode(mdr.get(), &le->metablob, diri);
+
+  mds->notification_manager->push_notification_snap(
+      mds->get_nodeid(), diri, std::string(snapname),
+      CEPH_MDS_NOTIFY_CREATE | CEPH_MDS_NOTIFY_ATTRIB);
 
   // journal the snaprealm changes
   submit_mdlog_entry(le, new C_MDS_mksnap_finish(this, mdr, diri, info),
@@ -11309,6 +11373,10 @@ void Server::handle_client_rmsnap(const MDRequestRef& mdr)
   mdcache->predirty_journal_parents(mdr, &le->metablob, diri, 0, PREDIRTY_PRIMARY, false);
   mdcache->journal_dirty_inode(mdr.get(), &le->metablob, diri);
 
+  mds->notification_manager->push_notification_snap(
+      mds->get_nodeid(), diri, std::string(snapname),
+      CEPH_MDS_NOTIFY_DELETE | CEPH_MDS_NOTIFY_ATTRIB);
+
   submit_mdlog_entry(le, new C_MDS_rmsnap_finish(this, mdr, diri, snapid),
                      mdr, __func__);
   mdlog->flush();
diff --git a/src/mds/Server.h b/src/mds/Server.h
index 68842ea01cbeb..0922f784a83c2 100644
--- a/src/mds/Server.h
+++ b/src/mds/Server.h
@@ -37,6 +37,7 @@ using namespace std::literals::string_view_literals;
 #include "Mutation.h"
 #include "MDSContext.h"
 
+class MDSNotificationManager;
 class OSDMap;
 class LogEvent;
 class EMetaBlob;
diff --git a/src/messages/MNotificationInfoKafkaTopic.h b/src/messages/MNotificationInfoKafkaTopic.h
new file mode 100644
index 0000000000000..b8ca7f355a912
--- /dev/null
+++ b/src/messages/MNotificationInfoKafkaTopic.h
@@ -0,0 +1,80 @@
+#pragma once
+#include "messages/MMDSOp.h"
+
+class MNotificationInfoKafkaTopic : public MMDSOp {
+  static constexpr int HEAD_VERSION = 1;
+  static constexpr int COMPAT_VERSION = 1;
+
+public:
+  std::string topic_name;
+  std::string endpoint_name;
+  std::string broker;
+  bool use_ssl;
+  std::string user, password;
+  std::optional<std::string> ca_location;
+  std::optional<std::string> mechanism;
+  bool is_remove;
+
+protected:
+  MNotificationInfoKafkaTopic()
+      : MMDSOp(MSG_MDS_NOTIFICATION_INFO_KAFKA_TOPIC, HEAD_VERSION,
+               COMPAT_VERSION) {}
+  MNotificationInfoKafkaTopic(const std::string &topic_name,
+                              const std::string &endpoint_name, bool is_remove)
+      : MMDSOp(MSG_MDS_NOTIFICATION_INFO_KAFKA_TOPIC, HEAD_VERSION,
+               COMPAT_VERSION),
+        topic_name(topic_name), endpoint_name(endpoint_name),
+        is_remove(is_remove) {}
+  MNotificationInfoKafkaTopic(const std::string &topic_name,
+                              const std::string &endpoint_name,
+                              const std::string &broker, bool use_ssl,
+                              const std::string &user,
+                              const std::string &password,
+                              const std::optional<std::string> &ca_location,
+                              const std::optional<std::string> &mechanism,
+                              bool is_remove)
+      : MMDSOp(MSG_MDS_NOTIFICATION_INFO_KAFKA_TOPIC, HEAD_VERSION,
+               COMPAT_VERSION),
+        topic_name(topic_name), endpoint_name(endpoint_name), broker(broker),
+        use_ssl(use_ssl), user(user), password(password),
+        ca_location(ca_location), mechanism(mechanism), is_remove(is_remove) {}
+  ~MNotificationInfoKafkaTopic() final {}
+
+public:
+  std::string_view get_type_name() const override { return "mdskafka_topic"; }
+
+  void print(std::ostream &out) const override { out << "mdskafka_topic"; }
+
+  void encode_payload(uint64_t features) override {
+    using ceph::encode;
+    encode(topic_name, payload);
+    encode(endpoint_name, payload);
+    encode(broker, payload);
+    encode(use_ssl, payload);
+    encode(user, payload);
+    encode(password, payload);
+    encode(ca_location, payload);
+    encode(mechanism, payload);
+    encode(is_remove, payload);
+  }
+
+  void decode_payload() override {
+    using ceph::decode;
+    auto p = payload.cbegin();
+    decode(topic_name, p);
+    decode(endpoint_name, p);
+    decode(broker, p);
+    decode(use_ssl, p);
+    decode(user, p);
+    decode(password, p);
+    decode(ca_location, p);
+    decode(mechanism, p);
+    decode(is_remove, p);
+  }
+
+private:
+  template <class T, typename... Args>
+  friend boost::intrusive_ptr<T> ceph::make_message(Args &&...args);
+  template <class T, typename... Args>
+  friend MURef<T> crimson::make_message(Args &&...args);
+};
diff --git a/src/messages/MNotificationInfoUDPEndpoint.h b/src/messages/MNotificationInfoUDPEndpoint.h
new file mode 100644
index 0000000000000..f6bbdea73e17e
--- /dev/null
+++ b/src/messages/MNotificationInfoUDPEndpoint.h
@@ -0,0 +1,60 @@
+#pragma once
+#include "messages/MMDSOp.h"
+
+class MNotificationInfoUDPEndpoint : public MMDSOp {
+  static constexpr int HEAD_VERSION = 1;
+  static constexpr int COMPAT_VERSION = 1;
+
+public:
+  std::string name;
+  std::string ip;
+  int port;
+  bool is_remove;
+
+protected:
+  MNotificationInfoUDPEndpoint()
+      : MMDSOp(MSG_MDS_NOTIFICATION_INFO_UDP_ENDPOINT, HEAD_VERSION,
+               COMPAT_VERSION) {}
+  MNotificationInfoUDPEndpoint(const std::string &name, bool is_remove)
+      : MMDSOp(MSG_MDS_NOTIFICATION_INFO_UDP_ENDPOINT, HEAD_VERSION,
+               COMPAT_VERSION),
+        name(name), is_remove(is_remove) {}
+  MNotificationInfoUDPEndpoint(const std::string &name, const std::string &ip,
+                               int port, bool is_remove)
+      : MMDSOp(MSG_MDS_NOTIFICATION_INFO_UDP_ENDPOINT, HEAD_VERSION,
+               COMPAT_VERSION),
+        name(name), ip(ip), port(port), is_remove(is_remove) {}
+  ~MNotificationInfoUDPEndpoint() final {}
+
+public:
+  std::string_view get_type_name() const override {
+    return "mdsudp_notification_client";
+  }
+
+  void print(std::ostream &out) const override {
+    out << "mdsudp_notification_client";
+  }
+
+  void encode_payload(uint64_t features) override {
+    using ceph::encode;
+    encode(name, payload);
+    encode(ip, payload);
+    encode(port, payload);
+    encode(is_remove, payload);
+  }
+
+  void decode_payload() override {
+    using ceph::decode;
+    auto p = payload.cbegin();
+    decode(name, p);
+    decode(ip, p);
+    decode(port, p);
+    decode(is_remove, p);
+  }
+
+private:
+  template <class T, typename... Args>
+  friend boost::intrusive_ptr<T> ceph::make_message(Args &&...args);
+  template <class T, typename... Args>
+  friend MURef<T> crimson::make_message(Args &&...args);
+};
diff --git a/src/msg/Message.cc b/src/msg/Message.cc
index f649e0f3d3ee2..5833d5b490428 100644
--- a/src/msg/Message.cc
+++ b/src/msg/Message.cc
@@ -157,6 +157,11 @@
 #include "messages/MMDSFragmentNotify.h"
 #include "messages/MMDSFragmentNotifyAck.h"
 
+#ifdef WITH_CEPHFS_NOTIFICATION
+#include "messages/MNotificationInfoKafkaTopic.h"
+#include "messages/MNotificationInfoUDPEndpoint.h"
+#endif
+
 #include "messages/MExportDirDiscover.h"
 #include "messages/MExportDirDiscoverAck.h"
 #include "messages/MExportDirCancel.h"
@@ -884,6 +889,16 @@ Message *decode_message(CephContext *cct,
     m = make_message<MMDSQuiesceDbAck>();
     break;
 
+#ifdef WITH_CEPHFS_NOTIFICATION
+  case MSG_MDS_NOTIFICATION_INFO_KAFKA_TOPIC:
+    m = make_message<MNotificationInfoKafkaTopic>();
+    break;
+
+  case MSG_MDS_NOTIFICATION_INFO_UDP_ENDPOINT:
+    m = make_message<MNotificationInfoUDPEndpoint>();
+    break;
+#endif
+
   case MSG_MGR_BEACON:
     m = make_message<MMgrBeacon>();
     break;
diff --git a/src/msg/Message.h b/src/msg/Message.h
index 3e5c58ec376b4..ec066b0f4742d 100644
--- a/src/msg/Message.h
+++ b/src/msg/Message.h
@@ -202,6 +202,11 @@
 #define MSG_MDS_QUIESCE_DB_LISTING 0x505 // quiesce db replication
 #define MSG_MDS_QUIESCE_DB_ACK 0x506 // quiesce agent ack back to the db
 
+#ifdef WITH_CEPHFS_NOTIFICATION
+#define MSG_MDS_NOTIFICATION_INFO_KAFKA_TOPIC 0x507
+#define MSG_MDS_NOTIFICATION_INFO_UDP_ENDPOINT 0x508
+#endif
+
 // *** generic ***
 #define MSG_TIMECHECK 0x600
 #define MSG_MON_HEALTH 0x601
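
For reference, a minimal sketch of a consumer for the UDP endpoint is shown
below. It matches the wire format used by MDSUDPManager::send() above: each
datagram carries an 8-byte length prefix in host byte order followed by the
JSON notification built by MDSNotificationMessage (fields: timestamp, mds_id,
session_id, seq_id, mask, path). The port number is only an assumption for
illustration, not part of this patch:

    #include <boost/asio.hpp>
    #include <cstdint>
    #include <cstring>
    #include <iostream>

    int main() {
      boost::asio::io_context io;
      // Listen on the port given to add_udp_endpoint (assumption: 8080).
      boost::asio::ip::udp::socket sock(
          io, boost::asio::ip::udp::endpoint(boost::asio::ip::udp::v4(), 8080));
      char data[65536];
      for (;;) {
        boost::asio::ip::udp::endpoint sender;
        std::size_t n = sock.receive_from(boost::asio::buffer(data), sender);
        if (n < sizeof(uint64_t))
          continue; // short datagram, ignore
        uint64_t len;
        std::memcpy(&len, data, sizeof(len)); // length prefix, host byte order
        if (len > n - sizeof(len))
          continue; // truncated payload, ignore
        // Print the JSON payload that follows the length prefix.
        std::cout.write(data + sizeof(len), len);
        std::cout << '\n';
      }
    }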