Skip to content

Commit

Permalink
[202205][teamd]: Clean teamd process if LAG creation fails (#2888) (#…
Browse files Browse the repository at this point in the history
…2932)

What I did

202205 cherry-pick for [teamd]: Clean teamd process if LAG creation fails #2888
During LAG creation, if the teamd process fails for any reason, clean up any leftover teamd processes for the LAG alias
Call kill directly in removeLag instead of using teamd -k

Why I did it

If the teamd process times out for any reason, subsequent attempts to create a LAG may fail if there is an orphaned process left running
As detailed in "Removal of LAG with no members takes around 1 sec on 202012" (sonic-buildimage#8071), teamd -k may block for some time.

Signed-off-by: Lawrence Lee <[email protected]>
  • Loading branch information
theasianpianist authored Nov 3, 2023
1 parent 8b280d8 commit dc0e29b
Show file tree
Hide file tree
Showing 5 changed files with 154 additions and 9 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ tests/tests
tests/mock_tests/tests_response_publisher
tests/mock_tests/tests_fpmsyncd
tests/mock_tests/tests_intfmgrd
tests/mock_tests/tests_teammgrd
tests/mock_tests/tests_portsyncd


Expand Down
41 changes: 38 additions & 3 deletions cfgmgr/teammgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,8 @@ void TeamMgr::doLagTask(Consumer &consumer)
{
if (addLag(alias, min_links, fallback) == task_need_retry)
{
// If LAG creation fails, we need to clean up any potentially orphaned teamd processes
removeLag(alias);
it++;
continue;
}
Expand Down Expand Up @@ -615,7 +617,7 @@ task_process_status TeamMgr::addLag(const string &alias, int min_links, bool fal
SWSS_LOG_INFO("Port channel %s teamd configuration: %s",
alias.c_str(), conf.str().c_str());

string warmstart_flag = WarmStart::isWarmStart() ? " -w -o " : " -r ";
string warmstart_flag = WarmStart::isWarmStart() ? " -w -o" : " -r";

cmd << TEAMD_CMD
<< warmstart_flag
Expand All @@ -642,9 +644,42 @@ bool TeamMgr::removeLag(const string &alias)

stringstream cmd;
string res;
pid_t pid;

cmd << TEAMD_CMD << " -k -t " << shellquote(alias);
EXEC_WITH_ERROR_THROW(cmd.str(), res);
try
{
std::stringstream cmd;
cmd << "cat " << shellquote("/var/run/teamd/" + alias + ".pid");
EXEC_WITH_ERROR_THROW(cmd.str(), res);
}
catch (const std::exception &e)
{
SWSS_LOG_NOTICE("Failed to remove non-existent port channel %s pid...", alias.c_str());
return false;
}

try
{
pid = static_cast<pid_t>(std::stoul(res, nullptr, 10));
SWSS_LOG_INFO("Read port channel %s pid %d", alias.c_str(), pid);
}
catch (const std::exception &e)
{
SWSS_LOG_ERROR("Failed to read port channel %s pid: %s", alias.c_str(), e.what());
return false;
}

try
{
std::stringstream cmd;
cmd << "kill -TERM " << pid;
EXEC_WITH_ERROR_THROW(cmd.str(), res);
}
catch (const std::exception &e)
{
SWSS_LOG_ERROR("Failed to send SIGTERM to port channel %s pid %d: %s", alias.c_str(), pid, e.what());
return false;
}

SWSS_LOG_NOTICE("Stop port channel %s", alias.c_str());

Expand Down
25 changes: 23 additions & 2 deletions tests/mock_tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ INCLUDES = -I $(FLEX_CTR_DIR) -I $(DEBUG_CTR_DIR) -I $(top_srcdir)/lib -I $(top_

CFLAGS_SAI = -I /usr/include/sai

TESTS = tests tests_intfmgrd
TESTS = tests tests_intfmgrd tests_teammgrd

noinst_PROGRAMS = tests tests_intfmgrd
noinst_PROGRAMS = tests tests_intfmgrd tests_teammgrd

LDADD_SAI = -lsaimeta -lsaimetadata -lsaivs -lsairedis

Expand Down Expand Up @@ -146,3 +146,24 @@ tests_intfmgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_GTEST
tests_intfmgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_GTEST) $(CFLAGS_SAI) -I $(top_srcdir)/cfgmgr -I $(top_srcdir)/orchagent/
tests_intfmgrd_LDADD = $(LDADD_GTEST) $(LDADD_SAI) -lnl-genl-3 -lhiredis -lhiredis \
-lswsscommon -lswsscommon -lgtest -lgtest_main -lzmq -lnl-3 -lnl-route-3 -lpthread

## teammgrd unit tests

## Sources compiled into tests_teammgrd: the unit test itself, cfgmgr/teammgr.cpp
## (the code under test) together with the lib/ and orchagent/ sources listed
## next to it, and the mock_*/fake_* sources that live in this directory.
tests_teammgrd_SOURCES = teammgrd/teammgr_ut.cpp \
$(top_srcdir)/cfgmgr/teammgr.cpp \
$(top_srcdir)/lib/subintf.cpp \
$(top_srcdir)/orchagent/orch.cpp \
$(top_srcdir)/orchagent/request_parser.cpp \
mock_orchagent_main.cpp \
mock_dbconnector.cpp \
mock_table.cpp \
mock_hiredis.cpp \
fake_response_publisher.cpp \
mock_redisreply.cpp \
mock_shell_command.cpp

## Include paths: everything in tests_INCLUDES plus the cfgmgr and lib directories.
tests_teammgrd_INCLUDES = $(tests_INCLUDES) -I$(top_srcdir)/cfgmgr -I$(top_srcdir)/lib
## Compiler flags; CPPFLAGS is the same set with the include paths above appended.
tests_teammgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_GTEST) $(CFLAGS_SAI)
tests_teammgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_GTEST) $(CFLAGS_SAI) $(tests_teammgrd_INCLUDES)
## Libraries to link. NOTE(review): -lhiredis and -lswsscommon are each listed
## twice (as in tests_intfmgrd_LDADD), and both -lgtest_main and -lgmock_main
## are present -- confirm the duplicates and the two *_main libraries are intended.
tests_teammgrd_LDADD = $(LDADD_GTEST) $(LDADD_SAI) -lnl-genl-3 -lhiredis -lhiredis \
-lswsscommon -lswsscommon -lgtest -lgtest_main -lzmq -lnl-3 -lnl-route-3 -lpthread -lgmock -lgmock_main
18 changes: 14 additions & 4 deletions tests/mock_tests/mock_shell_command.cpp
Original file line number Diff line number Diff line change
@@ -1,15 +1,25 @@
#include <string>
#include <vector>

/*
 * Override this pointer for custom behavior.
 *
 * When non-null, swss::exec() forwards every call to this function and returns
 * its result unchanged. The hook is then fully in charge: exec() does not
 * record the command in mockCallArgs and does not write to stdout itself.
 */
int (*callback)(const std::string &cmd, std::string &stdout) = nullptr;

/* Value returned by swss::exec() when no callback is installed. */
int mockCmdReturn = 0;
/* Text copied into exec()'s stdout argument when no callback is installed. */
std::string mockCmdStdcout = "";
/* Commands passed to swss::exec() while no callback is installed, in call order. */
std::vector<std::string> mockCallArgs;

namespace swss {
    /*
     * Test replacement for swss::exec().
     *
     * cmd    - the command line the caller wants to run (never executed).
     * stdout - receives the simulated command output.
     *
     * Returns the callback's result when a callback is installed; otherwise
     * records cmd in mockCallArgs, sets stdout to mockCmdStdcout and returns
     * mockCmdReturn.
     */
    int exec(const std::string &cmd, std::string &stdout)
    {
        // A custom hook takes precedence over the canned response below.
        if (callback != nullptr)
        {
            return callback(cmd, stdout);
        }

        mockCallArgs.push_back(cmd);
        stdout = mockCmdStdcout;
        return mockCmdReturn;
    }
}
78 changes: 78 additions & 0 deletions tests/mock_tests/teammgrd/teammgr_ut.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#include "gtest/gtest.h"
#include "../mock_table.h"
#include "teammgr.h"

extern int (*callback)(const std::string &cmd, std::string &stdout);
extern std::vector<std::string> mockCallArgs;

/*
 * Shell-command hook installed by the TeamMgr test fixture.
 *
 * Every command is appended to mockCallArgs. A command containing the teamd
 * start line for PortChannel1 reports failure (returns 1); a command that
 * cats PortChannel1's pid file succeeds and yields "1234" as its output.
 * Anything else succeeds and leaves stdout untouched.
 */
int cb(const std::string &cmd, std::string &stdout)
{
    const auto mentions = [&cmd](const char *needle) {
        return cmd.find(needle) != std::string::npos;
    };

    mockCallArgs.push_back(cmd);

    // Simulated failure of the teamd launch for PortChannel1.
    if (mentions("/usr/bin/teamd -r -t PortChannel1"))
    {
        return 1;
    }

    // Simulated contents of the pid file.
    if (mentions("cat \"/var/run/teamd/PortChannel1.pid\""))
    {
        stdout = "1234";
    }

    return 0;
}

namespace teammgr_ut
{
struct TeamMgrTest : public ::testing::Test
{
std::shared_ptr<swss::DBConnector> m_config_db;
std::shared_ptr<swss::DBConnector> m_app_db;
std::shared_ptr<swss::DBConnector> m_state_db;
std::vector<TableConnector> cfg_lag_tables;

virtual void SetUp() override
{
testing_db::reset();
m_config_db = std::make_shared<swss::DBConnector>("CONFIG_DB", 0);
m_app_db = std::make_shared<swss::DBConnector>("APPL_DB", 0);
m_state_db = std::make_shared<swss::DBConnector>("STATE_DB", 0);

swss::Table metadata_table = swss::Table(m_config_db.get(), CFG_DEVICE_METADATA_TABLE_NAME);
std::vector<swss::FieldValueTuple> vec;
vec.emplace_back("mac", "01:23:45:67:89:ab");
metadata_table.set("localhost", vec);

TableConnector conf_lag_table(m_config_db.get(), CFG_LAG_TABLE_NAME);
TableConnector conf_lag_member_table(m_config_db.get(), CFG_LAG_MEMBER_TABLE_NAME);
TableConnector state_port_table(m_state_db.get(), STATE_PORT_TABLE_NAME);

std::vector<TableConnector> tables = {
conf_lag_table,
conf_lag_member_table,
state_port_table
};

cfg_lag_tables = tables;
mockCallArgs.clear();
callback = cb;
}
};

/*
 * Configures PortChannel1, runs TeamMgr's pending tasks, and checks that
 * exactly one "kill -TERM 1234" command was recorded in mockCallArgs. With
 * the cb hook installed by the fixture, the teamd start command fails and the
 * pid file reads back "1234".
 */
TEST_F(TeamMgrTest, testProcessKilledAfterAddLagFailure)
{
    swss::TeamMgr teammgr(m_config_db.get(), m_app_db.get(), m_state_db.get(), cfg_lag_tables);
    swss::Table cfg_lag_table(m_config_db.get(), CFG_LAG_TABLE_NAME);
    cfg_lag_table.set("PortChannel1", { { "admin_status", "up" },
                                        { "mtu", "9100" },
                                        { "lacp_key", "auto" },
                                        { "min_links", "2" } });
    teammgr.addExistingData(&cfg_lag_table);
    teammgr.doTask();

    int kill_cmd_called = 0;
    // Iterate by const reference: the commands are only inspected, not copied.
    for (const auto &cmd : mockCallArgs)
    {
        if (cmd.find("kill -TERM 1234") != std::string::npos)
        {
            kill_cmd_called++;
        }
    }
    ASSERT_EQ(kill_cmd_called, 1);
}
}

0 comments on commit dc0e29b

Please sign in to comment.