Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

change default for process checks in core agent #34489

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/agent/subcommands/flare/command_other_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ func NewSystemProbeTestServer(_ http.Handler) (*httptest.Server, error) {
func InjectConnectionFailures(mockSysProbeConfig model.Config, _ model.Config) {
mockSysProbeConfig.SetWithoutSource("system_probe_config.enabled", true)
mockSysProbeConfig.SetWithoutSource("system_probe_config.sysprobe_socket", "/opt/datadog-agent/run/sysprobe-bad.sock")
mockSysProbeConfig.SetWithoutSource("network_config.enabled", true)
}

// CheckExpectedConnectionFailures checks the expected errors after simulated
Expand Down
2 changes: 2 additions & 0 deletions cmd/agent/subcommands/flare/command_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ func (c *commandTestSuite) TestReadProfileData() {
if runtime.GOOS != "darwin" {
mockSysProbeConfig.SetWithoutSource("system_probe_config.enabled", true)
mockSysProbeConfig.SetWithoutSource("system_probe_config.sysprobe_socket", c.sysprobeSocketPath)
mockSysProbeConfig.SetWithoutSource("network_config.enabled", true)
}

profiler := getProfiler(t, mockConfig, mockSysProbeConfig)
Expand Down Expand Up @@ -214,6 +215,7 @@ func (c *commandTestSuite) TestReadProfileDataNoTraceAgent() {
mockSysProbeConfig := configmock.NewSystemProbe(t)
mockSysProbeConfig.SetWithoutSource("system_probe_config.enabled", true)
mockSysProbeConfig.SetWithoutSource("system_probe_config.sysprobe_socket", c.sysprobeSocketPath)
mockSysProbeConfig.SetWithoutSource("network_config.enabled", true)

profiler := getProfiler(t, mockConfig, mockSysProbeConfig)
data, err := profiler.ReadProfileData(10, func(string, ...interface{}) error { return nil })
Expand Down
8 changes: 7 additions & 1 deletion comp/core/profiler/impl/profiler.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,9 +260,15 @@ func (profiler) securityAgentEnabled() bool {
}

func (p profiler) processAgentEnabled() bool {
return p.cfg.GetBool("process_config.enabled") ||
processChecksEnabled := p.cfg.GetBool("process_config.enabled") ||
p.cfg.GetBool("process_config.container_collection.enabled") ||
p.cfg.GetBool("process_config.process_collection.enabled")
processChecksInProcessAgent := !p.cfg.GetBool("process_config.run_in_core_agent.enabled") &&
processChecksEnabled
npmEnabled := p.sysProbeCfg.GetBool("network_config.enabled")
usmEnabled := p.sysProbeCfg.GetBool("service_monitoring_config.enabled")

return processChecksInProcessAgent || npmEnabled || usmEnabled
}

func (p profiler) apmEnabled() bool {
Expand Down
35 changes: 33 additions & 2 deletions comp/core/profiler/impl/profiler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ func createGenericConfig(t *testing.T) model.Config {
mockConfig.SetWithoutSource("process_config.expvar_port", port)
mockConfig.SetWithoutSource("security_agent.expvar_port", port)

mockConfig.SetWithoutSource("process_config.run_in_core_agent.enabled", false)
mockConfig.SetWithoutSource("process_config.enabled", false)
mockConfig.SetWithoutSource("process_config.container_collection.enabled", false)
mockConfig.SetWithoutSource("process_config.process_collection.enabled", false)
mockConfig.SetWithoutSource("apm_config.enabled", false)
mockConfig.SetWithoutSource("system_probe_config.enabled", false)

return mockConfig
}
Expand Down Expand Up @@ -212,14 +212,45 @@ func TestTimeout(t *testing.T) {
profileDuration: 10 * time.Second,
expTimeout: baseTimeout + 6*(10*time.Second),
},
{
name: "Process Agent Checks in Core Agent",
extraCfgs: map[string]interface{}{
"process_config.run_in_core_agent.enabled": true,
},
extraSysCfgs: map[string]interface{}{},
profileDuration: 10 * time.Second,
expTimeout: baseTimeout + 4*(10*time.Second),
},
{
name: "Process Agent Enabled, via NPM",
extraCfgs: map[string]interface{}{
"process_config.run_in_core_agent.enabled": true,
},
extraSysCfgs: map[string]interface{}{
"network_config.enabled": true,
},
profileDuration: 10 * time.Second,
expTimeout: baseTimeout + 8*(10*time.Second),
},
{
name: "Process Agent Enabled, via USM",
extraCfgs: map[string]interface{}{
"process_config.run_in_core_agent.enabled": true,
},
extraSysCfgs: map[string]interface{}{
"service_monitoring_config.enabled": true,
},
profileDuration: 10 * time.Second,
expTimeout: baseTimeout + 8*(10*time.Second),
},
{
name: "SysProbe Enabled",
extraCfgs: map[string]interface{}{},
extraSysCfgs: map[string]interface{}{
"system_probe_config.enabled": true,
},
profileDuration: 10 * time.Second,
expTimeout: baseTimeout + 6*(10*time.Second),
expTimeout: baseTimeout + 8*(10*time.Second), // config enables NPM, which enables process agent
},
{
name: "Everything Enabled",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,8 @@ func TestCollection(t *testing.T) {
t.Run(test.name, func(t *testing.T) {

overrides := map[string]interface{}{
"language_detection.enabled": true,
"language_detection.enabled": true,
"process_config.run_in_core_agent.enabled": false,
}

// We do not inject any collectors here; we instantiate
Expand Down
4 changes: 2 additions & 2 deletions pkg/config/config_template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1543,9 +1543,9 @@ api_key:
## @param run_in_core_agent - custom object - optional
## Controls whether the process Agent or core Agent collects process and/or container information (Linux only).
# run_in_core_agent:
## @param enabled - boolean - optional - default: false
## @param enabled - boolean - optional - default: true
## Enables process/container collection on the core Agent instead of the process Agent.
# enabled: false
# enabled: true
{{ end }}

## @param process_collection - custom object - optional
Expand Down
2 changes: 1 addition & 1 deletion pkg/config/setup/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ func setupProcesses(config pkgconfigmodel.Setup) {
procBindEnvAndSetDefault(config, "process_config.process_collection.enabled", false)

// This allows for the process check to run in the core agent but is for linux only
procBindEnvAndSetDefault(config, "process_config.run_in_core_agent.enabled", false)
procBindEnvAndSetDefault(config, "process_config.run_in_core_agent.enabled", runtime.GOOS == "linux")

config.BindEnv("process_config.process_dd_url",
"DD_PROCESS_CONFIG_PROCESS_DD_URL",
Expand Down
2 changes: 1 addition & 1 deletion pkg/config/setup/process_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func TestProcessDefaultConfig(t *testing.T) {
},
{
key: "process_config.run_in_core_agent.enabled",
defaultValue: false,
defaultValue: runtime.GOOS == "linux",
},
{
key: "process_config.queue_size",
Expand Down
6 changes: 3 additions & 3 deletions pkg/flare/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,10 @@ func provideExtraFiles(fb flaretypes.FlareBuilder) error {
fb.AddFile("status.log", []byte("unable to get the status of the agent, is it running?")) //nolint:errcheck
fb.AddFile("config-check.log", []byte("unable to get loaded checks config, is the agent running?")) //nolint:errcheck
} else {
fb.AddFileFromFunc("tagger-list.json", getAgentTaggerList) //nolint:errcheck
fb.AddFileFromFunc("workload-list.log", getAgentWorkloadList) //nolint:errcheck
fb.AddFileFromFunc("process-agent_tagger-list.json", getProcessAgentTaggerList) //nolint:errcheck
fb.AddFileFromFunc("tagger-list.json", getAgentTaggerList) //nolint:errcheck
fb.AddFileFromFunc("workload-list.log", getAgentWorkloadList) //nolint:errcheck
if !pkgconfigsetup.Datadog().GetBool("process_config.run_in_core_agent.enabled") {
fb.AddFileFromFunc("process-agent_tagger-list.json", getProcessAgentTaggerList) //nolint:errcheck
getChecksFromProcessAgent(fb, getProcessAPIAddressPort)
}
}
Expand Down
20 changes: 16 additions & 4 deletions pkg/process/metadata/workloadmeta/collector/process_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,34 +171,45 @@ func TestProcessCollector(t *testing.T) {
// the remote process collector is enabled.
func TestEnabled(t *testing.T) {
type testCase struct {
name string
processCollectionEnabled, remoteProcessCollectorEnabled bool
expectEnabled bool
flavor string
name string
processCollectionEnabled, remoteProcessCollectorEnabled, runInCoreAgentEnabled bool
expectEnabled bool
flavor string
}

testCases := []testCase{
{
name: "process check enabled",
processCollectionEnabled: true,
remoteProcessCollectorEnabled: false,
runInCoreAgentEnabled: false,
flavor: flavor.ProcessAgent,
expectEnabled: false,
},
{
name: "remote collector disabled",
processCollectionEnabled: false,
remoteProcessCollectorEnabled: false,
runInCoreAgentEnabled: false,
flavor: flavor.ProcessAgent,
expectEnabled: false,
},
{
name: "collector enabled",
processCollectionEnabled: false,
remoteProcessCollectorEnabled: true,
runInCoreAgentEnabled: false,
flavor: flavor.ProcessAgent,
expectEnabled: true,
},
{
name: "collector enabled but in core agent",
processCollectionEnabled: false,
remoteProcessCollectorEnabled: true,
runInCoreAgentEnabled: true,
flavor: flavor.ProcessAgent,
expectEnabled: false,
},
}

for _, tc := range testCases {
Expand All @@ -208,6 +219,7 @@ func TestEnabled(t *testing.T) {
cfg := configmock.New(t)
cfg.SetWithoutSource("process_config.process_collection.enabled", tc.processCollectionEnabled)
cfg.SetWithoutSource("language_detection.enabled", tc.remoteProcessCollectorEnabled)
cfg.SetWithoutSource("process_config.run_in_core_agent.enabled", tc.runInCoreAgentEnabled)

assert.Equal(t, tc.expectEnabled, Enabled(cfg))
})
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Each section from every release note are combined when the
# CHANGELOG.rst is rendered. So the text needs to be worded so that
# it does not depend on any information only available in another
# section. This may mean repeating some details, but each section
# must be readable independently of the other.
#
# Each section note must be formatted as reStructuredText.
---
enhancements:
- |
Process checks will now run in the core agent by default on Linux. Process Agent will only run if needed for other configured features.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ security_agent:
cmd_port: {{.SecurityCmdPort}}

process_config:
run_in_core_agent:
enabled: false
process_collection:
enabled: true
cmd_port: {{.ProcessCmdPort}}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ security_agent:
cmd_port: {{.SecurityCmdPort}}

process_config:
run_in_core_agent:
enabled: false
process_collection:
enabled: true
cmd_port: {{.ProcessCmdPort}}
17 changes: 12 additions & 5 deletions test/new-e2e/tests/agent-subcommands/flare/flare_common_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
package flare

import (
"runtime"
"time"

"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -40,9 +41,12 @@ func (v *baseFlareSuite) TestFlareDefaultFiles() {
assertLogsFolderOnlyContainsLogFile(v.T(), flare)
assertEtcFolderOnlyContainsConfigFile(v.T(), flare)

assertFileContains(v.T(), flare, "process_check_output.json", "'process_config.process_collection.enabled' is disabled")
assertFileNotContains(v.T(), flare, "container_check_output.json", "'process_config.container_collection.enabled' is disabled")
assertFileNotContains(v.T(), flare, "process_discovery_check_output.json", "'process_config.process_discovery.enabled' is disabled")
if runtime.GOOS != "linux" {
assertFilesExist(v.T(), flare, []string{"process-agent_tagger-list.json"})
assertFileContains(v.T(), flare, "process_check_output.json", "'process_config.process_collection.enabled' is disabled")
assertFileNotContains(v.T(), flare, "container_check_output.json", "'process_config.container_collection.enabled' is disabled")
assertFileNotContains(v.T(), flare, "process_discovery_check_output.json", "'process_config.process_discovery.enabled' is disabled")
}
}

func (v *baseFlareSuite) TestLocalFlareDefaultFiles() {
Expand Down Expand Up @@ -72,9 +76,12 @@ func (v *baseFlareSuite) TestFlareProfiling() {
assert.Contains(v.T(), logs, "Setting runtime_block_profile_rate to 5000")
assert.Contains(v.T(), logs, "Getting a 31s profile snapshot from core.")
assert.Contains(v.T(), logs, "Getting a 31s profile snapshot from security-agent.")
assert.Contains(v.T(), logs, "Getting a 31s profile snapshot from process.")

assertFilesExist(v.T(), flare, profilingFiles)

if runtime.GOOS != "linux" {
assert.Contains(v.T(), logs, "Getting a 31s profile snapshot from process.")
assertFilesExist(v.T(), flare, profilingNonLinuxFiles)
}
}

func requestAgentFlareAndFetchFromFakeIntake(v *baseFlareSuite, flareArgs ...agentclient.AgentArgsOption) (flare.Flare, string) {
Expand Down
14 changes: 8 additions & 6 deletions test/new-e2e/tests/agent-subcommands/flare/flare_files.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ var nonLocalMetadataFlareFiles = []string{
}

var nonLocalFlareFiles = []string{
"process-agent_tagger-list.json",
"tagger-list.json",
"workload-list.log",
"agent_open_files.txt",
Expand Down Expand Up @@ -94,18 +93,21 @@ var profilingFiles = []string{
"profiles/core-block.pprof",
"profiles/core-cpu.pprof",
"profiles/core-mutex.pprof",
"profiles/process-1st-heap.pprof",
"profiles/process-2nd-heap.pprof",
"profiles/process-block.pprof",
"profiles/process-cpu.pprof",
"profiles/process-mutex.pprof",
"profiles/trace-1st-heap.pprof",
"profiles/trace-2nd-heap.pprof",
"profiles/trace-block.pprof",
"profiles/trace-cpu.pprof",
"profiles/trace-mutex.pprof",
}

var profilingNonLinuxFiles = []string{
"profiles/process-1st-heap.pprof",
"profiles/process-2nd-heap.pprof",
"profiles/process-block.pprof",
"profiles/process-cpu.pprof",
"profiles/process-mutex.pprof",
}

// untestedFiles contains some untested files that needs specific scenario which should be added later.
//
//nolint:unused
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,6 @@ func (v *linuxFlareSuite) TestzzzFlareWithAllConfiguration() {
extraCustomConfigFiles := []string{"etc/confd/dist/test.yaml", "etc/confd/dist/test.yml", "etc/confd/dist/test.yml.test", "etc/confd/checksd/test.yml"}
assertFilesExist(v.T(), flare, extraCustomConfigFiles)

assertFileNotContains(v.T(), flare, "process_check_output.json", "'process_config.process_collection.enabled' is disabled")
assertFileContains(v.T(), flare, "container_check_output.json", "'process_config.container_collection.enabled' is disabled")
assertFileContains(v.T(), flare, "process_discovery_check_output.json", "'process_config.process_discovery.enabled' is disabled")

filesRegistredInPermissionsLog := append(systemProbeDummyFiles, "/etc/datadog-agent/auth_token")
assertFileContains(v.T(), flare, "permissions.log", filesRegistredInPermissionsLog...)
}
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ func fetchAndCheckStatus(v *baseStatusSuite, expectedSections []expectedSection)
}, 2*time.Minute, 20*time.Second)
}

func (v *baseStatusSuite) TestDefaultInstallStatus() {
func (v *baseStatusSuite) testDefaultInstallStatus(processAgentContain, processAgentNotContain []string) {
expectedSections := []expectedSection{
{
name: `Agent \(.*\)`, // TODO: verify that the right version is output
Expand Down Expand Up @@ -176,7 +176,8 @@ func (v *baseStatusSuite) TestDefaultInstallStatus() {
{
name: "Process Agent",
shouldBePresent: true,
shouldNotContain: []string{"Status: Not running or unreachable"},
shouldContain: processAgentContain,
shouldNotContain: processAgentNotContain,
},
{
name: "Remote Configuration",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,7 @@ func (v *linuxStatusSuite) TestChecksMetadataUnix() {

fetchAndCheckStatus(&v.baseStatusSuite, expectedSections)
}

func (v *linuxStatusSuite) TestDefaultInstallStatus() {
v.testDefaultInstallStatus([]string{"Status: Not running or unreachable"}, nil)
}
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,7 @@ func (v *windowsStatusSuite) TestChecksMetadataWindows() {

fetchAndCheckStatus(&v.baseStatusSuite, expectedSections)
}

func (v *windowsStatusSuite) TestDefaultInstallStatus() {
v.testDefaultInstallStatus(nil, []string{"Status: Not running or unreachable"})
}
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
network_config:
enabled: true
runtime_security_config:
enabled: true
enabled: true
8 changes: 5 additions & 3 deletions test/new-e2e/tests/installer/script/default_script_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@ import (
"os"
"strings"

"github.com/DataDog/datadog-agent/pkg/util/testutil/flake"
awshost "github.com/DataDog/datadog-agent/test/new-e2e/pkg/provisioners/aws/host"
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/utils/e2e/client"
e2eos "github.com/DataDog/test-infra-definitions/components/os"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v3"

"github.com/DataDog/datadog-agent/pkg/util/testutil/flake"
awshost "github.com/DataDog/datadog-agent/test/new-e2e/pkg/provisioners/aws/host"
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/utils/e2e/client"
)

type installScriptDefaultSuite struct {
Expand All @@ -40,6 +41,7 @@ func (s *installScriptDefaultSuite) TestInstall() {
"DD_SITE=datadoghq.com",
"DD_APM_INSTRUMENTATION_LIBRARIES=java:1,python:2,js:5,dotnet:3",
"DD_APM_INSTRUMENTATION_ENABLED=host",
"DD_NETWORK_CONFIG_ENABLED=true", // necessary to get process-agent running
Copy link
Member

@arbll arbll Feb 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I follow why both this and the removal of units assertions are needed ? My understanding is that enabling this setting should lead to the previous behavior and require no extra changes

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This only applies to this specific test

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR changes it so process checks run in core agent by default on Linux, and thus process-agent will not be running. It will start up but then exit because it isn't needed.

"DD_RUNTIME_SECURITY_CONFIG_ENABLED=true",
"DD_SBOM_CONTAINER_IMAGE_ENABLED=true",
"DD_SBOM_HOST_ENABLED=true",
Expand Down
Loading