From ac585a670af9be45bbb8111968bd2b9497422c73 Mon Sep 17 00:00:00 2001 From: "e.zhirov" Date: Tue, 20 Dec 2022 15:29:18 +0100 Subject: [PATCH 1/3] createdump: only dump committed memory Dumping memory regions as they are listed in /proc/pid/maps results in increase of RAM usage of the target application on some Linux kernels. This change uses /proc/pid/pagemap to check if the page is committed before adding it to the regions list. As the file is not available on kernels 4.0 and 4.1 without elevated permissions there's a fallback to previous behavior. --- src/coreclr/debug/createdump/crashinfo.cpp | 113 ++++++++++-------- src/coreclr/debug/createdump/crashinfo.h | 6 +- .../debug/createdump/crashinfounix.cpp | 28 +++-- 3 files changed, 90 insertions(+), 57 deletions(-) diff --git a/src/coreclr/debug/createdump/crashinfo.cpp b/src/coreclr/debug/createdump/crashinfo.cpp index 529ed2f7f35777..139fe1a4706c90 100644 --- a/src/coreclr/debug/createdump/crashinfo.cpp +++ b/src/coreclr/debug/createdump/crashinfo.cpp @@ -30,7 +30,7 @@ CrashInfo::CrashInfo(pid_t pid, bool gatherFrames, pid_t crashThread, uint32_t s m_task = 0; #else m_auxvValues.fill(0); - m_fd = -1; + m_fdMem = -1; #endif } @@ -683,72 +683,89 @@ CrashInfo::InsertMemoryRegion(uint64_t address, size_t size) int CrashInfo::InsertMemoryRegion(const MemoryRegion& region) { - // First check if the full memory region can be added without conflicts and is fully valid. - const auto& found = m_memoryRegions.find(region); - if (found == m_memoryRegions.end()) + // Check if the new region overlaps with the previously added ones + const auto& conflictingRegion = m_memoryRegions.find(region); + const bool hasConflict = conflictingRegion != m_memoryRegions.end(); + if (hasConflict && conflictingRegion->Contains(region)) { - // If the region is valid, add the full memory region - if (ValidRegion(region)) - { - m_memoryRegions.insert(region); - return region.SizeInPages(); - } - } - else - { - // If the memory region is wholly contained in region found - if (found->Contains(region)) - { - return 0; - } + // The region is contained in the one we added before + // Nothing to do + return 0; } - // Either part of the region was invalid, part of it hasn't been added or the backed - // by memory state is different. - uint64_t start = region.StartAddress(); - // The region overlaps/conflicts with one already in the set so add one page at a - // time to avoid the overlapping pages. - uint64_t numberPages = region.SizeInPages(); + // Go page by page and split the region into valid sub-regions + uint64_t pageStart = region.StartAddress(); + uint64_t numberPages = region.Size() / PAGE_SIZE; + uint64_t subRegionStart, subRegionEnd; int pagesAdded = 0; - - for (size_t p = 0; p < numberPages; p++, start += PAGE_SIZE) + subRegionStart = subRegionEnd = pageStart; + for (size_t p = 0; p < numberPages; p++, pageStart += PAGE_SIZE) { - MemoryRegion memoryRegionPage(region.Flags(), start, start + PAGE_SIZE); + MemoryRegion page(region.Flags(), pageStart, pageStart + PAGE_SIZE); + + // avoid searching for conflicts if we know we don't have one + const bool pageHasConflicts = hasConflict && m_memoryRegions.find(page) != m_memoryRegions.end(); + // avoid validating the page if it conflicts: we won't add it in any case + const bool pageIsValid = !pageHasConflicts && PageMappedToPhysicalMemory(pageStart) && PageCanBeRead(pageStart); - const auto& found = m_memoryRegions.find(memoryRegionPage); - if (found == m_memoryRegions.end()) + if (pageIsValid) { - // All the single pages added here will be combined in CombineMemoryRegions() - if (ValidRegion(memoryRegionPage)) + subRegionEnd = page.EndAddress(); + pagesAdded++; + } + else + { + // the next page is not valid thus sub-region is complete + if (subRegionStart != subRegionEnd) { - m_memoryRegions.insert(memoryRegionPage); - pagesAdded++; + m_memoryRegions.insert(MemoryRegion(region.Flags(), subRegionStart, subRegionEnd)); } + subRegionStart = subRegionEnd = page.EndAddress(); } } + // add the last sub-region if it's not empty + if (subRegionStart != subRegionEnd) + { + m_memoryRegions.insert(MemoryRegion(region.Flags(), subRegionStart, subRegionEnd)); + } return pagesAdded; } -// -// Validates a memory region -// bool -CrashInfo::ValidRegion(const MemoryRegion& region) -{ - uint64_t start = region.StartAddress(); - uint64_t numberPages = region.SizeInPages(); - for (size_t p = 0; p < numberPages; p++, start += PAGE_SIZE) +CrashInfo::PageMappedToPhysicalMemory(uint64_t start) { + // https://www.kernel.org/doc/Documentation/vm/pagemap.txt + if (m_fdPagemap == -1) { - BYTE buffer[1]; - size_t read; + // Weren't able to open pagemap file, so don't run this check + // Permission issues are expected on Linux kernels 4.0 and 4.1 + return true; + } - if (!ReadProcessMemory((void*)start, buffer, 1, &read)) - { - return false; - } + uint64_t pagemapOffset = (start / PAGE_SIZE) * sizeof(uint64_t); + lseek64(m_fdPagemap, (off64_t) pagemapOffset, SEEK_SET); + uint64_t value; + size_t res = read(m_fdPagemap, (void*)&value, sizeof(value)); + if (res == (size_t) -1) + { + int readErrno = errno; + TRACE("Reading of pagemap file FAILED, addr: %" PRIA PRIx ", pagemap offset: %" PRIA PRIx ", size: %zu, ERRNO %d: %s\n", start, pagemapOffset, sizeof(value), readErrno, strerror(readErrno)); + // still try to put this page in the dump file + return true; } - return true; + + bool is_page_present = (value & ((uint64_t)1 << 63)) != 0; + bool is_page_swapped = (value & ((uint64_t)1 << 62)) != 0; + TRACE_VERBOSE("Pagemap value for %" PRIA PRIx ", pagemap offset %" PRIA PRIx " is %" PRIA PRIx " -> %s\n", start, pagemapOffset, value, is_page_present ? "in memory" : (is_page_swapped ? "in swap" : "NOT in memory")); + return is_page_present || is_page_swapped; +} + +bool +CrashInfo::PageCanBeRead(uint64_t start) +{ + BYTE buffer[1]; + size_t read; + return ReadProcessMemory((void*)start, buffer, 1, &read); } // diff --git a/src/coreclr/debug/createdump/crashinfo.h b/src/coreclr/debug/createdump/crashinfo.h index 7e46c65552ae6f..59c18581767b7a 100644 --- a/src/coreclr/debug/createdump/crashinfo.h +++ b/src/coreclr/debug/createdump/crashinfo.h @@ -58,7 +58,8 @@ class CrashInfo : public ICLRDataEnumMemoryRegionsCallback, public ICLRDataLoggi vm_map_t m_task; // the mach task for the process #else bool m_canUseProcVmReadSyscall; - int m_fd; // /proc//mem handle + int m_fdMem; // /proc//mem handle + int m_fdPagemap; // /proc//pagemap handle #endif std::string m_coreclrPath; // the path of the coreclr module or empty if none uint64_t m_runtimeBaseAddress; @@ -158,7 +159,8 @@ class CrashInfo : public ICLRDataEnumMemoryRegionsCallback, public ICLRDataLoggi void AddOrReplaceModuleMapping(CLRDATA_ADDRESS baseAddress, ULONG64 size, const std::string& pszName); int InsertMemoryRegion(const MemoryRegion& region); uint32_t GetMemoryRegionFlags(uint64_t start); - bool ValidRegion(const MemoryRegion& region); + bool PageCanBeRead(uint64_t start); + bool PageMappedToPhysicalMemory(uint64_t start); void Trace(const char* format, ...); void TraceVerbose(const char* format, ...); }; diff --git a/src/coreclr/debug/createdump/crashinfounix.cpp b/src/coreclr/debug/createdump/crashinfounix.cpp index 77a5439629206d..ab0b8c6e3144a9 100644 --- a/src/coreclr/debug/createdump/crashinfounix.cpp +++ b/src/coreclr/debug/createdump/crashinfounix.cpp @@ -19,8 +19,8 @@ CrashInfo::Initialize() char memPath[128]; _snprintf_s(memPath, sizeof(memPath), sizeof(memPath), "/proc/%lu/mem", m_pid); - m_fd = open(memPath, O_RDONLY); - if (m_fd == -1) + m_fdMem = open(memPath, O_RDONLY); + if (m_fdMem == -1) { int err = errno; const char* message = "Problem accessing memory"; @@ -35,6 +35,15 @@ CrashInfo::Initialize() printf_error("%s: open(%s) FAILED %s (%d)\n", message, memPath, strerror(err), err); return false; } + + char pagemapPath[128]; + _snprintf_s(pagemapPath, sizeof(pagemapPath), sizeof(pagemapPath), "/proc/%lu/pagemap", m_pid); + m_fdPagemap = open(pagemapPath, O_RDONLY); + if (m_fdPagemap == -1) + { + TRACE("open(%s) FAILED %d (%s), will fallback to dumping all memory regions without checking if they are committed\n", pagemapPath, errno, strerror(errno)); + } + // Get the process info if (!GetStatus(m_pid, &m_ppid, &m_tgid, &m_name)) { @@ -57,10 +66,15 @@ CrashInfo::CleanupAndResumeProcess() waitpid(thread->Tid(), &waitStatus, __WALL); } } - if (m_fd != -1) + if (m_fdMem != -1) + { + close(m_fdMem); + m_fdMem = -1; + } + if (m_fdPagemap != -1) { - close(m_fd); - m_fd = -1; + close(m_fdPagemap); + m_fdPagemap = -1; } } @@ -443,8 +457,8 @@ CrashInfo::ReadProcessMemory(void* address, void* buffer, size_t size, size_t* r // After all, the use of process_vm_readv is largely as a // performance optimization. m_canUseProcVmReadSyscall = false; - assert(m_fd != -1); - *read = pread64(m_fd, buffer, size, (off64_t)address); + assert(m_fdMem != -1); + *read = pread64(m_fdMem, buffer, size, (off64_t)address); } if (*read == (size_t)-1) From 7f46e2f8a96ba0f89514c809f467c66cb24e43c6 Mon Sep 17 00:00:00 2001 From: "e.zhirov" Date: Tue, 20 Dec 2022 17:09:46 +0100 Subject: [PATCH 2/3] do not try to read pagemap on macos --- src/coreclr/debug/createdump/crashinfo.cpp | 50 ++++++++++++---------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/src/coreclr/debug/createdump/crashinfo.cpp b/src/coreclr/debug/createdump/crashinfo.cpp index 139fe1a4706c90..9889da9f3962a9 100644 --- a/src/coreclr/debug/createdump/crashinfo.cpp +++ b/src/coreclr/debug/createdump/crashinfo.cpp @@ -733,31 +733,37 @@ CrashInfo::InsertMemoryRegion(const MemoryRegion& region) } bool -CrashInfo::PageMappedToPhysicalMemory(uint64_t start) { - // https://www.kernel.org/doc/Documentation/vm/pagemap.txt - if (m_fdPagemap == -1) - { - // Weren't able to open pagemap file, so don't run this check - // Permission issues are expected on Linux kernels 4.0 and 4.1 +CrashInfo::PageMappedToPhysicalMemory(uint64_t start) +{ + #ifdef __APPLE__ + // this check has not been implemented yet for macos return true; - } + #else + // https://www.kernel.org/doc/Documentation/vm/pagemap.txt + if (m_fdPagemap == -1) + { + // Weren't able to open pagemap file, so don't run this check + // Permission issues are expected on Linux kernels 4.0 and 4.1 + return true; + } - uint64_t pagemapOffset = (start / PAGE_SIZE) * sizeof(uint64_t); - lseek64(m_fdPagemap, (off64_t) pagemapOffset, SEEK_SET); - uint64_t value; - size_t res = read(m_fdPagemap, (void*)&value, sizeof(value)); - if (res == (size_t) -1) - { - int readErrno = errno; - TRACE("Reading of pagemap file FAILED, addr: %" PRIA PRIx ", pagemap offset: %" PRIA PRIx ", size: %zu, ERRNO %d: %s\n", start, pagemapOffset, sizeof(value), readErrno, strerror(readErrno)); - // still try to put this page in the dump file - return true; - } + uint64_t pagemapOffset = (start / PAGE_SIZE) * sizeof(uint64_t); + lseek64(m_fdPagemap, (off64_t) pagemapOffset, SEEK_SET); + uint64_t value; + size_t res = read(m_fdPagemap, (void*)&value, sizeof(value)); + if (res == (size_t) -1) + { + int readErrno = errno; + TRACE("Reading of pagemap file FAILED, addr: %" PRIA PRIx ", pagemap offset: %" PRIA PRIx ", size: %zu, ERRNO %d: %s\n", start, pagemapOffset, sizeof(value), readErrno, strerror(readErrno)); + // still try to put this page in the dump file + return true; + } - bool is_page_present = (value & ((uint64_t)1 << 63)) != 0; - bool is_page_swapped = (value & ((uint64_t)1 << 62)) != 0; - TRACE_VERBOSE("Pagemap value for %" PRIA PRIx ", pagemap offset %" PRIA PRIx " is %" PRIA PRIx " -> %s\n", start, pagemapOffset, value, is_page_present ? "in memory" : (is_page_swapped ? "in swap" : "NOT in memory")); - return is_page_present || is_page_swapped; + bool is_page_present = (value & ((uint64_t)1 << 63)) != 0; + bool is_page_swapped = (value & ((uint64_t)1 << 62)) != 0; + TRACE_VERBOSE("Pagemap value for %" PRIA PRIx ", pagemap offset %" PRIA PRIx " is %" PRIA PRIx " -> %s\n", start, pagemapOffset, value, is_page_present ? "in memory" : (is_page_swapped ? "in swap" : "NOT in memory")); + return is_page_present || is_page_swapped; + #endif } bool From 07d8aff0f499eac3c608012e9b73b0c77fee24fb Mon Sep 17 00:00:00 2001 From: "e.zhirov" Date: Wed, 21 Dec 2022 12:12:06 +0100 Subject: [PATCH 3/3] Address review comments --- src/coreclr/debug/createdump/crashinfo.cpp | 26 ++++++++++++++++------ 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/coreclr/debug/createdump/crashinfo.cpp b/src/coreclr/debug/createdump/crashinfo.cpp index 9889da9f3962a9..c6def66aa4a012 100644 --- a/src/coreclr/debug/createdump/crashinfo.cpp +++ b/src/coreclr/debug/createdump/crashinfo.cpp @@ -732,30 +732,42 @@ CrashInfo::InsertMemoryRegion(const MemoryRegion& region) return pagesAdded; } +// +// Check the page is really used by the application before adding it to the dump +// On some kernels reading a region from createdump results in committing this region in the parent application +// That leads to OOM in container environment and unnecesserally increses the size of the dump file +// However this is an optimization: if it fails we still try to add the page to the dump +// bool CrashInfo::PageMappedToPhysicalMemory(uint64_t start) { - #ifdef __APPLE__ - // this check has not been implemented yet for macos + #if !defined(__linux__) + // this check has not been implemented yet for other unix systems return true; #else // https://www.kernel.org/doc/Documentation/vm/pagemap.txt if (m_fdPagemap == -1) { // Weren't able to open pagemap file, so don't run this check - // Permission issues are expected on Linux kernels 4.0 and 4.1 + // Expected on kernels 4.0 and 4.1 as we need CAP_SYS_ADMIN to open /proc/pid/pagemap + // On kernels after 4.2 we only need PTRACE_MODE_READ_FSCREDS as we are ok with zeroed PFNs return true; } uint64_t pagemapOffset = (start / PAGE_SIZE) * sizeof(uint64_t); - lseek64(m_fdPagemap, (off64_t) pagemapOffset, SEEK_SET); + uint64_t seekResult = lseek64(m_fdPagemap, (off64_t) pagemapOffset, SEEK_SET); + if (seekResult != pagemapOffset) + { + int seekErrno = errno; + TRACE("Seeking in pagemap file FAILED, addr: %" PRIA PRIx ", pagemap offset: %" PRIA PRIx ", ERRNO %d: %s\n", start, pagemapOffset, seekErrno, strerror(seekErrno)); + return true; + } uint64_t value; - size_t res = read(m_fdPagemap, (void*)&value, sizeof(value)); - if (res == (size_t) -1) + size_t readResult = read(m_fdPagemap, (void*)&value, sizeof(value)); + if (readResult == (size_t) -1) { int readErrno = errno; TRACE("Reading of pagemap file FAILED, addr: %" PRIA PRIx ", pagemap offset: %" PRIA PRIx ", size: %zu, ERRNO %d: %s\n", start, pagemapOffset, sizeof(value), readErrno, strerror(readErrno)); - // still try to put this page in the dump file return true; }