Skip to content

Commit

Permalink
runtime-rs: enable using hugepages
Browse files Browse the repository at this point in the history
enable the functionality of using hugepages in container

Fixes: #5560
Signed-off-by: Zhongtao Hu <[email protected]>
  • Loading branch information
Tim-0731-Hzt committed Nov 10, 2022
1 parent 56641bc commit e7fae46
Show file tree
Hide file tree
Showing 14 changed files with 282 additions and 18 deletions.
20 changes: 19 additions & 1 deletion src/runtime-rs/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions src/runtime-rs/config/configuration-dragonball.toml.in
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,14 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_DB@"
# of shim, does not need an external virtiofsd process.
shared_fs = "@DBSHAREDFS@"

# Enable huge pages for VM RAM (default: false).
# When enabled, the VM memory is allocated using huge pages.
# This is useful when you want to use vhost-user network
# stacks within the container. Enabling this option
# automatically results in memory pre-allocation.
#enable_hugepages = true

[agent.@PROJECT_TYPE@]
container_pipe_size=@PIPESIZE@
# If enabled, make the agent display debug-level messages.
Expand Down
7 changes: 7 additions & 0 deletions src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,11 +186,18 @@ impl DragonballInner {

fn set_vm_base_config(&mut self) -> Result<()> {
let serial_path = [&self.run_dir, "console.sock"].join("/");
let (mem_type, mem_file_path) = if self.config.memory_info.enable_hugepages {
(String::from("hugetlbfs"), String::from("/dev/hugepages"))
} else {
(String::from("shmem"), String::from(""))
};
let vm_config = VmConfigInfo {
serial_path: Some(serial_path),
mem_size_mib: self.config.memory_info.default_memory as usize,
vcpu_count: self.config.cpu_info.default_vcpus as u8,
max_vcpu_count: self.config.cpu_info.default_maxvcpus as u8,
mem_type,
mem_file_path,
..Default::default()
};
info!(sl!(), "vm config: {:?}", vm_config);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -327,9 +327,9 @@ impl VmmInstance {
}
}
}
return Err(anyhow::anyhow!(
Err(anyhow::anyhow!(
"After {} attempts, it still doesn't work.",
REQUEST_RETRY
));
))
}
}
4 changes: 2 additions & 2 deletions src/runtime-rs/crates/persist/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ pub fn to_disk<T: serde::Serialize>(value: &T, sid: &str) -> Result<()> {
serde_json::to_writer_pretty(f, &j)?;
return Ok(());
}
return Err(anyhow!("invalid sid {}", sid));
Err(anyhow!("invalid sid {}", sid))
}

pub fn from_disk<T>(sid: &str) -> Result<T>
Expand All @@ -41,7 +41,7 @@ where
let reader = BufReader::new(file);
return serde_json::from_reader(reader).map_err(|e| anyhow!(e.to_string()));
}
return Err(anyhow!("invalid sid {}", sid));
Err(anyhow!("invalid sid {}", sid))
}

#[cfg(test)]
Expand Down
2 changes: 2 additions & 0 deletions src/runtime-rs/crates/resource/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@ license = "Apache-2.0"

[dev-dependencies]
test-utils = { path = "../../../libs/test-utils" }
tempfile = "3.2.0"

[dependencies]
anyhow = "^1.0"
async-trait = "0.1.48"
bitflags = "1.2.1"
byte-unit = "4.0.14"
cgroups-rs = "0.2.9"
futures = "0.3.11"
lazy_static = "1.4.0"
Expand Down
4 changes: 2 additions & 2 deletions src/runtime-rs/crates/resource/src/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,10 @@ impl ResourceManager {
pub async fn handler_volumes(
&self,
cid: &str,
oci_mounts: &[oci::Mount],
spec: &oci::Spec,
) -> Result<Vec<Arc<dyn Volume>>> {
let inner = self.inner.read().await;
inner.handler_volumes(cid, oci_mounts).await
inner.handler_volumes(cid, spec).await
}

pub async fn dump(&self) {
Expand Down
4 changes: 2 additions & 2 deletions src/runtime-rs/crates/resource/src/manager_inner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,10 +185,10 @@ impl ResourceManagerInner {
pub async fn handler_volumes(
&self,
cid: &str,
oci_mounts: &[oci::Mount],
spec: &oci::Spec,
) -> Result<Vec<Arc<dyn Volume>>> {
self.volume_resource
.handler_volumes(&self.share_fs, cid, oci_mounts)
.handler_volumes(&self.share_fs, cid, spec)
.await
}

Expand Down
1 change: 1 addition & 0 deletions src/runtime-rs/crates/resource/src/share_fs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use share_virtio_fs_standalone::ShareVirtioFsStandalone;
mod utils;
mod virtio_fs_share_mount;
use virtio_fs_share_mount::VirtiofsShareMount;
pub use virtio_fs_share_mount::EPHEMERAL_PATH;

use std::sync::Arc;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use std::path::Path;

const WATCHABLE_PATH_NAME: &str = "watchable";
const WATCHABLE_BIND_DEV_TYPE: &str = "watchable-bind";
const EPHEMERAL_PATH: &str = "/run/kata-containers/sandbox/ephemeral";
pub const EPHEMERAL_PATH: &str = "/run/kata-containers/sandbox/ephemeral";

use super::{
utils, ShareFsMount, ShareFsMountResult, ShareFsRootfsConfig, ShareFsVolumeConfig,
Expand Down
219 changes: 219 additions & 0 deletions src/runtime-rs/crates/resource/src/volume/hugepage.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//

use std::{
collections::HashMap,
fs::File,
io::{BufRead, BufReader},
};

use agent::Storage;
use anyhow::{anyhow, Context, Result};
use byte_unit::Byte;
use kata_sys_util::fs::get_base_name;
use kata_types::mount::KATA_EPHEMERAL_VOLUME_TYPE;

use crate::share_fs::EPHEMERAL_PATH;

use super::Volume;

const PROC_MOUNTS_FILE: &str = "/proc/mounts";

/// A volume backed by a hugetlbfs mount.
///
/// Both fields are filled in by `Hugepage::new` and handed back unchanged by
/// the `Volume` trait implementation.
pub(crate) struct Hugepage {
// Storage description for the agent; `new` always sets this to `Some`.
storage: Option<Storage>,
// Copy of the OCI mount with `source` and `type` rewritten by `new`.
mount: oci::Mount,
}

/// Construction of hugepage-backed volumes.
impl Hugepage {
    /// Builds a [`Hugepage`] volume from an OCI mount whose source is a
    /// hugetlbfs mount point.
    ///
    /// * `mount` - the OCI mount as it appears in the container spec.
    /// * `options_map` - huge page limits keyed by page size (see
    ///   `get_huge_page_option`).
    /// * `fs_options` - mount options of the hugetlbfs mount (see
    ///   `is_huge_page`); the `pagesize=` entry selects the limit to use.
    ///
    /// The returned volume carries a copy of `mount` whose source points below
    /// `EPHEMERAL_PATH` and whose type is `bind`, plus a `Storage` entry
    /// describing the hugetlbfs mount.
    ///
    /// # Errors
    ///
    /// Fails when `fs_options` has no `pagesize=` entry, when the page size
    /// cannot be parsed, when `options_map` is `None`, when the map has no
    /// limit for that page size, or when the base name of the mount source
    /// cannot be obtained as a UTF-8 string.
    pub(crate) fn new(
        mount: &oci::Mount,
        options_map: Option<HashMap<Byte, u64>>,
        fs_options: Option<Vec<String>>,
    ) -> Result<Self> {
        // Create mount option string: the page size comes from the mount
        // options, the size from the limit registered for that page size.
        let page_size = get_page_size(fs_options).context("failed to get huge page options")?;
        let page_size =
            Byte::from_str(page_size).context("failed to create Byte object from String")?;
        let option = options_map
            .context("failed to get options map")?
            .get(&page_size)
            .map(|size| format!("pagesize={},size={}", page_size.get_bytes(), size))
            .context("failed to get huge page options")?;

        let base_name = get_base_name(mount.source.clone())?
            .into_string()
            .map_err(|e| anyhow!("{:?}", e))?;

        let mut mount = mount.clone();
        // Set the mount source path to a path that resides inside the VM
        mount.source = format!("{}/{}", EPHEMERAL_PATH, base_name);
        // Set the mount type to "bind"
        mount.r#type = "bind".to_string();

        // Create a storage struct so that kata agent is able to create
        // hugetlbfs backed volume inside the VM
        let storage = Storage {
            driver: KATA_EPHEMERAL_VOLUME_TYPE.to_string(),
            source: "nodev".to_string(),
            fs_type: "hugetlbfs".to_string(),
            mount_point: mount.source.clone(),
            options: vec![option],
            ..Default::default()
        };

        Ok(Self {
            storage: Some(storage),
            mount,
        })
    }
}

impl Volume for Hugepage {
    /// Returns the single OCI mount held by this volume.
    fn get_volume_mount(&self) -> Result<Vec<oci::Mount>> {
        Ok(vec![self.mount.clone()])
    }

    /// Returns the storage entry for the agent, or an empty list when no
    /// storage is set.
    fn get_storage(&self) -> Result<Vec<agent::Storage>> {
        Ok(self.storage.iter().cloned().collect())
    }

    /// Releases resources owned by this volume.
    ///
    /// This type only holds in-memory descriptions (`mount` and `storage`), so
    /// there is nothing to release here. Returning `Ok(())` instead of
    /// `todo!()` keeps callers that clean up every volume from panicking.
    fn cleanup(&self) -> Result<()> {
        Ok(())
    }
}

/// Checks whether the source of `m` is a hugetlbfs mount point.
///
/// Scans `PROC_MOUNTS_FILE` for an entry whose mount point equals `m.source`
/// and whose file system type is `hugetlbfs`.
///
/// Returns `(true, Some(options))` for the first matching entry, where
/// `options` is its comma-separated mount option field split into strings,
/// and `(false, None)` when no entry matches.
///
/// # Errors
///
/// Fails when `m.source` is empty or when the mounts file cannot be opened.
pub(crate) fn is_huge_page(m: &oci::Mount) -> Result<(bool, Option<Vec<String>>)> {
    if m.source.is_empty() {
        return Err(anyhow!("empty mount source"));
    }
    let file = File::open(PROC_MOUNTS_FILE).context("failed open file")?;
    let reader = BufReader::new(file);
    // Stop at the first read error: `flatten()` would keep polling a reader
    // that may return the same error forever.
    for line in reader.lines().map_while(std::result::Result::ok) {
        // Fields: device, mount point, fs type, options, ...
        let mut fields = line.split(' ').skip(1);
        // Skip lines with fewer than four fields instead of panicking on an
        // out-of-bounds index.
        if let (Some(mount_point), Some(fs_type), Some(options)) =
            (fields.next(), fields.next(), fields.next())
        {
            if m.source == mount_point && fs_type == "hugetlbfs" {
                let fs_options = options.split(',').map(String::from).collect();
                return Ok((true, Some(fs_options)));
            }
        }
    }
    Ok((false, None))
}

/// Collects the huge page limits of `spec` into a map keyed by page size.
///
/// Returns `Ok(None)` when the spec has no `linux` section or no `resources`
/// inside it; otherwise returns a map with one entry per hugepage limit (empty
/// when there are none).
///
/// # Errors
///
/// Fails when a page size string cannot be parsed into a `Byte`.
pub(crate) fn get_huge_page_option(spec: &oci::Spec) -> Result<Option<HashMap<Byte, u64>>> {
    let resources = match spec.linux.as_ref().and_then(|linux| linux.resources.as_ref()) {
        Some(resources) => resources,
        None => return Ok(None),
    };

    let limits_by_size = resources
        .hugepage_limits
        .iter()
        .map(|entry| {
            // The OCI spec expresses page sizes as MB or GB; rewrite them to
            // Mi and Gi before parsing.
            let normalized = entry.page_size.replace('B', "i");
            Byte::from_str(normalized)
                .context("failed to create Byte object from String")
                .map(|size| (size, entry.limit))
        })
        .collect::<Result<HashMap<Byte, u64>>>()?;

    Ok(Some(limits_by_size))
}

/// Extracts the huge page size from a list of mount options.
///
/// Takes the first option that starts with `pagesize=` and returns its value
/// with an `i` appended (e.g. `pagesize=2M` yields `2Mi`). Returns `None` when
/// `fs_options` is `None` or contains no such option.
fn get_page_size(fs_options: Option<Vec<String>>) -> Option<String> {
    let options = fs_options?;
    options
        .iter()
        .find_map(|opt| opt.strip_prefix("pagesize="))
        .map(|value| format!("{}i", value))
}

#[cfg(test)]
mod tests {

    use std::{collections::HashMap, fs};

    use crate::volume::hugepage::get_page_size;

    use super::{get_huge_page_option, is_huge_page};
    use byte_unit::Byte;
    use nix::mount::{mount, umount, MsFlags};
    use nix::unistd::Uid;
    use oci::{Linux, LinuxHugepageLimit, LinuxResources};

    /// Page sizes given as `GB`/`MB` in the spec must be keyed as `Gi`/`Mi`.
    #[test]
    fn test_get_huge_page_option() {
        let format_sizes = ["1GB", "2MB"];
        let mut huge_page_limits: Vec<LinuxHugepageLimit> = vec![];
        for format_size in format_sizes {
            huge_page_limits.push(LinuxHugepageLimit {
                page_size: format_size.to_string(),
                limit: 100000,
            });
        }

        let spec = oci::Spec {
            linux: Some(Linux {
                resources: Some(LinuxResources {
                    hugepage_limits: huge_page_limits,
                    ..Default::default()
                }),
                ..Default::default()
            }),
            ..Default::default()
        };

        assert!(get_huge_page_option(&spec).unwrap().is_some());

        let mut expect_res = HashMap::new();
        expect_res.insert(Byte::from_str("1Gi").ok().unwrap(), 100000);
        expect_res.insert(Byte::from_str("2Mi").ok().unwrap(), 100000);
        assert_eq!(get_huge_page_option(&spec).unwrap().unwrap(), expect_res);
    }

    /// Mounts a hugetlbfs instance per page size and checks that the page
    /// size read back from the mount options matches the requested one.
    #[test]
    fn test_get_huge_page_size() {
        // Mounting a file system requires root; skip instead of failing when
        // the test suite runs as an unprivileged user.
        if !Uid::effective().is_root() {
            println!("INFO: skipping test_get_huge_page_size which needs root");
            return;
        }

        let format_sizes = ["1Gi", "2Mi"];
        for format_size in format_sizes {
            let dir = tempfile::tempdir().unwrap();
            let dst = dir.path().join(format!("hugepages-{}", format_size));
            fs::create_dir_all(&dst).unwrap();
            mount(
                Some("nodev"),
                &dst,
                Some("hugetlbfs"),
                MsFlags::MS_NODEV,
                Some(format!("pagesize={}", format_size).as_str()),
            )
            .unwrap();
            let mount = oci::Mount {
                source: dst.to_str().unwrap().to_string(),
                ..Default::default()
            };
            let (res, option) = is_huge_page(&mount).unwrap();
            assert!(res);
            let page_size = Byte::from_str(get_page_size(option).unwrap()).unwrap();
            assert_eq!(page_size, Byte::from_str(format_size).unwrap());
            umount(&dst).unwrap();
            fs::remove_dir(&dst).unwrap();
        }
    }
}
Loading

0 comments on commit e7fae46

Please sign in to comment.