Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add demo repositories mechanism to populate user's space #2207

Merged
merged 20 commits into from
Jan 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/_nebari/stages/kubernetes_services/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ class IdleCuller(schema.Base):

class JupyterLab(schema.Base):
    # Idle-culler settings; defaults to a default-constructed IdleCuller.
    idle_culler: IdleCuller = IdleCuller()
    # Git repositories to pre-populate. Each list item is a dict whose
    # entries are consumed downstream as (folder path, git repository URL)
    # pairs. The whole list is passed to the JupyterHub stage as `str(...)`
    # of this value, so it must stay representable as a Python literal.
    initial_repositories: typing.List[typing.Dict[str, str]] = []


class InputSchema(schema.Base):
Expand Down Expand Up @@ -344,6 +345,7 @@ class CondaStoreInputVars(schema.Base):
class JupyterhubInputVars(schema.Base):
jupyterhub_theme: Dict[str, Any] = Field(alias="jupyterhub-theme")
jupyterlab_image: ImageNameTag = Field(alias="jupyterlab-image")
initial_repositories: str = Field(alias="initial-repositories")
jupyterhub_overrides: List[str] = Field(alias="jupyterhub-overrides")
jupyterhub_stared_storage: str = Field(alias="jupyterhub-shared-storage")
jupyterhub_shared_endpoint: str = Field(None, alias="jupyterhub-shared-endpoint")
Expand Down Expand Up @@ -488,6 +490,7 @@ def input_vars(self, stage_outputs: Dict[str, Dict[str, Any]]):
idle_culler_settings=self.config.jupyterlab.idle_culler.dict(),
argo_workflows_enabled=self.config.argo_workflows.enabled,
jhub_apps_enabled=self.config.jhub_apps.enabled,
initial_repositories=str(self.config.jupyterlab.initial_repositories),
)

dask_gateway_vars = DaskGatewayInputVars(
Expand Down
6 changes: 6 additions & 0 deletions src/_nebari/stages/kubernetes_services/template/jupyterhub.tf
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ variable "jupyterlab-profiles" {
description = "JupyterHub profiles to expose to user"
}

# NOTE: despite the wording of the description, this is a plain string. It
# carries the Python `str()` rendering of a list of dictionaries
# (folder path -> git repository URL), which the JupyterHub configuration
# parses back into objects at spawn time.
variable "initial-repositories" {
  description = "Map of folder location and git repo url to clone"
  type        = string
}

variable "jupyterhub-hub-extraEnv" {
description = "Extracted overrides to merge with jupyterhub.hub.extraEnv"
type = string
Expand Down Expand Up @@ -129,6 +134,7 @@ module "jupyterhub" {
jupyterhub-hub-extraEnv = var.jupyterhub-hub-extraEnv

idle-culler-settings = var.idle-culler-settings
initial-repositories = var.initial-repositories

jupyterlab-pioneer-enabled = var.jupyterlab-pioneer-enabled
jupyterlab-pioneer-log-format = var.jupyterlab-pioneer-log-format
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,14 @@ resource "kubernetes_config_map" "jupyterlab-settings" {
filename => file("${path.module}/files/jupyterlab/${filename}")
}
}

# ConfigMap that ships the git clone/update helper script. It is mounted into
# the user pod's init container, which runs the script to pre-populate the
# user's home directory with the configured repositories.
resource "kubernetes_config_map" "git_clone_update" {
  metadata {
    name      = "git-clone-update"
    namespace = var.namespace
  }

  data = {
    # Call file() directly: wrapping a single expression in "${...}" is a
    # redundant interpolation-only expression (deprecated since Terraform 0.12).
    "git-clone-update.sh" = file("${path.module}/files/extras/git_clone_update.sh")
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/bin/sh

################################################################################
# Git Clone and/or Update Script
#
# This script automates Git repository handling with the following features:
#
# 1. Clones/Updates a Git repository into a specified folder;
# 2. Creates a `.firstrun` file in the folder once the clone/update succeeded,
#    ensuring each folder is only provisioned once. A failed clone/update is
#    NOT marked, so it is retried on the next run.
#
# Usage: ./git_clone_update.sh "<folder1> <git_repo_url1>" [...]
#   - <folderX>: Path to the folder where the Git repository will be cloned or updated.
#   - <git_repo_urlX>: URL of the Git repository to clone or update.
#
# Failures are appended to $ERROR_LOG, relative to the current directory.
################################################################################

# Define colors for messages and command output
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[0;33m'
NC='\033[0m'

ERROR_LOG=".git-sync-errors.txt"

log() {
  # Print a message, interpreting backslash escapes (colors).
  # `echo -e` is not portable under /bin/sh (dash prints a literal "-e"),
  # so printf with %b is used instead.
  printf '%b\n' "$*"
}

log "${GREEN}Starting execution...${NC}"

# Check if at least one pair of folder and git repo URL is provided
if [ "$#" -lt 1 ] || [ "$1" = "--help" ]; then
  echo "Usage: $0 \"<folder1> <git_repo_url1>\" \"<folder2> <git_repo_url2>\" ..."

  # Exit with status code 0 if '--help' is provided, otherwise exit with status code 1
  [ "$1" = "--help" ] && exit 0 || exit 1
fi

clone_update_repository() {
  # Clone or update a Git repository into a specified folder, and create a
  # `.firstrun` file to mark a successful execution.
  #
  # $1: folder path, $2: git repository URL.
  # Returns non-zero when the folder could not be prepared or git failed, so
  # the caller can record the failure.

  local folder_path="$1"
  local git_repo_url="$2"

  local firstrun_file="$folder_path/.firstrun"

  if [ -f "$firstrun_file" ]; then
    log "The script has already been run for ${folder_path}. Skipping. ${GREEN}✅${NC}"
    return 0
  fi

  mkdir -p "$folder_path" || return 1

  if [ -d "$folder_path/.git" ]; then
    log "Updating Git repository in ${folder_path}..."
    (cd "$folder_path" && git pull) || return 1
  else
    log "Cloning Git repository to ${folder_path}..."
    git clone "$git_repo_url" "$folder_path" || return 1
  fi

  # Only mark the folder as provisioned once git actually succeeded.
  touch "$firstrun_file" || return 1
  log "Execution for ${folder_path} completed. ${GREEN}✅${NC}"
}


# Iterate through the pairs, processing them one after another
for pair in "$@"; do
  # Split the pair into folder_path and git_repo_url on the first space.
  # A pair without any space has no URL and is rejected below.
  case "$pair" in
    *" "*)
      folder_path=${pair%% *}
      git_repo_url=${pair#* }
      ;;
    *)
      folder_path=""
      git_repo_url=""
      ;;
  esac

  if [ -z "$folder_path" ] || [ -z "$git_repo_url" ]; then
    log "${RED}Invalid argument format: \"${pair}\". Please provide folder path and Git repository URL in the correct order.${NC}" >> "$ERROR_LOG"
  else
    clone_update_repository "$folder_path" "$git_repo_url" || log "${RED}Error executing for ${folder_path}.${NC}" >> "$ERROR_LOG"
    chown -R 1000:100 "$folder_path" # User permissions for JupyterLab user
  fi
done

if [ -s "$ERROR_LOG" ]; then
  log "${RED}Some operations failed. See errors in '${ERROR_LOG}'.${NC}"
  chown 1000:100 "$ERROR_LOG"
else
  log "${GREEN}All operations completed successfully. ✅${NC}"
fi
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import ast
import copy
import functools
import json
Expand Down Expand Up @@ -232,6 +233,85 @@ def base_profile_extra_mounts():
}


def configure_user_provisioned_repositories(username):
    """Build spawner overrides that pre-populate a user's home with git repos.

    Reads the ``custom.initial-repositories`` setting, a string holding the
    Python literal of a list of dictionaries whose items are
    ``folder path -> git repository URL``. When at least one repository is
    configured, returns an init container that copies the
    ``git-clone-update.sh`` helper from its ConfigMap mount into the user's
    home, runs it there once per configured pair and removes the copy again.

    Args:
        username: Name of the user whose home sub-path (``home/<username>``)
            is mounted into the init container.

    Returns:
        ``{}`` when nothing is configured, otherwise a dict with
        ``extra_pod_config`` (the ConfigMap volume) and ``init_containers``.

    Raises:
        ValueError: If the setting is not a literal list of dictionaries.
    """
    # Local import so the block does not depend on the module's import list.
    import shlex

    def string_to_objects(input_string):
        # Convert the string configuration to a list of dictionaries.
        # ast.literal_eval only evaluates literals, never arbitrary code.
        try:
            result = ast.literal_eval(input_string)
        except (ValueError, SyntaxError) as err:
            raise ValueError(f"Invalid input string format: {input_string}") from err
        if not isinstance(result, list) or not all(
            isinstance(item, dict) for item in result
        ):
            raise ValueError(f"Invalid input string format: {input_string}")
        return result

    raw_repositories = z2jh.get_config("custom.initial-repositories")

    # An unset or empty setting simply means there is nothing to provision.
    if not raw_repositories:
        return {}

    git_repos_provision_pvc = string_to_objects(raw_repositories)

    if not git_repos_provision_pvc:
        return {}

    # Define paths and configurations
    pvc_home_mount_path = f"home/{username}"
    home_mount_path = f"/mnt/{pvc_home_mount_path}"
    extras_git_clone_cp_path = f"{home_mount_path}/.git-clone-update.sh"

    # Define the extra pod configuration for the volumes
    extra_pod_config = {
        "volumes": [
            {
                "name": "git-clone-update",
                # 511 == 0o777: the mounted script must be executable
                "configMap": {"name": "git-clone-update", "defaultMode": 511},
            }
        ]
    }

    # Everything below ends up in a single `sh -c` string, so every
    # interpolated value is shell-quoted. For ordinary values the result is
    # identical to the previous hand-written quoting ('<path> <url>').
    quoted_script_path = shlex.quote(extras_git_clone_cp_path)
    repo_arguments = [
        shlex.quote(f"{path} {remote_url}")
        for local_repo_pair in git_repos_provision_pvc
        for path, remote_url in local_repo_pair.items()
    ]
    bash_execution = " ".join(["./.git-clone-update.sh", *repo_arguments])

    exec_ownership_change = " && ".join(
        [
            f"cp /mnt/extras/git-clone-update.sh {quoted_script_path}",
            f"chmod 777 {quoted_script_path}",
            f"chown -R 1000:100 {quoted_script_path}",
            f"cd {shlex.quote(home_mount_path)}",
            bash_execution,
            f"rm -f {quoted_script_path}",
        ]
    )

    # Define init containers configuration
    init_containers = [
        {
            "name": "pre-populate-git-repos",
            "image": "bitnami/git",
            "command": ["sh", "-c", exec_ownership_change],
            # Runs as root; the commands above chown the script copy to 1000:100.
            "securityContext": {"runAsUser": 0},
            "volumeMounts": [
                {
                    "mountPath": home_mount_path,
                    "name": "home",
                    "subPath": pvc_home_mount_path,
                },
                {"mountPath": "/mnt/extras", "name": "git-clone-update"},
            ],
        }
    ]

    return {
        "extra_pod_config": extra_pod_config,
        "init_containers": init_containers,
    }


def configure_user(username, groups, uid=1000, gid=100):
environment = {
# nss_wrapper
Expand Down Expand Up @@ -416,6 +496,7 @@ def render_profile(profile, username, groups, keycloak_profilenames):
profile_conda_store_mounts(username, groups),
base_profile_extra_mounts(),
configure_user(username, groups),
configure_user_provisioned_repositories(username),
profile_kubespawner_override,
],
{},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ resource "helm_release" "jupyterhub" {
conda-store-service-name = var.conda-store-service-name
conda-store-jhub-apps-token = var.conda-store-jhub-apps-token
jhub-apps-enabled = var.jhub-apps-enabled
initial-repositories = var.initial-repositories
skel-mount = {
name = kubernetes_config_map.etc-skel.metadata.0.name
namespace = kubernetes_config_map.etc-skel.metadata.0.namespace
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,9 @@ variable "jupyterlab-pioneer-log-format" {
description = "Logging format for JupyterLab Pioneer"
type = string
}

# NOTE: despite the wording of the description, this is a plain string holding
# the textual form of a list of dictionaries (folder path -> git repository
# URL). The default "[]" is the textual form of an empty list, i.e. no
# repositories are provisioned.
variable "initial-repositories" {
  description = "Map of folder location and git repo url to clone"
  type        = string
  default     = "[]"
}
Loading