diff --git a/tools/diagnose-fluid-alluxio.sh b/tools/diagnose-fluid-alluxio.sh
index 4fe88d013a7..63161644799 100644
--- a/tools/diagnose-fluid-alluxio.sh
+++ b/tools/diagnose-fluid-alluxio.sh
@@ -21,7 +21,17 @@ print_usage() {
 run() {
   echo
   echo "-----------------run $*------------------"
-  timeout 10s "$@"
+  if command -v timeout >/dev/null 2>&1; then
+    timeout 10s "$@"
+  elif command -v gtimeout >/dev/null 2>&1; then
+    gtimeout 10s "$@"
+  elif command -v perl >/dev/null 2>&1; then
+    # Use Perl to enforce timeout on systems without GNU coreutils (like standard macOS).
+    # The block form of exec never goes through a shell; exit 127 if exec itself fails.
+    perl -e 'alarm shift; exec { $ARGV[0] } @ARGV or exit 127' 10 "$@"
+  else
+    "$@"
+  fi
   if [ $? != 0 ]; then
     echo "failed to collect info: $*"
   fi
@@ -57,6 +67,49 @@ runtime_pod_logs() {
   core_component "${runtime_namespace}" "alluxio-fuse" "role=alluxio-fuse" "release=${runtime_name}"
 }
 
+# Collect logs from the fluid-fuse containers of pods in the runtime namespace
+# that are labelled serverless.fluid.io/inject=true and fluid.io/dataset=<id>.
+# Globals: runtime_namespace, runtime_name, diagnose_dir (all read).
+# Outputs: $diagnose_dir/pods-<namespace>-serverless/<pod>-<container>.log
+serverless_pod_logs() {
+  # Check if kubectl is available
+  if ! command -v kubectl >/dev/null 2>&1; then
+    echo "kubectl not found, skipping serverless pod logs collection"
+    return
+  fi
+
+  local namespace="${runtime_namespace}"
+  local dataset_id="${runtime_namespace}-${runtime_name}"
+  # fluid.io/dataset falls back to the Dataset UID when namespace-name >= 63 chars (DNS1035 limit)
+  if [[ ${#dataset_id} -ge 63 ]]; then
+    dataset_id=$(kubectl get dataset "${runtime_name}" -n "${namespace}" -o jsonpath='{.metadata.uid}' 2>/dev/null) || dataset_id="${runtime_namespace}-${runtime_name}"
+  fi
+  local label_selector="serverless.fluid.io/inject=true,fluid.io/dataset=${dataset_id}"
+
+  # Get all pods with the serverless inject label
+  local pods
+  pods=$(kubectl get po -n "${namespace}" -l "${label_selector}" -o jsonpath='{.items[*].metadata.name}' 2>/dev/null)
+  if [[ $? -ne 0 ]]; then
+    echo "failed to get serverless pods in namespace ${namespace}"
+    return
+  fi
+
+  if [[ -n "$pods" ]]; then
+    mkdir -p "$diagnose_dir/pods-${namespace}-serverless"
+    for po in ${pods}; do
+      # Select every container whose name starts with fluid-fuse or init-fluid-fuse, so suffixed names are collected too
+      local containers
+      containers=$(kubectl get po "${po}" -n "${namespace}" -o jsonpath='{.spec.initContainers[*].name} {.spec.containers[*].name}' 2>/dev/null | tr ' ' '\n' | grep -E '^(init-)?fluid-fuse')
+      if [[ -n "$containers" ]]; then
+        for container in ${containers}; do
+          kubectl logs "${po}" -c "${container}" -n "${namespace}" >"$diagnose_dir/pods-${namespace}-serverless/${po}-${container}.log" 2>&1
+        done
+      fi
+    done
+  fi
+  return
+}
+
 core_component() {
   # namespace container selectors...
   local namespace="$1"
@@ -105,6 +158,7 @@ pd_collect() {
   pod_status "${fluid_namespace}"
   pod_status "${runtime_namespace}"
   runtime_pod_logs
+  serverless_pod_logs
   fluid_pod_logs
   kubectl_resource
   archive
diff --git a/tools/diagnose-fluid-curvine.sh b/tools/diagnose-fluid-curvine.sh
index c7801a817ba..0d5cf33fa00 100644
--- a/tools/diagnose-fluid-curvine.sh
+++ b/tools/diagnose-fluid-curvine.sh
@@ -21,7 +21,17 @@ print_usage() {
 run() {
   echo
   echo "-----------------run $*------------------"
-  timeout 10s "$@"
+  if command -v timeout >/dev/null 2>&1; then
+    timeout 10s "$@"
+  elif command -v gtimeout >/dev/null 2>&1; then
+    gtimeout 10s "$@"
+  elif command -v perl >/dev/null 2>&1; then
+    # Use Perl to enforce timeout on systems without GNU coreutils (like standard macOS).
+    # The block form of exec never goes through a shell; exit 127 if exec itself fails.
+    perl -e 'alarm shift; exec { $ARGV[0] } @ARGV or exit 127' 10 "$@"
+  else
+    "$@"
+  fi
   if [ $? != 0 ]; then
     echo "failed to collect info: $*"
   fi
@@ -49,6 +59,49 @@ runtime_pod_logs() {
   core_component "${runtime_namespace}" "client" "cacheruntime.fluid.io/component-name=${runtime_name}-client"
 }
 
+# Collect logs from the fluid-fuse containers of pods in the runtime namespace
+# that are labelled serverless.fluid.io/inject=true and fluid.io/dataset=<id>.
+# Globals: runtime_namespace, runtime_name, diagnose_dir (all read).
+# Outputs: $diagnose_dir/pods-<namespace>-serverless/<pod>-<container>.log
+serverless_pod_logs() {
+  # Check if kubectl is available
+  if ! command -v kubectl >/dev/null 2>&1; then
+    echo "kubectl not found, skipping serverless pod logs collection"
+    return
+  fi
+
+  local namespace="${runtime_namespace}"
+  local dataset_id="${runtime_namespace}-${runtime_name}"
+  # fluid.io/dataset falls back to the Dataset UID when namespace-name >= 63 chars (DNS1035 limit)
+  if [[ ${#dataset_id} -ge 63 ]]; then
+    dataset_id=$(kubectl get dataset "${runtime_name}" -n "${namespace}" -o jsonpath='{.metadata.uid}' 2>/dev/null) || dataset_id="${runtime_namespace}-${runtime_name}"
+  fi
+  local label_selector="serverless.fluid.io/inject=true,fluid.io/dataset=${dataset_id}"
+
+  # Get all pods with the serverless inject label
+  local pods
+  pods=$(kubectl get po -n "${namespace}" -l "${label_selector}" -o jsonpath='{.items[*].metadata.name}' 2>/dev/null)
+  if [[ $? -ne 0 ]]; then
+    echo "failed to get serverless pods in namespace ${namespace}"
+    return
+  fi
+
+  if [[ -n "$pods" ]]; then
+    mkdir -p "$diagnose_dir/pods-${namespace}-serverless"
+    for po in ${pods}; do
+      # Select every container whose name starts with fluid-fuse or init-fluid-fuse, so suffixed names are collected too
+      local containers
+      containers=$(kubectl get po "${po}" -n "${namespace}" -o jsonpath='{.spec.initContainers[*].name} {.spec.containers[*].name}' 2>/dev/null | tr ' ' '\n' | grep -E '^(init-)?fluid-fuse')
+      if [[ -n "$containers" ]]; then
+        for container in ${containers}; do
+          kubectl logs "${po}" -c "${container}" -n "${namespace}" >"$diagnose_dir/pods-${namespace}-serverless/${po}-${container}.log" 2>&1
+        done
+      fi
+    done
+  fi
+  return
+}
+
 core_component() {
   # namespace container selectors...
   local namespace="$1"
@@ -92,6 +145,7 @@ pd_collect() {
   pod_status "${fluid_namespace}"
   pod_status "${runtime_namespace}"
   runtime_pod_logs
+  serverless_pod_logs
   fluid_pod_logs
   kubectl_resource
   archive
diff --git a/tools/diagnose-fluid-jindo.sh b/tools/diagnose-fluid-jindo.sh
index 30e3096273c..171099eb01b 100644
--- a/tools/diagnose-fluid-jindo.sh
+++ b/tools/diagnose-fluid-jindo.sh
@@ -21,7 +21,17 @@ print_usage() {
 run() {
   echo
   echo "-----------------run $*------------------"
-  timeout 10s "$@"
+  if command -v timeout >/dev/null 2>&1; then
+    timeout 10s "$@"
+  elif command -v gtimeout >/dev/null 2>&1; then
+    gtimeout 10s "$@"
+  elif command -v perl >/dev/null 2>&1; then
+    # Use Perl to enforce timeout on systems without GNU coreutils (like standard macOS).
+    # The block form of exec never goes through a shell; exit 127 if exec itself fails.
+    perl -e 'alarm shift; exec { $ARGV[0] } @ARGV or exit 127' 10 "$@"
+  else
+    "$@"
+  fi
   if [ $? != 0 ]; then
     echo "failed to collect info: $*"
   fi
@@ -55,6 +65,49 @@ runtime_pod_logs() {
   core_component "${runtime_namespace}" "jindofs-fuse" "role=jindofs-fuse" "release=${runtime_name}"
 }
 
+# Collect logs from the fluid-fuse containers of pods in the runtime namespace
+# that are labelled serverless.fluid.io/inject=true and fluid.io/dataset=<id>.
+# Globals: runtime_namespace, runtime_name, diagnose_dir (all read).
+# Outputs: $diagnose_dir/pods-<namespace>-serverless/<pod>-<container>.log
+serverless_pod_logs() {
+  # Check if kubectl is available
+  if ! command -v kubectl >/dev/null 2>&1; then
+    echo "kubectl not found, skipping serverless pod logs collection"
+    return
+  fi
+
+  local namespace="${runtime_namespace}"
+  local dataset_id="${runtime_namespace}-${runtime_name}"
+  # fluid.io/dataset falls back to the Dataset UID when namespace-name >= 63 chars (DNS1035 limit)
+  if [[ ${#dataset_id} -ge 63 ]]; then
+    dataset_id=$(kubectl get dataset "${runtime_name}" -n "${namespace}" -o jsonpath='{.metadata.uid}' 2>/dev/null) || dataset_id="${runtime_namespace}-${runtime_name}"
+  fi
+  local label_selector="serverless.fluid.io/inject=true,fluid.io/dataset=${dataset_id}"
+
+  # Get all pods with the serverless inject label
+  local pods
+  pods=$(kubectl get po -n "${namespace}" -l "${label_selector}" -o jsonpath='{.items[*].metadata.name}' 2>/dev/null)
+  if [[ $? -ne 0 ]]; then
+    echo "failed to get serverless pods in namespace ${namespace}"
+    return
+  fi
+
+  if [[ -n "$pods" ]]; then
+    mkdir -p "$diagnose_dir/pods-${namespace}-serverless"
+    for po in ${pods}; do
+      # Select every container whose name starts with fluid-fuse or init-fluid-fuse, so suffixed names are collected too
+      local containers
+      containers=$(kubectl get po "${po}" -n "${namespace}" -o jsonpath='{.spec.initContainers[*].name} {.spec.containers[*].name}' 2>/dev/null | tr ' ' '\n' | grep -E '^(init-)?fluid-fuse')
+      if [[ -n "$containers" ]]; then
+        for container in ${containers}; do
+          kubectl logs "${po}" -c "${container}" -n "${namespace}" >"$diagnose_dir/pods-${namespace}-serverless/${po}-${container}.log" 2>&1
+        done
+      fi
+    done
+  fi
+  return
+}
+
 core_component() {
   # namespace container selectors...
   local namespace="$1"
@@ -99,6 +152,7 @@ pd_collect() {
   pod_status "${fluid_namespace}"
   pod_status "${runtime_namespace}"
   runtime_pod_logs
+  serverless_pod_logs
   fluid_pod_logs
   kubectl_resource
   archive
diff --git a/tools/diagnose-fluid-juicefs.sh b/tools/diagnose-fluid-juicefs.sh
index 30d239713ae..b3f01b76891 100644
--- a/tools/diagnose-fluid-juicefs.sh
+++ b/tools/diagnose-fluid-juicefs.sh
@@ -21,7 +21,17 @@ print_usage() {
 run() {
   echo
   echo "-----------------run $*------------------"
-  timeout 10s "$@"
+  if command -v timeout >/dev/null 2>&1; then
+    timeout 10s "$@"
+  elif command -v gtimeout >/dev/null 2>&1; then
+    gtimeout 10s "$@"
+  elif command -v perl >/dev/null 2>&1; then
+    # Use Perl to enforce timeout on systems without GNU coreutils (like standard macOS).
+    # The block form of exec never goes through a shell; exit 127 if exec itself fails.
+    perl -e 'alarm shift; exec { $ARGV[0] } @ARGV or exit 127' 10 "$@"
+  else
+    "$@"
+  fi
   if [ $? != 0 ]; then
     echo "failed to collect info: $*"
   fi
@@ -54,6 +64,49 @@ runtime_pod_logs() {
   core_component "${runtime_namespace}" "juicefs-fuse" "role=juicefs-fuse" "release=${runtime_name}"
 }
 
+# Collect logs from the fluid-fuse containers of pods in the runtime namespace
+# that are labelled serverless.fluid.io/inject=true and fluid.io/dataset=<id>.
+# Globals: runtime_namespace, runtime_name, diagnose_dir (all read).
+# Outputs: $diagnose_dir/pods-<namespace>-serverless/<pod>-<container>.log
+serverless_pod_logs() {
+  # Check if kubectl is available
+  if ! command -v kubectl >/dev/null 2>&1; then
+    echo "kubectl not found, skipping serverless pod logs collection"
+    return
+  fi
+
+  local namespace="${runtime_namespace}"
+  local dataset_id="${runtime_namespace}-${runtime_name}"
+  # fluid.io/dataset falls back to the Dataset UID when namespace-name >= 63 chars (DNS1035 limit)
+  if [[ ${#dataset_id} -ge 63 ]]; then
+    dataset_id=$(kubectl get dataset "${runtime_name}" -n "${namespace}" -o jsonpath='{.metadata.uid}' 2>/dev/null) || dataset_id="${runtime_namespace}-${runtime_name}"
+  fi
+  local label_selector="serverless.fluid.io/inject=true,fluid.io/dataset=${dataset_id}"
+
+  # Get all pods with the serverless inject label
+  local pods
+  pods=$(kubectl get po -n "${namespace}" -l "${label_selector}" -o jsonpath='{.items[*].metadata.name}' 2>/dev/null)
+  if [[ $? -ne 0 ]]; then
+    echo "failed to get serverless pods in namespace ${namespace}"
+    return
+  fi
+
+  if [[ -n "$pods" ]]; then
+    mkdir -p "$diagnose_dir/pods-${namespace}-serverless"
+    for po in ${pods}; do
+      # Select every container whose name starts with fluid-fuse or init-fluid-fuse, so suffixed names are collected too
+      local containers
+      containers=$(kubectl get po "${po}" -n "${namespace}" -o jsonpath='{.spec.initContainers[*].name} {.spec.containers[*].name}' 2>/dev/null | tr ' ' '\n' | grep -E '^(init-)?fluid-fuse')
+      if [[ -n "$containers" ]]; then
+        for container in ${containers}; do
+          kubectl logs "${po}" -c "${container}" -n "${namespace}" >"$diagnose_dir/pods-${namespace}-serverless/${po}-${container}.log" 2>&1
+        done
+      fi
+    done
+  fi
+  return
+}
+
 core_component() {
   # namespace container selectors...
   local namespace="$1"
@@ -98,6 +151,7 @@ pd_collect() {
   pod_status "${fluid_namespace}"
   pod_status "${runtime_namespace}"
   runtime_pod_logs
+  serverless_pod_logs
   fluid_pod_logs
   kubectl_resource
   archive