Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions .github/scripts/test_error_guidance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import io
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot where's this test used in github actions?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It wasn’t wired into GitHub Actions before. I’ve added it to .github/workflows/test-mlc-core-actions.yaml, so it now runs as Test 3c - error guidance handling in CI. Commit: bfd555a. No UI change.

import logging
import os
import sys
import unittest

sys.path.insert(0, os.path.abspath(os.path.join(
os.path.dirname(__file__), "..", "..")))

from mlc.error_codes import get_error_guidance
from mlc.main import _report_error, logger
from mlc.script_action import ScriptExecutionError


class ErrorGuidanceTest(unittest.TestCase):
    """Checks error-code detection and the guidance lines that get logged."""

    def test_detects_disk_space_errors_from_message(self):
        # The outer return code is a generic 1; code 28 only appears in the
        # message text.
        failure_text = (
            "Command execution failed with error code 28. No space left on device.")
        result = get_error_guidance(1, failure_text)

        self.assertIsNotNone(result)
        self.assertEqual(result["error_code"], 28)
        self.assertIn("disk space", result["error_message"].lower())
        has_free_space_tip = any(
            "Free disk space" in tip for tip in result["suggestions"])
        self.assertTrue(has_free_space_tip)

    def test_detects_segmentation_fault_errors(self):
        result = get_error_guidance(139, "Segmentation fault (core dumped)")

        self.assertIsNotNone(result)
        self.assertEqual(result["error_code"], 139)
        self.assertIn("segmentation fault", result["error_message"].lower())

    def test_report_error_logs_actionable_guidance(self):
        # Capture ERROR-level records emitted through the shared logger.
        captured = io.StringIO()
        capture_handler = logging.StreamHandler(captured)
        capture_handler.setLevel(logging.ERROR)

        failure = ScriptExecutionError(
            "Script run execution failed.",
            script_name="detect,cpu",
            run_args={"target": "script", "action": "run"},
            error_code=139,
            error_guidance=get_error_guidance(
                139, "Segmentation fault (core dumped)")
        )

        logger.addHandler(capture_handler)
        try:
            _report_error(failure)
        finally:
            # Always detach so the handler does not leak into other tests.
            logger.removeHandler(capture_handler)

        logged = captured.getvalue()
        for expected in ("Detected error code: 139",
                         "Likely cause:",
                         "Suggestion:"):
            self.assertIn(expected, logged)


if __name__ == "__main__":
unittest.main()
4 changes: 4 additions & 0 deletions .github/workflows/test-mlc-core-actions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ jobs:
- name: Test 3b - pull repo --force handling
run: |
python .github/scripts/test_repo_pull_force.py

- name: Test 3c - error guidance handling
run: |
python .github/scripts/test_error_guidance.py

- name: Test 4 - list repo - List the existing repositories
run: |
Expand Down
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@ build
dist
*egg-info
__pycache__

.mlc-log.txt
150 changes: 141 additions & 9 deletions mlc/error_codes.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,71 @@
import re
from enum import Enum, auto


class ErrorCode(Enum):
    """Enum class for error codes in MLCFlow.

    Each member's value is a ``(code, description)`` tuple; ``__init__``
    exposes the two parts as the ``code`` and ``description`` attributes.
    Members can be looked up by the full tuple or by the bare numeric code,
    e.g. ``ErrorCode(2000)``.
    """
    # General errors (2000-2007)
    AUTOMATION_SCRIPT_NOT_FOUND = (
        2000, "The specified automation script was not found")
    PATH_DOES_NOT_EXIST = (2001, "Provided path does not exists")
    FILE_NOT_FOUND = (2002, "Required file was not found")
    PERMISSION_DENIED = (2003, "Insufficient permission to execute the script")
    IO_Error = (2004, "File I/O operation failed")
    AUTOMATION_CUSTOM_ERROR = (2005, "Custom error triggered by the script")
    UNSUPPORTED_OS = (
        2006, "The Operating System is not supported by the script")
    MISSING_ENV_VARIABLE = (2007, "Required environment variables are missing")

    def __init__(self, code, description):
        self.code = code
        self.description = description

    @classmethod
    def _missing_(cls, value):
        """Resolve a bare numeric code to its member.

        Value lookup keys on the whole ``(code, description)`` tuple, so a
        call such as ``ErrorCode(2000)`` ends up here. Returning ``None``
        lets Enum raise its usual ``ValueError`` for unknown codes.
        """
        for member in cls:
            if member.code == value:
                return member
        return None


# Numeric codes of every ErrorCode member, for membership checks.
ERROR_CODES = {error.code for error in ErrorCode}
# Case-insensitive patterns that pull a numeric code out of free-form error
# text. Each pattern has one capture group holding the digits.
ERROR_CODE_PATTERNS = [
    # e.g. "error code 28", "exit code: 1", "return code=2"
    re.compile(r'(?:error|exit|return)\s+code\s*[:=]?\s*(\d+)', re.IGNORECASE),
    # e.g. "[Errno 28]"
    re.compile(r'\[errno\s+(\d+)\]', re.IGNORECASE),
    # e.g. "signal 11"
    re.compile(r'signal\s+(\d+)', re.IGNORECASE),
]


class WarningCode(Enum):
    """Enum class for warning codes in MLCFlow.

    Each member's value is a ``(code, description)`` tuple; ``__init__``
    exposes the two parts as the ``code`` and ``description`` attributes.
    Members can be looked up by the full tuple or by the bare numeric code,
    e.g. ``WarningCode(1000)``.
    """
    # General warnings (1000-1006)
    IO_WARNING = (1000, "File I/O operation warning")
    AUTOMATION_SCRIPT_NOT_TESTED = (
        1001,
        "the script is not tested on the current operatinig system or is in a development state")
    AUTOMATION_SCRIPT_SKIPPED = (
        1002, "The script has been skipped during execution")
    AUTOMATION_CUSTOM_ERROR = (1003, "Custom warning triggered by the script")
    NON_INTERACTIVE_ENV = (1004, "Non interactive environment detected")
    ELEVATED_PERMISSION_NEEDED = (1005, "Elevated permission needed")
    EMPTY_TARGET = (1006, "The specified target is empty")

    def __init__(self, code, description):
        self.code = code
        self.description = description

    @classmethod
    def _missing_(cls, value):
        """Resolve a bare numeric code to its member.

        Value lookup keys on the whole ``(code, description)`` tuple, so a
        call such as ``WarningCode(1000)`` ends up here. Returning ``None``
        lets Enum raise its usual ``ValueError`` for unknown codes.
        """
        for member in cls:
            if member.code == value:
                return member
        return None


def get_error_info(error_code):
    """Get the error message for a given error code.

    Args:
        error_code: Numeric code (e.g. ``2000``) or the full
            ``(code, description)`` value of an ``ErrorCode`` member.

    Returns:
        ``{"error_code": ..., "error_message": ...}`` for a known code,
        otherwise the string ``"Unknown error code: <error_code>"``.
    """
    try:
        member = ErrorCode(error_code)
    except ValueError:
        # ErrorCode values are (code, description) tuples, so a bare number
        # may not resolve through value lookup; match on ``code`` instead.
        member = next(
            (entry for entry in ErrorCode if entry.code == error_code), None)

    if member is None:
        return f"Unknown error code: {error_code}"
    return {"error_code": member.code, "error_message": member.description}


def get_warning_info(warning_code):
    """Get the warning message for a given warning code.

    Args:
        warning_code: Numeric code (e.g. ``1000``) or the full
            ``(code, description)`` value of a ``WarningCode`` member.

    Returns:
        ``{"warning_code": ..., "warning_message": ...}`` for a known code,
        otherwise the string ``"Unknown warning code: <warning_code>"``.
    """
    try:
        member = WarningCode(warning_code)
    except ValueError:
        # WarningCode values are (code, description) tuples, so a bare number
        # may not resolve through value lookup; match on ``code`` instead.
        member = next(
            (entry for entry in WarningCode if entry.code == warning_code),
            None)

    if member is None:
        return f"Unknown warning code: {warning_code}"
    return {"warning_code": member.code,
            "warning_message": member.description}


def is_warning_code(code):
"""Check if a given code is a warning code"""
try:
Expand All @@ -56,6 +77,7 @@ def is_warning_code(code):
except ValueError:
return False


def is_error_code(code):
"""Check if a given code is an error code"""
try:
Expand All @@ -67,11 +89,121 @@ def is_error_code(code):
except ValueError:
return False


def get_code_type(code):
    """Classify a code as "error", "warning", or "unknown".

    The error check runs first, so a code recognised by both helpers is
    reported as an error.
    """
    if is_error_code(code):
        return "error"
    return "warning" if is_warning_code(code) else "unknown"


def _normalize_code(code):
"""Convert a code value to int when possible."""
if code is None:
return None
if isinstance(code, int):
return code
try:
return int(str(code).strip())
except (TypeError, ValueError):
return None


def detect_error_code(return_code=None, error_message=""):
    """Detect the most useful error code from a return value or error text.

    Args:
        return_code: Outer return value; normalized with ``_normalize_code``.
        error_message: Error text to scan with ``ERROR_CODE_PATTERNS``.

    Returns:
        The normalized return code when it is specific (anything other than
        ``None``, ``0`` or ``1``). Otherwise the code captured by the first
        pattern that matches the message, or the normalized return code when
        no pattern matches.
    """
    normalized_return_code = _normalize_code(return_code)

    # A specific outer code always wins, so skip scanning the text entirely.
    # Only None/0/1 are generic enough to be overridden by the message.
    if normalized_return_code not in (None, 0, 1):
        return normalized_return_code

    message = error_message or ""
    for pattern in ERROR_CODE_PATTERNS:
        match = pattern.search(message)
        if match:
            return _normalize_code(match.group(1))

    return normalized_return_code


def get_error_guidance(return_code=None, error_message=""):
    """Return actionable guidance for known error codes and failure patterns.

    Returns a dict with keys ``error_code``, ``error_message`` and
    ``suggestions``, or ``None`` when neither a message nor any suggestion
    could be derived. A description found for a code in ``ERROR_CODES`` takes
    precedence over the pattern-specific fallback message.
    """
    code = detect_error_code(return_code, error_message)
    text = (error_message or "").lower()

    # Description of a known MLCFlow error code, when one is available.
    known_message = None
    if code in ERROR_CODES:
        info = get_error_info(code)
        if isinstance(info, dict):
            known_message = info["error_message"]

    disk_markers = (
        "no space left on device",
        "disk full",
        "not enough space",
    )
    network_markers = (
        "network",
        "connection",
        "timed out",
        "temporary failure in name resolution",
        "could not resolve host",
    )

    fallback_message = None
    tips = []

    if any(marker in text for marker in disk_markers):
        fallback_message = \
            "Likely disk space exhaustion while running the script"
        tips = [
            "Free disk space in the work/cache directories and retry the command.",
            "Remove old artifacts or caches if they are no longer needed.",
        ]
    # 139 = 128 + SIGSEGV(11), while some tools report the raw signal number
    # 11 or its negative form -11.
    elif "segmentation fault" in text or code in (139, -11, 11):
        fallback_message = \
            "A native program crashed with a segmentation fault"
        tips = [
            "Rerun with verbose logs to identify which native command crashed.",
            "Check native dependencies, compiler/runtime compatibility, and input files.",
        ]
    # Common downloader/network failure codes used by curl and similar tools:
    # 6/7 resolve/connect failures, 28 timeout, 35 TLS/connect issue,
    # 56 connection reset/read failure, 60 certificate validation failure.
    elif (any(marker in text for marker in network_markers)
          or code in (6, 7, 28, 35, 56, 60)):
        fallback_message = \
            "Likely network or download failure while running the script"
        tips = [
            "Check internet connectivity, proxy/firewall settings, and remote endpoint availability.",
            "Retry the command after verifying the network connection.",
        ]
    elif code == 126:
        fallback_message = "Command found but it could not be executed"
        tips = [
            "Check file permissions and whether the target command is executable.",
        ]
    elif code == 127:
        fallback_message = "Command not found during script execution"
        tips = [
            "Verify that the required tool is installed and available on PATH.",
        ]
    elif code == 137:
        fallback_message = \
            "Process was terminated, often due to out-of-memory or a kill signal"
        tips = [
            "Check system memory limits and retry with fewer parallel jobs if possible.",
        ]
    elif code == 130:
        fallback_message = \
            "The command was interrupted by the user or the environment"
        tips = [
            "Retry the command if the interruption was unexpected.",
        ]

    likely_cause = known_message or fallback_message
    if not likely_cause and not tips:
        return None

    return {
        "error_code": code,
        "error_message": likely_cause,
        "suggestions": tips,
    }
13 changes: 13 additions & 0 deletions mlc/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ def _report_error(e):
script_name = e.script_name
repo_alias = e.repo_alias
run_args = e.run_args
error_guidance = e.error_guidance or {}

if script_name:
# Build rerun command with user-facing inputs only
Expand All @@ -181,6 +182,18 @@ def _report_error(e):
logger.error(f"Failed script: {script_name}")
logger.error(f"To rerun just the failed part: {rerun_cmd}")

if error_guidance:
error_code = error_guidance.get("error_code")
if error_code is not None:
logger.error(f"Detected error code: {error_code}")

error_message = error_guidance.get("error_message")
if error_message:
logger.error(f"Likely cause: {error_message}")

for suggestion in error_guidance.get("suggestions", []):
logger.error(f"Suggestion: {suggestion}")

if e.version_info_file:
logger.error(f"Dependency versions: {e.version_info_file}")

Expand Down
13 changes: 11 additions & 2 deletions mlc/script_action.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import inspect
from .index import Index
from . import utils
from .error_codes import get_error_guidance
from .logger import logger


Expand Down Expand Up @@ -319,6 +320,8 @@ def call_script_module_function(self, function_name, run_args):

if result['return'] > 0:
error = result.get('error', "")
error_guidance = get_error_guidance(
result.get('error_code', result.get('return')), error)
_name_match = re.search(r'name\s*=\s*([^,)]+)', error)
_script_name = _name_match.group(1).strip() if _name_match else run_args.get(
'tags', run_args.get('details'))
Expand All @@ -338,7 +341,10 @@ def call_script_module_function(self, function_name, run_args):
raise ScriptExecutionError(
f"Script {function_name} execution failed in {module_path}. \nError : {error}",
script_name=_script_name, repo_alias=_repo_alias, module_path=module_path,
run_args=run_args, version_info_file=_version_info_file)
run_args=run_args, version_info_file=_version_info_file,
error_code=error_guidance.get(
'error_code') if error_guidance else None,
error_guidance=error_guidance)

if str(run_args.get("mlc_output")).lower() in [
"on", "true", "yes", "1"]:
Expand Down Expand Up @@ -680,10 +686,13 @@ def remote_docker(self, run_args):

class ScriptExecutionError(Exception):
    """Script failure carrying the context used when reporting the error.

    Besides the message, it stores the script name, repo alias, module path,
    run arguments, version-info file, detected error code and guidance.
    ``run_args`` falls back to an empty dict when not supplied.
    """

    def __init__(self, message, script_name=None, repo_alias=None,
                 module_path=None, run_args=None, version_info_file=None,
                 error_code=None, error_guidance=None):
        super().__init__(message)
        # Where the failure happened.
        self.script_name = script_name
        self.repo_alias = repo_alias
        self.module_path = module_path
        # How the script was invoked.
        self.run_args = run_args if run_args else {}
        self.version_info_file = version_info_file
        # What was detected about the failure.
        self.error_code = error_code
        self.error_guidance = error_guidance
Loading