From 2a58424e1a0f7116d5a4571a4d4395eb0045cbff Mon Sep 17 00:00:00 2001 From: Namenot Date: Wed, 12 Mar 2025 17:55:22 +0100 Subject: [PATCH 1/2] Changes to autocompile_kernel.py The Changes to this are: 1. silenced certain print statements regarding: - the successful writing to file - the successful compilation of files - the execution of a program. 2. Created a folder structure for source files and binaries to declutter the notebooks root folder - .o files get put into the same temporary directory as the wrapper - source files get into a build/src/ folder - executables get put into the build/ folder - dependecies are now searched for in both the notebooks root folder aswell as the temporary directory 3. reimplemented the detect main function to remove duplicate compilations - the reimplementation only uses the compilers preprocessor & regex to find main implementations - This might not be as rigorous as the previous implementation but has the benefit of not needing to compile everything twice - If preferred I can also redo this, keep the compilation but also keep the object file, if main is not found. This would at least skip the duplicate compilations for not executable binaries. - Another solution would be to only go to the assembly step, this would forgo the complete compilation, and would produce a usable assembly file that can be checked for the main: lable. --- ckernel/autocompile_kernel.py | 147 ++++++++++++++++++++++++++-------- 1 file changed, 113 insertions(+), 34 deletions(-) diff --git a/ckernel/autocompile_kernel.py b/ckernel/autocompile_kernel.py index 10e0570..d2f1b3b 100644 --- a/ckernel/autocompile_kernel.py +++ b/ckernel/autocompile_kernel.py @@ -186,46 +186,53 @@ async def autocompile( # Get args specified in the code cell args = self.parse_args(code) + #Setup Folders + args.exe = "build/" + args.exe + args.filename = "build/src/" + args.filename + + if len(args.depends) > 0: + args.depends = args.depends.split() + for idx, dep in enumerate(args.depends): + if os.path.isfile(dep): + continue + + dep = str(self.twd) + "/obj/" + dep + if os.path.isfile(dep): + args.depends[idx] = dep + continue + + message = f'[ERROR] Could not satisfy Dependency! "{args.depends[idx]}" does not exist.' + self.print(message, dest=STDERR) + return error("MissingDependencies", message) + + args.depends = " ".join(args.depends) + + args.obj = str(self.twd) + "/obj/" + args.obj + + os.makedirs(os.path.dirname(args.exe), exist_ok=True) + os.makedirs(os.path.dirname(args.filename), exist_ok=True) + os.makedirs(os.path.dirname(args.obj), exist_ok=True) + if args.verbose or self.debug: nonempty_args = {k: v for k, v in args.__dict__.items() if v != ""} self.print(json.dumps(nonempty_args, indent=2), STDERR) with open(args.filename, "w", encoding="utf-8") as src: src.write(code) - self.print(f"wrote file {args.filename}") + self.log_info(f"wrote file {args.filename}") if args.compiler is None or not args.should_compile: # No compiler means nothing to compile, so exit return success(self.execution_count) - # Attempt to compile to .o. Do this silently at first, because if - # successful we want to detect whether main was defined before - # reporting the actual compilation command to the user - compile_cmd = self.command_compile( - args.compiler, args.cflags, args.LDFLAGS, args.filename, args.obj - ) - self.debug_msg("attempt to compile to .o") - self.log_info("attempt to compile to .o") - result, _, stderr = await compile_cmd.run_silent() - - if result != 0: - # failed to compile to .o, so report error - self.debug_msg("failed!") - self.log_info("failed!") - for line in stderr: - self.log_info(line.rstrip()) - self.print(line, dest=STDERR, end="") - return error("CompileFailed", "Compilation failed") - # compiled ok, continue to detect main self.debug_msg("detect whether main defined") - - result, *_ = await self.command_detect_main(args.obj).run_silent() - if result != 0: + result = await self.command_detect_main(args.compiler, args.filename) + if not result: # main not defined, so repeat to asynchronously report compilation # to .o and stop self.debug_msg("main not defined: compile to .o and stop") - self.print(f"$> {compile_cmd}") + self.log_info(f"$> {compile_cmd}") compile_cmd = self.command_compile( args.compiler, args.cflags, @@ -255,7 +262,7 @@ async def autocompile( args.depends, args.exe, ) - self.print(f"$> {compile_exe_cmd}") + self.log_info(f"$> {compile_exe_cmd}") # now add self.ck_dyn_obj to args.depends to add input wrappers compile_exe_cmd = self.command_compile_exe( @@ -276,7 +283,7 @@ async def autocompile( if not args.should_exec: return success(self.execution_count) run_exe = AsyncCommand(f"./{args.exe} {args.ARGS}", logger=self.log) - self.print(f"$> {run_exe}") + self.log_info(f"$> {run_exe}") with self.active_command( run_exe ) as command, self.stdin_trigger.ready() as trigger: @@ -373,14 +380,86 @@ def command_compile_exe( ) -> AsyncCommand: return AsyncCommand(f"{compiler} {cflags} {name} {depends} {ldflags} -o {exe}") - def command_detect_main(self, objfile: str) -> AsyncCommand: - if is_macOS: - # it seems that on macOS `int main()` is compiled to the symbol - # `_main` - cmd = f"""nm {objfile} | grep " T _main" """ - else: - cmd = f"""nm {objfile} | grep " T main" """ - return AsyncCommand(cmd) + async def command_detect_main(self, compiler: str, srcfile: str): + import re + + # run the preprocessor instead of the entire compiler + def preprocessorCMD(compiler: str, srcfile: str) -> AsyncCommand: + return AsyncCommand(f"{compiler} -E --no-line-commands {srcfile}") + + cmd = preprocessorCMD(compiler, srcfile) + + result, stdout_lines, errMsg = await cmd.run_silent() + if result != 0: + return error("Preprocessor Failed", errMsg) + + """Doing the following steps might be slower then a searching through the simple assembly file for the main/start tag""" + stdout_lines = " ".join(stdout_lines) + stdout_lines = re.sub('\s+', "", stdout_lines) + # remove all strings as they can be interpreted as valid code even tho they shouldn't + # (if you want to inject code via string you're on your own lol) + stdout_lines = re.sub('\"[^\"]*\"', '\"\"', stdout_lines) + stdout_lines = re.sub("\'[^\']*\'", "\'\'", stdout_lines) + + def matchBrackets(in_str: str, opener: str, closer: str): + listA = [(idx, +1) for idx, elem in enumerate(in_str) if elem == opener] + listB = [(idx, -1) for idx, elem in enumerate(in_str) if elem == closer] + + if len(listA) != len(listB): + return error("Bracket Matching", "Unequal Amount of Closing and Opening Brackets") + + combined = sorted(listA + listB) + + retList = [] + + depth = 0 + tpl = [None,None] + for bracket in combined: + depth += bracket[1] + + # closed bracked despite not being opened previously + # its technically possible to have a string that contains a single bracket that would + # trigger this! + # suggested fix: ignore opened bracket, without closed bracket + if depth < 0: + return error("Bracket Matching", "Could not Match brackets. Encountered Closing Bracket before Opening!") + + # root bracket opened + if depth == 1 and bracket[1] == +1: + tpl[0] = bracket[0] + + # root bracked closed + if depth == 0: + tpl[1] = bracket[0] + retList.append(tpl) + tpl = [None,None] + + return retList + + def removeBracketContent(in_str: str, removeIndecies, replacement: str): + for idxpair in reversed(removeIndecies): + in_str = in_str[:idxpair[0]] + replacement + in_str[idxpair[1]+1:] + + return in_str + + # remove the content between curly brackets , but keep them, + # this is useful for detecting whether the main function is defined properly + rmvCurly = matchBrackets(stdout_lines, '{', '}') + stdout_lines = removeBracketContent(stdout_lines, rmvCurly, "{}") + + # remove the content between round brackets , but keep them, + # this is useful for detecting whether the main function is defined properly + rmvRound = matchBrackets(stdout_lines, '(', ')') + stdout_lines = removeBracketContent(stdout_lines, rmvRound, "()") + + # remove everything enclosed in sqaure Brackets, including the Brackets themselves + rmvSquare = matchBrackets(stdout_lines, '[', ']') + stdout_lines = removeBracketContent(stdout_lines, rmvSquare, "") + + matchedMain = re.search("(int|void|)main\(\)\{\}", stdout_lines) + + # search for main definitions + return matchedMain != None def command_link_exe( self, compiler: str, ldflags: str, exe: str, objname: str, depends: str From 5f0f3b972079441ca3815c0abed29c61104e61a4 Mon Sep 17 00:00:00 2001 From: Namenot Date: Wed, 26 Mar 2025 15:12:51 +0100 Subject: [PATCH 2/2] Update autocompile_kernel.py drastically improved the detect main function --- ckernel/autocompile_kernel.py | 74 +++++------------------------------ 1 file changed, 9 insertions(+), 65 deletions(-) diff --git a/ckernel/autocompile_kernel.py b/ckernel/autocompile_kernel.py index d2f1b3b..d5f8caf 100644 --- a/ckernel/autocompile_kernel.py +++ b/ckernel/autocompile_kernel.py @@ -232,7 +232,6 @@ async def autocompile( # main not defined, so repeat to asynchronously report compilation # to .o and stop self.debug_msg("main not defined: compile to .o and stop") - self.log_info(f"$> {compile_cmd}") compile_cmd = self.command_compile( args.compiler, args.cflags, @@ -240,6 +239,7 @@ async def autocompile( args.filename, args.obj, ) + self.log_info(f"$> {compile_cmd}") await compile_cmd.run_with_output(self.stream_stdout, self.stream_stderr) return success(self.execution_count) @@ -381,8 +381,6 @@ def command_compile_exe( return AsyncCommand(f"{compiler} {cflags} {name} {depends} {ldflags} -o {exe}") async def command_detect_main(self, compiler: str, srcfile: str): - import re - # run the preprocessor instead of the entire compiler def preprocessorCMD(compiler: str, srcfile: str) -> AsyncCommand: return AsyncCommand(f"{compiler} -E --no-line-commands {srcfile}") @@ -392,74 +390,20 @@ def preprocessorCMD(compiler: str, srcfile: str) -> AsyncCommand: result, stdout_lines, errMsg = await cmd.run_silent() if result != 0: return error("Preprocessor Failed", errMsg) - - """Doing the following steps might be slower then a searching through the simple assembly file for the main/start tag""" + stdout_lines = " ".join(stdout_lines) - stdout_lines = re.sub('\s+', "", stdout_lines) - # remove all strings as they can be interpreted as valid code even tho they shouldn't - # (if you want to inject code via string you're on your own lol) - stdout_lines = re.sub('\"[^\"]*\"', '\"\"', stdout_lines) - stdout_lines = re.sub("\'[^\']*\'", "\'\'", stdout_lines) - - def matchBrackets(in_str: str, opener: str, closer: str): - listA = [(idx, +1) for idx, elem in enumerate(in_str) if elem == opener] - listB = [(idx, -1) for idx, elem in enumerate(in_str) if elem == closer] - if len(listA) != len(listB): - return error("Bracket Matching", "Unequal Amount of Closing and Opening Brackets") + import re - combined = sorted(listA + listB) - - retList = [] - - depth = 0 - tpl = [None,None] - for bracket in combined: - depth += bracket[1] - - # closed bracked despite not being opened previously - # its technically possible to have a string that contains a single bracket that would - # trigger this! - # suggested fix: ignore opened bracket, without closed bracket - if depth < 0: - return error("Bracket Matching", "Could not Match brackets. Encountered Closing Bracket before Opening!") - - # root bracket opened - if depth == 1 and bracket[1] == +1: - tpl[0] = bracket[0] - - # root bracked closed - if depth == 0: - tpl[1] = bracket[0] - retList.append(tpl) - tpl = [None,None] - - return retList + parameters = "[^\)]*" # encapsulate parameters + declared = "[^;\{]*\{" # ensure that no semicolon comes before the curly bracket + main_regex = f"main\s*\({parameters}\)\s*{declared}" # look for main(...){ - def removeBracketContent(in_str: str, removeIndecies, replacement: str): - for idxpair in reversed(removeIndecies): - in_str = in_str[:idxpair[0]] + replacement + in_str[idxpair[1]+1:] - - return in_str - - # remove the content between curly brackets , but keep them, - # this is useful for detecting whether the main function is defined properly - rmvCurly = matchBrackets(stdout_lines, '{', '}') - stdout_lines = removeBracketContent(stdout_lines, rmvCurly, "{}") - - # remove the content between round brackets , but keep them, - # this is useful for detecting whether the main function is defined properly - rmvRound = matchBrackets(stdout_lines, '(', ')') - stdout_lines = removeBracketContent(stdout_lines, rmvRound, "()") - - # remove everything enclosed in sqaure Brackets, including the Brackets themselves - rmvSquare = matchBrackets(stdout_lines, '[', ']') - stdout_lines = removeBracketContent(stdout_lines, rmvSquare, "") + res = re.split(main_regex, stdout_lines) # seperate the function at main(){ - matchedMain = re.search("(int|void|)main\(\)\{\}", stdout_lines) + openquotes = sum(part.count('"') for part in res[::-1]) % 2 - # search for main definitions - return matchedMain != None + return openquotes == 0 and len(res) > 1 def command_link_exe( self, compiler: str, ldflags: str, exe: str, objname: str, depends: str