[IR] Segmentation fault on loop (original) (raw)

December 19, 2024, 4:32pm 1

Hello! I’m trying to make a builtin function called strlen to my programming language using hard-coded llvm-ir, so I can link it to the main function. But I don’t know exactly how to do this. I found an approach, it almost works but then it segfaults on the loop of the strlen, and I can’t figure out how to fix it:

; ModuleID = 'GalaxyJIT'
source_filename = "GalaxyJIT"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define i32 @main() {
entry:
  %print = alloca i32, align 4
  %string_alloca = alloca [3 x i8], align 1
  store [3 x i8] c"oi\00", ptr %string_alloca, align 1
  %string_ptr = getelementptr [3 x i8], ptr %string_alloca, i32 0
  %0 = call i32 @writeln(ptr %string_ptr)
  store i32 %0, ptr %print, align 4
  ret i32 0
}

@new_line = constant [1 x i8] c"\0A"  ; Define a constant representing the newline character (0x0A)

declare i64 @write(i32, ptr, i64) ; Declare the write syscall, which takes a file descriptor (i32), a pointer to data (ptr), and the length of the data (i64)

; Implementation of writeln
define i64 @writeln(ptr %str) {
entry:
  ; Calculate the length of the string by calling strlen
  %len = call i64 @strlen(ptr %str)
  
  ; Call the write syscall to print the string
  call i64 @write(i32 1, ptr %str, i64 %len)  ; Write the string to stdout (file descriptor 1)
  
  ; Get the pointer to the newline character
  %newline_ptr = getelementptr [1 x i8], [1 x i8]* @new_line, i64 0, i64 0
  
  ; Call the write syscall again to print the newline
  %return = call i64 @write(i32 1, ptr %newline_ptr, i64 1)  ; Write the newline to stdout
  
  ret i64 %return  ; Return from the writeln function
}

define i64 @strlen(ptr %str) {
entry:
  %start_ptr = alloca ptr, align 8
  store ptr %str, ptr %start_ptr

  %counter = alloca i64, align 8
  store i64 0, ptr %counter

  br label %loop

loop:
  %current_ptr = load ptr, ptr %str

  %current_char = load i8, ptr %current_ptr

  %is_null = icmp eq i8 %current_char, 0
  br i1 %is_null, label %done, label %continue

continue:
  %next_ptr = getelementptr i8, ptr %current_ptr, i64 1
  store ptr %next_ptr, ptr %str

  %current_length = load i64, ptr %counter
  %new_length = add i64 %current_length, 1
  store i64 %new_length, ptr %counter

  br label %loop

done:
  %end_ptr = load ptr, ptr %str
  %start_value = load ptr, ptr %start_ptr

  %length_value = ptrtoint ptr %end_ptr to i64
  %start_value_int = ptrtoint ptr %start_value to i64
  %result = sub i64 %length_value, %start_value_int

  ret i64 %result
}

jdoerfert December 19, 2024, 5:44pm 2

Do yourself a favor, if possible, write your stuff in C and compile it to IR.

Here is the look at the IR:
You have counter, why not load+return that instead of some pointer arithmetic?

store ptr %next_ptr, ptr %str
should be
store ptr %next_ptr, ptr %current_ptr

wesuRage December 19, 2024, 6:57pm 3

Thank you