Strange assembly output when building Linux kernel for ARMv6/thumb (arm1136j-s)

74 Views Asked by At

I'm using buildroot-2023.02.7 to generate a Linux image for an embedded system (actually an emulator). Decompression fails after reaching decompress_kernel in arch/arm/boot/compressed/misc.c. I'm trying to make sense of the generated assembly, but it looks very strange. Any help figuring out what may be causing the problem would be greatly appreciated :)

/*
 * decompress_kernel() - C entry point of the compressed-kernel decompressor.
 *
 * @output_start:       address the decompressed image is written to; stored
 *                      in the global output_data.
 * @free_mem_ptr_p:     stored in the global free_mem_ptr.
 * @free_mem_ptr_end_p: stored in the global free_mem_end_ptr.
 * @arch_id:            stored in the global __machine_arch_type.
 *
 * Records the four arguments in globals, runs the platform setup hook(s),
 * prints a progress message, then calls do_decompress() on the data between
 * input_data and input_data_end. A non-zero result is reported via error();
 * otherwise a completion message is printed.
 */
void
decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p,
        unsigned long free_mem_ptr_end_p,
        int arch_id)
{
    int ret;

    /* Publish the caller-supplied parameters through file-level globals. */
    output_data     = (unsigned char *)output_start;
    free_mem_ptr        = free_mem_ptr_p;
    free_mem_end_ptr    = free_mem_ptr_end_p;
    __machine_arch_type = arch_id;

#ifdef CONFIG_ARCH_EP93XX
    ep93xx_decomp_setup();
#endif
    arch_decomp_setup();

    putstr("Uncompressing Linux...");
    /* Length is the byte distance between the two input_data markers;
     * error is passed as a callback to the decompressor. */
    ret = do_decompress(input_data, input_data_end - input_data,
                output_data, error);
    if (ret)
        error("decompressor returned an error");
    else
        putstr(" done, booting the kernel.\n");
}

The corresponding assembly that was generated, viewed from the zImage, loaded at 0x2070'0000:

20700920:       b5f0            push    {r4, r5, r6, r7, r14}

20700922:       4e27            ldr     r6, [pc, #156]  @ (0x207009c0)
20700924:       4c27            ldr     r4, [pc, #156]  @ (0x207009c4)
20700926:       447e            add     r6, r15
20700928:       5937            ldr     r7, [r6, r4]    <--- puts the address of 'error' in r7

2070092a:       2455            movs    r4, #85 @ 0x55  <--- puts 'U' in r4
2070092c:       6038            str     r0, [r7, #0]    <--- corrupts the code of 'error' with the contents of r0 (pointer to output_data)

2070092e:       4826            ldr     r0, [pc, #152]  @ (0x207009c8)
20700930:       4d26            ldr     r5, [pc, #152]  @ (0x207009cc)
20700932:       5830            ldr     r0, [r6, r0]
20700934:       447d            add     r5, r15
20700936:       6001            str     r1, [r0, #0]
20700938:       4925            ldr     r1, [pc, #148]  @ (0x207009d0)
2070093a:       b083            sub     sp, #12
2070093c:       5871            ldr     r1, [r6, r1]
2070093e:       3501            adds    r5, #1
20700940:       600a            str     r2, [r1, #0]
20700942:       4a24            ldr     r2, [pc, #144]  @ (0x207009d4)
20700944:       58b2            ldr     r2, [r6, r2]
20700946:       6013            str     r3, [r2, #0]

20700948:       4620            mov     r0, r4
2070094a:       f001 ff19       bl      0x20702780  <--- call putc with 'U' as argument

2070094e:       f815 4b01       ldrb.w  r4, [r5], #1   <--- load the next character of "Uncompressing Linux..." into r4 and post-increment r5
20700952:       b15c            cbz     r4, 0x2070096c
20700954:       2c0a            cmp     r4, #10
20700956:       d1f7            bne.n   0x20700948
20700958:       200d            movs    r0, #13
2070095a:       f001 ff11       bl      0x20702780 <--- will print "UUncompressing"
(...)

Other relevant details: Code is compiled in O0 with the toolchain built by buildroot. GCC 12.x, binutils 2.38, musl; Full args:

"arguments": [
      "output/host/bin/arm-buildroot-linux-musleabi-gcc",
      "-Wp,-MMD,arch/arm/boot/compressed/.misc.o.d",
      "-nostdinc",
      "-I./arch/arm/include",
      "-I./arch/arm/include/generated",
      "-I./include",
      "-I./arch/arm/include/uapi",
      "-I./arch/arm/include/generated/uapi",
      "-I./include/uapi",
      "-I./include/generated/uapi",
      "-include",
      "./include/linux/compiler-version.h",
      "-include",
      "./include/linux/kconfig.h",
      "-include",
      "./include/linux/compiler_types.h",
      "-D__KERNEL__",
      "-mlittle-endian",
      "-fmacro-prefix-map=./=",
      "-Wall",
      "-Wundef",
      "-Werror=strict-prototypes",
      "-Wno-trigraphs",
      "-fno-strict-aliasing",
      "-fno-common",
      "-fshort-wchar",
      "-fno-PIE",
      "-Werror=implicit-function-declaration",
      "-Werror=implicit-int",
      "-Werror=return-type",
      "-Wno-format-security",
      "-std=gnu11",
      "-fno-dwarf2-cfi-asm",
      "-mno-fdpic",
      "-mno-unaligned-access",
      "-fno-ipa-sra",
      "-mabi=aapcs-linux",
      "-mfpu=vfp",
      "-funwind-tables",
      "-mthumb",
      "-Wa,-mimplicit-it=always",
      "-Wa,-mno-warn-deprecated",
      "-D__LINUX_ARM_ARCH__=7",
      "-march=armv7-m",
      "-msoft-float",
      "-Uarm",
      "-fno-delete-null-pointer-checks",
      "-Wno-frame-address",
      "-Wno-format-truncation",
      "-Wno-format-overflow",
      "-Wno-address-of-packed-member",
      "-O2",
      "-fno-allow-store-data-races",
      "-Wframe-larger-than=1024",
      "-fstack-protector-strong",
      "-Wno-main",
      "-Wno-unused-but-set-variable",
      "-Wno-unused-const-variable",
      "-Wno-dangling-pointer",
      "-fomit-frame-pointer",
      "-ftrivial-auto-var-init=zero",
      "-fno-stack-clash-protection",
      "-Wdeclaration-after-statement",
      "-Wvla",
      "-Wno-pointer-sign",
      "-Wcast-function-type",
      "-Wno-stringop-truncation",
      "-Wno-stringop-overflow",
      "-Wno-restrict",
      "-Wno-maybe-uninitialized",
      "-Wno-array-bounds",
      "-Wno-alloc-size-larger-than",
      "-Wimplicit-fallthrough=5",
      "-fno-strict-overflow",
      "-fno-stack-check",
      "-fconserve-stack",
      "-Werror=date-time",
      "-Werror=incompatible-pointer-types",
      "-Werror=designated-init",
      "-Wno-packed-not-aligned",
      "-Wno-attribute-alias",
      "-DDISABLE_BRANCH_PROFILING",
      "-fpic",
      "-mno-single-pic-base",
      "-fno-builtin",
      "-I./scripts/dtc/libfdt",
      "-fno-stack-protector",
      "-Iarch/arm/boot/compressed",
      "-DKBUILD_MODFILE=\"arch/arm/boot/compressed/misc\"",
      "-DKBUILD_BASENAME=\"misc\"",
      "-DKBUILD_MODNAME=\"misc\"",
      "-D__KBUILD_MODNAME=kmod_misc",
      "-c",
      "-o",
      "arch/arm/boot/compressed/misc.o",
      "arch/arm/boot/compressed/misc.c"
    ],

I tried different configurations capable of generating thumb code and disabling optimizations but the code still does not make sense.

EDIT1: The intermediate assembly has the same problem :/ I realized that this happens at O2 (at least it's not immediately obvious at the other levels; I have to check). Also, buildroot is not honoring the optimization level.

decompress_kernel:
    .fnstart
    @ args = 0, pretend = 0, frame = 0
    @ frame_needed = 0, uses_anonymous_args = 0
    push    {r3, r4, r5, r6, r7, lr}
    .save {r3, r4, r5, r6, r7, lr}
    ldr r6, .L32
    ldr r4, .L32+4
.LPIC5:
    add r6, pc
    ldr r7, [r6, r4]
    movs    r4, #85
    str r0, [r7]
    ldr r0, .L32+8
    ldr r5, .L32+12
    ldr r0, [r6, r0]

EDIT2: This is the full decompress function:

    @ ---------------------------------------------------------------------
    @ Compiler output (GNU as, unified syntax, Thumb, -fpic) for
    @ decompress_kernel(output_start, free_mem_ptr_p, free_mem_ptr_end_p,
    @                   arch_id); the arguments arrive in r0-r3.
    @
    @ Register roles inside the function:
    @   r6 = address of _GLOBAL_OFFSET_TABLE_ (PIC base)
    @   r7 = address of output_data (loaded from its GOT slot)
    @   r5 = cursor into the string currently being printed
    @   r4 = character currently being printed
    @
    @ The literal pool at .L32 holds, in order, the GOT base offset, GOT
    @ slot offsets of the referenced globals, and pc-relative offsets of
    @ the three string constants below.
    @ ---------------------------------------------------------------------
    .section    .rodata.str1.4
    .align  2
.LC3:
    .ascii  "Uncompressing Linux...\000"
    .align  2
.LC4:
    .ascii  " done, booting the kernel.\012\000"
    .align  2
.LC5:
    .ascii  "decompressor returned an error\000"
    .text
    .align  1
    .p2align 2,,3
    .global decompress_kernel
    .syntax unified
    .thumb
    .thumb_func
    .type   decompress_kernel, %function
decompress_kernel:
    .fnstart
    @ args = 0, pretend = 0, frame = 0
    @ frame_needed = 0, uses_anonymous_args = 0
    @ r3 is pushed together with r4-r7/lr; presumably only to keep the
    @ stack 8-byte aligned - TODO confirm. Its value (arch_id) is still
    @ live in the register afterwards.
    push    {r3, r4, r5, r6, r7, lr}
    .save {r3, r4, r5, r6, r7, lr}
    @ r6 = pool[0] + pc  ->  address of the GOT.
    @ r4 = pool[1]       ->  GOT slot offset of output_data.
    ldr r6, .L32
    ldr r4, .L32+4
.LPIC5:
    add r6, pc
    @ r7 = GOT[output_data] = &output_data (per the pool entry at .L32+4,
    @ this is output_data, not error).
    ldr r7, [r6, r4]
    @ r4 = 85 = 'U', the first character of .LC3; the print loop below
    @ starts with this character already loaded.
    movs    r4, #85
    @ output_data = output_start (r0).
    str r0, [r7]
    @ r0 = &free_mem_ptr (via GOT); r5 = pc-relative offset of .LC3.
    ldr r0, .L32+8
    ldr r5, .L32+12
    ldr r0, [r6, r0]
.LPIC3:
    add r5, pc
    @ free_mem_ptr = free_mem_ptr_p (r1).
    str r1, [r0]
    ldr r1, .L32+16
    @ r5 = .LC3 + 1: skip the 'U' that is already held in r4.
    adds    r5, r5, #1
    @ free_mem_end_ptr = free_mem_ptr_end_p (r2).
    ldr r1, [r6, r1]
    str r2, [r1]
    @ __machine_arch_type = arch_id (r3).
    ldr r2, .L32+20
    ldr r2, [r6, r2]
    str r3, [r2]
    @ No call to arch_decomp_setup() appears here; presumably it was
    @ inlined to nothing for this configuration - TODO confirm.
    @
    @ Print loop for .LC3: emit r4 with putc, fetch the next byte with a
    @ post-increment of r5, stop at the NUL terminator. A '\n' (10) is
    @ preceded by an extra putc(13).
    @ NOTE(review): cbz/cbnz and post-indexed ldrb are Thumb-2
    @ instructions (the compile flags quoted in this post use
    @ -march=armv7-m). Confirm the target core named in the title
    @ (ARMv6 / arm1136j-s) can execute them.
.L21:
    mov r0, r4
    bl  putc(PLT)
    ldrb    r4, [r5], #1    @ zero_extendqisi2
    cbz r4, .L29
.L23:
    cmp r4, #10
    bne .L21
    movs    r0, #13
    bl  putc(PLT)
    mov r0, r4
    bl  putc(PLT)
    ldrb    r4, [r5], #1    @ zero_extendqisi2
    cmp r4, #0
    bne .L23
    @ Set up do_decompress(input_data, input_data_end - input_data,
    @                      output_data, error):
    @   r0 = &input_data, r1 = &input_data_end - &input_data,
    @   r2 = current value of output_data (reloaded through r7),
    @   r3 = &error (from its GOT slot).
.L29:
    ldr r2, .L32+24
    ldr r1, .L32+28
    ldr r3, .L32+32
    ldr r0, [r6, r1]
    ldr r1, [r6, r2]
    ldr r3, [r6, r3]
    ldr r2, [r7]
    subs    r1, r1, r0
    bl  do_decompress(PLT)
    @ Non-zero return -> error path at .L30.
    cbnz    r0, .L30
    @ Success: print .LC4 with the same loop shape as above; r4 = 32 is
    @ the leading ' ' of the string and r5 starts at .LC4 + 1.
    movs    r4, #32
    ldr r5, .L32+36
.LPIC4:
    add r5, pc
    adds    r5, r5, #1
.L24:
    mov r0, r4
    bl  putc(PLT)
    ldrb    r4, [r5], #1    @ zero_extendqisi2
    cbz r4, .L31
.L26:
    cmp r4, #10
    bne .L24
    movs    r0, #13
    bl  putc(PLT)
    mov r0, r4
    bl  putc(PLT)
    ldrb    r4, [r5], #1    @ zero_extendqisi2
    cmp r4, #0
    bne .L26
.L31:
    pop {r3, r4, r5, r6, r7, pc}
    @ Error path: r0 = &.LC5, then call error(). Nothing follows the call
    @ before the literal pool, so error() is presumably declared noreturn
    @ - TODO confirm.
.L30:
    ldr r0, .L32+40
.LPIC6:
    add r0, pc
    bl  error(PLT)
.L33:
    .align  2
    @ Literal pool. Byte offsets from .L32:
    @   +0  GOT base (pc-relative)   +4  output_data      +8  free_mem_ptr
    @   +12 .LC3 (pc-relative)       +16 free_mem_end_ptr
    @   +20 __machine_arch_type      +24 input_data_end   +28 input_data
    @   +32 error                    +36 .LC4 (pc-relative)
    @   +40 .LC5 (pc-relative)
.L32:
    .word   _GLOBAL_OFFSET_TABLE_-(.LPIC5+4)
    .word   output_data(GOT)
    .word   free_mem_ptr(GOT)
    .word   .LC3-(.LPIC3+4)
    .word   free_mem_end_ptr(GOT)
    .word   __machine_arch_type(GOT)
    .word   input_data_end(GOT)
    .word   input_data(GOT)
    .word   error(GOT)
    .word   .LC4-(.LPIC4+4)
    .word   .LC5-(.LPIC6+4)
    .fnend
    .size   decompress_kernel, .-decompress_kernel
    .section    .rodata.str1.4
0

There are 0 best solutions below