@John Bollinger
This is also very relevant in the case of structures or arrays. On Windows there is a programming model called COM (Component Object Model), and the well-known graphics API Direct3D also uses it. Methods of a COM object are generally called like this:
// Function table: five pointers to functions returning void, laid out in
// declaration order (func1 first, func5 last).
typedef struct {
void (*func1)();
void (*func2)();
void (*func3)();
void (*func4)();
void (*func5)();
} i_ibject_vtable;
// Object type: its only member is a pointer to the function table, so a
// call through it is written p_object->vtable->funcN().
typedef struct {
i_ibject_vtable *vtable;
} i_object;
// Prototype only; no definition appears in this example. It receives the
// address of an i_object pointer and returns an int.
// NOTE(review): presumably it stores the new object through the pointer and
// returns a status code -- confirm against the real API.
int object_create(i_object **);
// Obtains an object through the out-parameter, then calls func1..func5 in
// order, spelling out p_object->vtable-> for every call. Always returns 0.
int entry() {
i_object *p_object;
// The address of the local p_object is passed out of this function here;
// the int result of object_create is not checked.
object_create(&p_object);
// Each call below names p_object->vtable again rather than using a cached copy.
p_object->vtable->func1();
p_object->vtable->func2();
p_object->vtable->func3();
p_object->vtable->func4();
p_object->vtable->func5();
return 0;
}
.file "example.c"
# GNU C23 (Compiler-Explorer-Build-gcc--binutils-2.44) version 15.2.0 (x86_64-linux-gnu)
# compiled by GNU C version 11.4.0, GMP version 6.2.1, MPFR version 4.1.0, MPC version 1.2.1, isl version isl-0.24-GMP
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed: -mtune=generic -march=x86-64 -g -g0 -Ofast -fno-asynchronous-unwind-tables
.text
.p2align 4
.globl entry
.type entry, @function
# entry -- compiler output for the version that calls through
# p_object->vtable->funcN() directly (GAS, AT&T operand order).
#
# Stack: 24 bytes are reserved; p_object is kept in the slot at 8(%rsp)
# because its address is passed to object_create.
#
# Pattern to notice: before every one of the five indirect calls the code
# reloads p_object from 8(%rsp) and then reloads p_object->vtable from the
# object, so neither value is carried in a register across a call.
# Returns 0 in %eax.
entry:
subq $24, %rsp #,
# /app/example.c:18: object_create(&p_object);
leaq 8(%rsp), %rdi #, tmp114
call object_create #
# /app/example.c:20: p_object->vtable->func1();
movq 8(%rsp), %rax # p_object, p_object
# /app/example.c:20: p_object->vtable->func1();
movq (%rax), %rax # p_object.0_1->vtable, p_object.0_1->vtable
# /app/example.c:20: p_object->vtable->func1();
# func1 is at offset 0 of the table; func2..func5 follow at 8, 16, 24, 32.
call *(%rax) # _2->func1
# /app/example.c:21: p_object->vtable->func2();
# Same two loads again: p_object from the stack, then its vtable pointer.
movq 8(%rsp), %rax # p_object, p_object
# /app/example.c:21: p_object->vtable->func2();
movq (%rax), %rax # p_object.1_4->vtable, p_object.1_4->vtable
# /app/example.c:21: p_object->vtable->func2();
call *8(%rax) # _5->func2
# /app/example.c:22: p_object->vtable->func3();
movq 8(%rsp), %rax # p_object, p_object
# /app/example.c:22: p_object->vtable->func3();
movq (%rax), %rax # p_object.2_7->vtable, p_object.2_7->vtable
# /app/example.c:22: p_object->vtable->func3();
call *16(%rax) # _8->func3
# /app/example.c:23: p_object->vtable->func4();
movq 8(%rsp), %rax # p_object, p_object
# /app/example.c:23: p_object->vtable->func4();
movq (%rax), %rax # p_object.3_10->vtable, p_object.3_10->vtable
# /app/example.c:23: p_object->vtable->func4();
call *24(%rax) # _11->func4
# /app/example.c:24: p_object->vtable->func5();
movq 8(%rsp), %rax # p_object, p_object
# /app/example.c:24: p_object->vtable->func5();
movq (%rax), %rax # p_object.4_13->vtable, p_object.4_13->vtable
# /app/example.c:24: p_object->vtable->func5();
call *32(%rax) # _14->func5
# /app/example.c:27: }
# return 0, then release the 24-byte frame
xorl %eax, %eax #
addq $24, %rsp #,
ret
.size entry, .-entry
.ident "GCC: (Compiler-Explorer-Build-gcc--binutils-2.44) 15.2.0"
.section .note.GNU-stack,"",@progbits
Manually saving these function pointers in a local copy actually makes the generated code worse (a pessimization):
// Function table of five void-returning function pointers. entry() below
// copies this whole struct by value, so its size (sizeof) matters here.
typedef struct {
void (*func1)();
void (*func2)();
void (*func3)();
void (*func4)();
void (*func5)();
} i_ibject_vtable;
// Object type holding nothing but a pointer to its function table.
typedef struct {
i_ibject_vtable *vtable;
} i_object;
// Prototype only; no definition appears in this example. Takes the address
// of an i_object pointer and returns an int.
// NOTE(review): presumably an out-parameter plus status code -- confirm.
int object_create(i_object **);
// Variant that snapshots the whole function table into a local struct and
// then calls func1..func5 through that local copy. Always returns 0.
int entry() {
i_object *p_object;
// The int result of object_create is not checked.
object_create(&p_object);
i_ibject_vtable vtable;
// Copy all sizeof(vtable) bytes of the table that p_object->vtable points
// to, so the calls below no longer dereference p_object.
__builtin_memcpy(&vtable, p_object->vtable, sizeof(vtable));
vtable.func1();
vtable.func2();
vtable.func3();
vtable.func4();
vtable.func5();
return 0;
}
.file "example.c"
# GNU C23 (Compiler-Explorer-Build-gcc--binutils-2.44) version 15.2.0 (x86_64-linux-gnu)
# compiled by GNU C version 11.4.0, GMP version 6.2.1, MPFR version 4.1.0, MPC version 1.2.1, isl version isl-0.24-GMP
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed: -mtune=generic -march=x86-64 -g -g0 -Ofast -fno-asynchronous-unwind-tables
.text
.p2align 4
.globl entry
.type entry, @function
# entry -- compiler output for the version that memcpy's the function table
# into a local struct first (GAS, AT&T operand order).
#
# Stack: 72 bytes are reserved; p_object is at 8(%rsp) and the local table
# copy occupies 16(%rsp)..55(%rsp) (func1 at 16, func2 at 24, func3 at 32,
# func4 at 40, func5 at 48).
#
# Pattern to notice: the 40-byte copy is expanded inline as two 16-byte
# vector moves plus one 8-byte move. func1 is additionally kept in %rdx and
# called from the register; func2..func5 are called through the stack copy.
# Returns 0 in %eax.
entry:
subq $72, %rsp #,
# /app/example.c:18: object_create(&p_object);
leaq 8(%rsp), %rdi #, tmp106
call object_create #
# /app/example.c:21: __builtin_memcpy(&vtable, p_object->vtable, sizeof(vtable));
movq 8(%rsp), %rax # p_object, p_object
# /app/example.c:21: __builtin_memcpy(&vtable, p_object->vtable, sizeof(vtable));
movq (%rax), %rax # p_object.0_1->vtable, p_object.0_1->vtable
# bytes 0..15 of the source table (func1, func2)
movdqu (%rax), %xmm0 # MEM <char[1:40]> [(void *)_2], MEM <char[1:40]> [(void *)_2]
# low 8 bytes of that vector = func1, kept in %rdx for the first call
movq %xmm0, %rdx # MEM <char[1:40]> [(void *)_2], tmp119
movaps %xmm0, 16(%rsp) # MEM <char[1:40]> [(void *)_2], MEM <char[1:40]> [(void *)&vtable]
# bytes 16..31 (func3, func4), then bytes 32..39 (func5)
movdqu 16(%rax), %xmm0 # MEM <char[1:40]> [(void *)_2], MEM <char[1:40]> [(void *)_2]
movq 32(%rax), %rax # MEM <char[1:40]> [(void *)_2], MEM <char[1:40]> [(void *)_2]
movaps %xmm0, 32(%rsp) # MEM <char[1:40]> [(void *)_2], MEM <char[1:40]> [(void *)&vtable]
movq %rax, 48(%rsp) # MEM <char[1:40]> [(void *)_2], MEM <char[1:40]> [(void *)&vtable]
# /app/example.c:23: vtable.func1();
call *%rdx # tmp119
# /app/example.c:24: vtable.func2();
# The remaining calls read their targets from the stack copy; the func1
# slot at 16(%rsp) was written above but is never read back.
call *24(%rsp) # vtable.func2
# /app/example.c:25: vtable.func3();
call *32(%rsp) # vtable.func3
# /app/example.c:26: vtable.func4();
call *40(%rsp) # vtable.func4
# /app/example.c:27: vtable.func5();
call *48(%rsp) # vtable.func5
# /app/example.c:30: }
# return 0, then release the 72-byte frame
xorl %eax, %eax #
addq $72, %rsp #,
ret
.size entry, .-entry
.ident "GCC: (Compiler-Explorer-Build-gcc--binutils-2.44) 15.2.0"
.section .note.GNU-stack,"",@progbits
I also can't do this by hand for every object, because it is tedious. Once the compiler is able to assume that the object's `vtable` pointer does not change between the calls, it can do the optimization itself:
// Function table of five void-returning function pointers, func1 through
// func5 in declaration order.
typedef struct {
void (*func1)();
void (*func2)();
void (*func3)();
void (*func4)();
void (*func5)();
} i_ibject_vtable;
// Object type whose single member points at the function table.
typedef struct {
i_ibject_vtable *vtable;
} i_object;
// Prototype only; no definition appears in this example. In this variant the
// object is returned directly instead of through an out-parameter, and the
// declaration carries GCC's `malloc` function attribute.
// NOTE(review): that attribute makes aliasing promises about the returned
// pointer -- check the GCC documentation before relying on it in a real API.
__attribute__((malloc)) i_object *object_create();
// Gets an object from object_create(), calls func1..func5 once each, then
// calls func1 three more times, always through p_object->vtable.
// Always returns 0; the returned pointer is not checked for NULL.
int entry() {
i_object *p_object;
p_object = object_create();
p_object->vtable->func1();
p_object->vtable->func2();
p_object->vtable->func3();
p_object->vtable->func4();
p_object->vtable->func5();
// Repeated calls: in the compiler output for this example the vtable
// pointer is loaded once and kept in a register for all of these calls.
p_object->vtable->func1();
p_object->vtable->func1();
p_object->vtable->func1();
return 0;
}
.file "example.c"
# GNU C23 (Compiler-Explorer-Build-gcc--binutils-2.44) version 15.2.0 (x86_64-linux-gnu)
# compiled by GNU C version 11.4.0, GMP version 6.2.1, MPFR version 4.1.0, MPC version 1.2.1, isl version isl-0.24-GMP
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed: -mtune=generic -march=x86-64 -g -g0 -Ofast -fno-asynchronous-unwind-tables
.text
.p2align 4
.globl entry
.type entry, @function
# entry -- compiler output for the version where object_create() returns the
# object pointer and is declared with the malloc attribute (GAS, AT&T
# operand order).
#
# Stack: no locals; only %rbx is pushed on entry and popped before ret.
#
# Pattern to notice: p_object comes back in %rax and p_object->vtable is
# loaded exactly once into %rbx. All eight calls then use %rbx as the table
# base with no reload of the object or the vtable pointer. The individual
# function pointers are still read from the table at each call.
# Returns 0 in %eax.
entry:
pushq %rbx #
# /app/example.c:18: p_object = object_create();
call object_create #
# /app/example.c:20: p_object->vtable->func1();
# single load of the vtable pointer for the whole function
movq (%rax), %rbx # p_object_12->vtable, _1
# /app/example.c:20: p_object->vtable->func1();
call *(%rbx) # _1->func1
# /app/example.c:21: p_object->vtable->func2();
call *8(%rbx) # _1->func2
# /app/example.c:22: p_object->vtable->func3();
call *16(%rbx) # _1->func3
# /app/example.c:23: p_object->vtable->func4();
call *24(%rbx) # _1->func4
# /app/example.c:24: p_object->vtable->func5();
call *32(%rbx) # _1->func5
# /app/example.c:27: p_object->vtable->func1();
# The three repeated func1 calls each read slot 0 of the table again.
call *(%rbx) # _1->func1
# /app/example.c:28: p_object->vtable->func1();
call *(%rbx) # _1->func1
# /app/example.c:29: p_object->vtable->func1();
call *(%rbx) # _1->func1
# /app/example.c:32: }
# return 0 and restore the caller's %rbx
xorl %eax, %eax #
popq %rbx #
ret
.size entry, .-entry
.ident "GCC: (Compiler-Explorer-Build-gcc--binutils-2.44) 15.2.0"
.section .note.GNU-stack,"",@progbits
Unfortunately, most APIs return an error code and hand the object back through an out-parameter instead of returning the pointer, which makes it impossible to use `__attribute__((malloc))` on them.