• Invoke any function with custom stack (irrespective of signature)

    From Frederick Virchanza Gotham@21:1/5 to All on Fri Jul 14 05:11:23 2023
    My main language is C/C++ and so I'm used to seeing function signatures such as the following:

    void Func(void);
    int Func(void);
    void Func(double, int, char*);

    which would be written as follows in Visual Basic:

    Private Sub Func()
    Private Function Func() As Integer
    Private Sub Func(ByVal arg1 As Double, ByVal arg2 As Integer, ByVal arg3 As String)

    A few months ago, I devised a way in x86_64 assembler and C++ of dynamically allocating a separate stack to use for a single invocation of a function, and in the end I even got it working with exception handling. But it only worked for functions that don't take an argument and don't return a value.

    Since then, I've tried to come up with a universal solution that will work for all function signatures (even those with supernumerary parameters, or that return a very big struct by value), and I've got it working for System V x86_64.

    Here's what I currently have, the assembler is inline among the C++ code:

    https://godbolt.org/z/W4x7vbE7q

    And here it is copy-pasted:

    #include <cassert> // assert
    #include <cstddef> // size_t
    #include <memory> // unique_ptr
    #include <utility> // forward

    // Per-thread state shared between the C++ wrappers and the assembler below.
    // The assembler addresses these objects by symbol name ("fs:NAME@tpoff"),
    // so they must keep exactly these names.
    //   p_original    - written by Assembler_set_stack_pointer_and_invoke with the
    //                   value of rsp on entry, and read back to restore rsp afterwards
    //   p_replacement - set by Invoker's constructor; start address for the copy
    //                   onto the replacement stack
    thread_local char *p_original, *p_replacement;
    // Function the assembler jumps to once the stack has been switched
    // (set by Invoker's constructor, stored with its signature erased).
    thread_local void (*f)(void);
    // Highest address of the region of the current stack that is copied to the
    // replacement stack (written by Assembler_set_bottom_of_stack).
    thread_local char *bottom_of_stack;

    // Entry points implemented in file-scope assembler further down. They are
    // declared as void(void) but Assembler_set_stack_pointer_and_invoke is cast
    // to the target function's real signature before it is called.
    extern "C" {
    void Assembler_set_bottom_of_stack (void) noexcept;
    void Assembler_set_stack_pointer_and_invoke(void) noexcept;
    }

    // Assembler_set_bottom_of_stack: records "rsp on entry + 16" in the
    // thread-local 'bottom_of_stack'. On entry rsp points at the return address
    // pushed by the caller's 'call', so the stored value lies 8 bytes above the
    // caller's own stack pointer. Only r10 is modified.
    __asm("Assembler_set_bottom_of_stack: \n"
    ".intel_syntax noprefix \n"
    " mov r10, rsp \n"
    " add r10, 16 \n" // +8 skips the return address, +8 extra as a safety margin
    " mov QWORD PTR fs:bottom_of_stack@tpoff, r10 \n"
    " ret \n"
    ".att_syntax");

    /* Callable returned by Stacker::operator(). Constructing one publishes the
       replacement stack pointer and the target function in thread-local
       variables; calling it runs the target via the assembler trampoline. */
    template<typename R, typename... Params>
    class Invoker {

        using Target = R (*)(Params...);   // real signature of the wrapped function

        // Only Stacker (a friend) can create an Invoker.
        Invoker(char *const arg_p, Target const arg_f) noexcept
        {
            f = reinterpret_cast<void (*)(void)>(arg_f);   // thread_local: function to invoke
            p_replacement = arg_p;                         // thread_local: top of the new stack
        }

    public:
        // Forwards the arguments to the target through the trampoline, which is
        // called as though it had the target's own signature.
        R operator()(Params... args)
        {
            Assembler_set_bottom_of_stack();

            auto const trampoline =
                reinterpret_cast<Target>(Assembler_set_stack_pointer_and_invoke);

            return trampoline( std::forward<Params>(args)... );
        }

        friend class Stacker;
    };

    /* Provides the memory that is used as a replacement stack.

       Either allocates 'len' bytes itself or borrows a caller-supplied buffer,
       and hands the initial stack address to an Invoker through operator().

       Alignment: the assembler trampoline enters the target function with
           rsp = p - (bottom_of_stack - rsp_on_trampoline_entry).
       Both 'bottom_of_stack' and rsp on entry to a called function are
       congruent to 8 modulo 16, so the target's entry rsp has the same residue
       modulo 16 as 'p'. The System V x86_64 ABI requires (rsp + 8) to be
       16-byte aligned on function entry, therefore 'p' must be congruent to 8
       modulo 16. The previous "end - 16" gave residue 0 for a 16-byte aligned
       buffer, which mis-aligns every frame on the new stack and faults as soon
       as the callee uses an aligned SSE load/store on its stack. */
    class Stacker {
        char *p;
        std::unique_ptr<char[]> mystack;

        // Returns the highest address inside [base, base+len) that is congruent
        // to 8 modulo 16 and still leaves at least 16 bytes of head-room above
        // the first 8-byte store made by the trampoline.
        static char *initial_top(char *const base, std::size_t const len) noexcept
        {
            std::size_t const start   = reinterpret_cast<std::size_t>(base);
            std::size_t const aligned = (start + len) & ~static_cast<std::size_t>(15u);
            return base + ( (aligned - 24u) - start );   // -24 == 8 (mod 16)
        }

    public:

        // Allocates a stack of 'len' bytes (len >= 128). May throw std::bad_alloc.
        Stacker(std::size_t const len) noexcept(false)
        {
            assert( len >= 128u );

            mystack.reset( new char[len] );   // deliberately not zero-filled
            p = initial_top(mystack.get(), len);
        }

        // Uses the caller-owned buffer [arg, arg+len) as the stack (len >= 128).
        // The buffer is not freed by this class.
        Stacker(char *const arg, std::size_t const len) noexcept
        {
            assert( nullptr != arg );
            assert( len >= 128u );

            p = initial_top(arg, len);
        }

        // Binds a function pointer to this stack; call the result with the
        // function's arguments to run it on the replacement stack.
        template<typename R, typename... Params>
        Invoker<R,Params...> operator()( R(*const arg)(Params...) )
        {
            return Invoker<R,Params...>(this->p, arg);
        }
    };

    /* Trampoline written in x86_64 assembler for the System V calling
    convention. It is entered as if it were the target function itself (same
    arguments in the same registers / stack slots), copies the part of the
    current stack between rsp-on-entry and 'bottom_of_stack' onto the
    replacement stack, switches rsp, and jumps to the function stored in 'f'.
    The argument registers must reach the target untouched, so only scratch
    registers are used: r10 and r11, plus r15 which is saved and restored.
    NOTE(review): rax is overwritten as well. It is not an argument register,
    but the ABI uses AL to pass the number of vector registers to variadic
    functions - confirm that variadic targets are not expected to work. */

    __asm("Assembler_set_stack_pointer_and_invoke:\n"
    ".intel_syntax noprefix \n"
    // Step 1: Save the original stack pointer (rsp still points at our return address)
    " mov QWORD PTR fs:p_original@tpoff, rsp \n"
    // Step 2: Retrieve the replacement stack pointer
    " push r15 \n" // save to restore later
    " mov r15, rsp \n" // pointer to the r15 we just pushed onto stack
    " add r15, 8 \n" // sets 'r15' to top of old stack (== rsp on entry)
    " mov r10, QWORD PTR fs:p_replacement@tpoff \n" // sets 'r10' to top of new stack
    " mov rax, QWORD PTR fs:bottom_of_stack@tpoff \n" // sets 'rax' to bottom of old stack
    // Right now: R15 is the top of the old stack
    // R10 is the top of the new stack
    // RAX is the bottom of the old stack
    // We want to do:
    // while ( rax != r15 ) *r10-- = *rax--;
    // Step 3: Copy the old stack to the new stack (it might contain supernumerary arguments or a big return struct)
    // NOTE(review): each iteration moves 8 bytes but both pointers advance by
    // only 1 byte, so the copies overlap and the first iteration reads the
    // 8 bytes starting at 'bottom_of_stack' itself.
    " jmp cond \n" // Jump to condition of 'while' loop
    "loop: \n" // ----<----<----<----<----
    " mov r11, qword ptr [rax] \n" // |
    " mov qword ptr [r10], r11 \n" // ^
    " sub r10, 1 \n" // | Loop
    " sub rax, 1 \n" // |
    "cond: \n" // ^
    " cmp rax, r15 \n" // |
    " jne loop \n" // ---->---->---->---->----
    " pop r15 \n" // restore original value
    // Step 4: Change the stack pointer to the new stack =============================================
    " mov rsp, r10 \n" // ================================================= new stack
    // Step 5: Set the return address to after the 'jmp' instruction
    " lea r10, [Label_Jump_Back] \n"
    " add rsp, 8 \n" // This line and the next line replace the return address on the stack
    " push r10 \n" // This line and the previous line replace the return address on the stack
    // Step 6: Invoke the function (a jump, not a call: the return address was stored by hand in step 5)
    " jmp QWORD PTR fs:f@tpoff \n" // --- Invoke the function!
    "Label_Jump_Back: \n"
    // Note: The label has already been popped off the stack by the callee
    // Step 7: Restore the original stack pointer (return-value registers are left untouched)
    " mov rsp, QWORD PTR fs:p_original@tpoff \n"
    // Step 8: Jump back to the original address
    " ret \n"
    ".att_syntax");

    // =================== And now the test code ===============================================

    #include <iostream> // cout, endl
    using std::cout, std::endl;

    // Test payload: an aggregate that is too large to be returned in registers,
    // so returning it by value exercises the hidden return-slot pointer.
    struct VeryBigStruct {
        double a[3];
        int    b[3];
        double c[3];
        int    d[3];
        double e[3];
        int    f[3];   // f[2] carries the result checked by the test code
    };

    // Test target: takes more integer arguments than fit in registers (so some
    // are passed on the stack) and returns a large struct by value.
    // Returns a struct whose f[2] holds the sum of all ten arguments; every
    // other member is zero (the struct is value-initialised so that no
    // indeterminate values are copied back to the caller).
    VeryBigStruct Func2(int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8, int a9, int a10)
    {
        VeryBigStruct vbs{};
        vbs.f[2] = a1+a2+a3+a4+a5+a6+a7+a8+a9+a10;
        return vbs;
    }

    // Calls Func2 once directly and once on a freshly allocated replacement
    // stack; both lines are expected to print the same value.
    int main(void)
    {
        // Size in bytes of the heap block used as the replacement stack.
        constexpr std::size_t stack_bytes = 1048576000u;

        cout << "first line in main\n";

        cout << "Retval: "
             << Func2(1,2,3,4,5,6,7,8,9,10).f[2]
             << endl;

        cout << "Retval: "
             << Stacker(stack_bytes)(Func2)(1,2,3,4,5,6,7,8,9,10).f[2]
             << endl;

        cout << "last line in main\n";
    }

    --- SoupGate-Win32 v1.05
    * Origin: fsxNet Usenet Gateway (21:1/5)
  • From Frederick Virchanza Gotham@21:1/5 to Frederick Virchanza Gotham on Sat Jul 15 06:35:23 2023
    On Friday, July 14, 2023 at 1:12:54 PM UTC+1, Frederick Virchanza Gotham wrote:

    A few months ago, I devised a way in x86_64 assembler and C++ of dynamically allocating a separate stack to use for a single invocation of a function, and in the end I even got it working with exception handling. But it only worked for functions that
    don't take an argument and don't return a value.

    Since then, I've tried to come up with a universal solution that will work for all function signatures (even those with supernumerary parameters, or that return a very big struct by value), and I've got it working for System V x86_64.


    I'm trying to get it working with exception handling, but it's segfaulting. I've tried copying the entire stack but it's still segfaulting inside libgcc inside the function "uw_update_context_1" when it tries to copy the context struct. Anybody know what's wrong with the following?

    https://godbolt.org/z/vv7hTGWdr

    And here it is copy-pasted:

    #include <cassert> // assert
    #include <climits> // integer limits (ULLONG_MAX / ULONG_LONG_MAX)
    #include <cstddef> // size_t
    #include <cstdint> // UINTPTR_MAX
    #include <cstdlib> // strtoull
    #include <cstring> // string / memory search helpers
    #include <exception> // exception_ptr, current_exception
    #include <memory> // unique_ptr
    #include <type_traits> // is_rvalue_reference, is_trivially_destructible
    #include <utility> // forward

    #include <iostream> // cout, endl ============================= REMOVE THIS
    using std::cerr, std::cout, std::endl;

    #include <unistd.h> // lseek, read, close
    #include <fcntl.h> // open

    /* Returns the end address (one past the highest byte) of the mapping that
       /proc/thread-self/maps labels "[stack]", or nullptr if the file cannot be
       opened or read, or if no such line is found.

       The file is read in chunks and examined line by line, because the
       "[stack]" entry is usually near the END of the file and the file is often
       larger than a single buffer. The descriptor is cached per thread and is
       closed automatically when the thread exits. */
    char *GetStackBottom(void) noexcept
    {
        // Per-thread holder for the descriptor; its destructor closes the file.
        struct MapsFile {
            int fd = -1;
            ~MapsFile() { if ( fd >= 0 ) ::close(fd); }
        };
        static thread_local MapsFile maps;

        if ( maps.fd < 0 )
        {
            maps.fd = ::open("/proc/thread-self/maps", O_RDONLY);
            if ( maps.fd < 0 ) return nullptr;
        }

        // Rewind: the descriptor is re-used across calls.
        if ( 0 != ::lseek(maps.fd, 0, SEEK_SET) ) return nullptr;

        static constexpr char marker[] = "[stack]";
        constexpr std::size_t marker_len = sizeof marker - 1u;

        char buf[8192u];          // automatic storage: nothing shared between threads
        std::size_t used = 0u;    // bytes of 'buf' holding an unfinished line

        for (;;)
        {
            ssize_t const got = ::read(maps.fd, buf + used, sizeof buf - 1u - used);
            if ( got <= 0 ) return nullptr;   // error, or EOF without a "[stack]" line

            used += static_cast<std::size_t>(got);
            buf[used] = '\0';                 // terminate exactly after the valid data

            char *line = buf;
            char *const limit = buf + used;

            for (;;)
            {
                char *const nl = static_cast<char*>(
                    std::memchr(line, '\n', static_cast<std::size_t>(limit - line)) );
                if ( nullptr == nl ) break;   // remaining bytes are an incomplete line

                std::size_t const line_len = static_cast<std::size_t>(nl - line);

                if ( line_len >= marker_len
                     && 0 == std::memcmp(nl - marker_len, marker, marker_len) )
                {
                    // Line format: "start-end perms offset dev inode   [stack]"
                    char *const dash = static_cast<char*>( std::memchr(line, '-', line_len) );
                    if ( nullptr == dash ) return nullptr;

                    static_assert( ULLONG_MAX >= UINTPTR_MAX );
                    char *stop = nullptr;
                    unsigned long long const addr = std::strtoull(dash + 1, &stop, 16);
                    if ( stop == dash + 1 ) return nullptr;   // no hex digits after '-'

                    return reinterpret_cast<char*>(addr);
                }

                line = nl + 1;
            }

            // Keep the unfinished tail for the next read.
            used = static_cast<std::size_t>(limit - line);
            std::memmove(buf, line, used);

            // A single line longer than the buffer cannot be the short
            // "[stack]" entry: drop it so that reading can make progress.
            if ( used == sizeof buf - 1u ) used = 0u;
        }
    }

    // Per-thread state shared between the C++ wrappers and the assembler below.
    // The assembler addresses p_original, p_replacement, f and bottom_of_stack
    // by symbol name ("fs:NAME@tpoff"), so they must keep exactly these names.
    //   p_original    - written by Assembler_set_stack_pointer_and_invoke with the
    //                   value of rsp on entry, and read back to restore rsp afterwards
    //   p_replacement - set by Invoker's constructor; start address for the copy
    //                   onto the replacement stack
    thread_local char *p_original, *p_replacement;
    //   f - function the assembler jumps to (Invoker::exception_capable)
    //   g - the user's function, called by exception_capable
    // Both are stored with their signatures erased.
    thread_local void (*f)(void), (*g)(void);
    // Highest address of the region of the current stack that is copied to the
    // replacement stack.
    thread_local char *bottom_of_stack;
    // Exception captured on the replacement stack by exception_capable and
    // rethrown by Invoker::operator() after the original stack is restored.
    thread_local std::exception_ptr e;

    // Entry points implemented in file-scope assembler further down. They are
    // declared as void(void) but Assembler_set_stack_pointer_and_invoke is cast
    // to the target function's real signature before it is called.
    extern "C" {
    void Assembler_set_bottom_of_stack (void) noexcept;
    void Assembler_set_stack_pointer_and_invoke(void) noexcept;
    }

    // Assembler_set_bottom_of_stack: records "rsp on entry + 16" in the
    // thread-local 'bottom_of_stack'. On entry rsp points at the return address
    // pushed by the caller's 'call', so the stored value lies 8 bytes above the
    // caller's own stack pointer. Only r10 is modified.
    __asm("Assembler_set_bottom_of_stack: \n"
    ".intel_syntax noprefix \n"
    " mov r10, rsp \n"
    " add r10, 16 \n" // +8 skips the return address, +8 extra as a safety margin
    " mov QWORD PTR fs:bottom_of_stack@tpoff, r10 \n"
    " ret \n"
    ".att_syntax");

    /* Produces a throw-away value of type T (cv-qualifiers and references
       removed) for code paths that must syntactically return something whose
       content is never used.

       Only participates in overload resolution when the type is trivially
       destructible, so destroying the dummy has no effect.

       - If the type can be value-initialised without throwing, a
         value-initialised (zeroed) object is returned. This is well defined.
       - Otherwise the original trick is used as a last resort: an empty
         void() function is called through a pointer of type TT(*)(), which
         constructs nothing. NOTE(review): that call has undefined behaviour
         and yields unspecified contents; it is kept only so that
         non-default-constructible types keep compiling.

       Written with <type_traits> facilities available since C++17 so the
       function also builds when concepts are not enabled. */
    template<typename T>
    std::enable_if_t<
        std::is_trivially_destructible_v< std::remove_cv_t<std::remove_reference_t<T>> >,
        std::remove_cv_t<std::remove_reference_t<T>> >
    dummy_prvalue(void) noexcept
    {
        using TT = std::remove_cv_t<std::remove_reference_t<T>>;

        if constexpr ( std::is_nothrow_default_constructible_v<TT> )
        {
            return TT();   // value-initialisation: scalars and aggregates are zeroed
        }
        else
        {
            void (*const tmp)(void) = [](){};
            TT (*const funcptr)(void) = reinterpret_cast<TT(*)(void)>(tmp);
            return funcptr(); // guaranteed elision of move/copy operations here
        }
    }

    template<typename R, typename... Params>
    class Invoker {

    static R exception_capable(Params... args) noexcept
    {
    cerr << "Entered exception_capable\n";

    R (*const funcptr)(Params...) = reinterpret_cast<R(*)(Params...)>(g);

    try
    {
    cerr << "Entered try-block\n";
    return funcptr( std::forward<Params>(args)... );
    }
    catch (...)
    {
    cerr << "Exception thrown!\n";
    e = std::current_exception();
    }

    return dummy_prvalue<R>();
    }

    Invoker(char *const arg_p, R(*const arg_f)(Params...)) noexcept
    {
    p_replacement = arg_p; // sets a thread_local variable
    g = reinterpret_cast<void (*)(void)>(arg_f); // sets a thread_local variable
    f = reinterpret_cast<void (*)(void)>(exception_capable); // sets a thread_local variable
    }

    public:
    R operator()(Params... args) noexcept(false) // This could be static function but I like operator()
    {
    //Assembler_set_bottom_of_stack();
    //cout << "\nBottom of stack: " << (void*)bottom_of_stack << " (my own assembler)\n";
    bottom_of_stack = GetStackBottom() - 8u;
    //cout << "Bottom of stack: " << (void*)bottom_of_stack << " (thread-self/maps)\n";
    e = nullptr;
    R (*const funcptr)(Params...) = reinterpret_cast<R(*)(Params...)>(Assembler_set_stack_pointer_and_invoke);
    R retval = funcptr( std::forward<Params>(args)... );
    if ( nullptr != e )
    {
    cerr << "About to rethrow!\n";
    std::rethrow_exception(e);
    }
    return retval;
    }

    friend class Stacker;
    };

    /* Provides the memory that is used as a replacement stack.

       Either allocates 'len' bytes itself or borrows a caller-supplied buffer,
       and hands the initial stack address to an Invoker through operator().

       Alignment: the assembler trampoline enters the target function with
           rsp = p - (bottom_of_stack - rsp_on_trampoline_entry).
       Both 'bottom_of_stack' and rsp on entry to a called function are
       congruent to 8 modulo 16, so the target's entry rsp has the same residue
       modulo 16 as 'p'. The System V x86_64 ABI requires (rsp + 8) to be
       16-byte aligned on function entry, therefore 'p' must be congruent to 8
       modulo 16. The previous "end - 16" gave residue 0 for a 16-byte aligned
       buffer, which mis-aligns every frame on the new stack; code that copies
       structs with aligned SSE moves (for example the libgcc unwinder during
       exception handling) then faults. */
    class Stacker {
        char *p;
        std::unique_ptr<char[]> mystack;

        // Returns the highest address inside [base, base+len) that is congruent
        // to 8 modulo 16 and still leaves at least 16 bytes of head-room above
        // the first 8-byte store made by the trampoline.
        static char *initial_top(char *const base, std::size_t const len) noexcept
        {
            std::size_t const start   = reinterpret_cast<std::size_t>(base);
            std::size_t const aligned = (start + len) & ~static_cast<std::size_t>(15u);
            return base + ( (aligned - 24u) - start );   // -24 == 8 (mod 16)
        }

    public:

        // Allocates a stack of 'len' bytes (len >= 128). May throw std::bad_alloc.
        Stacker(std::size_t const len) noexcept(false)
        {
            assert( len >= 128u );

            mystack.reset( new char[len] );   // deliberately not zero-filled
            p = initial_top(mystack.get(), len);
        }

        // Uses the caller-owned buffer [arg, arg+len) as the stack (len >= 128).
        // The buffer is not freed by this class.
        Stacker(char *const arg, std::size_t const len) noexcept
        {
            assert( nullptr != arg );
            assert( len >= 128u );

            p = initial_top(arg, len);
        }

        // Binds a function pointer to this stack; call the result with the
        // function's arguments to run it on the replacement stack.
        template<typename R, typename... Params>
        Invoker<R,Params...> operator()( R(*const arg)(Params...) ) noexcept
        {
            return Invoker<R,Params...>(this->p, arg);
        }
    };

    /* Trampoline written in x86_64 assembler for the System V calling
    convention. It is entered as if it were the target function itself (same
    arguments in the same registers / stack slots), copies the part of the
    current stack between rsp-on-entry and 'bottom_of_stack' onto the
    replacement stack, switches rsp, and jumps to the function stored in 'f'.
    The argument registers must reach the target untouched, so only scratch
    registers are used: r10 and r11, plus r15 which is saved and restored.
    NOTE(review): rax is overwritten as well. It is not an argument register,
    but the ABI uses AL to pass the number of vector registers to variadic
    functions - confirm that variadic targets are not expected to work.
    NOTE(review): the function carries no unwind (CFI) information, so an
    exception must not propagate through it - presumably why the target is
    wrapped in a catch-all before being stored in 'f'; confirm. */

    __asm("Assembler_set_stack_pointer_and_invoke:\n"
    ".intel_syntax noprefix \n"
    // Step 1: Save the original stack pointer (rsp still points at our return address)
    " mov QWORD PTR fs:p_original@tpoff, rsp \n"
    // Step 2: Retrieve the replacement stack pointer
    " push r15 \n" // save to restore later
    " mov r15, rsp \n" // pointer to the r15 we just pushed onto stack
    " add r15, 8 \n" // sets 'r15' to top of old stack (== rsp on entry)
    " mov r10, QWORD PTR fs:p_replacement@tpoff \n" // sets 'r10' to top of new stack
    " mov rax, QWORD PTR fs:bottom_of_stack@tpoff \n" // sets 'rax' to bottom of old stack
    // Right now: R15 is the top of the old stack
    // R10 is the top of the new stack
    // RAX is the bottom of the old stack
    // We want to do:
    // while ( rax != r15 ) *r10-- = *rax--;
    // Step 3: Copy the old stack to the new stack (it might contain supernumerary arguments or a big return struct)
    // NOTE(review): each iteration moves 8 bytes but both pointers advance by
    // only 1 byte, so the copies overlap and the first iteration reads the
    // 8 bytes starting at 'bottom_of_stack' itself.
    " jmp cond \n" // Jump to condition of 'while' loop
    "loop: \n" // ----<----<----<----<----
    " mov r11, qword ptr [rax] \n" // |
    " mov qword ptr [r10], r11 \n" // ^
    " sub r10, 1 \n" // | Loop
    " sub rax, 1 \n" // |
    "cond: \n" // ^
    " cmp rax, r15 \n" // |
    " jne loop \n" // ---->---->---->---->----
    " pop r15 \n" // restore original value
    // Step 4: Change the stack pointer to the new stack =============================================
    " mov rsp, r10 \n" // ================================================= new stack
    // Step 5: Set the return address to after the 'jmp' instruction
    " lea r10, [Label_Jump_Back] \n"
    " add rsp, 8 \n" // This line and the next line replace the return address on the stack
    " push r10 \n" // This line and the previous line replace the return address on the stack
    // Step 6: Invoke the function (a jump, not a call: the return address was stored by hand in step 5)
    " jmp QWORD PTR fs:f@tpoff \n" // --- Invoke the function!
    "Label_Jump_Back: \n"
    // Note: The label has already been popped off the stack by the callee
    // Step 7: Restore the original stack pointer (return-value registers are left untouched)
    " mov rsp, QWORD PTR fs:p_original@tpoff \n"
    // Step 8: Jump back to the original address
    " ret \n"
    ".att_syntax");

    // =================== And now the test code ===============================================

    #include <iostream> // cout, endl
    using std::cout, std::endl;

    // Test payload: an aggregate that is too large to be returned in registers,
    // so returning it by value exercises the hidden return-slot pointer.
    struct VeryBigStruct {
        double a[3];
        int    b[3];
        double c[3];
        int    d[3];
        double e[3];
        int    f[3];   // f[2] carries the result checked by the test code
    };

    // Test target for the exception path: logs its entry, stores the sum of
    // its ten arguments in f[2] of a local struct, then always throws int 3.
    // The return statement is never reached.
    VeryBigStruct Func2(int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8, int a9, int a10)
    {
        cerr << "Entered Func2\n";

        int const total = a1+a2+a3+a4+a5+a6+a7+a8+a9+a10;

        VeryBigStruct result;
        result.f[2] = total;

        throw int(3);

        return result;
    }

    // Calls the throwing Func2 once directly and once on a freshly allocated
    // replacement stack; in both cases the int exception is expected to be
    // caught here and reported.
    int main(void)
    {
        // Size in bytes of the heap block used as the replacement stack.
        constexpr std::size_t stack_bytes = 1048576000u;

        cout << "first line in main\n";

    #if 1
        try
        {
            cout << "Retval: "
                 << Func2(1,2,7,4,5,6,7,8,9,10).f[2]
                 << endl;
        }
        catch (int const thrown)
        {
            cout << "Caught an int: " << thrown << endl;
        }
    #endif

        try
        {
            cout << "Retval: "
                 << Stacker(stack_bytes)(Func2)(1,2,7,4,5,6,7,8,9,10).f[2]
                 << endl;
        }
        catch (int const thrown)
        {
            cout << "Caught an int: " << thrown << endl;
        }

        cout << "last line in main\n";
    }

    --- SoupGate-Win32 v1.05
    * Origin: fsxNet Usenet Gateway (21:1/5)