• why does bsearch segfault on custom strcmp when qsort works fine?

    From Mark Summerfield@21:1/5 to All on Thu Aug 15 05:56:45 2024
    I have a program (complete source at the end) which correctly outputs this:

    ["charlie" "foxtrot" "oscar" "echo" "alpha" "golf" "tango" "delta" "bravo"] ["alpha" "bravo" "charlie" "delta" "echo" "foxtrot" "golf" "oscar" "tango"] check_array OK
    check_index_found true OK
    check_index_found false OK

    However, if you uncomment the "//#define BUG" line, the output (in gdb) is this:

    (gdb) run
    Starting program: /home/mark/tmp/mycmpstr/mycmpstr
    [Thread debugging using libthread_db enabled]
    Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". ["charlie" "foxtrot" "oscar" "echo" "alpha" "golf" "tango" "delta" "bravo"] ["alpha" "bravo" "charlie" "delta" "echo" "foxtrot" "golf" "oscar" "tango"] check_array OK

    Program received signal SIGSEGV, Segmentation fault.
    __strcmp_avx2 () at ../sysdeps/x86_64/multiarch/strcmp-avx2.S:283
    283 ../sysdeps/x86_64/multiarch/strcmp-avx2.S: No such file or directory. (gdb) bt
    #0 __strcmp_avx2 () at ../sysdeps/x86_64/multiarch/strcmp-avx2.S:283
    #1 0x00005555555553e0 in mystrcmp (s=0x555555556030, t=0x7fffffffde10) at mycmpstr.c:50
    #2 0x00007ffff7e0a53c in __GI_bsearch (__key=0x555555556030, __base=0x7fffffffddf0,
    __nmemb=<optimized out>, __size=8, __compar=0x5555555553b7 <mystrcmp>)
    at ../bits/stdlib-bsearch.h:33
    #3 0x0000555555555317 in main () at mycmpstr.c:30

    The difference is that without BUG defined I use my own binary search,
    but with BUG defined I use bsearch.

    Here's the complete source ~100 LOC:

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    //#define BUG

    int mystrcmp(const void* s, const void* t);
    size_t search(const char* s, char** array, size_t size, bool* found);
    void dump(char** array, size_t size);
    void check_array(char** actual, char** expected, size_t size);
    void check_index_found(size_t a, size_t e, bool afound, bool efound);

    int main() {
    char* expected[] = {"alpha", "bravo", "charlie", "delta", "echo",
    "foxtrot", "golf", "oscar", "tango"};
    char* words[] = {"charlie", "foxtrot", "oscar", "echo", "alpha",
    "golf", "tango", "delta", "bravo"};
    const size_t size = sizeof(words) / sizeof(char*);
    dump(words, size);
    // mystrcmp works fine:
    qsort(words, size, sizeof(char*), mystrcmp);
    dump(words, size);
    check_array(words, expected, size);

    size_t index = 0;
    bool found = false;
    #ifdef BUG
    // mystrcmp segfaults:
    char* p = bsearch("oscar", words, size, sizeof(char*), mystrcmp);
    index = p - words[0];
    found = p != NULL;
    #else
    index = search("oscar", words, size, &found);
    #endif
    check_index_found(index, 7, found, true);

    index = 0;
    found = false;
    #ifdef BUG
    p = bsearch("X!@", words, size, sizeof(char*), mystrcmp);
    found = p != NULL;
    #else
    index = search("X!@", words, size, &found);
    #endif
    check_index_found(index, 0, found, false);
    }

    int mystrcmp(const void* s, const void* t) {
    return strcmp(*(const char**)s, *(const char**)t);
    }

    size_t search(const char* s, char** array, size_t size, bool* found) {
    *found = false;
    size_t index = 0;
    size_t low = 0;
    size_t high = size - 1;
    while (high && low <= high) {
    size_t mid = low + ((high - low) / 2);
    const char* value = array[mid];
    int cmp = strcmp(value, s);
    if (cmp == 0) {
    index = mid;
    *found = true;
    break;
    }
    if (cmp < 0)
    low = mid + 1;
    else
    high = mid - 1;
    }
    return index;
    }

    void dump(char** array, size_t size) {
    printf("[");
    for (size_t i = 0; i < size; ++i) {
    printf("\"%s\"", array[i]);
    if (i + 1 < size)
    printf(" ");
    }
    printf("]\n");
    }

    void check_array(char** actual, char** expected, size_t size) {
    bool ok = true;
    for (size_t i = 0; i < size; ++i) {
    if (strcmp(actual[i], expected[i])) {
    printf("ERROR: \"%s\" != \"%s\"\n", actual[i], expected[i]);
    ok = false;
    }
    }
    if (ok)
    printf("check_array OK\n");
    }

    void check_index_found(size_t a, size_t e, bool afound, bool efound) {
    if (afound && efound && a != e)
    printf("ERROR: %zu != %zu\n", a, e);
    else if (afound != efound)
    printf("ERROR: %d != %d\n", afound, efound);
    else
    printf("check_index_found %s OK\n", afound ? "true" : "false");
    }

    --- SoupGate-Win32 v1.05
    * Origin: fsxNet Usenet Gateway (21:1/5)
  • From Ike Naar@21:1/5 to Mark Summerfield on Thu Aug 15 08:55:45 2024
    On 2024-08-15, Mark Summerfield <mark@qtrac.eu> wrote:
    I have a program (complete source at the end) which correctly outputs this:

    ["charlie" "foxtrot" "oscar" "echo" "alpha" "golf" "tango" "delta" "bravo"] ["alpha" "bravo" "charlie" "delta" "echo" "foxtrot" "golf" "oscar" "tango"] check_array OK
    check_index_found true OK
    check_index_found false OK

    However, if you uncomment the "//#define BUG" line, the output (in gdb) is this:

    (gdb) run
    Starting program: /home/mark/tmp/mycmpstr/mycmpstr
    [Thread debugging using libthread_db enabled]
    Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". ["charlie" "foxtrot" "oscar" "echo" "alpha" "golf" "tango" "delta" "bravo"] ["alpha" "bravo" "charlie" "delta" "echo" "foxtrot" "golf" "oscar" "tango"] check_array OK

    Program received signal SIGSEGV, Segmentation fault.
    __strcmp_avx2 () at ../sysdeps/x86_64/multiarch/strcmp-avx2.S:283
    283 ../sysdeps/x86_64/multiarch/strcmp-avx2.S: No such file or directory. (gdb) bt
    #0 __strcmp_avx2 () at ../sysdeps/x86_64/multiarch/strcmp-avx2.S:283
    #1 0x00005555555553e0 in mystrcmp (s=0x555555556030, t=0x7fffffffde10) at mycmpstr.c:50
    #2 0x00007ffff7e0a53c in __GI_bsearch (__key=0x555555556030, __base=0x7fffffffddf0,
    __nmemb=<optimized out>, __size=8, __compar=0x5555555553b7 <mystrcmp>)
    at ../bits/stdlib-bsearch.h:33
    #3 0x0000555555555317 in main () at mycmpstr.c:30

    The difference is that without BUG defined I use my own binary search,
    but with BUG defined I use bsearch.

    You're mixing up char* and char** in a few places.

    [...]

    // mystrcmp segfaults:
    char* p = bsearch("oscar", words, size, sizeof(char*), mystrcmp);

    The elements of the words array have type pointer-to-char.
    So the first argument to bsearch should be the address of such an element, that is,
    a pointer-to-pointer-to-char and it should contain the adress of a pointer to the
    first character of the oscar string.
    Also, the value returned from bsearch should be interpreted as a pointer-to-pointer-to-char.

    char * key = "oscar";
    char * * p = bsearch(&key, words, size, sizeof(char*), mystrcmp);

    index = p - words[0];
    found = p != NULL:

    Two problems here: first, if bsearch returns NULL, the subtraction is ill-defined.
    Second, if bsearch returns non-null the index will be p - words, not p - words[0];

    found = p != NULL:
    if (found) index = p - words;

    --- SoupGate-Win32 v1.05
    * Origin: fsxNet Usenet Gateway (21:1/5)
  • From Richard Harnden@21:1/5 to Mark Summerfield on Thu Aug 15 10:06:53 2024
    On 15/08/2024 06:56, Mark Summerfield wrote:
    I have a program (complete source at the end) which correctly outputs this:

    ["charlie" "foxtrot" "oscar" "echo" "alpha" "golf" "tango" "delta" "bravo"] ["alpha" "bravo" "charlie" "delta" "echo" "foxtrot" "golf" "oscar" "tango"] check_array OK
    check_index_found true OK
    check_index_found false OK

    However, if you uncomment the "//#define BUG" line, the output (in gdb) is this:

    (gdb) run
    Starting program: /home/mark/tmp/mycmpstr/mycmpstr
    [Thread debugging using libthread_db enabled]
    Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". ["charlie" "foxtrot" "oscar" "echo" "alpha" "golf" "tango" "delta" "bravo"] ["alpha" "bravo" "charlie" "delta" "echo" "foxtrot" "golf" "oscar" "tango"] check_array OK

    Program received signal SIGSEGV, Segmentation fault.
    __strcmp_avx2 () at ../sysdeps/x86_64/multiarch/strcmp-avx2.S:283
    283 ../sysdeps/x86_64/multiarch/strcmp-avx2.S: No such file or directory. (gdb) bt
    #0 __strcmp_avx2 () at ../sysdeps/x86_64/multiarch/strcmp-avx2.S:283
    #1 0x00005555555553e0 in mystrcmp (s=0x555555556030, t=0x7fffffffde10) at mycmpstr.c:50
    #2 0x00007ffff7e0a53c in __GI_bsearch (__key=0x555555556030, __base=0x7fffffffddf0,
    __nmemb=<optimized out>, __size=8, __compar=0x5555555553b7 <mystrcmp>)
    at ../bits/stdlib-bsearch.h:33
    #3 0x0000555555555317 in main () at mycmpstr.c:30

    The difference is that without BUG defined I use my own binary search,
    but with BUG defined I use bsearch.

    [...]

    int mystrcmp(const void* s, const void* t) {
    return strcmp(*(const char**)s, *(const char**)t);

    You don't need to cast void*s, change that to:
    return strcmp(s, t);

    }


    --- SoupGate-Win32 v1.05
    * Origin: fsxNet Usenet Gateway (21:1/5)
  • From Michael S@21:1/5 to Richard Harnden on Thu Aug 15 15:10:09 2024
    On Thu, 15 Aug 2024 10:06:53 +0100
    Richard Harnden <richard.nospam@gmail.invalid> wrote:

    On 15/08/2024 06:56, Mark Summerfield wrote:
    I have a program (complete source at the end) which correctly
    outputs this:

    ["charlie" "foxtrot" "oscar" "echo" "alpha" "golf" "tango" "delta"
    "bravo"] ["alpha" "bravo" "charlie" "delta" "echo" "foxtrot" "golf"
    "oscar" "tango"] check_array OK
    check_index_found true OK
    check_index_found false OK

    However, if you uncomment the "//#define BUG" line, the output (in
    gdb) is this:

    (gdb) run
    Starting program: /home/mark/tmp/mycmpstr/mycmpstr
    [Thread debugging using libthread_db enabled]
    Using host libthread_db library
    "/lib/x86_64-linux-gnu/libthread_db.so.1". ["charlie" "foxtrot"
    "oscar" "echo" "alpha" "golf" "tango" "delta" "bravo"] ["alpha"
    "bravo" "charlie" "delta" "echo" "foxtrot" "golf" "oscar" "tango"] check_array OK

    Program received signal SIGSEGV, Segmentation fault.
    __strcmp_avx2 () at ../sysdeps/x86_64/multiarch/strcmp-avx2.S:283
    283 ../sysdeps/x86_64/multiarch/strcmp-avx2.S: No such file
    or directory. (gdb) bt
    #0 __strcmp_avx2 () at
    ../sysdeps/x86_64/multiarch/strcmp-avx2.S:283 #1
    0x00005555555553e0 in mystrcmp (s=0x555555556030, t=0x7fffffffde10)
    at mycmpstr.c:50 #2 0x00007ffff7e0a53c in __GI_bsearch (__key=0x555555556030, __base=0x7fffffffddf0, __nmemb=<optimized
    , __size=8, __compar=0x5555555553b7 <mystrcmp>) at out>../bits/stdlib-bsearch.h:33 #3 0x0000555555555317 in main ()
    at mycmpstr.c:30

    The difference is that without BUG defined I use my own binary
    search, but with BUG defined I use bsearch.

    [...]

    int mystrcmp(const void* s, const void* t) {
    return strcmp(*(const char**)s, *(const char**)t);

    You don't need to cast void*s, change that to:
    return strcmp(s, t);

    }


    No, his mystrcmp() is correct. The bugs are elsewhere, as explained by
    Ike Naar.

    Unfortunately, unlike the previous case with attempt to modify string
    literals, these bugs are less likely to lead to interesting discussion.

    --- SoupGate-Win32 v1.05
    * Origin: fsxNet Usenet Gateway (21:1/5)
  • From Mark Summerfield@21:1/5 to Ike Naar on Thu Aug 15 16:43:16 2024
    On Thu, 15 Aug 2024 08:55:45 -0000 (UTC), Ike Naar wrote:

    [snip]
    The elements of the words array have type pointer-to-char.
    So the first argument to bsearch should be the address of such an
    element, that is,
    a pointer-to-pointer-to-char and it should contain the adress of a
    pointer to the first character of the oscar string.
    Also, the value returned from bsearch should be interpreted as a pointer-to-pointer-to-char.

    char * key = "oscar";
    char * * p = bsearch(&key, words, size, sizeof(char*), mystrcmp);

    index = p - words[0];
    found = p != NULL:

    Two problems here: first, if bsearch returns NULL, the subtraction is ill-defined.
    Second, if bsearch returns non-null the index will be p - words, not p - words[0];

    found = p != NULL:
    if (found) index = p - words;

    Thank you! That solved the problem and clarified my mistakes.
    By changing the code along the suggested lines it works great:

    char* s = "oscar";
    char** p = bsearch(&s, words, size, sizeof(char*), mystrcmp);
    if (p) {
    index = p - words;
    found = true;
    }

    --- SoupGate-Win32 v1.05
    * Origin: fsxNet Usenet Gateway (21:1/5)