-
Type: Improvement
-
Resolution: Fixed
-
Priority: Major - P3
-
Affects Version/s: None
-
Component/s: None
-
3
-
Storage - Ra 2022-05-30
Occasionally WT coredumps will contain 9000+ frames indicating a stack overflow occurred. This appears to be a result of calling the error handler from the error handler path. This is problematic when the cause of the error is printf being unable to print.
An example callstack looks like this:
#9753 0x00000000005ad1ec in __split_verify_intl_key_order (session=0x18b88d0, page=0x7f51f80c60b0) at ../src/btree/bt_split.c:96 #9752 0x00000000005065c3 in __wt_errx_func (session=0x18b88d0, func=0x690cf0 <__PRETTY_FUNCTION__.39251> "__split_verify_intl_key_order", line=96, fmt=0x6900d0 "%s") at ../src/support/err.c:329 #9751 0x00000000005061de in __eventv (session=0x18b88d0, msg_event=false, error=0, func=0x690cf0 <__PRETTY_FUNCTION__.39251> "__split_verify_intl_key_order", line=96, fmt=0x6900d0 "%s", ap=0x7f520ea5b640) at ../src/support/err.c:263 #9750 0x00000000005059b6 in __handle_error_default (handler=0x8f1da0 <event_handler>, wt_session=0x18b88d0, error=0, errmsg=0x7f520ea5a520 "[1648726398:224851][21816:0x7f520ea5c700], file:wt.wt, WT_SESSION.rollback_transaction: __split_verify_intl_key_order, 96: cmp < 0") at ../src/support/err.c:26 #9749 0x0000000000505809 in __wt_fprintf (session=0x18b88d0, fstr=0x190f958, fmt=0x677550 "%s\n") at ../src/include/os_fstream_inline.h:66 #9748 0x0000000000505753 in __wt_vfprintf (session=0x18b88d0, fstr=0x190f958, fmt=0x677550 "%s\n", ap=0x7f520ea5a390) at ../src/include/os_fstream_inline.h:51 #9747 0x00000000004be7d1 in __stdio_printf (session=0x18b88d0, fs=0x190f958, fmt=0x677550 "%s\n", ap=0x7f520ea5a390) at ../src/os_common/os_fstream_stdio.c:53 #9746 0x00000000005064ec in __wt_err_func (session=0x18b88d0, error=5, func=0x66f880 <__PRETTY_FUNCTION__.39246> "__stdio_printf", line=53, fmt=0x66f830 "%s: printf") at ../src/support/err.c:309 #9745 0x00000000005061de in __eventv (session=0x18b88d0, msg_event=false, error=5, func=0x66f880 <__PRETTY_FUNCTION__.39246> "__stdio_printf", line=53, fmt=0x66f830 "%s: printf", ap=0x7f520ea5a220) at ../src/support/err.c:263 #9744 0x00000000005059b6 in __handle_error_default (handler=0x8f1da0 <event_handler>, wt_session=0x18b88d0, error=5, errmsg=0x7f520ea59100 "[1648726398:224877][21816:0x7f520ea5c700], file:wt.wt, WT_SESSION.rollback_transaction: __stdio_printf, 53: stderr: printf: Input/output error") at ../src/support/err.c:26 #9743 0x0000000000505809 in __wt_fprintf (session=0x18b88d0, fstr=0x190f958, fmt=0x677550 "%s\n") at ../src/include/os_fstream_inline.h:66 #9742 0x0000000000505753 in __wt_vfprintf (session=0x18b88d0, fstr=0x190f958, fmt=0x677550 "%s\n", ap=0x7f520ea58f70) at ../src/include/os_fstream_inline.h:51 #9741 0x00000000004be7d1 in __stdio_printf (session=0x18b88d0, fs=0x190f958, fmt=0x677550 "%s\n", ap=0x7f520ea58f70) at ../src/os_common/os_fstream_stdio.c:53 #9740 0x00000000005064ec in __wt_err_func (session=0x18b88d0, error=5, func=0x66f880 <__PRETTY_FUNCTION__.39246> "__stdio_printf", line=53, fmt=0x66f830 "%s: printf") at ../src/support/err.c:309 #9739 0x00000000005061de in __eventv (session=0x18b88d0, msg_event=false, error=5, func=0x66f880 <__PRETTY_FUNCTION__.39246> "__stdio_printf", line=53, fmt=0x66f830 "%s: printf", ap=0x7f520ea58e00) at ../src/support/err.c:263 #9738 0x00000000005059b6 in __handle_error_default (handler=0x8f1da0 <event_handler>, wt_session=0x18b88d0, error=5, errmsg=0x7f520ea57ce0 "[1648726398:224888][21816:0x7f520ea5c700], file:wt.wt, WT_SESSION.rollback_transaction: __stdio_printf, 53: stderr: printf: Input/output error") at ../src/support/err.c:26 #9737 0x0000000000505809 in __wt_fprintf (session=0x18b88d0, fstr=0x190f958, fmt=0x677550 "%s\n") at ../src/include/os_fstream_inline.h:66 #9736 0x0000000000505753 in __wt_vfprintf (session=0x18b88d0, fstr=0x190f958, fmt=0x677550 "%s\n", ap=0x7f520ea57b50) at ../src/include/os_fstream_inline.h:51 #9735 0x00000000004be7d1 in __stdio_printf (session=0x18b88d0, fs=0x190f958, fmt=0x677550 "%s\n", ap=0x7f520ea57b50) at ../src/os_common/os_fstream_stdio.c:53 #9734 0x00000000005064ec in __wt_err_func (session=0x18b88d0, error=5, func=0x66f880 <__PRETTY_FUNCTION__.39246> "__stdio_printf", line=53, fmt=0x66f830 "%s: printf") at ../src/support/err.c:309 #9733 0x00000000005061de in __eventv (session=0x18b88d0, msg_event=false, error=5, func=0x66f880 <__PRETTY_FUNCTION__.39246> "__stdio_printf", line=53, fmt=0x66f830 "%s: printf", ap=0x7f520ea579e0) at ../src/support/err.c:263 #9732 0x00000000005059b6 in __handle_error_default (handler=0x8f1da0 <event_handler>, wt_session=0x18b88d0, error=5, errmsg=0x7f520ea568c0 "[1648726398:224896][21816:0x7f520ea5c700], file:wt.wt, WT_SESSION.rollback_transaction: __stdio_printf, 53: stderr: printf: Input/output error") at ../src/support/err.c:26 #9731 0x0000000000505809 in __wt_fprintf (session=0x18b88d0, fstr=0x190f958, fmt=0x677550 "%s\n") at ../src/include/os_fstream_inline.h:66 #9730 0x0000000000505753 in __wt_vfprintf (session=0x18b88d0, fstr=0x190f958, fmt=0x677550 "%s\n", ap=0x7f520ea56730) at ../src/include/os_fstream_inline.h:51 #9729 0x00000000004be7d1 in __stdio_printf (session=0x18b88d0, fs=0x190f958, fmt=0x677550 "%s\n", ap=0x7f520ea56730) at ../src/os_common/os_fstream_stdio.c:53 #9728 0x00000000005064ec in __wt_err_func (session=0x18b88d0, error=5, func=0x66f880 <__PRETTY_FUNCTION__.39246> "__stdio_printf", line=53, fmt=0x66f830 "%s: printf") at ../src/support/err.c:309 #9727 0x00000000005061de in __eventv (session=0x18b88d0, msg_event=false, error=5, func=0x66f880 <__PRETTY_FUNCTION__.39246> "__stdio_printf", line=53, fmt=0x66f830 "%s: printf", ap=0x7f520ea565c0) at ../src/support/err.c:263 #9726 0x00000000005059b6 in __handle_error_default (handler=0x8f1da0 <event_handler>, wt_session=0x18b88d0, error=5, errmsg=0x7f520ea554a0 "[1648726398:224901][21816:0x7f520ea5c700], file:wt.wt, WT_SESSION.rollback_transaction: __stdio_printf, 53: stderr: printf: Input/output error") at ../src/support/err.c:26 #9725 0x0000000000505809 in __wt_fprintf (session=0x18b88d0, fstr=0x190f958, fmt=0x677550 "%s\n") at ../src/include/os_fstream_inline.h:66 #9724 0x0000000000505753 in __wt_vfprintf (session=0x18b88d0, fstr=0x190f958, fmt=0x677550 "%s\n", ap=0x7f520ea55310) at ../src/include/os_fstream_inline.h:51 #9723 0x00000000004be7d1 in __stdio_printf (session=0x18b88d0, fs=0x190f958, fmt=0x677550 "%s\n", ap=0x7f520ea55310) at ../src/os_common/os_fstream_stdio.c:53 53 WT_RET_MSG(session, EIO, "%s: printf", fs->name);
While not a big issue it does make debugging the core rather annoying. I think the fix here would be to replace the WT_RET_MSG in __stdio_printf with just a WT_RET.
Scope:
- Validate that a stack overflow can be generated by making the call the vfprintf fail in the printf function.
- Come up with a solution or used the propose solution to fix the issue.