[erlang-bugs] SIGSEGV in R11B-5 in mbc_alloc() at beam/erl_alloc_util.c:657

Scott Lystig Fritchie fritchie@REDACTED
Sun Sep 23 04:25:47 CEST 2007


Good evening, everyone.  Has anyone else seen a segmentation
violation in mbc_alloc()?

I'm trying to get more info from colleagues to see if this kind of
crash has happened before.  As far as I know so far, this crash is
rare, but it happened during a 24-hour stress test: a protocol load
generator probably had this machine running at less than 20% total CPU
idle and was therefore probably creating a fairly heavy load on a 2-node
Mnesia cluster (symmetric table replicas) using a mix of disc_copies
and ram_copies tables.

Platform details:

[root@REDACTED ~]$ uname -a
Linux tc-40432-3s 2.6.9-42.ELsmp #1 SMP Sat Aug 12 09:39:11 CDT 2006 i686 athlon i386 GNU/Linux
[root@REDACTED ~]$ cat /etc/redhat-release 
CentOS release 4.4 (Final)
[root@REDACTED ~]$ free
             total       used       free     shared    buffers     cached
Mem:      16631988    2222040   14409948          0     158240    1864300
-/+ buffers/cache:     199500   16432488
Swap:      2031608          0    2031608

The app was running with arguments like this.  If I recall correctly,
the ERL_MAX_ETS_TABLES setting is there to help in case there is a large
number of Mnesia transactions trying to execute simultaneously.  I
hope a SIGSEGV crash wouldn't happen if that limit were reached, but
if it's possible, then that's the likely trigger, I suppose.

env ERL_MAX_ETS_TABLES=10007 \
erl \
    +A 64 +K true -smp auto -noinput -noshell \
    -sname ${FOO}$1 \
    -kernel dist_auto_connect once \
    -kernel net_ticktime $CLUSTER_TIMEOUT \
    -mnesia dump_log_write_threshold 500000 \
    -mnesia dump_log_time_threshold 900000 \
    -boot $BAR/lib/app/baz \
    -pz $BOO/lib/app \
    -config $BAR/var/data/node$1.config \
    -zoo_config $BAR/etc/zoo.conf \
    -foo_config $BAR/etc/foo.conf

Thanks for listening.

-Scott

Core was generated by `/usr/local/gemini/ert/R11B-5/lib/erlang/erts-5.5.5/bin/beam.smp -A 64 -K true -'.
Program terminated with signal 11, Segmentation fault.
Reading symbols from /lib/libutil.so.1...done.
Loaded symbols for /lib/libutil.so.1
Reading symbols from /lib/libdl.so.2...done.
Loaded symbols for /lib/libdl.so.2
Reading symbols from /lib/tls/libm.so.6...done.
Loaded symbols for /lib/tls/libm.so.6
Reading symbols from /lib/tls/libpthread.so.0...done.
Loaded symbols for /lib/tls/libpthread.so.0
Reading symbols from /usr/lib/libncurses.so.5...done.
Loaded symbols for /usr/lib/libncurses.so.5
Reading symbols from /lib/tls/librt.so.1...done.
Loaded symbols for /lib/tls/librt.so.1
Reading symbols from /lib/tls/libc.so.6...done.
Loaded symbols for /lib/tls/libc.so.6
Reading symbols from /lib/ld-linux.so.2...done.
Loaded symbols for /lib/ld-linux.so.2
#0  mbc_alloc (allctr=0x818a580, size=Variable "size" is not available.
) at beam/erl_alloc_util.c:657
657     beam/erl_alloc_util.c: No such file or directory.
        in beam/erl_alloc_util.c
(gdb) where
#0  mbc_alloc (allctr=0x818a580, size=Variable "size" is not available.
) at beam/erl_alloc_util.c:657
#1  0x0807656e in erts_alcu_alloc_ts (type=106, extra=0x818a580, size=168)
    at beam/erl_alloc_util.c:2158
#2  0x080caa58 in db_get_term (tb=0xa7a8dd16, old=0x0, offset=2879645435, 
    obj=8) at beam/erl_alloc.h:174
#3  0x080d737f in db_put_hash (proc=0xaaeb0424, tbl=0x8b2b726c, 
    obj=2812862006, ret=0x80c52a9) at beam/erl_db_hash.c:2077
#4  0x080c52a9 in ets_insert_2 (A__p=0xaaeb0424, A_1=15519, A_2=2812862006)
    at beam/erl_db.c:627
#5  0x080fe71d in process_main () at beam/beam_emu.c:2293
#6  0x080b00cf in sched_thread_func (vesdp=0xb6e2c968)
    at beam/erl_process.c:947
#7  0x08147f1a in thr_wrapper (vtwd=0xbfffeee0) at common/ethread.c:503
#8  0x00444371 in start_thread () from /lib/tls/libpthread.so.0
#9  0x001f0ffe in clone () from /lib/tls/libc.so.6
(gdb) p nxt_blk
$1 = (Block_t *) 0x9d4dcda4
(gdb) p *nxt_blk
$2 = 634994553
(gdb) p nxt_blk_sz
$3 = 634994552
(gdb) p last_blk_flg
$4 = 0
(gdb) p blk
$5 = (Block_t *) 0x9d4dccf4
(gdb) p *blk
$6 = 176
(gdb) p *allctr
$7 = {name_prefix = 0x814c232 "ets_", alloc_no = 8, name = {alloc = 0, 
    realloc = 0, free = 0}, vsn_str = 0x814d0f9 "0.9", sbc_threshold = 524288, 
  sbc_move_threshold = 80, main_carrier_size = 131072, max_mseg_sbcs = 256, 
  max_mseg_mbcs = 10, largest_mbc_size = 5242880, smallest_mbc_size = 1048576, 
  mbc_growth_stages = 10, mseg_opt = {cache = 1, preserv = 1, 
    abs_shrink_th = 4145152, rel_shrink_th = 20}, mbc_header_size = 20, 
  min_mbc_size = 16384, min_mbc_first_free_size = 4096, min_block_size = 32, 
  mbc_list = {first = 0xb7ca6008, last = 0x829a5008}, sbc_list = {first = 0x0, 
    last = 0x0}, main_carrier = 0xb7ca6008, 
  get_free_block = 0x8078b60 <bf_get_free_block>, 
  link_free_block = 0x8078a20 <bf_link_free_block>, 
  unlink_free_block = 0x8078ad0 <bf_unlink_free_block>, 
  info_options = 0x8078cc4 <info_options>, 
  get_next_mbc_size = 0x80731c0 <get_next_mbc_size>, creating_mbc = 0, 
  destroying_mbc = 0, init_atoms = 0x8078c2c <init_atoms>, mutex = {mtx = {
      pt_mtx = {__data = {__lock = 2, __count = 0, __owner = 14990, 
          __kind = 0, __nusers = 1, __spins = 0}, 
        __size = "\002\000\000\000\000\000\000\000\216:\000\000\000\000\000\000\001\000\000\000\000\000\000", __align = 2}, is_rec_mtx = 0, prev = 0x81c17a0, 
      next = 0x8189a8c}}, thread_safe = 1, ts_list = {prev = 0x0, next = 0x0}, 
  atoms_initialized = 0, stopped = 0, calls = {this_alloc = {giga_no = 1, 
      no = 620152686}, this_free = {giga_no = 1, no = 619361257}, 
    this_realloc = {giga_no = 0, no = 15006964}, mseg_alloc = {giga_no = 0, 
      no = 14}, mseg_dealloc = {giga_no = 0, no = 4}, mseg_realloc = {
      giga_no = 0, no = 0}, sys_alloc = {giga_no = 0, no = 396}, sys_free = {
      giga_no = 0, no = 246}, sys_realloc = {giga_no = 0, no = 0}}, sbcs = {
    curr_mseg = {no = 0, size = 0}, curr_sys_alloc = {no = 0, size = 0}, 
    max = {no = 0, size = 0}, max_ever = {no = 0, size = 0}, blocks = {curr = {
        no = 0, size = 0}, max = {no = 0, size = 0}, max_ever = {no = 0, 
        size = 0}}}, mbcs = {curr_mseg = {no = 10, size = 29376512}, 
    curr_sys_alloc = {no = 150, size = 156368928}, max = {no = 160, 
      size = 185745440}, max_ever = {no = 0, size = 0}, blocks = {curr = {
        no = 791428, size = 75470808}, max = {no = 1891844, size = 161084480}, 
      max_ever = {no = 0, size = 0}}}}
(gdb) p blk_sz
$8 = 176
(gdb) p last_blk_flg
$9 = 0
(gdb) p size
Variable "size" is not available.
(gdb) up
#1  0x0807656e in erts_alcu_alloc_ts (type=106, extra=0x818a580, size=168)
    at beam/erl_alloc_util.c:2158
2158    in beam/erl_alloc_util.c
(gdb) p size
$10 = 168
(gdb) p res
No symbol "res" in current context.
(gdb) p allctr->calls.this_alloc
No symbol "allctr" in current context.
(gdb) p type
$11 = 106
(gdb) p extra
$12 = (void *) 0x818a580
(gdb) p (Allctr_t *) extra
$13 = (struct Allctr_t_ *) 0x818a580
(gdb) p * (Allctr_t *) extra
$14 = {name_prefix = 0x814c232 "ets_", alloc_no = 8, name = {alloc = 0, 
    realloc = 0, free = 0}, vsn_str = 0x814d0f9 "0.9", sbc_threshold = 524288, 
  sbc_move_threshold = 80, main_carrier_size = 131072, max_mseg_sbcs = 256, 
  max_mseg_mbcs = 10, largest_mbc_size = 5242880, smallest_mbc_size = 1048576, 
  mbc_growth_stages = 10, mseg_opt = {cache = 1, preserv = 1, 
    abs_shrink_th = 4145152, rel_shrink_th = 20}, mbc_header_size = 20, 
  min_mbc_size = 16384, min_mbc_first_free_size = 4096, min_block_size = 32, 
  mbc_list = {first = 0xb7ca6008, last = 0x829a5008}, sbc_list = {first = 0x0, 
    last = 0x0}, main_carrier = 0xb7ca6008, 
  get_free_block = 0x8078b60 <bf_get_free_block>, 
  link_free_block = 0x8078a20 <bf_link_free_block>, 
  unlink_free_block = 0x8078ad0 <bf_unlink_free_block>, 
  info_options = 0x8078cc4 <info_options>, 
  get_next_mbc_size = 0x80731c0 <get_next_mbc_size>, creating_mbc = 0, 
  destroying_mbc = 0, init_atoms = 0x8078c2c <init_atoms>, mutex = {mtx = {
      pt_mtx = {__data = {__lock = 2, __count = 0, __owner = 14990, 
          __kind = 0, __nusers = 1, __spins = 0}, 
        __size = "\002\000\000\000\000\000\000\000\216:\000\000\000\000\000\000\001\000\000\000\000\000\000", __align = 2}, is_rec_mtx = 0, prev = 0x81c17a0, 
      next = 0x8189a8c}}, thread_safe = 1, ts_list = {prev = 0x0, next = 0x0}, 
  atoms_initialized = 0, stopped = 0, calls = {this_alloc = {giga_no = 1, 
      no = 620152686}, this_free = {giga_no = 1, no = 619361257}, 
    this_realloc = {giga_no = 0, no = 15006964}, mseg_alloc = {giga_no = 0, 
      no = 14}, mseg_dealloc = {giga_no = 0, no = 4}, mseg_realloc = {
      giga_no = 0, no = 0}, sys_alloc = {giga_no = 0, no = 396}, sys_free = {
      giga_no = 0, no = 246}, sys_realloc = {giga_no = 0, no = 0}}, sbcs = {
    curr_mseg = {no = 0, size = 0}, curr_sys_alloc = {no = 0, size = 0}, 
    max = {no = 0, size = 0}, max_ever = {no = 0, size = 0}, blocks = {curr = {
        no = 0, size = 0}, max = {no = 0, size = 0}, max_ever = {no = 0, 
        size = 0}}}, mbcs = {curr_mseg = {no = 10, size = 29376512}, 
    curr_sys_alloc = {no = 150, size = 156368928}, max = {no = 160, 
      size = 185745440}, max_ever = {no = 0, size = 0}, blocks = {curr = {
        no = 791428, size = 75470808}, max = {no = 1891844, size = 161084480}, 
      max_ever = {no = 0, size = 0}}}}



More information about the erlang-bugs mailing list