Frequent Crashes In Timer Code

RB
Ross Beer
Thu, Mar 8, 2018 12:32 PM

Hi Ming,

When using Asterisk, I am frequently seeing issues with PJPROJECT timers. Please see the backtrace below and the attachments on the Asterisk issue: https://issues.asterisk.org/jira/browse/ASTERISK-27230

!@!@!@! thread1.txt !@!@!@!

$1 = {si_signo = 11, si_errno = 0, si_code = 1, _sifields = {_pad = {-640055112, 32646, 0 <repeats 26 times>}, _kill = {si_pid = -640055112, si_uid = 32646}, _timer = {si_tid = -640055112, si_overrun = 32646, si_sigval = {sival_int = 0, sival_ptr = 0x0}}, _rt = {si_pid = -640055112, si_uid = 32646, si_sigval = {sival_int = 0, sival_ptr = 0x0}}, _sigchld = {si_pid = -640055112, si_uid = 32646, si_status = 0, si_utime = 0, si_stime = 0}, _sigfault = {si_addr = 0x7f86d9d988b8}, _sigpoll = {si_band = 140217157257400, si_fd = 0}}}
Signal        Stop Print Pass to program Description
SIGSEGV      Yes Yes Yes Segmentation fault

Thread 1 (Thread 0x7f8616897700 (LWP 24001)):
#0  0x00007f861c2ae6cb in pop_freelist (ht=0x230fc00) at ../src/pj/timer.c:150
new_id = 1218303520
#1  0x00007f861c2aeebd in schedule_entry (ht=0x230fc00, entry=0x7f84581a6798, future_time=0x7f8616896960) at ../src/pj/timer.c:313
#2  0x00007f861c2af3b7 in schedule_w_grp_lock (ht=0x230fc00, entry=0x7f84581a6798, delay=0x7f861c501780 <t1_timer_val>, set_id=1, id_val=1, grp_lock=0x7f84009788c8) at ../src/pj/timer.c:527
status = 0
expires = {sec = 7814694, msec = 891}
#3  0x00007f861c2af486 in pj_timer_heap_schedule_w_grp_lock (ht=0x230fc00, entry=0x7f84581a6798, delay=0x7f861c501780 <t1_timer_val>, id_val=1, grp_lock=0x7f84009788c8) at ../src/pj/timer.c:580
#4  0x00007f861c21bf27 in tsx_schedule_timer (tsx=0x7f84581a6618, entry=0x7f84581a6798, delay=0x7f861c501780 <t1_timer_val>, active_id=1) at ../src/pjsip/sip_transaction.c:982
timer_heap = 0x230fc00
status = 0
#5  0x00007f861c21e8d2 in tsx_on_state_null (tsx=0x7f84581a6618, event=0x7f8616896a50) at ../src/pjsip/sip_transaction.c:2476
tdata = 0x7f85e401fef8
status = 0
#6  0x00007f861c21d5e6 in pjsip_tsx_send_msg (tsx=0x7f84581a6618, tdata=0x7f85e401fef8) at ../src/pjsip/sip_transaction.c:1792
event = {prev = 0x7f84009788c8, next = 0x7f8400978918, type = PJSIP_EVENT_TX_MSG, body = {timer = {entry = 0x7f85e401fef8}, tsx_state = {src = {rdata = 0x7f85e401fef8, tdata = 0x7f85e401fef8, timer = 0x7f85e401fef8, status = -469631240, data = 0x7f85e401fef8}, tsx = 0x7f84111d1188, prev_state = 9930952, type = 32644}, tx_msg = {tdata = 0x7f85e401fef8}, tx_error = {tdata = 0x7f85e401fef8, tsx = 0x7f84111d1188}, rx_msg = {rdata = 0x7f85e401fef8}, user = {user1 = 0x7f85e401fef8, user2 = 0x7f84111d1188, user3 = 0x7f84009788c8, user4 = 0x7f8616896aa0}}}
status = 32646
#7  0x00007f861c22275c in pjsip_dlg_send_request (dlg=0x7f84111d0ec8, tdata=0x7f85e401fef8, mod_data_id=-1, mod_data=0x0) at ../src/pjsip/sip_dialog.c:1285
tsx_count = 1
tsx = 0x7f84581a6618
msg = 0x7f85e4020be0
status = 0
#8  0x00007f861c1eaaed in pjsip_evsub_send_request (sub=0x7f840d631958, tdata=0x7f85e401fef8) at ../src/pjsip-simple/evsub.c:1387
status = 32645
#9  0x00007f85ed3a7c50 in send_notify (tdata=<optimized out>, sub_tree=0x7f840c4c7ce0) at res_pjsip_pubsub.c:1962
res = <optimized out>
evsub = <optimized out>
tdata = 0x7f85e401fef8
#10 0x00007f85ed3a7c50 in send_notify (sub_tree=0x7f840c4c7ce0, force_full_state=force_full_state@entry=0) at res_pjsip_pubsub.c:2411
evsub = <optimized out>
tdata = 0x7f85e401fef8
#11 0x00007f85ed3a84b1 in ast_sip_subscription_notify (sub=0x7f840cccbae0, notify_data=notify_data@entry=0x7f8616896bb0, terminate=0) at res_pjsip_pubsub.c:2521
res = <optimized out>
dlg = 0x7f84111d0ec8
#12 0x00007f84ca7d2a49 in notify_task (obj=0x7f851c0066e0) at res_pjsip_exten_state.c:259
task_data = 0x7f851c0066e0
data = {body_type = 0x7f84ca7d33d0 "ast_sip_exten_state_data", body_data = 0x7f851c0066e0}
#13 0x00000000005f1b6a in ast_taskprocessor_execute (tps=0x36141c0) at taskprocessor.c:971
local = {local_data = 0x7f8616897700, data = 0x5f96ad <ast_threadstorage_set_ptr+60>}
t = 0x7f851c013900
size = 1
PRETTY_FUNCTION = "ast_taskprocessor_execute"
#14 0x00000000005fbb4a in execute_tasks (data=0x36141c0) at threadpool.c:1322
tps = 0x36141c0
#15 0x00000000005f1b6a in ast_taskprocessor_execute (tps=0x230d640) at taskprocessor.c:971
local = {local_data = 0x7f8616896c80, data = 0x230f220}
t = 0x7f851c013e00
size = 36762200
PRETTY_FUNCTION = "ast_taskprocessor_execute"
#16 0x00000000005f9d94 in threadpool_execute (pool=0x230f270) at threadpool.c:351
PRETTY_FUNCTION = "threadpool_execute"
#17 0x00000000005fb4a8 in worker_active (worker=0x7f850001fc00) at threadpool.c:1105
alive = 0
#18 0x00000000005fb260 in worker_start (arg=0x7f850001fc00) at threadpool.c:1024
worker = 0x7f850001fc00
saved_state = (DEAD | unknown: 32644)
PRETTY_FUNCTION = "worker_start"
#19 0x0000000000607d5e in dummy_start (data=0x7f850000ef90) at utils.c:1238
__cancel_buf = {__cancel_jmp_buf = {{__cancel_jmp_buf = {0, 8868561065908639346, 140213839243903, 140213880452864, 507904, 507904, 8868561065933805170, -8926318494827800974}, __mask_was_saved = 0}}, __pad = {0x7f8616896df0, 0x0, 0x0, 0x0}}
__cancel_routine = 0x4530e5 <ast_unregister_thread>
__cancel_arg = 0x7f8616897700
__not_first_call = 0
ret = 0x7f86196908d8
a = {start_routine = 0x5fb1d9 <worker_start>, data = 0x7f850001fc00, name = 0x7f8500007050 "worker_start        started at [ 1079] threadpool.c worker_thread_start()"}
#20 0x00007f861a09f61a in start_thread () at /usr/lib64/libpthread.so.0
#21 0x00007f86193db5fd in clone () at /usr/lib64/libc.so.6

Is there anything that can be done at the PJPROJECT level to resolve this issue?

Kind regards,

Ross

Hi Ming, When using Asterisk I am frequently seeing issues with PJPROJECT timers, please see the below backtrace and attachments to Asterisk Issue: https://issues.asterisk.org/jira/browse/ASTERISK-27230 !@!@!@! thread1.txt !@!@!@! $1 = {si_signo = 11, si_errno = 0, si_code = 1, _sifields = {_pad = {-640055112, 32646, 0 <repeats 26 times>}, _kill = {si_pid = -640055112, si_uid = 32646}, _timer = {si_tid = -640055112, si_overrun = 32646, si_sigval = {sival_int = 0, sival_ptr = 0x0}}, _rt = {si_pid = -640055112, si_uid = 32646, si_sigval = {sival_int = 0, sival_ptr = 0x0}}, _sigchld = {si_pid = -640055112, si_uid = 32646, si_status = 0, si_utime = 0, si_stime = 0}, _sigfault = {si_addr = 0x7f86d9d988b8}, _sigpoll = {si_band = 140217157257400, si_fd = 0}}} Signal Stop Print Pass to program Description SIGSEGV Yes Yes Yes Segmentation fault Thread 1 (Thread 0x7f8616897700 (LWP 24001)): #0 0x00007f861c2ae6cb in pop_freelist (ht=0x230fc00) at ../src/pj/timer.c:150 new_id = 1218303520 #1 0x00007f861c2aeebd in schedule_entry (ht=0x230fc00, entry=0x7f84581a6798, future_time=0x7f8616896960) at ../src/pj/timer.c:313 #2 0x00007f861c2af3b7 in schedule_w_grp_lock (ht=0x230fc00, entry=0x7f84581a6798, delay=0x7f861c501780 <t1_timer_val>, set_id=1, id_val=1, grp_lock=0x7f84009788c8) at ../src/pj/timer.c:527 status = 0 expires = {sec = 7814694, msec = 891} #3 0x00007f861c2af486 in pj_timer_heap_schedule_w_grp_lock (ht=0x230fc00, entry=0x7f84581a6798, delay=0x7f861c501780 <t1_timer_val>, id_val=1, grp_lock=0x7f84009788c8) at ../src/pj/timer.c:580 #4 0x00007f861c21bf27 in tsx_schedule_timer (tsx=0x7f84581a6618, entry=0x7f84581a6798, delay=0x7f861c501780 <t1_timer_val>, active_id=1) at ../src/pjsip/sip_transaction.c:982 timer_heap = 0x230fc00 status = 0 #5 0x00007f861c21e8d2 in tsx_on_state_null (tsx=0x7f84581a6618, event=0x7f8616896a50) at ../src/pjsip/sip_transaction.c:2476 tdata = 0x7f85e401fef8 status = 0 #6 0x00007f861c21d5e6 in pjsip_tsx_send_msg (tsx=0x7f84581a6618, 
tdata=0x7f85e401fef8) at ../src/pjsip/sip_transaction.c:1792 event = {prev = 0x7f84009788c8, next = 0x7f8400978918, type = PJSIP_EVENT_TX_MSG, body = {timer = {entry = 0x7f85e401fef8}, tsx_state = {src = {rdata = 0x7f85e401fef8, tdata = 0x7f85e401fef8, timer = 0x7f85e401fef8, status = -469631240, data = 0x7f85e401fef8}, tsx = 0x7f84111d1188, prev_state = 9930952, type = 32644}, tx_msg = {tdata = 0x7f85e401fef8}, tx_error = {tdata = 0x7f85e401fef8, tsx = 0x7f84111d1188}, rx_msg = {rdata = 0x7f85e401fef8}, user = {user1 = 0x7f85e401fef8, user2 = 0x7f84111d1188, user3 = 0x7f84009788c8, user4 = 0x7f8616896aa0}}} status = 32646 #7 0x00007f861c22275c in pjsip_dlg_send_request (dlg=0x7f84111d0ec8, tdata=0x7f85e401fef8, mod_data_id=-1, mod_data=0x0) at ../src/pjsip/sip_dialog.c:1285 tsx_count = 1 tsx = 0x7f84581a6618 msg = 0x7f85e4020be0 status = 0 #8 0x00007f861c1eaaed in pjsip_evsub_send_request (sub=0x7f840d631958, tdata=0x7f85e401fef8) at ../src/pjsip-simple/evsub.c:1387 status = 32645 #9 0x00007f85ed3a7c50 in send_notify (tdata=<optimized out>, sub_tree=0x7f840c4c7ce0) at res_pjsip_pubsub.c:1962 res = <optimized out> evsub = <optimized out> tdata = 0x7f85e401fef8 #10 0x00007f85ed3a7c50 in send_notify (sub_tree=0x7f840c4c7ce0, force_full_state=force_full_state@entry=0) at res_pjsip_pubsub.c:2411 evsub = <optimized out> tdata = 0x7f85e401fef8 #11 0x00007f85ed3a84b1 in ast_sip_subscription_notify (sub=0x7f840cccbae0, notify_data=notify_data@entry=0x7f8616896bb0, terminate=0) at res_pjsip_pubsub.c:2521 res = <optimized out> dlg = 0x7f84111d0ec8 #12 0x00007f84ca7d2a49 in notify_task (obj=0x7f851c0066e0) at res_pjsip_exten_state.c:259 task_data = 0x7f851c0066e0 data = {body_type = 0x7f84ca7d33d0 "ast_sip_exten_state_data", body_data = 0x7f851c0066e0} #13 0x00000000005f1b6a in ast_taskprocessor_execute (tps=0x36141c0) at taskprocessor.c:971 local = {local_data = 0x7f8616897700, data = 0x5f96ad <ast_threadstorage_set_ptr+60>} t = 0x7f851c013900 size = 1 __PRETTY_FUNCTION__ = 
"ast_taskprocessor_execute" #14 0x00000000005fbb4a in execute_tasks (data=0x36141c0) at threadpool.c:1322 tps = 0x36141c0 #15 0x00000000005f1b6a in ast_taskprocessor_execute (tps=0x230d640) at taskprocessor.c:971 local = {local_data = 0x7f8616896c80, data = 0x230f220} t = 0x7f851c013e00 size = 36762200 __PRETTY_FUNCTION__ = "ast_taskprocessor_execute" #16 0x00000000005f9d94 in threadpool_execute (pool=0x230f270) at threadpool.c:351 __PRETTY_FUNCTION__ = "threadpool_execute" #17 0x00000000005fb4a8 in worker_active (worker=0x7f850001fc00) at threadpool.c:1105 alive = 0 #18 0x00000000005fb260 in worker_start (arg=0x7f850001fc00) at threadpool.c:1024 worker = 0x7f850001fc00 saved_state = (DEAD | unknown: 32644) __PRETTY_FUNCTION__ = "worker_start" #19 0x0000000000607d5e in dummy_start (data=0x7f850000ef90) at utils.c:1238 __cancel_buf = {__cancel_jmp_buf = {{__cancel_jmp_buf = {0, 8868561065908639346, 140213839243903, 140213880452864, 507904, 507904, 8868561065933805170, -8926318494827800974}, __mask_was_saved = 0}}, __pad = {0x7f8616896df0, 0x0, 0x0, 0x0}} __cancel_routine = 0x4530e5 <ast_unregister_thread> __cancel_arg = 0x7f8616897700 __not_first_call = 0 ret = 0x7f86196908d8 a = {start_routine = 0x5fb1d9 <worker_start>, data = 0x7f850001fc00, name = 0x7f8500007050 "worker_start started at [ 1079] threadpool.c worker_thread_start()"} #20 0x00007f861a09f61a in start_thread () at /usr/lib64/libpthread.so.0 #21 0x00007f86193db5fd in clone () at /usr/lib64/libc.so.6 Is there anything that can be done at a PJPROJECT level to resolve the issue? Kind regards, Ross