/* $Id$ */ /* * Copyright (C) 2008-2011 Teluu Inc. (http://www.teluu.com) * Copyright (C) 2003-2008 Benny Prijono * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #define THIS_FILE "silencedet.c" #if 1 # define TRACE_(x) PJ_LOG(5,x) #else # define TRACE_(x) #endif /** * This enumeration specifies operation mode of silence detector */ typedef enum pjmedia_silence_det_mode { VAD_MODE_NONE, VAD_MODE_FIXED, VAD_MODE_ADAPTIVE } pjmedia_silence_det_mode; /** * Default settings */ #define DEF_RECALC_ON_VOICED 4000 /* Time to recalculate threshold in voiced condition, in ms */ #define DEF_RECALC_ON_SILENCE 2000 /* Time to recalculate threshold in silence condition, in ms. */ #define DEF_BEFORE_SILENCE 400 /* Silence time before really changing state into SILENCE, in ms. */ #define DEF_THRESHOLD 1000 /* Default threshold. */ /** * This enumeration specifies the states of the silence detector. */ enum pjmedia_silence_det_state { STATE_SILENCE, STATE_START_SILENCE, STATE_VOICED }; /** * This structure holds the silence detector state. */ struct pjmedia_silence_det { char objname[PJ_MAX_OBJ_NAME]; /**< VAD name. */ int mode; /**< VAD mode. */ unsigned ptime; /**< Frame time, in msec. */ unsigned threshold; /**< Current threshold level. */ unsigned sum_level; /**< Total sum of recent level. */ unsigned sum_cnt; /**< Number of level summed. */ unsigned silence_timer; /**< Silence condition timer. */ unsigned voiced_timer; /**< Voiced condition timer. */ enum pjmedia_silence_det_state state;/**< Silence detector state. */ unsigned recalc_on_voiced; /**< Setting of time to recalc threshold in voiced condition. */ unsigned recalc_on_silence; /**< Setting of time to recalc threshold in silence condition.*/ unsigned before_silence; /**< Setting of silence time before really changing state into SILENCE, in ms. */ }; PJ_DEF(pj_status_t) pjmedia_silence_det_create( pj_pool_t *pool, unsigned clock_rate, unsigned samples_per_frame, pjmedia_silence_det **p_sd) { pjmedia_silence_det *sd; PJ_ASSERT_RETURN(pool && p_sd, PJ_EINVAL); sd = PJ_POOL_ZALLOC_T(pool, pjmedia_silence_det); pj_ansi_snprintf(sd->objname, PJ_MAX_OBJ_NAME, "sd%p", sd); sd->objname[PJ_MAX_OBJ_NAME-1] = '\0'; sd->ptime = samples_per_frame * 1000 / clock_rate; /* Default settings */ pjmedia_silence_det_set_params(sd, -1, -1, -1); /* Restart in adaptive, silent mode */ pjmedia_silence_det_set_adaptive( sd, -1 ); *p_sd = sd; return PJ_SUCCESS; } PJ_DEF(pj_status_t) pjmedia_silence_det_set_name( pjmedia_silence_det *sd, const char *name) { PJ_ASSERT_RETURN(sd && name, PJ_EINVAL); pj_ansi_snprintf(sd->objname, PJ_MAX_OBJ_NAME, name, sd); sd->objname[PJ_MAX_OBJ_NAME-1] = '\0'; return PJ_SUCCESS; } PJ_DEF(pj_status_t) pjmedia_silence_det_set_adaptive(pjmedia_silence_det *sd, int threshold) { PJ_ASSERT_RETURN(sd, PJ_EINVAL); if (threshold < 0) threshold = DEF_THRESHOLD; sd->mode = VAD_MODE_ADAPTIVE; sd->threshold = threshold; return PJ_SUCCESS; } PJ_DEF(pj_status_t) pjmedia_silence_det_set_fixed( pjmedia_silence_det *sd, int threshold ) { PJ_ASSERT_RETURN(sd, PJ_EINVAL); if (threshold < 0) threshold = DEF_THRESHOLD; sd->mode = VAD_MODE_FIXED; sd->threshold = threshold; return PJ_SUCCESS; } PJ_DEF(pj_status_t) pjmedia_silence_det_set_params( pjmedia_silence_det *sd, int before_silence, int recalc_time1, int recalc_time2) { PJ_ASSERT_RETURN(sd, PJ_EINVAL); if (recalc_time1 < 0) recalc_time1 = DEF_RECALC_ON_VOICED; if (recalc_time2 < 0) recalc_time2 = DEF_RECALC_ON_SILENCE; if (before_silence < 0) before_silence = DEF_BEFORE_SILENCE; sd->recalc_on_voiced = recalc_time1; sd->recalc_on_silence = recalc_time2; sd->before_silence = before_silence; return PJ_SUCCESS; } PJ_DEF(pj_status_t) pjmedia_silence_det_disable( pjmedia_silence_det *sd ) { PJ_ASSERT_RETURN(sd, PJ_EINVAL); sd->mode = VAD_MODE_NONE; return PJ_SUCCESS; } PJ_DEF(pj_int32_t) pjmedia_calc_avg_signal( const pj_int16_t samples[], pj_size_t count) { pj_uint32_t sum = 0; const pj_int16_t * pcm = samples; const pj_int16_t * end = samples + count; if (count==0) return 0; while (pcm != end) { if (*pcm < 0) sum -= *pcm++; else sum += *pcm++; } return (pj_int32_t)(sum / count); } PJ_DEF(pj_bool_t) pjmedia_silence_det_apply( pjmedia_silence_det *sd, pj_uint32_t level) { int avg_recent_level; if (sd->mode == VAD_MODE_NONE) return PJ_FALSE; if (sd->mode == VAD_MODE_FIXED) return (level < sd->threshold); /* Calculating recent level */ sd->sum_level += level; ++sd->sum_cnt; avg_recent_level = (sd->sum_level / sd->sum_cnt); if (level > sd->threshold || level >= PJMEDIA_SILENCE_DET_MAX_THRESHOLD) { sd->silence_timer = 0; sd->voiced_timer += sd->ptime; switch(sd->state) { case STATE_VOICED: if (sd->voiced_timer > sd->recalc_on_voiced) { /* Voiced for long time (>recalc_on_voiced), current * threshold seems to be too low. */ sd->threshold = (avg_recent_level + sd->threshold) >> 1; TRACE_((THIS_FILE,"Re-adjust threshold (in talk burst)" "to %d", sd->threshold)); sd->voiced_timer = 0; /* Reset sig_level */ sd->sum_level = avg_recent_level; sd->sum_cnt = 1; } break; case STATE_SILENCE: TRACE_((THIS_FILE,"Starting talk burst (level=%d threshold=%d)", level, sd->threshold)); case STATE_START_SILENCE: sd->state = STATE_VOICED; /* Reset sig_level */ sd->sum_level = level; sd->sum_cnt = 1; break; default: pj_assert(0); break; } } else { sd->voiced_timer = 0; sd->silence_timer += sd->ptime; switch(sd->state) { case STATE_SILENCE: if (sd->silence_timer >= sd->recalc_on_silence) { sd->threshold = avg_recent_level << 1; TRACE_((THIS_FILE,"Re-adjust threshold (in silence)" "to %d", sd->threshold)); sd->silence_timer = 0; /* Reset sig_level */ sd->sum_level = avg_recent_level; sd->sum_cnt = 1; } break; case STATE_VOICED: sd->state = STATE_START_SILENCE; /* Reset sig_level */ sd->sum_level = level; sd->sum_cnt = 1; case STATE_START_SILENCE: if (sd->silence_timer >= sd->before_silence) { sd->state = STATE_SILENCE; sd->threshold = avg_recent_level << 1; TRACE_((THIS_FILE,"Starting silence (level=%d " "threshold=%d)", level, sd->threshold)); /* Reset sig_level */ sd->sum_level = avg_recent_level; sd->sum_cnt = 1; } break; default: pj_assert(0); break; } } return (sd->state == STATE_SILENCE); } PJ_DEF(pj_bool_t) pjmedia_silence_det_detect( pjmedia_silence_det *sd, const pj_int16_t samples[], pj_size_t count, pj_int32_t *p_level) { pj_uint32_t level; /* Calculate average signal level. */ level = pjmedia_calc_avg_signal(samples, count); /* Report to caller, if required. */ if (p_level) *p_level = level; return pjmedia_silence_det_apply(sd, level); }