/* * iLBC - a library for the iLBC codec * * enhancer.c - The iLBC low bit rate speech codec. * * Adapted by Steve Underwood from the reference * iLBC code supplied in RFC3951. * * Original code Copyright (C) The Internet Society (2004). * All changes to produce this version Copyright (C) 2008 by Steve Underwood * All Rights Reserved. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * $Id: enhancer.c,v 1.2 2008/03/06 12:27:38 steveu Exp $ */ /*! \file */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include "ilbc.h" #include "constants.h" #include "filter.h" #include "enhancer.h" /*----------------------------------------------------------------* * Find index in array such that the array element with said * index is the element of said array closest to "value" * according to the squared-error criterion *---------------------------------------------------------------*/ static void NearestNeighbor(int *index, /* (o) index of array element closest to value */ const float *array, /* (i) data array */ float value, /* (i) value */ int arlength) /* (i) dimension of data array */ { int i; float bestcrit; float crit; crit = array[0] - value; bestcrit = crit*crit; *index = 0; for (i = 1; i < arlength; i++) { crit = array[i] - value; crit = crit*crit; if (crit < bestcrit) { bestcrit= crit; *index = i; } } } /*----------------------------------------------------------------* * compute cross correlation between sequences *---------------------------------------------------------------*/ static void mycorr1(float *corr, /* (o) correlation of seq1 and seq2 */ float *seq1, /* (i) first sequence */ int dim1, /* (i) dimension first seq1 */ const float *seq2, /* (i) second sequence */ int dim2) /* (i) dimension seq2 */ { int i; int j; for (i = 0; i <= dim1 - dim2; i++) { corr[i] = 0.0f; for (j = 0; j < dim2; j++) corr[i] += seq1[i + j]*seq2[j]; } } /*----------------------------------------------------------------* * upsample finite array assuming zeros outside bounds *---------------------------------------------------------------*/ static void enh_upsample(float *useq1, /* (o) upsampled output sequence */ float *seq1, /* (i) unupsampled sequence */ int dim1, /* (i) dimension seq1 */ int hfl) /* (i) polyphase filter length=2*hfl+1 */ { float *pu; float *ps; int i; int j; int k; int q; int filterlength; int hfl2; const float *polyp[ENH_UPS0]; /* pointers to polyphase columns */ const float *pp; /* define pointers for filter */ filterlength = 2*hfl + 1; if (filterlength > dim1) { hfl2 = (int) (dim1/2); for (j = 0; j < ENH_UPS0; j++) polyp[j] = polyphaserTbl + j*filterlength + hfl - hfl2; hfl = hfl2; filterlength = 2*hfl + 1; } else { for (j = 0; j < ENH_UPS0; j++) polyp[j] = polyphaserTbl+j*filterlength; } /* filtering: filter overhangs left side of sequence */ pu = useq1; for (i = hfl; i < filterlength; i++) { for (j = 0; j < ENH_UPS0; j++) { *pu = 0.0f; pp = polyp[j]; ps = seq1 + i; for (k = 0; k <= i; k++) *pu += *ps-- * *pp++; pu++; } } /* filtering: simple convolution=inner products */ for (i = filterlength; i < dim1; i++) { for (j = 0; j < ENH_UPS0; j++) { *pu = 0.0f; pp = polyp[j]; ps = seq1 + i; for (k = 0; k < filterlength; k++) *pu += *ps-- * *pp++; pu++; } } /* filtering: filter overhangs right side of sequence */ for (q = 1; q <= hfl; q++) { for (j = 0; j < ENH_UPS0; j++) { *pu = 0.0f; pp = polyp[j] + q; ps = seq1 + dim1 - 1; for (k = 0; k < filterlength - q; k++) *pu += *ps-- * *pp++; pu++; } } } /*----------------------------------------------------------------* * find segment starting near idata+estSegPos that has highest * correlation with idata+centerStartPos through * idata+centerStartPos+ENH_BLOCKL-1 segment is found at a * resolution of ENH_UPSO times the original of the original * sampling rate *---------------------------------------------------------------*/ static void refiner(float *seg, /* (o) segment array */ float *updStartPos, /* (o) updated start point */ float *idata, /* (i) original data buffer */ int idatal, /* (i) dimension of idata */ int centerStartPos, /* (i) beginning center segment */ float estSegPos, /* (i) estimated beginning other segment */ float period) /* (i) estimated pitch period */ { int estSegPosRounded; int searchSegStartPos; int searchSegEndPos; int corrdim; int tloc; int tloc2; int i; int st; int en; int fraction; float vect[ENH_VECTL]; float corrVec[ENH_CORRDIM]; float maxv; float corrVecUps[ENH_CORRDIM*ENH_UPS0]; period = period; /* defining array bounds */ estSegPosRounded = (int)(estSegPos - 0.5f); searchSegStartPos = estSegPosRounded - ENH_SLOP; if (searchSegStartPos < 0) searchSegStartPos = 0; searchSegEndPos = estSegPosRounded + ENH_SLOP; if (searchSegEndPos + ENH_BLOCKL >= idatal) searchSegEndPos = idatal - ENH_BLOCKL - 1; corrdim = searchSegEndPos - searchSegStartPos + 1; /* compute upsampled correlation (corr33) and find location of max */ mycorr1(corrVec, idata + searchSegStartPos, corrdim + ENH_BLOCKL - 1, idata + centerStartPos, ENH_BLOCKL); enh_upsample(corrVecUps, corrVec, corrdim, ENH_FL0); tloc = 0; maxv = corrVecUps[0]; for (i = 1; i < ENH_UPS0*corrdim; i++) { if (corrVecUps[i]>maxv) { tloc = i; maxv = corrVecUps[i]; } } /* make vector can be upsampled without ever running outside bounds */ *updStartPos = (float) searchSegStartPos + (float) tloc/(float) ENH_UPS0 + 1.0f; tloc2 = (int) (tloc/ENH_UPS0); if (tloc > tloc2*ENH_UPS0) tloc2++; st = searchSegStartPos + tloc2 - ENH_FL0; if (st < 0) { memset(vect, 0, -st*sizeof(float)); memcpy(&vect[-st], idata, (ENH_VECTL + st)*sizeof(float)); } else { en = st + ENH_VECTL; if (en>idatal) { memcpy(vect, &idata[st], (ENH_VECTL - (en - idatal))*sizeof(float)); memset(&vect[ENH_VECTL - (en - idatal)], 0, (en - idatal)*sizeof(float)); } else { memcpy(vect, &idata[st], ENH_VECTL*sizeof(float)); } } fraction = tloc2*ENH_UPS0 - tloc; /* compute the segment (this is actually a convolution) */ mycorr1(seg, vect, ENH_VECTL, polyphaserTbl + (2*ENH_FL0 + 1)*fraction, 2*ENH_FL0 + 1); } /*----------------------------------------------------------------* * find the smoothed output data *---------------------------------------------------------------*/ static void smath(float *odata, /* (o) smoothed output */ float *sseq, /* (i) said second sequence of waveforms */ int hl, /* (i) 2*hl+1 is sseq dimension */ float alpha0) /* (i) max smoothing energy fraction */ { int i; int k; float w00; float w10; float w11; float A; float B; float C; float *psseq; float err,errs; float surround[ILBC_BLOCK_LEN_MAX]; /* shape contributed by other than current */ float wt[2*ENH_HL + 1]; /* waveform weighting to get surround shape */ float denom; /* create shape of contribution from all waveforms except the current one */ for (i = 1; i <= 2*hl + 1; i++) wt[i - 1] = 0.5f*(1.0f - cosf(2.0f*PI*i/(2.0f*hl + 2.0f))); wt[hl] = 0.0f; /* for clarity, not used */ for (i = 0; i < ENH_BLOCKL; i++) surround[i] = sseq[i]*wt[0]; for (k = 1; k < hl; k++) { psseq = sseq + k*ENH_BLOCKL; for (i = 0; i < ENH_BLOCKL; i++) surround[i] += psseq[i]*wt[k]; } for (k = hl + 1; k <= 2*hl; k++) { psseq = sseq + k*ENH_BLOCKL; for (i = 0; i < ENH_BLOCKL; i++) surround[i] += psseq[i]*wt[k]; } /* compute some inner products */ w00 = w10 = w11 = 0.0f; psseq = sseq + hl*ENH_BLOCKL; /* current block */ for (i = 0; i < ENH_BLOCKL; i++) { w00 += psseq[i]*psseq[i]; w11 += surround[i]*surround[i]; w10 += surround[i]*psseq[i]; } if (fabsf(w11) < 1.0f) w11 = 1.0f; C = sqrtf(w00/w11); /* first try enhancement without power-constraint */ errs = 0.0f; psseq = sseq + hl*ENH_BLOCKL; for (i = 0; i < ENH_BLOCKL; i++) { odata[i] = C*surround[i]; err = psseq[i] - odata[i]; errs += err*err; } /* if constraint violated by first try, add constraint */ if (errs > alpha0 * w00) { if (w00 < 1) w00 = 1; denom = (w11*w00 - w10*w10)/(w00*w00); if (denom > 0.0001f) { /* eliminates numerical problems for if smooth */ A = sqrtf((alpha0 - alpha0*alpha0/4)/denom); B = -alpha0/2.0f - A*w10/w00; B = B + 1.0f; } else { /* essentially no difference between cycles; smoothing not needed */ A = 0.0f; B = 1.0f; } /* create smoothed sequence */ psseq = sseq + hl*ENH_BLOCKL; for (i = 0; i < ENH_BLOCKL; i++) odata[i] = A*surround[i] + B*psseq[i]; } } /*----------------------------------------------------------------* * get the pitch-synchronous sample sequence *---------------------------------------------------------------*/ static void getsseq(float *sseq, /* (o) the pitch-synchronous sequence */ float *idata, /* (i) original data */ int idatal, /* (i) dimension of data */ int centerStartPos, /* (i) where current block starts */ float *period, /* (i) rough-pitch-period array */ const float *plocs, /* (i) where periods of period array are taken */ int periodl, /* (i) dimension period array */ int hl) /* (i) 2*hl+1 is the number of sequences */ { int i; int centerEndPos; int q; float blockStartPos[2*ENH_HL + 1]; int lagBlock[2*ENH_HL + 1]; float plocs2[ENH_PLOCSL]; float *psseq; centerEndPos = centerStartPos + ENH_BLOCKL - 1; /* present */ NearestNeighbor(lagBlock + hl, plocs, 0.5f*(centerStartPos + centerEndPos), periodl); blockStartPos[hl] = (float) centerStartPos; psseq = sseq + ENH_BLOCKL*hl; memcpy(psseq, idata + centerStartPos, ENH_BLOCKL*sizeof(float)); /* past */ for (q = hl - 1; q >= 0; q--) { blockStartPos[q] = blockStartPos[q + 1] - period[lagBlock[q + 1]]; NearestNeighbor(lagBlock + q, plocs, blockStartPos[q] + ENH_BLOCKL_HALF - period[lagBlock[q + 1]], periodl); if (blockStartPos[q] - ENH_OVERHANG >= 0) { refiner(sseq + q*ENH_BLOCKL, blockStartPos + q, idata, idatal, centerStartPos, blockStartPos[q], period[lagBlock[q + 1]]); } else { psseq = sseq + q*ENH_BLOCKL; memset(psseq, 0, ENH_BLOCKL*sizeof(float)); } } /* future */ for (i = 0; i < periodl; i++) plocs2[i] = plocs[i] - period[i]; for (q = hl + 1; q <= 2*hl; q++) { NearestNeighbor(lagBlock + q, plocs2, blockStartPos[q - 1] + ENH_BLOCKL_HALF, periodl); blockStartPos[q] = blockStartPos[q - 1] + period[lagBlock[q]]; if (blockStartPos[q] + ENH_BLOCKL + ENH_OVERHANG < idatal) { refiner(sseq + ENH_BLOCKL*q, blockStartPos + q, idata, idatal, centerStartPos, blockStartPos[q], period[lagBlock[q]]); } else { psseq = sseq + q*ENH_BLOCKL; memset(psseq, 0, ENH_BLOCKL*sizeof(float)); } } } /*----------------------------------------------------------------* * perform enhancement on idata+centerStartPos through * idata+centerStartPos+ENH_BLOCKL-1 *---------------------------------------------------------------*/ static void enhancer(float *odata, /* (o) smoothed block, dimension blockl */ float *idata, /* (i) data buffer used for enhancing */ int idatal, /* (i) dimension idata */ int centerStartPos, /* (i) first sample current block within idata */ float alpha0, /* (i) max correction-energy-fraction (in [0,1]) */ float *period, /* (i) pitch period array */ const float *plocs, /* (i) locations where period array values valid */ int periodl) /* (i) dimension of period and plocs */ { float sseq[(2*ENH_HL + 1)*ENH_BLOCKL]; /* get said second sequence of segments */ getsseq(sseq, idata, idatal, centerStartPos, period, plocs, periodl, ENH_HL); /* compute the smoothed output from said second sequence */ smath(odata, sseq, ENH_HL, alpha0); } /*----------------------------------------------------------------* * cross correlation *---------------------------------------------------------------*/ float xCorrCoef(float *target, /* (i) first array */ float *regressor, /* (i) second array */ int subl) /* (i) dimension arrays */ { int i; float ftmp1; float ftmp2; ftmp1 = 0.0f; ftmp2 = 0.0f; for (i = 0; i < subl; i++) { ftmp1 += target[i]*regressor[i]; ftmp2 += regressor[i]*regressor[i]; } if (ftmp1 > 0.0f) return (float)(ftmp1*ftmp1/ftmp2); return 0.0f; } /*----------------------------------------------------------------* * interface for enhancer *---------------------------------------------------------------*/ int enhancerInterface(float *out, /* (o) enhanced signal */ float *in, /* (i) unenhanced signal */ ilbc_decode_state_t *iLBCdec_inst) /* (i) buffers etc */ { float *enh_buf; float *enh_period; int iblock; int isample; int lag = 0; int ilag; int i; int ioffset; float cc; float maxcc; float ftmp1; float ftmp2; float *inPtr; float *enh_bufPtr1; float *enh_bufPtr2; float plc_pred[ENH_BLOCKL]; float lpState[6]; float downsampled[(ENH_NBLOCKS*ENH_BLOCKL + 120)/2]; int inLen = ENH_NBLOCKS*ENH_BLOCKL + 120; int start; int plc_blockl; int inlag; enh_buf = iLBCdec_inst->enh_buf; enh_period = iLBCdec_inst->enh_period; memmove(enh_buf, &enh_buf[iLBCdec_inst->blockl], (ENH_BUFL - iLBCdec_inst->blockl)*sizeof(float)); memcpy(&enh_buf[ENH_BUFL - iLBCdec_inst->blockl], in, iLBCdec_inst->blockl*sizeof(float)); if (iLBCdec_inst->mode == 30) plc_blockl = ENH_BLOCKL; else plc_blockl = 40; /* when 20 ms frame, move processing one block */ ioffset = 0; if (iLBCdec_inst->mode == 20) ioffset = 1; i = 3 - ioffset; memmove(enh_period, &enh_period[i], (ENH_NBLOCKS_TOT - i)*sizeof(float)); /* Set state information to the 6 samples right before the samples to be downsampled. */ memcpy(lpState, enh_buf + (ENH_NBLOCKS_EXTRA + ioffset)*ENH_BLOCKL - 126, 6*sizeof(float)); /* Down sample a factor 2 to save computations */ DownSample(enh_buf + (ENH_NBLOCKS_EXTRA + ioffset)*ENH_BLOCKL - 120, lpFilt_coefsTbl, inLen - ioffset*ENH_BLOCKL, lpState, downsampled); /* Estimate the pitch in the down sampled domain. */ for (iblock = 0; iblock maxcc) { maxcc = cc; lag = ilag; } } /* Store the estimated lag in the non-downsampled domain */ enh_period[iblock + ENH_NBLOCKS_EXTRA + ioffset] = (float) lag*2.0f; } /* PLC was performed on the previous packet */ if (iLBCdec_inst->prev_enh_pl == 1) { inlag = (int) enh_period[ENH_NBLOCKS_EXTRA + ioffset]; lag = inlag - 1; maxcc = xCorrCoef(in, in + lag, plc_blockl); for (ilag = inlag; ilag <= inlag + 1; ilag++) { cc = xCorrCoef(in, in + ilag, plc_blockl); if (cc > maxcc) { maxcc = cc; lag = ilag; } } enh_period[ENH_NBLOCKS_EXTRA + ioffset - 1] = (float) lag; /* compute new concealed residual for the old lookahead, mix the forward PLC with a backward PLC from the new frame */ inPtr = &in[lag - 1]; enh_bufPtr1 = &plc_pred[plc_blockl - 1]; if (lag > plc_blockl) start = plc_blockl; else start = lag; for (isample = start; isample > 0; isample--) *enh_bufPtr1-- = *inPtr--; enh_bufPtr2=&enh_buf[ENH_BUFL - 1 - iLBCdec_inst->blockl]; for (isample = (plc_blockl - 1 - lag); isample >= 0; isample--) *enh_bufPtr1-- = *enh_bufPtr2--; /* limit energy change */ ftmp2 = 0.0f; ftmp1 = 0.0f; for (i = 0; i < plc_blockl; i++) { ftmp2 += enh_buf[ENH_BUFL - 1 - iLBCdec_inst->blockl - i]*enh_buf[ENH_BUFL - 1 - iLBCdec_inst->blockl - i]; ftmp1 += plc_pred[i]*plc_pred[i]; } ftmp1 = sqrtf(ftmp1/(float) plc_blockl); ftmp2 = sqrtf(ftmp2/(float) plc_blockl); if (ftmp1 > 2.0f*ftmp2 && ftmp1 > 0.0f) { for (i = 0; i < plc_blockl - 10; i++) plc_pred[i] *= 2.0f*ftmp2/ftmp1; for (i = plc_blockl - 10; i < plc_blockl; i++) plc_pred[i]*=(float)(i - plc_blockl + 10)*(1.0f - 2.0f*ftmp2/ftmp1)/10.0f + 2.0f*ftmp2/ftmp1; } enh_bufPtr1 = &enh_buf[ENH_BUFL - 1 - iLBCdec_inst->blockl]; for (i = 0; i < plc_blockl; i++) { ftmp1 = (float) (i + 1)/(float) (plc_blockl + 1); *enh_bufPtr1 *= ftmp1; *enh_bufPtr1 += (1.0f - ftmp1)*plc_pred[plc_blockl - 1 - i]; enh_bufPtr1--; } } if (iLBCdec_inst->mode == 20) { /* Enhancer with 40 samples delay */ for (iblock = 0; iblock < 2; iblock++) { enhancer(out + iblock*ENH_BLOCKL, enh_buf, ENH_BUFL, (5 + iblock)*ENH_BLOCKL + 40, ENH_ALPHA0, enh_period, enh_plocsTbl, ENH_NBLOCKS_TOT); } } else if (iLBCdec_inst->mode == 30) { /* Enhancer with 80 samples delay */ for (iblock = 0; iblock < 3; iblock++) { enhancer(out + iblock*ENH_BLOCKL, enh_buf, ENH_BUFL, (4 + iblock)*ENH_BLOCKL, ENH_ALPHA0, enh_period, enh_plocsTbl, ENH_NBLOCKS_TOT); } } return lag*2; }