Newer
Older
monitord / lame-3.97 / libmp3lame / .svn / text-base / encoder.c.svn-base
@root root on 23 Jan 2012 19 KB Migration from SVN revision 455
/*
 *	LAME MP3 encoding engine
 *
 *	Copyright (c) 1999 Mark Taylor
 *	Copyright (c) 2000-2002 Takehiro Tominaga
 *	Copyright (c) 2000-2005 Robert Hegemann
 *	Copyright (c) 2001 Gabriel Bouvigne
 *	Copyright (c) 2001 John Dahlstrom
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

/* $Id: encoder.c,v 1.90.2.1 2005/11/20 14:08:24 bouvigne Exp $ */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <assert.h>

#include "lame.h"
#include "util.h"
#include "newmdct.h"
#include "psymodel.h"
#include "quantize.h"
#include "quantize_pvt.h"
#include "bitstream.h"
#include "VbrTag.h"
#include "vbrquantize.h"

#ifdef WITH_DMALLOC
#include <dmalloc.h>
#endif


/*
 * auto-adjust of ATH, useful for low volume
 * Gabriel Bouvigne 3 feb 2001
 *
 * modifies some values in
 *   gfp->internal_flags->ATH
 *   (gfc->ATH)
 */
static void
adjust_ATH(lame_internal_flags * const gfc)
{
    FLOAT   loud, loud_gr1;

    /* feature switched off: pin the factor to unity and leave */
    if (gfc->ATH->use_adjust == 0) {
        gfc->ATH->adjust = 1.0;
        return;
    }

    /*
     * Combined loudness (equal loudness curve based) of each granule.
     * With two output channels both channels are summed; otherwise the
     * single channel is counted twice so the scale stays comparable.
     */
    loud = gfc->loudness_sq[0][0];
    loud_gr1 = gfc->loudness_sq[1][0];
    if (gfc->channels_out != 2) {
        loud += loud;
        loud_gr1 += loud_gr1;
    }
    else {
        loud += gfc->loudness_sq[0][1];
        loud_gr1 += gfc->loudness_sq[1][1];
    }

    /* two granules per frame: keep the louder one */
    if (gfc->mode_gr == 2) {
        loud = Max(loud, loud_gr1);
    }
    loud *= 0.5;        /* approaches 1.0 for full band noise */

    /* user tuning of the region in which the ATH is adjusted */
    loud *= gfc->ATH->aa_sensitivity_p;

    /*
     * 'loud' is a squared loudness (a power).  Above 0.03125, i.e.
     * (1 - 0.000625) / 31.98, the curve used further down would reach
     * 1.0, so the limit is simply 1.0 there.
     */
    if (loud > 0.03125) {
        if (gfc->ATH->adjust >= 1.0) {
            gfc->ATH->adjust = 1.0;
        }
        else if (gfc->ATH->adjust < gfc->ATH->adjust_limit) {
            /* previous frame was lower: climb only as far as the */
            /* previous limit, in case of leading low volume */
            gfc->ATH->adjust = gfc->ATH->adjust_limit;
        }
        gfc->ATH->adjust_limit = 1.0;
        return;
    }

    {
        /* linear adjustment curve; 0.000625 is about 32 dB maximum adjust */
        const FLOAT target = 31.98 * loud + 0.000625;

        if (gfc->ATH->adjust >= target) {
            /* loudness dropped: decay gradually, but never below target */
            gfc->ATH->adjust *= target * 0.075 + 0.925;
            if (gfc->ATH->adjust < target) {
                gfc->ATH->adjust = target;
            }
        }
        else if (gfc->ATH->adjust_limit >= target) {
            /* loudness rose and the previous limit permits the new value */
            gfc->ATH->adjust = target;
        }
        else if (gfc->ATH->adjust < gfc->ATH->adjust_limit) {
            /* loudness rose: climb only as far as the previous limit */
            gfc->ATH->adjust = gfc->ATH->adjust_limit;
        }
        gfc->ATH->adjust_limit = target;
    }
}

/***********************************************************************
 *
 *  some simple statistics
 *
 *  bitrate index 0: free bitrate -> not allowed in VBR mode
 *  bitrate index 1-14: bitrates, kbps depending on MPEG version
 *  bitrate index 15: forbidden
 *
 *  mode_ext:
 *  0:  LR
 *  1:  LR-i
 *  2:  MS
 *  3:  MS-i
 *
 ***********************************************************************/

static void
updateStats(lame_internal_flags * const gfc)
{
    /* every event is counted twice: once in the row of the frame's own
     * bitrate index and once in row 15, which accumulates over all frames */
    int     rows[2];
    int     r, gr, ch;

    assert(gfc->bitrate_index < 16u);
    assert(gfc->mode_ext < 4u);

    rows[0] = gfc->bitrate_index;
    rows[1] = 15;

    /* stereo mode histogram: column 4 counts every frame, columns 0..3
     * count the mode extension and are only used for 2 channel encoding */
    for (r = 0; r < 2; ++r) {
        gfc->bitrate_stereoMode_Hist[rows[r]][4]++;
        if (gfc->channels_out == 2) {
            gfc->bitrate_stereoMode_Hist[rows[r]][gfc->mode_ext]++;
        }
    }

    /* block type histogram: one entry per granule and channel;
     * mixed blocks go to column 4, column 5 counts all of them */
    for (gr = 0; gr < gfc->mode_gr; ++gr) {
        for (ch = 0; ch < gfc->channels_out; ++ch) {
            const int col = gfc->l3_side.tt[gr][ch].mixed_block_flag
                ? 4 : gfc->l3_side.tt[gr][ch].block_type;

            for (r = 0; r < 2; ++r) {
                gfc->bitrate_blockType_Hist[rows[r]][col]++;
                gfc->bitrate_blockType_Hist[rows[r]][5]++;
            }
        }
    }
}




static void
lame_encode_frame_init(lame_global_flags * const gfp, const sample_t * inbuf[2])
{
    lame_internal_flags *gfc = gfp->internal_flags;
    sample_t prime_l[286 + 1152 + 576];
    sample_t prime_r[286 + 1152 + 576];
    int     n_pad, n_total, stereo;
    int     dst, src;
    int     gr, ch;

    /* one-shot: everything below runs for the very first frame only */
    if (gfc->lame_encode_frame_init != 0)
        return;
    gfc->lame_encode_frame_init = 1;

    /* prime the MDCT/polyphase filterbank with a short block:
     * a full frame of silence followed by the start of the real input */
    n_pad = 576 * gfc->mode_gr;
    n_total = 286 + 576 * (1 + gfc->mode_gr);
    stereo = (gfc->channels_out == 2);

    for (dst = 0, src = 0; dst < n_total; ++dst) {
        const int is_pad = (dst < n_pad);

        prime_l[dst] = is_pad ? 0 : inbuf[0][src];
        if (stereo)
            prime_r[dst] = is_pad ? 0 : inbuf[1][src];
        if (!is_pad)
            ++src;
    }

    /* polyphase filtering / mdct, all granules forced to short blocks */
    for (gr = 0; gr < gfc->mode_gr; gr++)
        for (ch = 0; ch < gfc->channels_out; ch++)
            gfc->l3_side.tt[gr][ch].block_type = SHORT_TYPE;

    mdct_sub48(gfc, prime_l, prime_r);

    /* check FFT will not use a negative starting offset */
#if 576 < FFTOFFSET
# error FFTOFFSET greater than 576: FFT uses a negative offset
#endif
    /* the buffer must hold enough samples for the FFT ... */
    assert(gfc->mf_size >= (BLKSIZE + gfp->framesize - FFTOFFSET));
    /* ... and for the polyphase filterbank */
    assert(gfc->mf_size >= (512 + gfp->framesize - 32));
}







/************************************************************************
*
* encodeframe()           Layer 3
*
* encode a single frame
*
************************************************************************
lame_encode_frame()


                       gr 0            gr 1
inbuf:           |--------------|--------------|--------------|


Polyphase (18 windows, each shifted 32)
gr 0:
window1          <----512---->
window18                 <----512---->

gr 1:
window1                         <----512---->
window18                                <----512---->



MDCT output:  |--------------|--------------|--------------|

FFT's                    <---------1024---------->
                                         <---------1024-------->



    inbuf = buffer of PCM data size=MP3 framesize
    encoder acts on inbuf[ch][0], but output is delayed by MDCTDELAY
    so the MDCT coefficients are from inbuf[ch][-MDCTDELAY]

    psy-model FFT has a 1 granule delay, so we feed it data for the 
    next granule.
    FFT is centered over granule:  224+576+224
    So FFT starts at:   576-224-MDCTDELAY

    MPEG2:  FFT ends at:  BLKSIZE+576-224-MDCTDELAY      (1328)
    MPEG1:  FFT ends at:  BLKSIZE+2*576-224-MDCTDELAY    (1904)

    MPEG2:  polyphase first window:  [0..511]
                      18th window:   [544..1055]          (1056)
    MPEG1:            36th window:   [1120..1631]         (1632)
            data needed:  512+framesize-32

    A close look at newmdct.c shows that the polyphase filterbank
    only uses data from [0..510] for each window.  Perhaps because the window
    used by the filterbank is zero for the last point, so Takehiro's
    code doesn't bother to compute with it.

    FFT starts at 576-224-MDCTDELAY (304)  = 576-FFTOFFSET

*/

/* 2x2 table of FLOAT values, indexed [gr][ch] (see pe[gr][ch] below) */
typedef FLOAT chgrdata[2][2];


/*
 * Encode one frame of PCM samples and copy the result into mp3buf.
 *
 * Stages, in order: first-call priming, padding decision, psychoacoustic
 * analysis (or fixed defaults when gfc->psymodel is 0), ATH auto-adjust,
 * MDCT, MS/LR decision, quantization loop, bitstream formatting.
 *
 * gfp          encoder context; gfp->internal_flags is read and updated
 * inbuf_l      PCM input used as channel 0
 * inbuf_r      PCM input used as channel 1
 * mp3buf       destination handed to copy_buffer()
 * mp3buf_size  capacity of mp3buf, handed to copy_buffer() unchanged
 *
 * Returns -4 as soon as L3psycho_anal() / L3psycho_anal_ns() reports a
 * non-zero result; otherwise returns whatever copy_buffer() returned.
 */
int
lame_encode_mp3_frame(       /* see header comment for the return value */
                         lame_global_flags * const gfp, /* Context */
                         sample_t * inbuf_l, /* Input, channel 0 */
                         sample_t * inbuf_r, /* Input, channel 1 */
                         unsigned char *mp3buf, /* Output */
                         int mp3buf_size) /* capacity of mp3buf */
{
    int     mp3count;
    III_psy_ratio masking_LR[2][2]; /*LR masking & energy */
    III_psy_ratio masking_MS[2][2]; /*MS masking & energy */
    III_psy_ratio(*masking)[2][2]; /*pointer to selected maskings */
    const sample_t *inbuf[2];
    lame_internal_flags *gfc = gfp->internal_flags;

    FLOAT   tot_ener[2][4]; /* per granule; only [2] and [3] are read in this function */
    FLOAT   ms_ener_ratio[2] = { .5, .5 }; /* default kept unless mode is JOINT_STEREO */
    chgrdata pe, pe_MS;     /* perceptual entropy for LR and for MS coding */
    chgrdata *pe_use;       /* points at whichever of pe / pe_MS matches mode_ext */

    int     ch, gr;

    FLOAT   ms_ratio_next = 0.;
    FLOAT   ms_ratio_prev = 0.;


    inbuf[0] = inbuf_l;
    inbuf[1] = inbuf_r;

    if (gfc->lame_encode_frame_init == 0) {
        /*first run? */
        lame_encode_frame_init(gfp, inbuf);

    }


    /********************** padding *****************************/
    /* padding method as described in 
     * "MPEG-Layer3 / Bitstream Syntax and Decoding"
     * by Martin Sieler, Ralph Sperschneider
     *
     * note: there is no padding for the very first frame
     *
     * Robert Hegemann 2000-06-22
     */
    /* slot_lag is decreased by frac_SpF every frame; once it drops below
     * zero this frame is padded and out_samplerate is added back */
    gfc->padding = FALSE;
    if ((gfc->slot_lag -= gfc->frac_SpF) < 0) {
        gfc->slot_lag += gfp->out_samplerate;
        gfc->padding = TRUE;
    }



    /****************************************
    *   Stage 1: psychoacoustic model       *
    ****************************************/

    if (gfc->psymodel) {
        /* psychoacoustic model
         * psy model has a 1 granule (576) delay that we must compensate for
         * (mt 6/99).
         */
        int     ret;
        const sample_t *bufp[2]; /* address of beginning of left & right granule */
        int     blocktype[2];

        /* saved before the loop below hands &gfc->ms_ratio[gr] to the psy
         * model (presumably overwritten there -- TODO confirm) */
        ms_ratio_prev = gfc->ms_ratio[gfc->mode_gr - 1];
        for (gr = 0; gr < gfc->mode_gr; gr++) {

            /* analysis window starts one granule ahead, minus FFTOFFSET */
            for (ch = 0; ch < gfc->channels_out; ch++)
                bufp[ch] = &inbuf[ch][576 + gr * 576 - FFTOFFSET];

            if (gfp->psymodel == PSY_NSPSYTUNE) {
                ret = L3psycho_anal_ns(gfp, bufp, gr,
                                       &gfc->ms_ratio[gr], &ms_ratio_next,
                                       masking_LR, masking_MS,
                                       pe[gr], pe_MS[gr], tot_ener[gr], blocktype);
            }
            else {
                ret = L3psycho_anal(gfp, bufp, gr,
                                    &gfc->ms_ratio[gr], &ms_ratio_next,
                                    masking_LR, masking_MS,
                                    pe[gr], pe_MS[gr], tot_ener[gr], blocktype);
            }
            if (ret != 0)
                return -4;

            if (gfp->mode == JOINT_STEREO) {
                /* ratio tot_ener[3] / (tot_ener[2] + tot_ener[3]); when the
                 * sum is not positive the sum itself is kept as the value.
                 * presumably [2] = mid and [3] = side energy -- TODO confirm */
                ms_ener_ratio[gr] = tot_ener[gr][2] + tot_ener[gr][3];
                if (ms_ener_ratio[gr] > 0)
                    ms_ener_ratio[gr] = tot_ener[gr][3] / ms_ener_ratio[gr];
            }

            /* block type flags */
            for (ch = 0; ch < gfc->channels_out; ch++) {
                gr_info *cod_info = &gfc->l3_side.tt[gr][ch];
                cod_info->block_type = blocktype[ch];
                cod_info->mixed_block_flag = 0;
            }
        }
    }
    else {
        /*no psy model */
        /* zeroed maskings, normal blocks everywhere, fixed PE of 700 */
        memset((char *) masking_LR, 0, sizeof(masking_LR));
        memset((char *) masking_MS, 0, sizeof(masking_MS));
        for (gr = 0; gr < gfc->mode_gr; gr++)
            for (ch = 0; ch < gfc->channels_out; ch++) {
                gfc->l3_side.tt[gr][ch].block_type = NORM_TYPE;
                gfc->l3_side.tt[gr][ch].mixed_block_flag = 0;
                pe_MS[gr][ch] = pe[gr][ch] = 700;
            }
    }



    /* auto-adjust of ATH, useful for low volume */
    adjust_ATH(gfc);


    /****************************************
    *   Stage 2: MDCT                       *
    ****************************************/

    /* polyphase filtering / mdct */
    mdct_sub48(gfc, inbuf[0], inbuf[1]);


    /****************************************
    *   Stage 3: MS/LR decision             *
    ****************************************/

    /* Here will be selected MS or LR coding of the 2 stereo channels */
    gfc->mode_ext = MPG_MD_LR_LR;

    if (gfp->force_ms) {
        /* forced MS skips every check below */
        gfc->mode_ext = MPG_MD_MS_LR;
    }
    else if (gfp->mode == JOINT_STEREO) {
        int     check_ms_stereo = 1;
        /* ms_ratio = is scaled, for historical reasons, to look like
           a ratio of side_channel / total.
           0 = signal is 100% mono
           .5 = L & R uncorrelated
         */

        /* [0] and [1] are the results for the two granules in MPEG-1,
         * in MPEG-2 it's only a faked averaging of the same value
         * _prev is the value of the last granule of the previous frame
         * _next is the value of the first granule of the next frame
         */
        if (gfp->psymodel == PSY_GPSYCHO) {
            FLOAT   ms_ratio_ave1; /* average including _prev and _next */
            FLOAT   ms_ratio_ave2; /* average over this frame only */
            FLOAT   threshold1 = 0.35;
            FLOAT   threshold2 = 0.45;

            /* take an average */
            if (gfc->mode_gr == 1) {
                /* MPEG2 - no second granule */
                ms_ratio_ave1 = 0.33 * (gfc->ms_ratio[0] + ms_ratio_prev + ms_ratio_next);
                ms_ratio_ave2 = gfc->ms_ratio[0];
            }
            else {
                ms_ratio_ave1 =
                    0.25 * (gfc->ms_ratio[0] + gfc->ms_ratio[1] + ms_ratio_prev + ms_ratio_next);
                ms_ratio_ave2 = 0.50 * (gfc->ms_ratio[0] + gfc->ms_ratio[1]);
            }


            /* either average at or above its threshold rules MS out */
            if (ms_ratio_ave1 >= threshold1 || ms_ratio_ave2 >= threshold2)
                check_ms_stereo = 0;
        }

        if (check_ms_stereo) {
            FLOAT   sum_pe_MS = 0;
            FLOAT   sum_pe_LR = 0;
            for (gr = 0; gr < gfc->mode_gr; gr++) {
                for (ch = 0; ch < gfc->channels_out; ch++) {
                    sum_pe_MS += pe_MS[gr][ch];
                    sum_pe_LR += pe[gr][ch];
                }
            }

            /* based on PE: M/S coding would not use much more bits than L/R */
            /* (a 7% surplus is tolerated for GPSYCHO, none for NSPSYTUNE) */
            if (((gfp->psymodel == PSY_GPSYCHO) && sum_pe_MS <= 1.07 * sum_pe_LR) ||
                ((gfp->psymodel == PSY_NSPSYTUNE) && sum_pe_MS <= 1.00 * sum_pe_LR)) {

                /* gi0 / gi1 address channel 0 of the first / last granule;
                 * index [1] then reaches channel 1 of the same granule */
                gr_info *gi0 = &gfc->l3_side.tt[0][0];
                gr_info *gi1 = &gfc->l3_side.tt[gfc->mode_gr - 1][0];

                /* MS only if both channels share the block type */
                if (gi0[0].block_type == gi0[1].block_type &&
                    gi1[0].block_type == gi1[1].block_type) {

                    gfc->mode_ext = MPG_MD_MS_LR;
                }
            }
        }
    }

    /* bit and noise allocation */
    if (gfc->mode_ext == MPG_MD_MS_LR) {
        masking = &masking_MS; /* use MS masking */
        pe_use = &pe_MS;
    }
    else {
        masking = &masking_LR; /* use LR masking */
        pe_use = &pe;
    }


#if defined(HAVE_GTK)
    /* copy data for MP3 frame analyzer */
    if (gfp->analysis && gfc->pinfo != NULL) {
        for (gr = 0; gr < gfc->mode_gr; gr++) {
            for (ch = 0; ch < gfc->channels_out; ch++) {
                gfc->pinfo->ms_ratio[gr] = gfc->ms_ratio[gr];
                gfc->pinfo->ms_ener_ratio[gr] = ms_ener_ratio[gr];
                gfc->pinfo->blocktype[gr][ch] = gfc->l3_side.tt[gr][ch].block_type;
                gfc->pinfo->pe[gr][ch] = (*pe_use)[gr][ch];
                memcpy(gfc->pinfo->xr[gr][ch], &gfc->l3_side.tt[gr][ch].xr, sizeof(FLOAT) * 576);
                /* in psymodel, LR and MS data was stored in pinfo.  
                   switch to MS data: */
                if (gfc->mode_ext == MPG_MD_MS_LR) {
                    gfc->pinfo->ers[gr][ch] = gfc->pinfo->ers[gr][ch + 2];
                    memcpy(gfc->pinfo->energy[gr][ch], gfc->pinfo->energy[gr][ch + 2],
                           sizeof(gfc->pinfo->energy[gr][ch]));
                }
            }
        }
    }
#endif


    /****************************************
    *   Stage 4: quantization loop          *
    ****************************************/

    /* NSPSYTUNE with CBR or ABR: smooth the frame PE over the last 19
     * frames and rescale this frame's PE values accordingly */
    if (gfp->psymodel == PSY_NSPSYTUNE) {
        if (gfp->VBR == vbr_off || gfp->VBR == vbr_abr) {
            /* one half of a symmetric FIR; the centre tap (pefirbuf[9])
             * enters the sum below with weight 1 */
            static FLOAT fircoef[9] = {
                -0.0207887 * 5, -0.0378413 * 5, -0.0432472 * 5, -0.031183 * 5,
                7.79609e-18 * 5, 0.0467745 * 5, 0.10091 * 5, 0.151365 * 5,
                0.187098 * 5
            };

            int     i;
            FLOAT   f;

            /* drop the oldest history entry ... */
            for (i = 0; i < 18; i++)
                gfc->nsPsy.pefirbuf[i] = gfc->nsPsy.pefirbuf[i + 1];

            /* ... and append the PE summed over this frame */
            f = 0.0;
            for (gr = 0; gr < gfc->mode_gr; gr++)
                for (ch = 0; ch < gfc->channels_out; ch++)
                    f += (*pe_use)[gr][ch];
            gfc->nsPsy.pefirbuf[18] = f;

            /* filtered PE: centre tap plus the mirrored pairs */
            f = gfc->nsPsy.pefirbuf[9];
            for (i = 0; i < 9; i++)
                f += (gfc->nsPsy.pefirbuf[i] + gfc->nsPsy.pefirbuf[18 - i]) * fircoef[i];

            /* NOTE(review): no guard against f == 0 before this division --
             * confirm the filtered value cannot be zero in practice */
            f = (670 * 5 * gfc->mode_gr * gfc->channels_out) / f;
            /* scales pe or pe_MS in place, whichever pe_use points at */
            for (gr = 0; gr < gfc->mode_gr; gr++) {
                for (ch = 0; ch < gfc->channels_out; ch++) {
                    (*pe_use)[gr][ch] *= f;
                }
            }
        }
    }

    /* any VBR value not listed below is handled like vbr_off */
    switch (gfp->VBR) {
    default:
    case vbr_off:
        CBR_iteration_loop(gfp, *pe_use, ms_ener_ratio, *masking);
        break;
    case vbr_mt:
    case vbr_rh:
    case vbr_mtrh:
        VBR_iteration_loop(gfp, *pe_use, ms_ener_ratio, *masking);
        break;
    case vbr_abr:
        ABR_iteration_loop(gfp, *pe_use, ms_ener_ratio, *masking);
        break;
    }



    /****************************************
    *   Stage 5: bitstream formatting       *
    ****************************************/


    /*  write the frame to the bitstream  */
    format_bitstream(gfp);

    /* copy mp3 bit buffer into array */
    /* NOTE(review): mp3count is returned as-is and not checked here;
     * the calls below run regardless of its value */
    mp3count = copy_buffer(gfc, mp3buf, mp3buf_size, 1);




    if (gfp->bWriteVbrTag)
        AddVbrFrame(gfp);


#if defined(HAVE_GTK)
    if (gfp->analysis && gfc->pinfo != NULL) {
        for (ch = 0; ch < gfc->channels_out; ch++) {
            int     j;
            /* keep the first FFTOFFSET entries filled from one framesize
             * further on, then refill the rest (up to index 1599) from
             * the current input */
            for (j = 0; j < FFTOFFSET; j++)
                gfc->pinfo->pcmdata[ch][j] = gfc->pinfo->pcmdata[ch][j + gfp->framesize];
            for (j = FFTOFFSET; j < 1600; j++) {
                gfc->pinfo->pcmdata[ch][j] = inbuf[ch][j - FFTOFFSET];
            }
        }
        set_frame_pinfo(gfp, *masking);
    }
#endif

#ifdef BRHIST
    /* bitrate / stereo mode / block type histograms */
    updateStats(gfc);
#endif

    return mp3count;
}