[Ffmpeg-devel] [PATCH] DV weights (original) (raw)

Dan Maas dmaas
Sat Feb 25 20:41:33 CET 2006

Previous message: [Ffmpeg-devel] Re: h264
Next message: [Ffmpeg-devel] [PATCH] DV weights
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

This patch implements AC coefficient weighing (per SMPTE 314M) for the DV encoder and decoder.

I have also attached the Python script I used to make the tables.

Weighing is implemented as fixed-point multiplication. I have chosen the number of bits of precision that gives the smallest error without causing 32-bit overflows. Weighing is inherently a lossy process because weighted coefficients have fewer bits than unweighted ones. But, in no case is the error greater than one least-significant bit. (the Python script checks this)

Performance impact is less than 1%.

Regards, Dan -------------- next part -------------- Index: libavcodec/dv.c

RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/dv.c,v retrieving revision 1.75 diff -u -p -u -r1.75 dv.c --- libavcodec/dv.c 24 Feb 2006 09:16:26 -0000 1.75 +++ libavcodec/dv.c 25 Feb 2006 19:29:18 -0000 @@ -253,6 +253,7 @@ static int dvvideo_init(AVCodecContext * typedef struct BlockInfo { const uint8_t shift_table; const uint8_t scan_table; + const int iweight_table; uint8_t pos; /* position in block / uint8_t dct_mode; uint8_t partial_bit_count; @@ -295,6 +296,7 @@ static void dv_decode_ac(GetBitContext * int last_index = get_bits_size(gb); const uint8_t scan_table = mb->scan_table; const uint8_t shift_table = mb->shift_table; + const int iweight_table = mb->iweight_table; int pos = mb->pos; int partial_bit_count = mb->partial_bit_count; int level, pos1, run, vlc_len, index; @@ -344,7 +346,12 @@ static void dv_decode_ac(GetBitContext * assert(level); pos1 = scan_table[pos]; - block[pos1] = level << shift_table[pos1]; + level <<= shift_table[pos1]; + + /* unweigh, round, and shift down / + level = (leveliweight_table[pos] + (1 << (dv_iweight_bits-1))) >> dv_iweight_bits; + + block[pos1] = level; UPDATE_CACHE(re, gb); } @@ -410,6 +417,7 @@ static inline void dv_decode_video_segme dct_mode = get_bits1(&gb); mb->dct_mode = dct_mode; mb->scan_table = s->dv_zigzag[dct_mode]; + mb->iweight_table = dct_mode ? dv_iweight_248 : dv_iweight_88; class1 = get_bits(&gb, 2); mb->shift_table = s->dv_idct_shift[class1 == 3][dct_mode] [quant + dv_quant_offset[class1]]; @@ -648,7 +656,7 @@ static always_inline PutBitContext dv_e } static always_inline void dv_set_class_number(DCTELEM blk, EncBlockInfo bi, - const uint8_t zigzag_scan, int bias) + const uint8_t zigzag_scan, const int weight, int bias) { int i, area; static const int classes[] = {12, 24, 36, 0xffff}; @@ -665,7 +673,11 @@ static always_inline void dv_set_class_n if (level+15 > 30U) { bi->sign[i] = (level>>31)&1; - bi->mb[i] = level= ABS(level)>>4; + / weigh it and and shift down into range, adding for rounding / + / the extra division by a factor of 2^4 reverses the 8x expansion of the DCT + AND the 2x doubling of the weights / + level = (ABS(level) weight[i] + (1<<(dv_weight_bits+3))) >> (dv_weight_bits+4); + bi->mb[i] = level; if(level>max) max= level; bi->bit_size[area] += dv_rl2vlc_size(i - prev - 1, level); bi->next[prev]= i; @@ -872,7 +887,9 @@ static inline void dv_encode_video_segme s->fdctenc_blk->dct_mode; dv_set_class_number(block, enc_blk, - enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, j/4); + enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, + enc_blk->dct_mode ? dv_weight_248 : dv_weight_88, + j/4); init_put_bits(pb, ptr, block_sizes[j]/8); put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2)); Index: libavcodec/dvdata.h

RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/dvdata.h,v retrieving revision 1.15 diff -u -p -u -r1.15 dvdata.h --- libavcodec/dvdata.h 12 Jan 2006 22:43:15 -0000 1.15 +++ libavcodec/dvdata.h 25 Feb 2006 19:29:21 -0000 @@ -1256,6 +1256,51 @@ static const uint16_t dv_place_411[1350] 0x0834, 0x2320, 0x2f44, 0x3810, 0x1658, };

+/* DV25/50 DCT coefficient weights and inverse weights / +/ created by dvtables.py */ +static const int dv_weight_bits = 18; +static const int dv_weight_88[64] = {

131072, 257107, 257107, 242189, 252167, 242189, 235923, 237536,
237536, 235923, 229376, 231390, 223754, 231390, 229376, 222935,
224969, 217965, 217965, 224969, 222935, 200636, 218652, 211916,
212325, 211916, 218652, 200636, 188995, 196781, 205965, 206433,
206433, 205965, 196781, 188995, 185364, 185364, 200636, 200704,
200636, 185364, 185364, 174609, 180568, 195068, 195068, 180568,
174609, 170091, 175557, 189591, 175557, 170091, 165371, 170627,
170627, 165371, 160727, 153560, 160727, 144651, 144651, 136258, +}; +static const int dv_weight_248[64] = {
131072, 242189, 257107, 237536, 229376, 200636, 242189, 223754,
224969, 196781, 262144, 242189, 229376, 200636, 257107, 237536,
211916, 185364, 235923, 217965, 229376, 211916, 206433, 180568,
242189, 223754, 224969, 196781, 211916, 185364, 235923, 217965,
200704, 175557, 222935, 205965, 200636, 185364, 195068, 170627,
229376, 211916, 206433, 180568, 200704, 175557, 222935, 205965,
175557, 153560, 188995, 174609, 165371, 144651, 200636, 185364,
195068, 170627, 175557, 153560, 188995, 174609, 165371, 144651, +}; +static const int dv_iweight_bits = 14; +static const int dv_iweight_88[64] = {
32768, 16710, 16710, 17735, 17015, 17735, 18197, 18079,
18079, 18197, 18725, 18559, 19196, 18559, 18725, 19284,
19108, 19692, 19692, 19108, 19284, 21400, 19645, 20262,
20214, 20262, 19645, 21400, 22733, 21845, 20867, 20815,
20815, 20867, 21845, 22733, 23173, 23173, 21400, 21400,
21400, 23173, 23173, 24600, 23764, 22017, 22017, 23764,
24600, 25267, 24457, 22672, 24457, 25267, 25971, 25191,
25191, 25971, 26715, 27962, 26715, 29642, 29642, 31536, +}; +static const int dv_iweight_248[64] = {
32768, 17735, 16710, 18079, 18725, 21400, 17735, 19196,
19108, 21845, 16384, 17735, 18725, 21400, 16710, 18079,
20262, 23173, 18197, 19692, 18725, 20262, 20815, 23764,
17735, 19196, 19108, 21845, 20262, 23173, 18197, 19692,
21400, 24457, 19284, 20867, 21400, 23173, 22017, 25191,
18725, 20262, 20815, 23764, 21400, 24457, 19284, 20867,
24457, 27962, 22733, 24600, 25971, 29642, 21400, 23173,
22017, 25191, 24457, 27962, 22733, 24600, 25971, 29642, +};
static const uint16_t dv_audio_shuffle525[10][9] = { { 0, 30, 60, 20, 50, 80, 10, 40, 70 }, /* 1st channel */ { 6, 36, 66, 26, 56, 86, 16, 46, 76 }, -------------- next part --------------

#!/usr/bin/python

dvtables - precompute some tables for ffmpeg's DV codec

Copyright (C) 2006 Daniel Maas

last update 25 Feb 2006

dvtables is free software; you can redistribute it and/or modify

it under the terms of the GNU General Public License as published by

the Free Software Foundation; either version 2 of the License, or

(at your option) any later version.

dvtables is distributed in the hope that it will be useful,

but WITHOUT ANY WARRANTY; without even the implied warranty of

MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

GNU General Public License for more details.

You should have received a copy of the GNU General Public License

along with this program; if not, write to the Free Software

Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

import sys from math import *

def clamp(x, lo, hi): if x < lo: return lo elif x > hi: return hi else: return x

DV25 and DV50 zig-zag permutations, as used by ffmpeg

zigzag_88 = ( 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63 )

zigzag_248 = ( 0, 8, 1, 9, 16, 24, 2, 10, 17, 25, 32, 40, 48, 56, 33, 41, 18, 26, 3, 11, 4, 12, 19, 27, 34, 42, 49, 57, 50, 58, 35, 43, 20, 28, 5, 13, 6, 14, 21, 29, 36, 44, 51, 59, 52, 60, 37, 45, 22, 30, 7, 15, 23, 31, 38, 46, 53, 61, 54, 62, 39, 47, 55, 63, )

Compute and print zig-zagged DV25/DV50 forward weight tables

The spec gives weights as floating point numbers, where the "unity"

value is 1/2. (i.e. a weight of 1/2 leaves the coefficient unchanged)

To make the math simpler, we double the weights so that "unity" is 1,

and double the incoming AC coefficients as well, so the result is the same.

weight formulas from SMPTE 314M

def CS(x): return cos(pi * (x/16.0))

w = [1.0, CS(4)/(4.0CS(7)CS(2)), CS(4)/(2.0CS(6)), 1.0/(2.0CS(5)), 7.0/8.0, CS(4)/CS(3), CS(4)/CS(2), CS(4)/CS(1)]

def compute_weight_88(h,v): # compute weight per the spec if h == 0 and v == 0: weight = 0.25 else: weight = w[h]*w[v]/2.0 # double it weight *= 2.0 return weight

def compute_weight_248(h,v): # compute weight per the spec
if h == 0 and v == 0: weight = 0.25 elif v < 4: weight = w[h] * w[2v]/2.0 else: weight = w[h] * w[2(v-4)]/2.0 # double it weight *= 2.0 return weight

We implement weighing as fixed-point integer multiplication.

The maximum possible precision is 18 bits, to avoid overflowing 32

bits when multiplying by 10-bit DCT levels which have been expanded

by a factor of 8 by the DCT, times two because we double the

coefficients).

weight_bits = 18 print "static const int dv_weight_bits = %d;" % weight_bits

forward weights for 8-8 DCT

weights_88 = range(64)

fwd_error = 0

for v in range(8): for h in range(8): fweight = compute_weight_88(h,v)

    # convert to fixed-point and round to closest value
    iweight = int(fweight * (1 << weight_bits) + 0.5)
    weights_88[8*v+h] = iweight

    # check the precision of the integer weight for all possible input levels
    for level in range(1024):
        fresult = int(level * fweight + 0.5)
        iresult = (level * iweight + (1 << (weight_bits-1))) >> weight_bits
        if fresult != iresult:
            # ensure the error is no more than one least-significant bit
            assert abs(fresult - iresult) < 2
            fwd_error += 1
            #sys.stderr.write("error level %d weight %f | %d result %d | %d\n" % (level,fweight,iweight,fresult,iresult))

forward weights for 2-4-8 DCT

weights_248 = range(64)

for v in range(8): # interlace fields v = (v/2) + 4*(v&1)

for h in range(8):
    fweight = compute_weight_248(h,v)

    # convert to fixed-point and round to closest value
    iweight = int(fweight * (1 << weight_bits) + 0.5)
    weights_248[8*v+h] = iweight

    # check the precision of the integer weight for all possible input levels
    for level in range(1024):
        fresult = int(level * fweight + 0.5)
        iresult = (level * iweight + (1 << (weight_bits-1))) >> weight_bits
        if fresult != iresult:
            # ensure the error is no more than one least-significant bit
            assert abs(fresult - iresult) < 2
            fwd_error += 1
            #sys.stderr.write("error level %d weight %f | %d result %d | %d\n" % (level,fweight,iweight,fresult,iresult))

if 0: sys.stderr.write("DV25/50 forward level/weight errors %d\n" % fwd_error)

print "static const int dv_weight_88[64] = {" for j in range(8): print "", for i in range(8): print "%6d," % (weights_88[zigzag_88[8*j+i]]), print "" print "};"

print "static const int dv_weight_248[64] = {" for j in range(8): print "", for i in range(8): print "%6d," % (weights_248[zigzag_248[8*j+i]]), print "" print "};"

Compute inverse DV25/DV50 weights in fixed point

Weighing and then unweighing an AC coefficient is inherently a lossy

process, because the weighted coefficients have one bit less

precision than unweighted ones. The best can do is minimize the

error by using weights of sufficient precision.

Fixed-point precision of inverse weights

The maximum possible precision is 24 bits, to avoid 32-bit overflow when multiplying

by 8-bit weighted coefficients.

However, using more than 14 bits of precision does not reduce error any further

iweight_bits = 14

print "static const int dv_iweight_bits = %d;" % iweight_bits

def make_fixed_dv25(w): w = 1.0 / float(w) w *= (1 << weight_bits) * (1 << iweight_bits) return int(w + 0.5)

inverse weights for 8-8 DCT

print "static const int dv_iweight_88[64] = {" for j in range(8): print "", for i in range(8): w = weights_88[zigzag_88[8*j+i]] print "%5d," % (make_fixed_dv25(w)), print "" print "};"

inverse weights for 2-4-8 DCT

print "static const int dv_iweight_248[64] = {" for j in range(8): print "", for i in range(8): w = weights_248[zigzag_248[8*j+i]] print "%5d," % (make_fixed_dv25(w)), print "" print "};"

Find the error created by weighing and then unweighing an AC coefficient.

We test all possible combinations of weight and level.

if 0: inv_error = 0 for i in range(128): for level in range(1024): if i >= 64: fwd_weight = weights_248[zigzag_248[i-64]] else: fwd_weight = weights_88[zigzag_88[i]]

        inv_weight = make_fixed_dv25(fwd_weight)
        
        # weigh the coefficient
        weighted = (level * fwd_weight + (1 << (weight_bits-1))) >> weight_bits
            
        # unweigh the coefficient
        result = (weighted * inv_weight + (1 << (iweight_bits-1))) >> iweight_bits

        # compare input vs output
        if result != level:
            # make sure the error is at most one least-significant bit
            assert abs(result-level) < 2
            #sys.stderr.write("level %d fwd_weight %d inv_weight %d weighted %d result %d\n" % (level,fwd_weight,inv_weight,weighted,result))
            inv_error += 1

sys.stderr.write("DV25/50 inverse level/weight errors %d\n" % inv_error)

DV50 macroblock locations

find_macroblock() returns a 16-bit quantity

lower 8 bits are horizontal index, upper 8 bits are vertical index

def find_macroblock_dv50(channel, seq, vid_block_num, is_pal): if is_pal: ysupers = 24 else: ysupers = 20

# X and Y indices of the superblock we're inside
super_x_order = (2,1,3,0,4)
super_y_bases = (2,6,8,0,4)
super_x = super_x_order[vid_block_num % 5]
super_y = (2*(super_y_bases[vid_block_num % 5] + seq) + channel) % ysupers

# index within the superblock
within_super = vid_block_num / 5

# locate us within the superblock's "brick" pattern
if(within_super < 3):
within_super_x = 0;
within_super_y = within_super;
elif(within_super < 6):
within_super_x = 1;
within_super_y = 2 - (within_super-3);
elif(within_super < 9):
within_super_x = 2;
within_super_y = (within_super-6);
elif(within_super < 12):
within_super_x = 3;
within_super_y = 2 - (within_super-9);
elif(within_super < 15):
within_super_x = 4;
within_super_y = (within_super-12);
elif(within_super < 18):
within_super_x = 5;
within_super_y = 2 - (within_super-15);
elif(within_super < 21):
within_super_x = 6;
within_super_y = (within_super-18);
elif(within_super < 24):
within_super_x = 7;
within_super_y = 2 - (within_super-21);
else:
within_super_x = 8;
within_super_y = (within_super-24);

return ((3*super_y + within_super_y) << 8) | \
4*(9*super_x + within_super_x)

print macroblock location table for DV50 NTSC

if 0: # 2 channels per frame, 10 DIF sequences per channel, # 27 video segments per DIF sequence, 5 macroblocks per video segment

print "static const uint16_t dv_place_422_525[2*10*27*5] = {"
for slice in range(2*10*27):
    channel = slice / (10*27)
    slice %= (10*27)
    seq = slice / 27
    slice %= 27
    vid_block_num = 5*slice

    print "",
    for mb in range(5):
        print "0x%04x," % (find_macroblock_dv50(channel, seq, vid_block_num + mb, 0)),
    print ""

print "};"

print macroblock location table for DV50 PAL

if 0: # 2 channels per frame, 12 DIF sequences per channel, # 27 video segments per DIF sequence, 5 macroblocks per video segment

print "static const uint16_t dv_place_422_625[2*12*27*5] = {"
for slice in range(2*12*27):
    channel = slice / (12*27)
    slice %= (12*27)
    seq = slice / 27
    slice %= 27
    vid_block_num = 5*slice

    print "",
    for mb in range(5):
        print "0x%04x," % (find_macroblock_dv50(channel, seq, vid_block_num + mb, 1)),
    print ""

print "};"

Previous message: [Ffmpeg-devel] Re: h264
Next message: [Ffmpeg-devel] [PATCH] DV weights
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

More information about the ffmpeg-devel mailing list