[Ffmpeg-devel] [PATCH] DV weights (original) (raw)
Dan Maas dmaas
Sat Feb 25 20:41:33 CET 2006
- Previous message: [Ffmpeg-devel] Re: h264
- Next message: [Ffmpeg-devel] [PATCH] DV weights
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
This patch implements AC coefficient weighing (per SMPTE 314M) for the DV encoder and decoder.
I have also attached the Python script I used to make the tables.
Weighing is implemented as fixed-point multiplication. I have chosen the number of bits of precision that gives the smallest error without causing 32-bit overflows. Weighing is inherently a lossy process because weighted coefficients have fewer bits than unweighted ones. But, in no case is the error greater than one least-significant bit. (the Python script checks this)
Performance impact is less than 1%.
Regards, Dan -------------- next part -------------- Index: libavcodec/dv.c
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/dv.c,v retrieving revision 1.75 diff -u -p -u -r1.75 dv.c --- libavcodec/dv.c 24 Feb 2006 09:16:26 -0000 1.75 +++ libavcodec/dv.c 25 Feb 2006 19:29:18 -0000 @@ -253,6 +253,7 @@ static int dvvideo_init(AVCodecContext * typedef struct BlockInfo { const uint8_t *shift_table; const uint8_t scan_table; + const int iweight_table; uint8_t pos; / position in block / uint8_t dct_mode; uint8_t partial_bit_count; @@ -295,6 +296,7 @@ static void dv_decode_ac(GetBitContext * int last_index = get_bits_size(gb); const uint8_t scan_table = mb->scan_table; const uint8_t shift_table = mb->shift_table; + const int iweight_table = mb->iweight_table; int pos = mb->pos; int partial_bit_count = mb->partial_bit_count; int level, pos1, run, vlc_len, index; @@ -344,7 +346,12 @@ static void dv_decode_ac(GetBitContext * assert(level); pos1 = scan_table[pos]; - block[pos1] = level << shift_table[pos1]; + level <<= shift_table[pos1]; + + /* unweigh, round, and shift down */ + level = (level*iweight_table[pos] + (1 << (dv_iweight_bits-1))) >> dv_iweight_bits; + + block[pos1] = level; UPDATE_CACHE(re, gb); } @@ -410,6 +417,7 @@ static inline void dv_decode_video_segme dct_mode = get_bits1(&gb); mb->dct_mode = dct_mode; mb->scan_table = s->dv_zigzag[dct_mode]; + mb->iweight_table = dct_mode ? dv_iweight_248 : dv_iweight_88; class1 = get_bits(&gb, 2); mb->shift_table = s->dv_idct_shift[class1 == 3][dct_mode] [quant + dv_quant_offset[class1]]; @@ -648,7 +656,7 @@ static always_inline PutBitContext dv_e } static always_inline void dv_set_class_number(DCTELEM blk, EncBlockInfo bi, - const uint8_t zigzag_scan, int bias) + const uint8_t zigzag_scan, const int weight, int bias) { int i, area; static const int classes[] = {12, 24, 36, 0xffff}; @@ -665,7 +673,11 @@ static always_inline void dv_set_class_n if (level+15 > 30U) { bi->sign[i] = (level>>31)&1; - bi->mb[i] = level= ABS(level)>>4; + / weigh it and and shift down into range, adding for rounding / + / the extra division by a factor of 2^4 reverses the 8x expansion of the DCT + AND the 2x doubling of the weights */ + level = (ABS(level) * weight[i] + (1<<(dv_weight_bits+3))) >> (dv_weight_bits+4); + bi->mb[i] = level; if(level>max) max= level; bi->bit_size[area] += dv_rl2vlc_size(i - prev - 1, level); bi->next[prev]= i; @@ -872,7 +887,9 @@ static inline void dv_encode_video_segme s->fdctenc_blk->dct_mode; dv_set_class_number(block, enc_blk, - enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, j/4); + enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, + enc_blk->dct_mode ? dv_weight_248 : dv_weight_88, + j/4); init_put_bits(pb, ptr, block_sizes[j]/8); put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2)); Index: libavcodec/dvdata.h
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/dvdata.h,v retrieving revision 1.15 diff -u -p -u -r1.15 dvdata.h --- libavcodec/dvdata.h 12 Jan 2006 22:43:15 -0000 1.15 +++ libavcodec/dvdata.h 25 Feb 2006 19:29:21 -0000 @@ -1256,6 +1256,51 @@ static const uint16_t dv_place_411[1350] 0x0834, 0x2320, 0x2f44, 0x3810, 0x1658, };
+/* DV25/50 DCT coefficient weights and inverse weights / +/ created by dvtables.py */ +static const int dv_weight_bits = 18; +static const int dv_weight_88[64] = {
- 131072, 257107, 257107, 242189, 252167, 242189, 235923, 237536,
- 237536, 235923, 229376, 231390, 223754, 231390, 229376, 222935,
- 224969, 217965, 217965, 224969, 222935, 200636, 218652, 211916,
- 212325, 211916, 218652, 200636, 188995, 196781, 205965, 206433,
- 206433, 205965, 196781, 188995, 185364, 185364, 200636, 200704,
- 200636, 185364, 185364, 174609, 180568, 195068, 195068, 180568,
- 174609, 170091, 175557, 189591, 175557, 170091, 165371, 170627,
- 170627, 165371, 160727, 153560, 160727, 144651, 144651, 136258, +}; +static const int dv_weight_248[64] = {
- 131072, 242189, 257107, 237536, 229376, 200636, 242189, 223754,
- 224969, 196781, 262144, 242189, 229376, 200636, 257107, 237536,
- 211916, 185364, 235923, 217965, 229376, 211916, 206433, 180568,
- 242189, 223754, 224969, 196781, 211916, 185364, 235923, 217965,
- 200704, 175557, 222935, 205965, 200636, 185364, 195068, 170627,
- 229376, 211916, 206433, 180568, 200704, 175557, 222935, 205965,
- 175557, 153560, 188995, 174609, 165371, 144651, 200636, 185364,
- 195068, 170627, 175557, 153560, 188995, 174609, 165371, 144651, +}; +static const int dv_iweight_bits = 14; +static const int dv_iweight_88[64] = {
- 32768, 16710, 16710, 17735, 17015, 17735, 18197, 18079,
- 18079, 18197, 18725, 18559, 19196, 18559, 18725, 19284,
- 19108, 19692, 19692, 19108, 19284, 21400, 19645, 20262,
- 20214, 20262, 19645, 21400, 22733, 21845, 20867, 20815,
- 20815, 20867, 21845, 22733, 23173, 23173, 21400, 21400,
- 21400, 23173, 23173, 24600, 23764, 22017, 22017, 23764,
- 24600, 25267, 24457, 22672, 24457, 25267, 25971, 25191,
- 25191, 25971, 26715, 27962, 26715, 29642, 29642, 31536, +}; +static const int dv_iweight_248[64] = {
- 32768, 17735, 16710, 18079, 18725, 21400, 17735, 19196,
- 19108, 21845, 16384, 17735, 18725, 21400, 16710, 18079,
- 20262, 23173, 18197, 19692, 18725, 20262, 20815, 23764,
- 17735, 19196, 19108, 21845, 20262, 23173, 18197, 19692,
- 21400, 24457, 19284, 20867, 21400, 23173, 22017, 25191,
- 18725, 20262, 20815, 23764, 21400, 24457, 19284, 20867,
- 24457, 27962, 22733, 24600, 25971, 29642, 21400, 23173,
- 22017, 25191, 24457, 27962, 22733, 24600, 25971, 29642, +};
- static const uint16_t dv_audio_shuffle525[10][9] = { { 0, 30, 60, 20, 50, 80, 10, 40, 70 }, /* 1st channel */ { 6, 36, 66, 26, 56, 86, 16, 46, 76 }, -------------- next part --------------
#!/usr/bin/python
dvtables - precompute some tables for ffmpeg's DV codec
Copyright (C) 2006 Daniel Maas
last update 25 Feb 2006
dvtables is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
dvtables is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
import sys from math import *
def clamp(x, lo, hi): if x < lo: return lo elif x > hi: return hi else: return x
DV25 and DV50 zig-zag permutations, as used by ffmpeg
zigzag_88 = ( 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63 )
zigzag_248 = ( 0, 8, 1, 9, 16, 24, 2, 10, 17, 25, 32, 40, 48, 56, 33, 41, 18, 26, 3, 11, 4, 12, 19, 27, 34, 42, 49, 57, 50, 58, 35, 43, 20, 28, 5, 13, 6, 14, 21, 29, 36, 44, 51, 59, 52, 60, 37, 45, 22, 30, 7, 15, 23, 31, 38, 46, 53, 61, 54, 62, 39, 47, 55, 63, )
Compute and print zig-zagged DV25/DV50 forward weight tables
The spec gives weights as floating point numbers, where the "unity"
value is 1/2. (i.e. a weight of 1/2 leaves the coefficient unchanged)
To make the math simpler, we double the weights so that "unity" is 1,
and double the incoming AC coefficients as well, so the result is the same.
weight formulas from SMPTE 314M
def CS(x): return cos(pi * (x/16.0))
w = [1.0, CS(4)/(4.0CS(7)CS(2)), CS(4)/(2.0CS(6)), 1.0/(2.0CS(5)), 7.0/8.0, CS(4)/CS(3), CS(4)/CS(2), CS(4)/CS(1)]
def compute_weight_88(h,v): # compute weight per the spec if h == 0 and v == 0: weight = 0.25 else: weight = w[h]*w[v]/2.0 # double it weight *= 2.0 return weight
def compute_weight_248(h,v):
# compute weight per the spec
if h == 0 and v == 0:
weight = 0.25
elif v < 4:
weight = w[h] * w[2v]/2.0
else:
weight = w[h] * w[2(v-4)]/2.0
# double it
weight *= 2.0
return weight
We implement weighing as fixed-point integer multiplication.
The maximum possible precision is 18 bits, to avoid overflowing 32
bits when multiplying by 10-bit DCT levels which have been expanded
by a factor of 8 by the DCT, times two because we double the
coefficients).
weight_bits = 18 print "static const int dv_weight_bits = %d;" % weight_bits
forward weights for 8-8 DCT
weights_88 = range(64)
fwd_error = 0
for v in range(8): for h in range(8): fweight = compute_weight_88(h,v)
# convert to fixed-point and round to closest value
iweight = int(fweight * (1 << weight_bits) + 0.5)
weights_88[8*v+h] = iweight
# check the precision of the integer weight for all possible input levels
for level in range(1024):
fresult = int(level * fweight + 0.5)
iresult = (level * iweight + (1 << (weight_bits-1))) >> weight_bits
if fresult != iresult:
# ensure the error is no more than one least-significant bit
assert abs(fresult - iresult) < 2
fwd_error += 1
#sys.stderr.write("error level %d weight %f | %d result %d | %d\n" % (level,fweight,iweight,fresult,iresult))
forward weights for 2-4-8 DCT
weights_248 = range(64)
for v in range(8): # interlace fields v = (v/2) + 4*(v&1)
for h in range(8):
fweight = compute_weight_248(h,v)
# convert to fixed-point and round to closest value
iweight = int(fweight * (1 << weight_bits) + 0.5)
weights_248[8*v+h] = iweight
# check the precision of the integer weight for all possible input levels
for level in range(1024):
fresult = int(level * fweight + 0.5)
iresult = (level * iweight + (1 << (weight_bits-1))) >> weight_bits
if fresult != iresult:
# ensure the error is no more than one least-significant bit
assert abs(fresult - iresult) < 2
fwd_error += 1
#sys.stderr.write("error level %d weight %f | %d result %d | %d\n" % (level,fweight,iweight,fresult,iresult))
if 0: sys.stderr.write("DV25/50 forward level/weight errors %d\n" % fwd_error)
print "static const int dv_weight_88[64] = {" for j in range(8): print "", for i in range(8): print "%6d," % (weights_88[zigzag_88[8*j+i]]), print "" print "};"
print "static const int dv_weight_248[64] = {" for j in range(8): print "", for i in range(8): print "%6d," % (weights_248[zigzag_248[8*j+i]]), print "" print "};"
Compute inverse DV25/DV50 weights in fixed point
Weighing and then unweighing an AC coefficient is inherently a lossy
process, because the weighted coefficients have one bit less
precision than unweighted ones. The best can do is minimize the
error by using weights of sufficient precision.
Fixed-point precision of inverse weights
The maximum possible precision is 24 bits, to avoid 32-bit overflow when multiplying
by 8-bit weighted coefficients.
However, using more than 14 bits of precision does not reduce error any further
iweight_bits = 14
print "static const int dv_iweight_bits = %d;" % iweight_bits
def make_fixed_dv25(w): w = 1.0 / float(w) w *= (1 << weight_bits) * (1 << iweight_bits) return int(w + 0.5)
inverse weights for 8-8 DCT
print "static const int dv_iweight_88[64] = {" for j in range(8): print "", for i in range(8): w = weights_88[zigzag_88[8*j+i]] print "%5d," % (make_fixed_dv25(w)), print "" print "};"
inverse weights for 2-4-8 DCT
print "static const int dv_iweight_248[64] = {" for j in range(8): print "", for i in range(8): w = weights_248[zigzag_248[8*j+i]] print "%5d," % (make_fixed_dv25(w)), print "" print "};"
Find the error created by weighing and then unweighing an AC coefficient.
We test all possible combinations of weight and level.
if 0: inv_error = 0 for i in range(128): for level in range(1024): if i >= 64: fwd_weight = weights_248[zigzag_248[i-64]] else: fwd_weight = weights_88[zigzag_88[i]]
inv_weight = make_fixed_dv25(fwd_weight)
# weigh the coefficient
weighted = (level * fwd_weight + (1 << (weight_bits-1))) >> weight_bits
# unweigh the coefficient
result = (weighted * inv_weight + (1 << (iweight_bits-1))) >> iweight_bits
# compare input vs output
if result != level:
# make sure the error is at most one least-significant bit
assert abs(result-level) < 2
#sys.stderr.write("level %d fwd_weight %d inv_weight %d weighted %d result %d\n" % (level,fwd_weight,inv_weight,weighted,result))
inv_error += 1
sys.stderr.write("DV25/50 inverse level/weight errors %d\n" % inv_error)
DV50 macroblock locations
find_macroblock() returns a 16-bit quantity
lower 8 bits are horizontal index, upper 8 bits are vertical index
def find_macroblock_dv50(channel, seq, vid_block_num, is_pal): if is_pal: ysupers = 24 else: ysupers = 20
# X and Y indices of the superblock we're inside
super_x_order = (2,1,3,0,4)
super_y_bases = (2,6,8,0,4)
super_x = super_x_order[vid_block_num % 5]
super_y = (2*(super_y_bases[vid_block_num % 5] + seq) + channel) % ysupers
# index within the superblock
within_super = vid_block_num / 5
# locate us within the superblock's "brick" pattern
if(within_super < 3):
within_super_x = 0;
within_super_y = within_super;
elif(within_super < 6):
within_super_x = 1;
within_super_y = 2 - (within_super-3);
elif(within_super < 9):
within_super_x = 2;
within_super_y = (within_super-6);
elif(within_super < 12):
within_super_x = 3;
within_super_y = 2 - (within_super-9);
elif(within_super < 15):
within_super_x = 4;
within_super_y = (within_super-12);
elif(within_super < 18):
within_super_x = 5;
within_super_y = 2 - (within_super-15);
elif(within_super < 21):
within_super_x = 6;
within_super_y = (within_super-18);
elif(within_super < 24):
within_super_x = 7;
within_super_y = 2 - (within_super-21);
else:
within_super_x = 8;
within_super_y = (within_super-24);
return ((3*super_y + within_super_y) << 8) | \
4*(9*super_x + within_super_x)
print macroblock location table for DV50 NTSC
if 0: # 2 channels per frame, 10 DIF sequences per channel, # 27 video segments per DIF sequence, 5 macroblocks per video segment
print "static const uint16_t dv_place_422_525[2*10*27*5] = {"
for slice in range(2*10*27):
channel = slice / (10*27)
slice %= (10*27)
seq = slice / 27
slice %= 27
vid_block_num = 5*slice
print "",
for mb in range(5):
print "0x%04x," % (find_macroblock_dv50(channel, seq, vid_block_num + mb, 0)),
print ""
print "};"
print macroblock location table for DV50 PAL
if 0: # 2 channels per frame, 12 DIF sequences per channel, # 27 video segments per DIF sequence, 5 macroblocks per video segment
print "static const uint16_t dv_place_422_625[2*12*27*5] = {"
for slice in range(2*12*27):
channel = slice / (12*27)
slice %= (12*27)
seq = slice / 27
slice %= 27
vid_block_num = 5*slice
print "",
for mb in range(5):
print "0x%04x," % (find_macroblock_dv50(channel, seq, vid_block_num + mb, 1)),
print ""
print "};"
- Previous message: [Ffmpeg-devel] Re: h264
- Next message: [Ffmpeg-devel] [PATCH] DV weights
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]