early-access version 1432
This commit is contained in:
637
externals/ffmpeg/doc/snow.txt
vendored
Executable file
637
externals/ffmpeg/doc/snow.txt
vendored
Executable file
@@ -0,0 +1,637 @@
|
||||
=============================================
|
||||
Snow Video Codec Specification Draft 20080110
|
||||
=============================================
|
||||
|
||||
Introduction:
|
||||
=============
|
||||
This specification describes the Snow bitstream syntax and semantics as
|
||||
well as the formal Snow decoding process.
|
||||
|
||||
The decoding process is described precisely and any compliant decoder
|
||||
MUST produce the exact same output for a spec-conformant Snow stream.
|
||||
For encoding, though, any process which generates a stream compliant to
|
||||
the syntactical and semantic requirements and which is decodable by
|
||||
the process described in this spec shall be considered a conformant
|
||||
Snow encoder.
|
||||
|
||||
Definitions:
|
||||
============
|
||||
|
||||
MUST the specific part must be done to conform to this standard
|
||||
SHOULD it is recommended to be done that way, but not strictly required
|
||||
|
||||
ilog2(x) is the rounded down logarithm of x with basis 2
|
||||
ilog2(0) = 0
|
||||
|
||||
Type definitions:
|
||||
=================
|
||||
|
||||
b 1-bit range coded
|
||||
u unsigned scalar value range coded
|
||||
s signed scalar value range coded
|
||||
|
||||
|
||||
Bitstream syntax:
|
||||
=================
|
||||
|
||||
frame:
|
||||
header
|
||||
prediction
|
||||
residual
|
||||
|
||||
header:
|
||||
keyframe b MID_STATE
|
||||
if(keyframe || always_reset)
|
||||
reset_contexts
|
||||
if(keyframe){
|
||||
version u header_state
|
||||
always_reset b header_state
|
||||
temporal_decomposition_type u header_state
|
||||
temporal_decomposition_count u header_state
|
||||
spatial_decomposition_count u header_state
|
||||
colorspace_type u header_state
|
||||
if (nb_planes > 2) {
|
||||
chroma_h_shift u header_state
|
||||
chroma_v_shift u header_state
|
||||
}
|
||||
spatial_scalability b header_state
|
||||
max_ref_frames-1 u header_state
|
||||
qlogs
|
||||
}
|
||||
if(!keyframe){
|
||||
update_mc b header_state
|
||||
if(update_mc){
|
||||
for(plane=0; plane<nb_plane_types; plane++){
|
||||
diag_mc b header_state
|
||||
htaps/2-1 u header_state
|
||||
for(i= p->htaps/2; i; i--)
|
||||
|hcoeff[i]| u header_state
|
||||
}
|
||||
}
|
||||
update_qlogs b header_state
|
||||
if(update_qlogs){
|
||||
spatial_decomposition_count u header_state
|
||||
qlogs
|
||||
}
|
||||
}
|
||||
|
||||
spatial_decomposition_type s header_state
|
||||
qlog s header_state
|
||||
mv_scale s header_state
|
||||
qbias s header_state
|
||||
block_max_depth s header_state
|
||||
|
||||
qlogs:
|
||||
for(plane=0; plane<nb_plane_types; plane++){
|
||||
quant_table[plane][0][0] s header_state
|
||||
for(level=0; level < spatial_decomposition_count; level++){
|
||||
quant_table[plane][level][1]s header_state
|
||||
quant_table[plane][level][3]s header_state
|
||||
}
|
||||
}
|
||||
|
||||
reset_contexts
|
||||
*_state[*]= MID_STATE
|
||||
|
||||
prediction:
|
||||
for(y=0; y<block_count_vertical; y++)
|
||||
for(x=0; x<block_count_horizontal; x++)
|
||||
block(0)
|
||||
|
||||
block(level):
|
||||
mvx_diff=mvy_diff=y_diff=cb_diff=cr_diff=0
|
||||
if(keyframe){
|
||||
intra=1
|
||||
}else{
|
||||
if(level!=max_block_depth){
|
||||
s_context= 2*left->level + 2*top->level + topleft->level + topright->level
|
||||
leaf b block_state[4 + s_context]
|
||||
}
|
||||
if(level==max_block_depth || leaf){
|
||||
intra b block_state[1 + left->intra + top->intra]
|
||||
if(intra){
|
||||
y_diff s block_state[32]
|
||||
cb_diff s block_state[64]
|
||||
cr_diff s block_state[96]
|
||||
}else{
|
||||
ref_context= ilog2(2*left->ref) + ilog2(2*top->ref)
|
||||
if(ref_frames > 1)
|
||||
ref u block_state[128 + 1024 + 32*ref_context]
|
||||
mx_context= ilog2(2*abs(left->mx - top->mx))
|
||||
my_context= ilog2(2*abs(left->my - top->my))
|
||||
mvx_diff s block_state[128 + 32*(mx_context + 16*!!ref)]
|
||||
mvy_diff s block_state[128 + 32*(my_context + 16*!!ref)]
|
||||
}
|
||||
}else{
|
||||
block(level+1)
|
||||
block(level+1)
|
||||
block(level+1)
|
||||
block(level+1)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
residual:
|
||||
residual2(luma)
|
||||
if (nb_planes > 2) {
|
||||
residual2(chroma_cr)
|
||||
residual2(chroma_cb)
|
||||
}
|
||||
|
||||
residual2:
|
||||
for(level=0; level<spatial_decomposition_count; level++){
|
||||
if(level==0)
|
||||
subband(LL, 0)
|
||||
subband(HL, level)
|
||||
subband(LH, level)
|
||||
subband(HH, level)
|
||||
}
|
||||
|
||||
subband:
|
||||
FIXME
|
||||
|
||||
nb_plane_types = gray ? 1 : 2;
|
||||
|
||||
Tag description:
|
||||
----------------
|
||||
|
||||
version
|
||||
0
|
||||
this MUST NOT change within a bitstream
|
||||
|
||||
always_reset
|
||||
if 1 then the range coder contexts will be reset after each frame
|
||||
|
||||
temporal_decomposition_type
|
||||
0
|
||||
|
||||
temporal_decomposition_count
|
||||
0
|
||||
|
||||
spatial_decomposition_count
|
||||
FIXME
|
||||
|
||||
colorspace_type
|
||||
0 unspecified YCbCr
|
||||
1 Gray
|
||||
2 Gray + Alpha
|
||||
3 GBR
|
||||
4 GBRA
|
||||
this MUST NOT change within a bitstream
|
||||
|
||||
chroma_h_shift
|
||||
log2(luma.width / chroma.width)
|
||||
this MUST NOT change within a bitstream
|
||||
|
||||
chroma_v_shift
|
||||
log2(luma.height / chroma.height)
|
||||
this MUST NOT change within a bitstream
|
||||
|
||||
spatial_scalability
|
||||
0
|
||||
|
||||
max_ref_frames
|
||||
maximum number of reference frames
|
||||
this MUST NOT change within a bitstream
|
||||
|
||||
update_mc
|
||||
indicates that motion compensation filter parameters are stored in the
|
||||
header
|
||||
|
||||
diag_mc
|
||||
flag to enable faster diagonal interpolation
|
||||
this SHOULD be 1 unless it turns out to be covered by a valid patent
|
||||
|
||||
htaps
|
||||
number of half pel interpolation filter taps, MUST be even, >0 and <10
|
||||
|
||||
hcoeff
|
||||
half pel interpolation filter coefficients, hcoeff[0] are the 2 middle
|
||||
coefficients [1] are the next outer ones and so on, resulting in a filter
|
||||
like: ...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ...
|
||||
the sign of the coefficients is not explicitly stored but alternates
|
||||
after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,...
|
||||
hcoeff[0] is not explicitly stored but found by subtracting the sum
|
||||
of all stored coefficients with signs from 32
|
||||
hcoeff[0]= 32 - hcoeff[1] - hcoeff[2] - ...
|
||||
a good choice for hcoeff and htaps is
|
||||
htaps= 6
|
||||
hcoeff={40,-10,2}
|
||||
an alternative which requires more computations at both encoder and
|
||||
decoder side and may or may not be better is
|
||||
htaps= 8
|
||||
hcoeff={42,-14,6,-2}
|
||||
|
||||
|
||||
ref_frames
|
||||
minimum of the number of available reference frames and max_ref_frames
|
||||
for example the first frame after a key frame always has ref_frames=1
|
||||
|
||||
spatial_decomposition_type
|
||||
wavelet type
|
||||
0 is a 9/7 symmetric compact integer wavelet
|
||||
1 is a 5/3 symmetric compact integer wavelet
|
||||
others are reserved
|
||||
stored as delta from last, last is reset to 0 if always_reset || keyframe
|
||||
|
||||
qlog
|
||||
quality (logarithmic quantizer scale)
|
||||
stored as delta from last, last is reset to 0 if always_reset || keyframe
|
||||
|
||||
mv_scale
|
||||
stored as delta from last, last is reset to 0 if always_reset || keyframe
|
||||
FIXME check that everything works fine if this changes between frames
|
||||
|
||||
qbias
|
||||
dequantization bias
|
||||
stored as delta from last, last is reset to 0 if always_reset || keyframe
|
||||
|
||||
block_max_depth
|
||||
maximum depth of the block tree
|
||||
stored as delta from last, last is reset to 0 if always_reset || keyframe
|
||||
|
||||
quant_table
|
||||
quantization table
|
||||
|
||||
|
||||
Highlevel bitstream structure:
|
||||
==============================
|
||||
--------------------------------------------
|
||||
| Header |
|
||||
--------------------------------------------
|
||||
| ------------------------------------ |
|
||||
| | Block0 | |
|
||||
| | split? | |
|
||||
| | yes no | |
|
||||
| | ......... intra? | |
|
||||
| | : Block01 : yes no | |
|
||||
| | : Block02 : ....... .......... | |
|
||||
| | : Block03 : : y DC : : ref index: | |
|
||||
| | : Block04 : : cb DC : : motion x : | |
|
||||
| | ......... : cr DC : : motion y : | |
|
||||
| | ....... .......... | |
|
||||
| ------------------------------------ |
|
||||
| ------------------------------------ |
|
||||
| | Block1 | |
|
||||
| ... |
|
||||
--------------------------------------------
|
||||
| ------------ ------------ ------------ |
|
||||
|| Y subbands | | Cb subbands| | Cr subbands||
|
||||
|| --- --- | | --- --- | | --- --- ||
|
||||
|| |LL0||HL0| | | |LL0||HL0| | | |LL0||HL0| ||
|
||||
|| --- --- | | --- --- | | --- --- ||
|
||||
|| --- --- | | --- --- | | --- --- ||
|
||||
|| |LH0||HH0| | | |LH0||HH0| | | |LH0||HH0| ||
|
||||
|| --- --- | | --- --- | | --- --- ||
|
||||
|| --- --- | | --- --- | | --- --- ||
|
||||
|| |HL1||LH1| | | |HL1||LH1| | | |HL1||LH1| ||
|
||||
|| --- --- | | --- --- | | --- --- ||
|
||||
|| --- --- | | --- --- | | --- --- ||
|
||||
|| |HH1||HL2| | | |HH1||HL2| | | |HH1||HL2| ||
|
||||
|| ... | | ... | | ... ||
|
||||
| ------------ ------------ ------------ |
|
||||
--------------------------------------------
|
||||
|
||||
Decoding process:
|
||||
=================
|
||||
|
||||
------------
|
||||
| |
|
||||
| Subbands |
|
||||
------------ | |
|
||||
| | ------------
|
||||
| Intra DC | |
|
||||
| | LL0 subband prediction
|
||||
------------ |
|
||||
\ Dequantization
|
||||
------------------- \ |
|
||||
| Reference frames | \ IDWT
|
||||
| ------- ------- | Motion \ |
|
||||
||Frame 0| |Frame 1|| Compensation . OBMC v -------
|
||||
| ------- ------- | --------------. \------> + --->|Frame n|-->output
|
||||
| ------- ------- | -------
|
||||
||Frame 2| |Frame 3||<----------------------------------/
|
||||
| ... |
|
||||
-------------------
|
||||
|
||||
|
||||
Range Coder:
|
||||
============
|
||||
|
||||
Binary Range Coder:
|
||||
-------------------
|
||||
The implemented range coder is an adapted version based upon "Range encoding:
|
||||
an algorithm for removing redundancy from a digitised message." by G. N. N.
|
||||
Martin.
|
||||
The symbols encoded by the Snow range coder are bits (0|1). The
|
||||
associated probabilities are not fix but change depending on the symbol mix
|
||||
seen so far.
|
||||
|
||||
|
||||
bit seen | new state
|
||||
---------+-----------------------------------------------
|
||||
0 | 256 - state_transition_table[256 - old_state];
|
||||
1 | state_transition_table[ old_state];
|
||||
|
||||
state_transition_table = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27,
|
||||
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42,
|
||||
43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57,
|
||||
58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,
|
||||
74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
|
||||
89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103,
|
||||
104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118,
|
||||
119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133,
|
||||
134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
|
||||
150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
|
||||
165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179,
|
||||
180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194,
|
||||
195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209,
|
||||
210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225,
|
||||
226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240,
|
||||
241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
FIXME
|
||||
|
||||
|
||||
Range Coding of integers:
|
||||
-------------------------
|
||||
FIXME
|
||||
|
||||
|
||||
Neighboring Blocks:
|
||||
===================
|
||||
left and top are set to the respective blocks unless they are outside of
|
||||
the image in which case they are set to the Null block
|
||||
|
||||
top-left is set to the top left block unless it is outside of the image in
|
||||
which case it is set to the left block
|
||||
|
||||
if this block has no larger parent block or it is at the left side of its
|
||||
parent block and the top right block is not outside of the image then the
|
||||
top right block is used for top-right else the top-left block is used
|
||||
|
||||
Null block
|
||||
y,cb,cr are 128
|
||||
level, ref, mx and my are 0
|
||||
|
||||
|
||||
Motion Vector Prediction:
|
||||
=========================
|
||||
1. the motion vectors of all the neighboring blocks are scaled to
|
||||
compensate for the difference of reference frames
|
||||
|
||||
scaled_mv= (mv * (256 * (current_reference+1) / (mv.reference+1)) + 128)>>8
|
||||
|
||||
2. the median of the scaled left, top and top-right vectors is used as
|
||||
motion vector prediction
|
||||
|
||||
3. the used motion vector is the sum of the predictor and
|
||||
(mvx_diff, mvy_diff)*mv_scale
|
||||
|
||||
|
||||
Intra DC Prediction:
|
||||
====================
|
||||
the luma and chroma values of the left block are used as predictors
|
||||
|
||||
the used luma and chroma is the sum of the predictor and y_diff, cb_diff, cr_diff
|
||||
to reverse this in the decoder apply the following:
|
||||
block[y][x].dc[0] = block[y][x-1].dc[0] + y_diff;
|
||||
block[y][x].dc[1] = block[y][x-1].dc[1] + cb_diff;
|
||||
block[y][x].dc[2] = block[y][x-1].dc[2] + cr_diff;
|
||||
block[*][-1].dc[*]= 128;
|
||||
|
||||
|
||||
Motion Compensation:
|
||||
====================
|
||||
|
||||
Halfpel interpolation:
|
||||
----------------------
|
||||
Halfpel interpolation is done by convolution with the halfpel filter stored
|
||||
in the header:
|
||||
|
||||
horizontal halfpel samples are found by
|
||||
H1[y][x] = hcoeff[0]*(F[y][x ] + F[y][x+1])
|
||||
+ hcoeff[1]*(F[y][x-1] + F[y][x+2])
|
||||
+ hcoeff[2]*(F[y][x-2] + F[y][x+3])
|
||||
+ ...
|
||||
h1[y][x] = (H1[y][x] + 32)>>6;
|
||||
|
||||
vertical halfpel samples are found by
|
||||
H2[y][x] = hcoeff[0]*(F[y ][x] + F[y+1][x])
|
||||
+ hcoeff[1]*(F[y-1][x] + F[y+2][x])
|
||||
+ ...
|
||||
h2[y][x] = (H2[y][x] + 32)>>6;
|
||||
|
||||
vertical+horizontal halfpel samples are found by
|
||||
H3[y][x] = hcoeff[0]*(H2[y][x ] + H2[y][x+1])
|
||||
+ hcoeff[1]*(H2[y][x-1] + H2[y][x+2])
|
||||
+ ...
|
||||
H3[y][x] = hcoeff[0]*(H1[y ][x] + H1[y+1][x])
|
||||
+ hcoeff[1]*(H1[y+1][x] + H1[y+2][x])
|
||||
+ ...
|
||||
h3[y][x] = (H3[y][x] + 2048)>>12;
|
||||
|
||||
|
||||
F H1 F
|
||||
| | |
|
||||
| | |
|
||||
| | |
|
||||
F H1 F
|
||||
| | |
|
||||
| | |
|
||||
| | |
|
||||
F-------F-------F-> H1<-F-------F-------F
|
||||
v v v
|
||||
H2 H3 H2
|
||||
^ ^ ^
|
||||
F-------F-------F-> H1<-F-------F-------F
|
||||
| | |
|
||||
| | |
|
||||
| | |
|
||||
F H1 F
|
||||
| | |
|
||||
| | |
|
||||
| | |
|
||||
F H1 F
|
||||
|
||||
|
||||
unavailable fullpel samples (outside the picture for example) shall be equal
|
||||
to the closest available fullpel sample
|
||||
|
||||
|
||||
Smaller pel interpolation:
|
||||
--------------------------
|
||||
if diag_mc is set then points which lie on a line between 2 vertically,
|
||||
horizontally or diagonally adjacent halfpel points shall be interpolated
|
||||
linearly with rounding to nearest and halfway values rounded up.
|
||||
points which lie on 2 diagonals at the same time should only use the one
|
||||
diagonal not containing the fullpel point
|
||||
|
||||
|
||||
|
||||
F-->O---q---O<--h1->O---q---O<--F
|
||||
v \ / v \ / v
|
||||
O O O O O O O
|
||||
| / | \ |
|
||||
q q q q q
|
||||
| / | \ |
|
||||
O O O O O O O
|
||||
^ / \ ^ / \ ^
|
||||
h2-->O---q---O<--h3->O---q---O<--h2
|
||||
v \ / v \ / v
|
||||
O O O O O O O
|
||||
| \ | / |
|
||||
q q q q q
|
||||
| \ | / |
|
||||
O O O O O O O
|
||||
^ / \ ^ / \ ^
|
||||
F-->O---q---O<--h1->O---q---O<--F
|
||||
|
||||
|
||||
|
||||
the remaining points shall be bilinearly interpolated from the
|
||||
up to 4 surrounding halfpel and fullpel points, again rounding should be to
|
||||
nearest and halfway values rounded up
|
||||
|
||||
compliant Snow decoders MUST support 1-1/8 pel luma and 1/2-1/16 pel chroma
|
||||
interpolation at least
|
||||
|
||||
|
||||
Overlapped block motion compensation:
|
||||
-------------------------------------
|
||||
FIXME
|
||||
|
||||
LL band prediction:
|
||||
===================
|
||||
Each sample in the LL0 subband is predicted by the median of the left, top and
|
||||
left+top-topleft samples, samples outside the subband shall be considered to
|
||||
be 0. To reverse this prediction in the decoder apply the following.
|
||||
for(y=0; y<height; y++){
|
||||
for(x=0; x<width; x++){
|
||||
sample[y][x] += median(sample[y-1][x],
|
||||
sample[y][x-1],
|
||||
sample[y-1][x]+sample[y][x-1]-sample[y-1][x-1]);
|
||||
}
|
||||
}
|
||||
sample[-1][*]=sample[*][-1]= 0;
|
||||
width,height here are the width and height of the LL0 subband not of the final
|
||||
video
|
||||
|
||||
|
||||
Dequantization:
|
||||
===============
|
||||
FIXME
|
||||
|
||||
Wavelet Transform:
|
||||
==================
|
||||
|
||||
Snow supports 2 wavelet transforms, the symmetric biorthogonal 5/3 integer
|
||||
transform and an integer approximation of the symmetric biorthogonal 9/7
|
||||
daubechies wavelet.
|
||||
|
||||
2D IDWT (inverse discrete wavelet transform)
|
||||
--------------------------------------------
|
||||
The 2D IDWT applies a 2D filter recursively, each time combining the
|
||||
4 lowest frequency subbands into a single subband until only 1 subband
|
||||
remains.
|
||||
The 2D filter is done by first applying a 1D filter in the vertical direction
|
||||
and then applying it in the horizontal one.
|
||||
--------------- --------------- --------------- ---------------
|
||||
|LL0|HL0| | | | | | | | | | | |
|
||||
|---+---| HL1 | | L0|H0 | HL1 | | LL1 | HL1 | | | |
|
||||
|LH0|HH0| | | | | | | | | | | |
|
||||
|-------+-------|->|-------+-------|->|-------+-------|->| L1 | H1 |->...
|
||||
| | | | | | | | | | | |
|
||||
| LH1 | HH1 | | LH1 | HH1 | | LH1 | HH1 | | | |
|
||||
| | | | | | | | | | | |
|
||||
--------------- --------------- --------------- ---------------
|
||||
|
||||
|
||||
1D Filter:
|
||||
----------
|
||||
1. interleave the samples of the low and high frequency subbands like
|
||||
s={L0, H0, L1, H1, L2, H2, L3, H3, ... }
|
||||
note, this can end with a L or a H, the number of elements shall be w
|
||||
s[-1] shall be considered equivalent to s[1 ]
|
||||
s[w ] shall be considered equivalent to s[w-2]
|
||||
|
||||
2. perform the lifting steps in order as described below
|
||||
|
||||
5/3 Integer filter:
|
||||
1. s[i] -= (s[i-1] + s[i+1] + 2)>>2; for all even i < w
|
||||
2. s[i] += (s[i-1] + s[i+1] )>>1; for all odd i < w
|
||||
|
||||
\ | /|\ | /|\ | /|\ | /|\
|
||||
\|/ | \|/ | \|/ | \|/ |
|
||||
+ | + | + | + | -1/4
|
||||
/|\ | /|\ | /|\ | /|\ |
|
||||
/ | \|/ | \|/ | \|/ | \|/
|
||||
| + | + | + | + +1/2
|
||||
|
||||
|
||||
Snow's 9/7 Integer filter:
|
||||
1. s[i] -= (3*(s[i-1] + s[i+1]) + 4)>>3; for all even i < w
|
||||
2. s[i] -= s[i-1] + s[i+1] ; for all odd i < w
|
||||
3. s[i] += ( s[i-1] + s[i+1] + 4*s[i] + 8)>>4; for all even i < w
|
||||
4. s[i] += (3*(s[i-1] + s[i+1]) )>>1; for all odd i < w
|
||||
|
||||
\ | /|\ | /|\ | /|\ | /|\
|
||||
\|/ | \|/ | \|/ | \|/ |
|
||||
+ | + | + | + | -3/8
|
||||
/|\ | /|\ | /|\ | /|\ |
|
||||
/ | \|/ | \|/ | \|/ | \|/
|
||||
(| + (| + (| + (| + -1
|
||||
\ + /|\ + /|\ + /|\ + /|\ +1/4
|
||||
\|/ | \|/ | \|/ | \|/ |
|
||||
+ | + | + | + | +1/16
|
||||
/|\ | /|\ | /|\ | /|\ |
|
||||
/ | \|/ | \|/ | \|/ | \|/
|
||||
| + | + | + | + +3/2
|
||||
|
||||
optimization tips:
|
||||
following are exactly identical
|
||||
(3a)>>1 == a + (a>>1)
|
||||
(a + 4b + 8)>>4 == ((a>>2) + b + 2)>>2
|
||||
|
||||
16bit implementation note:
|
||||
The IDWT can be implemented with 16bits, but this requires some care to
|
||||
prevent overflows, the following list, lists the minimum number of bits needed
|
||||
for some terms
|
||||
1. lifting step
|
||||
A= s[i-1] + s[i+1] 16bit
|
||||
3*A + 4 18bit
|
||||
A + (A>>1) + 2 17bit
|
||||
|
||||
3. lifting step
|
||||
s[i-1] + s[i+1] 17bit
|
||||
|
||||
4. lifiting step
|
||||
3*(s[i-1] + s[i+1]) 17bit
|
||||
|
||||
|
||||
TODO:
|
||||
=====
|
||||
Important:
|
||||
finetune initial contexts
|
||||
flip wavelet?
|
||||
try to use the wavelet transformed predicted image (motion compensated image) as context for coding the residual coefficients
|
||||
try the MV length as context for coding the residual coefficients
|
||||
use extradata for stuff which is in the keyframes now?
|
||||
implement per picture halfpel interpolation
|
||||
try different range coder state transition tables for different contexts
|
||||
|
||||
Not Important:
|
||||
compare the 6 tap and 8 tap hpel filters (psnr/bitrate and subjective quality)
|
||||
spatial_scalability b vs u (!= 0 breaks syntax anyway so we can add a u later)
|
||||
|
||||
|
||||
Credits:
|
||||
========
|
||||
Michael Niedermayer
|
||||
Loren Merritt
|
||||
|
||||
|
||||
Copyright:
|
||||
==========
|
||||
GPL + GFDL + whatever is needed to make this a RFC
|
Reference in New Issue
Block a user