| 1 |
#include "reodft.h" |
| 2 |
#include "api/api.h" |
| 3 |
|
| 4 |
#ifdef HAVE_FFMPEG |
| 5 |
|
| 6 |
#include <libavcodec/avfft.h> |
| 7 |
|
| 8 |
|
| 9 |
typedef struct { |
| 10 |
solver super; |
| 11 |
} S; |
| 12 |
|
| 13 |
typedef struct { |
| 14 |
plan_rdft super; |
| 15 |
int n; /* problem size */ |
| 16 |
int m; /* m = log2(n) */ |
| 17 |
int k; /* ffmpeg kind */ |
| 18 |
DCTContext *s; |
| 19 |
} P; |
| 20 |
|
| 21 |
static int ffmpeg_kind( int fftwK ) { |
| 22 |
int r = -1; |
| 23 |
switch( fftwK ) { |
| 24 |
// DCT |
| 25 |
case FFTW_REDFT00: |
| 26 |
r = DCT_I; |
| 27 |
break; |
| 28 |
case FFTW_REDFT01: |
| 29 |
r = DCT_III; |
| 30 |
break; |
| 31 |
case FFTW_REDFT10: |
| 32 |
r = DCT_II; |
| 33 |
break; |
| 34 |
case FFTW_REDFT11: |
| 35 |
// DCT_IV is not supported in ffmpeg |
| 36 |
break; |
| 37 |
// DST |
| 38 |
case FFTW_RODFT00: |
| 39 |
r = DST_I; |
| 40 |
break; |
| 41 |
// the remaining DST are not supported in ffmpeg |
| 42 |
default: |
| 43 |
break; |
| 44 |
} |
| 45 |
return r; |
| 46 |
} |
| 47 |
|
| 48 |
static int fftw_kind( int ffmpegK ) { |
| 49 |
int r = -1; |
| 50 |
switch( ffmpegK ) { |
| 51 |
// DCT |
| 52 |
case DCT_I: |
| 53 |
r = FFTW_REDFT00; |
| 54 |
break; |
| 55 |
case DCT_III: |
| 56 |
r = FFTW_REDFT01; |
| 57 |
break; |
| 58 |
case DCT_II: |
| 59 |
r = FFTW_REDFT10; |
| 60 |
break; |
| 61 |
// DCT_IV is not supported in ffmpeg |
| 62 |
|
| 63 |
// DST |
| 64 |
case DST_I: |
| 65 |
r = FFTW_RODFT00; |
| 66 |
break; |
| 67 |
// the remaining DST are not supported in ffmpeg |
| 68 |
default: |
| 69 |
break; |
| 70 |
} |
| 71 |
return r; |
| 72 |
} |
| 73 |
|
| 74 |
/*
 * Exact base-2 logarithm of n.
 *
 * Returns m such that (1 << m) == n when n is a power of two, and -1 for
 * every other value, including 0.
 */
static int _log2(unsigned int n) {
    int m = 0;

    /* a power of two has exactly one bit set; 0 has none */
    if ( n == 0 || (n & (n - 1)) != 0 )
        return -1;

    /* count how far the single set bit is from bit 0 */
    while ( (n >>= 1) != 0 )
        m++;

    return m;
}
| 81 |
|
| 82 |
/*
 * Report whether p sits on a 16-byte boundary.
 *
 * Returns 1 when the low four address bits are zero, 0 otherwise.
 */
static int aligned(void * p) {
    /* currently ffmpeg uses 16-byte alignment for all architectures. This
     * function should probably be replaced with something built-in to
     * ffmpeg eventually. E.g. int av_aligned(void *p);
     */
    const unsigned long malloc_bytes = 16;
    /* Only the low bits matter, so test them directly.  Building the
     * complement mask in a narrower type than the address would clear the
     * upper address bits and make high addresses look misaligned. */
    const unsigned long low_bits = (unsigned long)p & (malloc_bytes - 1UL);

    return (low_bits == 0);
}
| 92 |
|
| 93 |
/*
 * Execute the plan: run the ffmpeg transform held in the plan's context
 * over the `in` buffer.  `out` is not referenced.
 */
static void apply(const plan *ego_, R *in, R *out)
{
    const P *self = (const P *) ego_;
    FFTSample *data = (FFTSample *) in;

    UNUSED(out);
    av_dct_calc(self->s, data);
}
| 99 |
|
| 100 |
/* Wakefulness hook: this plan does nothing here, so both arguments are ignored. */
static void awake(plan *ego_, enum wakefulness wakefulness)
{
    (void) wakefulness; /* UNUSED */
    (void) ego_;        /* UNUSED */
}
| 105 |
|
| 106 |
static int applicable0(const problem *p_) |
| 107 |
{ |
| 108 |
const problem_rdft *p = (const problem_rdft *) p_; |
| 109 |
unsigned int m = _log2(p->sz->dims[0].n); |
| 110 |
int r; |
| 111 |
return ( 1 |
| 112 |
/* 1D data */ |
| 113 |
&& p->sz->rnk == 1 |
| 114 |
/* a single vector */ |
| 115 |
&& p->vecsz->rnk == 0 |
| 116 |
/* power of two */ |
| 117 |
&& m != -1 |
| 118 |
/* min / max vector lengths (imposed by ffmpeg) */ |
| 119 |
&& m >= 4 && m <= 14 |
| 120 |
/* contiguous memory */ |
| 121 |
&& p->sz->dims[0].is == 1 |
| 122 |
&& p->sz->dims[0].os == 1 |
| 123 |
// FIXME: out-of-place should also be fine, but requires additional |
| 124 |
// logic & allocation / deallocation of a temporary buffer |
| 125 |
/* in-place */ |
| 126 |
&& p->I == p->O |
| 127 |
// FIXME: misaligned should also be fine, but requires additional |
| 128 |
// logic & allocation / deallocation of a temporary buffer |
| 129 |
/* check alignment */ |
| 130 |
&& aligned(p->I) ); |
| 131 |
} |
| 132 |
|
| 133 |
static int applicable(const solver *ego, const problem *p_, |
| 134 |
const planner *plnr) |
| 135 |
{ |
| 136 |
UNUSED(ego); |
| 137 |
if (!applicable0(p_)) return 0; |
| 138 |
return 1; |
| 139 |
} |
| 140 |
|
| 141 |
/*
 * Release the ffmpeg context owned by the plan, if there is one, and
 * clear the pointer so a repeated call does nothing.
 */
static void destroy(plan *ego_)
{
    P *self = (P *) ego_;

    if ( !self || !self->s )
        return;

    av_dct_end(self->s);
    self->s = (DCTContext *)0;
}
| 149 |
|
| 150 |
/*
 * Emit the plan's printable description, "(reodft-ffmpeg-<n>)", through
 * the supplied printer.
 */
static void print(const plan *ego_, printer *p)
{
    const P *ego = (const P *)ego_;

    /* ego->n is a plain int, so it is printed with the int conversion.
     * NOTE(review): assumes the printer's %d reads an int and %D reads the
     * wider INT type - confirm against the printer implementation. */
    p->print(p, "(reodft-ffmpeg-%d)", ego->n);
}
| 156 |
|
| 157 |
/*
 * Build a plan that runs the problem through ffmpeg's DCT/DST.
 *
 * Returns the new plan, or a null plan pointer when the problem is not
 * applicable, the transform parameters are unusable, or ffmpeg cannot
 * create a context.
 *
 * NOTE(review): p->kind[0] is passed to ffmpeg_kind(), which switches on
 * the public FFTW_* constants; confirm the planner's kind values use the
 * same enumeration.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
    static const plan_adt padt = {
        X(rdft_solve), awake, print, destroy
    };
    const problem_rdft *p;
    P *pln;
    INT len;
    int m, k;

    if (!applicable(ego_, p_, plnr))
        return (plan *)0;

    p = (const problem_rdft *) p_;

    /* never hand ffmpeg a transform type it does not know */
    k = ffmpeg_kind( p->kind[0] );
    if ( k == -1 )
        return (plan *)0;

    /* av_dct_init() documents the array size as (1 << nbits) + 1 for DCT-I
     * and (1 << nbits) for the other transforms, so only DCT-I drops one
     * sample before taking the logarithm. */
    len = p->sz->dims[0].n;
    if ( k == DCT_I )
        len -= 1;

    /* range check first so the narrowing cast cannot wrap; the bounds on m
     * also reject lengths that are not a power of two */
    if ( len < (1 << 4) || len > (1 << 14) )
        return (plan *)0;
    m = _log2( (unsigned int) len );
    if ( m < 4 || m > 14 )
        return (plan *)0;

    pln = MKPLAN_RDFT(P, &padt, apply);
    if ( ! pln )
        return (plan *)0;

    pln->n = (int) len;
    pln->m = m;
    pln->k = k;
    pln->s = av_dct_init(m, k);
    if ( ! pln->s ) {
        X(ifree)(pln);
        return (plan *)0;
    }

    /* XXX: leaving as zero for now, since it both varies by
     * architecture and also since the armv7 cycle timer is on the fritz */
    X(ops_zero)(&pln->super.super.ops);
    return &(pln->super.super);
}
| 197 |
|
| 198 |
static solver *mksolver(void) |
| 199 |
{ |
| 200 |
static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; |
| 201 |
S *slv = MKSOLVER(S, &sadt); |
| 202 |
return &(slv->super); |
| 203 |
} |
| 204 |
|
| 205 |
/* Create this file's solver and register it with planner p. */
void X(reodft_ffmpeg_register)(planner *p)
{
    solver *slv = mksolver();

    REGISTER_SOLVER(p, slv);
}
| 209 |
|
| 210 |
#endif /* HAVE_FFMPEG */ |