| 1 |
#include "rdft.h" |
| 2 |
#include "api/api.h" |
| 3 |
|
| 4 |
#ifdef HAVE_FFMPEG |
| 5 |
|
| 6 |
#include <libavcodec/avfft.h> |
| 7 |
|
| 8 |
/* ffmpeg currently uses 16-byte alignment on all architectures. This
 * macro should probably be replaced with something built in to ffmpeg
 * eventually, e.g. bool av_aligned(void *p); */
#define FFMPEG_ALIGNMENT() 16

/*
 * Applicability predicates used by applicable0().  Every argument and
 * every expansion is fully parenthesized so the macros can be combined
 * freely inside larger expressions.
 */

/* 1d data */
#define size_rank_ok(x) ((x) == 1)

/* a single vector */
#define vector_rank_ok(x) ((x) == 0)

/* limits imposed by ffmpeg: 4 <= log2(x) <= 16.
 * Note: x is evaluated twice, so pass a side-effect-free expression. */
#define nbits_ok(x) ((_log2((x)) >= 4) && (_log2((x)) <= 16))

/* in-place -> i/o strides must be 1, regardless of kind */
#define input_stride_ok(x) ((x) == 1)

/* in-place -> i/o strides must be 1, regardless of kind */
#define output_stride_ok(x) ((x) == 1)

/* in-place r2c / c2r -> pointers are always swapped, so the address that
 * gets checked is element 1 of the input; the output pointer is unused */
#define alignment_ok(i,o) (aligned(&((i)[1])))

/* only in-place transforms are supported: input and output must coincide */
#define io_place_ok(i,o) ((i) == (o))
| 33 |
|
| 34 |
typedef struct { |
| 35 |
solver super; |
| 36 |
} S; |
| 37 |
|
| 38 |
typedef struct { |
| 39 |
plan_rdft super; |
| 40 |
rdft_kind kind; |
| 41 |
int sign; |
| 42 |
int n; /* problem size */ |
| 43 |
int m; /* m = log2(n) */ |
| 44 |
int k; /* fftw kind */ |
| 45 |
RDFTContext *s; |
| 46 |
FFTSample *x; |
| 47 |
} P; |
| 48 |
|
| 49 |
/* fftw / ffmpeg rdft mappings */ |
| 50 |
|
| 51 |
static int ffmpeg_ok(int fftwK) { |
| 52 |
int r = 0; |
| 53 |
// for some reason R2HC is showing up as 11 instead of 0 |
| 54 |
if ( fftwK == 11 ) fftwK = 0; |
| 55 |
switch( fftwK ) { |
| 56 |
case FFTW_R2HC: |
| 57 |
case FFTW_HC2R: |
| 58 |
r = 1; |
| 59 |
break; |
| 60 |
default: |
| 61 |
break; |
| 62 |
}; |
| 63 |
return r; |
| 64 |
} |
| 65 |
static int ffmpeg_kind( int fftwKind, int sign ) { |
| 66 |
int i, r = -1; |
| 67 |
struct map_s { |
| 68 |
int fftwK; |
| 69 |
int sign; |
| 70 |
int ffmpegK; |
| 71 |
}; |
| 72 |
const struct map_s map[] = { |
| 73 |
{ FFTW_R2HC, FFTW_FORWARD, DFT_R2C }, |
| 74 |
{ FFTW_R2HC, FFTW_BACKWARD, IDFT_R2C }, |
| 75 |
{ FFTW_HC2R, FFTW_FORWARD, DFT_C2R }, |
| 76 |
{ FFTW_HC2R, FFTW_BACKWARD, IDFT_C2R }, |
| 77 |
}; |
| 78 |
const int nmap = sizeof(map) / sizeof(map[0]); |
| 79 |
for(i=0;i<nmap;i++) { |
| 80 |
if ( fftwKind = map[i].fftwK && sign == map[i].sign ) { |
| 81 |
r = map[i].ffmpegK; |
| 82 |
break; |
| 83 |
} |
| 84 |
} |
| 85 |
return r; |
| 86 |
} |
| 87 |
static int fftw_kind( int ffmpegK ) |
| 88 |
{ |
| 89 |
int i,r = -1; |
| 90 |
struct map_s { |
| 91 |
int ffmpegK; |
| 92 |
int fftwK; |
| 93 |
}; |
| 94 |
const struct map_s map[] = { |
| 95 |
{ DFT_R2C, FFTW_R2HC }, |
| 96 |
{ IDFT_R2C, FFTW_R2HC }, |
| 97 |
{ DFT_C2R, FFTW_HC2R }, |
| 98 |
{ IDFT_C2R, FFTW_HC2R }, |
| 99 |
}; |
| 100 |
const int nmap = sizeof(map) / sizeof(map[0]); |
| 101 |
for(i=0;i<nmap;i++) { |
| 102 |
if ( ffmpegK == map[i].ffmpegK ) { |
| 103 |
r = map[i].fftwK; |
| 104 |
break; |
| 105 |
} |
| 106 |
} |
| 107 |
return r; |
| 108 |
} |
| 109 |
static int fftw_sign( int ffmpegK ) |
| 110 |
{ |
| 111 |
int i,r=0; |
| 112 |
struct map_s { |
| 113 |
int ffmpegK; |
| 114 |
int sign; |
| 115 |
}; |
| 116 |
const struct map_s map[] = { |
| 117 |
{ DFT_R2C, FFTW_FORWARD }, |
| 118 |
{ IDFT_R2C, FFTW_BACKWARD }, |
| 119 |
{ DFT_C2R, FFTW_FORWARD }, |
| 120 |
{ IDFT_C2R, FFTW_BACKWARD }, |
| 121 |
}; |
| 122 |
const int nmap = sizeof(map) / sizeof(map[0]); |
| 123 |
for(i=0;i<nmap;i++) { |
| 124 |
if ( ffmpegK == map[i].ffmpegK ) { |
| 125 |
r = map[i].sign; |
| 126 |
break; |
| 127 |
} |
| 128 |
} |
| 129 |
return r; |
| 130 |
} |
| 131 |
|
| 132 |
/*
 * Exact base-2 logarithm.
 *
 * Returns m (0 <= m <= 31) such that n == 2^m, or -1 when n is zero or
 * is not a power of two.
 */
static int _log2(unsigned int n)
{
     int m;

     for (m = 0; m < 32; m++) {
          /* Shift an unsigned 1: "1 << 31" on a signed int is undefined
           * behaviour, and the comparison is against an unsigned value. */
          if ((1u << m) == n)
               return m;
     }
     return -1;
}
| 140 |
|
| 141 |
/*
 * Record in the plan the buffer ffmpeg should operate on: the address
 * of element 1 of the problem's input array.
 */
static void unextract_reim(P *ego, const problem_rdft *p)
{
     /* for in-place r2c or c2r transforms, the pointers are always swapped */
     ego->x = (FFTSample *) (p->I + 1);
}
| 146 |
|
| 147 |
/*
 * Return 1 if p is a multiple of FFMPEG_ALIGNMENT() bytes, 0 otherwise.
 */
static int aligned(void * p)
{
     const unsigned long addr = (unsigned long)p;
     const unsigned long rounded_down = addr & ~(FFMPEG_ALIGNMENT()-1);

     /* an aligned address is unchanged by clearing its low bits */
     if (rounded_down != addr)
          return 0;
     return 1;
}
| 153 |
|
| 154 |
/*
 * Execute the plan: run ffmpeg's RDFT with the context and buffer stored
 * in the plan.  The I and O arguments are not consulted.
 */
static void apply(const plan *ego_, R *I, R *O)
{
     const P *pln = (const P *) ego_;

     (void) I; /* UNUSED */
     (void) O; /* UNUSED */
     av_rdft_calc(pln->s, pln->x);
}
| 159 |
|
| 160 |
/*
 * Wakefulness hook of the plan: nothing to do here, so both arguments
 * are deliberately ignored.
 */
static void awake(plan *ego_, enum wakefulness wakefulness)
{
     (void) wakefulness; /* UNUSED */
     (void) ego_;        /* UNUSED */
}
| 165 |
|
| 166 |
static int applicable0(const problem *p_) |
| 167 |
{ |
| 168 |
const problem_rdft *p = (const problem_rdft *) p_; |
| 169 |
int m = _log2(p->sz->dims[0].n); |
| 170 |
|
| 171 |
return( 1 |
| 172 |
&& size_rank_ok(p->sz->rnk) |
| 173 |
&& vector_rank_ok(p->vecsz->rnk) |
| 174 |
&& nbits_ok(p->sz->dims[0].n) |
| 175 |
&& input_stride_ok(p->sz->dims[0].is) |
| 176 |
&& output_stride_ok(p->sz->dims[0].os) |
| 177 |
&& io_place_ok(p->I,p->O) |
| 178 |
&& alignment_ok(p->I,p->O) ); |
| 179 |
} |
| 180 |
|
| 181 |
static int applicable(const solver *ego, const problem *p_, |
| 182 |
const planner *plnr) |
| 183 |
{ |
| 184 |
UNUSED(ego); |
| 185 |
if (!applicable0(p_)) return 0; |
| 186 |
return 1; |
| 187 |
} |
| 188 |
|
| 189 |
/*
 * Release the ffmpeg context owned by the plan, if there is one, and
 * clear the pointer afterwards.
 */
static void destroy(plan *ego_)
{
     P *pln = (P *) ego_;

     if ( !pln || !pln->s )
          return;

     av_rdft_end(pln->s);
     pln->s = (RDFTContext *)0;
}
| 197 |
|
| 198 |
/*
 * Print a short description of the plan, including the problem size.
 */
static void print(const plan *ego_, printer *p)
{
     const P *ego = (const P *)ego_;

     /* ego->n is a plain int, so it is printed with %d.  The previous %D
      * conversion is the one FFTW's printer uses for its wide INT type,
      * which does not match an int passed through varargs. */
     p->print(p, "(rdft-ffmpeg-%d)", ego->n);
}
| 203 |
|
| 204 |
/*
 * Build a plan for problem p_, or return a null plan if this solver is
 * not applicable or ffmpeg cannot provide a matching transform.
 *
 * Every field of the plan is initialised before use: the direction is
 * derived from the transform kind, and the buffer pointer that apply()
 * hands to av_rdft_calc() is recorded here.
 */
static plan *mkplan(const solver *ego, const problem *p_, planner *plnr)
{
     const problem_rdft *p = (const problem_rdft *) p_;
     P *pln;
     int n, m, k;

     static const plan_adt padt = {
          X(rdft_solve), awake, print, destroy
     };

     if ( ! applicable(ego, p_, plnr) )
          return (plan *)0;

     pln = MKPLAN_RDFT(P, &padt, apply);
     if ( ! pln )
          return (plan *)0;

     pln->n = n = p->sz->dims[0].n;
     pln->m = m = _log2(n);
     pln->kind = p->kind[0];
     pln->k = (int) p->kind[0];

     /* A real-to-halfcomplex transform is a forward DFT and a
      * halfcomplex-to-real transform is a backward one, so the direction
      * follows from the kind.  It used to be read uninitialised.
      * NOTE(review): this file compares p->kind[] against the public
      * FFTW_* constants throughout (see ffmpeg_ok / ffmpeg_kind) --
      * confirm those values agree with the planner's rdft_kind values. */
     pln->sign = (pln->k == FFTW_HC2R) ? FFTW_BACKWARD : FFTW_FORWARD;

     /* apply() transforms pln->x, so it has to be set before the plan
      * can run. */
     unextract_reim(pln, p);

     k = ffmpeg_kind( pln->k, pln->sign );
     if ( k < 0 ) {
          /* no ffmpeg transform corresponds to this kind/sign pair */
          X(ifree)(pln);
          return (plan *)0;
     }

     pln->s = av_rdft_init(m,k);
     if ( ! pln->s ) {
          X(ifree)(pln);
          return (plan *)0;
     }

     /* XXX: leaving as zero for now, since it varies by architecture */
     X(ops_zero)(& pln->super.super.ops);
     return &(pln->super.super);
}
| 237 |
|
| 238 |
static solver *mksolver(void) |
| 239 |
{ |
| 240 |
static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; |
| 241 |
S *slv = MKSOLVER(S, &sadt); |
| 242 |
return &(slv->super); |
| 243 |
} |
| 244 |
|
| 245 |
/*
 * Register the ffmpeg-backed rdft solver with planner p.
 */
void X(rdft_ffmpeg_register)(planner *p)
{
     solver *slv = mksolver();

     REGISTER_SOLVER(p, slv);
}
| 249 |
|
| 250 |
#endif /* HAVE_FFMPEG */ |