Fixed theora playback. Removed theoraplayer.

Still need to get proper audio output latency on some platforms.
This commit is contained in:
Juan Linietsky 2015-09-26 14:50:42 -03:00
parent ce6fefced8
commit c858515785
128 changed files with 562 additions and 55907 deletions

View file

@ -38,8 +38,8 @@ if (env["vorbis"]=="yes"):
if (env["tools"]=="yes"):
SConscript("convex_decomp/SCsub");
if env["theora"]=="yes":
SConscript("theoraplayer/SCsub")
#if env["theora"]=="yes":
# SConscript("theoraplayer/SCsub")
if (env["theora"]=="yes"):
SConscript("theora/SCsub");
if (env['speex']=='yes'):

View file

@ -82,6 +82,17 @@ Error AudioDriverPulseAudio::init() {
return OK;
}
float AudioDriverPulseAudio::get_latency() {
if (latency==0) { //only do this once since it's approximate anyway
int error_code;
pa_usec_t palat = pa_simple_get_latency( pulse,&error_code);
latency=double(palat)/1000000.0;
}
return latency;
}
void AudioDriverPulseAudio::thread_func(void* p_udata) {
AudioDriverPulseAudio* ad = (AudioDriverPulseAudio*)p_udata;
@ -121,6 +132,7 @@ void AudioDriverPulseAudio::thread_func(void* p_udata) {
ad->exit_thread = true;
break;
}
}
ad->thread_exited = true;
@ -185,6 +197,7 @@ AudioDriverPulseAudio::AudioDriverPulseAudio() {
mutex = NULL;
thread = NULL;
pulse = NULL;
latency=0;
}
AudioDriverPulseAudio::~AudioDriverPulseAudio() {

View file

@ -58,6 +58,8 @@ class AudioDriverPulseAudio : public AudioDriverSW {
mutable bool exit_thread;
bool pcm_open;
float latency;
public:
const char* get_name() const {
@ -72,6 +74,9 @@ public:
virtual void unlock();
virtual void finish();
virtual float get_latency();
AudioDriverPulseAudio();
~AudioDriverPulseAudio();
};

View file

@ -43,7 +43,10 @@
#endif
#ifdef THEORA_ENABLED
//#include "theora/video_stream_theora.h"
#include "theora/video_stream_theora.h"
#endif
#ifdef THEORAPLAYER_ENABLED
#include "theoraplayer/video_stream_theoraplayer.h"
#endif
@ -90,7 +93,10 @@ static ResourceFormatLoaderAudioStreamSpeex *speex_stream_loader=NULL;
#endif
#ifdef THEORA_ENABLED
//static ResourceFormatLoaderVideoStreamTheora* theora_stream_loader = NULL;
static ResourceFormatLoaderVideoStreamTheora* theora_stream_loader = NULL;
#endif
#ifdef THEORAPLAYER_ENABLED
static ResourceFormatLoaderVideoStreamTheoraplayer* theoraplayer_stream_loader = NULL;
#endif
@ -205,9 +211,12 @@ void register_driver_types() {
#endif
#ifdef THEORA_ENABLED
//theora_stream_loader = memnew( ResourceFormatLoaderVideoStreamTheora );
//ResourceLoader::add_resource_format_loader(theora_stream_loader);
//ObjectTypeDB::register_type<VideoStreamTheora>();
theora_stream_loader = memnew( ResourceFormatLoaderVideoStreamTheora );
ResourceLoader::add_resource_format_loader(theora_stream_loader);
ObjectTypeDB::register_type<VideoStreamTheora>();
#endif
#ifdef THEORAPLAYER_ENABLED
theoraplayer_stream_loader = memnew( ResourceFormatLoaderVideoStreamTheoraplayer );
ResourceLoader::add_resource_format_loader(theoraplayer_stream_loader);
ObjectTypeDB::register_type<VideoStreamTheoraplayer>();
@ -244,7 +253,10 @@ void unregister_driver_types() {
#endif
#ifdef THEORA_ENABLED
//memdelete (theora_stream_loader);
memdelete (theora_stream_loader);
#endif
#ifdef THEORAPLAYER_ENABLED
memdelete (theoraplayer_stream_loader);
#endif

View file

@ -1,16 +1,12 @@
#ifdef THEORA_ENABLED
#if 0
#include "video_stream_theora.h"
#include "os/os.h"
#include "yuv2rgb.h"
#include "globals.h"
AudioStream::UpdateMode VideoStreamTheora::get_update_mode() const {
return UPDATE_IDLE;
};
int VideoStreamTheora:: buffer_data() {
int VideoStreamPlaybackTheora:: buffer_data() {
char *buffer=ogg_sync_buffer(&oy,4096);
int bytes=file->get_buffer((uint8_t*)buffer, 4096);
@ -18,33 +14,13 @@ int VideoStreamTheora:: buffer_data() {
return(bytes);
}
int VideoStreamTheora::queue_page(ogg_page *page){
// Submits an Ogg page to each logical stream that is active: the Theora
// stream state when theora_p is set and the Vorbis one when vorbis_p is set.
// The results of ogg_stream_pagein() are ignored. Always returns 0.
int VideoStreamPlaybackTheora::queue_page(ogg_page *page){
	if (theora_p) {
		ogg_stream_pagein(&to,page);
	}
	if (vorbis_p) {
		ogg_stream_pagein(&vo,page);
	}
	return 0;
}
Image VideoStreamTheora::peek_frame() const {
if (frames_pending == 0)
return Image();
return Image(size.x, size.y, 0, format, frame_data);
};
Image VideoStreamTheora::pop_frame() {
Image ret = peek_frame();
frames_pending = 0;
return ret;
};
int VideoStreamTheora::get_pending_frame_count() const {
return frames_pending;
};
void VideoStreamTheora::video_write(void){
void VideoStreamPlaybackTheora::video_write(void){
th_ycbcr_buffer yuv;
int y_offset, uv_offset;
th_decode_ycbcr_out(td,yuv);
@ -78,25 +54,31 @@ void VideoStreamTheora::video_write(void){
int pitch = 4;
frame_data.resize(size.x * size.y * pitch);
DVector<uint8_t>::Write w = frame_data.write();
char* dst = (char*)w.ptr();
{
DVector<uint8_t>::Write w = frame_data.write();
char* dst = (char*)w.ptr();
uv_offset=(ti.pic_x/2)+(yuv[1].stride)*(ti.pic_y/2);
uv_offset=(ti.pic_x/2)+(yuv[1].stride)*(ti.pic_y/2);
if (px_fmt == TH_PF_444) {
if (px_fmt == TH_PF_444) {
yuv444_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[1].data, (uint8_t*)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0);
yuv444_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[1].data, (uint8_t*)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0);
} else if (px_fmt == TH_PF_422) {
} else if (px_fmt == TH_PF_422) {
yuv422_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[1].data, (uint8_t*)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0);
yuv422_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[1].data, (uint8_t*)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0);
} else if (px_fmt == TH_PF_420) {
} else if (px_fmt == TH_PF_420) {
yuv420_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[2].data, (uint8_t*)yuv[1].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0);
};
yuv420_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[2].data, (uint8_t*)yuv[1].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0);
};
format = Image::FORMAT_RGBA;
format = Image::FORMAT_RGBA;
}
Image img(size.x,size.y,0,Image::FORMAT_RGBA,frame_data); //zero copy image creation
texture->set_data(img); //zero copy send to visual server
/*
@ -194,7 +176,7 @@ void VideoStreamTheora::video_write(void){
frames_pending = 1;
}
void VideoStreamTheora::clear() {
void VideoStreamPlaybackTheora::clear() {
if (file_name == "")
return;
@ -218,7 +200,7 @@ void VideoStreamTheora::clear() {
}
ogg_sync_clear(&oy);
file_name = "";
//file_name = "";
theora_p = 0;
vorbis_p = 0;
@ -229,7 +211,7 @@ void VideoStreamTheora::clear() {
playing = false;
};
void VideoStreamTheora::set_file(const String& p_file) {
void VideoStreamPlaybackTheora::set_file(const String& p_file) {
ogg_packet op;
th_setup_info *ts = NULL;
@ -241,7 +223,7 @@ void VideoStreamTheora::set_file(const String& p_file) {
file = FileAccess::open(p_file, FileAccess::READ);
ERR_FAIL_COND(!file);
audio_frames_wrote = 0;
ogg_sync_init(&oy);
@ -386,6 +368,8 @@ void VideoStreamTheora::set_file(const String& p_file) {
size.x = w;
size.y = h;
texture->create(w,h,Image::FORMAT_RGBA,Texture::FLAG_FILTER|Texture::FLAG_VIDEO_SURFACE);
}else{
/* tear down the partial theora setup */
th_info_clear(&ti);
@ -399,7 +383,7 @@ void VideoStreamTheora::set_file(const String& p_file) {
vorbis_block_init(&vd,&vb);
fprintf(stderr,"Ogg logical stream %lx is Vorbis %d channel %ld Hz audio.\n",
vo.serialno,vi.channels,vi.rate);
_setup(vi.channels, vi.rate);
//_setup(vi.channels, vi.rate);
}else{
/* tear down the partial vorbis setup */
vorbis_info_clear(&vi);
@ -411,227 +395,299 @@ void VideoStreamTheora::set_file(const String& p_file) {
time=0;
};
float VideoStreamTheora::get_time() const {
float VideoStreamPlaybackTheora::get_time() const {
//print_line("total: "+itos(get_total())+" todo: "+itos(get_todo()));
//return MAX(0,time-((get_total())/(float)vi.rate));
return time-((get_total())/(float)vi.rate);
return time-AudioServer::get_singleton()->get_output_delay()-delay_compensation;//-((get_total())/(float)vi.rate);
};
void VideoStreamTheora::update() {
// Returns the texture member; video_write() uploads each decoded frame into
// it with texture->set_data().
Ref<Texture> VideoStreamPlaybackTheora::get_texture() {
return texture;
}
void VideoStreamPlaybackTheora::update(float p_delta) {
if (!playing) {
//printf("not playing\n");
return;
};
double ctime =AudioServer::get_singleton()->get_mix_time();
//double ctime =AudioServer::get_singleton()->get_mix_time();
if (last_update_time) {
double delta = (ctime-last_update_time);
time+=delta;
//print_line("delta: "+rtos(delta));
}
last_update_time=ctime;
//print_line("play "+rtos(p_delta));
time+=p_delta;
if (videobuf_time>get_time())
return; //no new frames need to be produced
bool frame_done=false;
while (!frame_done) {
//a frame needs to be produced
ogg_packet op;
bool audio_pending = false;
int audio_todo = get_todo();
ogg_packet op;
int audio_pending = 0;
while (vorbis_p) {
int ret;
float **pcm;
bool buffer_full=false;
/* if there's pending, decoded audio, grab it */
if ((ret=vorbis_synthesis_pcmout(&vd,&pcm))>0) {
while (vorbis_p && audio_todo) {
int ret;
float **pcm;
/* if there's pending, decoded audio, grab it */
if ((ret=vorbis_synthesis_pcmout(&vd,&pcm))>0) {
const int AUXBUF_LEN=4096;
int to_read = ret;
int16_t aux_buffer[AUXBUF_LEN];
audio_pending = ret;
int16_t* out = get_write_buffer();
int count = 0;
int to_read = MIN(ret, audio_todo);
for (int i=0; i<to_read; i++) {
while(to_read) {
for(int j=0;j<vi.channels;j++){
int val=Math::fast_ftoi(pcm[j][i]*32767.f);
if(val>32767)val=32767;
if(val<-32768)val=-32768;
out[count++] = val;
int m = MIN(AUXBUF_LEN/vi.channels,to_read);
int count = 0;
for(int j=0;j<m;j++){
for(int i=0;i<vi.channels;i++){
int val=Math::fast_ftoi(pcm[i][j]*32767.f);
if(val>32767)val=32767;
if(val<-32768)val=-32768;
aux_buffer[count++] = val;
}
}
if (mix_callback) {
int mixed = mix_callback(mix_udata,aux_buffer,m);
to_read-=mixed;
if (mixed!=m) { //could mix no more
buffer_full=true;
break;
}
} else {
to_read-=m; //just pretend we sent the audio
}
}
int tr = vorbis_synthesis_read(&vd, ret-to_read);
audio_pending=true;
} else {
/* no pending audio; is there a pending packet to decode? */
if (ogg_stream_packetout(&vo,&op)>0){
if(vorbis_synthesis(&vb,&op)==0) { /* test for success! */
vorbis_synthesis_blockin(&vd,&vb);
}
} else { /* we need more data; break out to suck in another page */
//printf("need moar data\n");
break;
};
};
int tr = vorbis_synthesis_read(&vd, to_read);
audio_todo -= to_read;
audio_frames_wrote += to_read;
write(to_read);
audio_pending -= to_read;
if (audio_todo==0)
buffering=false;
}
} else {
/* no pending audio; is there a pending packet to decode? */
if (ogg_stream_packetout(&vo,&op)>0){
if(vorbis_synthesis(&vb,&op)==0) { /* test for success! */
vorbis_synthesis_blockin(&vd,&vb);
}
} else { /* we need more data; break out to suck in another page */
//printf("need moar data\n");
if (buffer_full)
break;
};
}
}
while(theora_p && !videobuf_ready){
/* theora is one in, one out... */
if(ogg_stream_packetout(&to,&op)>0){
while(theora_p && !frame_done){
/* theora is one in, one out... */
if(ogg_stream_packetout(&to,&op)>0){
if(pp_inc){
pp_level+=pp_inc;
th_decode_ctl(td,TH_DECCTL_SET_PPLEVEL,&pp_level,
sizeof(pp_level));
pp_inc=0;
}
/*HACK: This should be set after a seek or a gap, but we might not have
a granulepos for the first packet (we only have them for the last
packet on a page), so we just set it as often as we get it.
To do this right, we should back-track from the last packet on the
page and compute the correct granulepos for the first packet after
a seek or a gap.*/
if(op.granulepos>=0){
th_decode_ctl(td,TH_DECCTL_SET_GRANPOS,&op.granulepos,
sizeof(op.granulepos));
}
ogg_int64_t videobuf_granulepos;
if(th_decode_packetin(td,&op,&videobuf_granulepos)==0){
videobuf_time=th_granule_time(td,videobuf_granulepos);
//printf("frame time %f, play time %f, ready %i\n", (float)videobuf_time, get_time(), videobuf_ready);
/* is it already too old to be useful? This is only actually
useful cosmetically after a SIGSTOP. Note that we have to
decode the frame even if we don't show it (for now) due to
keyframing. Soon enough libtheora will be able to deal
with non-keyframe seeks. */
if(videobuf_time>=get_time())
videobuf_ready=1;
else{
/*If we are too slow, reduce the pp level.*/
pp_inc=pp_level>0?-1:0;
if(pp_inc){
pp_level+=pp_inc;
th_decode_ctl(td,TH_DECCTL_SET_PPLEVEL,&pp_level,
sizeof(pp_level));
pp_inc=0;
}
/*HACK: This should be set after a seek or a gap, but we might not have
a granulepos for the first packet (we only have them for the last
packet on a page), so we just set it as often as we get it.
To do this right, we should back-track from the last packet on the
page and compute the correct granulepos for the first packet after
a seek or a gap.*/
if(op.granulepos>=0){
th_decode_ctl(td,TH_DECCTL_SET_GRANPOS,&op.granulepos,
sizeof(op.granulepos));
}
ogg_int64_t videobuf_granulepos;
if(th_decode_packetin(td,&op,&videobuf_granulepos)==0){
videobuf_time=th_granule_time(td,videobuf_granulepos);
//printf("frame time %f, play time %f, ready %i\n", (float)videobuf_time, get_time(), videobuf_ready);
/* is it already too old to be useful? This is only actually
useful cosmetically after a SIGSTOP. Note that we have to
decode the frame even if we don't show it (for now) due to
keyframing. Soon enough libtheora will be able to deal
with non-keyframe seeks. */
if(videobuf_time>=get_time())
frame_done=true;
else{
/*If we are too slow, reduce the pp level.*/
pp_inc=pp_level>0?-1:0;
}
}
} else
break;
}
if (file && /*!videobuf_ready && */ file->eof_reached()) {
printf("video done, stopping\n");
stop();
return;
};
#if 0
if (!videobuf_ready || audio_todo > 0){
/* no data yet for somebody. Grab another page */
buffer_data();
while(ogg_sync_pageout(&oy,&og)>0){
queue_page(&og);
}
}
#else
if (!frame_done){
//what's the point of waiting for audio to grab a page?
} else
break;
}
buffer_data();
while(ogg_sync_pageout(&oy,&og)>0){
queue_page(&og);
}
}
#endif
/* If playback has begun, top audio buffer off immediately. */
//if(stateflag) audio_write_nonblocking();
if (/*!videobuf_ready && */ audio_pending == 0 && file->eof_reached()) {
printf("video done, stopping\n");
stop();
return;
};
/* are we at or past time for this video frame? */
if(videobuf_ready && videobuf_time<=get_time()){
if (!videobuf_ready || audio_todo > 0){
/* no data yet for somebody. Grab another page */
//video_write();
//videobuf_ready=0;
} else {
//printf("frame at %f not ready (time %f), ready %i\n", (float)videobuf_time, get_time(), videobuf_ready);
}
buffer_data();
while(ogg_sync_pageout(&oy,&og)>0){
queue_page(&og);
float tdiff=videobuf_time-get_time();
/*If we have lots of extra time, increase the post-processing level.*/
if(tdiff>ti.fps_denominator*0.25/ti.fps_numerator){
pp_inc=pp_level<pp_level_max?1:0;
}
else if(tdiff<ti.fps_denominator*0.05/ti.fps_numerator){
pp_inc=pp_level>0?-1:0;
}
}
/* If playback has begun, top audio buffer off immediately. */
//if(stateflag) audio_write_nonblocking();
video_write();
/* are we at or past time for this video frame? */
if(videobuf_ready && videobuf_time<=get_time()){
video_write();
videobuf_ready=0;
} else {
//printf("frame at %f not ready (time %f), ready %i\n", (float)videobuf_time, get_time(), videobuf_ready);
}
float tdiff=videobuf_time-get_time();
/*If we have lots of extra time, increase the post-processing level.*/
if(tdiff>ti.fps_denominator*0.25/ti.fps_numerator){
pp_inc=pp_level<pp_level_max?1:0;
}
else if(tdiff<ti.fps_denominator*0.05/ti.fps_numerator){
pp_inc=pp_level>0?-1:0;
}
};
bool VideoStreamTheora::_can_mix() const {
return !buffering;
};
void VideoStreamTheora::play() {
void VideoStreamPlaybackTheora::play() {
if (!playing)
last_update_time=0;
time=0;
playing = true;
delay_compensation=Globals::get_singleton()->get("audio/video_delay_compensation_ms");
delay_compensation/=1000.0;
};
void VideoStreamTheora::stop() {
void VideoStreamPlaybackTheora::stop() {
if (playing) {
clear();
set_file(file_name); //reset
}
playing = false;
last_update_time=0;
time=0;
};
bool VideoStreamTheora::is_playing() const {
// Returns the `playing` flag, which play() sets and stop()/set_paused() clear.
bool VideoStreamPlaybackTheora::is_playing() const {
return playing;
};
void VideoStreamTheora::set_paused(bool p_paused) {
// Pauses or resumes playback. There is no dedicated pause flag: the stream
// is marked as playing exactly when it is not paused.
void VideoStreamPlaybackTheora::set_paused(bool p_paused) {
	if (p_paused) {
		playing = false;
	} else {
		playing = true;
	}
};
bool VideoStreamTheora::is_paused(bool p_paused) const {
// Reports whether playback is paused.
//
// set_paused() stores the request as `playing = !p_paused`, so the paused
// state is the negation of `playing`. Returning `playing` itself reported
// the opposite of what had been requested. Because there is no separate
// pause flag, a stream that is not playing for any other reason also
// reports true.
//
// p_paused is not used; it is kept so the signature stays unchanged.
bool VideoStreamPlaybackTheora::is_paused(bool p_paused) const {
	return !playing;
};
void VideoStreamTheora::set_loop(bool p_enable) {
// Looping is not implemented: the body is empty and p_enable is ignored.
void VideoStreamPlaybackTheora::set_loop(bool p_enable) {
};
bool VideoStreamTheora::has_loop() const {
// Always returns false; set_loop() does not store anything.
bool VideoStreamPlaybackTheora::has_loop() const {
return false;
};
float VideoStreamTheora::get_length() const {
// Stream length is not computed: always returns 0.
float VideoStreamPlaybackTheora::get_length() const {
return 0;
};
String VideoStreamTheora::get_stream_name() const {
// No stream name is tracked: always returns an empty string.
String VideoStreamPlaybackTheora::get_stream_name() const {
return "";
};
int VideoStreamTheora::get_loop_count() const {
// Loops are not counted: always returns 0.
int VideoStreamPlaybackTheora::get_loop_count() const {
return 0;
};
float VideoStreamTheora::get_pos() const {
// Returns the playback position by forwarding to get_time().
float VideoStreamPlaybackTheora::get_pos() const {
return get_time();
};
void VideoStreamTheora::seek_pos(float p_time) {
// Seeking is not implemented: the request is ignored and p_time is unused.
void VideoStreamPlaybackTheora::seek_pos(float p_time) {
// no
};
VideoStreamTheora::VideoStreamTheora() {
// Registers the audio sink. Both values are stored as given; update() later
// invokes the callback with the user pointer as its first argument.
void VideoStreamPlaybackTheora::set_mix_callback(AudioMixCallback p_callback,void *p_userdata) {
	this->mix_udata=p_userdata;
	this->mix_callback=p_callback;
}
// Returns the channel count stored in the Vorbis info struct (vi.channels).
// NOTE(review): presumably only meaningful once set_file() has found a Vorbis stream — confirm.
int VideoStreamPlaybackTheora::get_channels() const{
return vi.channels;
}
// Audio track selection is not implemented: the body is empty and p_idx is ignored.
void VideoStreamPlaybackTheora::set_audio_track(int p_idx) {
}
// Returns the sample rate stored in the Vorbis info struct (vi.rate, in Hz).
// NOTE(review): presumably only meaningful once set_file() has found a Vorbis stream — confirm.
int VideoStreamPlaybackTheora::get_mix_rate() const{
return vi.rate;
}
VideoStreamPlaybackTheora::VideoStreamPlaybackTheora() {
file = NULL;
theora_p = 0;
@ -640,11 +696,15 @@ VideoStreamTheora::VideoStreamTheora() {
playing = false;
frames_pending = 0;
videobuf_time = 0;
last_update_time =0;
buffering=false;
texture = Ref<ImageTexture>( memnew(ImageTexture ));
mix_callback=NULL;
mix_udata=NULL;
delay_compensation=0;
};
VideoStreamTheora::~VideoStreamTheora() {
VideoStreamPlaybackTheora::~VideoStreamPlaybackTheora() {
clear();
@ -653,10 +713,16 @@ VideoStreamTheora::~VideoStreamTheora() {
};
RES ResourceFormatLoaderVideoStreamTheora::load(const String &p_path,const String& p_original_path) {
// Creates a VideoStreamTheora resource for the file at p_path.
//
// p_path          path of the video file (extensions "ogm"/"ogv")
// p_original_path not used here
// r_error         optional; receives ERR_FILE_CANT_OPEN when the file cannot
//                 be opened for reading, OK otherwise
//
// Returns the new stream, or an empty RES when the file cannot be opened.
// Previously ERR_FILE_CANT_OPEN was always overwritten with OK, so an
// unreadable path was reported as a successful load.
RES ResourceFormatLoaderVideoStreamTheora::load(const String &p_path,const String& p_original_path, Error *r_error) {
	if (r_error)
		*r_error=ERR_FILE_CANT_OPEN;

	//the stream only stores the path, so probe the file here to report failures
	FileAccess *probe = FileAccess::open(p_path, FileAccess::READ);
	if (!probe)
		return RES();
	memdelete(probe);

	VideoStreamTheora *stream = memnew(VideoStreamTheora);
	stream->set_file(p_path);

	if (r_error)
		*r_error=OK;

	return Ref<VideoStreamTheora>(stream);
}
@ -666,16 +732,16 @@ void ResourceFormatLoaderVideoStreamTheora::get_recognized_extensions(List<Strin
p_extensions->push_back("ogv");
}
bool ResourceFormatLoaderVideoStreamTheora::handles_type(const String& p_type) const {
return (p_type=="AudioStream" || p_type=="VideoStreamTheora");
return (p_type=="VideoStream" || p_type=="VideoStreamTheora");
}
String ResourceFormatLoaderVideoStreamTheora::get_resource_type(const String &p_path) const {
String exl=p_path.extension().to_lower();
if (exl=="ogm" || exl=="ogv")
return "AudioStreamTheora";
return "VideoStreamTheora";
return "";
}
#endif
#endif

View file

@ -10,9 +10,9 @@
#include "io/resource_loader.h"
#include "scene/resources/video_stream.h"
class VideoStreamTheora : public VideoStream {
class VideoStreamPlaybackTheora : public VideoStreamPlayback {
OBJ_TYPE(VideoStreamTheora, VideoStream);
OBJ_TYPE(VideoStreamPlaybackTheora, VideoStreamPlayback);
enum {
MAX_FRAMES = 4,
@ -58,16 +58,17 @@ class VideoStreamTheora : public VideoStream {
double last_update_time;
double time;
double delay_compensation;
Ref<ImageTexture> texture;
AudioMixCallback mix_callback;
void* mix_udata;
protected:
virtual UpdateMode get_update_mode() const;
virtual void update();
void clear();
virtual bool _can_mix() const;
public:
virtual void play();
@ -92,12 +93,36 @@ public:
void set_file(const String& p_file);
int get_pending_frame_count() const;
Image pop_frame();
Image peek_frame() const;
virtual Ref<Texture> get_texture();
virtual void update(float p_delta);
virtual void set_mix_callback(AudioMixCallback p_callback,void *p_userdata);
virtual int get_channels() const;
virtual int get_mix_rate() const;
virtual void set_audio_track(int p_idx);
VideoStreamPlaybackTheora();
~VideoStreamPlaybackTheora();
};
// Video stream resource for Theora files. It only stores a file path;
// decoding is done by the VideoStreamPlaybackTheora objects it creates.
class VideoStreamTheora : public VideoStream {
OBJ_TYPE(VideoStreamTheora,VideoStream);
// Path handed to every playback object created by instance_playback().
String file;
public:
// Creates a new playback object and points it at the stored file.
Ref<VideoStreamPlayback> instance_playback() {
Ref<VideoStreamPlaybackTheora> pb = memnew( VideoStreamPlaybackTheora );
pb->set_file(file);
return pb;
}
// Stores the path only; nothing is opened here.
void set_file(const String& p_file) { file=p_file; }
VideoStreamTheora();
~VideoStreamTheora();
};
class ResourceFormatLoaderVideoStreamTheora : public ResourceFormatLoader {

View file

@ -1,106 +0,0 @@
Import("env")
import string
sources = string.split("""
src/TheoraVideoClip.cpp
src/FFmpeg/TheoraVideoClip_FFmpeg.cpp
src/TheoraAsync.cpp
src/TheoraAudioInterface.cpp
src/TheoraException.cpp
src/TheoraWorkerThread.cpp
src/TheoraVideoManager.cpp
src/TheoraTimer.cpp
src/TheoraUtil.cpp
src/TheoraDataSource.cpp
src/TheoraAudioPacketQueue.cpp
src/TheoraFrameQueue.cpp
src/Theora/TheoraVideoClip_Theora.cpp
src/YUV/yuv_util.c
src/YUV/libyuv/src/row_any.cc
src/YUV/libyuv/src/compare_common.cc
src/YUV/libyuv/src/scale_neon.cc
src/YUV/libyuv/src/planar_functions.cc
src/YUV/libyuv/src/compare.cc
src/YUV/libyuv/src/scale_mips.cc
src/YUV/libyuv/src/scale_posix.cc
src/YUV/libyuv/src/row_posix.cc
src/YUV/libyuv/src/row_win.cc
src/YUV/libyuv/src/compare_neon.cc
src/YUV/libyuv/src/convert_from_argb.cc
src/YUV/libyuv/src/mjpeg_validate.cc
src/YUV/libyuv/src/convert_from.cc
src/YUV/libyuv/src/rotate_neon.cc
src/YUV/libyuv/src/row_neon.cc
src/YUV/libyuv/src/rotate_mips.cc
src/YUV/libyuv/src/compare_posix.cc
src/YUV/libyuv/src/row_mips.cc
src/YUV/libyuv/src/scale.cc
src/YUV/libyuv/src/scale_argb.cc
src/YUV/libyuv/src/mjpeg_decoder.cc
src/YUV/libyuv/src/scale_win.cc
src/YUV/libyuv/src/scale_common.cc
src/YUV/libyuv/src/scale_argb_neon.cc
src/YUV/libyuv/src/row_common.cc
src/YUV/libyuv/src/convert.cc
src/YUV/libyuv/src/format_conversion.cc
src/YUV/libyuv/src/rotate_argb.cc
src/YUV/libyuv/src/rotate.cc
src/YUV/libyuv/src/convert_argb.cc
src/YUV/libyuv/src/cpu_id.cc
src/YUV/libyuv/src/video_common.cc
src/YUV/libyuv/src/convert_to_argb.cc
src/YUV/libyuv/src/compare_win.cc
src/YUV/libyuv/src/convert_to_i420.cc
src/YUV/libyuv/src/convert_jpeg.cc
src/YUV/libyuv/yuv_libyuv.c
src/YUV/android/cpu-features.c
src/YUV/C/yuv420_grey_c.c
src/YUV/C/yuv420_yuv_c.c
src/YUV/C/yuv420_rgb_c.c
src/TheoraVideoFrame.cpp
""")
env_theora = env.Clone()
if env["platform"] == "iphone":
sources.append("src/AVFoundation/TheoraVideoClip_AVFoundation.mm")
env.Append(LINKFLAGS=['-framework', 'CoreVideo', '-framework', 'CoreMedia', '-framework', 'AVFoundation'])
if env["target"] == "release":
env_theora.Append(CPPFLAGS=["-D_IOS", "-D__ARM_NEON__", "-fstrict-aliasing", "-fmessage-length=210", "-fdiagnostics-show-note-include-stack", "-fmacro-backtrace-limit=0", "-fcolor-diagnostics", "-Wno-trigraphs", "-fpascal-strings", "-fvisibility=hidden", "-fvisibility-inlines-hidden"])
env_theora.Append(CPPFLAGS=["-D_LIB", "-D__THEORA"]) # removed -D_YUV_C
env_theora.Append(CPPFLAGS=["-D_YUV_LIBYUV"])
#env_theora.Append(CPPFLAGS=["-D_YUV_C"])
if env["platform"] == "iphone":
env_theora.Append(CPPFLAGS=["-D__AVFOUNDATION"])
else:
pass
#env_theora.Append(CPPFLAGS=["-D__FFMPEG"])
if env["platform"] == "android":
env_theora.Append(CPPFLAGS=["-D_ANDROID"])
if env["platform"] == "winrt":
env_theora.Append(CPPFLAGS=["-D_WINRT"])
env_theora.Append(CPPPATH=["#drivers/theoraplayer/include/theoraplayer", "#drivers/theoraplayer/src/YUV", "#drivers/theoraplayer/src/YUV/libyuv/include", "#drivers/theoraplayer/src/Theora", "#drivers/theoraplayer/src/AVFoundation"])
objs = []
env_theora.add_source_files(objs, ["video_stream_theoraplayer.cpp"])
if env['use_theoraplayer_binary'] == "yes":
if env["platform"] == "iphone":
env.Append(LIBPATH=['#drivers/theoraplayer/lib/ios'])
env.Append(LIBS=['theoraplayer', 'ogg', 'theora', 'tremor'])
if env["platform"] == "windows":
env.Append(LIBPATH=['#drivers/theoraplayer/lib/windows'])
env.Append(LINKFLAGS=['libtheoraplayer_static.lib', 'libogg.lib', 'libtheora.lib', 'libvorbis.lib'])
else:
env_theora.add_source_files(objs, sources)
env.drivers_sources += objs

View file

@ -1,51 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#ifndef _TheoraAsync_h
#define _TheoraAsync_h
#ifndef _WIN32
#include <pthread.h>
#endif
/// @note Based on hltypes::Thread
// Mutex with explicit lock()/unlock() calls.
class TheoraMutex
{
public:
TheoraMutex();
~TheoraMutex();
void lock();
void unlock();
protected:
// Opaque handle; its concrete type is not visible in this header.
// NOTE(review): presumably the platform mutex object — confirm in the implementation.
void* mHandle;
};
/// @note Based on hltypes::Thread
// Abstract thread base class: subclasses provide the thread body by
// overriding the pure virtual execute().
class TheoraThread
{
// NOTE(review): presumably guards mRunning — confirm in the implementation.
TheoraMutex mRunningMutex;
public:
TheoraThread();
virtual ~TheoraThread();
void start();
void stop();
void resume();
void pause();
bool isRunning();
// Thread body, implemented by subclasses.
virtual void execute() = 0;
void join();
protected:
// Opaque thread identifier; its concrete type is not visible in this header.
void* mId;
volatile bool mRunning;
};
#endif

View file

@ -1,51 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#ifndef _TheoraAudioInterface_h
#define _TheoraAudioInterface_h
#include "TheoraExport.h"
class TheoraVideoClip;
/**
This is the class that serves as an interface between the library's audio
output and the audio playback library of your choice.
The class receives mono or stereo PCM data as floating-point samples.
*/
class TheoraPlayerExport TheoraAudioInterface
{
public:
//! PCM frequency, usually 44100 Hz
int mFreq;
//! Mono or stereo
int mNumChannels;
//! Pointer to the parent TheoraVideoClip object
TheoraVideoClip* mClip;
TheoraAudioInterface(TheoraVideoClip* owner, int nChannels, int freq);
virtual ~TheoraAudioInterface();
//! A function that the TheoraVideoClip object calls once more audio packets are decoded
/*!
\param data contains one or two channels of float PCM data in the range [-1,1]
\param nSamples contains the number of samples that the data parameter contains in each channel
*/
virtual void insertData(float* data, int nSamples)=0;
};
//! Abstract factory that produces the TheoraAudioInterface for a video clip.
class TheoraPlayerExport TheoraAudioInterfaceFactory
{
public:
//! VideoManager calls this when creating a new TheoraVideoClip object
/*!
\param owner the clip the interface is created for
\param nChannels number of audio channels
\param freq PCM frequency
*/
virtual TheoraAudioInterface* createInstance(TheoraVideoClip* owner, int nChannels, int freq) = 0;
};
#endif

View file

@ -1,48 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#ifndef _TheoraAudioPacketQueue_h
#define _TheoraAudioPacketQueue_h
#include "TheoraExport.h"
class TheoraAudioInterface;
/**
This is an internal structure which TheoraVideoClip_Theora uses to store audio packets
*/
struct TheoraAudioPacket
{
// PCM sample data of this packet.
float* pcm;
int numSamples; //! size in number of float samples (stereo has twice the number of samples)
TheoraAudioPacket* next; // pointer to the next audio packet, to implement a linked list
};
/**
This is a queue of audio packets (TheoraAudioPacket), not a mutex: packets can be added, popped, destroyed, or flushed to a TheoraAudioInterface.
*/
class TheoraPlayerExport TheoraAudioPacketQueue
{
protected:
unsigned int mAudioFrequency, mNumAudioChannels;
// Head of the packet list; packets are chained through TheoraAudioPacket::next.
TheoraAudioPacket* mTheoraAudioPacketQueue;
void _addAudioPacket(float* data, int numSamples);
public:
TheoraAudioPacketQueue();
~TheoraAudioPacketQueue();
// NOTE(review): unit of the returned length is not visible here (presumably seconds) — confirm.
float getAudioPacketQueueLength();
// The two overloads differ in the buffer type (float** vs float*).
// NOTE(review): presumably per-channel vs. interleaved samples — confirm in the implementation.
void addAudioPacket(float** buffer, int numSamples, float gain);
void addAudioPacket(float* buffer, int numSamples, float gain);
TheoraAudioPacket* popAudioPacket();
void destroyAudioPacket(TheoraAudioPacket* p);
void destroyAllAudioPackets();
void flushAudioPackets(TheoraAudioInterface* audioInterface);
};
#endif

View file

@ -1,89 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#ifndef _TheoraDataSource_h
#define _TheoraDataSource_h
#include <stdio.h>
#include <string>
#include "TheoraExport.h"
/**
This is a simple class that provides abstracted data feeding. You can use the
TheoraFileDataSource for regular file playback or you can implement your own
internet streaming solution, or a class that uses encrypted datafiles etc.
The sky is the limit
*/
class TheoraPlayerExport TheoraDataSource
{
public:
virtual ~TheoraDataSource();
/**
Reads nBytes bytes from the data source and returns the number of bytes read.
If the function returns fewer bytes than nBytes, the system assumes EOF is reached.
*/
virtual int read(void* output,int nBytes)=0;
//! returns a string representation of the DataSource, e.g. 'File: source.ogg'
virtual std::string repr()=0;
//! position the source pointer to byte_index from the start of the source
virtual void seek(unsigned long byte_index)=0;
//! return the size of the stream in bytes
virtual unsigned long size()=0;
//! return the current position of the source pointer
virtual unsigned long tell()=0;
};
/**
provides standard file IO
*/
class TheoraPlayerExport TheoraFileDataSource : public TheoraDataSource
{
// stdio handle of the file being read
FILE* mFilePtr;
std::string mFilename;
unsigned long mSize;
// NOTE(review): not visible here when this is called (constructor vs. first read) — confirm.
void openFile();
public:
TheoraFileDataSource(std::string filename);
~TheoraFileDataSource();
int read(void* output,int nBytes);
void seek(unsigned long byte_index);
//! returns the file name as given to the constructor, without any prefix
std::string repr() { return mFilename; }
unsigned long size();
unsigned long tell();
std::string getFilename() { return mFilename; }
};
/**
Pre-loads the entire file and streams from memory.
Very useful if you're continuously displaying a video and want to avoid disk reads.
Not very practical for large files.
*/
class TheoraPlayerExport TheoraMemoryFileDataSource : public TheoraDataSource
{
std::string mFilename;
unsigned long mSize, mReadPointer;
unsigned char* mData;
public:
//! wraps an existing buffer; the name defaults to "memory"
// NOTE(review): ownership of `data` is not visible in this header — confirm whether the destructor frees it.
TheoraMemoryFileDataSource(unsigned char* data, long size, const std::string& filename = "memory");
TheoraMemoryFileDataSource(std::string filename);
~TheoraMemoryFileDataSource();
int read(void* output,int nBytes);
void seek(unsigned long byte_index);
//! returns the file name prefixed with "MEM:"
std::string repr() { return "MEM:"+mFilename; }
unsigned long size();
unsigned long tell();
std::string getFilename() { return mFilename; }
};
#endif

View file

@ -1,46 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#ifndef EXCEPTION_H
#define EXCEPTION_H
#include <string>
#include "TheoraExport.h"
/**
Base exception type of the library. Carries the error text together with the
exception type name and the source file / line it was raised from.
It does not derive from std::exception, so a catch (std::exception&) handler will not see it.
*/
class TheoraPlayerExport _TheoraGenericException
{
public:
//! error message, exception type name and originating source file
std::string mErrText,mFile,mType;
//! originating source line (0 when not supplied)
int mLineNumber;
//! type, file and line are optional; the TheoraGenericException macro fills them in automatically
_TheoraGenericException(const std::string& errorText, std::string type = "",std::string file = "", int line = 0);
virtual ~_TheoraGenericException() {}
//! string representation of the exception; exact format is defined in the implementation
virtual std::string repr();
//! presumably sends repr() to the library log - TODO confirm in the implementation
void writeOutput();
//! returns a reference to the stored message; valid only while this exception object is alive
virtual const std::string& getErrorText() { return mErrText; }
//! returns a copy of the exception type name
const std::string getType(){ return mType; }
};
// Builds a _TheoraGenericException tagged with the "TheoraGenericException" type
// name and the file / line of the expansion site.
#define TheoraGenericException(msg) _TheoraGenericException(msg, "TheoraGenericException", __FILE__, __LINE__)
// Declares a new exception class `name` deriving from _TheoraGenericException whose
// constructor simply forwards all arguments to the base class.
// The expansion has no trailing semicolon: the user supplies it (see below).
#define exception_cls(name) class name : public _TheoraGenericException \
{ \
public: \
name(const std::string& errorText,std::string type = "",std::string file = "",int line = 0) : \
_TheoraGenericException(errorText, type, file, line){} \
}
// the only exception subclass declared by this header
exception_cls(_KeyException);
#endif

View file

@ -1,38 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#ifndef _theoraVideoExport_h
#define _theoraVideoExport_h
// Symbol visibility macros.
//  - _LIB defined: both macros expand to nothing (presumably the static library build - TODO confirm).
//  - Windows: dllexport while building the library itself (THEORAVIDEO_EXPORTS), dllimport for users.
//  - everything else: default ELF/Mach-O visibility.
#ifdef _LIB
#define TheoraPlayerExport
#define TheoraPlayerFnExport
#else
#ifdef _WIN32
#ifdef THEORAVIDEO_EXPORTS
#define TheoraPlayerExport __declspec(dllexport)
#define TheoraPlayerFnExport __declspec(dllexport)
#else
#define TheoraPlayerExport __declspec(dllimport)
#define TheoraPlayerFnExport __declspec(dllimport)
#endif
#else
#define TheoraPlayerExport __attribute__ ((visibility("default")))
#define TheoraPlayerFnExport __attribute__ ((visibility("default")))
#endif
#endif
// Portable "deprecated" marker; only defined here if nobody defined it before.
#ifndef DEPRECATED_ATTRIBUTE
#ifdef _MSC_VER
#define DEPRECATED_ATTRIBUTE __declspec(deprecated("function is deprecated"))
#else
#define DEPRECATED_ATTRIBUTE __attribute__((deprecated))
#endif
#endif
#endif

View file

@ -1,95 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#ifndef _TheoraFrameQueue_h
#define _TheoraFrameQueue_h
#include "TheoraAsync.h"
#include <list>
#include "TheoraExport.h"
class TheoraVideoFrame;
class TheoraVideoClip;
/**
This class handles the frame queue. It contains frames and handles their allocation/deallocation.
It is designed to be thread-safe: the public accessors have underscore-prefixed
"non-mutex" twins that are meant to be used between explicit lock() / unlock() calls.
*/
class TheoraPlayerExport TheoraFrameQueue
{
protected:
//! the frames managed by this queue
std::list<TheoraVideoFrame*> mQueue;
//! the clip this queue belongs to (passed to the constructor)
TheoraVideoClip* mParent;
//! mutex behind lock() / unlock()
TheoraMutex mMutex;
//! implementation function that returns a TheoraVideoFrame instance
TheoraVideoFrame* createFrameInstance(TheoraVideoClip* clip);
public:
TheoraFrameQueue(TheoraVideoClip* parent);
~TheoraFrameQueue();
/**
\brief Returns the first available frame in the queue or NULL if no frames are available.
This function DOES NOT remove the frame from the queue, you have to do it manually
when you want to mark the frame as used by calling the pop() function.
*/
TheoraVideoFrame* getFirstAvailableFrame();
//! non-mutex version
TheoraVideoFrame* _getFirstAvailableFrame();
//! return the number of used (not ready) frames
int getUsedCount();
//! return the number of ready frames
int getReadyCount();
//! non-mutex version
int _getReadyCount();
/**
\brief remove the first N available frames from the queue.
Use this every time you display a frame so you can get the next one when the time comes.
This function marks the frame at the front of the queue as unused and its memory then
gets used again in the decoding process.
If you don't call this, the frame queue will fill up with precached frames up to the
specified amount in the TheoraVideoManager class and you won't be able to advance the video.
*/
void pop(int n = 1);
//! This is an internal _pop function. use externally only in combination with lock() / unlock() calls
void _pop(int n);
//! frees all decoded frames for reuse (does not destroy memory, just marks them as free)
void clear();
//! Called by WorkerThreads when they need to unload frame data, do not call directly!
TheoraVideoFrame* requestEmptyFrame();
/**
\brief sets the size of the frame queue.
Beware, currently stored ready frames will be lost upon this call
*/
void setSize(int n);
//! return the size of the queue
int getSize();
//! return whether all frames in the queue are ready for display
bool isFull();
//! lock the queue's mutex manually
void lock();
//! unlock the queue's mutex manually
void unlock();
//! returns the internal frame queue. Warning: Always lock / unlock queue's mutex before accessing frames directly!
std::list<TheoraVideoFrame*>& _getFrameQueue();
};
#endif

View file

@ -1,18 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#ifndef _TheoraPixelTransform_h
#define _TheoraPixelTransform_h
// Plain bundle of source pointers and strides describing one decoded picture;
// TheoraVideoFrame::decode() receives a pointer to one of these.
// Decoder backends zero the struct and then fill in either raw/rawStride
// (packed pixels) or the y/u/v planes with their strides.
struct TheoraPixelTransform
{
// raw: packed source pixels; y, u, v: separate source planes.
// out: presumably the destination buffer - TODO confirm against the decode() implementation.
unsigned char *raw, *y, *u, *v, *out;
// *Stride values are row pitches in bytes for the matching pointer above.
// w, h: presumably the picture size in pixels - TODO confirm.
unsigned int w, h, rawStride, yStride, uStride, vStride;
};
#endif

View file

@ -1,17 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#ifndef _TheoraPlayer_h
#define _TheoraPlayer_h
#include "TheoraVideoManager.h"
#include "TheoraVideoClip.h"
#include "TheoraVideoFrame.h"
#endif

View file

@ -1,69 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#ifndef _TheoraTimer_h
#define _TheoraTimer_h
#include "TheoraExport.h"
/**
This is a Timer object, it is used to control the playback of a TheoraVideoClip.
You can inherit this class and make a timer that eg. plays twice as fast,
or plays back an audio track and uses its time offset for synchronizing video etc.
All operations are virtual so a subclass can replace any part of the behaviour.
*/
class TheoraPlayerExport TheoraTimer
{
protected:
//! mTime: current time in seconds; mSpeed: playback speed factor (see setSpeed())
float mTime,mSpeed;
//! Is the timer paused or not
bool mPaused;
public:
TheoraTimer();
virtual ~TheoraTimer();
//! return the current time in seconds
virtual float getTime();
/**
\brief advance the time.
If you're using another synchronization system, eg. an audio track,
then you can ignore this call or use it to perform other updates.
NOTE: this is called by TheoraVideoManager from the main thread
*/
virtual void update(float timeDelta);
virtual void pause();
virtual void play();
virtual bool isPaused();
virtual void stop();
/**
\brief sets playback speed
1.0 is the default. The speed factor multiplies time advance, thus
setting the value higher will increase playback speed etc.
NOTE: depending on Timer implementation, it may not support setting the speed
*/
virtual void setSpeed(float speed);
//! return the update speed 1.0 is the default
virtual float getSpeed();
/**
\brief change the current time.
if you're using another synchronization mechanism, make sure to adjust
the time offset there
*/
virtual void seek(float time);
};
#endif

View file

@ -1,32 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#ifndef _TheoraUtil_h
#define _TheoraUtil_h
#include <string>
#include <vector>
// Iteration helper macros. Each one expands to a for-header that declares an
// iterator named `it` over `lst`; define THEORAUTIL_NOMACROS before including
// this header to suppress them (e.g. when `foreach` clashes with another library).
// NOTE: this header only includes <string> and <vector>; code using foreach_l or
// foreach_in_map must include <list> / <map> itself.
#ifndef THEORAUTIL_NOMACROS
#define foreach(type,lst) for (std::vector<type>::iterator it=lst.begin();it != lst.end(); ++it)
#define foreach_l(type,lst) for (std::list<type>::iterator it=lst.begin();it != lst.end(); ++it)
#define foreach_r(type,lst) for (std::vector<type>::reverse_iterator it=lst.rbegin();it != lst.rend(); ++it)
#define foreach_in_map(type,lst) for (std::map<std::string,type>::iterator it=lst.begin();it != lst.end(); ++it)
#endif
// Forwards a message to the TheoraVideoManager singleton's logMessage();
// TheoraVideoManager must be declared wherever this macro is expanded.
#define th_writelog(x) TheoraVideoManager::getSingleton().logMessage(x)
// presumably converts an int to its string form - TODO confirm in the implementation
std::string str(int i);
// presumably converts a float to its string form - TODO confirm in the implementation
std::string strf(float i);
// presumably a portable sleep for the given number of milliseconds - TODO confirm
void _psleep(int milliseconds);
// presumably returns the next power of two for x - TODO confirm rounding for exact powers
int _nextPow2(int x);
#endif

View file

@ -1,282 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#ifndef _TheoraVideoClip_h
#define _TheoraVideoClip_h
#include <string>
#include "TheoraExport.h"
// forward class declarations
class TheoraMutex;
class TheoraFrameQueue;
class TheoraTimer;
class TheoraAudioInterface;
class TheoraWorkerThread;
class TheoraDataSource;
class TheoraVideoFrame;
/**
	Pixel layout of the data stored in a TheoraVideoFrame. The choice affects decoding time.

	Naming: the order of the letters is the byte order within one pixel.
	 - A: the image is treated as if it contains an alpha channel (full alpha = 255)
	 - X: the frame is merely widened to a 4 byte format

	Each family is anchored with an explicit value; the remaining members
	continue the sequence, giving the contiguous range 0..21.
 */
enum TheoraOutputMode
{
	TH_UNDEFINED = 0,

	// RGB byte order
	TH_RGB = 1,
	TH_RGBA,
	TH_RGBX,
	TH_ARGB,
	TH_XRGB,

	// BGR byte order
	TH_BGR = 6,
	TH_BGRA,
	TH_BGRX,
	TH_ABGR,
	TH_XBGR,

	// single channel grey
	TH_GREY = 11,

	// grey replicated over three channels
	TH_GREY3 = 12,
	TH_GREY3A,
	TH_GREY3X,
	TH_AGREY3,
	TH_XGREY3,

	// YUV, no colour space conversion
	TH_YUV = 17,
	TH_YUVA,
	TH_YUVX,
	TH_AYUV,
	TH_XYUV
};
/**
This object contains all data related to video playback, eg. the open source file,
the frame queue etc.
It is an abstract base class: the decoder specific parts (load, _readData,
decodeNextFrame, decodeAudio, decodedAudioCheck, doSeek, _restart, getDecoderName)
are pure virtual and must be supplied by a backend.
*/
class TheoraPlayerExport TheoraVideoClip
{
friend class TheoraWorkerThread;
friend class TheoraVideoFrame;
friend class TheoraVideoManager;
protected:
TheoraFrameQueue* mFrameQueue;
TheoraAudioInterface* mAudioInterface;
TheoraDataSource* mStream;
TheoraTimer *mTimer, *mDefaultTimer;
TheoraWorkerThread* mAssignedWorkerThread;
bool mUseAlpha;
bool mWaitingForCache;
// benchmark vars
int mNumDroppedFrames, mNumDisplayedFrames, mNumPrecachedFrames;
int mThreadAccessCount; //!< counter used by TheoraVideoManager to schedule workload
int mSeekFrame; //!< stores desired seek position as a frame number. next worker thread will do the seeking and reset this var to -1
float mDuration, mFrameDuration, mFPS;
float mPriority; //!< User assigned priority. Default value is 1
std::string mName;
int mWidth, mHeight, mStride;
int mNumFrames;
int audio_track; //!< set through set_audio_track(); presumably selects which audio stream of the file is decoded - TODO confirm
int mSubFrameWidth, mSubFrameHeight, mSubFrameOffsetX, mSubFrameOffsetY;
float mAudioGain; //!< multiplier for audio samples. between 0 and 1
TheoraOutputMode mOutputMode, mRequestedOutputMode;
bool mFirstFrameDisplayed;
bool mAutoRestart;
bool mEndOfFile, mRestarted;
int mIteration, mPlaybackIteration; //!< used to ensure smooth playback of looping videos
TheoraMutex* mAudioMutex; //!< syncs audio decoding and extraction
TheoraMutex* mThreadAccessMutex;
/**
* Get the priority of a video clip, based on a formula that includes the user
* priority factor, whether the video is paused or not, how many precached
* frames it has etc.
* This function is used in TheoraVideoManager to efficiently distribute job
* assignments among worker threads
* @return priority number of this video clip
*/
int calculatePriority();
void readTheoraVorbisHeaders();
virtual void doSeek() = 0; //!< called by WorkerThread to seek to mSeekFrame
virtual bool _readData() = 0;
bool isBusy();
/**
* decodes audio from the vorbis stream and stores it in audio packets
* This is an internal function of TheoraVideoClip, called regularly if playing an
* audio enabled video clip.
* @return last decoded timestamp (if found in decoded packet's granule position)
*/
virtual float decodeAudio() = 0;
int _getNumReadyFrames();
void resetFrameQueue();
int discardOutdatedFrames(float absTime);
float getAbsPlaybackTime();
virtual void load(TheoraDataSource* source) = 0;
virtual void _restart() = 0; // resets the decoder and stream but leaves the frame queue intact
public:
TheoraVideoClip(TheoraDataSource* data_source,
TheoraOutputMode output_mode,
int nPrecachedFrames,
bool usePower2Stride);
virtual ~TheoraVideoClip();
std::string getName();
//! Returns the string name of the decoder backend (eg. Theora, AVFoundation)
virtual std::string getDecoderName() = 0;
//! benchmark function
int getNumDisplayedFrames() { return mNumDisplayedFrames; }
//! benchmark function
int getNumDroppedFrames() { return mNumDroppedFrames; }
//! return width in pixels of the video clip
int getWidth();
//! return height in pixels of the video clip
int getHeight();
//! Width of the actual picture inside a video frame (depending on implementation, this may be equal to mWidth or differ within a codec block size (usually 16))
int getSubFrameWidth();
//! Height of the actual picture inside a video frame (depending on implementation, this may be equal to mHeight or differ within a codec block size (usually 16))
int getSubFrameHeight();
//! X Offset of the actual picture inside a video frame (depending on implementation, this may be 0 or within a codec block size (usually 16))
int getSubFrameOffsetX();
//! Y Offset of the actual picture inside a video frame (depending on implementation, this may be 0 or differ within a codec block size (usually 16))
int getSubFrameOffsetY();
/**
\brief return stride in pixels
If you've specified usePower2Stride when creating the TheoraVideoClip object
then this value will be the next power of two size compared to width,
eg: w=376, stride=512.
Otherwise, stride will be equal to width
*/
int getStride() { return mStride; }
//! return the timer object associated with this object
TheoraTimer* getTimer();
//! replace the timer object with a new one
void setTimer(TheoraTimer* timer);
//! used by TheoraWorkerThread, do not call directly
virtual bool decodeNextFrame() = 0;
//! advance time. TheoraVideoManager calls this
void update(float timeDelta);
/**
\brief update timer to the display time of the next frame
useful if you want to grab frames instead of regular display
\return time advanced. 0 if no frames are ready
*/
float updateToNextFrame();
TheoraFrameQueue* getFrameQueue();
/**
\brief pop the frame from the front of the FrameQueue
see TheoraFrameQueue::pop() for more details
*/
void popFrame();
/**
\brief Returns the first available frame in the queue or NULL if no frames are available.
see TheoraFrameQueue::getFirstAvailableFrame() for more details
*/
TheoraVideoFrame* getNextFrame();
/**
check if there is enough audio data decoded to submit to the audio interface
TheoraWorkerThread calls this
*/
virtual void decodedAudioCheck() = 0;
void setAudioInterface(TheoraAudioInterface* iface);
TheoraAudioInterface* getAudioInterface();
/**
\brief resize the frame queues
Warning: this call discards ready frames in the frame queue
*/
void setNumPrecachedFrames(int n);
//! returns the size of the frame queue
int getNumPrecachedFrames();
//! returns the number of ready frames in the frame queue
int getNumReadyFrames();
//! if you want to adjust the audio gain. range [0,1]
void setAudioGain(float gain);
float getAudioGain();
//! if you want the video to automatically and smoothly restart when the last frame is reached
void setAutoRestart(bool value);
bool getAutoRestart() { return mAutoRestart; }
//! stores p_track in audio_track
void set_audio_track(int p_track) { audio_track=p_track; }
/**
TODO: user priority. Useful only when more than one video is being decoded
*/
void setPriority(float priority);
float getPriority();
//! Used by TheoraVideoManager to schedule work
float getPriorityIndex();
//! get the current time index from the timer object
float getTimePosition();
//! get the duration of the movie in seconds
float getDuration();
//! return the clip's frame rate. Warning: fps can be a non-integer number!
float getFPS();
//! get the number of frames in this movie
int getNumFrames() { return mNumFrames; }
//! return the current output mode for this video object
TheoraOutputMode getOutputMode();
/**
set a new output mode
Warning: this discards the frame queue. ready frames will be lost.
*/
void setOutputMode(TheoraOutputMode mode);
bool isDone();
void play();
void pause();
void restart();
bool isPaused();
void stop();
void setPlaybackSpeed(float speed);
float getPlaybackSpeed();
//! seek to a given time position
void seek(float time);
//! seek to a given frame number
void seekToFrame(int frame);
//! wait max_wait_time for the clip to cache a given percentage of frames, desired_cache_factor in range [0,1]
void waitForCache(float desired_cache_factor = 0.5f, float max_wait_time = 1.0f);
};
#endif

View file

@ -1,56 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#ifndef _TheoraVideoFrame_h
#define _TheoraVideoFrame_h
#include "TheoraExport.h"
#include "TheoraVideoClip.h"
struct TheoraPixelTransform;
/**
One slot of a clip's frame queue: a pixel buffer plus the bookkeeping
(display time, ready / in-use flags, loop iteration) used to schedule it.
*/
class TheoraPlayerExport TheoraVideoFrame
{
protected:
//! the clip this frame belongs to (passed to the constructor)
TheoraVideoClip* mParent;
//! pixel storage returned by getBuffer()
unsigned char* mBuffer;
//! frame number of this frame in the stream, see _setFrameNumber() / getFrameNumber()
unsigned long mFrameNumber;
public:
//! global time in seconds this frame should be displayed on
float mTimeToDisplay;
//! whether the frame is ready for display or not
bool mReady;
//! indicates the frame is being used by TheoraWorkerThread instance
bool mInUse;
//! used to keep track of linear time in looping videos
int mIteration;
//! presumably the number of bytes per pixel of mBuffer - TODO confirm in the implementation
int mBpp;
TheoraVideoFrame(TheoraVideoClip* parent);
virtual ~TheoraVideoFrame();
//! internal function, do not use directly
void _setFrameNumber(unsigned long number) { mFrameNumber = number; }
//! returns the frame number of this frame in the theora stream
unsigned long getFrameNumber() { return mFrameNumber; }
void clear();
int getWidth();
int getStride();
int getHeight();
unsigned char* getBuffer();
//! Called by TheoraVideoClip to decode a source buffer onto itself
virtual void decode(struct TheoraPixelTransform* t);
};
#endif

View file

@ -1,110 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#ifndef _TheoraVideoManager_h
#define _TheoraVideoManager_h
#include <vector>
#include <list>
#include <string>
#include "TheoraExport.h"
#include "TheoraVideoClip.h"
#ifdef _WIN32
#pragma warning( disable: 4251 ) // MSVC++
#endif
// forward class declarations
class TheoraWorkerThread;
class TheoraMutex;
class TheoraDataSource;
class TheoraAudioInterfaceFactory;
/**
This is the main singleton class that handles all playback/sync operations.
It keeps the list of created clips and the worker threads that decode them.
*/
class TheoraPlayerExport TheoraVideoManager
{
protected:
friend class TheoraWorkerThread;
typedef std::vector<TheoraVideoClip*> ClipList;
typedef std::vector<TheoraWorkerThread*> ThreadList;
//! stores pointers to worker threads which are decoding video and audio
ThreadList mWorkerThreads;
//! stores pointers to created video clips
ClipList mClips;
//! stores pointers to clips that were decoded in the past in order to achieve fair scheduling
std::list<TheoraVideoClip*> mWorkLog;
//! see setDefaultNumPrecachedFrames() / getDefaultNumPrecachedFrames()
int mDefaultNumPrecachedFrames;
TheoraMutex* mWorkMutex;
//! see setAudioInterfaceFactory() / getAudioInterfaceFactory()
TheoraAudioInterfaceFactory* mAudioFactory;
void createWorkerThreads(int n);
void destroyWorkerThreads();
float calcClipWorkTime(TheoraVideoClip* clip);
/**
* Called by TheoraWorkerThread to request a TheoraVideoClip instance to work on decoding
*/
TheoraVideoClip* requestWork(TheoraWorkerThread* caller);
public:
TheoraVideoManager(int num_worker_threads=1);
virtual ~TheoraVideoManager();
//! get the global reference to the manager instance
static TheoraVideoManager& getSingleton();
//! get the global pointer to the manager instance
static TheoraVideoManager* getSingletonPtr();
//! search registered clips by name
TheoraVideoClip* getVideoClipByName(std::string name);
//! create a clip from a file name; p_track presumably selects the audio track (cf. the p_audio_track overload) - TODO confirm
TheoraVideoClip* createVideoClip(std::string filename,TheoraOutputMode output_mode=TH_RGB,int numPrecachedOverride=0,bool usePower2Stride=0, int p_track=0);
//! create a clip from an arbitrary data source
TheoraVideoClip* createVideoClip(TheoraDataSource* data_source,TheoraOutputMode output_mode=TH_RGB,int numPrecachedOverride=0,bool usePower2Stride=0, int p_audio_track=0);
void update(float timeDelta);
void destroyVideoClip(TheoraVideoClip* clip);
void setAudioInterfaceFactory(TheoraAudioInterfaceFactory* factory);
TheoraAudioInterfaceFactory* getAudioInterfaceFactory();
int getNumWorkerThreads();
void setNumWorkerThreads(int n);
void setDefaultNumPrecachedFrames(int n) { mDefaultNumPrecachedFrames=n; }
int getDefaultNumPrecachedFrames() { return mDefaultNumPrecachedFrames; }
//! used by libtheoraplayer functions
void logMessage(std::string msg);
/**
\brief you can set your own log function to receive theora's log calls
This way you can integrate libtheoraplayer's log messages in your own
logging system, prefix them, mute them or whatever you want
*/
static void setLogFunction(void (*fn)(std::string));
//! get nicely formatted version string
std::string getVersionString();
/**
\brief get version numbers
if c is negative, it means it's a release candidate -c
*/
void getVersion(int* a,int* b,int* c);
//! returns the supported decoders (eg. Theora, AVFoundation...)
std::vector<std::string> getSupportedDecoders();
};
#endif

View file

@ -1,32 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#ifndef _TheoraWorkerThread_h
#define _TheoraWorkerThread_h
#include "TheoraAsync.h"
class TheoraVideoClip;
/**
This is the worker thread, requests work from TheoraVideoManager
and decodes assigned TheoraVideoClip objects
*/
class TheoraWorkerThread : public TheoraThread
{
//! the clip reported by getAssignedClip()
TheoraVideoClip* mClip;
public:
TheoraWorkerThread();
~TheoraWorkerThread();
//! returns mClip; presumably NULL while no clip is assigned - TODO confirm in the implementation
TheoraVideoClip* getAssignedClip() { return mClip; }
//! Main Thread Body - do not call directly!
void execute();
};
#endif

View file

@ -1,47 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#if defined(__AVFOUNDATION) && !defined(_TheoraVideoClip_AVFoundation_h)
#define _TheoraVideoClip_AVFoundation_h
#include "TheoraAudioPacketQueue.h"
#include "TheoraVideoClip.h"
#ifndef AVFOUNDATION_CLASSES_DEFINED
class AVAssetReader;
class AVAssetReaderTrackOutput;
#endif
/**
TheoraVideoClip backend that decodes through Apple's AVFoundation (AVAssetReader)
and queues decoded audio through TheoraAudioPacketQueue.
It implements every pure virtual of TheoraVideoClip.
*/
class TheoraVideoClip_AVFoundation : public TheoraVideoClip, public TheoraAudioPacketQueue
{
protected:
bool mLoaded;
//! running number assigned to decoded frames
int mFrameNumber;
//! asset reader; NULL while nothing is loaded
AVAssetReader* mReader;
//! mOutput: video track output; mAudioOutput: presumably the audio track output - TODO confirm in load()
AVAssetReaderTrackOutput *mOutput, *mAudioOutput;
unsigned int mReadAudioSamples;
//! releases mOutput, mAudioOutput and mReader
void unload();
void doSeek();
public:
TheoraVideoClip_AVFoundation(TheoraDataSource* data_source,
TheoraOutputMode output_mode,
int nPrecachedFrames,
bool usePower2Stride);
~TheoraVideoClip_AVFoundation();
bool _readData();
bool decodeNextFrame();
void _restart();
void load(TheoraDataSource* source);
float decodeAudio();
void decodedAudioCheck();
std::string getDecoderName() { return "AVFoundation"; }
};
#endif

View file

@ -1,457 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#ifdef __AVFOUNDATION
#define AVFOUNDATION_CLASSES_DEFINED
#import <AVFoundation/AVFoundation.h>
#include "TheoraAudioInterface.h"
#include "TheoraDataSource.h"
#include "TheoraException.h"
#include "TheoraTimer.h"
#include "TheoraUtil.h"
#include "TheoraFrameQueue.h"
#include "TheoraVideoFrame.h"
#include "TheoraVideoManager.h"
#include "TheoraVideoClip_AVFoundation.h"
#include "TheoraPixelTransform.h"
#ifdef _AVFOUNDATION_BGRX
// Converts a packed 4-byte-per-pixel source frame that carries its alpha in the
// right half of the picture into `w` x `h` 32-bit destination pixels.
//
// This is a fast path that uses the kernel byte swapping call (OSReadSwapInt32)
// to optimize alpha decoding. In AVFoundation, BGRX mode conversion is preferred
// to YUV conversion because Apple's YUV conversion on iOS seems to run faster than
// libtheoraplayer's implementation. This may change in the future with more
// optimizations to libtheoraplayer's YUV conversion code, making this function obsolete.
//
// dest - destination buffer, written as w * h consecutive 32-bit words
// w, h - size in pixels of the visible (left) half of the source picture
// t    - supplies the source pointer (t->raw) and its row pitch in bytes (t->rawStride);
//        every source row must hold at least 2 * w pixels
static void bgrx2rgba(unsigned char* dest, int w, int h, struct TheoraPixelTransform* t)
{
	unsigned int* dst = (unsigned int*) dest;
	const unsigned char* srcRow = t->raw;
	for (int y = 0; y < h; ++y, srcRow += t->rawStride)
	{
		unsigned int* const rowEnd = dst + w;
		// colorOffset walks the left half of the row, alphaOffset the matching
		// pixel `w` pixels further right.
		for (int colorOffset = 0, alphaOffset = w * 4; dst != rowEnd; colorOffset += 4, alphaOffset += 4, ++dst)
		{
			// use the full alpha range here because the Y channel has already been converted
			// to RGB and that's in [0, 255] range.
			const unsigned int alpha = srcRow[alphaOffset];
			*dst = (OSReadSwapInt32(srcRow, colorOffset) >> 8) | (alpha << 24);
		}
	}
}
#endif
// Reads the planar YCbCr descriptor found at `src` and returns a copy whose
// plane offsets and row pitches have been byte-swapped with OSSwapInt32
// (the stored descriptor is treated as big-endian).
static CVPlanarPixelBufferInfo_YCbCrPlanar getYUVStruct(void* src)
{
	const CVPlanarPixelBufferInfo_YCbCrPlanar* stored = (const CVPlanarPixelBufferInfo_YCbCrPlanar*) src;
	CVPlanarPixelBufferInfo_YCbCrPlanar swapped;

	// luma plane
	swapped.componentInfoY.offset = OSSwapInt32(stored->componentInfoY.offset);
	swapped.componentInfoY.rowBytes = OSSwapInt32(stored->componentInfoY.rowBytes);

	// blue-difference chroma plane
	swapped.componentInfoCb.offset = OSSwapInt32(stored->componentInfoCb.offset);
	swapped.componentInfoCb.rowBytes = OSSwapInt32(stored->componentInfoCb.rowBytes);

	// red-difference chroma plane
	swapped.componentInfoCr.offset = OSSwapInt32(stored->componentInfoCr.offset);
	swapped.componentInfoCr.rowBytes = OSSwapInt32(stored->componentInfoCr.rowBytes);

	return swapped;
}
/**
	Creates the clip in its "nothing loaded" state: all arguments are forwarded to
	TheoraVideoClip, and the AVFoundation handles and counters are reset.
 */
TheoraVideoClip_AVFoundation::TheoraVideoClip_AVFoundation(TheoraDataSource* data_source,
											   TheoraOutputMode output_mode,
											   int nPrecachedFrames,
											   bool usePower2Stride):
	TheoraVideoClip(data_source, output_mode, nPrecachedFrames, usePower2Stride),
	TheoraAudioPacketQueue()
{
	mLoaded = false;
	// decodeNextFrame() reads and increments mFrameNumber, so give it a defined
	// starting value instead of leaving it indeterminate until something else sets it.
	mFrameNumber = 0;
	mReader = NULL;
	mOutput = mAudioOutput = NULL;
	mReadAudioSamples = mAudioFrequency = mNumAudioChannels = 0;
}
// Releases the AVFoundation reader and track outputs still held by this clip.
TheoraVideoClip_AVFoundation::~TheoraVideoClip_AVFoundation()
{
unload();
}
void TheoraVideoClip_AVFoundation::unload()
{
if (mOutput != NULL || mAudioOutput != NULL || mReader != NULL)
{
NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init];
if (mOutput != NULL)
{
[mOutput release];
mOutput = NULL;
}
if (mAudioOutput)
{
[mAudioOutput release];
mAudioOutput = NULL;
}
if (mReader != NULL)
{
[mReader release];
mReader = NULL;
}
[pool release];
}
}
// This backend performs no work in _readData(); it unconditionally reports success.
bool TheoraVideoClip_AVFoundation::_readData()
{
	return true;
}
// Pulls the next video sample from the asset reader and decodes it into an
// empty frame taken from the frame queue.
//
// Returns 0 when there is no reader, the end of file was already reached, the
// reader could not be restarted after a failure, no empty frame is available,
// or the reader has completed (in which case the clip is either restarted or
// unloaded and flagged as end-of-file). Returns 1 otherwise.
bool TheoraVideoClip_AVFoundation::decodeNextFrame()
{
    if (mReader == NULL || mEndOfFile) return 0;
    AVAssetReaderStatus status = [mReader status];
    if (status == AVAssetReaderStatusFailed)
    {
        // This can happen on iOS when you suspend the app... Only happens on the device, iOS simulator seems to work fine.
        th_writelog("AVAssetReader reading failed, restarting...");
        // resume from the frame the timer currently points at, clamped to the clip's range
        mSeekFrame = mTimer->getTime() * mFPS;
        if (mSeekFrame < 0) mSeekFrame = 0;
        if (mSeekFrame > mDuration * mFPS - 1) mSeekFrame = mDuration * mFPS - 1;
        _restart();
        status = [mReader status];
        if (status == AVAssetReaderStatusFailed)
        {
            th_writelog("AVAssetReader restart failed!");
            return 0;
        }
        th_writelog("AVAssetReader restart succeeded!");
    }

    TheoraVideoFrame* frame = mFrameQueue->requestEmptyFrame();
    if (!frame) return 0;

    CMSampleBufferRef sampleBuffer = NULL;
    NSAutoreleasePool* pool = NULL;
    CMTime presentationTime;

    if (mAudioInterface) decodeAudio();

    if (status == AVAssetReaderStatusReading)
    {
        pool = [[NSAutoreleasePool alloc] init];
        // Loops only to skip over frames that are already late; the first frame
        // that is kept gets decoded and ends the loop.
        while ((sampleBuffer = [mOutput copyNextSampleBuffer]))
        {
            presentationTime = CMSampleBufferGetOutputPresentationTimeStamp(sampleBuffer);
            frame->mTimeToDisplay = (float) CMTimeGetSeconds(presentationTime);
            frame->mIteration = mIteration;
            frame->_setFrameNumber(mFrameNumber);
            ++mFrameNumber;

            if (frame->mTimeToDisplay < mTimer->getTime() && !mRestarted && mFrameNumber % 16 != 0)
            {
                // %16 operation is here to prevent a playback halt during video playback if the decoder can't keep up with demand.
#ifdef _DEBUG
                th_writelog(mName + ": pre-dropped frame " + str(mFrameNumber - 1));
#endif
                ++mNumDisplayedFrames;
                ++mNumDroppedFrames;
                CMSampleBufferInvalidate(sampleBuffer);
                CFRelease(sampleBuffer);
                sampleBuffer = NULL;
                continue; // drop frame
            }

            CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
            CVPixelBufferLockBaseAddress(imageBuffer, 0);
            void *baseAddress = CVPixelBufferGetBaseAddress(imageBuffer);
            mStride = CVPixelBufferGetBytesPerRow(imageBuffer);
            size_t width = CVPixelBufferGetWidth(imageBuffer);
            size_t height = CVPixelBufferGetHeight(imageBuffer);

            TheoraPixelTransform t;
            memset(&t, 0, sizeof(TheoraPixelTransform));
#ifdef _AVFOUNDATION_BGRX
            if (mOutputMode == TH_BGRX || mOutputMode == TH_RGBA)
            {
                t.raw = (unsigned char*) baseAddress;
                t.rawStride = mStride;
            }
            else
#endif
            {
                // planar YCbCr: plane offsets and strides come from the header at the base address
                CVPlanarPixelBufferInfo_YCbCrPlanar yuv = getYUVStruct(baseAddress);
                t.y = (unsigned char*) baseAddress + yuv.componentInfoY.offset;  t.yStride = yuv.componentInfoY.rowBytes;
                t.u = (unsigned char*) baseAddress + yuv.componentInfoCb.offset; t.uStride = yuv.componentInfoCb.rowBytes;
                t.v = (unsigned char*) baseAddress + yuv.componentInfoCr.offset; t.vStride = yuv.componentInfoCr.rowBytes;
            }
#ifdef _AVFOUNDATION_BGRX
            if (mOutputMode == TH_RGBA)
            {
                // Convert once. The previous code repeated this identical conversion
                // 1000 times per frame (a leftover benchmarking loop).
                bgrx2rgba(frame->getBuffer(), mWidth / 2, mHeight, &t);
                frame->mReady = true;
            }
            else
#endif
            frame->decode(&t);

            CVPixelBufferUnlockBaseAddress(imageBuffer, 0);
            CMSampleBufferInvalidate(sampleBuffer);
            CFRelease(sampleBuffer);
            // sampleBuffer is intentionally left non-NULL: the check below only
            // compares it against NULL to tell "got a frame" from "ran dry".
            break; // TODO - should this really be a while loop instead of an if block?
        }
    }
    if (pool) [pool release];

    if (!frame->mReady) // in case the frame wasn't used
    {
        frame->mInUse = 0;
    }

    if (sampleBuffer == NULL && mReader.status == AVAssetReaderStatusCompleted) // other cases could be app suspended
    {
        if (mAutoRestart)
        {
            ++mIteration;
            _restart();
        }
        else
        {
            unload();
            mEndOfFile = true;
        }
        return 0;
    }
    return 1;
}
// Reopens the current stream: clears the end-of-file flag, releases the reader
// objects, loads mStream again and marks the clip as restarted.
void TheoraVideoClip_AVFoundation::_restart()
{
    this->mEndOfFile = false;
    this->unload();
    this->load(this->mStream);
    this->mRestarted = true;
}
// Opens `source` with an AVAssetReader and prepares video (and, when an audio
// interface factory is registered, audio) track outputs.
//
// `source` must be a TheoraFileDataSource or TheoraMemoryFileDataSource because
// the asset is opened from the file name the data source reports.
// When mSeekFrame is not -1 the reader's time range starts at that frame.
//
// Throws TheoraGenericException when the data source type is unsupported or
// the asset has no video track.
void TheoraVideoClip_AVFoundation::load(TheoraDataSource* source)
{
    mStream = source;
    mFrameNumber = 0;
    mEndOfFile = false;

    TheoraFileDataSource* fileDataSource = dynamic_cast<TheoraFileDataSource*>(source);
    std::string filename;
    if (fileDataSource != NULL) filename = fileDataSource->getFilename();
    else
    {
        TheoraMemoryFileDataSource* memoryDataSource = dynamic_cast<TheoraMemoryFileDataSource*>(source);
        if (memoryDataSource != NULL) filename = memoryDataSource->getFilename();
        else throw TheoraGenericException("Unable to load MP4 file");
    }

    NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init];
    NSString* path = [NSString stringWithUTF8String:filename.c_str()];
    NSError* err = nil;
    NSURL *url = [NSURL fileURLWithPath:path];
    // Autoreleased: the reader keeps its own reference to the asset. Previously
    // the asset was alloc'd and never released, leaking on every load/seek/restart.
    AVAsset* asset = [[[AVURLAsset alloc] initWithURL:url options:nil] autorelease];
    mReader = [[AVAssetReader alloc] initWithAsset:asset error:&err];
    if (mReader == nil)
        th_writelog("AVAssetReader creation failed for: " + filename);

    NSArray* tracks = [asset tracksWithMediaType:AVMediaTypeVideo];
    if ([tracks count] == 0)
    {
        [pool release]; // don't leave the pool dangling when bailing out
        throw TheoraGenericException("Unable to open video file: " + filename);
    }
    AVAssetTrack *videoTrack = [tracks objectAtIndex:0];

    NSArray* audioTracks = [asset tracksWithMediaType:AVMediaTypeAudio];
    if (audio_track >= audioTracks.count)
        audio_track = 0;
    AVAssetTrack *audioTrack = audioTracks.count > 0 ? [audioTracks objectAtIndex:audio_track] : NULL;
    printf("*********** using audio track %i\n", audio_track);

#ifdef _AVFOUNDATION_BGRX
    bool yuv_output = (mOutputMode != TH_BGRX && mOutputMode != TH_RGBA);
#else
    bool yuv_output = true;
#endif

    NSDictionary *videoOptions = [NSDictionary dictionaryWithObjectsAndKeys:[NSNumber numberWithInt:(yuv_output) ? kCVPixelFormatType_420YpCbCr8Planar : kCVPixelFormatType_32BGRA], kCVPixelBufferPixelFormatTypeKey, nil];

    mOutput = [[AVAssetReaderTrackOutput alloc] initWithTrack:videoTrack outputSettings:videoOptions];
    [mReader addOutput:mOutput];
    if ([mOutput respondsToSelector:@selector(setAlwaysCopiesSampleData:)]) // Not supported on iOS versions older than 5.0
        mOutput.alwaysCopiesSampleData = NO;

    mFPS = videoTrack.nominalFrameRate;
    mWidth = mSubFrameWidth = mStride = videoTrack.naturalSize.width;
    mHeight = mSubFrameHeight = videoTrack.naturalSize.height;
    mFrameDuration = 1.0f / mFPS;
    mDuration = (float) CMTimeGetSeconds(asset.duration);

    if (mFrameQueue == NULL)
    {
        mFrameQueue = new TheoraFrameQueue(this);
        mFrameQueue->setSize(mNumPrecachedFrames);
    }

    if (mSeekFrame != -1)
    {
        mFrameNumber = mSeekFrame;
        // A preferred timescale of 1 rounds the start time to whole seconds, which
        // puts the reader up to half a second away from the requested frame.
        // 600 represents the common frame rates (24/25/30/60) exactly.
        [mReader setTimeRange: CMTimeRangeMake(CMTimeMakeWithSeconds(mSeekFrame / mFPS, 600), kCMTimePositiveInfinity)];
    }

    if (audioTrack)
    {
        TheoraAudioInterfaceFactory* audio_factory = TheoraVideoManager::getSingleton().getAudioInterfaceFactory();
        if (audio_factory)
        {
            // request interleaved, little-endian, 32-bit float PCM
            NSDictionary *audioOptions = [NSDictionary dictionaryWithObjectsAndKeys:
                                          [NSNumber numberWithInt:kAudioFormatLinearPCM], AVFormatIDKey,
                                          [NSNumber numberWithBool:NO], AVLinearPCMIsNonInterleaved,
                                          [NSNumber numberWithBool:NO], AVLinearPCMIsBigEndianKey,
                                          [NSNumber numberWithBool:YES], AVLinearPCMIsFloatKey,
                                          [NSNumber numberWithInt:32], AVLinearPCMBitDepthKey,
                                          nil];

            mAudioOutput = [[AVAssetReaderTrackOutput alloc] initWithTrack:audioTrack outputSettings:audioOptions];
            [mReader addOutput:mAudioOutput];
            if ([mAudioOutput respondsToSelector:@selector(setAlwaysCopiesSampleData:)]) // Not supported on iOS versions older than 5.0
                mAudioOutput.alwaysCopiesSampleData = NO;

            NSArray* desclst = audioTrack.formatDescriptions;
            CMAudioFormatDescriptionRef desc = (CMAudioFormatDescriptionRef) [desclst objectAtIndex:0];
            const AudioStreamBasicDescription* audioDesc = CMAudioFormatDescriptionGetStreamBasicDescription(desc);
            mAudioFrequency = (unsigned int) audioDesc->mSampleRate;
            mNumAudioChannels = audioDesc->mChannelsPerFrame;

            if (mSeekFrame != -1)
            {
                // number of samples (all channels) that precede the seek position
                mReadAudioSamples = mFrameNumber * (mAudioFrequency * mNumAudioChannels) / mFPS;
            }
            else mReadAudioSamples = 0;

            if (mAudioInterface == NULL)
                setAudioInterface(audio_factory->createInstance(this, mNumAudioChannels, mAudioFrequency));
        }
    }
#ifdef _DEBUG
    else if (!mLoaded)
    {
        th_writelog("-----\nwidth: " + str(mWidth) + ", height: " + str(mHeight) + ", fps: " + str((int) getFPS()));
        th_writelog("duration: " + strf(mDuration) + " seconds\n-----");
    }
#endif
    [mReader startReading];
    [pool release];
    mLoaded = true;
}
// Flushes queued audio packets to the audio interface while holding the audio
// mutex. Skipped when there is no audio interface or the timer is paused.
void TheoraVideoClip_AVFoundation::decodedAudioCheck()
{
    const bool shouldFlush = mAudioInterface && !mTimer->isPaused();
    if (shouldFlush)
    {
        mAudioMutex->lock();
        flushAudioPackets(mAudioInterface);
        mAudioMutex->unlock();
    }
}
// Reads audio sample buffers from the audio track output and queues them as
// audio packets until the buffered audio is at least
// (frame queue size / FPS + 1) seconds ahead of the current video position.
//
// Returns 0 when there is no reader or the end of file was reached, -1 in
// every other case (including right after a restart, when nothing is read).
float TheoraVideoClip_AVFoundation::decodeAudio()
{
    if (mRestarted) return -1;
    if (mReader == NULL || mEndOfFile) return 0;
    AVAssetReaderStatus status = [mReader status];

    if (mAudioOutput)
    {
        CMSampleBufferRef sampleBuffer = NULL;
        NSAutoreleasePool* pool = NULL;
        bool mutexLocked = 0;

        // converts a count of samples (all channels) into seconds
        float factor = 1.0f / (mAudioFrequency * mNumAudioChannels);
        float videoTime = (float) mFrameNumber / mFPS;
        float min = mFrameQueue->getSize() / mFPS + 1.0f;

        if (status == AVAssetReaderStatusReading)
        {
            pool = [[NSAutoreleasePool alloc] init];

            // always buffer up of audio ahead of the frames
            while (mReadAudioSamples * factor - videoTime < min)
            {
                if ((sampleBuffer = [mAudioOutput copyNextSampleBuffer]))
                {
                    AudioBufferList audioBufferList;
                    CMBlockBufferRef blockBuffer = NULL;
                    OSStatus listStatus = CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(sampleBuffer, NULL, &audioBufferList, sizeof(audioBufferList), NULL, NULL, 0, &blockBuffer);

                    // Only touch the buffer list when the call succeeded; on failure
                    // its contents are not valid.
                    if (listStatus == noErr)
                    {
                        for (UInt32 y = 0; y < audioBufferList.mNumberBuffers; ++y)
                        {
                            AudioBuffer audioBuffer = audioBufferList.mBuffers[y];
                            float *frame = (float*) audioBuffer.mData;

                            // the mutex is taken lazily and held until all packets are queued
                            if (!mutexLocked)
                            {
                                mAudioMutex->lock();
                                mutexLocked = 1;
                            }
                            addAudioPacket(frame, audioBuffer.mDataByteSize / (mNumAudioChannels * sizeof(float)), mAudioGain);

                            mReadAudioSamples += audioBuffer.mDataByteSize / (sizeof(float));
                        }
                    }
                    else
                    {
                        th_writelog("AVFoundation: unable to obtain audio buffer list, skipping sample buffer");
                    }

                    // CFRelease(NULL) crashes, so guard against a failed call above
                    if (blockBuffer != NULL) CFRelease(blockBuffer);
                    CMSampleBufferInvalidate(sampleBuffer);
                    CFRelease(sampleBuffer);
                }
                else
                {
                    // no more audio samples: drop the output so later calls skip this path
                    [mAudioOutput release];
                    mAudioOutput = nil;
                    break;
                }
            }
            [pool release];
        }
        if (mutexLocked) mAudioMutex->unlock();
    }

    return -1;
}
// Seeks to mSeekFrame: moves the timer, reloads the stream at the new position
// (the timer is paused meanwhile if it was running), discards queued audio
// packets and finally clears mSeekFrame.
void TheoraVideoClip_AVFoundation::doSeek()
{
#if _DEBUG
    th_writelog(mName + " [seek]: seeking to frame " + str(mSeekFrame));
#endif
    float targetTime = mSeekFrame / getFPS();
    mTimer->seek(targetTime);

    const bool wasPaused = mTimer->isPaused();
    if (!wasPaused) mTimer->pause(); // pause until seeking is done

    mRestarted = false;
    mEndOfFile = false;

    resetFrameQueue();
    unload();
    load(mStream);

    if (mAudioInterface)
    {
        mAudioMutex->lock();
        destroyAllAudioPackets();
        mAudioMutex->unlock();
    }

    if (!wasPaused) mTimer->play();
    mSeekFrame = -1;
}
#endif

View file

@ -1,439 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#ifdef __FFMPEG
#include "TheoraAudioInterface.h"
#include "TheoraDataSource.h"
#include "TheoraException.h"
#include "TheoraTimer.h"
#include "TheoraUtil.h"
#include "TheoraFrameQueue.h"
#include "TheoraVideoFrame.h"
#include "TheoraVideoManager.h"
#include "TheoraVideoClip_FFmpeg.h"
#include "TheoraPixelTransform.h"
#define READ_BUFFER_SIZE 4096
#ifdef __cplusplus
#define __STDC_CONSTANT_MACROS
#ifdef _STDINT_H
#undef _STDINT_H
#endif
# include <stdint.h>
#endif
#define _FFMPEG_DEBUG
extern "C"
{
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include "libavutil/avassert.h"
}
static bool ffmpegInitialised = 0;
static int readFunction(void* data, uint8_t* buf, int buf_size)
{
#ifdef _FFMPEG_DEBUG
th_writelog("reading " + str(buf_size) + " bytes");
#endif
TheoraDataSource* src = (TheoraDataSource*) data;
return src->read(buf, buf_size);
}
static int64_t seekFunction(void* data, int64_t offset, int whence)
{
#ifdef _FFMPEG_DEBUG
th_writelog("seeking: offset = " + str((long) offset) + ", whence = " + str(whence));
#endif
TheoraDataSource* src = (TheoraDataSource*) data;
if (whence == AVSEEK_SIZE)
return src->size();
else if (whence == SEEK_SET)
src->seek((long) offset);
else if (whence == SEEK_END)
src->seek(src->size() - (long) offset);
return src->tell();
}
// Log callback installed with av_log_set_callback(): writes the raw format
// string, then the formatted message prefixed with "ffmpeg: ", to th_writelog.
// `p` and `level` are not used.
static void avlog_theoraplayer(void* p, int level, const char* fmt, va_list vargs)
{
    th_writelog(fmt);
    // Bounded formatting into a local buffer: the previous unbounded vsprintf
    // into a shared static buffer could overflow on long messages and was not
    // safe if the callback is entered from more than one thread.
    char logstr[2048];
    vsnprintf(logstr, sizeof(logstr), fmt, vargs);
    logstr[sizeof(logstr) - 1] = '\0'; // some older runtimes don't terminate on truncation
    th_writelog("ffmpeg: " + std::string(logstr));
}
// Accumulates everything passed to _log(); show_codecs() hands the collected
// text to av_log() in one go.
std::string text;

// Appends the C string `s` to the accumulated log text.
static void _log(const char* s)
{
    text.append(s);
}
static void _log(const char c)
{
char s[2] = {c, 0};
_log(s);
}
// Walks the registered codec list starting after `prev` and returns the next
// codec whose id equals `id` and which is an encoder (encoder != 0) or a
// decoder (encoder == 0). Returns NULL when the list is exhausted.
static const AVCodec *next_codec_for_id(enum AVCodecID id, const AVCodec *prev,
                                        int encoder)
{
    for (const AVCodec *candidate = av_codec_next(prev);
         candidate != NULL;
         candidate = av_codec_next(candidate))
    {
        if (candidate->id != id) continue;
        const int rightKind = encoder ? av_codec_is_encoder(candidate)
                                      : av_codec_is_decoder(candidate);
        if (rightKind) return candidate;
    }
    return NULL;
}
static int compare_codec_desc(const void *a, const void *b)
{
const AVCodecDescriptor **da = (const AVCodecDescriptor **) a;
const AVCodecDescriptor **db = (const AVCodecDescriptor **) b;
return (*da)->type != (*db)->type ? (*da)->type - (*db)->type :
strcmp((*da)->name, (*db)->name);
}
static unsigned get_codecs_sorted(const AVCodecDescriptor ***rcodecs)
{
const AVCodecDescriptor *desc = NULL;
const AVCodecDescriptor **codecs;
unsigned nb_codecs = 0, i = 0;
while ((desc = avcodec_descriptor_next(desc)))
++nb_codecs;
if (!(codecs = (const AVCodecDescriptor**) av_calloc(nb_codecs, sizeof(*codecs)))) {
av_log(NULL, AV_LOG_ERROR, "Out of memory\n");
exit(1);
}
desc = NULL;
while ((desc = avcodec_descriptor_next(desc)))
codecs[i++] = desc;
av_assert0(i == nb_codecs);
qsort(codecs, nb_codecs, sizeof(*codecs), compare_codec_desc);
*rcodecs = codecs;
return nb_codecs;
}
// Maps a media type to the single-letter tag used in the codec listing;
// unknown types map to '?'.
static char get_media_type_char(enum AVMediaType type)
{
    if (type == AVMEDIA_TYPE_VIDEO)      return 'V';
    if (type == AVMEDIA_TYPE_AUDIO)      return 'A';
    if (type == AVMEDIA_TYPE_DATA)       return 'D';
    if (type == AVMEDIA_TYPE_SUBTITLE)   return 'S';
    if (type == AVMEDIA_TYPE_ATTACHMENT) return 'T';
    return '?';
}
// Appends " (encoders: name name )" or " (decoders: name name )" to the log
// text, listing every encoder (encoder != 0) or decoder (encoder == 0)
// registered for codec `id`.
static void print_codecs_for_id(enum AVCodecID id, int encoder)
{
    const AVCodec *codec = NULL;
    // Open the parenthesis that the closing ")" below belongs to and separate
    // the names; previously the output read like "decodersfoobar)".
    _log(" (");
    _log(encoder ? "encoders" : "decoders");
    _log(": ");
    while ((codec = next_codec_for_id(id, codec, encoder)))
    {
        _log(codec->name);
        _log(" ");
    }
    _log(")");
}
// Writes a legend via th_writelog, then builds one line per known codec
// (capability flags, name, long name and any differently-named decoder /
// encoder implementations) in the global log text and passes the accumulated
// text to av_log(). The three parameters are not used. Always returns 0.
int show_codecs(void *optctx, const char *opt, const char *arg)
{
    const AVCodecDescriptor **codecs;
    unsigned i, nb_codecs = get_codecs_sorted(&codecs);
    th_writelog("Codecs:\n"
                " D..... = Decoding supported\n"
                " .E.... = Encoding supported\n"
                " ..V... = Video codec\n"
                " ..A... = Audio codec\n"
                " ..S... = Subtitle codec\n"
                " ...I.. = Intra frame-only codec\n"
                " ....L. = Lossy compression\n"
                " .....S = Lossless compression\n"
                " -------\n");
    for (i = 0; i < nb_codecs; ++i) {
        const AVCodecDescriptor *desc = codecs[i];
        const AVCodec *codec = NULL;
        _log(" ");
        _log(avcodec_find_decoder(desc->id) ? "D" : ".");
        _log(avcodec_find_encoder(desc->id) ? "E" : ".");
        _log(get_media_type_char(desc->type));
        _log((desc->props & AV_CODEC_PROP_INTRA_ONLY) ? "I" : ".");
        _log((desc->props & AV_CODEC_PROP_LOSSY) ? "L" : ".");
        _log((desc->props & AV_CODEC_PROP_LOSSLESS) ? "S" : ".");

        // Same layout as the format " %-20s %s", built in a std::string so no
        // fixed-size buffer can be overrun by a long codec description.
        std::string entry(" ");
        entry += desc->name;
        if (entry.size() < 21) entry.append(21 - entry.size(), ' ');
        entry += ' ';
        if (desc->long_name) entry += desc->long_name;
        _log(entry.c_str());

        /* print decoders/encoders when there's more than one or their
         * names are different from codec name */
        while ((codec = next_codec_for_id(desc->id, codec, 0))) {
            if (strcmp(codec->name, desc->name)) {
                print_codecs_for_id(desc->id, 0);
                break;
            }
        }
        codec = NULL;
        while ((codec = next_codec_for_id(desc->id, codec, 1))) {
            if (strcmp(codec->name, desc->name)) {
                print_codecs_for_id(desc->id, 1);
                break;
            }
        }
        _log("\n");
    }
    av_free(codecs);
    av_log(0, 0, "%s", text.c_str());
    return 0;
}
// Constructs an unloaded clip: forwards the arguments to the TheoraVideoClip
// base and puts every FFmpeg handle owned by this class into a known empty
// state so that unload() (also run by the destructor) is safe before load().
TheoraVideoClip_FFmpeg::TheoraVideoClip_FFmpeg(TheoraDataSource* data_source,
                                               TheoraOutputMode output_mode,
                                               int nPrecachedFrames,
                                               bool usePower2Stride):
    TheoraVideoClip(data_source, output_mode, nPrecachedFrames, usePower2Stride),
    TheoraAudioPacketQueue()
{
    mLoaded = false;
    mFormatContext = NULL;
    mCodecContext = NULL;
    // mAvioContext and mInputBuffer were previously left uninitialised, yet
    // unload() tests and frees them.
    mAvioContext = NULL;
    mCodec = NULL;
    mFrame = NULL;
    mInputBuffer = NULL;
    mVideoStreamIndex = -1;
    mFrameNumber = 0;
}
// Destructor: releases all FFmpeg resources through unload().
TheoraVideoClip_FFmpeg::~TheoraVideoClip_FFmpeg()
{
    this->unload();
}
// Opens `source` through a custom AVIO context, locates the first video
// stream, opens its decoder and fills in the clip's dimensions, frame rate and
// duration. On any failure the function logs (where the original did) and
// returns early, leaving the clip without a decoder.
void TheoraVideoClip_FFmpeg::load(TheoraDataSource* source)
{
    mVideoStreamIndex = -1;
    mFrameNumber = 0;
    AVDictionary* optionsDict = NULL;

    // one-time global FFmpeg setup
    if (!ffmpegInitialised)
    {
#ifdef _FFMPEG_DEBUG
        th_writelog("Initializing ffmpeg");
#endif
        th_writelog("avcodec version: " + str(avcodec_version()));
        av_register_all();
        av_log_set_level(AV_LOG_DEBUG);
        av_log_set_callback(avlog_theoraplayer);
        ffmpegInitialised = 1;
        //show_codecs(0, 0, 0);
    }

    // route all demuxer I/O through the data source
    mInputBuffer = (unsigned char*) av_malloc(READ_BUFFER_SIZE);
    mAvioContext = avio_alloc_context(mInputBuffer, READ_BUFFER_SIZE, 0, source, &readFunction, NULL, &seekFunction);
#ifdef _FFMPEG_DEBUG
    th_writelog(mName + ": avio context created");
#endif

    mFormatContext = avformat_alloc_context();
#ifdef _FFMPEG_DEBUG
    th_writelog(mName + ": avformat context created");
#endif
    mFormatContext->pb = mAvioContext;

    int err;
    if ((err = avformat_open_input(&mFormatContext, "", NULL, NULL)) != 0)
    {
        th_writelog(mName + ": avformat input opening failed!");
        th_writelog(mName + ": error_code: " + str(err));
        return;
    }

#ifdef _FFMPEG_DEBUG
    th_writelog(mName + ": avformat input opened");
#endif

    // Retrieve stream information
    if (avformat_find_stream_info(mFormatContext, NULL) < 0)
        return; // Couldn't find stream information

#ifdef _FFMPEG_DEBUG
    th_writelog(mName + ": got stream info");
#endif

    // Dump information about file onto standard error
    // av_dump_format(mFormatContext, 0, "", 0);

    // Find the first video stream (nb_streams is unsigned)
    for (unsigned int i = 0; i < mFormatContext->nb_streams; ++i)
    {
        if (mFormatContext->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
        {
            mVideoStreamIndex = (int) i;
            break;
        }
    }
    if (mVideoStreamIndex == -1)
        return; // Didn't find a video stream

#ifdef _FFMPEG_DEBUG
    th_writelog(mName + ": Found video stream at index " + str(mVideoStreamIndex));
#endif

    // Get a pointer to the codec context for the video stream
    mCodecContext = mFormatContext->streams[mVideoStreamIndex]->codec;

    // Find the decoder for the video stream
    mCodec = avcodec_find_decoder(mCodecContext->codec_id);
    if (mCodec == NULL)
    {
        th_writelog("Unsupported codec!");
        return; // Codec not found
    }
    // Open codec
    if (avcodec_open2(mCodecContext, mCodec, &optionsDict) < 0)
        return; // Could not open codec

#ifdef _FFMPEG_DEBUG
    th_writelog(mName + ": Codec opened");
#endif

    mFrame = avcodec_alloc_frame();

#ifdef _FFMPEG_DEBUG
    th_writelog(mName + ": Frame allocated");
#endif

    // Use the stream's average frame rate when the container reports one; fall
    // back to the former hard-coded 25 fps otherwise.
    const AVRational frameRate = mFormatContext->streams[mVideoStreamIndex]->avg_frame_rate;
    mFPS = (frameRate.num > 0 && frameRate.den > 0) ? (float) av_q2d(frameRate) : 25.0f;
    mWidth = mStride = mCodecContext->width;
    mHeight = mCodecContext->height;
    mFrameDuration = 1.0f / mFPS;
    // duration is in AV_TIME_BASE units; divide as float so fractional seconds
    // survive (integer division truncated them). Unknown durations map to 0.
    mDuration = (mFormatContext->duration > 0) ? mFormatContext->duration / (float) AV_TIME_BASE : 0.0f;

    if (mFrameQueue == NULL) // todo - why is this set in the backend class? it should be set in the base class, check other backends as well
    {
        mFrameQueue = new TheoraFrameQueue(this);
        mFrameQueue->setSize(mNumPrecachedFrames);
    }
}
// Releases everything load() acquired: the decoded-frame holder, the codec,
// the format context and finally the custom AVIO context with its buffer.
// Safe to call repeatedly; every member is reset to NULL.
void TheoraVideoClip_FFmpeg::unload()
{
    if (mFrame)
    {
        av_free(mFrame);
        mFrame = NULL;
    }
    if (mCodecContext)
    {
        avcodec_close(mCodecContext);
        mCodecContext = NULL;
    }
    // Close the demuxer before its I/O context goes away; it still points at
    // mAvioContext until it is closed.
    if (mFormatContext)
    {
        avformat_close_input(&mFormatContext);
        mFormatContext = NULL;
    }
    if (mAvioContext)
    {
        // The AVIO context owns the read buffer (libavformat may have replaced
        // the one allocated in load()), so free whatever it currently holds.
        // Previously the buffer was never freed.
        av_free(mAvioContext->buffer);
        av_free(mAvioContext);
        mAvioContext = NULL;
    }
    mInputBuffer = NULL;
}
// This backend has nothing to do here; it always reports success.
bool TheoraVideoClip_FFmpeg::_readData()
{
    return true;
}
bool TheoraVideoClip_FFmpeg::decodeNextFrame()
{
TheoraVideoFrame* frame = mFrameQueue->requestEmptyFrame();
if (!frame) return 0;
AVPacket packet;
int frameFinished;
while (av_read_frame(mFormatContext, &packet) >= 0)
{
if (packet.stream_index == mVideoStreamIndex)
{
avcodec_decode_video2(mCodecContext, mFrame, &frameFinished, &packet);
if (frameFinished)
{
TheoraPixelTransform t;
memset(&t, 0, sizeof(TheoraPixelTransform));
t.y = mFrame->data[0]; t.yStride = mFrame->linesize[0];
t.u = mFrame->data[1]; t.uStride = mFrame->linesize[1];
t.v = mFrame->data[2]; t.vStride = mFrame->linesize[2];
frame->decode(&t);
frame->mTimeToDisplay = mFrameNumber / mFPS;
frame->mIteration = mIteration;
frame->_setFrameNumber(mFrameNumber++);
av_free_packet(&packet);
break;
}
}
av_free_packet(&packet);
}
return 1;
}
// Flushes queued audio packets to the audio interface while holding the audio
// mutex. Skipped when there is no audio interface or the timer is paused.
void TheoraVideoClip_FFmpeg::decodedAudioCheck()
{
    const bool shouldFlush = mAudioInterface && !mTimer->isPaused();
    if (shouldFlush)
    {
        mAudioMutex->lock();
        flushAudioPackets(mAudioInterface);
        mAudioMutex->unlock();
    }
}
// Audio decoding is not implemented for this backend; always returns -1.
float TheoraVideoClip_FFmpeg::decodeAudio()
{
    return -1.0f;
}
// Seeking is not implemented for this backend: the body is intentionally
// empty, so a seek request leaves the decoder position unchanged.
void TheoraVideoClip_FFmpeg::doSeek()
{
}
// Restarting is not implemented for this backend: the body is intentionally
// empty, so the stream is not rewound.
void TheoraVideoClip_FFmpeg::_restart()
{
}
#endif

View file

@ -1,53 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#if defined(__FFMPEG) && !defined(_TheoraVideoClip_FFmpeg_h)
#define _TheoraVideoClip_FFmpeg_h
#include "TheoraAudioPacketQueue.h"
#include "TheoraVideoClip.h"
struct AVFormatContext;
struct AVCodecContext;
struct AVCodec;
struct AVFrame;
struct AVIOContext;
// Video clip backend that decodes through FFmpeg (libavformat / libavcodec).
// Only compiled when __FFMPEG is defined. The FFmpeg types are forward
// declared above so this header does not pull in the FFmpeg headers.
class TheoraVideoClip_FFmpeg : public TheoraVideoClip, public TheoraAudioPacketQueue
{
protected:
bool mLoaded;
// demuxer context for the opened input
AVFormatContext* mFormatContext;
// codec context of the selected video stream
AVCodecContext* mCodecContext;
// custom I/O context that reads from the clip's data source
AVIOContext* mAvioContext;
// decoder chosen for the video stream
AVCodec* mCodec;
// holds the most recently decoded picture
AVFrame* mFrame;
// read buffer handed to the custom I/O context
unsigned char* mInputBuffer;
// index of the video stream inside mFormatContext, -1 when none was found
int mVideoStreamIndex;
// number of the next frame to be produced
int mFrameNumber;
// releases all FFmpeg resources held by the clip
void unload();
void doSeek();
public:
TheoraVideoClip_FFmpeg(TheoraDataSource* data_source,
TheoraOutputMode output_mode,
int nPrecachedFrames,
bool usePower2Stride);
~TheoraVideoClip_FFmpeg();
bool _readData();
// decodes one video frame into the frame queue
bool decodeNextFrame();
void _restart();
// opens the given data source and prepares the video decoder
void load(TheoraDataSource* source);
float decodeAudio();
void decodedAudioCheck();
// name of this decoder backend
std::string getDecoderName() { return "FFmpeg"; }
};
#endif

View file

@ -1,703 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#ifdef __THEORA
#include <memory.h>
#include <algorithm>
#include "TheoraVideoManager.h"
#include "TheoraFrameQueue.h"
#include "TheoraVideoFrame.h"
#include "TheoraAudioInterface.h"
#include "TheoraTimer.h"
#include "TheoraDataSource.h"
#include "TheoraUtil.h"
#include "TheoraException.h"
#include "TheoraVideoClip_Theora.h"
#include "TheoraPixelTransform.h"
// Constructs an unloaded clip: forwards the arguments to the TheoraVideoClip
// base, clears the Theora decoder/setup handles and zeroes the stream and
// audio counters.
TheoraVideoClip_Theora::TheoraVideoClip_Theora(TheoraDataSource* data_source,
                                               TheoraOutputMode output_mode,
                                               int nPrecachedFrames,
                                               bool usePower2Stride):
    TheoraVideoClip(data_source, output_mode, nPrecachedFrames, usePower2Stride),
    TheoraAudioPacketQueue()
{
    // no decoder exists until load() allocates one
    mInfo.TheoraSetup = NULL;
    mInfo.TheoraDecoder = NULL;

    // header counters, advanced while the stream headers are parsed
    mTheoraStreams = 0;
    mVorbisStreams = 0;

    mLastDecodedFrameNumber = 0;
    mReadAudioSamples = 0;
}
// Destructor: when a decoder was created, frees the Theora decoder and setup
// data, the Vorbis synthesis state (if a Vorbis stream was set up) and all
// Ogg / Theora / Vorbis stream, comment and info structures.
TheoraVideoClip_Theora::~TheoraVideoClip_Theora()
{
    if (mInfo.TheoraDecoder)
    {
        th_decode_free(mInfo.TheoraDecoder);
        th_setup_free(mInfo.TheoraSetup);

        // load() initialises the Vorbis DSP state and block whenever a Vorbis
        // stream was found, so key the cleanup on the same condition rather than
        // on mAudioInterface. The block is cleared before the DSP state it was
        // initialised from.
        if (mVorbisStreams)
        {
            vorbis_block_clear(&mInfo.VorbisBlock);
            vorbis_dsp_clear(&mInfo.VorbisDSPState);
        }

        ogg_stream_clear(&mInfo.TheoraStreamState);
        th_comment_clear(&mInfo.TheoraComment);
        th_info_clear(&mInfo.TheoraInfo);

        ogg_stream_clear(&mInfo.VorbisStreamState);
        vorbis_comment_clear(&mInfo.VorbisComment);
        vorbis_info_clear(&mInfo.VorbisInfo);

        ogg_sync_clear(&mInfo.OggSyncState);
    }
}
bool TheoraVideoClip_Theora::_readData()
{
int audio_eos = 0, serno;
float audio_time = 0;
float time = mTimer->getTime();
if (mRestarted) time = 0;
for (;;)
{
char *buffer = ogg_sync_buffer(&mInfo.OggSyncState, 4096);
int bytes_read = mStream->read(buffer, 4096);
ogg_sync_wrote(&mInfo.OggSyncState, bytes_read);
if (bytes_read < 4096)
{
if (bytes_read == 0)
{
if (!mAutoRestart) mEndOfFile = true;
return 0;
}
}
// when we fill the stream with enough pages, it'll start spitting out packets
// which contain keyframes, delta frames or audio data
while (ogg_sync_pageout(&mInfo.OggSyncState, &mInfo.OggPage) > 0)
{
serno = ogg_page_serialno(&mInfo.OggPage);
if (serno == mInfo.TheoraStreamState.serialno) ogg_stream_pagein(&mInfo.TheoraStreamState, &mInfo.OggPage);
if (mAudioInterface && serno == mInfo.VorbisStreamState.serialno)
{
ogg_int64_t g = ogg_page_granulepos(&mInfo.OggPage);
audio_time = (float) vorbis_granule_time(&mInfo.VorbisDSPState, g);
audio_eos = ogg_page_eos(&mInfo.OggPage);
ogg_stream_pagein(&mInfo.VorbisStreamState, &mInfo.OggPage);
}
}
if (!(mAudioInterface && !audio_eos && audio_time < time + 1.0f))
break;
}
return 1;
}
bool TheoraVideoClip_Theora::decodeNextFrame()
{
if (mEndOfFile) return 0;
TheoraVideoFrame* frame = mFrameQueue->requestEmptyFrame();
if (!frame) return 0; // max number of precached frames reached
bool should_restart = 0;
ogg_packet opTheora;
ogg_int64_t granulePos;
th_ycbcr_buffer buff;
int ret, nAttempts;
for (;;)
{
// ogg_stream_packetout can return -1 and the official docs suggest to do subsequent calls until it succeeds
// because the data is out of sync. still will limit the number of attempts just in case
for (ret = -1, nAttempts = 0; ret < 0 && nAttempts < 100; nAttempts++)
{
ret = ogg_stream_packetout(&mInfo.TheoraStreamState, &opTheora);
}
if (ret > 0)
{
int status = th_decode_packetin(mInfo.TheoraDecoder, &opTheora, &granulePos);
if (status != 0 && status != TH_DUPFRAME) continue; // 0 means success
float time = (float) th_granule_time(mInfo.TheoraDecoder, granulePos);
unsigned long frame_number = (unsigned long) th_granule_frame(mInfo.TheoraDecoder, granulePos);
if (time < mTimer->getTime() && !mRestarted && frame_number % 16 != 0)
{
// %16 operation is here to prevent a playback halt during video playback if the decoder can't keep up with demand.
#ifdef _DEBUG
th_writelog(mName + ": pre-dropped frame " + str((int) frame_number));
#endif
++mNumDroppedFrames;
continue; // drop frame
}
frame->mTimeToDisplay = time - mFrameDuration;
frame->mIteration = mIteration;
frame->_setFrameNumber(frame_number);
mLastDecodedFrameNumber = frame_number;
th_decode_ycbcr_out(mInfo.TheoraDecoder, buff);
TheoraPixelTransform t;
memset(&t, 0, sizeof(TheoraPixelTransform));
t.y = buff[0].data; t.yStride = buff[0].stride;
t.u = buff[1].data; t.uStride = buff[1].stride;
t.v = buff[2].data; t.vStride = buff[2].stride;
frame->decode(&t);
break;
}
else
{
if (!_readData())
{
frame->mInUse = 0;
should_restart = mAutoRestart;
break;
}
}
}
if (mAudioInterface != NULL)
{
mAudioMutex->lock();
decodeAudio();
mAudioMutex->unlock();
}
if (should_restart)
{
++mIteration;
_restart();
}
return 1;
}
void TheoraVideoClip_Theora::_restart()
{
bool paused = mTimer->isPaused();
if (!paused) mTimer->pause();
long granule=0;
th_decode_ctl(mInfo.TheoraDecoder,TH_DECCTL_SET_GRANPOS,&granule,sizeof(granule));
th_decode_free(mInfo.TheoraDecoder);
mInfo.TheoraDecoder=th_decode_alloc(&mInfo.TheoraInfo,mInfo.TheoraSetup);
ogg_stream_reset(&mInfo.TheoraStreamState);
if (mAudioInterface)
{
// empty the DSP buffer
//float **pcm;
//int len = vorbis_synthesis_pcmout(&mInfo.VorbisDSPState,&pcm);
//if (len) vorbis_synthesis_read(&mInfo.VorbisDSPState,len);
ogg_packet opVorbis;
mReadAudioSamples = 0;
while (ogg_stream_packetout(&mInfo.VorbisStreamState,&opVorbis) > 0)
{
if (vorbis_synthesis(&mInfo.VorbisBlock,&opVorbis) == 0)
vorbis_synthesis_blockin(&mInfo.VorbisDSPState,&mInfo.VorbisBlock);
}
ogg_stream_reset(&mInfo.VorbisStreamState);
}
ogg_sync_reset(&mInfo.OggSyncState);
mStream->seek(0);
ogg_int64_t granulePos = 0;
th_decode_ctl(mInfo.TheoraDecoder, TH_DECCTL_SET_GRANPOS, &granulePos, sizeof(granule));
mEndOfFile = false;
mRestarted = 1;
if (!paused) mTimer->play();
}
// Loads a Theora/Vorbis Ogg stream from `source`: parses the headers, creates
// the Theora decoder and the frame queue, copies the picture geometry and
// frame rate into the clip, estimates the frame count/duration by scanning
// pages from the end of the stream, rewinds, and sets up Vorbis decoding plus
// an audio interface when a Vorbis stream was found and a factory exists.
void TheoraVideoClip_Theora::load(TheoraDataSource* source)
{
#ifdef _DEBUG
    th_writelog("-----");
#endif
    mStream = source;
    readTheoraVorbisHeaders();

    mInfo.TheoraDecoder = th_decode_alloc(&mInfo.TheoraInfo, mInfo.TheoraSetup);

    // picture geometry as reported by the Theora info header
    mWidth           = mInfo.TheoraInfo.frame_width;
    mHeight          = mInfo.TheoraInfo.frame_height;
    mSubFrameWidth   = mInfo.TheoraInfo.pic_width;
    mSubFrameHeight  = mInfo.TheoraInfo.pic_height;
    mSubFrameOffsetX = mInfo.TheoraInfo.pic_x;
    mSubFrameOffsetY = mInfo.TheoraInfo.pic_y;

    // a stride of 1 selects the next power of two of the width
    if (mStride == 1) mStride = _nextPow2(getWidth());
    else mStride = getWidth();

    mFPS = mInfo.TheoraInfo.fps_numerator / (float) mInfo.TheoraInfo.fps_denominator;

#ifdef _DEBUG
    th_writelog("width: " + str(mWidth) + ", height: " + str(mHeight) + ", fps: " + str((int) getFPS()));
#endif
    mFrameQueue = new TheoraFrameQueue(this);
    mFrameQueue->setSize(mNumPrecachedFrames);

    // find out the duration of the file by seeking to the end
    // having ogg decode pages, extract the granule pos from
    // the last theora page and seek back to beginning of the file
    long streamSize = mStream->size(), seekPos;
    for (int attempt = 1; attempt <= 50; ++attempt)
    {
        // each attempt looks at a window that is 4096 bytes larger than the last
        const int window = 4096 * attempt;

        ogg_sync_reset(&mInfo.OggSyncState);
        seekPos = streamSize - window;
        if (seekPos < 0) seekPos = 0;
        mStream->seek(seekPos);

        char *chunk = ogg_sync_buffer(&mInfo.OggSyncState, window);
        int got = mStream->read(chunk, window);
        ogg_sync_wrote(&mInfo.OggSyncState, got);
        ogg_sync_pageseek(&mInfo.OggSyncState, &mInfo.OggPage);

        while (ogg_sync_pageout(&mInfo.OggSyncState, &mInfo.OggPage) != 0)
        {
            // if page is not a theora page, skip it
            if (ogg_page_serialno(&mInfo.OggPage) != mInfo.TheoraStreamState.serialno) continue;

            ogg_int64_t pageGranule = ogg_page_granulepos(&mInfo.OggPage);
            if (pageGranule >= 0)
            {
                mNumFrames = (int) th_granule_frame(mInfo.TheoraDecoder, pageGranule) + 1;
            }
            else if (mNumFrames > 0)
            {
                ++mNumFrames; // append delta frames at the end to get the exact number
            }
        }
        if (mNumFrames > 0 || streamSize - window < 0) break;
    }

    if (mNumFrames < 0)
    {
        th_writelog("unable to determine file duration!");
    }
    else
    {
        mDuration = mNumFrames / mFPS;
#ifdef _DEBUG
        th_writelog("duration: " + strf(mDuration) + " seconds");
#endif
    }

    // restore to beginning of stream.
    ogg_sync_reset(&mInfo.OggSyncState);
    mStream->seek(0);

    if (mVorbisStreams) // if there is no audio interface factory defined, even though the video
                        // clip might have audio, it will be ignored
    {
        vorbis_synthesis_init(&mInfo.VorbisDSPState, &mInfo.VorbisInfo);
        vorbis_block_init(&mInfo.VorbisDSPState, &mInfo.VorbisBlock);
        mNumAudioChannels = mInfo.VorbisInfo.channels;
        mAudioFrequency = (int) mInfo.VorbisInfo.rate;

        // create an audio interface instance if available
        TheoraAudioInterfaceFactory* factory = TheoraVideoManager::getSingleton().getAudioInterfaceFactory();
        printf("**** audio factory is %p\n", factory);
        if (factory) setAudioInterface(factory->createInstance(this, mNumAudioChannels, mAudioFrequency));
    }

    mFrameDuration = 1.0f / getFPS();
#ifdef _DEBUG
    th_writelog("-----");
#endif
}
// Reads the Ogg container headers from mStream and fills mInfo with the
// Theora header state and, when an audio interface factory is registered,
// the Vorbis header state as well.
// Throws TheoraGenericException when a header packet is rejected or when the
// stream ends before all three header packets of a detected codec were read.
void TheoraVideoClip_Theora::readTheoraVorbisHeaders()
{
ogg_packet tempOggPacket;
bool done = false;
// audio headers are only parsed when somebody can actually consume the audio
bool decode_audio=TheoraVideoManager::getSingleton().getAudioInterfaceFactory() != NULL;
//init Vorbis/Theora Layer
//Ensure all structures get cleared out.
memset(&mInfo.OggSyncState, 0, sizeof(ogg_sync_state));
memset(&mInfo.OggPage, 0, sizeof(ogg_page));
memset(&mInfo.VorbisStreamState, 0, sizeof(ogg_stream_state));
memset(&mInfo.TheoraStreamState, 0, sizeof(ogg_stream_state));
memset(&mInfo.TheoraInfo, 0, sizeof(th_info));
memset(&mInfo.TheoraComment, 0, sizeof(th_comment));
memset(&mInfo.VorbisInfo, 0, sizeof(vorbis_info));
memset(&mInfo.VorbisDSPState, 0, sizeof(vorbis_dsp_state));
memset(&mInfo.VorbisBlock, 0, sizeof(vorbis_block));
memset(&mInfo.VorbisComment, 0, sizeof(vorbis_comment));
ogg_sync_init(&mInfo.OggSyncState);
th_comment_init(&mInfo.TheoraComment);
th_info_init(&mInfo.TheoraInfo);
vorbis_info_init(&mInfo.VorbisInfo);
vorbis_comment_init(&mInfo.VorbisComment);
// Phase 1: walk the beginning-of-stream (BOS) pages and identify which
// logical stream is Theora and which one is Vorbis.
while (!done)
{
char *buffer = ogg_sync_buffer(&mInfo.OggSyncState, 4096);
int bytes_read = mStream->read(buffer, 4096);
ogg_sync_wrote(&mInfo.OggSyncState, bytes_read);
if (bytes_read == 0)
break;
while (ogg_sync_pageout(&mInfo.OggSyncState, &mInfo.OggPage) > 0)
{
ogg_stream_state OggStateTest;
//is this an initial header? If not, stop
if (!ogg_page_bos(&mInfo.OggPage))
{
//This is done blindly, because stream only accept themselves
if (mTheoraStreams) ogg_stream_pagein(&mInfo.TheoraStreamState, &mInfo.OggPage);
if (mVorbisStreams) ogg_stream_pagein(&mInfo.VorbisStreamState, &mInfo.OggPage);
done=true;
break;
}
// feed the BOS page into a scratch stream state so its first packet can be probed
ogg_stream_init(&OggStateTest, ogg_page_serialno(&mInfo.OggPage));
ogg_stream_pagein(&OggStateTest, &mInfo.OggPage);
ogg_stream_packetout(&OggStateTest, &tempOggPacket);
//identify the codec
int ret;
if (!mTheoraStreams)
{
ret = th_decode_headerin(&mInfo.TheoraInfo, &mInfo.TheoraComment, &mInfo.TheoraSetup, &tempOggPacket);
if (ret > 0)
{
//This is the Theora Header
// the scratch state is adopted as the Theora stream state (bitwise copy, not cleared)
memcpy(&mInfo.TheoraStreamState, &OggStateTest, sizeof(OggStateTest));
mTheoraStreams = 1;
continue;
}
}
if (decode_audio && !mVorbisStreams &&
vorbis_synthesis_headerin(&mInfo.VorbisInfo, &mInfo.VorbisComment, &tempOggPacket) >=0)
{
//This is vorbis header
// the scratch state is adopted as the Vorbis stream state (bitwise copy, not cleared)
memcpy(&mInfo.VorbisStreamState, &OggStateTest, sizeof(OggStateTest));
mVorbisStreams = 1;
continue;
}
//Hmm. I guess it's not a header we support, so erase it
ogg_stream_clear(&OggStateTest);
}
}
// Phase 2: mTheoraStreams / mVorbisStreams double as header-packet counters;
// keep reading until every detected codec has received 3 header packets.
while ((mTheoraStreams && (mTheoraStreams < 3)) ||
(mVorbisStreams && (mVorbisStreams < 3)))
{
//Check 2nd'dary headers... Theora First
int iSuccess;
while (mTheoraStreams && mTheoraStreams < 3 &&
(iSuccess = ogg_stream_packetout(&mInfo.TheoraStreamState, &tempOggPacket)))
{
if (iSuccess < 0)
throw TheoraGenericException("Error parsing Theora stream headers.");
if (!th_decode_headerin(&mInfo.TheoraInfo, &mInfo.TheoraComment, &mInfo.TheoraSetup, &tempOggPacket))
throw TheoraGenericException("invalid theora stream");
++mTheoraStreams;
} //end while looking for more theora headers
//look 2nd vorbis header packets
// NOTE(review): unlike the Theora loop above, this condition does not test
// mVorbisStreams != 0, so it also polls the zeroed Vorbis stream state when
// no Vorbis stream was detected - confirm this is harmless with the libogg in use.
while (mVorbisStreams < 3 && (iSuccess = ogg_stream_packetout(&mInfo.VorbisStreamState, &tempOggPacket)))
{
if (iSuccess < 0)
throw TheoraGenericException("Error parsing vorbis stream headers");
if (vorbis_synthesis_headerin(&mInfo.VorbisInfo, &mInfo.VorbisComment,&tempOggPacket))
throw TheoraGenericException("invalid stream");
++mVorbisStreams;
} //end while looking for more vorbis headers
//Not finished with Headers, get some more file data
if (ogg_sync_pageout(&mInfo.OggSyncState, &mInfo.OggPage) > 0)
{
if (mTheoraStreams) ogg_stream_pagein(&mInfo.TheoraStreamState, &mInfo.OggPage);
if (mVorbisStreams) ogg_stream_pagein(&mInfo.VorbisStreamState, &mInfo.OggPage);
}
else
{
char *buffer = ogg_sync_buffer(&mInfo.OggSyncState, 4096);
int bytes_read = mStream->read(buffer, 4096);
ogg_sync_wrote(&mInfo.OggSyncState, bytes_read);
if (bytes_read == 0)
throw TheoraGenericException("End of file found prematurely");
}
} //end while looking for all headers
// writelog("Vorbis Headers: " + str(mVorbisHeaders) + " Theora Headers : " + str(mTheoraHeaders));
}
// Hands every queued audio packet over to the audio interface.
// Does nothing when the clip has no audio interface or its timer is paused.
void TheoraVideoClip_Theora::decodedAudioCheck()
{
	const bool can_flush = mAudioInterface && !mTimer->isPaused();
	if (can_flush)
	{
		// the packet queue is shared with the decoder, so guard the flush
		mAudioMutex->lock();
		flushAudioPackets(mAudioInterface);
		mAudioMutex->unlock();
	}
}
// Pulls decoded PCM out of the Vorbis decoder and appends it to the audio
// packet queue, feeding the decoder with more packets / stream data as needed.
// Returns the time of the first packet seen in this call that carried a
// non-negative granule position, advanced by the samples queued after later
// packets were decoded; returns -1 when no such packet was seen or when
// mRestarted is set.
float TheoraVideoClip_Theora::decodeAudio()
{
if (mRestarted) return -1;
ogg_packet opVorbis;
float **pcm;
int len = 0;
float timestamp = -1;
bool read_past_timestamp = 0;
float factor = 1.0f / mAudioFrequency; // seconds per sample
float videoTime = (float) mLastDecodedFrameNumber / mFPS;
// how far (in seconds) audio should be buffered ahead of video:
// the duration of a full frame queue plus one second
float min = mFrameQueue->getSize() / mFPS + 1.0f;
for (;;)
{
len = vorbis_synthesis_pcmout(&mInfo.VorbisDSPState, &pcm);
if (len == 0)
{
// no PCM pending: try to decode the next Vorbis packet
if (ogg_stream_packetout(&mInfo.VorbisStreamState, &opVorbis) > 0)
{
if (vorbis_synthesis(&mInfo.VorbisBlock, &opVorbis) == 0)
{
if (timestamp < 0 && opVorbis.granulepos >= 0)
{
timestamp = (float) vorbis_granule_time(&mInfo.VorbisDSPState, opVorbis.granulepos);
}
else if (timestamp >= 0) read_past_timestamp = 1;
vorbis_synthesis_blockin(&mInfo.VorbisDSPState, &mInfo.VorbisBlock);
}
continue;
}
else
{
// no packet available either: read more stream data, but only while
// the buffered audio is less than `min` seconds ahead of the video
float audioTime = mReadAudioSamples * factor;
// always buffer up of audio ahead of the frames
if (audioTime - videoTime < min)
{
if (!_readData()) break;
}
else
break;
}
}
// NOTE(review): when _readData() succeeds above, control falls through to
// here with len == 0, so an empty packet is queued - confirm this is intended.
addAudioPacket(pcm, len, mAudioGain);
mReadAudioSamples += len;
if (read_past_timestamp) timestamp += (float) len / mInfo.VorbisInfo.rate;
vorbis_synthesis_read(&mInfo.VorbisDSPState, len); // tell vorbis we read a number of samples
}
return timestamp;
}
// Binary-searches the stream (by byte offset) for a Theora page whose frame
// number lies fewer than 10 frames before targetFrame.
// When return_keyframe is true the function returns the last examined page
// granule shifted right by keyframe_granule_shift. Otherwise it resets the
// Ogg sync state, positions the stream at the midpoint of the final search
// window (skipped when targetFrame == 0) and returns -1.
long TheoraVideoClip_Theora::seekPage(long targetFrame, bool return_keyframe)
{
int i,seek_min = 0, seek_max = (int) mStream->size();
long frame;
ogg_int64_t granule = 0;
if (targetFrame == 0) mStream->seek(0);
// at most 100 bisection steps; for targetFrame == 0 the loop is skipped entirely
for (i = (targetFrame == 0) ? 100 : 0; i < 100; ++i)
{
ogg_sync_reset(&mInfo.OggSyncState);
mStream->seek((seek_min + seek_max) / 2); // do a binary search
memset(&mInfo.OggPage, 0, sizeof(ogg_page));
ogg_sync_pageseek(&mInfo.OggSyncState, &mInfo.OggPage);
// scan forward from the midpoint until a Theora page with a valid granule shows up;
// the condition on the outer counter lets the "close enough" case end both loops
for (;i < 1000;)
{
int ret = ogg_sync_pageout(&mInfo.OggSyncState, &mInfo.OggPage);
if (ret == 1)
{
int serno = ogg_page_serialno(&mInfo.OggPage);
if (serno == mInfo.TheoraStreamState.serialno)
{
granule = ogg_page_granulepos(&mInfo.OggPage);
if (granule >= 0)
{
frame = (long) th_granule_frame(mInfo.TheoraDecoder, granule);
if (frame < targetFrame && targetFrame - frame < 10)
{
// we're close enough, let's break this.
i = 1000;
break;
}
// we're not close enough, let's shorten the borders of the binary search
if (targetFrame - 1 > frame) seek_min = (seek_min + seek_max) / 2;
else seek_max = (seek_min + seek_max) / 2;
break;
}
}
}
else
{
// need more data to complete a page; stop this scan when the stream is exhausted
char *buffer = ogg_sync_buffer(&mInfo.OggSyncState, 4096);
int bytes_read = mStream->read(buffer, 4096);
if (bytes_read == 0) break;
ogg_sync_wrote(&mInfo.OggSyncState, bytes_read);
}
}
}
if (return_keyframe) return (long) (granule >> mInfo.TheoraInfo.keyframe_granule_shift);
ogg_sync_reset(&mInfo.OggSyncState);
memset(&mInfo.OggPage, 0, sizeof(ogg_page));
ogg_sync_pageseek(&mInfo.OggSyncState, &mInfo.OggPage);
if (targetFrame == 0) return -1;
mStream->seek((seek_min + seek_max) / 2); // do a binary search
return -1;
}
// Performs the seek requested through mSeekFrame.
//
// The timer is moved to the target time and paused while seeking, the video
// (and, if present, audio) decoder state is reset, the stream is positioned
// at the keyframe preceding the target frame and frames are decoded until the
// target is reached. When an audio interface exists, audio is decoded up to
// the first timestamp and the queued packets are trimmed or padded with
// silence so that the audio lines up with the seek time. mAudioMutex is held
// for the whole operation when an audio interface exists.
void TheoraVideoClip_Theora::doSeek()
{
#if _DEBUG
	th_writelog(mName + " [seek]: seeking to frame " + str(mSeekFrame));
#endif
	int frame;
	float time = mSeekFrame / getFPS();
	mTimer->seek(time);
	bool paused = mTimer->isPaused();
	if (!paused) mTimer->pause(); // pause until seeking is done
	mEndOfFile = false;
	mRestarted = false;
	resetFrameQueue();
	// reset the video decoder.
	ogg_stream_reset(&mInfo.TheoraStreamState);
	th_decode_free(mInfo.TheoraDecoder);
	mInfo.TheoraDecoder = th_decode_alloc(&mInfo.TheoraInfo, mInfo.TheoraSetup);
	if (mAudioInterface)
	{
		mAudioMutex->lock();
		ogg_stream_reset(&mInfo.VorbisStreamState);
		vorbis_synthesis_restart(&mInfo.VorbisDSPState);
		destroyAllAudioPackets();
	}
	// first seek to desired frame, then figure out the location of the
	// previous keyframe and seek to it.
	// then by setting the correct time, the decoder will skip N frames untill
	// we get the frame we want.
	frame = (int) seekPage(mSeekFrame, 1); // find the keyframe nearest to the target frame
#ifdef _DEBUG
	// th_writelog(mName + " [seek]: nearest keyframe for frame " + str(mSeekFrame) + " is frame: " + str(frame));
#endif
	seekPage(std::max(0, frame - 1), 0);
	ogg_packet opTheora;
	ogg_int64_t granulePos;
	bool granule_set = 0;
	if (frame <= 1)
	{
		if (mInfo.TheoraInfo.version_major == 3 && mInfo.TheoraInfo.version_minor == 2 && mInfo.TheoraInfo.version_subminor == 0)
			granulePos = 0;
		else
			granulePos = 1; // because of difference in granule interpretation in theora streams 3.2.0 and newer ones
		th_decode_ctl(mInfo.TheoraDecoder, TH_DECCTL_SET_GRANPOS, &granulePos, sizeof(granulePos));
		granule_set = 1;
	}
	// now that we've found the keyframe that preceeds our desired frame, lets keep on decoding frames until we
	// reach our target frame.
	int status, ret;
	for (;mSeekFrame != 0;)
	{
		ret = ogg_stream_packetout(&mInfo.TheoraStreamState, &opTheora);
		if (ret > 0)
		{
			if (!granule_set)
			{
				// theora decoder requires to set the granule pos after seek to be able to determine the current frame
				if (opTheora.granulepos >= 0)
				{
					th_decode_ctl(mInfo.TheoraDecoder, TH_DECCTL_SET_GRANPOS, &opTheora.granulepos, sizeof(opTheora.granulepos));
					granule_set = 1;
				}
				else continue; // ignore prev delta frames until we hit a keyframe
			}
			status = th_decode_packetin(mInfo.TheoraDecoder, &opTheora, &granulePos);
			if (status != 0 && status != TH_DUPFRAME) continue;
			frame = (int) th_granule_frame(mInfo.TheoraDecoder, granulePos);
			if (frame >= mSeekFrame - 1) break;
		}
		else
		{
			if (!_readData())
			{
				th_writelog(mName + " [seek]: fineseeking failed, _readData failed!");
				if (mAudioInterface) mAudioMutex->unlock();
				return;
			}
		}
	}
#ifdef _DEBUG
	// th_writelog(mName + " [seek]: fineseeked to frame " + str(frame + 1) + ", requested: " + str(mSeekFrame));
#endif
	if (mAudioInterface)
	{
		// read audio data until we reach a timestamp. this usually takes only one iteration, but just in case let's
		// wrap it in a loop
		float timestamp = -1;
		for (;;)
		{
			timestamp = decodeAudio();
			if (timestamp >= 0) break;
			// stop once the stream has no more data; otherwise this loop would
			// spin forever (with mAudioMutex held) when seeking close to the end
			if (!_readData())
			{
				th_writelog(mName + " [seek]: audio sync failed, _readData failed!");
				break;
			}
		}
		if (timestamp >= 0)
		{
			// note: packet sizes are counted in interleaved floats, hence rate includes the channel count
			float rate = (float) mAudioFrequency * mNumAudioChannels;
			float queued_time = getAudioPacketQueueLength();
			// at this point there are only 2 possibilities: either we have too much packets and we have to delete
			// the first N ones, or we don't have enough, so let's fill the gap with silence.
			if (time > timestamp - queued_time)
			{
				while (mTheoraAudioPacketQueue != NULL)
				{
					if (time > timestamp - queued_time + mTheoraAudioPacketQueue->numSamples / rate)
					{
						queued_time -= mTheoraAudioPacketQueue->numSamples / rate;
						destroyAudioPacket(popAudioPacket());
					}
					else
					{
						int n_trim = (int) ((timestamp - queued_time + mTheoraAudioPacketQueue->numSamples / rate - time) * rate);
						if (mTheoraAudioPacketQueue->numSamples - n_trim <= 0)
							destroyAudioPacket(popAudioPacket()); // if there's no data to be left, just destroy it
						else
						{
							for (int i = n_trim, j = 0; i < mTheoraAudioPacketQueue->numSamples; ++i, ++j)
								mTheoraAudioPacketQueue->pcm[j] = mTheoraAudioPacketQueue->pcm[i];
							mTheoraAudioPacketQueue->numSamples -= n_trim;
						}
						break;
					}
				}
			}
			else
			{
				// expand the first packet with silence.
				if (mTheoraAudioPacketQueue) // just in case!
				{
					int i, j, nmissing = (int) ((timestamp - queued_time - time) * rate);
					if (nmissing > 0)
					{
						float* samples = new float[nmissing + mTheoraAudioPacketQueue->numSamples];
						for (i = 0; i < nmissing; ++i) samples[i] = 0;
						for (j = 0; i < nmissing + mTheoraAudioPacketQueue->numSamples; ++i, ++j)
							samples[i] = mTheoraAudioPacketQueue->pcm[j];
						delete [] mTheoraAudioPacketQueue->pcm;
						mTheoraAudioPacketQueue->pcm = samples;
						// the packet grew by the prepended silence; without this the
						// last nmissing real samples would never be played
						mTheoraAudioPacketQueue->numSamples += nmissing;
					}
				}
			}
			mReadAudioSamples = (unsigned int) (timestamp * mAudioFrequency);
		}
		mLastDecodedFrameNumber = mSeekFrame;
		mAudioMutex->unlock();
	}
	if (!paused) mTimer->play();
	mSeekFrame = -1;
}
#endif

View file

@ -1,64 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#if defined(__THEORA) && !defined(_TheoraVideoClip_Theora_h)
#define _TheoraVideoClip_Theora_h
#include <ogg/ogg.h>
#include <vorbis/vorbisfile.h>
#include <theora/theoradec.h>
#include "TheoraAudioPacketQueue.h"
#include "TheoraVideoClip.h"
// Bundles all libogg / libtheora / libvorbis state objects used by a single
// TheoraVideoClip_Theora instance.
struct TheoraInfoStruct
{
// ogg/vorbis/theora variables
ogg_sync_state OggSyncState; // container-level sync state shared by all logical streams
ogg_page OggPage; // the page most recently pulled from OggSyncState
ogg_stream_state VorbisStreamState; // logical stream carrying the audio packets
ogg_stream_state TheoraStreamState; // logical stream carrying the video packets
//Theora State
th_info TheoraInfo;
th_comment TheoraComment;
th_setup_info* TheoraSetup;
th_dec_ctx* TheoraDecoder;
//Vorbis State
vorbis_info VorbisInfo;
vorbis_dsp_state VorbisDSPState;
vorbis_block VorbisBlock;
vorbis_comment VorbisComment;
};
// Video clip implementation backed by libtheora (video) and libvorbis (audio).
// Inherits the generic clip behaviour from TheoraVideoClip and the decoded
// audio packet queue from TheoraAudioPacketQueue.
class TheoraVideoClip_Theora : public TheoraVideoClip, public TheoraAudioPacketQueue
{
protected:
TheoraInfoStruct mInfo; // ogg/theora/vorbis decoder state, stored by value
int mTheoraStreams, mVorbisStreams; // Keeps track of Theora and Vorbis Streams
// Positions the stream near targetFrame; see the definition for the meaning of the return value.
long seekPage(long targetFrame, bool return_keyframe);
// Executes the seek requested through mSeekFrame.
void doSeek();
// Parses the Theora (and optionally Vorbis) stream headers into mInfo.
void readTheoraVorbisHeaders();
unsigned int mReadAudioSamples; // running count of audio samples handed to the packet queue
unsigned long mLastDecodedFrameNumber;
public:
TheoraVideoClip_Theora(TheoraDataSource* data_source,
TheoraOutputMode output_mode,
int nPrecachedFrames,
bool usePower2Stride);
~TheoraVideoClip_Theora();
bool _readData();
bool decodeNextFrame();
void _restart();
void load(TheoraDataSource* source);
// Decodes pending audio into the packet queue; returns a timestamp or -1.
float decodeAudio();
// Flushes queued audio packets to the audio interface, if any.
void decodedAudioCheck();
std::string getDecoderName() { return "Theora"; }
};
#endif

View file

@ -1,253 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef _WIN32
#include <windows.h>
#else
#include <unistd.h>
#include <pthread.h>
#endif
#include "TheoraAsync.h"
#include "TheoraUtil.h"
#ifdef _WINRT
#include <wrl.h>
#endif
///////////////////////////////////////////////////////////////////////////////////////////////////
// Mutex
///////////////////////////////////////////////////////////////////////////////////////////////////
// Creates the platform mutex object and stores it in mHandle.
TheoraMutex::TheoraMutex()
{
#if defined(_WIN32) && !defined(_WINRT)
	// desktop Windows: WinXP does not have CreateMutexEx()
	mHandle = CreateMutex(0, 0, 0);
#elif defined(_WIN32)
	mHandle = CreateMutexEx(NULL, NULL, 0, SYNCHRONIZE);
#else
	// POSIX: the pthread mutex lives on the heap and is released in the destructor
	pthread_mutex_t* posix_mutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
	pthread_mutex_init(posix_mutex, 0);
	mHandle = posix_mutex;
#endif
}
// Destroys the platform mutex object created by the constructor.
TheoraMutex::~TheoraMutex()
{
#ifndef _WIN32
	pthread_mutex_t* posix_mutex = (pthread_mutex_t*)mHandle;
	pthread_mutex_destroy(posix_mutex);
	free(posix_mutex);
	mHandle = NULL;
#else
	CloseHandle(mHandle);
#endif
}
// Blocks until the mutex has been acquired by the calling thread.
void TheoraMutex::lock()
{
#ifndef _WIN32
	pthread_mutex_lock((pthread_mutex_t*)mHandle);
#else
	WaitForSingleObjectEx(mHandle, INFINITE, FALSE);
#endif
}
// Releases the mutex.
void TheoraMutex::unlock()
{
#ifndef _WIN32
	pthread_mutex_unlock((pthread_mutex_t*)mHandle);
#else
	ReleaseMutex(mHandle);
#endif
}
///////////////////////////////////////////////////////////////////////////////////////////////////
// Thread
///////////////////////////////////////////////////////////////////////////////////////////////////
#ifdef _WINRT
using namespace Windows::Foundation;
using namespace Windows::System::Threading;
#endif
#ifdef _WIN32
unsigned long WINAPI theoraAsyncCall(void* param)
#else
void* theoraAsyncCall(void* param)
#endif
{
TheoraThread* t = (TheoraThread*)param;
t->execute();
#ifdef _WIN32
return 0;
#else
pthread_exit(NULL);
return NULL;
#endif
}
#ifdef _WINRT
// WinRT only: plain native wrapper that keeps hold of the IAsyncAction handle
// of a thread-pool work item, so that TheoraThread can store it behind mId.
struct TheoraAsyncActionWrapper
{
public:
IAsyncAction^ mAsyncAction; // the work item started by TheoraThread::start()
TheoraAsyncActionWrapper(IAsyncAction^ asyncAction)
{
mAsyncAction = asyncAction;
}
};
#endif
// Creates a thread object in the "not running" state.
// On non-Windows platforms the storage for the pthread id is allocated here.
TheoraThread::TheoraThread() : mRunning(false), mId(0)
{
#if !defined(_WIN32)
	mId = (pthread_t*)malloc(sizeof(pthread_t));
#endif
}
// Stops the thread if it is still flagged as running, then releases the
// platform handle / id storage held in mId.
TheoraThread::~TheoraThread()
{
	if (mRunning) stop();

	if (mId == NULL) return;

#if defined(_WIN32) && !defined(_WINRT)
	CloseHandle(mId);
#elif defined(_WIN32)
	delete mId;
#else
	free((pthread_t*)mId);
#endif
	mId = NULL;
}
// Marks the thread as running and launches execute() on a new platform
// thread (Win32 thread, WinRT thread-pool work item, or pthread).
void TheoraThread::start()
{
// NOTE(review): mRunning is written here without taking mRunningMutex - confirm that is acceptable.
mRunning = true;
#ifdef _WIN32
#ifndef _WINRT
mId = CreateThread(0, 0, &theoraAsyncCall, this, 0, 0);
#else
// WinRT has no raw threads; run execute() as a thread-pool work item instead
mId = new TheoraAsyncActionWrapper(ThreadPool::RunAsync(
ref new WorkItemHandler([&](IAsyncAction^ work_item)
{
execute();
}),
WorkItemPriority::Normal, WorkItemOptions::TimeSliced));
#endif
#else
pthread_create((pthread_t*)mId, NULL, &theoraAsyncCall, this);
#endif
}
bool TheoraThread::isRunning()
{
bool ret;
mRunningMutex.lock();
ret = mRunning;
mRunningMutex.unlock();
return ret;
}
// Clears the running flag (under mRunningMutex) and then waits for the
// platform thread to finish.
void TheoraThread::join()
{
mRunningMutex.lock();
mRunning = false;
mRunningMutex.unlock();
#ifdef _WIN32
#ifndef _WINRT
WaitForSingleObject(mId, INFINITE);
if (mId != NULL)
{
CloseHandle(mId);
mId = NULL;
}
#else
// WinRT: poll the async action, up to 100 times with 50 ms sleeps in between
IAsyncAction^ action = ((TheoraAsyncActionWrapper*)mId)->mAsyncAction;
int i = 0;
while (action->Status != AsyncStatus::Completed &&
action->Status != AsyncStatus::Canceled &&
action->Status != AsyncStatus::Error &&
i < 100)
{
_psleep(50);
++i;
}
if (i >= 100)
{
// the work item did not finish in time: cancel it and poll once more with the same limits
i = 0;
action->Cancel();
while (action->Status != AsyncStatus::Completed &&
action->Status != AsyncStatus::Canceled &&
action->Status != AsyncStatus::Error &&
i < 100)
{
_psleep(50);
++i;
}
}
#endif
#else
pthread_join(*((pthread_t*)mId), 0);
#endif
}
// Resumes a suspended thread. Only implemented for desktop Win32;
// a no-op on WinRT and on non-Windows platforms.
void TheoraThread::resume()
{
#if defined(_WIN32) && !defined(_WINRT)
	ResumeThread(mId);
#endif
}
// Suspends the thread. Only implemented for desktop Win32;
// a no-op on WinRT and on non-Windows platforms.
void TheoraThread::pause()
{
#if defined(_WIN32) && !defined(_WINRT)
	SuspendThread(mId);
#endif
}
// Forcibly stops the thread when it is flagged as running: clears mRunning
// under mRunningMutex and then terminates / cancels the platform thread.
void TheoraThread::stop()
{
// NOTE(review): mRunning is tested here without holding mRunningMutex.
if (mRunning)
{
mRunningMutex.lock();
mRunning = false;
mRunningMutex.unlock();
#ifdef _WIN32
#ifndef _WINRT
TerminateThread(mId, 0);
#else
((TheoraAsyncActionWrapper*)mId)->mAsyncAction->Cancel();
#endif
#elif defined(_ANDROID)
// NOTE(review): signal number 0 is passed here; per POSIX that only performs
// error checking and delivers no signal, so this presumably does not stop the
// thread - confirm the intent.
pthread_kill(*((pthread_t*)mId), 0);
#else
pthread_cancel(*((pthread_t*)mId));
#endif
}
}

View file

@ -1,21 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#include "TheoraAudioInterface.h"
// Remembers the owning clip together with the channel count and frequency
// of the audio that will be fed into this interface.
TheoraAudioInterface::TheoraAudioInterface(TheoraVideoClip* owner, int nChannels, int freq)
{
	mClip = owner;
	mNumChannels = nChannels;
	mFreq = freq;
}
// Nothing to release at this level.
TheoraAudioInterface::~TheoraAudioInterface()
{
}

View file

@ -1,126 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#include <stdlib.h>
#include "TheoraAudioPacketQueue.h"
#include "TheoraAudioInterface.h"
// Starts out with an empty packet list.
TheoraAudioPacketQueue::TheoraAudioPacketQueue()
{
	this->mTheoraAudioPacketQueue = NULL;
}
// Frees every packet that is still queued.
TheoraAudioPacketQueue::~TheoraAudioPacketQueue()
{
	this->destroyAllAudioPackets();
}
float TheoraAudioPacketQueue::getAudioPacketQueueLength()
{
float len = 0;
for (TheoraAudioPacket* p = mTheoraAudioPacketQueue; p != NULL; p = p->next)
len += p->numSamples;
return len / (mAudioFrequency * mNumAudioChannels);
}
void TheoraAudioPacketQueue::_addAudioPacket(float* data, int numSamples)
{
TheoraAudioPacket* packet = new TheoraAudioPacket;
packet->pcm = data;
packet->numSamples = numSamples;
packet->next = NULL;
if (mTheoraAudioPacketQueue == NULL) mTheoraAudioPacketQueue = packet;
else
{
TheoraAudioPacket* last = mTheoraAudioPacketQueue;
for (TheoraAudioPacket* p = last; p != NULL; p = p->next)
last = p;
last->next = packet;
}
}
// Interleaves planar audio and queues it as one packet.
//
// buffer     - one array per channel (buffer[channel][sample]), i.e. the
//              layout the callers in this file obtain from
//              vorbis_synthesis_pcmout()
// numSamples - number of samples per channel
// gain       - values below 1.0 attenuate the samples; any other value
//              copies them unchanged
void TheoraAudioPacketQueue::addAudioPacket(float** buffer, int numSamples, float gain)
{
	float* data = new float[numSamples * mNumAudioChannels];
	float* dataptr = data;
	int i;
	unsigned int j;
	if (gain < 1.0f)
	{
		// apply gain, let's attenuate the samples
		// (indexed [channel][sample], exactly like the plain copy below; the
		// previous [sample][channel] order read outside the channel array)
		for (i = 0; i < numSamples; ++i)
			for (j = 0; j < mNumAudioChannels; j++, ++dataptr)
				*dataptr = buffer[j][i] * gain;
	}
	else
	{
		// do a simple copy, faster then the above method, when gain is 1.0f
		for (i = 0; i < numSamples; ++i)
			for (j = 0; j < mNumAudioChannels; j++, ++dataptr)
				*dataptr = buffer[j][i];
	}
	_addAudioPacket(data, numSamples * mNumAudioChannels);
}
// Queues a copy of already interleaved audio as one packet.
//
// buffer     - numSamples * mNumAudioChannels interleaved floats
// numSamples - number of samples per channel
// gain       - values below 1.0 attenuate the samples; any other value
//              copies them unchanged
void TheoraAudioPacketQueue::addAudioPacket(float* buffer, int numSamples, float gain)
{
	float* data = new float[numSamples * mNumAudioChannels];
	const int total = numSamples * mNumAudioChannels;
	const bool attenuate = gain < 1.0f;
	int k = 0;
	if (attenuate)
	{
		// scale every sample while copying
		while (k < total)
		{
			data[k] = buffer[k] * gain;
			++k;
		}
	}
	else
	{
		// plain copy, no scaling needed
		while (k < total)
		{
			data[k] = buffer[k];
			++k;
		}
	}
	_addAudioPacket(data, total);
}
// Detaches and returns the first packet of the queue, or NULL when the queue
// is empty. The caller becomes responsible for the returned packet.
TheoraAudioPacket* TheoraAudioPacketQueue::popAudioPacket()
{
	TheoraAudioPacket* head = mTheoraAudioPacketQueue;
	if (head != NULL)
	{
		mTheoraAudioPacketQueue = head->next;
	}
	return head;
}
// Frees a packet together with its sample buffer; NULL is ignored.
void TheoraAudioPacketQueue::destroyAudioPacket(TheoraAudioPacket* p)
{
	if (p != NULL)
	{
		delete [] p->pcm;
		delete p;
	}
}
void TheoraAudioPacketQueue::destroyAllAudioPackets()
{
for (TheoraAudioPacket* p = popAudioPacket(); p != NULL; p = popAudioPacket())
destroyAudioPacket(p);
}
// Passes every queued packet, in queue order, to audioInterface->insertData()
// and frees it afterwards, leaving the queue empty.
void TheoraAudioPacketQueue::flushAudioPackets(TheoraAudioInterface* audioInterface)
{
	TheoraAudioPacket* packet;
	while ((packet = popAudioPacket()) != NULL)
	{
		audioInterface->insertData(packet->pcm, packet->numSamples);
		destroyAudioPacket(packet);
	}
}

View file

@ -1,128 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#include <stdio.h>
#include <memory.h>
#include "TheoraDataSource.h"
#include "TheoraException.h"
#include "TheoraVideoManager.h"
#include "TheoraUtil.h"
// Nothing to release at this level.
TheoraDataSource::~TheoraDataSource()
{
}
// Remembers the file name only; the file itself is opened on first use.
TheoraFileDataSource::TheoraFileDataSource(std::string filename)
{
	mFilePtr = NULL;
	mFilename = filename;
}
// Closes the file if it was ever opened.
TheoraFileDataSource::~TheoraFileDataSource()
{
	if (!mFilePtr) return;
	fclose(mFilePtr);
	mFilePtr = NULL;
}
void TheoraFileDataSource::openFile()
{
if (mFilePtr == NULL)
{
mFilePtr=fopen(mFilename.c_str(), "rb");
if (!mFilePtr)
{
std::string msg = "Can't open video file: " + mFilename;
th_writelog(msg);
throw TheoraGenericException(msg);
}
fseek(mFilePtr, 0, SEEK_END);
mSize = ftell(mFilePtr);
fseek(mFilePtr, 0, SEEK_SET);
}
}
// Reads up to nBytes from the current file position into `output`, opening
// the file first if necessary. Returns the number of bytes actually read.
int TheoraFileDataSource::read(void* output, int nBytes)
{
	if (mFilePtr == NULL)
	{
		openFile();
	}
	return (int) fread(output, 1, nBytes, mFilePtr);
}
// Moves the file position to byte_index (counted from the start of the
// file), opening the file first if necessary.
void TheoraFileDataSource::seek(unsigned long byte_index)
{
	if (mFilePtr == NULL)
	{
		openFile();
	}
	fseek(mFilePtr, byte_index, SEEK_SET);
}
// Returns the file size recorded by openFile(), opening the file first if
// necessary.
unsigned long TheoraFileDataSource::size()
{
	if (mFilePtr == NULL)
	{
		openFile();
	}
	return mSize;
}
// Returns the current file position, or 0 when the file has not been opened.
unsigned long TheoraFileDataSource::tell()
{
	if (mFilePtr != NULL)
	{
		return ftell(mFilePtr);
	}
	return 0;
}
// Loads the whole file `filename` into a newly allocated memory buffer.
// Throws TheoraGenericException when the file cannot be opened.
TheoraMemoryFileDataSource::TheoraMemoryFileDataSource(std::string filename) :
	mReadPointer(0),
	mData(0)
{
	mFilename = filename;
	FILE* file = fopen(filename.c_str(), "rb");
	if (file == NULL)
	{
		throw TheoraGenericException("Can't open video file: "+filename);
	}
	// determine the size by seeking to the end, then rewind
	fseek(file, 0, SEEK_END);
	mSize = ftell(file);
	fseek(file, 0, SEEK_SET);
	// slurp the complete file; the buffer is released in the destructor
	mData = new unsigned char[mSize];
	fread(mData, 1, mSize, file);
	fclose(file);
}
// Wraps an existing buffer of `size` bytes. The pointer is stored as-is (no
// copy) and is released with delete[] in the destructor.
TheoraMemoryFileDataSource::TheoraMemoryFileDataSource(unsigned char* data, long size, const std::string& filename)
{
	mReadPointer = 0;
	mSize = size;
	mData = data;
	mFilename = filename;
}
// Releases the data buffer, if there is one.
TheoraMemoryFileDataSource::~TheoraMemoryFileDataSource()
{
	if (mData != 0)
	{
		delete [] mData;
	}
}
// Copies up to nBytes from the current read position into `output` and
// advances the read position. Returns the number of bytes copied, which is 0
// when nBytes is not positive or when the read position is at or beyond the
// end of the data (e.g. after seeking past the end).
int TheoraMemoryFileDataSource::read(void* output, int nBytes)
{
	// guard first: past the end the subtraction below would go negative /
	// wrap around and be handed to memcpy as a length
	if (nBytes <= 0 || mReadPointer >= mSize) return 0;
	// compare against the remaining byte count rather than computing
	// mReadPointer + nBytes, which could overflow
	int n = (int) ((mSize - mReadPointer >= (unsigned long) nBytes) ? nBytes : mSize - mReadPointer);
	memcpy(output, mData + mReadPointer, n);
	mReadPointer += n;
	return n;
}
void TheoraMemoryFileDataSource::seek(unsigned long byte_index)
{
mReadPointer=byte_index;
}
unsigned long TheoraMemoryFileDataSource::size()
{
return mSize;
}
unsigned long TheoraMemoryFileDataSource::tell()
{
return mReadPointer;
}

View file

@ -1,37 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#include "TheoraException.h"
#include "TheoraUtil.h"
#include "TheoraVideoManager.h"
#include <stdio.h>
// Stores the error text, source file and line of an exception.
// The file path is shortened to the part that starts four characters after
// the first occurrence of "src" (i.e. just behind "src" plus one separator),
// limited to 1000 characters.
// NOTE(review): the `type` argument is not stored by this constructor.
_TheoraGenericException::_TheoraGenericException(const std::string& errorText, std::string type, std::string file, int line)
{
	mErrText = errorText;
	const std::string::size_type src = file.find("src");
	// only strip the prefix when something can follow it; substr() throws
	// std::out_of_range for a start position beyond the end of the string,
	// which must not happen while an exception object is being built
	if (src != std::string::npos && src + 4 <= file.size())
		file = file.substr(src + 4, 1000);
	mLineNumber = line;
	mFile = file;
}
// Builds the printable form "<type>: [<file>:<line>] - <error text>"; the
// type and location parts are left out when they are empty.
std::string _TheoraGenericException::repr()
{
	std::string result = getType();
	if (!result.empty())
	{
		result += ": ";
	}
	if (!mFile.empty())
	{
		result += "[" + mFile + ":" + str(mLineNumber) + "] - ";
	}
	return result + getErrorText();
}
void _TheoraGenericException::writeOutput()
{
th_writelog("----------------\nException Error!\n\n" + repr() + "\n----------------");
}

View file

@ -1,174 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#include "TheoraFrameQueue.h"
#include "TheoraVideoFrame.h"
#include "TheoraVideoManager.h"
#include "TheoraUtil.h"
// Creates an empty queue that belongs to the clip `parent`.
TheoraFrameQueue::TheoraFrameQueue(TheoraVideoClip* parent)
{
	this->mParent = parent;
}
// Deletes every frame owned by the queue.
TheoraFrameQueue::~TheoraFrameQueue()
{
	for (std::list<TheoraVideoFrame*>::iterator frame = mQueue.begin(); frame != mQueue.end(); ++frame)
	{
		delete *frame;
	}
	mQueue.clear();
}
// Allocates a frame for `clip`. Returns NULL (after deleting the frame) when
// the frame ends up without a pixel buffer.
TheoraVideoFrame* TheoraFrameQueue::createFrameInstance(TheoraVideoClip* clip)
{
	TheoraVideoFrame* created = new TheoraVideoFrame(clip);
	if (created->getBuffer() != NULL)
	{
		return created;
	}
	// This can happen if you run out of memory
	delete created;
	return NULL;
}
void TheoraFrameQueue::setSize(int n)
{
mMutex.lock();
if (mQueue.size() > 0)
{
foreach_l (TheoraVideoFrame*, mQueue)
{
delete (*it);
}
mQueue.clear();
}
TheoraVideoFrame* frame;
for (int i = 0;i < n; ++i)
{
frame = createFrameInstance(mParent);
if (frame != NULL) mQueue.push_back(frame);
else
{
TheoraVideoManager::getSingleton().logMessage("TheoraFrameQueue: unable to create " + str(n) + " frames, out of memory. Created " + str((int) mQueue.size()) + " frames.");
break;
}
}
mMutex.unlock();
}
// Returns the number of frames in the queue (read without locking).
int TheoraFrameQueue::getSize()
{
	const int count = (int) mQueue.size();
	return count;
}
// Returns the frame at the head of the queue when it is marked ready,
// otherwise NULL. Also returns NULL for an empty queue (for example when no
// frame could be allocated). Does not lock; see getFirstAvailableFrame() for
// the locking variant.
TheoraVideoFrame* TheoraFrameQueue::_getFirstAvailableFrame()
{
	// front() on an empty std::list is undefined behaviour
	if (mQueue.empty()) return NULL;
	TheoraVideoFrame* frame = mQueue.front();
	if (frame->mReady) return frame;
	else return NULL;
}
// Locking wrapper around _getFirstAvailableFrame().
TheoraVideoFrame* TheoraFrameQueue::getFirstAvailableFrame()
{
	TheoraVideoFrame* head;
	mMutex.lock();
	head = _getFirstAvailableFrame();
	mMutex.unlock();
	return head;
}
void TheoraFrameQueue::clear()
{
mMutex.lock();
foreach_l (TheoraVideoFrame*, mQueue)
(*it)->clear();
mMutex.unlock();
}
// Recycles the first `n` frames: each one is cleared and moved from the head
// of the queue to its tail. Does nothing when the queue is empty. Does not
// lock; see pop() for the locking variant.
void TheoraFrameQueue::_pop(int n)
{
	// front() / pop_front() on an empty std::list are undefined behaviour
	if (mQueue.empty()) return;
	for (int i = 0; i < n; ++i)
	{
		TheoraVideoFrame* first = mQueue.front();
		first->clear();
		mQueue.pop_front();
		mQueue.push_back(first);
	}
}
void TheoraFrameQueue::pop(int n)
{
mMutex.lock();
_pop(n);
mMutex.unlock();
}
// Finds the first frame that is not in use, marks it as in use and not
// ready, and returns it. Returns NULL when every frame is in use.
TheoraVideoFrame* TheoraFrameQueue::requestEmptyFrame()
{
	TheoraVideoFrame* claimed = NULL;
	mMutex.lock();
	std::list<TheoraVideoFrame*>::iterator it = mQueue.begin();
	while (claimed == NULL && it != mQueue.end())
	{
		TheoraVideoFrame* candidate = *it;
		if (!candidate->mInUse)
		{
			candidate->mInUse = 1;
			candidate->mReady = 0;
			claimed = candidate;
		}
		++it;
	}
	mMutex.unlock();
	return claimed;
}
int TheoraFrameQueue::getUsedCount()
{
mMutex.lock();
int n=0;
foreach_l(TheoraVideoFrame*,mQueue)
if ((*it)->mInUse) ++n;
mMutex.unlock();
return n;
}
int TheoraFrameQueue::_getReadyCount()
{
int n = 0;
foreach_l (TheoraVideoFrame*, mQueue)
if ((*it)->mReady) ++n;
return n;
}
// Locking wrapper around _getReadyCount().
int TheoraFrameQueue::getReadyCount()
{
	int ready;
	mMutex.lock();
	ready = _getReadyCount();
	mMutex.unlock();
	return ready;
}
bool TheoraFrameQueue::isFull()
{
return getReadyCount() == mQueue.size();
}
void TheoraFrameQueue::lock()
{
mMutex.lock();
}
void TheoraFrameQueue::unlock()
{
mMutex.unlock();
}
std::list<TheoraVideoFrame*>& TheoraFrameQueue::_getFrameQueue()
{
return mQueue;
}

View file

@ -1,70 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#include "TheoraTimer.h"
// Starts at time 0, not paused, at normal (1x) speed.
TheoraTimer::TheoraTimer()
{
	mSpeed = 1.0f;
	mPaused = 0;
	mTime = 0;
}
// Nothing to release.
TheoraTimer::~TheoraTimer()
{
}
// Advances the timer by timeDelta scaled with the current speed; ignored
// while the timer is paused.
void TheoraTimer::update(float timeDelta)
{
	if (isPaused()) return;
	mTime += timeDelta * mSpeed;
}
float TheoraTimer::getTime()
{
return mTime;
}
void TheoraTimer::pause()
{
mPaused = true;
}
void TheoraTimer::play()
{
mPaused = false;
}
bool TheoraTimer::isPaused()
{
return mPaused;
}
// Intentionally empty in this timer implementation.
void TheoraTimer::stop()
{
}
void TheoraTimer::seek(float time)
{
mTime = time;
}
void TheoraTimer::setSpeed(float speed)
{
mSpeed = speed;
}
float TheoraTimer::getSpeed()
{
return mSpeed;
}

View file

@ -1,59 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#include <stdio.h>
#include <algorithm>
#include <math.h>
#include <map>
#ifndef _WIN32
#include <unistd.h>
#include <pthread.h>
#endif
#include "TheoraUtil.h"
#include "TheoraException.h"
#ifdef _WIN32
#include <windows.h>
#pragma warning( disable: 4996 ) // MSVC++
#endif
// Formats an integer as its decimal string representation.
std::string str(int i)
{
	char digits[32];
	sprintf(digits, "%d", i);
	std::string text(digits);
	return text;
}
// Formats a float with exactly three decimals (printf "%.3f").
std::string strf(float i)
{
	// "%.3f" of the largest finite float needs a sign, 39 integer digits, the
	// decimal point, three decimals and the terminator - more than the 32
	// bytes used previously - so size the buffer with room to spare.
	char s[64];
	sprintf(s, "%.3f", i);
	return std::string(s);
}
// Suspends the calling thread for the given number of milliseconds using
// the sleep primitive of the platform.
void _psleep(int miliseconds)
{
#if defined(_WIN32) && !defined(_WINRT)
	Sleep(miliseconds);
#elif defined(_WIN32)
	// WinRT: wait on the current thread handle with a timeout instead
	WaitForSingleObjectEx(GetCurrentThread(), miliseconds, 0);
#else
	usleep(miliseconds * 1000); // usleep() takes microseconds
#endif
}
// Returns the smallest power of two that is >= x (1 for x <= 1).
// When x is larger than the biggest power of two representable in an int,
// that biggest power of two is returned instead of overflowing.
int _nextPow2(int x)
{
	// largest power of two that fits into a signed int
	const int max_pow2 = 1 << (sizeof(int) * 8 - 2);
	int y = 1;
	// stop doubling at max_pow2: one more step would be signed overflow
	while (y < x && y < max_pow2) y *= 2;
	return y;
}

View file

@ -1,496 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#include "TheoraVideoClip.h"
#include "TheoraVideoManager.h"
#include "TheoraVideoFrame.h"
#include "TheoraFrameQueue.h"
#include "TheoraAudioInterface.h"
#include "TheoraTimer.h"
#include "TheoraDataSource.h"
#include "TheoraUtil.h"
#include "TheoraException.h"
#include "core/os/memory.h"
// Builds a clip in its initial state: counters are zeroed, duration and frame
// count are set to -1 and a default TheoraTimer is created. No frame queue is
// allocated here (mFrameQueue stays NULL) and mStream stays 0.
//
// data_source      - only used here to obtain the clip name via repr()
// output_mode      - requested pixel format; TH_UNDEFINED makes setOutputMode() throw
// nPrecachedFrames - stored in mNumPrecachedFrames
// usePower2Stride  - stored in mStride (see note below)
TheoraVideoClip::TheoraVideoClip(TheoraDataSource* data_source,
TheoraOutputMode output_mode,
int nPrecachedFrames,
bool usePower2Stride):
mAudioInterface(NULL),
mNumDroppedFrames(0),
mNumDisplayedFrames(0),
// -1 means "no seek pending" (see getNextFrame / seekToFrame)
mSeekFrame(-1),
mDuration(-1),
mNumFrames(-1),
mFPS(1),
mUseAlpha(0),
mFrameDuration(0),
mName(data_source->repr()),
// NOTE(review): mStride only receives the usePower2Stride flag here; presumably
// the real stride is computed when the clip is loaded -- confirm.
mStride(usePower2Stride),
mSubFrameWidth(0),
mSubFrameHeight(0),
mSubFrameOffsetX(0),
mSubFrameOffsetY(0),
mAudioGain(1),
mRequestedOutputMode(output_mode),
mAutoRestart(0),
mEndOfFile(0),
mRestarted(0),
mIteration(0),
mPlaybackIteration(0),
mStream(0),
mThreadAccessCount(0),
mPriority(1),
mFirstFrameDisplayed(0),
mWaitingForCache(false),
mOutputMode(TH_UNDEFINED)
{
audio_track=0;
mAudioMutex = NULL;
mThreadAccessMutex = new TheoraMutex();
// mDefaultTimer is deleted in the destructor; mTimer initially points to it.
mTimer = mDefaultTimer = new TheoraTimer();
mFrameQueue = NULL;
mAssignedWorkerThread = NULL;
mNumPrecachedFrames = nPrecachedFrames;
// mOutputMode is still TH_UNDEFINED at this point, so this call always applies
// output_mode (or throws if output_mode itself is TH_UNDEFINED).
setOutputMode(output_mode);
}
// Releases everything the clip holds: the default timer, the stream, the frame
// queue and, when an audio interface is attached, that interface together with
// its mutex. The whole teardown runs while holding mThreadAccessMutex, which is
// the mutex TheoraWorkerThread::execute() holds while it decodes a frame.
TheoraVideoClip::~TheoraVideoClip()
{
// wait until a worker thread is done decoding the frame
mThreadAccessMutex->lock();
delete mDefaultTimer;
// mStream is released with memdelete (from core/os/memory.h), unlike the other members
if (mStream) memdelete(mStream);
if (mFrameQueue) delete mFrameQueue;
if (mAudioInterface)
{
mAudioMutex->lock(); // ensure a thread isn't using this mutex
delete mAudioInterface; // notify audio interface it's time to call it a day
mAudioMutex ->unlock();
delete mAudioMutex;
}
mThreadAccessMutex->unlock();
delete mThreadAccessMutex;
}
// Returns the timer currently driving this clip.
TheoraTimer* TheoraVideoClip::getTimer()
{
    return this->mTimer;
}
// Installs a custom timer; passing NULL switches back to the clip's default timer.
void TheoraVideoClip::setTimer(TheoraTimer* timer)
{
    mTimer = timer ? timer : mDefaultTimer;
}
// Empties the frame queue and resets both loop counters to zero.
void TheoraVideoClip::resetFrameQueue()
{
    mFrameQueue->clear();
    mIteration = 0;
    mPlaybackIteration = 0;
}
// Rewinds the clip: calls _restart() (defined outside this part of the file),
// seeks the timer back to 0, clears the frame queue and resets the
// end-of-file / restarted / seek-pending state. Everything after the first
// statement runs while holding mThreadAccessMutex, the mutex worker threads
// hold while decoding.
void TheoraVideoClip::restart()
{
mEndOfFile = true; //temp, to prevent threads to decode while restarting
mThreadAccessMutex->lock();
_restart();
mTimer->seek(0);
mFirstFrameDisplayed = false;
resetFrameQueue();
mEndOfFile = false;
mRestarted = false;
// -1 == no seek pending
mSeekFrame = -1;
mThreadAccessMutex->unlock();
}
// Advances the clip's timer by timeDelta.
//  - paused: the timer still receives update(0) so per-frame timer code runs;
//  - the step stays before the end of the clip: the timer simply advances;
//  - the step reaches/passes the end and looping is active (mAutoRestart and
//    mRestarted): the position wraps around, counting each wrap in
//    mPlaybackIteration;
//  - otherwise the timer is advanced just far enough to land on mDuration.
void TheoraVideoClip::update(float timeDelta)
{
    if (mTimer->isPaused())
    {
        mTimer->update(0); // update timer in case there is some code that needs to execute each frame
        return;
    }

    float time = mTimer->getTime(), speed = mTimer->getSpeed();
    if (!(time + timeDelta * speed >= mDuration))
    {
        mTimer->update(timeDelta);
        return;
    }

    if (mAutoRestart && mRestarted)
    {
        float wrapped = time + timeDelta * speed;
        while (wrapped >= mDuration)
        {
            wrapped -= mDuration;
            ++mPlaybackIteration;
        }
        mTimer->seek(wrapped);
    }
    else if (time != mDuration)
    {
        // divide by speed because the timer multiplies the delta by it again
        mTimer->update((mDuration - time) / speed);
    }
}
float TheoraVideoClip::updateToNextFrame()
{
TheoraVideoFrame* f = mFrameQueue->getFirstAvailableFrame();
if (!f) return 0;
float time = f->mTimeToDisplay - mTimer->getTime();
update(time);
return time;
}
// Gives access to the clip's frame queue.
TheoraFrameQueue* TheoraVideoClip::getFrameQueue()
{
    return this->mFrameQueue;
}
// Releases the frame at the front of the queue after it has been displayed, so
// its slot can be reused, and counts it as displayed. The very first displayed
// frame is removed with _pop(1) under an explicit queue lock and marks the
// clip as having displayed a frame; later frames go through pop().
void TheoraVideoClip::popFrame()
{
    ++mNumDisplayedFrames;

    if (mFirstFrameDisplayed)
    {
        mFrameQueue->pop();
        return;
    }

    mFrameQueue->lock();
    mFrameQueue->_pop(1);
    mFirstFrameDisplayed = true;
    mFrameQueue->unlock();
}
// Returns the clip width; in alpha output modes only half of mWidth is reported.
int TheoraVideoClip::getWidth()
{
    if (mUseAlpha) return mWidth / 2;
    return mWidth;
}
int TheoraVideoClip::getHeight()
{
return mHeight;
}
// Returns the sub-frame width; alpha output modes report half of mWidth instead.
int TheoraVideoClip::getSubFrameWidth()
{
    if (mUseAlpha) return mWidth / 2;
    return mSubFrameWidth;
}
// Returns the sub-frame height; alpha output modes report the full mHeight instead.
int TheoraVideoClip::getSubFrameHeight()
{
    if (mUseAlpha) return mHeight;
    return mSubFrameHeight;
}
// Returns the horizontal sub-frame offset; always 0 in alpha output modes.
int TheoraVideoClip::getSubFrameOffsetX()
{
    if (mUseAlpha) return 0;
    return mSubFrameOffsetX;
}
// Returns the vertical sub-frame offset; always 0 in alpha output modes.
int TheoraVideoClip::getSubFrameOffsetY()
{
    if (mUseAlpha) return 0;
    return mSubFrameOffsetY;
}
// Returns the playback position including completed loops: the timer position
// plus one full duration per playback iteration.
float TheoraVideoClip::getAbsPlaybackTime()
{
    return mPlaybackIteration * mDuration + mTimer->getTime();
}
// Drops ready frames from the front of the queue whose display window
// (display time + one frame duration, on the absolute timeline that includes
// loop iterations) already lies before absTime. At least one ready frame is
// always kept. Returns the number of frames dropped and adds it to
// mNumDroppedFrames.
//
// Uses the underscore-prefixed queue accessors; the caller visible in this file
// (getNextFrame) holds the frame queue lock around the call.
int TheoraVideoClip::discardOutdatedFrames(float absTime)
{
int nReady = mFrameQueue->_getReadyCount();
// only drop frames if you have more frames to show. otherwise even the late frame will do..
if (nReady == 1) return 0;
float time = absTime;
int nPop = 0;
TheoraVideoFrame* frame;
float timeToDisplay;
std::list<TheoraVideoFrame*>& queue = mFrameQueue->_getFrameQueue();
// foreach_l is a project macro; it exposes the current element as iterator `it`
foreach_l (TheoraVideoFrame*, queue)
{
frame = *it;
// ready frames are expected at the front; stop at the first one still being decoded
if (!frame->mReady) break;
timeToDisplay = frame->mTimeToDisplay + frame->mIteration * mDuration;
if (time > timeToDisplay + mFrameDuration)
{
++nPop;
if (nReady - nPop == 1) break; // always leave at least one in the queue
}
else break;
}
if (nPop > 0)
{
// NOTE(review): _DEBUG is defined unconditionally right here, so the logging
// block below is always compiled in and every drop is logged -- looks like a
// leftover debugging switch; confirm before relying on it.
#define _DEBUG
#ifdef _DEBUG
std::string log = getName() + ": dropped frame ";
int i = nPop;
// list the frame numbers of the first nPop queue entries, comma separated
foreach_l (TheoraVideoFrame*, queue)
{
log += str((int) (*it)->getFrameNumber());
if (i-- > 1)
{
log += ", ";
}
else break;
}
th_writelog(log);
#endif
mNumDroppedFrames += nPop;
mFrameQueue->_pop(nPop);
}
return nPop;
}
// Returns the frame that should be shown now, or NULL when there is none.
// Outdated frames are discarded first. A frame whose display time is still in
// the future is withheld, except for the very first frame of the clip, which is
// handed out as soon as it is available.
TheoraVideoFrame* TheoraVideoClip::getNextFrame()
{
    // if we are about to seek, then the current frame queue is invalidated
    // (will be cleared when a worker thread does the actual seek)
    if (mSeekFrame != -1) return NULL;

    mFrameQueue->lock();
    float now = getAbsPlaybackTime();
    discardOutdatedFrames(now);

    TheoraVideoFrame* candidate = mFrameQueue->_getFirstAvailableFrame();
    if (candidate != NULL &&
        candidate->mTimeToDisplay + candidate->mIteration * mDuration > now &&
        mFirstFrameDisplayed)
    {
        // ready, but not yet due
        candidate = NULL;
    }
    mFrameQueue->unlock();
    return candidate;
}
std::string TheoraVideoClip::getName()
{
return mName;
}
// A clip is busy while a worker thread is assigned to it or while a requested
// output mode change has not been applied yet.
bool TheoraVideoClip::isBusy()
{
    if (mAssignedWorkerThread) return true;
    return mOutputMode != mRequestedOutputMode;
}
// Returns the output mode currently in effect.
TheoraOutputMode TheoraVideoClip::getOutputMode()
{
    return this->mOutputMode;
}
// Switches the pixel format frames are decoded into.
// Throws TheoraGenericException for TH_UNDEFINED; does nothing if the mode is
// already active. When a worker thread is assigned, the frame queue is resized
// to its own size under mThreadAccessMutex so that its frames get recreated.
void TheoraVideoClip::setOutputMode(TheoraOutputMode mode)
{
    if (mode == TH_UNDEFINED) throw TheoraGenericException("Invalid output mode: TH_UNDEFINED for video: " + mName);
    if (mOutputMode == mode) return;

    mRequestedOutputMode = mode;

    // modes that carry an alpha channel
    const bool modeHasAlpha =
        mode == TH_RGBA   || mode == TH_ARGB   ||
        mode == TH_BGRA   || mode == TH_ABGR   ||
        mode == TH_GREY3A || mode == TH_AGREY3 ||
        mode == TH_YUVA   || mode == TH_AYUV;
    mUseAlpha = modeHasAlpha;

    if (mAssignedWorkerThread)
    {
        mThreadAccessMutex->lock();
        // discard current frames and recreate them
        mFrameQueue->setSize(mFrameQueue->getSize());
        mThreadAccessMutex->unlock();
    }
    mOutputMode = mRequestedOutputMode;
}
float TheoraVideoClip::getTimePosition()
{
return mTimer->getTime();
}
int TheoraVideoClip::getNumPrecachedFrames()
{
return mFrameQueue->getSize();
}
// Resizes the frame queue to n frames; a no-op when it already has that size.
void TheoraVideoClip::setNumPrecachedFrames(int n)
{
    if (mFrameQueue->getSize() == n) return;
    mFrameQueue->setSize(n);
}
// Number of ready frames via the queue's _getReadyCount(); reports 0 while a
// seek is pending.
int TheoraVideoClip::_getNumReadyFrames()
{
    return (mSeekFrame != -1) ? 0 : mFrameQueue->_getReadyCount();
}
// Number of ready frames via the queue's getReadyCount(). While a seek is
// pending the queue is treated as empty, because it will be emptied when the
// seek is carried out.
int TheoraVideoClip::getNumReadyFrames()
{
    return (mSeekFrame != -1) ? 0 : mFrameQueue->getReadyCount();
}
float TheoraVideoClip::getDuration()
{
return mDuration;
}
float TheoraVideoClip::getFPS()
{
return mFPS;
}
void TheoraVideoClip::play()
{
mTimer->play();
}
void TheoraVideoClip::pause()
{
mTimer->pause();
}
bool TheoraVideoClip::isPaused()
{
return mTimer->isPaused();
}
bool TheoraVideoClip::isDone()
{
return mEndOfFile && !mFrameQueue->getFirstAvailableFrame();
}
void TheoraVideoClip::stop()
{
pause();
resetFrameQueue();
mFirstFrameDisplayed = false;
seek(0);
}
void TheoraVideoClip::setPlaybackSpeed(float speed)
{
mTimer->setSpeed(speed);
}
float TheoraVideoClip::getPlaybackSpeed()
{
return mTimer->getSpeed();
}
// Requests a seek to the given time by converting it to a frame index
// (time * FPS, truncated towards zero).
void TheoraVideoClip::seek(float time)
{
    const int targetFrame = (int) (time * getFPS());
    seekToFrame(targetFrame);
}
// Records a pending seek to the given frame, clamped to [0, mNumFrames]. Only
// mSeekFrame is set here; TheoraWorkerThread::execute() calls doSeek() when it
// finds mSeekFrame >= 0. Also clears the first-frame-displayed and end-of-file
// flags.
void TheoraVideoClip::seekToFrame(int frame)
{
    int target = frame;
    if (frame < 0) target = 0;
    else if (frame > mNumFrames) target = mNumFrames;

    mSeekFrame = target;
    mFirstFrameDisplayed = false;
    mEndOfFile = false;
}
// Blocks the caller until the requested share of the frame queue is decoded or
// max_wait_time (seconds) has elapsed, polling every 10 ms. The timer is paused
// for the duration of the wait and resumed afterwards if it was running.
// mWaitingForCache is raised meanwhile so that the scheduler treats the paused
// clip like a playing one.
void TheoraVideoClip::waitForCache(float desired_cache_factor, float max_wait_time)
{
    mWaitingForCache = true;

    const bool wasPaused = mTimer->isPaused();
    if (!wasPaused) mTimer->pause();

    const int framesWanted = (int) (desired_cache_factor * getNumPrecachedFrames());
    for (int waitedMs = 0; getNumReadyFrames() < framesWanted; )
    {
        _psleep(10);
        waitedMs += 10;
        if (waitedMs >= max_wait_time * 1000) break;
    }

    if (!wasPaused) mTimer->play();
    mWaitingForCache = false;
}
float TheoraVideoClip::getPriority()
{
return mPriority;
}
void TheoraVideoClip::setPriority(float priority)
{
mPriority = priority;
}
// Returns the number of ready frames, increased by half the queue capacity
// (integer division) while the clip is paused.
float TheoraVideoClip::getPriorityIndex()
{
    const float readyFrames = (float) getNumReadyFrames();
    if (!mTimer->isPaused()) return readyFrames;
    return readyFrames + getNumPrecachedFrames() / 2;
}
// Attaches (or, with NULL, detaches) the audio interface. The audio mutex
// exists exactly as long as an interface is attached: it is created on first
// attach and destroyed on detach.
void TheoraVideoClip::setAudioInterface(TheoraAudioInterface* iface)
{
    mAudioInterface = iface;

    if (iface)
    {
        if (!mAudioMutex) mAudioMutex = new TheoraMutex;
    }
    else if (mAudioMutex)
    {
        delete mAudioMutex;
        mAudioMutex = NULL;
    }
}
// Returns the attached audio interface, or NULL when there is none.
TheoraAudioInterface* TheoraVideoClip::getAudioInterface()
{
    return this->mAudioInterface;
}
// Sets the audio gain, clamped to the range [0, 1].
//
// The three cases must be one if / else-if / else chain: previously the first
// `if` stood alone, so a gain above 1 was set to 1 and then immediately
// overwritten with the unclamped value by the trailing `else`.
void TheoraVideoClip::setAudioGain(float gain)
{
    if (gain > 1) mAudioGain = 1;
    else if (gain < 0) mAudioGain = 0;
    else mAudioGain = gain;
}
float TheoraVideoClip::getAudioGain()
{
return mAudioGain;
}
// Enables or disables looping. Enabling it also clears the end-of-file flag.
void TheoraVideoClip::setAutoRestart(bool value)
{
    mAutoRestart = value;
    if (!value) return;
    mEndOfFile = false;
}

View file

@ -1,159 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#include <memory.h>
#include "TheoraPixelTransform.h"
#include "TheoraVideoClip.h"
#include "TheoraVideoFrame.h"
#include "TheoraVideoManager.h"
//#define YUV_TEST // uncomment this if you want to benchmark YUV decoding functions
// Pixel conversion routines with C linkage, defined outside this file. Each
// one takes a TheoraPixelTransform describing the source planes and the
// destination buffer. They are referenced only through the
// conversion_functions table below.
extern "C"
{
void decodeRGB (struct TheoraPixelTransform* t);
void decodeRGBA (struct TheoraPixelTransform* t);
void decodeRGBX (struct TheoraPixelTransform* t);
void decodeARGB (struct TheoraPixelTransform* t);
void decodeXRGB (struct TheoraPixelTransform* t);
void decodeBGR (struct TheoraPixelTransform* t);
void decodeBGRA (struct TheoraPixelTransform* t);
void decodeBGRX (struct TheoraPixelTransform* t);
void decodeABGR (struct TheoraPixelTransform* t);
void decodeXBGR (struct TheoraPixelTransform* t);
void decodeGrey (struct TheoraPixelTransform* t);
void decodeGrey3(struct TheoraPixelTransform* t);
void decodeGreyA(struct TheoraPixelTransform* t);
void decodeGreyX(struct TheoraPixelTransform* t);
void decodeAGrey(struct TheoraPixelTransform* t);
void decodeXGrey(struct TheoraPixelTransform* t);
void decodeYUV (struct TheoraPixelTransform* t);
void decodeYUVA (struct TheoraPixelTransform* t);
void decodeYUVX (struct TheoraPixelTransform* t);
void decodeAYUV (struct TheoraPixelTransform* t);
void decodeXYUV (struct TheoraPixelTransform* t);
}
// Dispatch table used by TheoraVideoFrame::decode(): it is indexed with the
// clip's output mode. Slot 0 is a null entry.
// NOTE(review): the order presumably mirrors the TheoraOutputMode enum (with
// TH_UNDEFINED as 0) and must stay in sync with it and with the bytemap in the
// TheoraVideoFrame constructor -- confirm against the enum declaration.
static void (*conversion_functions[])(struct TheoraPixelTransform*) = {0,
decodeRGB,
decodeRGBA,
decodeRGBX,
decodeARGB,
decodeXRGB,
decodeBGR,
decodeBGRA,
decodeBGRX,
decodeABGR,
decodeXBGR,
decodeGrey,
decodeGrey3,
decodeGreyA,
decodeGreyX,
decodeAGrey,
decodeXGrey,
decodeYUV,
decodeYUVA,
decodeYUVX,
decodeAYUV,
decodeXYUV
};
// Creates a frame belonging to `parent` and allocates its pixel buffer
// (stride * height * bytes-per-pixel of the parent's output mode), filled with
// 0xFF bytes. If the allocation fails, mBuffer is left NULL instead of
// propagating the exception.
TheoraVideoFrame::TheoraVideoFrame(TheoraVideoClip* parent)
{
    mReady = mInUse = false;
    mParent = parent;
    mIteration = 0;

    // number of bytes per pixel, indexed by the output mode value
    static const int bytemap[] = {0, 3, 4, 4, 4, 4, 3, 4, 4, 4, 4, 1, 3, 4, 4, 4, 4, 3, 4, 4, 4, 4};
    mBpp = bytemap[mParent->getOutputMode()];

    unsigned int size = mParent->getStride() * mParent->mHeight * mBpp;
    try
    {
        mBuffer = new unsigned char[size];
    }
    catch (const std::bad_alloc&) // catch by reference: no copy, no slicing
    {
        mBuffer = NULL;
        return;
    }
    memset(mBuffer, 255, size);
}
// Frees the pixel buffer (delete[] on a NULL pointer is a no-op).
TheoraVideoFrame::~TheoraVideoFrame()
{
    delete [] mBuffer;
}
int TheoraVideoFrame::getWidth()
{
return mParent->getWidth();
}
int TheoraVideoFrame::getStride()
{
return mParent->mStride;
}
int TheoraVideoFrame::getHeight()
{
return mParent->getHeight();
}
unsigned char* TheoraVideoFrame::getBuffer()
{
return mBuffer;
}
// Fills the frame buffer from the given transform and marks the frame ready.
//  - t->raw set: the data is copied as-is, in a single memcpy when the source
//    stride equals the destination row size, otherwise row by row;
//  - t->raw NULL: the conversion routine selected by the clip's output mode
//    writes into the buffer.
void TheoraVideoFrame::decode(struct TheoraPixelTransform* t)
{
    if (t->raw == NULL)
    {
        t->out = mBuffer;
        t->w = mParent->getWidth();
        t->h = mParent->getHeight();
#ifdef YUV_TEST // when benchmarking yuv conversion functions during development, do a timed average
#define N 1000
        clock_t time = clock();
        for (int i = 0; i < N; ++i)
        {
            conversion_functions[mParent->getOutputMode()](t);
        }
        float diff = (clock() - time) * 1000.0f / CLOCKS_PER_SEC;
        char s[128];
        sprintf(s, "%.2f", diff / N);
        TheoraVideoManager::getSingleton().logMessage("YUV Decoding time: " + std::string(s) + " ms\n");
#else
        conversion_functions[mParent->getOutputMode()](t);
#endif
    }
    else
    {
        int rowBytes = mParent->getWidth() * mBpp;
        if (rowBytes == t->rawStride)
        {
            memcpy(mBuffer, t->raw, t->rawStride * mParent->getHeight());
        }
        else
        {
            unsigned char* dst = mBuffer;
            unsigned char* src = t->raw;
            int rows = mParent->getHeight();
            for (int row = 0; row < rows; ++row)
            {
                memcpy(dst, src, rowBytes);
                dst += rowBytes;
                src += t->rawStride;
            }
        }
    }
    mReady = true;
}
// Resets the frame to "not decoded, not in use" so its slot can be reused.
void TheoraVideoFrame::clear()
{
    mReady = false;
    mInUse = false;
}

View file

@ -1,485 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#include "TheoraVideoManager.h"
#include "TheoraWorkerThread.h"
#include "TheoraVideoClip.h"
#include "TheoraFrameQueue.h"
#include "TheoraAudioInterface.h"
#include "TheoraUtil.h"
#include "TheoraDataSource.h"
#include "TheoraException.h"
#ifdef __THEORA
#include <theora/codec.h>
#include <vorbis/codec.h>
#include "TheoraVideoClip_Theora.h"
#endif
#ifdef __AVFOUNDATION
#include "TheoraVideoClip_AVFoundation.h"
#endif
#ifdef __FFMPEG
#include "TheoraVideoClip_FFmpeg.h"
#endif
#ifdef _ANDROID //libtheoraplayer addition for cpu feature detection
#include "cpu-features.h"
#endif
// declaring function prototype here so I don't have to put it in a header file
// it only needs to be used by this plugin and called once
extern "C"
{
void initYUVConversionModule();
}
#include "core/os/memory.h"
//#define _DECODING_BENCHMARK //uncomment to test average decoding time on a given device
// --------------------------
//#define _SCHEDULING_DEBUG
// Only built with _SCHEDULING_DEBUG: accumulates the time passed to
// TheoraVideoManager::update(); requestWork() prints its CPU time analysis and
// resets this to 0 once it exceeds 2.0.
#ifdef _SCHEDULING_DEBUG
float gThreadDiagnosticTimer = 0;
#endif
// --------------------------
// Development aid, only built with _DECODING_BENCHMARK: repeatedly waits for
// the clip's frame queue to fill and clears it (8 rounds: n goes 256 -> 0 in
// steps of 32), then logs the elapsed CPU time measured with clock() and seeks
// the clip back to the start.
// NOTE(review): the reported per-frame average divides by 256 and therefore
// presumably assumes 32 frames are decoded per round -- confirm the queue size
// used when benchmarking.
#ifdef _DECODING_BENCHMARK
void benchmark(TheoraVideoClip* clip)
{
int nPrecached = 256;
int n = nPrecached;
char msg[1024];
clock_t t = clock();
while (n > 0)
{
clip->waitForCache(1.0f, 1000000);
n -= 32;
clip->getFrameQueue()->clear();
}
float diff = ((float) (clock() - t) * 1000.0f) / CLOCKS_PER_SEC;
sprintf(msg, "BENCHMARK: %s: Decoding %d frames took %.1fms (%.2fms average per frame)\n",clip->getName().c_str(), nPrecached, diff, diff / nPrecached);
TheoraVideoManager::getSingleton().logMessage(msg);
clip->seek(0);
}
#endif
// Scheduling record built by TheoraVideoManager::requestWork() for every clip
// that can currently accept more decoded frames.
struct TheoraWorkCandidate
{
    TheoraVideoClip* clip;  // the clip this record describes
    float priority;         // clip priority, later scaled down for clips with more queued video
    float queuedTime;       // ready frames / (FPS * playback speed)
    float workTime;         // clip's thread access count, later normalised by the total
    float entitledTime;     // priority / sum of all candidate priorities
};
// The manager instance handed out by getSingletonPtr()/getSingleton(); set by
// the TheoraVideoManager constructor.
TheoraVideoManager* g_ManagerSingleton = NULL;
// Default log sink: writes the message followed by a newline to stdout.
void theora_writelog(std::string output)
{
    fprintf(stdout, "%s\n", output.c_str());
}
// Currently installed log sink, called by TheoraVideoManager::logMessage().
// Defaults to theora_writelog and is replaced through setLogFunction().
void (*g_LogFuction)(std::string) = theora_writelog;
void TheoraVideoManager::setLogFunction(void (*fn)(std::string))
{
g_LogFuction = fn;
}
// Returns the manager instance, or NULL if none has been created yet.
TheoraVideoManager* TheoraVideoManager::getSingletonPtr()
{
    return ::g_ManagerSingleton;
}
// Returns the manager instance by reference; must only be called while a
// manager exists, because the global pointer is dereferenced unchecked.
TheoraVideoManager& TheoraVideoManager::getSingleton()
{
    TheoraVideoManager* instance = g_ManagerSingleton;
    return *instance;
}
// Creates the manager, registers it as the global singleton, logs a banner
// describing the compiled-in backends, initialises the YUV conversion module
// and starts the requested number of worker threads.
// Throws TheoraGenericException when num_worker_threads < 1.
TheoraVideoManager::TheoraVideoManager(int num_worker_threads) :
mDefaultNumPrecachedFrames(8)
{
if (num_worker_threads < 1) throw TheoraGenericException("Unable to create TheoraVideoManager, at least one worker thread is reqired");
// the singleton must be set before the worker threads start, since
// TheoraWorkerThread::execute() reaches the manager through getSingleton()
g_ManagerSingleton = this;
std::string msg = "Initializing Theora Playback Library (" + getVersionString() + ")\n";
#ifdef __THEORA
msg += " - libtheora version: " + std::string(th_version_string()) + "\n" +
" - libvorbis version: " + std::string(vorbis_version_string()) + "\n";
#endif
#ifdef _ANDROID
uint64_t features = android_getCpuFeaturesExt();
char s[128];
// note: the 64-bit feature mask is truncated to unsigned int for printing
sprintf(s, " - Android: CPU Features: %u\n", (unsigned int) features);
msg += s;
if ((features & ANDROID_CPU_ARM_FEATURE_NEON) == 0)
msg += " - Android: NEON features NOT SUPPORTED by CPU\n";
else
msg += " - Android: Detected NEON CPU features\n";
#endif
#ifdef __AVFOUNDATION
msg += " - using Apple AVFoundation classes.\n";
#endif
#ifdef __FFMPEG
msg += " - using FFmpeg library.\n";
#endif
logMessage(msg + "------------------------------------");
mAudioFactory = NULL;
// mWorkMutex must exist before the workers start: requestWork() locks it
mWorkMutex = new TheoraMutex();
// for CPU based yuv2rgb decoding
initYUVConversionModule();
createWorkerThreads(num_worker_threads);
}
// Shuts the manager down: joins and deletes the worker threads, deletes every
// clip still registered, releases the work mutex and finally unregisters the
// global singleton pointer so it no longer refers to a destroyed object.
TheoraVideoManager::~TheoraVideoManager()
{
    // workers reach the manager through the singleton, so they must be gone
    // before anything else is torn down
    destroyWorkerThreads();

    mWorkMutex->lock();
    ClipList::iterator ci;
    for (ci = mClips.begin(); ci != mClips.end(); ++ci)
        delete (*ci);
    mClips.clear();
    mWorkMutex->unlock();
    delete mWorkMutex;

    // previously the global kept pointing at this destroyed instance
    if (g_ManagerSingleton == this) g_ManagerSingleton = NULL;
}
// Forwards the message to the currently installed log function.
void TheoraVideoManager::logMessage(std::string msg)
{
    (*g_LogFuction)(msg);
}
// Looks up a registered clip by name (first match wins) while holding the work
// mutex. Returns NULL when no clip has that name.
TheoraVideoClip* TheoraVideoManager::getVideoClipByName(std::string name)
{
    TheoraVideoClip* found = NULL;

    mWorkMutex->lock();
    for (ClipList::iterator entry = mClips.begin(); entry != mClips.end(); ++entry)
    {
        if ((*entry)->getName() == name)
        {
            found = *entry;
            break;
        }
    }
    mWorkMutex->unlock();

    return found;
}
// Stores the factory pointer returned by getAudioInterfaceFactory().
void TheoraVideoManager::setAudioInterfaceFactory(TheoraAudioInterfaceFactory* factory)
{
    this->mAudioFactory = factory;
}
// Returns the installed audio interface factory, or NULL if none was set.
TheoraAudioInterfaceFactory* TheoraVideoManager::getAudioInterfaceFactory()
{
    return this->mAudioFactory;
}
// Convenience overload: wraps the file name in a TheoraFileDataSource and
// delegates to the data-source overload with the remaining arguments.
TheoraVideoClip* TheoraVideoManager::createVideoClip(std::string filename,
                                                     TheoraOutputMode output_mode,
                                                     int numPrecachedOverride,
                                                     bool usePower2Stride,
                                                     int p_track)
{
    TheoraDataSource* fileSource = memnew(TheoraFileDataSource(filename));
    TheoraVideoClip* clip = createVideoClip(fileSource, output_mode, numPrecachedOverride, usePower2Stride, p_track);
    return clip;
}
// Creates a clip for the given data source with the decoder backend(s)
// compiled in, loads it, decodes its first frame on the calling thread and
// registers it with the manager. All of this happens under the work mutex.
//
// numPrecachedOverride - frame queue size; 0 selects the manager default
// p_audio_track        - passed to the clip via set_audio_track()
//
// Returns the new clip, or NULL when no compiled-in backend produced a clip for
// this source (for example an AVFoundation-only build given a non-.mp4 file).
// Previously that case dereferenced a NULL clip while holding the mutex. On
// the NULL path the data source is not taken over by any clip.
TheoraVideoClip* TheoraVideoManager::createVideoClip(TheoraDataSource* data_source,
                                                     TheoraOutputMode output_mode,
                                                     int numPrecachedOverride,
                                                     bool usePower2Stride,
                                                     int p_audio_track)
{
    mWorkMutex->lock();

    TheoraVideoClip* clip = NULL;
    int nPrecached = numPrecachedOverride ? numPrecachedOverride : mDefaultNumPrecachedFrames;
    logMessage("Creating video from data source: " + data_source->repr() + " [" + str(nPrecached) + " precached frames].");

#ifdef __AVFOUNDATION
    TheoraFileDataSource* fileDataSource = dynamic_cast<TheoraFileDataSource*>(data_source);
    std::string filename;
    if (fileDataSource == NULL)
    {
        TheoraMemoryFileDataSource* memoryDataSource = dynamic_cast<TheoraMemoryFileDataSource*>(data_source);
        if (memoryDataSource != NULL) filename = memoryDataSource->getFilename();
        // if the user has his own data source, it's going to be a problem for AVAssetReader since it only supports reading from files...
    }
    else filename = fileDataSource->getFilename();

    if (filename.size() > 4 && filename.substr(filename.size() - 4, filename.size()) == ".mp4")
    {
        clip = new TheoraVideoClip_AVFoundation(data_source, output_mode, nPrecached, usePower2Stride);
    }
#endif
#if defined(__AVFOUNDATION) && defined(__THEORA)
    else
#endif
#ifdef __THEORA
        clip = new TheoraVideoClip_Theora(data_source, output_mode, nPrecached, usePower2Stride);
#endif
#ifdef __FFMPEG
    clip = new TheoraVideoClip_FFmpeg(data_source, output_mode, nPrecached, usePower2Stride);
#endif

    if (clip == NULL)
    {
        // no backend accepted the source: fail cleanly and release the mutex
        logMessage("Unable to create video clip, no decoder available for: " + data_source->repr());
        mWorkMutex->unlock();
        return NULL;
    }

    clip->set_audio_track(p_audio_track);
    clip->load(data_source);
    clip->decodeNextFrame(); // ensure the first frame is always preloaded and have the main thread do it to prevent potential thread starvation
    mClips.push_back(clip);
    mWorkMutex->unlock();

#ifdef _DECODING_BENCHMARK
    benchmark(clip);
#endif
    return clip;
}
// Unregisters and deletes a clip; NULL is ignored. While holding the work
// mutex it first polls (1 ms steps) until no worker thread is assigned to the
// clip, then removes the clip from the clip list and the work log and deletes
// it.
void TheoraVideoManager::destroyVideoClip(TheoraVideoClip* clip)
{
    if (!clip) return;

    th_writelog("Destroying video clip: " + clip->getName());
    mWorkMutex->lock();

    bool waitLogged = false;
    while (clip->mAssignedWorkerThread)
    {
        if (!waitLogged)
        {
            th_writelog(" - Waiting for WorkerThread to finish decoding in order to destroy");
            waitLogged = true;
        }
        _psleep(1);
    }
    if (waitLogged) th_writelog(" - WorkerThread done, destroying...");

    // erase the clip from the clip list
    for (ClipList::iterator entry = mClips.begin(); entry != mClips.end(); ++entry)
    {
        if (*entry == clip)
        {
            mClips.erase(entry);
            break;
        }
    }
    // remove all its references from the work log
    mWorkLog.remove(clip);
    // delete the actual clip
    delete clip;
#ifdef _DEBUG
    th_writelog("Destroyed video.");
#endif
    mWorkMutex->unlock();
}
// Scheduler entry point called by worker threads: picks the clip the calling
// thread should decode next and marks it as assigned to `caller`. Returns NULL
// when no clip currently needs decoding. Runs entirely under mWorkMutex.
//
// Selection works in three steps:
//  1. collect candidates (non-busy clips whose frame queue is not full),
//  2. give each candidate an "entitled" share derived from its priority and a
//     "work" share derived from how often it appears in the recent work log,
//  3. choose the candidate with the largest (entitled - work) difference.
TheoraVideoClip* TheoraVideoManager::requestWork(TheoraWorkerThread* caller)
{
if (!mWorkMutex) return NULL;
mWorkMutex->lock();
TheoraVideoClip* selectedClip = NULL;
float maxQueuedTime = 0, totalAccessCount = 0, prioritySum = 0, diff, maxDiff = -1;
int nReadyFrames;
std::vector<TheoraWorkCandidate> candidates;
TheoraVideoClip* clip;
TheoraWorkCandidate candidate;
// first pass is for playing videos, but if no such videos are available for decoding
// paused videos are selected in the second pass.
// Note that paused videos that are waiting for cache are considered equal to playing
// videos in the scheduling context
for (int i = 0; i < 2 && candidates.size() == 0; ++i)
{
foreach (TheoraVideoClip*, mClips)
{
clip = *it;
if (clip->isBusy() || (i == 0 && clip->isPaused() && !clip->mWaitingForCache)) continue;
nReadyFrames = clip->getNumReadyFrames();
// queue already full: nothing to decode for this clip
if (nReadyFrames == clip->getFrameQueue()->getSize()) continue;
candidate.clip = clip;
candidate.priority = clip->getPriority();
// ready frames divided by (FPS * playback speed)
candidate.queuedTime = (float) nReadyFrames / (clip->getFPS() * clip->getPlaybackSpeed());
candidate.workTime = (float) clip->mThreadAccessCount;
totalAccessCount += candidate.workTime;
if (maxQueuedTime < candidate.queuedTime) maxQueuedTime = candidate.queuedTime;
candidates.push_back(candidate);
}
}
// prevent division by zero
if (totalAccessCount == 0) totalAccessCount = 1;
if (maxQueuedTime == 0) maxQueuedTime = 1;
// normalize candidate values
foreach (TheoraWorkCandidate, candidates)
{
it->workTime /= totalAccessCount;
// adjust user priorities to favor clips that have fewer frames queued
it->priority *= 1.0f - (it->queuedTime / maxQueuedTime) * 0.5f;
prioritySum += it->priority;
}
foreach (TheoraWorkCandidate, candidates)
{
it->entitledTime = it->priority / prioritySum;
}
// now, based on how much access time has been given to each clip in the work log
// and how much time should be given to each clip based on calculated priorities,
// we choose a best suited clip for this worker thread to decode next
foreach (TheoraWorkCandidate, candidates)
{
diff = it->entitledTime - it->workTime;
if (maxDiff < diff)
{
maxDiff = diff;
selectedClip = it->clip;
}
}
if (selectedClip)
{
selectedClip->mAssignedWorkerThread = caller;
int nClips = (int) mClips.size();
// the work log keeps at most 50 entries per additional clip; with a single
// clip the limit is 0 and nothing is logged
unsigned int maxWorkLogSize = (nClips - 1) * 50;
if (nClips > 1)
{
mWorkLog.push_front(selectedClip);
++selectedClip->mThreadAccessCount;
}
TheoraVideoClip* c;
// trim the oldest entries, keeping each clip's access count in sync with the log
while (mWorkLog.size() > maxWorkLogSize)
{
c = mWorkLog.back();
mWorkLog.pop_back();
c->mThreadAccessCount--;
}
#ifdef _SCHEDULING_DEBUG
if (mClips.size() > 1)
{
int accessCount = mWorkLog.size();
if (gThreadDiagnosticTimer > 2.0f)
{
gThreadDiagnosticTimer = 0;
std::string logstr = "-----\nTheora Playback Library debug CPU time analysis (" + str(accessCount) + "):\n";
int percent;
foreach (TheoraVideoClip*, mClips)
{
percent = ((float) (*it)->mThreadAccessCount / mWorkLog.size()) * 100.0f;
logstr += (*it)->getName() + " (" + str((*it)->getPriority()) + "): " + str((*it)->mThreadAccessCount) + ", " + str(percent) + "%\n";
}
logstr += "-----";
th_writelog(logstr);
}
}
#endif
}
mWorkMutex->unlock();
return selectedClip;
}
void TheoraVideoManager::update(float timeDelta)
{
mWorkMutex->lock();
foreach (TheoraVideoClip*, mClips)
{
(*it)->update(timeDelta);
(*it)->decodedAudioCheck();
}
mWorkMutex->unlock();
#ifdef _SCHEDULING_DEBUG
gThreadDiagnosticTimer += timeDelta;
#endif
}
// Returns how many worker threads the manager currently owns.
int TheoraVideoManager::getNumWorkerThreads()
{
    const int count = (int) mWorkerThreads.size();
    return count;
}
void TheoraVideoManager::createWorkerThreads(int n)
{
TheoraWorkerThread* t;
for (int i=0;i<n;++i)
{
t=new TheoraWorkerThread();
t->start();
mWorkerThreads.push_back(t);
}
}
// Joins and deletes every worker thread, then empties the thread list.
void TheoraVideoManager::destroyWorkerThreads()
{
    foreach(TheoraWorkerThread*, mWorkerThreads)
    {
        TheoraWorkerThread* worker = *it;
        worker->join();
        delete worker;
    }
    mWorkerThreads.clear();
}
// Changes the size of the worker pool by tearing all threads down and creating
// n new ones. Does nothing when n equals the current count; throws
// TheoraGenericException when n < 1.
void TheoraVideoManager::setNumWorkerThreads(int n)
{
    const int current = getNumWorkerThreads();
    if (n == current) return;
    if (n < 1) throw TheoraGenericException("Unable to change the number of worker threads in TheoraVideoManager, at least one worker thread is reqired");

    th_writelog("changing number of worker threats to: "+str(n));
    destroyWorkerThreads();
    createWorkerThreads(n);
}
// Formats the library version as "a.b", followed by ".c" for a positive third
// component or " RC<n>" for a negative one (n = -c). Nothing is appended when
// the third component is 0.
std::string TheoraVideoManager::getVersionString()
{
    int major, minor, revision;
    getVersion(&major, &minor, &revision);

    std::string version = str(major) + "." + str(minor);
    if (revision < 0) version += " RC" + str(-revision);
    else if (revision > 0) version += "." + str(revision);
    return version;
}
// Writes the library version (1.1.0) into the three out-parameters.
// TODO, return a struct instead of the current solution.
void TheoraVideoManager::getVersion(int* a, int* b, int* c)
{
    *c = 0;
    *b = 1;
    *a = 1;
}
// Lists the names of the decoder backends that were compiled into this build.
std::vector<std::string> TheoraVideoManager::getSupportedDecoders()
{
    std::vector<std::string> decoders;
#ifdef __THEORA
    decoders.push_back("Theora");
#endif
#ifdef __AVFOUNDATION
    decoders.push_back("AVFoundation");
#endif
#ifdef __FFMPEG
    decoders.push_back("FFmpeg");
#endif
    return decoders;
}

View file

@ -1,49 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#ifdef _WIN32
#pragma warning( disable: 4251 ) // MSVC++
#endif
#include "TheoraWorkerThread.h"
#include "TheoraVideoManager.h"
#include "TheoraVideoClip.h"
#include "TheoraUtil.h"
// Creates a worker that has no clip assigned yet.
TheoraWorkerThread::TheoraWorkerThread() : TheoraThread()
{
    this->mClip = NULL;
}
// Intentionally empty.
TheoraWorkerThread::~TheoraWorkerThread() {}
// Thread body: while the thread is running, asks the manager for a clip to
// work on. Without work it sleeps 100 ms. With a clip it takes the clip's
// thread-access mutex, performs a pending seek if one was requested, decodes
// one frame, and releases the clip again.
void TheoraWorkerThread::execute()
{
    while (isRunning())
    {
        mClip = TheoraVideoManager::getSingleton().requestWork(this);
        if (mClip)
        {
            mClip->mThreadAccessMutex->lock();
            // if user requested seeking, do that then.
            if (mClip->mSeekFrame >= 0) mClip->doSeek();

            const bool decoded = mClip->decodeNextFrame();
            if (!decoded) _psleep(1); // this happens when the video frame queue is full.

            mClip->mAssignedWorkerThread = NULL;
            mClip->mThreadAccessMutex->unlock();
            mClip = NULL;
        }
        else
        {
            _psleep(100);
        }
    }
}

View file

@ -1,56 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#include "yuv_util.h"
/* Expands the luma plane into a packed output where each pixel occupies nBytes
 * bytes: the Y sample is replicated into the first three bytes of every pixel,
 * any further bytes are left untouched. `stride` is the size of one output row
 * in bytes. */
static void _decodeGrey3(struct TheoraPixelTransform* t, int stride, int nBytes)
{
    unsigned int row, col;
    for (row = 0; row < t->h; ++row)
    {
        unsigned char* src = t->y + row * t->yStride;
        unsigned char* dst = t->out + row * stride;
        for (col = 0; col < t->w; ++col, dst += nBytes)
            dst[0] = dst[1] = dst[2] = src[col];
    }
}
/* Copies the luma plane into a tightly packed one-byte-per-pixel output:
 * source rows are t->yStride apart, output rows follow each other directly. */
void decodeGrey(struct TheoraPixelTransform* t)
{
    unsigned char* dst = t->out;
    unsigned int row, col;
    for (row = 0; row < t->h; ++row)
    {
        unsigned char* src = t->y + row * t->yStride;
        for (col = 0; col < t->w; ++col)
            *dst++ = src[col];
    }
}
/* Grey output with three bytes per pixel; rows are w * 3 bytes. */
void decodeGrey3(struct TheoraPixelTransform* t)
{
    const int bytesPerPixel = 3;
    _decodeGrey3(t, t->w * 3, bytesPerPixel);
}
/* Grey + alpha output with four bytes per pixel: the grey value goes into
 * bytes 0..2, then _decodeAlpha is run on the transform returned by
 * incOut(t, 3) with the same row size.
 * NOTE(review): incOut/_decodeAlpha come from yuv_util.h; presumably incOut
 * offsets the output pointer so that alpha lands in byte 3 -- confirm. */
void decodeGreyA(struct TheoraPixelTransform* t)
{
_decodeGrey3(t, t->w * 4, 4);
_decodeAlpha(incOut(t, 3), t->w * 4);
}
/* Grey output with four bytes per pixel; only bytes 0..2 of each pixel are
 * written, the fourth byte is not touched here. */
void decodeGreyX(struct TheoraPixelTransform* t)
{
    const int bytesPerPixel = 4;
    _decodeGrey3(t, t->w * 4, bytesPerPixel);
}
/* Alpha + grey output with four bytes per pixel: the grey pass runs on the
 * transform returned by incOut(t, 1), then _decodeAlpha runs on t itself.
 * NOTE(review): presumably incOut shifts the output by one byte so grey fills
 * bytes 1..3 and alpha byte 0; whether incOut modifies t in place (which would
 * affect the following _decodeAlpha call) is not visible here -- confirm in
 * yuv_util.h. */
void decodeAGrey(struct TheoraPixelTransform* t)
{
_decodeGrey3(incOut(t, 1), t->w * 4, 4);
_decodeAlpha(t, t->w * 4);
}
/* Padding + grey output with four bytes per pixel: the grey pass runs on the
 * transform returned by incOut(t, 1); no alpha is written.
 * NOTE(review): presumably incOut shifts the output by one byte so grey fills
 * bytes 1..3 -- confirm in yuv_util.h. */
void decodeXGrey(struct TheoraPixelTransform* t)
{
_decodeGrey3(incOut(t, 1), t->w * 4, 4);
}

View file

@ -1,358 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#ifdef _YUV_C
#include "yuv_util.h"
/* Lookup tables indexed by an 8-bit sample value (Y, U or V) and used by the
 * _decodeRGB / _decodeRGBA macros below. They are not filled in this part of
 * the file. The table values are summed and then shifted right by 13 in
 * CLIP_RGB_COLOR, i.e. they hold scaled integer contributions. */
int YTable [256];
int BUTable[256];
int GUTable[256];
int GVTable[256];
int RVTable[256];
/* Scales x down by 2^13 and stores it in dst, clamped to 0..255.
 * Requires an int variable named `tmp` in the enclosing scope.
 * If the scaled value fits in 8 bits it is stored directly. Otherwise
 * (-tmp) >> 31 is stored: for negative tmp this is 0, for tmp > 255 it is -1,
 * which becomes 255 when written to an unsigned char.
 * NOTE(review): this relies on 32-bit int and an arithmetic right shift of
 * negative values -- implementation-defined, confirm for new targets. */
#define CLIP_RGB_COLOR(dst, x) \
tmp = (x) >> 13;\
if ((tmp & ~0xFF) == 0) dst = tmp;\
else dst = (-tmp) >> 31;
/* Function-body macro converting planar YUV to packed RGB-style output.
 *
 *   t          - TheoraPixelTransform with y/u/v planes, their strides, w, h and out
 *   stride     - size of one output row in bytes
 *   nBytes     - bytes per output pixel
 *   maxWidth   - number of luma columns to convert; 0 means t->w
 *   i1,i2,i3   - byte offsets (from the current output position) of the
 *                R-, G- and B-derived values of the first pixel of a pair
 *   j1,j2,j3   - the same for the second pixel of the pair
 *
 * Each outer iteration handles two rows (even and odd); each inner iteration
 * reads one U and one V sample and applies them to a 2x2 block of luma
 * samples, as in 4:2:0 chroma subsampling, then advances both output rows by
 * two pixels.
 *
 * NOTE(review): the inner loop ends on `ySrcEven != yLineEnd` while advancing
 * by two, and the outer loop always touches row y + 1, so width and height are
 * presumably expected to be even -- confirm with callers.
 * No comments are placed inside the macro because `//` would swallow the line
 * continuations. */
#define _decodeRGB(t, stride, nBytes, maxWidth, i1, i2, i3, j1, j2, j3)\
register int tmp;\
int nBytes2 = nBytes * 2, cv, cu, rgbY1, rgbY2, rgbY3, rgbY4, rV, gUV, bU, width = maxWidth == 0 ? t->w : maxWidth;\
unsigned int y;\
unsigned char *ySrcEven, *ySrcOdd, *yLineEnd, *uSrc, *vSrc, *out1, *out2;\
\
for (y = 0; y < t->h; y += 2)\
{\
ySrcEven = t->y + y * t->yStride;\
ySrcOdd = t->y + (y + 1) * t->yStride;\
uSrc = t->u + y * t->uStride / 2;\
vSrc = t->v + y * t->vStride / 2;\
out1 = t->out + y * stride;\
out2 = t->out + (y + 1) * stride;\
\
for (yLineEnd = ySrcEven + width; ySrcEven != yLineEnd;)\
{\
cu = *uSrc; ++uSrc;\
cv = *vSrc; ++vSrc;\
rV = RVTable[cv];\
gUV = GUTable[cu] + GVTable[cv];\
bU = BUTable[cu];\
\
rgbY1 = YTable[*ySrcEven]; ++ySrcEven;\
rgbY2 = YTable[*ySrcOdd]; ++ySrcOdd;\
rgbY3 = YTable[*ySrcEven]; ++ySrcEven;\
rgbY4 = YTable[*ySrcOdd]; ++ySrcOdd;\
\
CLIP_RGB_COLOR(out1[i1], rgbY1 + rV );\
CLIP_RGB_COLOR(out1[i2], rgbY1 - gUV);\
CLIP_RGB_COLOR(out1[i3], rgbY1 + bU );\
\
CLIP_RGB_COLOR(out2[i1], rgbY2 + rV );\
CLIP_RGB_COLOR(out2[i2], rgbY2 - gUV);\
CLIP_RGB_COLOR(out2[i3], rgbY2 + bU );\
\
CLIP_RGB_COLOR(out1[j1], rgbY3 + rV );\
CLIP_RGB_COLOR(out1[j2], rgbY3 - gUV);\
CLIP_RGB_COLOR(out1[j3], rgbY3 + bU );\
\
CLIP_RGB_COLOR(out2[j1], rgbY4 + rV );\
CLIP_RGB_COLOR(out2[j2], rgbY4 - gUV);\
CLIP_RGB_COLOR(out2[j3], rgbY4 + bU );\
\
out1 += nBytes2; out2 += nBytes2;\
}\
}
// Expands to the body of a planar YUV + alpha -> packed 4-byte-per-pixel converter operating on the
// TheoraPixelTransform 't'. The alpha sample of a pixel is read t->w bytes to the right of its luma
// sample in the Y plane.
// The 'trick' with this macro is that it skips the YUV decoding of a pixel whose alpha sample is 16
// or lower and simply zeroes the whole 4-byte output pixel, which speeds up decoding of a frame.
// Alpha samples above 16 are rescaled from the 16..235 range to 0..255 (235 and above become 255).
//   stride            - bytes per output row
//   nBytes            - bytes per output pixel (the zeroing path assumes 4)
//   maxWidth          - number of luma columns to convert; 0 means use t->w
//   i1, i2, i3        - byte offsets of red, green and blue for the first pixel of a pair
//   j1, j2, j3        - the same offsets for the second pixel, relative to the first pixel's address
//   aindex1, aindex2  - byte offsets of the alpha channel for the first and second pixel
// The macro declares its own locals (including 'tmp', which CLIP_RGB_COLOR needs), so it has to be
// expanded at the top of a function body.
// The final line deliberately has no line-continuation backslash, so the definition ends there and
// cannot absorb whatever line follows it.
#define _decodeRGBA(t, stride, nBytes, maxWidth, i1, i2, i3, j1, j2, j3, aindex1, aindex2)\
	\
	register int tmp;\
	int nBytes2 = nBytes * 2, cv, cu, rgbY1, rgbY2, rgbY3, rgbY4, a1, a2, a3, a4, rV, gUV, bU, width = maxWidth == 0 ? t->w : maxWidth;\
	int alphaStride = t->w;\
	unsigned int y;\
	unsigned char *ySrcEven, *ySrcOdd, *yLineEnd, *uSrc, *vSrc, *out1, *out2;\
	\
	for (y = 0; y < t->h; y += 2)\
	{\
		ySrcEven = t->y + y * t->yStride;\
		ySrcOdd = t->y + (y + 1) * t->yStride;\
		uSrc = t->u + y * t->uStride / 2;\
		vSrc = t->v + y * t->vStride / 2;\
		out1 = t->out + y * stride;\
		out2 = t->out + (y + 1) * stride;\
		\
		for (yLineEnd = ySrcEven + width; ySrcEven != yLineEnd;)\
		{\
			cu = *uSrc; ++uSrc;\
			cv = *vSrc; ++vSrc;\
			rV = RVTable[cv];\
			gUV = GUTable[cu] + GVTable[cv];\
			bU = BUTable[cu];\
			\
			rgbY1 = YTable[*ySrcEven]; a1 = ySrcEven[alphaStride]; ++ySrcEven;\
			rgbY2 = YTable[*ySrcOdd]; a2 = ySrcOdd [alphaStride]; ++ySrcOdd;\
			rgbY3 = YTable[*ySrcEven]; a3 = ySrcEven[alphaStride]; ++ySrcEven;\
			rgbY4 = YTable[*ySrcOdd]; a4 = ySrcOdd [alphaStride]; ++ySrcOdd;\
			\
			if (a1 > 16)\
			{\
				CLIP_RGB_COLOR(out1[i1], rgbY1 + rV );\
				CLIP_RGB_COLOR(out1[i2], rgbY1 - gUV);\
				CLIP_RGB_COLOR(out1[i3], rgbY1 + bU );\
				out1[aindex1] = a1 >= 235 ? 255 : (unsigned char) (((a1 - 16) * 255) / 219);\
			}\
			else *((unsigned int*) out1) = 0;\
			\
			if (a2 > 16)\
			{\
				CLIP_RGB_COLOR(out2[i1], rgbY2 + rV );\
				CLIP_RGB_COLOR(out2[i2], rgbY2 - gUV);\
				CLIP_RGB_COLOR(out2[i3], rgbY2 + bU );\
				out2[aindex1] = a2 >= 235 ? 255 : (unsigned char) (((a2 - 16) * 255) / 219);\
			}\
			else *((unsigned int*) out2) = 0;\
			\
			if (a3 > 16)\
			{\
				CLIP_RGB_COLOR(out1[j1], rgbY3 + rV );\
				CLIP_RGB_COLOR(out1[j2], rgbY3 - gUV);\
				CLIP_RGB_COLOR(out1[j3], rgbY3 + bU );\
				out1[aindex2] = a3 >= 235 ? 255 : (unsigned char) (((a3 - 16) * 255) / 219);\
			}\
			else *((unsigned int*) &out1[4]) = 0;\
			\
			if (a4 > 16)\
			{\
				CLIP_RGB_COLOR(out2[j1], rgbY4 + rV );\
				CLIP_RGB_COLOR(out2[j2], rgbY4 - gUV);\
				CLIP_RGB_COLOR(out2[j3], rgbY4 + bU );\
				out2[aindex2] = a4 >= 235 ? 255 : (unsigned char) (((a4 - 16) * 255) / 219);\
			}\
			else *((unsigned int*) &out2[4]) = 0;\
			\
			out1 += nBytes2; out2 += nBytes2;\
		}\
	}
// Converts the frame described by t to packed 3-byte pixels with bytes ordered R, G, B.
// The output row length is t->w * 3 bytes.
void decodeRGB(struct TheoraPixelTransform* t)
{
_decodeRGB(t, t->w * 3, 3, 0, 0, 1, 2, 3, 4, 5);
}
// Converts the frame described by t to packed 4-byte pixels with bytes ordered R, G, B, A.
// Colour and alpha are produced in a single pass by the _decodeRGBA macro.
void decodeRGBA(struct TheoraPixelTransform* t)
{
_decodeRGBA(t, t->w * 4, 4, 0, 0, 1, 2, 4, 5, 6, 3, 7);
// This is the old 2-phase version, leaving it here in case more debugging is needed
// _decodeRGB(t, t->w * 4, 4, 0, 0, 1, 2, 4, 5, 6);
// _decodeAlpha(incOut(t, 3), t->w * 4);
}
// Converts the frame described by t to packed 4-byte pixels with R, G, B in bytes 0..2.
// Byte 3 of each pixel is not written by the conversion.
void decodeRGBX(struct TheoraPixelTransform* t)
{
_decodeRGB(t, t->w * 4, 4, 0, 0, 1, 2, 4, 5, 6);
}
// Converts the frame described by t to packed 4-byte pixels with bytes ordered A, R, G, B.
// Colour and alpha are produced in a single pass by the _decodeRGBA macro.
void decodeARGB(struct TheoraPixelTransform* t)
{
_decodeRGBA(t, t->w * 4, 4, 0, 1, 2, 3, 5, 6, 7, 0, 4);
// This is the old 2-phase version, leaving it here in case more debugging is needed
// _decodeRGB(t, t->w * 4, 4, 0, 1, 2, 3, 5, 6, 7);
// _decodeAlpha(t, t->w * 4);
}
// Converts the frame described by t to packed 4-byte pixels with R, G, B in bytes 1..3.
// Byte 0 of each pixel is not written by the conversion.
void decodeXRGB(struct TheoraPixelTransform* t)
{
_decodeRGB(t, t->w * 4, 4, 0, 1, 2, 3, 5, 6, 7);
}
// Converts the frame described by t to packed 3-byte pixels with bytes ordered B, G, R.
// The output row length is t->w * 3 bytes.
void decodeBGR(struct TheoraPixelTransform* t)
{
_decodeRGB(t, t->w * 3, 3, 0, 2, 1, 0, 5, 4, 3);
}
// Converts the frame described by t to packed 4-byte pixels with bytes ordered B, G, R, A.
// Colour and alpha are produced in a single pass by the _decodeRGBA macro.
void decodeBGRA(struct TheoraPixelTransform* t)
{
_decodeRGBA(t, t->w * 4, 4, 0, 2, 1, 0, 6, 5, 4, 3, 7);
// This is the old 2-phase version, leaving it here in case more debugging is needed
// _decodeRGB(t, t->w * 4, 4, 0, 2, 1, 0, 6, 5, 4);
// _decodeAlpha(incOut(t, 3), t->w * 4);
}
// Converts the frame described by t to packed 4-byte pixels with B, G, R in bytes 0..2.
// Byte 3 of each pixel is not written by the conversion.
void decodeBGRX(struct TheoraPixelTransform* t)
{
_decodeRGB(t, t->w * 4, 4, 0, 2, 1, 0, 6, 5, 4);
}
// Converts the frame described by t to packed 4-byte pixels with bytes ordered A, B, G, R.
// Colour and alpha are produced in a single pass by the _decodeRGBA macro.
void decodeABGR(struct TheoraPixelTransform* t)
{
_decodeRGBA(t, t->w * 4, 4, 0, 3, 2, 1, 7, 6, 5, 0, 4);
// This is the old 2-phase version, leaving it here in case more debugging is needed
// _decodeRGB(t, t->w * 4, 4, 0, 3, 2, 1, 7, 6, 5);
// _decodeAlpha(t, t->w * 4);
}
// Converts the frame described by t to packed 4-byte pixels with B, G, R in bytes 1..3.
// Byte 0 of each pixel is not written by the conversion.
void decodeXBGR(struct TheoraPixelTransform* t)
{
_decodeRGB(t, t->w * 4, 4, 0, 3, 2, 1, 7, 6, 5);
}
void initYUVConversionModule()
{
	// Fills the five fixed-point lookup tables used by the YUV -> RGB converters.
	// Every coefficient is multiplied by 2^13 up front so that the per-pixel work
	// needs only integer additions and a shift instead of floating point.
	// Reference formulas the tables implement:
	//   r = 1.164*(y - 16) + 1.596*(v - 128)
	//   b = 1.164*(y - 16) + 2.018*(u - 128)
	//   g = 1.164*(y - 16) - 0.813*(v - 128) - 0.391*(u - 128)
	double fixedOne = 1L << 13;
	double chroma;
	int sample = 0;
	while (sample < 256)
	{
		chroma = sample - 128; // chroma terms are taken relative to 128
		YTable[sample] = (int)((1.164 * fixedOne + 0.5) * (sample - 16)); // luma term, relative to 16
		BUTable[sample] = (int)((2.018 * fixedOne + 0.5) * chroma); // U contribution to blue
		GVTable[sample] = (int)((0.813 * fixedOne + 0.5) * chroma); // V contribution to green
		GUTable[sample] = (int)((0.391 * fixedOne + 0.5) * chroma); // U contribution to green
		RVTable[sample] = (int)((1.596 * fixedOne + 0.5) * chroma); // V contribution to red
		++sample;
	}
}
/*
* Below are the function versions of the above macros, use those for debugging, but leave the macros for maximum CPU execution speed
*
*
*
*
void _decodeRGB(struct TheoraPixelTransform* t, int stride, int nBytes, int maxWidth, int i1, int i2, int i3, int j1, int j2, int j3)
{
register int tmp;
int nBytes2 = nBytes * 2, cv, cu, rgbY1, rgbY2, rgbY3, rgbY4, rV, gUV, bU, width = maxWidth == 0 ? t->w : maxWidth;
unsigned int y;
unsigned char *ySrcEven, *ySrcOdd, *yLineEnd, *uSrc, *vSrc, *out1, *out2;
for (y = 0; y < t->h; y += 2)
{
ySrcEven = t->y + y * t->yStride;
ySrcOdd = t->y + (y + 1) * t->yStride;
uSrc = t->u + y * t->uStride / 2;
vSrc = t->v + y * t->vStride / 2;
out1 = t->out + y * stride;
out2 = t->out + (y + 1) * stride;
for (yLineEnd = ySrcEven + width; ySrcEven != yLineEnd;)
{
cu = *uSrc; ++uSrc;
cv = *vSrc; ++vSrc;
rV = RVTable[cv];
gUV = GUTable[cu] + GVTable[cv];
bU = BUTable[cu];
rgbY1 = YTable[*ySrcEven]; ++ySrcEven;
rgbY2 = YTable[*ySrcOdd]; ++ySrcOdd;
rgbY3 = YTable[*ySrcEven]; ++ySrcEven;
rgbY4 = YTable[*ySrcOdd]; ++ySrcOdd;
CLIP_RGB_COLOR(out1[i1], rgbY1 + rV );
CLIP_RGB_COLOR(out1[i2], rgbY1 - gUV);
CLIP_RGB_COLOR(out1[i3], rgbY1 + bU );
CLIP_RGB_COLOR(out2[i1], rgbY2 + rV );
CLIP_RGB_COLOR(out2[i2], rgbY2 - gUV);
CLIP_RGB_COLOR(out2[i3], rgbY2 + bU );
CLIP_RGB_COLOR(out1[j1], rgbY3 + rV );
CLIP_RGB_COLOR(out1[j2], rgbY3 - gUV);
CLIP_RGB_COLOR(out1[j3], rgbY3 + bU );
CLIP_RGB_COLOR(out2[j1], rgbY4 + rV );
CLIP_RGB_COLOR(out2[j2], rgbY4 - gUV);
CLIP_RGB_COLOR(out2[j3], rgbY4 + bU );
out1 += nBytes2; out2 += nBytes2;
}
}
}
void _decodeRGBA(struct TheoraPixelTransform* t, int stride, int nBytes, int maxWidth, int i1, int i2, int i3, int j1, int j2, int j3, int aindex1, int aindex2)
{
register int tmp;
int nBytes2 = nBytes * 2, cv, cu, rgbY1, rgbY2, rgbY3, rgbY4, a1, a2, a3, a4, rV, gUV, bU, width = maxWidth == 0 ? t->w : maxWidth;
int alphaStride = t->w;
unsigned int y;
unsigned char *ySrcEven, *ySrcOdd, *yLineEnd, *uSrc, *vSrc, *out1, *out2;
for (y = 0; y < t->h; y += 2)
{
ySrcEven = t->y + y * t->yStride;
ySrcOdd = t->y + (y + 1) * t->yStride;
uSrc = t->u + y * t->uStride / 2;
vSrc = t->v + y * t->vStride / 2;
out1 = t->out + y * stride;
out2 = t->out + (y + 1) * stride;
for (yLineEnd = ySrcEven + width; ySrcEven != yLineEnd;)
{
cu = *uSrc; ++uSrc;
cv = *vSrc; ++vSrc;
rV = RVTable[cv];
gUV = GUTable[cu] + GVTable[cv];
bU = BUTable[cu];
rgbY1 = YTable[*ySrcEven]; a1 = ySrcEven[alphaStride]; ++ySrcEven;
rgbY2 = YTable[*ySrcOdd]; a2 = ySrcOdd [alphaStride]; ++ySrcOdd;
rgbY3 = YTable[*ySrcEven]; a3 = ySrcEven[alphaStride]; ++ySrcEven;
rgbY4 = YTable[*ySrcOdd]; a4 = ySrcOdd [alphaStride]; ++ySrcOdd;
if (a1 >= 32)
{
CLIP_RGB_COLOR(out1[i1], rgbY1 + rV );
CLIP_RGB_COLOR(out1[i2], rgbY1 - gUV);
CLIP_RGB_COLOR(out1[i3], rgbY1 + bU );
out1[aindex1] = a1 > 224 ? 255 : a1;
}
else *((unsigned int*) out1) = 0;
if (a2 >= 32)
{
CLIP_RGB_COLOR(out2[i1], rgbY2 + rV );
CLIP_RGB_COLOR(out2[i2], rgbY2 - gUV);
CLIP_RGB_COLOR(out2[i3], rgbY2 + bU );
out2[aindex1] = a2 > 224 ? 255 : a2;
}
else *((unsigned int*) out2) = 0;
if (a3 >= 32)
{
CLIP_RGB_COLOR(out1[j1], rgbY3 + rV );
CLIP_RGB_COLOR(out1[j2], rgbY3 - gUV);
CLIP_RGB_COLOR(out1[j3], rgbY3 + bU );
out1[aindex2] = a3 > 224 ? 255 : a3;
}
else *((unsigned int*) &out1[4]) = 0;
if (a4 >= 32)
{
CLIP_RGB_COLOR(out2[j1], rgbY4 + rV );
CLIP_RGB_COLOR(out2[j2], rgbY4 - gUV);
CLIP_RGB_COLOR(out2[j3], rgbY4 + bU );
out2[aindex2] = a4 > 224 ? 255 : a4;
}
else *((unsigned int*) &out2[4]) = 0;
out1 += nBytes2; out2 += nBytes2;
}
}
}
*/
#endif

View file

@ -1,86 +0,0 @@
/************************************************************************************
This source file is part of the Theora Video Playback Library
For latest info, see http://libtheoraplayer.googlecode.com
*************************************************************************************
Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
This program is free software; you can redistribute it and/or modify it under
the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
*************************************************************************************/
#include "yuv_util.h"
// Repacks the planar frame described by t into interleaved pixels whose first three
// bytes are Y, U, V. Each U/V sample pair is shared by a 2x2 block of luma samples,
// so two output rows are produced per outer iteration and two pixels per inner one.
//   stride   - bytes per output row
//   nBytes   - bytes per output pixel; bytes past the third are left untouched
//   maxWidth - number of luma columns to convert; 0 means use t->w
static void _decodeYUV(struct TheoraPixelTransform* t, int stride, int nBytes, int maxWidth)
{
	int lineWidth = (maxWidth != 0) ? maxWidth : t->w;
	unsigned char *lumaTop, *lumaBottom, *lumaStop, *chromaU, *chromaV, *dstTop, *dstBottom;
	unsigned char u, v, topLeft, bottomLeft, topRight, bottomRight;
	unsigned int row;
	for (row = 0; row < t->h; row += 2)
	{
		lumaTop = t->y + row * t->yStride;
		lumaBottom = t->y + (row + 1) * t->yStride;
		chromaU = t->u + row * t->uStride / 2;
		chromaV = t->v + row * t->vStride / 2;
		dstTop = t->out + row * stride;
		dstBottom = t->out + (row + 1) * stride;
		lumaStop = lumaTop + lineWidth;
		while (lumaTop != lumaStop)
		{
			// gather one chroma pair and the 2x2 luma block it covers
			u = *chromaU++;
			v = *chromaV++;
			topLeft = *lumaTop++;
			bottomLeft = *lumaBottom++;
			topRight = *lumaTop++;
			bottomRight = *lumaBottom++;
			// left (even) column of the block
			dstTop[0] = topLeft;
			dstTop[1] = u;
			dstTop[2] = v;
			dstBottom[0] = bottomLeft;
			dstBottom[1] = u;
			dstBottom[2] = v;
			// right (odd) column of the block, one pixel further along
			dstTop[nBytes] = topRight;
			dstTop[nBytes + 1] = u;
			dstTop[nBytes + 2] = v;
			dstBottom[nBytes] = bottomRight;
			dstBottom[nBytes + 1] = u;
			dstBottom[nBytes + 2] = v;
			dstTop += nBytes;
			dstTop += nBytes;
			dstBottom += nBytes;
			dstBottom += nBytes;
		}
	}
}
// Repacks the frame described by t into interleaved 3-byte pixels ordered Y, U, V.
// The output row length is t->w * 3 bytes.
void decodeYUV(struct TheoraPixelTransform* t)
{
_decodeYUV(t, t->w * 3, 3, 0);
}
// Repacks the frame described by t into interleaved 4-byte pixels with Y, U, V in bytes 0..2,
// then runs _decodeAlpha on the result of incOut(t, 3).
// NOTE(review): incOut(t, 3) presumably yields a transform whose output pointer is advanced by
// 3 bytes so that _decodeAlpha fills byte 3 of every pixel - confirm against yuv_util.
void decodeYUVA(struct TheoraPixelTransform* t)
{
_decodeYUV(t, t->w * 4, 4, 0);
_decodeAlpha(incOut(t, 3), t->w * 4);
}
// Repacks the frame described by t into interleaved 4-byte pixels with Y, U, V in bytes 0..2.
// Byte 3 of each pixel is not written by _decodeYUV.
void decodeYUVX(struct TheoraPixelTransform* t)
{
_decodeYUV(t, t->w * 4, 4, 0);
}
// Repacks the frame into interleaved 4-byte pixels: _decodeYUV runs on the result of incOut(t, 1)
// and _decodeAlpha runs on t itself.
// NOTE(review): incOut(t, 1) presumably advances the output pointer by one byte, giving the
// layout A, Y, U, V with alpha in byte 0 - confirm against yuv_util.
void decodeAYUV(struct TheoraPixelTransform* t)
{
_decodeYUV(incOut(t, 1), t->w * 4, 4, 0);
_decodeAlpha(t, t->w * 4);
}
// Repacks the frame into interleaved 4-byte pixels by running _decodeYUV on the result of incOut(t, 1).
// NOTE(review): incOut(t, 1) presumably advances the output pointer by one byte, so Y, U, V land in
// bytes 1..3 and byte 0 is left unwritten - confirm against yuv_util.
void decodeXYUV(struct TheoraPixelTransform* t)
{
_decodeYUV(incOut(t, 1), t->w * 4, 4, 0);
}

File diff suppressed because it is too large Load diff

View file

@ -1,212 +0,0 @@
/*
* Copyright (C) 2010 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef CPU_FEATURES_H
#define CPU_FEATURES_H
#include <sys/cdefs.h>
#include <stdint.h>
__BEGIN_DECLS
/* CPU architecture families; this is the return type of android_getCpuFamily().
 * ANDROID_CPU_FAMILY_MAX is a sentinel that must stay last. */
typedef enum {
ANDROID_CPU_FAMILY_UNKNOWN = 0,
ANDROID_CPU_FAMILY_ARM,
ANDROID_CPU_FAMILY_X86,
ANDROID_CPU_FAMILY_MIPS,
ANDROID_CPU_FAMILY_MAX /* do not remove */
} AndroidCpuFamily;
/* Return family of the device's CPU */
extern AndroidCpuFamily android_getCpuFamily(void);
/* The list of feature flags for ARM CPUs that can be recognized by the
* library. Value details are:
*
* VFPv2:
* CPU supports the VFPv2 instruction set. Many, but not all, ARMv6 CPUs
* support these instructions. VFPv2 is a subset of VFPv3 so this will
* be set whenever VFPv3 is set too.
*
* ARMv7:
* CPU supports the ARMv7-A basic instruction set.
* This feature is mandated by the 'armeabi-v7a' ABI.
*
* VFPv3:
* CPU supports the VFPv3-D16 instruction set, providing hardware FPU
* support for single and double precision floating point registers.
* Note that only 16 FPU registers are available by default, unless
* the D32 bit is set too. This feature is also mandated by the
* 'armeabi-v7a' ABI.
*
* VFP_D32:
* CPU VFP optional extension that provides 32 FPU registers,
* instead of 16. Note that ARM mandates this feature if the 'NEON'
* feature is implemented by the CPU.
*
* NEON:
* CPU FPU supports "ARM Advanced SIMD" instructions, also known as
* NEON. Note that this mandates the VFP_D32 feature as well, per the
* ARM Architecture specification.
*
* VFP_FP16:
* Half-width floating precision VFP extension. If set, the CPU
* supports instructions to perform floating-point operations on
* 16-bit registers. This is part of the VFPv4 specification, but
* not mandated by any Android ABI.
*
* VFP_FMA:
* Fused multiply-accumulate VFP instructions extension. Also part of
* the VFPv4 specification, but not mandated by any Android ABI.
*
* NEON_FMA:
* Fused multiply-accumulate NEON instructions extension. Optional
* extension from the VFPv4 specification, but not mandated by any
* Android ABI.
*
* IDIV_ARM:
* Integer division available in ARM mode. Only available
* on recent CPUs (e.g. Cortex-A15).
*
* IDIV_THUMB2:
* Integer division available in Thumb-2 mode. Only available
* on recent CPUs (e.g. Cortex-A15).
*
* iWMMXt:
* Optional extension that adds MMX registers and operations to an
* ARM CPU. This is only available on a few XScale-based CPU designs
* sold by Marvell. Pretty rare in practice.
*
* If you want to tell the compiler to generate code that targets one of
* the feature set above, you should probably use one of the following
* flags (for more details, see technical note at the end of this file):
*
* -mfpu=vfp
* -mfpu=vfpv2
* These are equivalent and tell GCC to use VFPv2 instructions for
* floating-point operations. Use this if you want your code to
* run on *some* ARMv6 devices, and any ARMv7-A device supported
* by Android.
*
* Generated code requires VFPv2 feature.
*
* -mfpu=vfpv3-d16
* Tell GCC to use VFPv3 instructions (using only 16 FPU registers).
* This should be generic code that runs on any CPU that supports the
* 'armeabi-v7a' Android ABI. Note that no ARMv6 CPU supports this.
*
* Generated code requires VFPv3 feature.
*
* -mfpu=vfpv3
* Tell GCC to use VFPv3 instructions with 32 FPU registers.
* Generated code requires VFPv3|VFP_D32 features.
*
* -mfpu=neon
* Tell GCC to use VFPv3 instructions with 32 FPU registers, and
* also support NEON intrinsics (see <arm_neon.h>).
* Generated code requires VFPv3|VFP_D32|NEON features.
*
* -mfpu=vfpv4-d16
* Generated code requires VFPv3|VFP_FP16|VFP_FMA features.
*
* -mfpu=vfpv4
* Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32 features.
*
* -mfpu=neon-vfpv4
* Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32|NEON|NEON_FMA
* features.
*
* -mcpu=cortex-a7
* -mcpu=cortex-a15
* Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32|
* NEON|NEON_FMA|IDIV_ARM|IDIV_THUMB2
* This flag implies -mfpu=neon-vfpv4.
*
* -mcpu=iwmmxt
* Allows the use of iWMMXt intrinsics with GCC.
*/
/* ARM CPU feature flags. Each enumerator is a distinct single bit, so several
 * features can be combined into one mask with bitwise OR.
 * NOTE(review): LDREX_STREX has no entry in the feature descriptions above. */
enum {
ANDROID_CPU_ARM_FEATURE_ARMv7 = (1 << 0),
ANDROID_CPU_ARM_FEATURE_VFPv3 = (1 << 1),
ANDROID_CPU_ARM_FEATURE_NEON = (1 << 2),
ANDROID_CPU_ARM_FEATURE_LDREX_STREX = (1 << 3),
ANDROID_CPU_ARM_FEATURE_VFPv2 = (1 << 4),
ANDROID_CPU_ARM_FEATURE_VFP_D32 = (1 << 5),
ANDROID_CPU_ARM_FEATURE_VFP_FP16 = (1 << 6),
ANDROID_CPU_ARM_FEATURE_VFP_FMA = (1 << 7),
ANDROID_CPU_ARM_FEATURE_NEON_FMA = (1 << 8),
ANDROID_CPU_ARM_FEATURE_IDIV_ARM = (1 << 9),
ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 = (1 << 10),
ANDROID_CPU_ARM_FEATURE_iWMMXt = (1 << 11),
};
/* x86 CPU feature flags. Each enumerator is a distinct single bit, so several
 * features can be combined into one mask with bitwise OR. */
enum {
ANDROID_CPU_X86_FEATURE_SSSE3 = (1 << 0),
ANDROID_CPU_X86_FEATURE_POPCNT = (1 << 1),
ANDROID_CPU_X86_FEATURE_MOVBE = (1 << 2),
};
// libtheoraplayer addition: renamed this to "Ext" so as not to conflict with your own project if you have included cpu-features.c in it
//extern uint64_t android_getCpuFeaturesExt(void);
#define android_getCpuFeaturesExt android_getCpuFeatures
/* Return the number of CPU cores detected on this device. */
extern int android_getCpuCount(void);
/* The following is used to force the CPU count and features
* mask in sandboxed processes. Under 4.1 and higher, these processes
* cannot access /proc, which is the only way to get information from
* the kernel about the current hardware (at least on ARM).
*
* It _must_ be called only once, and before any android_getCpuXXX
* function, any other case will fail.
*
* This function returns 1 on success, and 0 on failure.
*/
extern int android_setCpu(int cpu_count,
uint64_t cpu_features);
#ifdef __arm__
/* Retrieve the ARM 32-bit CPUID value from the kernel.
* Note that this cannot work on sandboxed processes under 4.1 and
* higher, unless you called android_setCpuArm() before.
*/
extern uint32_t android_getCpuIdArm(void);
/* An ARM-specific variant of android_setCpu() that also allows you
* to set the ARM CPUID field.
*/
extern int android_setCpuArm(int cpu_count,
uint64_t cpu_features,
uint32_t cpu_id);
#endif
__END_DECLS
#endif /* CPU_FEATURES_H */

View file

@ -1,29 +0,0 @@
Copyright 2011 The LibYuv Project Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Google nor the names of its contributors may
be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -1,8 +0,0 @@
This source tree contains third party source code which is governed by third
party licenses. This file contains references to files which are under other
licenses than the one provided in the LICENSE file in the root of the source
tree.
Files governed by third party licenses:
source/x86inc.asm

View file

@ -1,33 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_H_ // NOLINT
#define INCLUDE_LIBYUV_H_
#include "libyuv/basic_types.h"
#include "libyuv/compare.h"
#include "libyuv/convert.h"
#include "libyuv/convert_argb.h"
#include "libyuv/convert_from.h"
#include "libyuv/convert_from_argb.h"
#include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h"
#include "libyuv/mjpeg_decoder.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "libyuv/rotate_argb.h"
#include "libyuv/row.h"
#include "libyuv/scale.h"
#include "libyuv/scale_argb.h"
#include "libyuv/scale_row.h"
#include "libyuv/version.h"
#include "libyuv/video_common.h"
#endif // INCLUDE_LIBYUV_H_ NOLINT

View file

@ -1,118 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_BASIC_TYPES_H_ // NOLINT
#define INCLUDE_LIBYUV_BASIC_TYPES_H_
#include <stddef.h> // for NULL, size_t
#if defined(__ANDROID__) || (defined(_MSC_VER) && (_MSC_VER < 1600))
#include <sys/types.h> // for uintptr_t on x86
#else
#include <stdint.h> // for uintptr_t
#endif
// Integer aliases (uint8/int8 ... uint64/int64) together with the INT64_C / UINT64_C
// literal-suffix macros and INT64_F. The whole section is skipped when GG_LONGLONG or
// INT_TYPES_DEFINED is already defined, so another header may supply the same names.
// The 64-bit types are chosen per toolchain: __int64 for COMPILER_MSVC, long on LP64
// targets other than OpenBSD and Apple, and long long everywhere else.
// NOTE(review): INT64_F ("I64", "l", "ll") is presumably the printf length modifier for
// the 64-bit types - confirm at the point of use.
#ifndef GG_LONGLONG
#ifndef INT_TYPES_DEFINED
#define INT_TYPES_DEFINED
#ifdef COMPILER_MSVC
typedef unsigned __int64 uint64;
typedef __int64 int64;
#ifndef INT64_C
#define INT64_C(x) x ## I64
#endif
#ifndef UINT64_C
#define UINT64_C(x) x ## UI64
#endif
#define INT64_F "I64"
#else // COMPILER_MSVC
#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
typedef unsigned long uint64; // NOLINT
typedef long int64; // NOLINT
#ifndef INT64_C
#define INT64_C(x) x ## L
#endif
#ifndef UINT64_C
#define UINT64_C(x) x ## UL
#endif
#define INT64_F "l"
#else // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
typedef unsigned long long uint64; // NOLINT
typedef long long int64; // NOLINT
#ifndef INT64_C
#define INT64_C(x) x ## LL
#endif
#ifndef UINT64_C
#define UINT64_C(x) x ## ULL
#endif
#define INT64_F "ll"
#endif // __LP64__
#endif // COMPILER_MSVC
typedef unsigned int uint32;
typedef int int32;
typedef unsigned short uint16; // NOLINT
typedef short int16; // NOLINT
typedef unsigned char uint8;
typedef signed char int8;
#endif // INT_TYPES_DEFINED
#endif // GG_LONGLONG
// Detect whether the compiler is targeting x86 or x64.
#if defined(__x86_64__) || defined(_M_X64) || \
defined(__i386__) || defined(_M_IX86)
#define CPU_X86 1
#endif
// Detect whether the compiler is targeting ARM.
#if defined(__arm__) || defined(_M_ARM)
#define CPU_ARM 1
#endif
// ALIGNP(p, t): rounds pointer p up to the next multiple of t and yields it as uint8*.
// t must be a power of two. The alignment is converted to uintptr_t before the mask is
// built, so a 32-bit unsigned t (e.g. 16u) cannot produce a mask that clears the upper
// bits of a 64-bit pointer.
#ifndef ALIGNP
#ifdef __cplusplus
#define ALIGNP(p, t) \
(reinterpret_cast<uint8*>(((reinterpret_cast<uintptr_t>(p) + \
(static_cast<uintptr_t>(t) - 1)) & ~(static_cast<uintptr_t>(t) - 1))))
#else
#define ALIGNP(p, t) \
((uint8*)((((uintptr_t)(p) + ((uintptr_t)(t) - 1)) & ~((uintptr_t)(t) - 1)))) /* NOLINT */
#endif
#endif
// LIBYUV_API: export/import decoration placed in front of public declarations.
// Left alone if the build already defines it. Otherwise:
//   - Windows/Cygwin: dllexport when LIBYUV_BUILDING_SHARED_LIBRARY is defined,
//     dllimport when LIBYUV_USING_SHARED_LIBRARY is defined, empty otherwise.
//   - GCC 4+ (not Apple) with either shared-library macro defined: default symbol visibility.
//   - Everything else: empty.
#if !defined(LIBYUV_API)
#if defined(_WIN32) || defined(__CYGWIN__)
#if defined(LIBYUV_BUILDING_SHARED_LIBRARY)
#define LIBYUV_API __declspec(dllexport)
#elif defined(LIBYUV_USING_SHARED_LIBRARY)
#define LIBYUV_API __declspec(dllimport)
#else
#define LIBYUV_API
#endif // LIBYUV_BUILDING_SHARED_LIBRARY
#elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__APPLE__) && \
(defined(LIBYUV_BUILDING_SHARED_LIBRARY) || \
defined(LIBYUV_USING_SHARED_LIBRARY))
#define LIBYUV_API __attribute__ ((visibility ("default")))
#else
#define LIBYUV_API
#endif // __GNUC__
#endif // LIBYUV_API
// Integer-based boolean: LIBYUV_BOOL is int, with LIBYUV_FALSE = 0 and LIBYUV_TRUE = 1.
#define LIBYUV_BOOL int
#define LIBYUV_FALSE 0
#define LIBYUV_TRUE 1
// Treat x86/x64 and ARM targets (Visual C or GCC), and any compiler reporting little-endian byte order, as little endian.
#if defined(__x86_64__) || defined(_M_X64) || \
defined(__i386__) || defined(_M_IX86) || \
defined(__arm__) || defined(_M_ARM) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define LIBYUV_LITTLE_ENDIAN
#endif
#endif // INCLUDE_LIBYUV_BASIC_TYPES_H_ NOLINT

View file

@ -1,73 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_COMPARE_H_ // NOLINT
#define INCLUDE_LIBYUV_COMPARE_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Compute a hash for specified memory. Seed of 5381 recommended.
// src is the start of the memory and count its length.
LIBYUV_API
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed);
// Sum Square Error - used to compute Mean Square Error or PSNR.
// Operates on two flat buffers, src_a and src_b, of count elements each.
LIBYUV_API
uint64 ComputeSumSquareError(const uint8* src_a,
const uint8* src_b, int count);
// Plane variant of ComputeSumSquareError: each source is given as a pointer plus a row
// stride, and the compared region is width x height.
LIBYUV_API
uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
const uint8* src_b, int stride_b,
int width, int height);
// NOTE(review): presumably the value reported as PSNR when the inputs are identical
// (zero error) - confirm against the implementation.
static const int kMaxPsnr = 128;
// Converts a sum of squared errors over count samples into a PSNR value.
LIBYUV_API
double SumSquareErrorToPsnr(uint64 sse, uint64 count);
// PSNR between two single planes of size width x height.
LIBYUV_API
double CalcFramePsnr(const uint8* src_a, int stride_a,
const uint8* src_b, int stride_b,
int width, int height);
// PSNR between two frames, each supplied as separate Y, U and V planes with their own strides.
// NOTE(review): the I420 name suggests the U and V planes are subsampled 2x2 - confirm.
LIBYUV_API
double I420Psnr(const uint8* src_y_a, int stride_y_a,
const uint8* src_u_a, int stride_u_a,
const uint8* src_v_a, int stride_v_a,
const uint8* src_y_b, int stride_y_b,
const uint8* src_u_b, int stride_u_b,
const uint8* src_v_b, int stride_v_b,
int width, int height);
// SSIM between two single planes of size width x height.
LIBYUV_API
double CalcFrameSsim(const uint8* src_a, int stride_a,
const uint8* src_b, int stride_b,
int width, int height);
// SSIM between two frames, each supplied as separate Y, U and V planes with their own strides.
LIBYUV_API
double I420Ssim(const uint8* src_y_a, int stride_y_a,
const uint8* src_u_a, int stride_u_a,
const uint8* src_v_a, int stride_v_a,
const uint8* src_y_b, int stride_y_b,
const uint8* src_u_b, int stride_u_b,
const uint8* src_v_b, int stride_v_b,
int width, int height);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_COMPARE_H_ NOLINT

View file

@ -1,254 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CONVERT_H_ // NOLINT
#define INCLUDE_LIBYUV_CONVERT_H_
#include "libyuv/basic_types.h"
// TODO(fbarchard): Remove the following headers includes.
#include "libyuv/convert_from.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Convert I444 to I420.
LIBYUV_API
int I444ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I422 to I420.
LIBYUV_API
int I422ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I411 to I420.
LIBYUV_API
int I411ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Copy I420 to I420.
#define I420ToI420 I420Copy
LIBYUV_API
int I420Copy(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I400 (grey) to I420.
LIBYUV_API
int I400ToI420(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert NV12 to I420.
LIBYUV_API
int NV12ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert NV21 to I420.
LIBYUV_API
int NV21ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_vu, int src_stride_vu,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert YUY2 to I420.
LIBYUV_API
int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert UYVY to I420.
LIBYUV_API
int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert M420 to I420.
LIBYUV_API
int M420ToI420(const uint8* src_m420, int src_stride_m420,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert Q420 to I420.
LIBYUV_API
int Q420ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// ARGB little endian (bgra in memory) to I420.
LIBYUV_API
int ARGBToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// BGRA little endian (argb in memory) to I420.
LIBYUV_API
int BGRAToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// ABGR little endian (rgba in memory) to I420.
LIBYUV_API
int ABGRToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RGBA little endian (abgr in memory) to I420.
LIBYUV_API
int RGBAToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RGB little endian (bgr in memory) to I420.
LIBYUV_API
int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RGB big endian (rgb in memory) to I420.
LIBYUV_API
int RAWToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RGB16 (RGBP fourcc) little endian to I420.
LIBYUV_API
int RGB565ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RGB15 (RGBO fourcc) little endian to I420.
LIBYUV_API
int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RGB12 (R444 fourcc) little endian to I420.
LIBYUV_API
int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
#ifdef HAVE_JPEG
// src_width/height provided by capture.
// dst_width/height for clipping determine final size.
LIBYUV_API
int MJPGToI420(const uint8* sample, size_t sample_size,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int src_width, int src_height,
int dst_width, int dst_height);
// Query size of MJPG in pixels.
LIBYUV_API
int MJPGSize(const uint8* sample, size_t sample_size,
int* width, int* height);
#endif
// Note Bayer formats (BGGR) To I420 are in format_conversion.h
// Convert camera sample to I420 with cropping, rotation and vertical flip.
// "src_size" is needed to parse MJPG.
// "dst_stride_y" number of bytes in a row of the dst_y plane.
// Normally this would be the same as dst_width, with recommended alignment
// to 16 bytes for better efficiency.
// If rotation of 90 or 270 is used, stride is affected. The caller should
// allocate the I420 buffer according to rotation.
// "dst_stride_u" number of bytes in a row of the dst_u plane.
// Normally this would be the same as (dst_width + 1) / 2, with
// recommended alignment to 16 bytes for better efficiency.
// If rotation of 90 or 270 is used, stride is affected.
// "crop_x" and "crop_y" are starting position for cropping.
// To center, crop_x = (src_width - dst_width) / 2
// crop_y = (src_height - dst_height) / 2
// "src_width" / "src_height" is size of src_frame in pixels.
// "src_height" can be negative indicating a vertically flipped image source.
// "crop_width" / "crop_height" is the size to crop the src to.
// Must be less than or equal to src_width/src_height
// Cropping parameters are pre-rotation.
// "rotation" can be 0, 90, 180 or 270.
// "format" is a fourcc. ie 'I420', 'YUY2'
// Returns 0 on success; -1 for an invalid parameter; other non-zero values on failure.
LIBYUV_API
int ConvertToI420(const uint8* src_frame, size_t src_size,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int crop_x, int crop_y,
int src_width, int src_height,
int crop_width, int crop_height,
enum RotationMode rotation,
uint32 format);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CONVERT_H_ NOLINT

View file

@ -1,225 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CONVERT_ARGB_H_ // NOLINT
#define INCLUDE_LIBYUV_CONVERT_ARGB_H_
#include "libyuv/basic_types.h"
// TODO(fbarchard): Remove the following headers includes
#include "libyuv/convert_from.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
// TODO(fbarchard): This set of functions should exactly match convert.h
// Add missing Q420.
// TODO(fbarchard): Add tests. Create random content of right size and convert
// with C vs Opt and or to I420 and compare.
// TODO(fbarchard): Some of these functions lack parameter setting.
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Alias.
#define ARGBToARGB ARGBCopy
// Copy ARGB to ARGB.
LIBYUV_API
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I420 to ARGB.
LIBYUV_API
int I420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I422 to ARGB.
LIBYUV_API
int I422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I444 to ARGB.
LIBYUV_API
int I444ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I411 to ARGB.
LIBYUV_API
int I411ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I400 (grey) to ARGB.
LIBYUV_API
int I400ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Alias.
#define YToARGB I400ToARGB_Reference
// Convert I400 to ARGB. Reverse of ARGBToI400.
LIBYUV_API
int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert NV12 to ARGB.
LIBYUV_API
int NV12ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert NV21 to ARGB.
LIBYUV_API
int NV21ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_vu, int src_stride_vu,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert M420 to ARGB.
LIBYUV_API
int M420ToARGB(const uint8* src_m420, int src_stride_m420,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// TODO(fbarchard): Convert Q420 to ARGB.
// LIBYUV_API
// int Q420ToARGB(const uint8* src_y, int src_stride_y,
// const uint8* src_yuy2, int src_stride_yuy2,
// uint8* dst_argb, int dst_stride_argb,
// int width, int height);
// Convert YUY2 to ARGB.
LIBYUV_API
int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert UYVY to ARGB.
LIBYUV_API
int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// BGRA little endian (argb in memory) to ARGB.
LIBYUV_API
int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// ABGR little endian (rgba in memory) to ARGB.
LIBYUV_API
int ABGRToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// RGBA little endian (abgr in memory) to ARGB.
LIBYUV_API
int RGBAToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Deprecated function name.
#define BG24ToARGB RGB24ToARGB
// RGB little endian (bgr in memory) to ARGB.
LIBYUV_API
int RGB24ToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// RGB big endian (rgb in memory) to ARGB.
LIBYUV_API
int RAWToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// RGB16 (RGBP fourcc) little endian to ARGB.
LIBYUV_API
int RGB565ToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// RGB15 (RGBO fourcc) little endian to ARGB.
LIBYUV_API
int ARGB1555ToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// RGB12 (R444 fourcc) little endian to ARGB.
LIBYUV_API
int ARGB4444ToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
#ifdef HAVE_JPEG
// src_width/height provided by capture
// dst_width/height for clipping determine final size.
LIBYUV_API
int MJPGToARGB(const uint8* sample, size_t sample_size,
uint8* dst_argb, int dst_stride_argb,
int src_width, int src_height,
int dst_width, int dst_height);
#endif
// Note Bayer formats (BGGR) to ARGB are in format_conversion.h.
// Convert camera sample to ARGB with cropping, rotation and vertical flip.
// "src_size" is needed to parse MJPG.
// "dst_stride_argb" number of bytes in a row of the dst_argb plane.
// Alignment to 16 bytes is recommended for better efficiency.
// NOTE(review): this text used to say the stride is normally the same as
// dst_width; for an ARGB buffer it is presumably dst_width * 4 bytes.
// Confirm against the implementation.
// If rotation of 90 or 270 is used, stride is affected. The caller should
// allocate the ARGB destination buffer according to rotation.
// This function has a single destination plane: it takes no "dst_stride_u"
// parameter (the paragraph describing one was carried over from the
// ConvertToI420 comment and did not apply here).
// "crop_x" and "crop_y" are starting position for cropping.
// To center, crop_x = (src_width - dst_width) / 2
// crop_y = (src_height - dst_height) / 2
// NOTE(review): there is no dst_width/dst_height parameter; presumably
// crop_width/crop_height are meant. Confirm.
// "src_width" / "src_height" is size of src_frame in pixels.
// "src_height" can be negative indicating a vertically flipped image source.
// "crop_width" / "crop_height" is the size to crop the src to.
// Must be less than or equal to src_width/src_height
// Cropping parameters are pre-rotation.
// "rotation" can be 0, 90, 180 or 270.
// "format" is a fourcc. ie 'I420', 'YUY2'
// Returns 0 on success; -1 for an invalid parameter; other non-zero values
// on failure.
LIBYUV_API
int ConvertToARGB(const uint8* src_frame, size_t src_size,
uint8* dst_argb, int dst_stride_argb,
int crop_x, int crop_y,
int src_width, int src_height,
int crop_width, int crop_height,
enum RotationMode rotation,
uint32 format);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CONVERT_ARGB_H_ NOLINT

View file

@ -1,173 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CONVERT_FROM_H_ // NOLINT
#define INCLUDE_LIBYUV_CONVERT_FROM_H_
#include "libyuv/basic_types.h"
#include "libyuv/rotate.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// See Also convert.h for conversions from formats to I420.
// For I420 to I420, use I420Copy in convert.h.
LIBYUV_API
int I420ToI422(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
LIBYUV_API
int I420ToI444(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
LIBYUV_API
int I420ToI411(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21.
LIBYUV_API
int I400Copy(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height);
// TODO(fbarchard): I420ToM420
// TODO(fbarchard): I420ToQ420
LIBYUV_API
int I420ToNV12(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_uv, int dst_stride_uv,
int width, int height);
LIBYUV_API
int I420ToNV21(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_vu, int dst_stride_vu,
int width, int height);
LIBYUV_API
int I420ToYUY2(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
LIBYUV_API
int I420ToBGRA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
LIBYUV_API
int I420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
LIBYUV_API
int I420ToRGBA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_rgba, int dst_stride_rgba,
int width, int height);
LIBYUV_API
int I420ToRGB24(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToRAW(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToARGB4444(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Note Bayer formats (BGGR) To I420 are in format_conversion.h.
// Convert I420 to specified format.
// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.
LIBYUV_API
int ConvertFromI420(const uint8* y, int y_stride,
const uint8* u, int u_stride,
const uint8* v, int v_stride,
uint8* dst_sample, int dst_sample_stride,
int width, int height,
uint32 format);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CONVERT_FROM_H_ NOLINT

View file

@ -1,168 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ // NOLINT
#define INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Copy ARGB to ARGB.
#define ARGBToARGB ARGBCopy
LIBYUV_API
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert ARGB To BGRA. (alias)
#define ARGBToBGRA BGRAToARGB
LIBYUV_API
int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert ARGB To ABGR. (alias)
#define ARGBToABGR ABGRToARGB
LIBYUV_API
int ABGRToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert ARGB To RGBA.
LIBYUV_API
int ARGBToRGBA(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert ARGB To RGB24.
LIBYUV_API
int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb24, int dst_stride_rgb24,
int width, int height);
// Convert ARGB To RAW.
LIBYUV_API
int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb, int dst_stride_rgb,
int width, int height);
// Convert ARGB To RGB565.
LIBYUV_API
int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height);
// Convert ARGB To ARGB1555.
LIBYUV_API
int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb1555, int dst_stride_argb1555,
int width, int height);
// Convert ARGB To ARGB4444.
LIBYUV_API
int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb4444, int dst_stride_argb4444,
int width, int height);
// Convert ARGB To I444.
LIBYUV_API
int ARGBToI444(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB To I422.
LIBYUV_API
int ARGBToI422(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB To I420. (also in convert.h)
LIBYUV_API
int ARGBToI420(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB to J420. (JPeg full range I420).
LIBYUV_API
int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
uint8* dst_yj, int dst_stride_yj,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB To I411.
LIBYUV_API
int ARGBToI411(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB to J400. (JPeg full range).
LIBYUV_API
int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
uint8* dst_yj, int dst_stride_yj,
int width, int height);
// Convert ARGB to I400.
LIBYUV_API
int ARGBToI400(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
int width, int height);
// Convert ARGB To NV12.
LIBYUV_API
int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_uv, int dst_stride_uv,
int width, int height);
// Convert ARGB To NV21.
// "src_argb" / "src_stride_argb" describe the source ARGB image.
// "dst_y" / "dst_stride_y" describe the destination Y plane.
// "dst_vu" / "dst_stride_vu" describe the destination VU plane.
// "width" / "height" give the image size.
// NOTE(review): the meaning of the int return value is not stated in this
// header; presumably 0 on success like the ConvertTo* entry points. Confirm.
LIBYUV_API
int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_vu, int dst_stride_vu,
               int width, int height);
// Convert ARGB To YUY2.
LIBYUV_API
int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
uint8* dst_yuy2, int dst_stride_yuy2,
int width, int height);
// Convert ARGB To UYVY.
LIBYUV_API
int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
uint8* dst_uyvy, int dst_stride_uyvy,
int width, int height);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ NOLINT

View file

@ -1,81 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CPU_ID_H_ // NOLINT
#define INCLUDE_LIBYUV_CPU_ID_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// CPU feature flags. Each kCpuHas* constant below is a distinct single bit,
// so several can be held in one int and tested with a bitwise AND, which is
// what TestCpuFlag() does with its test_flag argument.
// TODO(fbarchard): Consider overlapping bits for different architectures.
// Internal flag to indicate cpuid requires initialization.
// TestCpuFlag() compares cpu_info_ against this value to decide whether to
// call InitCpuFlags().
#define kCpuInit 0x1
// These flags are only valid on ARM processors.
static const int kCpuHasARM = 0x2;
static const int kCpuHasNEON = 0x4;
// 0x8 reserved for future ARM flag.
// These flags are only valid on x86 processors.
static const int kCpuHasX86 = 0x10;
static const int kCpuHasSSE2 = 0x20;
static const int kCpuHasSSSE3 = 0x40;
static const int kCpuHasSSE41 = 0x80;
static const int kCpuHasSSE42 = 0x100;
static const int kCpuHasAVX = 0x200;
static const int kCpuHasAVX2 = 0x400;
static const int kCpuHasERMS = 0x800;
static const int kCpuHasFMA3 = 0x1000;
// 0x2000, 0x4000, 0x8000 reserved for future X86 flags.
// These flags are only valid on MIPS processors.
static const int kCpuHasMIPS = 0x10000;
static const int kCpuHasMIPS_DSP = 0x20000;
static const int kCpuHasMIPS_DSPR2 = 0x40000;
// Internal function used to auto-init.
LIBYUV_API
int InitCpuFlags(void);
// Internal function for parsing /proc/cpuinfo.
LIBYUV_API
int ArmCpuCaps(const char* cpuinfo_name);
// Reports whether a CPU feature (SSE2 etc.) was detected.
// test_flag should be one of the kCpuHas* constants above.
// Returns the global cpu_info_ word masked with test_flag, i.e. non-zero
// when the instruction set is detected. While cpu_info_ still equals
// kCpuInit, InitCpuFlags() is called and its return value is masked instead.
static __inline int TestCpuFlag(int test_flag) {
  LIBYUV_API extern int cpu_info_;
  if (cpu_info_ == kCpuInit) {
    return InitCpuFlags() & test_flag;
  }
  return cpu_info_ & test_flag;
}
// For testing, allow CPU flags to be disabled.
// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
// MaskCpuFlags(-1) to enable all cpu specific optimizations.
// MaskCpuFlags(0) to disable all cpu specific optimizations.
LIBYUV_API
void MaskCpuFlags(int enable_flags);
// Low level cpuid for X86. Returns zeros on other CPUs.
// eax is the info type that you want.
// ecx is typically the cpu number, and should normally be zero.
LIBYUV_API
void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CPU_ID_H_ NOLINT

View file

@ -1,168 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_FORMATCONVERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_FORMATCONVERSION_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Convert Bayer RGB formats to I420.
LIBYUV_API
int BayerBGGRToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
LIBYUV_API
int BayerGBRGToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
LIBYUV_API
int BayerGRBGToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
LIBYUV_API
int BayerRGGBToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Temporary API mapper.
#define BayerRGBToI420(b, bs, f, y, ys, u, us, v, vs, w, h) \
BayerToI420(b, bs, y, ys, u, us, v, vs, w, h, f)
LIBYUV_API
int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height,
uint32 src_fourcc_bayer);
// Convert I420 to Bayer RGB formats.
LIBYUV_API
int I420ToBayerBGGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToBayerGBRG(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToBayerGRBG(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToBayerRGGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Temporary API mapper.
#define I420ToBayerRGB(y, ys, u, us, v, vs, b, bs, f, w, h) \
I420ToBayer(y, ys, u, us, v, vs, b, bs, w, h, f)
LIBYUV_API
int I420ToBayer(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height,
uint32 dst_fourcc_bayer);
// Convert Bayer RGB formats to ARGB.
LIBYUV_API
int BayerBGGRToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
LIBYUV_API
int BayerGBRGToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
LIBYUV_API
int BayerGRBGToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
LIBYUV_API
int BayerRGGBToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Temporary API mapper.
#define BayerRGBToARGB(b, bs, f, a, as, w, h) BayerToARGB(b, bs, a, as, w, h, f)
LIBYUV_API
int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height,
uint32 src_fourcc_bayer);
// Converts ARGB to Bayer RGB formats.
LIBYUV_API
int ARGBToBayerBGGR(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height);
LIBYUV_API
int ARGBToBayerGBRG(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height);
LIBYUV_API
int ARGBToBayerGRBG(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height);
LIBYUV_API
int ARGBToBayerRGGB(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height);
// Temporary API mapper: forwards ARGBToBayerRGB-style calls, which pass the
// fourcc (f) before width/height, to ARGBToBayer, which takes it last.
// (a, as) is the ARGB source and its stride; (b, bs) is the Bayer destination
// and its stride. The source pair must come first, matching ARGBToBayer's
// (src_argb, src_stride_argb, dst_bayer, dst_stride_bayer, ...) order.
#define ARGBToBayerRGB(a, as, b, bs, f, w, h) ARGBToBayer(a, as, b, bs, w, h, f)
LIBYUV_API
int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height,
uint32 dst_fourcc_bayer);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_FORMATCONVERSION_H_ NOLINT

View file

@ -1,201 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_MJPEG_DECODER_H_ // NOLINT
#define INCLUDE_LIBYUV_MJPEG_DECODER_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
// NOTE: For a simplified public API use convert.h MJPGToI420().
struct jpeg_common_struct;
struct jpeg_decompress_struct;
struct jpeg_source_mgr;
namespace libyuv {
// ValidateJpeg is declared with C linkage. This section already sits inside
// the header's outer "#ifdef __cplusplus" region, so no additional
// preprocessor guard is needed around the linkage block.
extern "C" {
LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size);
}  // extern "C"
static const uint32 kUnknownDataSize = 0xFFFFFFFF;
enum JpegSubsamplingType {
kJpegYuv420,
kJpegYuv422,
kJpegYuv411,
kJpegYuv444,
kJpegYuv400,
kJpegUnknown
};
struct SetJmpErrorMgr;
// MJPEG ("Motion JPEG") is a pseudo-standard video codec where the frames are
// simply independent JPEG images with a fixed huffman table (which is omitted).
// It is rarely used in video transmission, but is common as a camera capture
// format, especially in Logitech devices. This class implements a decoder for
// MJPEG frames.
//
// Usage implied by the method comments below: call LoadFrame(), query the
// getters, then either decode with DecodeToBuffers() / DecodeToCallback() or
// discard the frame with UnloadFrame().
//
// See http://tools.ietf.org/html/rfc2435
class LIBYUV_API MJpegDecoder {
public:
// Signature of the function handed to DecodeToCallback(). It receives the
// caller's "opaque" pointer together with data pointers, strides and a row
// count for the scanlines delivered in that call.
typedef void (*CallbackFunction)(void* opaque,
const uint8* const* data,
const int* strides,
int rows);
// Possible results of GetColorSpace(). Only declared here; the values are
// defined outside this header.
static const int kColorSpaceUnknown;
static const int kColorSpaceGrayscale;
static const int kColorSpaceRgb;
static const int kColorSpaceYCbCr;
static const int kColorSpaceCMYK;
static const int kColorSpaceYCCK;
MJpegDecoder();
~MJpegDecoder();
// Loads a new frame, reads its headers, and determines the uncompressed
// image format.
// Returns LIBYUV_TRUE if image looks valid and format is supported.
// If return value is LIBYUV_TRUE, then the values for all the following
// getters are populated.
// src_len is the size of the compressed mjpeg frame in bytes.
LIBYUV_BOOL LoadFrame(const uint8* src, size_t src_len);
// Returns width of the last loaded frame in pixels.
int GetWidth();
// Returns height of the last loaded frame in pixels.
int GetHeight();
// Returns format of the last loaded frame. The return value is one of the
// kColorSpace* constants.
int GetColorSpace();
// Number of color components in the color space.
int GetNumComponents();
// Sample factors of the n-th component.
int GetHorizSampFactor(int component);
int GetVertSampFactor(int component);
int GetHorizSubSampFactor(int component);
int GetVertSubSampFactor(int component);
// Public for testability.
int GetImageScanlinesPerImcuRow();
// Public for testability.
int GetComponentScanlinesPerImcuRow(int component);
// Width of a component in bytes.
int GetComponentWidth(int component);
// Height of a component.
int GetComponentHeight(int component);
// Width of a component in bytes with padding for DCTSIZE. Public for testing.
int GetComponentStride(int component);
// Size of a component in bytes.
int GetComponentSize(int component);
// Call this after LoadFrame() if you decide you don't want to decode it
// after all.
LIBYUV_BOOL UnloadFrame();
// Decodes the entire image into a one-buffer-per-color-component format.
// dst_width must match exactly. dst_height must be less than or equal to the
// image height; if less, the image is cropped. "planes" must have size equal
// to at least GetNumComponents() and they must point to non-overlapping
// buffers of size at least GetComponentSize(i). The pointers in planes are
// incremented to point to after the end of the written data.
// TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
LIBYUV_BOOL DecodeToBuffers(uint8** planes, int dst_width, int dst_height);
// Decodes the entire image and passes the data via repeated calls to a
// callback function. Each call will get the data for a whole number of
// image scanlines.
// TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
LIBYUV_BOOL DecodeToCallback(CallbackFunction fn, void* opaque,
int dst_width, int dst_height);
// The helper function which recognizes the jpeg sub-sampling type.
static JpegSubsamplingType JpegSubsamplingTypeHelper(
int* subsample_x, int* subsample_y, int number_of_components);
private:
// A read-only byte range: start pointer plus length.
struct Buffer {
const uint8* data;
int len;
};
// An array of Buffer entries ("buffers", "len" of them) plus a position
// field. NOTE(review): "pos" is presumably the index of the entry currently
// being read; confirm in the implementation.
struct BufferVector {
Buffer* buffers;
int len;
int pos;
};
// Methods that are passed to jpeglib.
static int fill_input_buffer(jpeg_decompress_struct* cinfo);
static void init_source(jpeg_decompress_struct* cinfo);
static void skip_input_data(jpeg_decompress_struct* cinfo,
long num_bytes); // NOLINT
static void term_source(jpeg_decompress_struct* cinfo);
static void ErrorHandler(jpeg_common_struct* cinfo);
// Internal helpers; their definitions are not part of this header.
void AllocOutputBuffers(int num_outbufs);
void DestroyOutputBuffers();
LIBYUV_BOOL StartDecode();
LIBYUV_BOOL FinishDecode();
void SetScanlinePointers(uint8** data);
LIBYUV_BOOL DecodeImcuRow();
int GetComponentScanlinePadding(int component);
// A buffer holding the input data for a frame.
Buffer buf_;
BufferVector buf_vec_;
// Held by pointer because jpeg_decompress_struct, jpeg_source_mgr and
// SetJmpErrorMgr are only forward-declared in this header.
jpeg_decompress_struct* decompress_struct_;
jpeg_source_mgr* source_mgr_;
SetJmpErrorMgr* error_mgr_;
// LIBYUV_TRUE iff at least one component has scanline padding. (i.e.,
// GetComponentScanlinePadding() != 0.)
LIBYUV_BOOL has_scanline_padding_;
// Temporaries used to point to scanline outputs.
int num_outbufs_; // Outermost size of all arrays below.
uint8*** scanlines_;
int* scanlines_sizes_;
// Temporary buffer used for decoding when we can't decode directly to the
// output buffers. Large enough for just one iMCU row.
uint8** databuf_;
int* databuf_strides_;
};
} // namespace libyuv
#endif // __cplusplus
#endif // INCLUDE_LIBYUV_MJPEG_DECODER_H_ NOLINT

View file

@ -1,434 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ // NOLINT
#define INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_
#include "libyuv/basic_types.h"
// TODO(fbarchard): Remove the following headers includes.
#include "libyuv/convert.h"
#include "libyuv/convert_argb.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Copy a plane of data.
LIBYUV_API
void CopyPlane(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height);
// Set a plane of data to a 32 bit value.
LIBYUV_API
void SetPlane(uint8* dst_y, int dst_stride_y,
int width, int height,
uint32 value);
// Copy I400. Supports inverting.
LIBYUV_API
int I400ToI400(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height);
// Copy I422 to I422.
#define I422ToI422 I422Copy
LIBYUV_API
int I422Copy(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Copy I444 to I444.
#define I444ToI444 I444Copy
LIBYUV_API
int I444Copy(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert YUY2 to I422.
LIBYUV_API
int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert UYVY to I422.
LIBYUV_API
int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I420 to I400. (calls CopyPlane ignoring u/v).
LIBYUV_API
int I420ToI400(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
int width, int height);
// Alias
#define I420ToI420Mirror I420Mirror
// I420 mirror.
LIBYUV_API
int I420Mirror(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Alias
#define I400ToI400Mirror I400Mirror
// I400 mirror. A single plane is mirrored horizontally.
// Pass negative height to achieve 180 degree rotation.
LIBYUV_API
int I400Mirror(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height);
// Alias
#define ARGBToARGBMirror ARGBMirror
// ARGB mirror.
LIBYUV_API
int ARGBMirror(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert NV12 to RGB565.
LIBYUV_API
int NV12ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height);
// Convert NV21 to RGB565.
LIBYUV_API
int NV21ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height);
// I422ToARGB is in convert_argb.h
// Convert I422 to BGRA.
LIBYUV_API
int I422ToBGRA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_bgra, int dst_stride_bgra,
int width, int height);
// Convert I422 to ABGR.
LIBYUV_API
int I422ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert I422 to RGBA.
LIBYUV_API
int I422ToRGBA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_rgba, int dst_stride_rgba,
int width, int height);
// Draw a rectangle into I420.
LIBYUV_API
int I420Rect(uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int x, int y, int width, int height,
int value_y, int value_u, int value_v);
// Draw a rectangle into ARGB.
LIBYUV_API
int ARGBRect(uint8* dst_argb, int dst_stride_argb,
int x, int y, int width, int height, uint32 value);
// Convert ARGB to gray scale ARGB.
LIBYUV_API
int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Make a rectangle of ARGB gray scale.
LIBYUV_API
int ARGBGray(uint8* dst_argb, int dst_stride_argb,
int x, int y, int width, int height);
// Make a rectangle of ARGB Sepia tone.
LIBYUV_API
int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
int x, int y, int width, int height);
// Apply a matrix rotation to each ARGB pixel.
// matrix_argb is 4 signed ARGB values. -128 to 127 representing -2 to 2.
// The first 4 coefficients apply to B, G, R, A and produce B of the output.
// The next 4 coefficients apply to B, G, R, A and produce G of the output.
// The next 4 coefficients apply to B, G, R, A and produce R of the output.
// The last 4 coefficients apply to B, G, R, A and produce A of the output.
LIBYUV_API
int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
const int8* matrix_argb,
int width, int height);
// Deprecated. Use ARGBColorMatrix instead.
// Apply a matrix rotation to each ARGB pixel.
// matrix_rgb is 3 signed ARGB values. -128 to 127 representing -1 to 1.
// The first 4 coefficients apply to B, G, R, A and produce B of the output.
// The next 4 coefficients apply to B, G, R, A and produce G of the output.
// The last 4 coefficients apply to B, G, R, A and produce R of the output.
LIBYUV_API
int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
const int8* matrix_rgb,
int x, int y, int width, int height);
// Apply a color table to each ARGB pixel.
// Table contains 256 ARGB values.
LIBYUV_API
int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
const uint8* table_argb,
int x, int y, int width, int height);
// Apply a color table to each ARGB pixel but preserve destination alpha.
// Table contains 256 ARGB values.
LIBYUV_API
int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
const uint8* table_argb,
int x, int y, int width, int height);
// Apply a luma/color table to each ARGB pixel but preserve destination alpha.
// Table contains 32768 values indexed by [Y][C] where Y is 7 bit luma from
// RGB (YJ style) and C is an 8 bit color component (R, G or B).
LIBYUV_API
int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
const uint8* luma_rgb_table,
int width, int height);
// Apply a 3 term polynomial to ARGB values.
// poly points to a 4x4 matrix. The first row is constants. The 2nd row is
// coefficients for b, g, r and a. The 3rd row is coefficients for b squared,
// g squared, r squared and a squared. The 4th row is coefficients for b to
// the 3, g to the 3, r to the 3 and a to the 3. The values are summed and
// result clamped to 0 to 255.
// A polynomial approximation can be derived using software such as 'R'.
LIBYUV_API
int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
const float* poly,
int width, int height);
// Quantize a rectangle of ARGB. Alpha unaffected.
// scale is a 16 bit fractional fixed point scaler between 0 and 65535.
// interval_size should be a value between 1 and 255.
// interval_offset should be a value between 0 and 255.
LIBYUV_API
int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
int scale, int interval_size, int interval_offset,
int x, int y, int width, int height);
// Copy ARGB to ARGB.
LIBYUV_API
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Copy the alpha channel of ARGB to the alpha channel of ARGB.
LIBYUV_API
int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Copy a Y plane to the alpha channel of ARGB.
LIBYUV_API
int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
typedef void (*ARGBBlendRow)(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width);
// Get function to Alpha Blend ARGB pixels and store to destination.
LIBYUV_API
ARGBBlendRow GetARGBBlend();
// Alpha Blend ARGB images and store to destination.
// Alpha of destination is set to 255.
LIBYUV_API
int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
const uint8* src_argb1, int src_stride_argb1,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255.
LIBYUV_API
int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
const uint8* src_argb1, int src_stride_argb1,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Add ARGB image with ARGB image. Saturates to 255.
LIBYUV_API
int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
const uint8* src_argb1, int src_stride_argb1,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Subtract ARGB image (argb1) from ARGB image (argb0). Saturates to 0.
LIBYUV_API
int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
const uint8* src_argb1, int src_stride_argb1,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I422 to YUY2.
LIBYUV_API
int I422ToYUY2(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Convert I422 to UYVY.
LIBYUV_API
int I422ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Convert unattenuated ARGB to preattenuated ARGB.
LIBYUV_API
int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert preattenuated ARGB to unattenuated ARGB.
LIBYUV_API
int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert MJPG to ARGB.
LIBYUV_API
int MJPGToARGB(const uint8* sample, size_t sample_size,
uint8* argb, int argb_stride,
int w, int h, int dw, int dh);
// Internal function - do not call directly.
// Computes table of cumulative sum for image where the value is the sum
// of all values above and to the left of the entry. Used by ARGBBlur.
LIBYUV_API
int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
int32* dst_cumsum, int dst_stride32_cumsum,
int width, int height);
// Blur ARGB image.
// dst_cumsum table of width * (height + 1) * 16 bytes aligned to
// 16 byte boundary.
// dst_stride32_cumsum is number of ints in a row (width * 4).
// radius is number of pixels around the center. e.g. 1 = 3x3. 2=5x5.
// Blur is optimized for radius of 5 (11x11) or less.
LIBYUV_API
int ARGBBlur(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int32* dst_cumsum, int dst_stride32_cumsum,
int width, int height, int radius);
// Multiply ARGB image by ARGB value.
LIBYUV_API
int ARGBShade(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height, uint32 value);
// Interpolate between two ARGB images using specified amount of interpolation
// (0 to 255) and store to destination.
// 'interpolation' is specified as 8 bit fraction where 0 means 100% src_argb0
// and 255 means 1% src_argb0 and 99% src_argb1.
// Internally uses ARGBScale bilinear filtering.
// Caveat: This function will write up to 16 bytes beyond the end of dst_argb.
LIBYUV_API
int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
const uint8* src_argb1, int src_stride_argb1,
uint8* dst_argb, int dst_stride_argb,
int width, int height, int interpolation);
// Define LIBYUV_DISABLE_X86 when any of the listed macros is defined; in this
// header that suppresses the x86-only ARGBAffineRow_SSE2 declaration and its
// HAS_ARGBAFFINEROW_SSE2 feature macro further down.
// __pnacl__ is set by the PNaCl toolchain and __CLR_VER by MSVC when
// compiling managed (/clr) code; COVERAGE_ENABLED is presumably set by the
// project's coverage build — TODO confirm.
// NOTE(review): Apple's TargetConditionals.h defines TARGET_IPHONE_SIMULATOR
// as 0 on non-simulator targets, so this defined() test may also be true
// there once that header has been included — confirm whether a value test
// was intended.
#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
defined(TARGET_IPHONE_SIMULATOR)
#define LIBYUV_DISABLE_X86
#endif
// Row functions for copying pixels from a source with a slope to a row
// of destination. Useful for scaling, rotation, mirror, texture mapping.
LIBYUV_API
void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
uint8* dst_argb, const float* uv_dudv, int width);
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
LIBYUV_API
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
uint8* dst_argb, const float* uv_dudv, int width);
#define HAS_ARGBAFFINEROW_SSE2
#endif // LIBYUV_DISABLE_X86
// Shuffle ARGB channel order. e.g. BGRA to ARGB.
// shuffler is 16 bytes and must be aligned.
LIBYUV_API
int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
uint8* dst_argb, int dst_stride_argb,
const uint8* shuffler, int width, int height);
// Sobel ARGB effect with planar output.
LIBYUV_API
int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
int width, int height);
// Sobel ARGB effect.
LIBYUV_API
int ARGBSobel(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Sobel ARGB effect w/ Sobel X, Sobel, Sobel Y in ARGB.
LIBYUV_API
int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ NOLINT

View file

@ -1,117 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_ROTATE_H_ // NOLINT
#define INCLUDE_LIBYUV_ROTATE_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Rotation angles accepted by the rotate functions. The numeric value of each
// enumerator is its angle in degrees.
typedef enum RotationMode {
  // No rotation.
  kRotate0 = 0,
  // Rotate 90 degrees clockwise.
  kRotate90 = 90,
  // Rotate 180 degrees.
  kRotate180 = 180,
  // Rotate 270 degrees clockwise.
  kRotate270 = 270,

  // Deprecated names; each has the same value as the enumerator it refers to.
  kRotateNone = kRotate0,
  kRotateClockwise = kRotate90,
  kRotateCounterClockwise = kRotate270,
} RotationModeEnum;
// Rotate I420 frame.
LIBYUV_API
int I420Rotate(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int src_width, int src_height, enum RotationMode mode);
// Rotate NV12 input and store in I420.
LIBYUV_API
int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int src_width, int src_height, enum RotationMode mode);
// Rotate a plane by 0, 90, 180, or 270.
LIBYUV_API
int RotatePlane(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int src_width, int src_height, enum RotationMode mode);
// Rotate planes by 90, 180, 270. Deprecated.
LIBYUV_API
void RotatePlane90(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height);
LIBYUV_API
void RotatePlane180(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height);
LIBYUV_API
void RotatePlane270(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height);
LIBYUV_API
void RotateUV90(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height);
// Rotations for when U and V are interleaved.
// These functions take one input pointer and
// split the data into two buffers while
// rotating them. Deprecated.
LIBYUV_API
void RotateUV180(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height);
LIBYUV_API
void RotateUV270(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height);
// The 90 and 270 functions are based on transposes.
// Doing a transpose with reversing the read/write
// order will result in a rotation by +- 90 degrees.
// Deprecated.
LIBYUV_API
void TransposePlane(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height);
LIBYUV_API
void TransposeUV(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_ROTATE_H_ NOLINT

View file

@ -1,33 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_ROTATE_ARGB_H_ // NOLINT
#define INCLUDE_LIBYUV_ROTATE_ARGB_H_
#include "libyuv/basic_types.h"
#include "libyuv/rotate.h" // For RotationMode.
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Rotate ARGB frame
LIBYUV_API
int ARGBRotate(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int src_width, int src_height, enum RotationMode mode);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_ROTATE_ARGB_H_ NOLINT

File diff suppressed because it is too large Load diff

View file

@ -1,85 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_SCALE_H_ // NOLINT
#define INCLUDE_LIBYUV_SCALE_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Filtering modes accepted by the scaling functions.
typedef enum FilterMode {
  // Point sampling; the fastest mode.
  kFilterNone = 0,
  // Filters in the horizontal direction only.
  kFilterLinear = 1,
  // Faster than box filtering, but lower quality when scaling down.
  kFilterBilinear = 2,
  // The highest-quality mode.
  kFilterBox = 3
} FilterModeEnum;
// Scale a YUV plane.
LIBYUV_API
void ScalePlane(const uint8* src, int src_stride,
int src_width, int src_height,
uint8* dst, int dst_stride,
int dst_width, int dst_height,
enum FilterMode filtering);
// Scales a YUV 4:2:0 image from the src width and height to the
// dst width and height.
// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
// used. This produces basic (blocky) quality at the fastest speed.
// If filtering is kFilterBilinear, interpolation is used to produce a better
// quality image, at the expense of speed.
// If filtering is kFilterBox, averaging is used to produce even better
// quality image, at further expense of speed.
// Returns 0 if successful.
LIBYUV_API
int I420Scale(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
int src_width, int src_height,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int dst_width, int dst_height,
enum FilterMode filtering);
#ifdef __cplusplus
// Legacy API. Deprecated.
LIBYUV_API
int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
int src_stride_y, int src_stride_u, int src_stride_v,
int src_width, int src_height,
uint8* dst_y, uint8* dst_u, uint8* dst_v,
int dst_stride_y, int dst_stride_u, int dst_stride_v,
int dst_width, int dst_height,
LIBYUV_BOOL interpolate);
// Legacy API. Deprecated.
LIBYUV_API
int ScaleOffset(const uint8* src_i420, int src_width, int src_height,
uint8* dst_i420, int dst_width, int dst_height, int dst_yoffset,
LIBYUV_BOOL interpolate);
// For testing, allow disabling of specialized scalers.
LIBYUV_API
void SetUseReferenceImpl(LIBYUV_BOOL use);
#endif // __cplusplus
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_SCALE_H_ NOLINT

View file

@ -1,57 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_SCALE_ARGB_H_ // NOLINT
#define INCLUDE_LIBYUV_SCALE_ARGB_H_
#include "libyuv/basic_types.h"
#include "libyuv/scale.h" // For FilterMode
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
LIBYUV_API
int ARGBScale(const uint8* src_argb, int src_stride_argb,
int src_width, int src_height,
uint8* dst_argb, int dst_stride_argb,
int dst_width, int dst_height,
enum FilterMode filtering);
// Clipped scale takes destination rectangle coordinates for clip values.
LIBYUV_API
int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
int src_width, int src_height,
uint8* dst_argb, int dst_stride_argb,
int dst_width, int dst_height,
int clip_x, int clip_y, int clip_width, int clip_height,
enum FilterMode filtering);
// TODO(fbarchard): Implement this.
// Scale with YUV conversion to ARGB and clipping.
LIBYUV_API
int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint32 src_fourcc,
int src_width, int src_height,
uint8* dst_argb, int dst_stride_argb,
uint32 dst_fourcc,
int dst_width, int dst_height,
int clip_x, int clip_y, int clip_width, int clip_height,
enum FilterMode filtering);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_SCALE_ARGB_H_ NOLINT

View file

@ -1,301 +0,0 @@
/*
* Copyright 2013 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_SCALE_ROW_H_ // NOLINT
#define INCLUDE_LIBYUV_SCALE_ROW_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Define LIBYUV_DISABLE_X86 when any of the listed macros is defined; in this
// header that keeps the x86 HAS_SCALE*/HAS_FIXEDDIV* feature macros below
// from being defined.
// __pnacl__ is set by the PNaCl toolchain and __CLR_VER by MSVC when
// compiling managed (/clr) code; COVERAGE_ENABLED is presumably set by the
// project's coverage build — TODO confirm.
// NOTE(review): Apple's TargetConditionals.h defines TARGET_IPHONE_SIMULATOR
// as 0 on non-simulator targets, so this defined() test may also be true
// there once that header has been included — confirm whether a value test
// was intended.
#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
defined(TARGET_IPHONE_SIMULATOR)
#define LIBYUV_DISABLE_X86
#endif
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_SCALEROWDOWN2_SSE2
#define HAS_SCALEROWDOWN4_SSE2
#define HAS_SCALEROWDOWN34_SSSE3
#define HAS_SCALEROWDOWN38_SSSE3
#define HAS_SCALEADDROWS_SSE2
#define HAS_SCALEFILTERCOLS_SSSE3
#define HAS_SCALECOLSUP2_SSE2
#define HAS_SCALEARGBROWDOWN2_SSE2
#define HAS_SCALEARGBROWDOWNEVEN_SSE2
#define HAS_SCALEARGBCOLS_SSE2
#define HAS_SCALEARGBFILTERCOLS_SSSE3
#define HAS_SCALEARGBCOLSUP2_SSE2
#define HAS_FIXEDDIV_X86
#define HAS_FIXEDDIV1_X86
#endif
// The following are available on Neon platforms:
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON))
#define HAS_SCALEROWDOWN2_NEON
#define HAS_SCALEROWDOWN4_NEON
#define HAS_SCALEROWDOWN34_NEON
#define HAS_SCALEROWDOWN38_NEON
#define HAS_SCALEARGBROWDOWNEVEN_NEON
#define HAS_SCALEARGBROWDOWN2_NEON
#endif
// The following are available on Mips platforms:
#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#define HAS_SCALEROWDOWN2_MIPS_DSPR2
#define HAS_SCALEROWDOWN4_MIPS_DSPR2
#define HAS_SCALEROWDOWN34_MIPS_DSPR2
#define HAS_SCALEROWDOWN38_MIPS_DSPR2
#endif
// Scale ARGB vertically with bilinear interpolation.
void ScalePlaneVertical(int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int y, int dy,
int bpp, enum FilterMode filtering);
// Simplify the filtering based on scale factors.
enum FilterMode ScaleFilterReduce(int src_width, int src_height,
int dst_width, int dst_height,
enum FilterMode filtering);
// Divide num by div and return as 16.16 fixed point result.
int FixedDiv_C(int num, int div);
int FixedDiv_X86(int num, int div);
// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
int FixedDiv1_C(int num, int div);
int FixedDiv1_X86(int num, int div);
// Bind the generic names FixedDiv and FixedDiv1 to one implementation: the
// _X86 functions when HAS_FIXEDDIV_X86 is defined, the portable _C functions
// otherwise.
// NOTE(review): both names are selected by HAS_FIXEDDIV_X86 alone; the
// separate HAS_FIXEDDIV1_X86 macro defined in the x86 feature list above is
// not consulted here — confirm that is intended.
#ifdef HAS_FIXEDDIV_X86
#define FixedDiv FixedDiv_X86
#define FixedDiv1 FixedDiv1_X86
#else
#define FixedDiv FixedDiv_C
#define FixedDiv1 FixedDiv1_C
#endif
// Compute slope values for stepping.
void ScaleSlope(int src_width, int src_height,
int dst_width, int dst_height,
enum FilterMode filtering,
int* x, int* y, int* dx, int* dy);
void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width);
void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width);
void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int, int);
void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height);
void ScaleARGBRowDown2_C(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
int dst_width, int, int);
void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width,
int src_height);
void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
// Row functions.
// NEON declarations of the ARGB "row down even" scalers (plain and Box).
// NOTE(review): src_stride is declared as `int` in these two prototypes,
// while the _C and _SSE2 declarations of the same functions in this header
// use `ptrdiff_t`. Confirm against the NEON definitions before treating all
// variants as interchangeable through a single function-pointer type.
void ScaleARGBRowDownEven_NEON(const uint8* src_argb, int src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, int src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
// ScaleRowDown2Box also used by planar functions
// NEON downscalers with interpolation.
// Note - not static due to reuse in convert for 444 to 420.
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// Down scale from 4 to 3 pixels. Use the neon multilane read/write
// to load up the every 4th pixel into a 4 different registers.
// Point samples 32 pixels to 24 pixels.
void ScaleRowDown34_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// 32 -> 12
void ScaleRowDown38_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// 32x3 -> 12x1
void ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// 32x2 -> 12x1
void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// MIPS DSPR2 variants of the planar row down-scalers. The set mirrors the
// NEON prototypes declared earlier in this header (Down2, Down4, Down34 and
// Down38, each with Box variants). Only prototypes appear here.
void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
// Note: the destination parameter of the next two prototypes is named d.
void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width);
void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width);
void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_SCALE_ROW_H_ NOLINT

View file

@ -1,16 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
// Integer version number of this libyuv snapshot, usable in preprocessor
// comparisons.
#define LIBYUV_VERSION 998
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT

View file

@ -1,182 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Common definitions for video, including fourcc and VideoFormat.
#ifndef INCLUDE_LIBYUV_VIDEO_COMMON_H_ // NOLINT
#define INCLUDE_LIBYUV_VIDEO_COMMON_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
//////////////////////////////////////////////////////////////////////////////
// Definition of FourCC codes
//////////////////////////////////////////////////////////////////////////////
// Convert four characters to a FourCC code.
// The first character occupies the least significant byte of the result and
// the fourth the most significant byte, so the 32-bit code reads a, b, c, d
// when stored little-endian.
// Needs to be a macro otherwise the OS X compiler complains when the kFormat*
// constants are used in a switch.
// The C++ and C definitions differ only in cast syntax.
#ifdef __cplusplus
#define FOURCC(a, b, c, d) ( \
(static_cast<uint32>(a)) | (static_cast<uint32>(b) << 8) | \
(static_cast<uint32>(c) << 16) | (static_cast<uint32>(d) << 24))
#else
#define FOURCC(a, b, c, d) ( \
((uint32)(a)) | ((uint32)(b) << 8) | /* NOLINT */ \
((uint32)(c) << 16) | ((uint32)(d) << 24)) /* NOLINT */
#endif
// Some pages discussing FourCC codes:
// http://www.fourcc.org/yuv.php
// http://v4l2spec.bytesex.org/spec/book1.htm
// http://developer.apple.com/quicktime/icefloe/dispatch020.html
// http://msdn.microsoft.com/library/windows/desktop/dd206750.aspx#nv12
// http://people.xiph.org/~xiphmont/containers/nut/nut4cc.txt
// FourCC codes grouped according to implementation efficiency.
// Primary formats should convert in 1 efficient step.
// Secondary formats are converted in 2 steps.
// Auxiliary formats call primary converters.
// FourCC codes understood by the library. Each value is built with FOURCC()
// from the four characters in its name, except FOURCC_CM32/FOURCC_CM24
// (numeric codes) and FOURCC_ANY (wildcard).
enum FourCC {
// 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed.
FOURCC_I420 = FOURCC('I', '4', '2', '0'),
FOURCC_I422 = FOURCC('I', '4', '2', '2'),
FOURCC_I444 = FOURCC('I', '4', '4', '4'),
FOURCC_I411 = FOURCC('I', '4', '1', '1'),
FOURCC_I400 = FOURCC('I', '4', '0', '0'),
FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
FOURCC_NV12 = FOURCC('N', 'V', '1', '2'),
FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
// 2 Secondary YUV formats: row biplanar.
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),
// 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // rgb565 LE.
FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE.
FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE.
// 4 Secondary RGB formats: 4 Bayer Patterns.
FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'),
// 1 Primary Compressed YUV format.
FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'),
// 6 Auxiliary YUV variations: 3 with U and V planes swapped, 1 alias, and
// the 2 J-prefixed codes.
// NOTE(review): J420/J400 are presumably JPEG (full-range) variants of
// I420/I400 -- confirm.
FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'),
FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'),
FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420.
FOURCC_J420 = FOURCC('J', '4', '2', '0'),
FOURCC_J400 = FOURCC('J', '4', '0', '0'),
// 17 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420.
FOURCC_YU16 = FOURCC('Y', 'U', '1', '6'), // Alias for I422.
FOURCC_YU24 = FOURCC('Y', 'U', '2', '4'), // Alias for I444.
FOURCC_YUYV = FOURCC('Y', 'U', 'Y', 'V'), // Alias for YUY2.
FOURCC_YUVS = FOURCC('y', 'u', 'v', 's'), // Alias for YUY2 on Mac.
FOURCC_HDYC = FOURCC('H', 'D', 'Y', 'C'), // Alias for UYVY.
FOURCC_2VUY = FOURCC('2', 'v', 'u', 'y'), // Alias for UYVY on Mac.
FOURCC_JPEG = FOURCC('J', 'P', 'E', 'G'), // Alias for MJPG.
FOURCC_DMB1 = FOURCC('d', 'm', 'b', '1'), // Alias for MJPG on Mac.
FOURCC_BA81 = FOURCC('B', 'A', '8', '1'), // Alias for BGGR.
FOURCC_RGB3 = FOURCC('R', 'G', 'B', '3'), // Alias for RAW.
FOURCC_BGR3 = FOURCC('B', 'G', 'R', '3'), // Alias for 24BG.
FOURCC_CM32 = FOURCC(0, 0, 0, 32), // Alias for BGRA kCMPixelFormat_32ARGB
FOURCC_CM24 = FOURCC(0, 0, 0, 24), // Alias for RAW kCMPixelFormat_24RGB
FOURCC_L555 = FOURCC('L', '5', '5', '5'), // Alias for RGBO.
FOURCC_L565 = FOURCC('L', '5', '6', '5'), // Alias for RGBP.
FOURCC_5551 = FOURCC('5', '5', '5', '1'), // Alias for RGBO.
// 1 Auxiliary compressed YUV format set aside for capturer.
FOURCC_H264 = FOURCC('H', '2', '6', '4'),
// Match any fourcc.
FOURCC_ANY = 0xFFFFFFFF,
};
// Bits per pixel for each FourCC code, named FOURCC_BPP_<code>.
// A value of 0 means the size is unknown (see MJPG and ANY below).
// NOTE(review): there are no entries for FOURCC_L555, FOURCC_L565 or
// FOURCC_5551, which enum FourCC declares -- confirm whether that is
// intentional.
enum FourCCBpp {
// Canonical fourcc codes used in our code.
FOURCC_BPP_I420 = 12,
FOURCC_BPP_I422 = 16,
FOURCC_BPP_I444 = 24,
FOURCC_BPP_I411 = 12,
FOURCC_BPP_I400 = 8,
FOURCC_BPP_NV21 = 12,
FOURCC_BPP_NV12 = 12,
FOURCC_BPP_YUY2 = 16,
FOURCC_BPP_UYVY = 16,
FOURCC_BPP_M420 = 12,
FOURCC_BPP_Q420 = 12,
FOURCC_BPP_ARGB = 32,
FOURCC_BPP_BGRA = 32,
FOURCC_BPP_ABGR = 32,
FOURCC_BPP_RGBA = 32,
FOURCC_BPP_24BG = 24,
FOURCC_BPP_RAW = 24,
FOURCC_BPP_RGBP = 16,
FOURCC_BPP_RGBO = 16,
FOURCC_BPP_R444 = 16,
FOURCC_BPP_RGGB = 8,
FOURCC_BPP_BGGR = 8,
FOURCC_BPP_GRBG = 8,
FOURCC_BPP_GBRG = 8,
FOURCC_BPP_YV12 = 12,
FOURCC_BPP_YV16 = 16,
FOURCC_BPP_YV24 = 24,
FOURCC_BPP_YU12 = 12,
FOURCC_BPP_J420 = 12,
FOURCC_BPP_J400 = 8,
FOURCC_BPP_MJPG = 0, // 0 means unknown.
FOURCC_BPP_H264 = 0,
// Aliases.
FOURCC_BPP_IYUV = 12,
FOURCC_BPP_YU16 = 16,
FOURCC_BPP_YU24 = 24,
FOURCC_BPP_YUYV = 16,
FOURCC_BPP_YUVS = 16,
FOURCC_BPP_HDYC = 16,
FOURCC_BPP_2VUY = 16,
// NOTE(review): JPEG and DMB1 are documented in enum FourCC as aliases for
// MJPG, yet they are 1 here while FOURCC_BPP_MJPG is 0 -- confirm whether the
// difference is intentional.
FOURCC_BPP_JPEG = 1,
FOURCC_BPP_DMB1 = 1,
FOURCC_BPP_BA81 = 8,
FOURCC_BPP_RGB3 = 24,
FOURCC_BPP_BGR3 = 24,
FOURCC_BPP_CM32 = 32,
FOURCC_BPP_CM24 = 24,
// Match any fourcc.
FOURCC_BPP_ANY = 0, // 0 means unknown.
};
// Converts fourcc aliases into canonical ones.
// Takes and returns the code as a plain uint32 rather than enum FourCC.
// NOTE(review): codes that are not aliases are presumably returned
// unchanged -- confirm against the implementation.
LIBYUV_API uint32 CanonicalFourCC(uint32 fourcc);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_VIDEO_COMMON_H_ NOLINT

View file

@ -1,15 +0,0 @@
libyuv's source code is here provided in minimalist distribution format
with all source files not needed for compiling libtheoraplayer removed.
- The project files were modified to fit libtheoraplayer's binary output
folder structure.
- Some project files missing from the original source distribution were added to support
compiling libtheoraplayer on those platforms.
- Also, some code may have been changed to address certain compiler/platform
specific problems and is so indicated in the source code.
libyuv is owned and maintained by Google Inc., and this distribution
is provided here only for convenience and easier compilation of libtheoraplayer.
If you want to use libyuv outside of libtheoraplayer, it is encouraged to use the
original source distribution by Google Inc: https://code.google.com/p/libyuv/

View file

@ -1,325 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/compare.h"
#include <float.h>
#include <math.h>
#ifdef _OPENMP
#include <omp.h>
#endif
#include "libyuv/basic_types.h"
#include "libyuv/cpu_id.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// hash seed of 5381 recommended.
// Internal C version of HashDjb2 with int sized count for efficiency.
uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
// SIMD hash variants. The SSE4.1 version is declared for x86 builds unless
// LIBYUV_DISABLE_X86 is defined: 32-bit Visual C (_M_IX86), targets defining
// __x86_64__, and __i386__ targets that are not built as PIC (__pic__).
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || \
(defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
#define HAS_HASHDJB2_SSE41
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
// The AVX2 version is additionally declared when _MSC_VER >= 1700
// (an undefined _MSC_VER evaluates to 0 in #if, so other compilers skip it).
#if _MSC_VER >= 1700
#define HAS_HASHDJB2_AVX2
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
#endif
#endif // HAS_HASHDJB2_SSE41
// Computes the djb2 hash of |count| bytes starting at |src|, continuing from
// |seed| (a seed of 5381 is recommended), and returns the updated hash.
// The input is consumed in three phases:
//   1. whole 32768-byte chunks through the fastest available routine,
//   2. the remaining multiple of 16 bytes through that same routine,
//   3. the final 0..15 bytes through the portable C routine.
LIBYUV_API
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
  typedef uint32 (*HashFunc)(const uint8* src, int count, uint32 seed);
  const int kChunkBytes = 1 << 15;  // 32768
  HashFunc hash_fast = HashDjb2_C;
  int simd_tail;
  int scalar_tail;
#if defined(HAS_HASHDJB2_SSE41)
  if (TestCpuFlag(kCpuHasSSE41)) {
    hash_fast = HashDjb2_SSE41;
  }
#endif
#if defined(HAS_HASHDJB2_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    hash_fast = HashDjb2_AVX2;
  }
#endif
  // Phase 1: full chunks. The 64-bit count is fed to the int-sized workers
  // one chunk at a time.
  for (; count >= (uint64)(kChunkBytes); count -= kChunkBytes) {
    seed = hash_fast(src, kChunkBytes, seed);
    src += kChunkBytes;
  }
  // Fewer than kChunkBytes bytes remain, so the count now fits in an int.
  simd_tail = (int)(count) & ~15;
  scalar_tail = (int)(count) & 15;
  // Phase 2: largest multiple of 16 that is left.
  if (simd_tail != 0) {
    seed = hash_fast(src, simd_tail, seed);
    src += simd_tail;
  }
  // Phase 3: trailing bytes.
  if (scalar_tail != 0) {
    seed = HashDjb2_C(src, scalar_tail, seed);
  }
  return seed;
}
// Row-level sum of squared differences between two byte arrays.
// Portable C version; always available.
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
// NEON version: declared when __ARM_NEON__ or LIBYUV_NEON is defined and
// LIBYUV_DISABLE_NEON is not.
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON))
#define HAS_SUMSQUAREERROR_NEON
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
#endif
// SSE2 version: declared for x86 targets (_M_IX86, __x86_64__ or __i386__)
// unless LIBYUV_DISABLE_X86 is defined.
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_SUMSQUAREERROR_SSE2
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
#endif
// Visual C 2012 required for AVX2.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && _MSC_VER >= 1700
#define HAS_SUMSQUAREERROR_AVX2
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
#endif
// TODO(fbarchard): Refactor into row function.
// Returns the sum of squared differences between the first |count| bytes of
// |src_a| and |src_b| as a 64-bit total.
// The work is split into full 65536-byte blocks, then the remaining multiple
// of 32 bytes, then the last 0..31 bytes (always handled by the C routine).
LIBYUV_API
uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
int count) {
// Each squared difference of two uint8 values is at most 255 * 255 = 65025,
// which fits in the 0 to 65535 range.
// Up to 65536 of those can be summed and remain within a uint32.
// After each block of 65536 pixels, accumulate into a uint64.
const int kBlockSize = 65536;
// Bytes past the last full block, rounded down to a multiple of 32.
int remainder = count & (kBlockSize - 1) & ~31;
uint64 sse = 0;
int i;
// Row worker; defaults to C and is replaced below when a SIMD version is
// compiled in and the CPU reports support for it.
uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
SumSquareError_C;
#if defined(HAS_SUMSQUAREERROR_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
SumSquareError = SumSquareError_NEON;
}
#endif
#if defined(HAS_SUMSQUAREERROR_SSE2)
// The SSE2 worker is selected only when both source pointers are 16-byte
// aligned.
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) {
// Note only used for multiples of 16 so count is not checked.
SumSquareError = SumSquareError_SSE2;
}
#endif
#if defined(HAS_SUMSQUAREERROR_AVX2)
// Checked last, so AVX2 takes precedence over SSE2 when both are available.
if (TestCpuFlag(kCpuHasAVX2)) {
// Note only used for multiples of 32 so count is not checked.
SumSquareError = SumSquareError_AVX2;
}
#endif
// Full blocks. With OpenMP enabled the blocks are summed in parallel and
// combined through the reduction on sse.
#ifdef _OPENMP
#pragma omp parallel for reduction(+: sse)
#endif
for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
}
// Skip past the bytes covered by the full blocks.
src_a += count & ~(kBlockSize - 1);
src_b += count & ~(kBlockSize - 1);
if (remainder) {
sse += SumSquareError(src_a, src_b, remainder);
src_a += remainder;
src_b += remainder;
}
// Final 0..31 bytes always go through the C routine.
remainder = count & 31;
if (remainder) {
sse += SumSquareError_C(src_a, src_b, remainder);
}
return sse;
}
// Returns the sum of squared differences over a |width| x |height| region of
// two planes.
//   src_a, src_b        first byte of each plane.
//   stride_a, stride_b  byte distance between consecutive rows of each plane.
//   width, height       region size; |width| bytes are compared per row.
LIBYUV_API
uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
                                  const uint8* src_b, int stride_b,
                                  int width, int height) {
  uint64 sse = 0;
  int h;
  // Coalesce rows: when both planes are contiguous the whole region can be
  // processed as one long row. Only do so when width * height fits in an int
  // (0x7fffffff assumes a 32-bit int), because ComputeSumSquareError takes an
  // int count and the product would otherwise overflow; such planes fall back
  // to the row-by-row loop below.
  if (stride_a == width &&
      stride_b == width &&
      (int64)(width) * (int64)(height) <= 0x7fffffff) {
    width *= height;
    height = 1;
    stride_a = stride_b = 0;
  }
  for (h = 0; h < height; ++h) {
    sse += ComputeSumSquareError(src_a, src_b, width);
    src_a += stride_a;
    src_b += stride_b;
  }
  return sse;
}
// Converts a sum of squared errors over |count| samples into a PSNR value:
// 10 * log10(255^2 * count / sse), capped at kMaxPsnr.
// A zero |sse| yields kMaxPsnr directly, which avoids dividing by zero.
LIBYUV_API
double SumSquareErrorToPsnr(uint64 sse, uint64 count) {
  double inverse_mse;
  double result;
  if (sse == 0) {
    return kMaxPsnr;
  }
  // count / sse is the reciprocal of the mean squared error.
  inverse_mse = (double)(count) / (double)(sse);
  result = 10.0 * log10(255.0 * 255.0 * inverse_mse);
  return (result > kMaxPsnr) ? kMaxPsnr : result;
}
// Returns the PSNR between two planes of |width| x |height| bytes.
//   src_a, src_b        first byte of each plane.
//   stride_a, stride_b  byte distance between consecutive rows of each plane.
LIBYUV_API
double CalcFramePsnr(const uint8* src_a, int stride_a,
                     const uint8* src_b, int stride_b,
                     int width, int height) {
  // Widen before multiplying: width * height evaluated in int overflows for
  // large planes before the result ever reaches the 64-bit variable.
  const uint64 samples = (uint64)(width) * (uint64)(height);
  const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a,
                                                src_b, stride_b,
                                                width, height);
  return SumSquareErrorToPsnr(sse, samples);
}
// Returns the PSNR between two I420 frames, computed over the Y, U and V
// planes together. |width| and |height| describe the Y plane; the U and V
// planes are each ((width + 1) / 2) x ((height + 1) / 2).
LIBYUV_API
double I420Psnr(const uint8* src_y_a, int stride_y_a,
                const uint8* src_u_a, int stride_u_a,
                const uint8* src_v_a, int stride_v_a,
                const uint8* src_y_b, int stride_y_b,
                const uint8* src_u_b, int stride_u_b,
                const uint8* src_v_b, int stride_v_b,
                int width, int height) {
  const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a,
                                                  src_y_b, stride_y_b,
                                                  width, height);
  // Chroma planes are half size, rounded up.
  const int width_uv = (width + 1) >> 1;
  const int height_uv = (height + 1) >> 1;
  const uint64 sse_u = ComputeSumSquareErrorPlane(src_u_a, stride_u_a,
                                                  src_u_b, stride_u_b,
                                                  width_uv, height_uv);
  const uint64 sse_v = ComputeSumSquareErrorPlane(src_v_a, stride_v_a,
                                                  src_v_b, stride_v_b,
                                                  width_uv, height_uv);
  // Widen before multiplying so the sample count cannot overflow int for
  // large frames.
  const uint64 samples = (uint64)(width) * (uint64)(height) +
      2 * ((uint64)(width_uv) * (uint64)(height_uv));
  const uint64 sse = sse_y + sse_u + sse_v;
  return SumSquareErrorToPsnr(sse, samples);
}
static const int64 cc1 = 26634; // (64^2*(.01*255)^2
static const int64 cc2 = 239708; // (64^2*(.03*255)^2
static double Ssim8x8_C(const uint8* src_a, int stride_a,
const uint8* src_b, int stride_b) {
int64 sum_a = 0;
int64 sum_b = 0;
int64 sum_sq_a = 0;
int64 sum_sq_b = 0;
int64 sum_axb = 0;
int i;
for (i = 0; i < 8; ++i) {
int j;
for (j = 0; j < 8; ++j) {
sum_a += src_a[j];
sum_b += src_b[j];
sum_sq_a += src_a[j] * src_a[j];
sum_sq_b += src_b[j] * src_b[j];
sum_axb += src_a[j] * src_b[j];
}
src_a += stride_a;
src_b += stride_b;
}
{
const int64 count = 64;
// scale the constants by number of pixels
const int64 c1 = (cc1 * count * count) >> 12;
const int64 c2 = (cc2 * count * count) >> 12;
const int64 sum_a_x_sum_b = sum_a * sum_b;
const int64 ssim_n = (2 * sum_a_x_sum_b + c1) *
(2 * count * sum_axb - 2 * sum_a_x_sum_b + c2);
const int64 sum_a_sq = sum_a*sum_a;
const int64 sum_b_sq = sum_b*sum_b;
const int64 ssim_d = (sum_a_sq + sum_b_sq + c1) *
(count * sum_sq_a - sum_a_sq +
count * sum_sq_b - sum_b_sq + c2);
if (ssim_d == 0.0) {
return DBL_MAX;
}
return ssim_n * 1.0 / ssim_d;
}
}
// Returns the mean SSIM over a plane of |width| x |height| bytes.
// An 8x8 window is evaluated at every position of a 4x4 pixel grid, so
// neighbouring windows overlap; the overlap across block boundaries
// penalizes blocking artifacts.
// Windows start at rows i < height - 8 and columns j < width - 8. When no
// window fits (either dimension is 8 or less) the result is 0.0 / 0, which
// is NaN on IEEE-754 platforms.
LIBYUV_API
double CalcFrameSsim(const uint8* src_a, int stride_a,
                     const uint8* src_b, int stride_b,
                     int width, int height) {
  const int row_limit = height - 8;
  const int col_limit = width - 8;
  double score_sum = 0;
  int window_count = 0;
  int y;
  // Window origins step through the 4x4 grid.
  for (y = 0; y < row_limit; y += 4) {
    int x = 0;
    while (x < col_limit) {
      score_sum += Ssim8x8_C(src_a + x, stride_a, src_b + x, stride_b);
      ++window_count;
      x += 4;
    }
    src_a += stride_a * 4;
    src_b += stride_b * 4;
  }
  return score_sum / window_count;
}
// Returns the SSIM between two I420 frames as a weighted combination of the
// per-plane scores: 0.8 for Y and 0.1 each for U and V.
// |width| and |height| describe the Y plane; the U and V planes are each
// ((width + 1) / 2) x ((height + 1) / 2).
LIBYUV_API
double I420Ssim(const uint8* src_y_a, int stride_y_a,
                const uint8* src_u_a, int stride_u_a,
                const uint8* src_v_a, int stride_v_a,
                const uint8* src_y_b, int stride_y_b,
                const uint8* src_u_b, int stride_u_b,
                const uint8* src_v_b, int stride_v_b,
                int width, int height) {
  // Chroma planes are half size, rounded up.
  const int chroma_width = (width + 1) >> 1;
  const int chroma_height = (height + 1) >> 1;
  double ssim_y;
  double ssim_u;
  double ssim_v;
  ssim_y = CalcFrameSsim(src_y_a, stride_y_a, src_y_b, stride_y_b,
                         width, height);
  ssim_u = CalcFrameSsim(src_u_a, stride_u_a, src_u_b, stride_u_b,
                         chroma_width, chroma_height);
  ssim_v = CalcFrameSsim(src_v_a, stride_v_a, src_v_b, stride_v_b,
                         chroma_width, chroma_height);
  return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v);
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View file

@ -1,42 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Portable reference implementation: returns the sum over the first |count|
// bytes of (src_a[i] - src_b[i])^2, accumulated in a uint32.
// A |count| of zero or less yields 0.
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) {
  uint32 total = 0u;
  int pos = 0;
  while (pos < count) {
    const int delta = src_a[pos] - src_b[pos];
    total += (uint32)(delta * delta);
    ++pos;
  }
  return total;
}
// Portable djb2 hash with an int-sized count for efficiency.
// Starting from |seed| (5381 is the recommended value), each of the |count|
// bytes at |src| updates the hash as hash = hash * 33 + byte, with the
// arithmetic wrapping in uint32.
uint32 HashDjb2_C(const uint8* src, int count, uint32 seed) {
  uint32 result = seed;
  int pos = 0;
  while (pos < count) {
    // hash * 33 is the same as hash + (hash << 5) in wrapping uint32 math.
    result = result * 33u + src[pos];
    ++pos;
  }
  return result;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View file

@ -1,64 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
// ARM NEON implementation of the sum of squared differences between
// |src_a| and |src_b|.
// Each loop iteration loads 16 bytes from both sources, and the loop repeats
// while the decremented count is still greater than zero. The body runs at
// least once and always consumes 16 bytes, so callers are expected to pass a
// positive multiple of 16 -- TODO confirm with callers.
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
volatile uint32 sse;
asm volatile (
#ifdef _ANDROID
".fpu neon\n"
#endif
// Clear the four 32-bit-lane accumulators q8..q11.
"vmov.u8 q8, #0 \n"
"vmov.u8 q10, #0 \n"
"vmov.u8 q9, #0 \n"
"vmov.u8 q11, #0 \n"
".p2align 2 \n"
"1: \n"
// Load 16 bytes from each source and advance both pointers.
"vld1.8 {q0}, [%0]! \n"
"vld1.8 {q1}, [%1]! \n"
"subs %2, %2, #16 \n"
// Widening subtract: 8-bit inputs, 16-bit differences in q2/q3.
"vsubl.u8 q2, d0, d2 \n"
"vsubl.u8 q3, d1, d3 \n"
// Multiply each difference by itself and accumulate into 32-bit lanes.
"vmlal.s16 q8, d4, d4 \n"
"vmlal.s16 q9, d6, d6 \n"
"vmlal.s16 q10, d5, d5 \n"
"vmlal.s16 q11, d7, d7 \n"
"bgt 1b \n"
// Fold the four accumulators into one value and store it in sse.
"vadd.u32 q8, q8, q9 \n"
"vadd.u32 q10, q10, q11 \n"
"vadd.u32 q11, q8, q10 \n"
"vpaddl.u32 q1, q11 \n"
"vadd.u64 d0, d2, d3 \n"
"vmov.32 %3, d0[0] \n"
: "+r"(src_a),
"+r"(src_b),
"+r"(count),
"=r"(sse)
:
: "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
return sse;
}
#endif // __ARM_NEON__
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View file

@ -1,158 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
// SSE2 (GCC-style inline asm) implementation of the sum of squared
// differences between |src_a| and |src_b|.
// Both sources are read with movdqa, an aligned 16-byte load, so both
// pointers must be 16-byte aligned. Each iteration consumes 16 bytes and the
// loop repeats while the decremented count is greater than zero; the body
// runs at least once, so callers are expected to pass a positive multiple
// of 16 -- TODO confirm with callers.
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
uint32 sse;
asm volatile ( // NOLINT
// xmm0 accumulates the result; xmm5 stays zero for the byte unpacks.
"pxor %%xmm0,%%xmm0 \n"
"pxor %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
"movdqa " MEMACCESS(0) ",%%xmm1 \n"
"lea " MEMLEA(0x10, 0) ",%0 \n"
"movdqa " MEMACCESS(1) ",%%xmm2 \n"
"lea " MEMLEA(0x10, 1) ",%1 \n"
"sub $0x10,%2 \n"
// Absolute difference: saturating subtract in both directions, then OR.
"movdqa %%xmm1,%%xmm3 \n"
"psubusb %%xmm2,%%xmm1 \n"
"psubusb %%xmm3,%%xmm2 \n"
"por %%xmm2,%%xmm1 \n"
// Widen the bytes to 16 bits, square and pair-add into 32-bit lanes.
"movdqa %%xmm1,%%xmm2 \n"
"punpcklbw %%xmm5,%%xmm1 \n"
"punpckhbw %%xmm5,%%xmm2 \n"
"pmaddwd %%xmm1,%%xmm1 \n"
"pmaddwd %%xmm2,%%xmm2 \n"
"paddd %%xmm1,%%xmm0 \n"
"paddd %%xmm2,%%xmm0 \n"
// The instructions between sub and here leave the flags from sub intact.
"jg 1b \n"
// Horizontal add of the four 32-bit lanes into the low lane.
"pshufd $0xee,%%xmm0,%%xmm1 \n"
"paddd %%xmm1,%%xmm0 \n"
"pshufd $0x1,%%xmm0,%%xmm1 \n"
"paddd %%xmm1,%%xmm0 \n"
"movd %%xmm0,%3 \n"
: "+r"(src_a), // %0
"+r"(src_b), // %1
"+r"(count), // %2
"=g"(sse) // %3
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
); // NOLINT
return sse;
}
#endif // defined(__x86_64__) || defined(__i386__)
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
#define HAS_HASHDJB2_SSE41
// Multiplier tables for the 16-bytes-at-a-time djb2 hash below.
// Per the per-entry comments, each value is a power of 33 (presumably
// reduced modulo 2^32 to fit the 32-bit lane -- e.g. 33^4 = 0x00121881 fits
// exactly).
// kHash16x33 scales the running hash by 33^16, one factor of 33 per byte of
// a 16-byte step.
static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
// Weights for bytes 0-3 of the step.
static uvec32 kHashMul0 = {
0x0c3525e1, // 33 ^ 15
0xa3476dc1, // 33 ^ 14
0x3b4039a1, // 33 ^ 13
0x4f5f0981, // 33 ^ 12
};
// Weights for bytes 4-7 of the step.
static uvec32 kHashMul1 = {
0x30f35d61, // 33 ^ 11
0x855cb541, // 33 ^ 10
0x040a9121, // 33 ^ 9
0x747c7101, // 33 ^ 8
};
// Weights for bytes 8-11 of the step.
static uvec32 kHashMul2 = {
0xec41d4e1, // 33 ^ 7
0x4cfa3cc1, // 33 ^ 6
0x025528a1, // 33 ^ 5
0x00121881, // 33 ^ 4
};
// Weights for bytes 12-15 of the step.
static uvec32 kHashMul3 = {
0x00008c61, // 33 ^ 3
0x00000441, // 33 ^ 2
0x00000021, // 33 ^ 1
0x00000001, // 33 ^ 0
};
// SSE4.1 (GCC-style inline asm) djb2 hash that consumes 16 bytes per loop
// iteration. Each iteration multiplies the running hash by kHash16x33 and
// adds the 16 input bytes, each weighted by its entry in kHashMul0..3.
// The source is read with movdqu (unaligned load). The loop repeats while
// the decremented count is greater than zero and the body runs at least
// once, so callers are expected to pass a positive multiple of 16 -- TODO
// confirm with callers.
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
uint32 hash;
asm volatile ( // NOLINT
// xmm0 = running hash (starts as seed), xmm7 = zero, xmm6 = kHash16x33.
"movd %2,%%xmm0 \n"
"pxor %%xmm7,%%xmm7 \n"
"movdqa %4,%%xmm6 \n"
LABELALIGN
"1: \n"
"movdqu " MEMACCESS(0) ",%%xmm1 \n"
"lea " MEMLEA(0x10, 0) ",%0 \n"
"pmulld %%xmm6,%%xmm0 \n"
// Bytes 0-3 widened to 32 bits and weighted by kHashMul0.
"movdqa %5,%%xmm5 \n"
"movdqa %%xmm1,%%xmm2 \n"
"punpcklbw %%xmm7,%%xmm2 \n"
"movdqa %%xmm2,%%xmm3 \n"
"punpcklwd %%xmm7,%%xmm3 \n"
"pmulld %%xmm5,%%xmm3 \n"
// Bytes 4-7 weighted by kHashMul1.
"movdqa %6,%%xmm5 \n"
"movdqa %%xmm2,%%xmm4 \n"
"punpckhwd %%xmm7,%%xmm4 \n"
"pmulld %%xmm5,%%xmm4 \n"
// Bytes 8-11 weighted by kHashMul2.
"movdqa %7,%%xmm5 \n"
"punpckhbw %%xmm7,%%xmm1 \n"
"movdqa %%xmm1,%%xmm2 \n"
"punpcklwd %%xmm7,%%xmm2 \n"
"pmulld %%xmm5,%%xmm2 \n"
// Bytes 12-15 weighted by kHashMul3.
"movdqa %8,%%xmm5 \n"
"punpckhwd %%xmm7,%%xmm1 \n"
"pmulld %%xmm5,%%xmm1 \n"
// Sum the 16 weighted values and add them to the running hash.
"paddd %%xmm4,%%xmm3 \n"
"paddd %%xmm2,%%xmm1 \n"
"sub $0x10,%1 \n"
"paddd %%xmm3,%%xmm1 \n"
"pshufd $0xe,%%xmm1,%%xmm2 \n"
"paddd %%xmm2,%%xmm1 \n"
"pshufd $0x1,%%xmm1,%%xmm2 \n"
"paddd %%xmm2,%%xmm1 \n"
"paddd %%xmm1,%%xmm0 \n"
// The instructions between sub and here leave the flags from sub intact.
"jg 1b \n"
"movd %%xmm0,%3 \n"
: "+r"(src), // %0
"+r"(count), // %1
"+rm"(seed), // %2
"=g"(hash) // %3
: "m"(kHash16x33), // %4
"m"(kHashMul0), // %5
"m"(kHashMul1), // %6
"m"(kHashMul2), // %7
"m"(kHashMul3) // %8
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
#endif
); // NOLINT
return hash;
}
#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View file

@ -1,232 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
// SSE2 (Visual C inline asm) implementation of the sum of squared
// differences between |src_a| and |src_b|.
// Declared naked: the arguments are read directly from the stack and the
// result is left in eax. Both sources are read with movdqa, an aligned
// 16-byte load, so both pointers must be 16-byte aligned. Each iteration
// consumes 16 bytes and the loop repeats while the decremented count is
// greater than zero; the body runs at least once, so callers are expected to
// pass a positive multiple of 16 -- TODO confirm with callers.
__declspec(naked) __declspec(align(16))
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
__asm {
mov eax, [esp + 4] // src_a
mov edx, [esp + 8] // src_b
mov ecx, [esp + 12] // count
pxor xmm0, xmm0 // sum
pxor xmm5, xmm5 // constant 0 for unpck
align 4
wloop:
movdqa xmm1, [eax]
lea eax, [eax + 16]
movdqa xmm2, [edx]
lea edx, [edx + 16]
sub ecx, 16
movdqa xmm3, xmm1 // abs trick
psubusb xmm1, xmm2
psubusb xmm2, xmm3
por xmm1, xmm2
movdqa xmm2, xmm1
punpcklbw xmm1, xmm5 // widen bytes to 16 bits
punpckhbw xmm2, xmm5
pmaddwd xmm1, xmm1 // square and pair-add into 32-bit lanes
pmaddwd xmm2, xmm2
paddd xmm0, xmm1
paddd xmm0, xmm2
jg wloop // flags still come from the sub above
pshufd xmm1, xmm0, 0xee // horizontal add of the four lanes
paddd xmm0, xmm1
pshufd xmm1, xmm0, 0x01
paddd xmm0, xmm1
movd eax, xmm0 // return sum
ret
}
}
// Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700
// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
#pragma warning(disable: 4752)
// AVX2 (Visual C inline asm) implementation of the sum of squared
// differences between |src_a| and |src_b|.
// Declared naked: the arguments are read directly from the stack and the
// result is left in eax. Sources are read with vmovdqu (unaligned load).
// Each iteration consumes 32 bytes and the loop repeats while the
// decremented count is greater than zero; the body runs at least once, so
// callers are expected to pass a positive multiple of 32 -- TODO confirm
// with callers.
__declspec(naked) __declspec(align(16))
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
__asm {
mov eax, [esp + 4] // src_a
mov edx, [esp + 8] // src_b
mov ecx, [esp + 12] // count
vpxor ymm0, ymm0, ymm0 // sum
vpxor ymm5, ymm5, ymm5 // constant 0 for unpck
sub edx, eax // edx = src_b - src_a, so [eax + edx] addresses src_b
align 4
wloop:
vmovdqu ymm1, [eax]
vmovdqu ymm2, [eax + edx]
lea eax, [eax + 32]
sub ecx, 32
vpsubusb ymm3, ymm1, ymm2 // abs difference trick
vpsubusb ymm2, ymm2, ymm1
vpor ymm1, ymm2, ymm3
vpunpcklbw ymm2, ymm1, ymm5 // u16. mutates order.
vpunpckhbw ymm1, ymm1, ymm5
vpmaddwd ymm2, ymm2, ymm2 // square + hadd to u32.
vpmaddwd ymm1, ymm1, ymm1
vpaddd ymm0, ymm0, ymm1
vpaddd ymm0, ymm0, ymm2
jg wloop // flags still come from the sub above
vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes.
vpaddd ymm0, ymm0, ymm1
vpshufd ymm1, ymm0, 0x01 // 1 + 0 both lanes.
vpaddd ymm0, ymm0, ymm1
vpermq ymm1, ymm0, 0x02 // high + low lane.
vpaddd ymm0, ymm0, ymm1
vmovd eax, xmm0 // return sum
vzeroupper
ret
}
}
#endif // _MSC_VER >= 1700
#define HAS_HASHDJB2_SSE41
// Multiplier tables for the 16-bytes-at-a-time djb2 hash routines below.
// Per the per-entry comments, each value is a power of 33 (presumably
// reduced modulo 2^32 to fit the 32-bit lane -- e.g. 33^4 = 0x00121881 fits
// exactly).
// kHash16x33 scales the running hash by 33^16, one factor of 33 per byte of
// a 16-byte step.
static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
// Weights for src[0-3].
static uvec32 kHashMul0 = {
0x0c3525e1, // 33 ^ 15
0xa3476dc1, // 33 ^ 14
0x3b4039a1, // 33 ^ 13
0x4f5f0981, // 33 ^ 12
};
// Weights for src[4-7].
static uvec32 kHashMul1 = {
0x30f35d61, // 33 ^ 11
0x855cb541, // 33 ^ 10
0x040a9121, // 33 ^ 9
0x747c7101, // 33 ^ 8
};
// Weights for src[8-11].
static uvec32 kHashMul2 = {
0xec41d4e1, // 33 ^ 7
0x4cfa3cc1, // 33 ^ 6
0x025528a1, // 33 ^ 5
0x00121881, // 33 ^ 4
};
// Weights for src[12-15].
static uvec32 kHashMul3 = {
0x00008c61, // 33 ^ 3
0x00000441, // 33 ^ 2
0x00000021, // 33 ^ 1
0x00000001, // 33 ^ 0
};
// Byte encodings of the pmulld instructions used by HashDjb2_SSE41 below:
// 27: 66 0F 38 40 C6 pmulld xmm0,xmm6
// 44: 66 0F 38 40 DD pmulld xmm3,xmm5
// 59: 66 0F 38 40 E5 pmulld xmm4,xmm5
// 72: 66 0F 38 40 D5 pmulld xmm2,xmm5
// 83: 66 0F 38 40 CD pmulld xmm1,xmm5
// Emits the raw bytes 66 0F 38 40 <reg>, i.e. a pmulld whose final byte
// (the operand selector, as in the table above) is supplied by the caller.
// NOTE(review): presumably needed because the inline assembler of older
// Visual C versions lacks the SSE4.1 mnemonic -- confirm.
#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \
_asm _emit 0x40 _asm _emit reg
// SSE4.1 (Visual C inline asm) djb2 hash that consumes 16 bytes per loop
// iteration: hash = hash * 33^16 + sum of src[i] weighted by kHashMul0..3.
// Declared naked: the arguments are read directly from the stack and the
// result is left in eax. pmulld is emitted through the pmulld(reg) byte
// macro. The source is read with movdqu (unaligned load). The loop repeats
// while the decremented count is greater than zero and the body runs at
// least once, so callers are expected to pass a positive multiple of 16 --
// TODO confirm with callers.
__declspec(naked) __declspec(align(16))
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
__asm {
mov eax, [esp + 4] // src
mov ecx, [esp + 8] // count
movd xmm0, [esp + 12] // seed
pxor xmm7, xmm7 // constant 0 for unpck
movdqa xmm6, kHash16x33
align 4
wloop:
movdqu xmm1, [eax] // src[0-15]
lea eax, [eax + 16]
pmulld(0xc6) // pmulld xmm0,xmm6 hash *= 33 ^ 16
movdqa xmm5, kHashMul0
movdqa xmm2, xmm1
punpcklbw xmm2, xmm7 // src[0-7]
movdqa xmm3, xmm2
punpcklwd xmm3, xmm7 // src[0-3]
pmulld(0xdd) // pmulld xmm3, xmm5
movdqa xmm5, kHashMul1
movdqa xmm4, xmm2
punpckhwd xmm4, xmm7 // src[4-7]
pmulld(0xe5) // pmulld xmm4, xmm5
movdqa xmm5, kHashMul2
punpckhbw xmm1, xmm7 // src[8-15]
movdqa xmm2, xmm1
punpcklwd xmm2, xmm7 // src[8-11]
pmulld(0xd5) // pmulld xmm2, xmm5
movdqa xmm5, kHashMul3
punpckhwd xmm1, xmm7 // src[12-15]
pmulld(0xcd) // pmulld xmm1, xmm5
paddd xmm3, xmm4 // add 16 results
paddd xmm1, xmm2
sub ecx, 16
paddd xmm1, xmm3
pshufd xmm2, xmm1, 0x0e // upper 2 dwords
paddd xmm1, xmm2
pshufd xmm2, xmm1, 0x01
paddd xmm1, xmm2
paddd xmm0, xmm1
jg wloop // flags still come from the sub above
movd eax, xmm0 // return hash
ret
}
}
// Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700
// Visual C 2012+ variant of the 16-bytes-per-iteration djb2 hash:
// hash = hash * 33^16 + sum of src[i] weighted by kHashMul0..3.
// It differs from HashDjb2_SSE41 in loading each group of four bytes already
// zero-extended to dwords with vpmovzxbd and in using the pmulld mnemonic
// directly with memory operands.
// Declared naked: the arguments are read directly from the stack and the
// result is left in eax. The loop repeats while the decremented count is
// greater than zero and the body runs at least once, so callers are expected
// to pass a positive multiple of 16 -- TODO confirm with callers.
__declspec(naked) __declspec(align(16))
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
__asm {
mov eax, [esp + 4] // src
mov ecx, [esp + 8] // count
movd xmm0, [esp + 12] // seed
movdqa xmm6, kHash16x33
align 4
wloop:
vpmovzxbd xmm3, dword ptr [eax] // src[0-3]
pmulld xmm0, xmm6 // hash *= 33 ^ 16
vpmovzxbd xmm4, dword ptr [eax + 4] // src[4-7]
pmulld xmm3, kHashMul0
vpmovzxbd xmm2, dword ptr [eax + 8] // src[8-11]
pmulld xmm4, kHashMul1
vpmovzxbd xmm1, dword ptr [eax + 12] // src[12-15]
pmulld xmm2, kHashMul2
lea eax, [eax + 16]
pmulld xmm1, kHashMul3
paddd xmm3, xmm4 // add 16 results
paddd xmm1, xmm2
sub ecx, 16
paddd xmm1, xmm3
pshufd xmm2, xmm1, 0x0e // upper 2 dwords
paddd xmm1, xmm2
pshufd xmm2, xmm1, 0x01
paddd xmm1, xmm2
paddd xmm0, xmm1
jg wloop // flags still come from the sub above
movd eax, xmm0 // return hash
ret
}
}
#endif // _MSC_VER >= 1700
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

File diff suppressed because it is too large Load diff

View file

@ -1,901 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/convert_argb.h"
#include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h"
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
#endif
#include "libyuv/rotate_argb.h"
#include "libyuv/row.h"
#include "libyuv/video_common.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Copies an ARGB image, optionally flipping it vertically.
//   src_argb, src_stride_argb  source pixels and row pitch in bytes.
//   dst_argb, dst_stride_argb  destination pixels and row pitch in bytes.
//   width                      pixels per row; must be positive.
//   height                     rows to copy; a negative value copies
//                              -height rows with the image inverted.
// Returns 0 on success, or -1 when a pointer is null, width is not positive
// or height is zero.
LIBYUV_API
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
             uint8* dst_argb, int dst_stride_argb,
             int width, int height) {
  if (!(src_argb && dst_argb)) {
    return -1;
  }
  if (width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: start at the last source row and
  // walk backwards.
  if (height < 0) {
    height = -height;
    src_argb += (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Each ARGB pixel is 4 bytes, so a row is width * 4 bytes wide.
  CopyPlane(src_argb, src_stride_argb,
            dst_argb, dst_stride_argb,
            width * 4, height);
  return 0;
}
// Converts an I444 image (separate Y, U and V planes, each advanced one row
// per output row) to ARGB.
//   src_y/u/v, src_stride_y/u/v  source planes and their row pitches.
//   dst_argb, dst_stride_argb    destination pixels and row pitch in bytes.
//   width                        pixels per row; must be positive.
//   height                       rows to convert; a negative value writes
//                                the output rows in inverted order.
// Returns 0 on success, or -1 when a pointer is null, width is not positive
// or height is zero.
LIBYUV_API
int I444ToARGB(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  if (!(src_y && src_u && src_v && dst_argb)) {
    return -1;
  }
  if (width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: start at the last destination
  // row and walk backwards.
  if (height < 0) {
    height = -height;
    dst_argb += (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows: when every plane is contiguous the image can be converted
  // as a single long row.
  const bool planes_contiguous = src_stride_y == width &&
                                 src_stride_u == width &&
                                 src_stride_v == width &&
                                 dst_stride_argb == width * 4;
  if (planes_contiguous) {
    width *= height;
    height = 1;
    src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
  }
  // Row converter; defaults to C and is upgraded to the most specific SIMD
  // variant that the CPU, the width and the destination alignment allow.
  void (*convert_row)(const uint8* y_buf,
                      const uint8* u_buf,
                      const uint8* v_buf,
                      uint8* rgb_buf,
                      int width) = I444ToARGBRow_C;
#if defined(HAS_I444TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    if (!IS_ALIGNED(width, 8)) {
      convert_row = I444ToARGBRow_Any_SSSE3;
    } else if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
      convert_row = I444ToARGBRow_SSSE3;
    } else {
      convert_row = I444ToARGBRow_Unaligned_SSSE3;
    }
  }
#elif defined(HAS_I444TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    if (IS_ALIGNED(width, 8)) {
      convert_row = I444ToARGBRow_NEON;
    } else {
      convert_row = I444ToARGBRow_Any_NEON;
    }
  }
#endif
  for (int rows_left = height; rows_left > 0; --rows_left) {
    convert_row(src_y, src_u, src_v, dst_argb, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
  }
  return 0;
}
// Converts an I422 image (Y plane plus separate U and V planes) to ARGB.
//
// A negative height inverts the image: the destination is written starting
// from its last row. Returns 0 on success, or -1 when any pointer is null,
// width <= 0 or height == 0.
LIBYUV_API
int I422ToARGB(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  typedef void (*RowConverter)(const uint8* y_buf, const uint8* u_buf,
                               const uint8* v_buf, uint8* rgb_buf, int pix);
  if (!(src_y && src_u && src_v && dst_argb && width > 0 && height != 0)) {
    return -1;
  }
  if (height < 0) {
    // Flip by writing the output from the bottom row upwards.
    height = -height;
    dst_argb += (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Rows can be merged into one long row when no plane has row padding;
  // the U and V strides are compared against half the width.
  const bool packed = src_stride_y == width &&
                      src_stride_u * 2 == width &&
                      src_stride_v * 2 == width &&
                      dst_stride_argb == width * 4;
  if (packed) {
    width *= height;
    height = 1;
    src_stride_y = 0;
    src_stride_u = 0;
    src_stride_v = 0;
    dst_stride_argb = 0;
  }
  // Start with the portable C row function; each enabled section below may
  // replace the choice made by the previous one.
  RowConverter convert_row = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    if (!IS_ALIGNED(width, 8)) {
      convert_row = I422ToARGBRow_Any_SSSE3;
    } else if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
      convert_row = I422ToARGBRow_SSSE3;
    } else {
      convert_row = I422ToARGBRow_Unaligned_SSSE3;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
    if (IS_ALIGNED(width, 16)) {
      convert_row = I422ToARGBRow_AVX2;
    } else {
      convert_row = I422ToARGBRow_Any_AVX2;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    if (IS_ALIGNED(width, 8)) {
      convert_row = I422ToARGBRow_NEON;
    } else {
      convert_row = I422ToARGBRow_Any_NEON;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
  // The DSPR2 row function is only selected when width, every pointer and
  // every stride meet the alignment checked here.
  const bool dspr2_usable =
      TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4);
  if (dspr2_usable) {
    convert_row = I422ToARGBRow_MIPS_DSPR2;
  }
#endif
  for (int row = 0; row < height; ++row) {
    convert_row(src_y, src_u, src_v, dst_argb, width);
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
// Converts an I411 image (Y plane plus separate U and V planes) to ARGB.
//
// A negative height inverts the image: the destination is written starting
// from its last row. Returns 0 on success, or -1 when any pointer is null,
// width <= 0 or height == 0.
LIBYUV_API
int I411ToARGB(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  typedef void (*RowConverter)(const uint8* y_buf, const uint8* u_buf,
                               const uint8* v_buf, uint8* rgb_buf, int pix);
  if (!(src_y && src_u && src_v && dst_argb && width > 0 && height != 0)) {
    return -1;
  }
  if (height < 0) {
    // Flip by writing the output from the bottom row upwards.
    height = -height;
    dst_argb += (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Rows can be merged into one long row when no plane has row padding;
  // the U and V strides are compared against a quarter of the width.
  const bool packed = src_stride_y == width &&
                      src_stride_u * 4 == width &&
                      src_stride_v * 4 == width &&
                      dst_stride_argb == width * 4;
  if (packed) {
    width *= height;
    height = 1;
    src_stride_y = 0;
    src_stride_u = 0;
    src_stride_v = 0;
    dst_stride_argb = 0;
  }
  // Start with the portable C row function and upgrade when possible.
  RowConverter convert_row = I411ToARGBRow_C;
#if defined(HAS_I411TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    if (!IS_ALIGNED(width, 8)) {
      convert_row = I411ToARGBRow_Any_SSSE3;
    } else if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
      convert_row = I411ToARGBRow_SSSE3;
    } else {
      convert_row = I411ToARGBRow_Unaligned_SSSE3;
    }
  }
#elif defined(HAS_I411TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    if (IS_ALIGNED(width, 8)) {
      convert_row = I411ToARGBRow_NEON;
    } else {
      convert_row = I411ToARGBRow_Any_NEON;
    }
  }
#endif
  for (int row = 0; row < height; ++row) {
    convert_row(src_y, src_u, src_v, dst_argb, width);
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
// Converts an I400 (Y-only) image to ARGB using the YToARGBRow row
// functions.
//
// A negative height inverts the image: the destination is written starting
// from its last row. Returns 0 on success, or -1 when a pointer is null,
// width <= 0 or height == 0.
LIBYUV_API
int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
                         uint8* dst_argb, int dst_stride_argb,
                         int width, int height) {
  typedef void (*RowConverter)(const uint8* y_buf, uint8* rgb_buf, int pix);
  if (!(src_y && dst_argb && width > 0 && height != 0)) {
    return -1;
  }
  if (height < 0) {
    // Flip by writing the output from the bottom row upwards.
    height = -height;
    dst_argb += (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // With no row padding on either side the image is one long row.
  const bool packed = src_stride_y == width && dst_stride_argb == width * 4;
  if (packed) {
    width *= height;
    height = 1;
    src_stride_y = 0;
    dst_stride_argb = 0;
  }
  // Start with the portable C row function and upgrade when possible.
  RowConverter convert_row = YToARGBRow_C;
#if defined(HAS_YTOARGBROW_SSE2)
  // Both SSE2 variants are only chosen for a 16-byte aligned destination.
  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    if (IS_ALIGNED(width, 8)) {
      convert_row = YToARGBRow_SSE2;
    } else {
      convert_row = YToARGBRow_Any_SSE2;
    }
  }
#elif defined(HAS_YTOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    if (IS_ALIGNED(width, 8)) {
      convert_row = YToARGBRow_NEON;
    } else {
      convert_row = YToARGBRow_Any_NEON;
    }
  }
#endif
  for (int row = 0; row < height; ++row) {
    convert_row(src_y, dst_argb, width);
    src_y += src_stride_y;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
// Converts an I400 (Y-only) image to ARGB using the I400ToARGBRow row
// functions.
//
// A negative height inverts the image: the source is read starting from its
// last row. Returns 0 on success, or -1 when a pointer is null, width <= 0
// or height == 0.
LIBYUV_API
int I400ToARGB(const uint8* src_y, int src_stride_y,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  typedef void (*RowConverter)(const uint8* src, uint8* dst, int pix);
  if (!(src_y && dst_argb && width > 0 && height != 0)) {
    return -1;
  }
  if (height < 0) {
    // Flip by reading the source from the bottom row upwards.
    height = -height;
    src_y += (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
  // With no row padding on either side the image is one long row.
  const bool packed = src_stride_y == width && dst_stride_argb == width * 4;
  if (packed) {
    width *= height;
    height = 1;
    src_stride_y = 0;
    dst_stride_argb = 0;
  }
  // Start with the portable C row function and upgrade when possible.
  RowConverter convert_row = I400ToARGBRow_C;
#if defined(HAS_I400TOARGBROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
    if (!IS_ALIGNED(width, 8)) {
      convert_row = I400ToARGBRow_Any_SSE2;
    } else if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
      convert_row = I400ToARGBRow_SSE2;
    } else {
      convert_row = I400ToARGBRow_Unaligned_SSE2;
    }
  }
#elif defined(HAS_I400TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    if (IS_ALIGNED(width, 8)) {
      convert_row = I400ToARGBRow_NEON;
    } else {
      convert_row = I400ToARGBRow_Any_NEON;
    }
  }
#endif
  for (int row = 0; row < height; ++row) {
    convert_row(src_y, dst_argb, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
  }
  return 0;
}
// Shuffle table for converting BGRA to ARGB.
// Passed to ARGBShuffle by BGRAToARGB. The 16 entries form four groups of
// four, each listing its group's byte indices in fully reversed order.
// NOTE(review): presumably each entry is the source byte index for that
// destination byte - confirm against ARGBShuffle.
static uvec8 kShuffleMaskBGRAToARGB = {
3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u
};
// Shuffle table for converting ABGR to ARGB.
// Passed to ARGBShuffle by ABGRToARGB. Within each group of four the first
// and third indices are swapped; the second and fourth stay in place.
static uvec8 kShuffleMaskABGRToARGB = {
2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u
};
// Shuffle table for converting RGBA to ARGB.
// Passed to ARGBShuffle by RGBAToARGB. Within each group of four the indices
// are rotated by one position (1, 2, 3, 0).
static uvec8 kShuffleMaskRGBAToARGB = {
1u, 2u, 3u, 0u, 5u, 6u, 7u, 4u, 9u, 10u, 11u, 8u, 13u, 14u, 15u, 12u
};
// Converts a BGRA image to ARGB by delegating to ARGBShuffle with the
// BGRA-to-ARGB shuffle table. The return value is whatever ARGBShuffle
// returns.
LIBYUV_API
int BGRAToARGB(const uint8* src_bgra, int src_stride_bgra,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  const uint8* const shuffler = (const uint8*)(&kShuffleMaskBGRAToARGB);
  return ARGBShuffle(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb,
                     shuffler, width, height);
}
// Converts an ABGR image to ARGB by delegating to ARGBShuffle with the
// ABGR-to-ARGB shuffle table. The return value is whatever ARGBShuffle
// returns.
LIBYUV_API
int ABGRToARGB(const uint8* src_abgr, int src_stride_abgr,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  const uint8* const shuffler = (const uint8*)(&kShuffleMaskABGRToARGB);
  return ARGBShuffle(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb,
                     shuffler, width, height);
}
// Converts an RGBA image to ARGB by delegating to ARGBShuffle with the
// RGBA-to-ARGB shuffle table. The return value is whatever ARGBShuffle
// returns.
LIBYUV_API
int RGBAToARGB(const uint8* src_rgba, int src_stride_rgba,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  const uint8* const shuffler = (const uint8*)(&kShuffleMaskRGBAToARGB);
  return ARGBShuffle(src_rgba, src_stride_rgba, dst_argb, dst_stride_argb,
                     shuffler, width, height);
}
// Converts an RGB24 image to ARGB.
//
// A negative height inverts the image: the source is read starting from its
// last row. Returns 0 on success, or -1 when a pointer is null, width <= 0
// or height == 0.
LIBYUV_API
int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
                uint8* dst_argb, int dst_stride_argb,
                int width, int height) {
  typedef void (*RowConverter)(const uint8* src, uint8* dst, int pix);
  if (!(src_rgb24 && dst_argb && width > 0 && height != 0)) {
    return -1;
  }
  if (height < 0) {
    // Flip by reading the source from the bottom row upwards.
    height = -height;
    src_rgb24 += (height - 1) * src_stride_rgb24;
    src_stride_rgb24 = -src_stride_rgb24;
  }
  // With no row padding on either side the image is one long row.
  const bool packed = src_stride_rgb24 == width * 3 &&
                      dst_stride_argb == width * 4;
  if (packed) {
    width *= height;
    height = 1;
    src_stride_rgb24 = 0;
    dst_stride_argb = 0;
  }
  // Start with the portable C row function and upgrade when possible.
  RowConverter convert_row = RGB24ToARGBRow_C;
#if defined(HAS_RGB24TOARGBROW_SSSE3)
  // Both SSSE3 variants are only chosen for a 16-byte aligned destination.
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    if (IS_ALIGNED(width, 16)) {
      convert_row = RGB24ToARGBRow_SSSE3;
    } else {
      convert_row = RGB24ToARGBRow_Any_SSSE3;
    }
  }
#elif defined(HAS_RGB24TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    if (IS_ALIGNED(width, 8)) {
      convert_row = RGB24ToARGBRow_NEON;
    } else {
      convert_row = RGB24ToARGBRow_Any_NEON;
    }
  }
#endif
  for (int row = 0; row < height; ++row) {
    convert_row(src_rgb24, dst_argb, width);
    dst_argb += dst_stride_argb;
    src_rgb24 += src_stride_rgb24;
  }
  return 0;
}
// Converts a RAW image to ARGB.
//
// A negative height inverts the image: the source is read starting from its
// last row. Returns 0 on success, or -1 when a pointer is null, width <= 0
// or height == 0.
LIBYUV_API
int RAWToARGB(const uint8* src_raw, int src_stride_raw,
              uint8* dst_argb, int dst_stride_argb,
              int width, int height) {
  typedef void (*RowConverter)(const uint8* src, uint8* dst, int pix);
  if (!(src_raw && dst_argb && width > 0 && height != 0)) {
    return -1;
  }
  if (height < 0) {
    // Flip by reading the source from the bottom row upwards.
    height = -height;
    src_raw += (height - 1) * src_stride_raw;
    src_stride_raw = -src_stride_raw;
  }
  // With no row padding on either side the image is one long row.
  const bool packed = src_stride_raw == width * 3 &&
                      dst_stride_argb == width * 4;
  if (packed) {
    width *= height;
    height = 1;
    src_stride_raw = 0;
    dst_stride_argb = 0;
  }
  // Start with the portable C row function and upgrade when possible.
  RowConverter convert_row = RAWToARGBRow_C;
#if defined(HAS_RAWTOARGBROW_SSSE3)
  // Both SSSE3 variants are only chosen for a 16-byte aligned destination.
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    if (IS_ALIGNED(width, 16)) {
      convert_row = RAWToARGBRow_SSSE3;
    } else {
      convert_row = RAWToARGBRow_Any_SSSE3;
    }
  }
#elif defined(HAS_RAWTOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    if (IS_ALIGNED(width, 8)) {
      convert_row = RAWToARGBRow_NEON;
    } else {
      convert_row = RAWToARGBRow_Any_NEON;
    }
  }
#endif
  for (int row = 0; row < height; ++row) {
    convert_row(src_raw, dst_argb, width);
    dst_argb += dst_stride_argb;
    src_raw += src_stride_raw;
  }
  return 0;
}
// Converts an RGB565 image to ARGB.
//
// A negative height inverts the image: the source is read starting from its
// last row. Returns 0 on success, or -1 when a pointer is null, width <= 0
// or height == 0.
LIBYUV_API
int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565,
                 uint8* dst_argb, int dst_stride_argb,
                 int width, int height) {
  typedef void (*RowConverter)(const uint8* src, uint8* dst, int pix);
  if (!(src_rgb565 && dst_argb && width > 0 && height != 0)) {
    return -1;
  }
  if (height < 0) {
    // Flip by reading the source from the bottom row upwards.
    height = -height;
    src_rgb565 += (height - 1) * src_stride_rgb565;
    src_stride_rgb565 = -src_stride_rgb565;
  }
  // With no row padding on either side the image is one long row.
  const bool packed = src_stride_rgb565 == width * 2 &&
                      dst_stride_argb == width * 4;
  if (packed) {
    width *= height;
    height = 1;
    src_stride_rgb565 = 0;
    dst_stride_argb = 0;
  }
  // Start with the portable C row function and upgrade when possible.
  RowConverter convert_row = RGB565ToARGBRow_C;
#if defined(HAS_RGB565TOARGBROW_SSE2)
  // Both SSE2 variants are only chosen for a 16-byte aligned destination.
  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    if (IS_ALIGNED(width, 8)) {
      convert_row = RGB565ToARGBRow_SSE2;
    } else {
      convert_row = RGB565ToARGBRow_Any_SSE2;
    }
  }
#elif defined(HAS_RGB565TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    if (IS_ALIGNED(width, 8)) {
      convert_row = RGB565ToARGBRow_NEON;
    } else {
      convert_row = RGB565ToARGBRow_Any_NEON;
    }
  }
#endif
  for (int row = 0; row < height; ++row) {
    convert_row(src_rgb565, dst_argb, width);
    dst_argb += dst_stride_argb;
    src_rgb565 += src_stride_rgb565;
  }
  return 0;
}
// Converts an ARGB1555 image to ARGB.
//
// A negative height inverts the image: the source is read starting from its
// last row. Returns 0 on success, or -1 when a pointer is null, width <= 0
// or height == 0.
LIBYUV_API
int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
                   uint8* dst_argb, int dst_stride_argb,
                   int width, int height) {
  typedef void (*RowConverter)(const uint8* src, uint8* dst, int pix);
  if (!(src_argb1555 && dst_argb && width > 0 && height != 0)) {
    return -1;
  }
  if (height < 0) {
    // Flip by reading the source from the bottom row upwards.
    height = -height;
    src_argb1555 += (height - 1) * src_stride_argb1555;
    src_stride_argb1555 = -src_stride_argb1555;
  }
  // With no row padding on either side the image is one long row.
  const bool packed = src_stride_argb1555 == width * 2 &&
                      dst_stride_argb == width * 4;
  if (packed) {
    width *= height;
    height = 1;
    src_stride_argb1555 = 0;
    dst_stride_argb = 0;
  }
  // Start with the portable C row function and upgrade when possible.
  RowConverter convert_row = ARGB1555ToARGBRow_C;
#if defined(HAS_ARGB1555TOARGBROW_SSE2)
  // Both SSE2 variants are only chosen for a 16-byte aligned destination.
  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    if (IS_ALIGNED(width, 8)) {
      convert_row = ARGB1555ToARGBRow_SSE2;
    } else {
      convert_row = ARGB1555ToARGBRow_Any_SSE2;
    }
  }
#elif defined(HAS_ARGB1555TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    if (IS_ALIGNED(width, 8)) {
      convert_row = ARGB1555ToARGBRow_NEON;
    } else {
      convert_row = ARGB1555ToARGBRow_Any_NEON;
    }
  }
#endif
  for (int row = 0; row < height; ++row) {
    convert_row(src_argb1555, dst_argb, width);
    dst_argb += dst_stride_argb;
    src_argb1555 += src_stride_argb1555;
  }
  return 0;
}
// Converts an ARGB4444 image to ARGB.
//
// A negative height inverts the image: the source is read starting from its
// last row. Returns 0 on success, or -1 when a pointer is null, width <= 0
// or height == 0.
LIBYUV_API
int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444,
                   uint8* dst_argb, int dst_stride_argb,
                   int width, int height) {
  typedef void (*RowConverter)(const uint8* src, uint8* dst, int pix);
  if (!(src_argb4444 && dst_argb && width > 0 && height != 0)) {
    return -1;
  }
  if (height < 0) {
    // Flip by reading the source from the bottom row upwards.
    height = -height;
    src_argb4444 += (height - 1) * src_stride_argb4444;
    src_stride_argb4444 = -src_stride_argb4444;
  }
  // With no row padding on either side the image is one long row.
  const bool packed = src_stride_argb4444 == width * 2 &&
                      dst_stride_argb == width * 4;
  if (packed) {
    width *= height;
    height = 1;
    src_stride_argb4444 = 0;
    dst_stride_argb = 0;
  }
  // Start with the portable C row function and upgrade when possible.
  RowConverter convert_row = ARGB4444ToARGBRow_C;
#if defined(HAS_ARGB4444TOARGBROW_SSE2)
  // Both SSE2 variants are only chosen for a 16-byte aligned destination.
  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    if (IS_ALIGNED(width, 8)) {
      convert_row = ARGB4444ToARGBRow_SSE2;
    } else {
      convert_row = ARGB4444ToARGBRow_Any_SSE2;
    }
  }
#elif defined(HAS_ARGB4444TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    if (IS_ALIGNED(width, 8)) {
      convert_row = ARGB4444ToARGBRow_NEON;
    } else {
      convert_row = ARGB4444ToARGBRow_Any_NEON;
    }
  }
#endif
  for (int row = 0; row < height; ++row) {
    convert_row(src_argb4444, dst_argb, width);
    dst_argb += dst_stride_argb;
    src_argb4444 += src_stride_argb4444;
  }
  return 0;
}
// Converts an NV12 image (Y plane plus a combined UV plane) to ARGB.
//
// Every UV row is used for two consecutive Y rows. A negative height inverts
// the image: the destination is written starting from its last row.
// Returns 0 on success, or -1 when a pointer is null, width <= 0 or
// height == 0.
LIBYUV_API
int NV12ToARGB(const uint8* src_y, int src_stride_y,
               const uint8* src_uv, int src_stride_uv,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  typedef void (*RowConverter)(const uint8* y_buf, const uint8* uv_buf,
                               uint8* rgb_buf, int pix);
  if (!(src_y && src_uv && dst_argb && width > 0 && height != 0)) {
    return -1;
  }
  if (height < 0) {
    // Flip by writing the output from the bottom row upwards.
    height = -height;
    dst_argb += (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Start with the portable C row function and upgrade when possible.
  RowConverter convert_row = NV12ToARGBRow_C;
#if defined(HAS_NV12TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    if (!IS_ALIGNED(width, 8)) {
      convert_row = NV12ToARGBRow_Any_SSSE3;
    } else if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
      convert_row = NV12ToARGBRow_SSSE3;
    } else {
      convert_row = NV12ToARGBRow_Unaligned_SSSE3;
    }
  }
#elif defined(HAS_NV12TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    if (IS_ALIGNED(width, 8)) {
      convert_row = NV12ToARGBRow_NEON;
    } else {
      convert_row = NV12ToARGBRow_Any_NEON;
    }
  }
#endif
  for (int row = 0; row < height; ++row) {
    convert_row(src_y, src_uv, dst_argb, width);
    src_y += src_stride_y;
    dst_argb += dst_stride_argb;
    // Move to the next UV row only after each odd-numbered Y row.
    if ((row & 1) != 0) {
      src_uv += src_stride_uv;
    }
  }
  return 0;
}
// Converts an NV21 image (Y plane plus a combined chroma plane) to ARGB.
//
// Every chroma row is used for two consecutive Y rows. A negative height
// inverts the image: the destination is written starting from its last row.
// Returns 0 on success, or -1 when a pointer is null, width <= 0 or
// height == 0.
LIBYUV_API
int NV21ToARGB(const uint8* src_y, int src_stride_y,
               const uint8* src_uv, int src_stride_uv,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  typedef void (*RowConverter)(const uint8* y_buf, const uint8* uv_buf,
                               uint8* rgb_buf, int pix);
  if (!(src_y && src_uv && dst_argb && width > 0 && height != 0)) {
    return -1;
  }
  if (height < 0) {
    // Flip by writing the output from the bottom row upwards.
    height = -height;
    dst_argb += (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Start with the portable C row function; each enabled section below may
  // replace the choice made by the previous one.
  RowConverter convert_row = NV21ToARGBRow_C;
#if defined(HAS_NV21TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    if (!IS_ALIGNED(width, 8)) {
      convert_row = NV21ToARGBRow_Any_SSSE3;
    } else if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
      convert_row = NV21ToARGBRow_SSSE3;
    } else {
      convert_row = NV21ToARGBRow_Unaligned_SSSE3;
    }
  }
#endif
#if defined(HAS_NV21TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    if (IS_ALIGNED(width, 8)) {
      convert_row = NV21ToARGBRow_NEON;
    } else {
      convert_row = NV21ToARGBRow_Any_NEON;
    }
  }
#endif
  for (int row = 0; row < height; ++row) {
    convert_row(src_y, src_uv, dst_argb, width);
    src_y += src_stride_y;
    dst_argb += dst_stride_argb;
    // Move to the next chroma row only after each odd-numbered Y row.
    if ((row & 1) != 0) {
      src_uv += src_stride_uv;
    }
  }
  return 0;
}
// Converts an M420 image to ARGB.
//
// The source is consumed in groups of three rows: two Y rows followed by the
// UV row they share. Conversion reuses the NV12 row functions.
// A negative height inverts the image: the destination is written starting
// from its last row. Returns 0 on success, or -1 when a pointer is null,
// width <= 0 or height == 0.
LIBYUV_API
int M420ToARGB(const uint8* src_m420, int src_stride_m420,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  typedef void (*RowConverter)(const uint8* y_buf, const uint8* uv_buf,
                               uint8* rgb_buf, int pix);
  if (!(src_m420 && dst_argb && width > 0 && height != 0)) {
    return -1;
  }
  if (height < 0) {
    // Flip by writing the output from the bottom row upwards.
    height = -height;
    dst_argb += (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Start with the portable C row function and upgrade when possible.
  RowConverter convert_row = NV12ToARGBRow_C;
#if defined(HAS_NV12TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    if (!IS_ALIGNED(width, 8)) {
      convert_row = NV12ToARGBRow_Any_SSSE3;
    } else if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
      convert_row = NV12ToARGBRow_SSSE3;
    } else {
      convert_row = NV12ToARGBRow_Unaligned_SSSE3;
    }
  }
#elif defined(HAS_NV12TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    if (IS_ALIGNED(width, 8)) {
      convert_row = NV12ToARGBRow_NEON;
    } else {
      convert_row = NV12ToARGBRow_Any_NEON;
    }
  }
#endif
  // Convert two output rows per iteration, both using the same UV row.
  int row = 0;
  while (row < height - 1) {
    const uint8* const shared_uv = src_m420 + src_stride_m420 * 2;
    convert_row(src_m420, shared_uv, dst_argb, width);
    convert_row(src_m420 + src_stride_m420, shared_uv,
                dst_argb + dst_stride_argb, width);
    src_m420 += src_stride_m420 * 3;
    dst_argb += dst_stride_argb * 2;
    row += 2;
  }
  // An odd height leaves one final Y row to convert.
  if ((height & 1) != 0) {
    convert_row(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
  }
  return 0;
}
// Converts a YUY2 image to ARGB.
//
// A negative height inverts the image: the source is read starting from its
// last row. Returns 0 on success, or -1 when a pointer is null, width <= 0
// or height == 0.
LIBYUV_API
int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  typedef void (*RowConverter)(const uint8* src, uint8* dst, int pix);
  if (!(src_yuy2 && dst_argb && width > 0 && height != 0)) {
    return -1;
  }
  if (height < 0) {
    // Flip by reading the source from the bottom row upwards.
    height = -height;
    src_yuy2 += (height - 1) * src_stride_yuy2;
    src_stride_yuy2 = -src_stride_yuy2;
  }
  // With no row padding on either side the image is one long row.
  const bool packed = src_stride_yuy2 == width * 2 &&
                      dst_stride_argb == width * 4;
  if (packed) {
    width *= height;
    height = 1;
    src_stride_yuy2 = 0;
    dst_stride_argb = 0;
  }
  // Start with the portable C row function and upgrade when possible.
  RowConverter convert_row = YUY2ToARGBRow_C;
#if defined(HAS_YUY2TOARGBROW_SSSE3)
  // Posix is 16, Windows is 8.
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    if (!IS_ALIGNED(width, 16)) {
      convert_row = YUY2ToARGBRow_Any_SSSE3;
    } else if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16) &&
               IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
      convert_row = YUY2ToARGBRow_SSSE3;
    } else {
      convert_row = YUY2ToARGBRow_Unaligned_SSSE3;
    }
  }
#elif defined(HAS_YUY2TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    if (IS_ALIGNED(width, 8)) {
      convert_row = YUY2ToARGBRow_NEON;
    } else {
      convert_row = YUY2ToARGBRow_Any_NEON;
    }
  }
#endif
  for (int row = 0; row < height; ++row) {
    convert_row(src_yuy2, dst_argb, width);
    dst_argb += dst_stride_argb;
    src_yuy2 += src_stride_yuy2;
  }
  return 0;
}
// Converts a UYVY image to ARGB.
//
// A negative height inverts the image: the source is read starting from its
// last row. Returns 0 on success, or -1 when a pointer is null, width <= 0
// or height == 0.
LIBYUV_API
int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  typedef void (*RowConverter)(const uint8* src, uint8* dst, int pix);
  if (!(src_uyvy && dst_argb && width > 0 && height != 0)) {
    return -1;
  }
  if (height < 0) {
    // Flip by reading the source from the bottom row upwards.
    height = -height;
    src_uyvy += (height - 1) * src_stride_uyvy;
    src_stride_uyvy = -src_stride_uyvy;
  }
  // With no row padding on either side the image is one long row.
  const bool packed = src_stride_uyvy == width * 2 &&
                      dst_stride_argb == width * 4;
  if (packed) {
    width *= height;
    height = 1;
    src_stride_uyvy = 0;
    dst_stride_argb = 0;
  }
  // Start with the portable C row function and upgrade when possible.
  RowConverter convert_row = UYVYToARGBRow_C;
#if defined(HAS_UYVYTOARGBROW_SSSE3)
  // Posix is 16, Windows is 8.
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    if (!IS_ALIGNED(width, 16)) {
      convert_row = UYVYToARGBRow_Any_SSSE3;
    } else if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16) &&
               IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
      convert_row = UYVYToARGBRow_SSSE3;
    } else {
      convert_row = UYVYToARGBRow_Unaligned_SSSE3;
    }
  }
#elif defined(HAS_UYVYTOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    if (IS_ALIGNED(width, 8)) {
      convert_row = UYVYToARGBRow_NEON;
    } else {
      convert_row = UYVYToARGBRow_Any_NEON;
    }
  }
#endif
  for (int row = 0; row < height; ++row) {
    convert_row(src_uyvy, dst_argb, width);
    dst_argb += dst_stride_argb;
    src_uyvy += src_stride_uyvy;
  }
  return 0;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,392 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/convert.h"
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
#endif
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#ifdef HAVE_JPEG
// Destination state shared with the JPEG-to-I420 decode callbacks through
// their opaque pointer. MJPGToI420 fills it with a brace initializer, so the
// member order must not change.
struct I420Buffers {
// Current write position and row stride of the Y plane.
uint8* y;
int y_stride;
// Current write position and row stride of the U plane.
uint8* u;
int u_stride;
// Current write position and row stride of the V plane.
uint8* v;
int v_stride;
// Width that the callbacks pass to the conversion functions.
int w;
// Row count; each callback subtracts the rows it has handled.
int h;
};
static void JpegCopyI420(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
I420Buffers* dest = (I420Buffers*)(opaque);
I420Copy(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->y, dest->y_stride,
dest->u, dest->u_stride,
dest->v, dest->v_stride,
dest->w, rows);
dest->y += rows * dest->y_stride;
dest->u += ((rows + 1) >> 1) * dest->u_stride;
dest->v += ((rows + 1) >> 1) * dest->v_stride;
dest->h -= rows;
}
static void JpegI422ToI420(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
I420Buffers* dest = (I420Buffers*)(opaque);
I422ToI420(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->y, dest->y_stride,
dest->u, dest->u_stride,
dest->v, dest->v_stride,
dest->w, rows);
dest->y += rows * dest->y_stride;
dest->u += ((rows + 1) >> 1) * dest->u_stride;
dest->v += ((rows + 1) >> 1) * dest->v_stride;
dest->h -= rows;
}
static void JpegI444ToI420(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
I420Buffers* dest = (I420Buffers*)(opaque);
I444ToI420(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->y, dest->y_stride,
dest->u, dest->u_stride,
dest->v, dest->v_stride,
dest->w, rows);
dest->y += rows * dest->y_stride;
dest->u += ((rows + 1) >> 1) * dest->u_stride;
dest->v += ((rows + 1) >> 1) * dest->v_stride;
dest->h -= rows;
}
static void JpegI411ToI420(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
I420Buffers* dest = (I420Buffers*)(opaque);
I411ToI420(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->y, dest->y_stride,
dest->u, dest->u_stride,
dest->v, dest->v_stride,
dest->w, rows);
dest->y += rows * dest->y_stride;
dest->u += ((rows + 1) >> 1) * dest->u_stride;
dest->v += ((rows + 1) >> 1) * dest->v_stride;
dest->h -= rows;
}
static void JpegI400ToI420(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
I420Buffers* dest = (I420Buffers*)(opaque);
I400ToI420(data[0], strides[0],
dest->y, dest->y_stride,
dest->u, dest->u_stride,
dest->v, dest->v_stride,
dest->w, rows);
dest->y += rows * dest->y_stride;
dest->u += ((rows + 1) >> 1) * dest->u_stride;
dest->v += ((rows + 1) >> 1) * dest->v_stride;
dest->h -= rows;
}
// Queries the pixel dimensions of an MJPG frame.
//
// On success *width and *height receive the decoder's reported size and 0 is
// returned. If the frame cannot be loaded, -1 is returned and the outputs are
// left untouched.
LIBYUV_API
int MJPGSize(const uint8* sample, size_t sample_size,
             int* width, int* height) {
  MJpegDecoder decoder;
  const LIBYUV_BOOL loaded = decoder.LoadFrame(sample, sample_size);
  if (loaded) {
    *width = decoder.GetWidth();
    *height = decoder.GetHeight();
  }
  // UnloadFrame is called whether or not the load succeeded.
  decoder.UnloadFrame();
  if (!loaded) {
    return -1;  // -1 for runtime failure.
  }
  return 0;
}
// MJPG (Motion JPeg) to I420
//
// Decodes the frame in `sample` and writes it to the y/u/v planes.
// w and h are the dimensions the frame must have; dw and dh are the
// dimensions passed to the decoder and to the conversion callbacks.
// Returns -1 when sample_size is kUnknownDataSize, 1 when the frame fails to
// load, has unexpected dimensions, uses an unsupported format or fails to
// decode, and 0 on success.
// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
LIBYUV_API
int MJPGToI420(const uint8* sample,
               size_t sample_size,
               uint8* y, int y_stride,
               uint8* u, int u_stride,
               uint8* v, int v_stride,
               int w, int h,
               int dw, int dh) {
  typedef void (*BandCallback)(void* opaque, const uint8* const* data,
                               const int* strides, int rows);
  if (sample_size == kUnknownDataSize) {
    // ERROR: MJPEG frame size unknown
    return -1;
  }
  // TODO(fbarchard): Port MJpeg to C.
  MJpegDecoder mjpeg_decoder;
  if (!mjpeg_decoder.LoadFrame(sample, sample_size)) {
    return 1;
  }
  if (mjpeg_decoder.GetWidth() != w ||
      mjpeg_decoder.GetHeight() != h) {
    // ERROR: MJPEG frame has unexpected dimensions
    mjpeg_decoder.UnloadFrame();
    return 1;  // runtime failure
  }
  I420Buffers bufs = { y, y_stride, u, u_stride, v, v_stride, dw, dh };
  // Select the callback that matches the frame's colorspace and sampling
  // factors; the checks are made in the order 420, 422, 444, 411, 400.
  BandCallback convert_band;
  if (mjpeg_decoder.GetColorSpace() ==
          MJpegDecoder::kColorSpaceYCbCr &&
      mjpeg_decoder.GetNumComponents() == 3 &&
      mjpeg_decoder.GetVertSampFactor(0) == 2 &&
      mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
      mjpeg_decoder.GetVertSampFactor(1) == 1 &&
      mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
      mjpeg_decoder.GetVertSampFactor(2) == 1 &&
      mjpeg_decoder.GetHorizSampFactor(2) == 1) {
    // YUV420
    convert_band = &JpegCopyI420;
  } else if (mjpeg_decoder.GetColorSpace() ==
                 MJpegDecoder::kColorSpaceYCbCr &&
             mjpeg_decoder.GetNumComponents() == 3 &&
             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
             mjpeg_decoder.GetVertSampFactor(1) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
             mjpeg_decoder.GetVertSampFactor(2) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(2) == 1) {
    // YUV422
    convert_band = &JpegI422ToI420;
  } else if (mjpeg_decoder.GetColorSpace() ==
                 MJpegDecoder::kColorSpaceYCbCr &&
             mjpeg_decoder.GetNumComponents() == 3 &&
             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
             mjpeg_decoder.GetVertSampFactor(1) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
             mjpeg_decoder.GetVertSampFactor(2) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(2) == 1) {
    // YUV444
    convert_band = &JpegI444ToI420;
  } else if (mjpeg_decoder.GetColorSpace() ==
                 MJpegDecoder::kColorSpaceYCbCr &&
             mjpeg_decoder.GetNumComponents() == 3 &&
             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(0) == 4 &&
             mjpeg_decoder.GetVertSampFactor(1) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
             mjpeg_decoder.GetVertSampFactor(2) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(2) == 1) {
    // YUV411
    convert_band = &JpegI411ToI420;
  } else if (mjpeg_decoder.GetColorSpace() ==
                 MJpegDecoder::kColorSpaceGrayscale &&
             mjpeg_decoder.GetNumComponents() == 1 &&
             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(0) == 1) {
    // YUV400
    convert_band = &JpegI400ToI420;
  } else {
    // TODO(fbarchard): Implement conversion for any other colorspace/sample
    // factors that occur in practice. 411 is supported by libjpeg
    // ERROR: Unable to convert MJPEG frame because format is not supported
    mjpeg_decoder.UnloadFrame();
    return 1;
  }
  const LIBYUV_BOOL decoded =
      mjpeg_decoder.DecodeToCallback(convert_band, &bufs, dw, dh);
  return decoded ? 0 : 1;
}
#ifdef HAVE_JPEG
// Destination state shared with the JPEG-to-ARGB decode callbacks through
// their opaque pointer. MJPGToARGB fills it with a brace initializer, so the
// member order must not change.
struct ARGBBuffers {
// Current write position and row stride of the ARGB output.
uint8* argb;
int argb_stride;
// Width that the callbacks pass to the conversion functions.
int w;
// Row count; each callback subtracts the rows it has handled.
int h;
};
static void JpegI420ToARGB(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
I420ToARGB(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->argb, dest->argb_stride,
dest->w, rows);
dest->argb += rows * dest->argb_stride;
dest->h -= rows;
}
static void JpegI422ToARGB(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
I422ToARGB(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->argb, dest->argb_stride,
dest->w, rows);
dest->argb += rows * dest->argb_stride;
dest->h -= rows;
}
static void JpegI444ToARGB(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
I444ToARGB(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->argb, dest->argb_stride,
dest->w, rows);
dest->argb += rows * dest->argb_stride;
dest->h -= rows;
}
static void JpegI411ToARGB(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
I411ToARGB(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->argb, dest->argb_stride,
dest->w, rows);
dest->argb += rows * dest->argb_stride;
dest->h -= rows;
}
static void JpegI400ToARGB(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
I400ToARGB(data[0], strides[0],
dest->argb, dest->argb_stride,
dest->w, rows);
dest->argb += rows * dest->argb_stride;
dest->h -= rows;
}
// MJPG (Motion JPeg) to ARGB
//
// Decodes the frame in `sample` and writes it to the argb buffer.
// w and h are the dimensions the frame must have; dw and dh are the
// dimensions passed to the decoder and to the conversion callbacks.
// Returns -1 when sample_size is kUnknownDataSize, 1 when the frame fails to
// load, has unexpected dimensions, uses an unsupported format or fails to
// decode, and 0 on success.
// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
LIBYUV_API
int MJPGToARGB(const uint8* sample,
               size_t sample_size,
               uint8* argb, int argb_stride,
               int w, int h,
               int dw, int dh) {
  typedef void (*BandCallback)(void* opaque, const uint8* const* data,
                               const int* strides, int rows);
  if (sample_size == kUnknownDataSize) {
    // ERROR: MJPEG frame size unknown
    return -1;
  }
  // TODO(fbarchard): Port MJpeg to C.
  MJpegDecoder mjpeg_decoder;
  if (!mjpeg_decoder.LoadFrame(sample, sample_size)) {
    return 1;
  }
  if (mjpeg_decoder.GetWidth() != w ||
      mjpeg_decoder.GetHeight() != h) {
    // ERROR: MJPEG frame has unexpected dimensions
    mjpeg_decoder.UnloadFrame();
    return 1;  // runtime failure
  }
  ARGBBuffers bufs = { argb, argb_stride, dw, dh };
  // Select the callback that matches the frame's colorspace and sampling
  // factors; the checks are made in the order 420, 422, 444, 411, 400.
  BandCallback convert_band;
  if (mjpeg_decoder.GetColorSpace() ==
          MJpegDecoder::kColorSpaceYCbCr &&
      mjpeg_decoder.GetNumComponents() == 3 &&
      mjpeg_decoder.GetVertSampFactor(0) == 2 &&
      mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
      mjpeg_decoder.GetVertSampFactor(1) == 1 &&
      mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
      mjpeg_decoder.GetVertSampFactor(2) == 1 &&
      mjpeg_decoder.GetHorizSampFactor(2) == 1) {
    // YUV420
    convert_band = &JpegI420ToARGB;
  } else if (mjpeg_decoder.GetColorSpace() ==
                 MJpegDecoder::kColorSpaceYCbCr &&
             mjpeg_decoder.GetNumComponents() == 3 &&
             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
             mjpeg_decoder.GetVertSampFactor(1) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
             mjpeg_decoder.GetVertSampFactor(2) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(2) == 1) {
    // YUV422
    convert_band = &JpegI422ToARGB;
  } else if (mjpeg_decoder.GetColorSpace() ==
                 MJpegDecoder::kColorSpaceYCbCr &&
             mjpeg_decoder.GetNumComponents() == 3 &&
             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
             mjpeg_decoder.GetVertSampFactor(1) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
             mjpeg_decoder.GetVertSampFactor(2) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(2) == 1) {
    // YUV444
    convert_band = &JpegI444ToARGB;
  } else if (mjpeg_decoder.GetColorSpace() ==
                 MJpegDecoder::kColorSpaceYCbCr &&
             mjpeg_decoder.GetNumComponents() == 3 &&
             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(0) == 4 &&
             mjpeg_decoder.GetVertSampFactor(1) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
             mjpeg_decoder.GetVertSampFactor(2) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(2) == 1) {
    // YUV411
    convert_band = &JpegI411ToARGB;
  } else if (mjpeg_decoder.GetColorSpace() ==
                 MJpegDecoder::kColorSpaceGrayscale &&
             mjpeg_decoder.GetNumComponents() == 1 &&
             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(0) == 1) {
    // YUV400
    convert_band = &JpegI400ToARGB;
  } else {
    // TODO(fbarchard): Implement conversion for any other colorspace/sample
    // factors that occur in practice. 411 is supported by libjpeg
    // ERROR: Unable to convert MJPEG frame because format is not supported
    mjpeg_decoder.UnloadFrame();
    return 1;
  }
  const LIBYUV_BOOL decoded =
      mjpeg_decoder.DecodeToCallback(convert_band, &bufs, dw, dh);
  return decoded ? 0 : 1;
}
#endif
#endif
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View file

@ -1,327 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/convert_argb.h"
#include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h"
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
#endif
#include "libyuv/rotate_argb.h"
#include "libyuv/row.h"
#include "libyuv/video_common.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Convert camera sample to ARGB with cropping, rotation and vertical flip.
//
// sample / sample_size: source frame and its size in bytes (with MJPEG it is
//   the compressed size of the frame).
// crop_argb / argb_stride: destination ARGB image and its row stride in bytes.
// crop_x / crop_y: top-left corner of the crop rectangle inside the source.
// src_width is used for source stride computation.
// src_height is used to compute location of planes, and indicate inversion
//   (a negative value requests a vertical flip).
// crop_width / crop_height: size of the region to convert.
// rotation: rotation applied while writing the destination.
// fourcc: source pixel format.
//
// Returns 0 on success, -1 for invalid arguments or an unknown fourcc, 1 when
// the temporary buffer cannot be allocated; otherwise the status of the
// underlying converter / rotate call.
LIBYUV_API
int ConvertToARGB(const uint8* sample, size_t sample_size,
                  uint8* crop_argb, int argb_stride,
                  int crop_x, int crop_y,
                  int src_width, int src_height,
                  int crop_width, int crop_height,
                  enum RotationMode rotation,
                  uint32 fourcc) {
  uint32 format = CanonicalFourCC(fourcc);
  int aligned_src_width = (src_width + 1) & ~1;
  const uint8* src;
  const uint8* src_uv;
  int abs_src_height = (src_height < 0) ? -src_height : src_height;
  int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
  int r = 0;
  // One pass rotation is available for some formats. For the rest, convert
  // to ARGB (with optional vertical flipping) into a temporary ARGB buffer,
  // and then rotate the ARGB to the final destination buffer.
  // For in-place conversion, if destination crop_argb is same as source sample,
  // also enable temporary buffer.
  LIBYUV_BOOL need_buf = (rotation && format != FOURCC_ARGB) ||
      crop_argb == sample;
  // Remember the caller's destination; crop_argb / argb_stride are redirected
  // to the temporary buffer below when one is needed.
  uint8* tmp_argb = crop_argb;
  int tmp_argb_stride = argb_stride;
  uint8* rotate_buffer = NULL;
  int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;

  if (crop_argb == NULL || sample == NULL ||
      src_width <= 0 || crop_width <= 0 ||
      src_height == 0 || crop_height == 0) {
    return -1;
  }
  // A negative source height asks the converters to flip vertically.
  if (src_height < 0) {
    inv_crop_height = -inv_crop_height;
  }

  if (need_buf) {
    // Size computed in size_t so large crops do not overflow int.
    size_t argb_size = (size_t)crop_width * (size_t)abs_crop_height * 4;
    rotate_buffer = (uint8*)malloc(argb_size);
    if (!rotate_buffer) {
      return 1;  // Out of memory runtime error.
    }
    crop_argb = rotate_buffer;
    // Stride is in bytes: 4 bytes per ARGB pixel. (Using crop_width alone
    // made consecutive rows overlap inside the temporary buffer.)
    argb_stride = crop_width * 4;
  }

  switch (format) {
    // Single plane formats
    case FOURCC_YUY2:
      src = sample + (aligned_src_width * crop_y + crop_x) * 2;
      r = YUY2ToARGB(src, aligned_src_width * 2,
                     crop_argb, argb_stride,
                     crop_width, inv_crop_height);
      break;
    case FOURCC_UYVY:
      src = sample + (aligned_src_width * crop_y + crop_x) * 2;
      r = UYVYToARGB(src, aligned_src_width * 2,
                     crop_argb, argb_stride,
                     crop_width, inv_crop_height);
      break;
    case FOURCC_24BG:
      src = sample + (src_width * crop_y + crop_x) * 3;
      r = RGB24ToARGB(src, src_width * 3,
                      crop_argb, argb_stride,
                      crop_width, inv_crop_height);
      break;
    case FOURCC_RAW:
      src = sample + (src_width * crop_y + crop_x) * 3;
      r = RAWToARGB(src, src_width * 3,
                    crop_argb, argb_stride,
                    crop_width, inv_crop_height);
      break;
    case FOURCC_ARGB:
      src = sample + (src_width * crop_y + crop_x) * 4;
      r = ARGBToARGB(src, src_width * 4,
                     crop_argb, argb_stride,
                     crop_width, inv_crop_height);
      break;
    case FOURCC_BGRA:
      src = sample + (src_width * crop_y + crop_x) * 4;
      r = BGRAToARGB(src, src_width * 4,
                     crop_argb, argb_stride,
                     crop_width, inv_crop_height);
      break;
    case FOURCC_ABGR:
      src = sample + (src_width * crop_y + crop_x) * 4;
      r = ABGRToARGB(src, src_width * 4,
                     crop_argb, argb_stride,
                     crop_width, inv_crop_height);
      break;
    case FOURCC_RGBA:
      src = sample + (src_width * crop_y + crop_x) * 4;
      r = RGBAToARGB(src, src_width * 4,
                     crop_argb, argb_stride,
                     crop_width, inv_crop_height);
      break;
    case FOURCC_RGBP:
      src = sample + (src_width * crop_y + crop_x) * 2;
      r = RGB565ToARGB(src, src_width * 2,
                       crop_argb, argb_stride,
                       crop_width, inv_crop_height);
      break;
    case FOURCC_RGBO:
      src = sample + (src_width * crop_y + crop_x) * 2;
      r = ARGB1555ToARGB(src, src_width * 2,
                         crop_argb, argb_stride,
                         crop_width, inv_crop_height);
      break;
    case FOURCC_R444:
      src = sample + (src_width * crop_y + crop_x) * 2;
      r = ARGB4444ToARGB(src, src_width * 2,
                         crop_argb, argb_stride,
                         crop_width, inv_crop_height);
      break;
    // TODO(fbarchard): Support cropping Bayer by odd numbers
    // by adjusting fourcc.
    case FOURCC_BGGR:
      src = sample + (src_width * crop_y + crop_x);
      r = BayerBGGRToARGB(src, src_width,
                          crop_argb, argb_stride,
                          crop_width, inv_crop_height);
      break;
    case FOURCC_GBRG:
      src = sample + (src_width * crop_y + crop_x);
      r = BayerGBRGToARGB(src, src_width,
                          crop_argb, argb_stride,
                          crop_width, inv_crop_height);
      break;
    case FOURCC_GRBG:
      src = sample + (src_width * crop_y + crop_x);
      r = BayerGRBGToARGB(src, src_width,
                          crop_argb, argb_stride,
                          crop_width, inv_crop_height);
      break;
    case FOURCC_RGGB:
      src = sample + (src_width * crop_y + crop_x);
      r = BayerRGGBToARGB(src, src_width,
                          crop_argb, argb_stride,
                          crop_width, inv_crop_height);
      break;
    case FOURCC_I400:
      src = sample + src_width * crop_y + crop_x;
      r = I400ToARGB(src, src_width,
                     crop_argb, argb_stride,
                     crop_width, inv_crop_height);
      break;
    // Biplanar formats
    case FOURCC_NV12:
      src = sample + (src_width * crop_y + crop_x);
      // The UV plane follows the Y plane; use the absolute height so a
      // negative (flip) src_height does not place src_uv before the sample.
      src_uv = sample + aligned_src_width * (abs_src_height + crop_y / 2) +
          crop_x;
      r = NV12ToARGB(src, src_width,
                     src_uv, aligned_src_width,
                     crop_argb, argb_stride,
                     crop_width, inv_crop_height);
      break;
    case FOURCC_NV21:
      src = sample + (src_width * crop_y + crop_x);
      // Same plane layout as NV12; only the interleaved chroma order differs.
      src_uv = sample + aligned_src_width * (abs_src_height + crop_y / 2) +
          crop_x;
      r = NV21ToARGB(src, src_width,
                     src_uv, aligned_src_width,
                     crop_argb, argb_stride,
                     crop_width, inv_crop_height);
      break;
    case FOURCC_M420:
      src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
      r = M420ToARGB(src, src_width,
                     crop_argb, argb_stride,
                     crop_width, inv_crop_height);
      break;
//    case FOURCC_Q420:
//      src = sample + (src_width + aligned_src_width * 2) * crop_y + crop_x;
//      src_uv = sample + (src_width + aligned_src_width * 2) * crop_y +
//               src_width + crop_x * 2;
//      r = Q420ToARGB(src, src_width * 3,
//                    src_uv, src_width * 3,
//                    crop_argb, argb_stride,
//                    crop_width, inv_crop_height);
//      break;
    // Triplanar formats
    case FOURCC_I420:
    case FOURCC_YU12:
    case FOURCC_YV12: {
      const uint8* src_y = sample + (src_width * crop_y + crop_x);
      const uint8* src_u;
      const uint8* src_v;
      int halfwidth = (src_width + 1) / 2;
      int halfheight = (abs_src_height + 1) / 2;
      // YV12 stores the V plane before the U plane.
      if (format == FOURCC_YV12) {
        src_v = sample + src_width * abs_src_height +
            (halfwidth * crop_y + crop_x) / 2;
        src_u = sample + src_width * abs_src_height +
            halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
      } else {
        src_u = sample + src_width * abs_src_height +
            (halfwidth * crop_y + crop_x) / 2;
        src_v = sample + src_width * abs_src_height +
            halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
      }
      r = I420ToARGB(src_y, src_width,
                     src_u, halfwidth,
                     src_v, halfwidth,
                     crop_argb, argb_stride,
                     crop_width, inv_crop_height);
      break;
    }
    case FOURCC_I422:
    case FOURCC_YV16: {
      const uint8* src_y = sample + src_width * crop_y + crop_x;
      const uint8* src_u;
      const uint8* src_v;
      int halfwidth = (src_width + 1) / 2;
      // YV16 stores the V plane before the U plane.
      if (format == FOURCC_YV16) {
        src_v = sample + src_width * abs_src_height +
            halfwidth * crop_y + crop_x / 2;
        src_u = sample + src_width * abs_src_height +
            halfwidth * (abs_src_height + crop_y) + crop_x / 2;
      } else {
        src_u = sample + src_width * abs_src_height +
            halfwidth * crop_y + crop_x / 2;
        src_v = sample + src_width * abs_src_height +
            halfwidth * (abs_src_height + crop_y) + crop_x / 2;
      }
      r = I422ToARGB(src_y, src_width,
                     src_u, halfwidth,
                     src_v, halfwidth,
                     crop_argb, argb_stride,
                     crop_width, inv_crop_height);
      break;
    }
    case FOURCC_I444:
    case FOURCC_YV24: {
      const uint8* src_y = sample + src_width * crop_y + crop_x;
      const uint8* src_u;
      const uint8* src_v;
      // YV24 stores the V plane before the U plane.
      if (format == FOURCC_YV24) {
        src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
        src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
      } else {
        src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
        src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
      }
      r = I444ToARGB(src_y, src_width,
                     src_u, src_width,
                     src_v, src_width,
                     crop_argb, argb_stride,
                     crop_width, inv_crop_height);
      break;
    }
    case FOURCC_I411: {
      int quarterwidth = (src_width + 3) / 4;
      const uint8* src_y = sample + src_width * crop_y + crop_x;
      const uint8* src_u = sample + src_width * abs_src_height +
          quarterwidth * crop_y + crop_x / 4;
      const uint8* src_v = sample + src_width * abs_src_height +
          quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
      r = I411ToARGB(src_y, src_width,
                     src_u, quarterwidth,
                     src_v, quarterwidth,
                     crop_argb, argb_stride,
                     crop_width, inv_crop_height);
      break;
    }
#ifdef HAVE_JPEG
    case FOURCC_MJPG:
      r = MJPGToARGB(sample, sample_size,
                     crop_argb, argb_stride,
                     src_width, abs_src_height, crop_width, inv_crop_height);
      break;
#endif
    default:
      r = -1;  // unknown fourcc - return failure code.
  }

  if (need_buf) {
    // Rotate (or copy) from the temporary buffer into the caller's
    // destination, but only when the conversion itself succeeded.
    if (!r) {
      r = ARGBRotate(crop_argb, argb_stride,
                     tmp_argb, tmp_argb_stride,
                     crop_width, abs_crop_height, rotation);
    }
    free(rotate_buffer);
  }
  return r;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View file

@ -1,383 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdlib.h>
#include "libyuv/convert.h"
#include "libyuv/format_conversion.h"
#include "libyuv/video_common.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Convert camera sample to I420 with cropping, rotation and vertical flip.
//
// sample / sample_size: source frame and its size in bytes (with MJPEG it is
//   the compressed size of the frame).
// y, u, v and their strides: destination I420 planes.
// crop_x / crop_y: top-left corner of the crop rectangle inside the source.
// src_width is used for source stride computation.
// src_height is used to compute location of planes, and indicate inversion
//   (a negative value requests a vertical flip).
// crop_width / crop_height: size of the region to convert.
// rotation: rotation applied while writing the destination.
// fourcc: source pixel format.
//
// Returns 0 on success, -1 for invalid arguments or an unknown fourcc, 1 when
// the temporary buffer cannot be allocated; otherwise the status of the
// underlying converter / rotate call.
LIBYUV_API
int ConvertToI420(const uint8* sample,
                  size_t sample_size,
                  uint8* y, int y_stride,
                  uint8* u, int u_stride,
                  uint8* v, int v_stride,
                  int crop_x, int crop_y,
                  int src_width, int src_height,
                  int crop_width, int crop_height,
                  enum RotationMode rotation,
                  uint32 fourcc) {
  uint32 format = CanonicalFourCC(fourcc);
  int aligned_src_width = (src_width + 1) & ~1;
  const uint8* src;
  const uint8* src_uv;
  int abs_src_height = (src_height < 0) ? -src_height : src_height;
  int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
  int r = 0;
  LIBYUV_BOOL need_buf = (rotation && format != FOURCC_I420 &&
      format != FOURCC_NV12 && format != FOURCC_NV21 &&
      format != FOURCC_YU12 && format != FOURCC_YV12) || y == sample;
  // Remember the caller's destination; y/u/v and their strides are redirected
  // to the temporary buffer below when one is needed.
  uint8* tmp_y = y;
  uint8* tmp_u = u;
  uint8* tmp_v = v;
  int tmp_y_stride = y_stride;
  int tmp_u_stride = u_stride;
  int tmp_v_stride = v_stride;
  uint8* rotate_buffer = NULL;
  int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;

  if (!y || !u || !v || !sample ||
      src_width <= 0 || crop_width <= 0 ||
      src_height == 0 || crop_height == 0) {
    return -1;
  }
  // A negative source height asks the converters to flip vertically.
  if (src_height < 0) {
    inv_crop_height = -inv_crop_height;
  }

  // One pass rotation is available for some formats. For the rest, convert
  // to I420 (with optional vertical flipping) into a temporary I420 buffer,
  // and then rotate the I420 to the final destination buffer.
  // For in-place conversion, if destination y is same as source sample,
  // also enable temporary buffer.
  if (need_buf) {
    // Sizes computed in size_t so large crops do not overflow int.
    size_t y_size = (size_t)crop_width * (size_t)abs_crop_height;
    size_t uv_size = (size_t)((crop_width + 1) / 2) *
        (size_t)((abs_crop_height + 1) / 2);
    rotate_buffer = (uint8*)malloc(y_size + uv_size * 2);
    if (!rotate_buffer) {
      return 1;  // Out of memory runtime error.
    }
    y = rotate_buffer;
    u = y + y_size;
    v = u + uv_size;
    y_stride = crop_width;
    u_stride = v_stride = ((crop_width + 1) / 2);
  }

  switch (format) {
    // Single plane formats
    case FOURCC_YUY2:
      src = sample + (aligned_src_width * crop_y + crop_x) * 2;
      r = YUY2ToI420(src, aligned_src_width * 2,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    case FOURCC_UYVY:
      src = sample + (aligned_src_width * crop_y + crop_x) * 2;
      r = UYVYToI420(src, aligned_src_width * 2,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    case FOURCC_RGBP:
      src = sample + (src_width * crop_y + crop_x) * 2;
      r = RGB565ToI420(src, src_width * 2,
                       y, y_stride,
                       u, u_stride,
                       v, v_stride,
                       crop_width, inv_crop_height);
      break;
    case FOURCC_RGBO:
      src = sample + (src_width * crop_y + crop_x) * 2;
      r = ARGB1555ToI420(src, src_width * 2,
                         y, y_stride,
                         u, u_stride,
                         v, v_stride,
                         crop_width, inv_crop_height);
      break;
    case FOURCC_R444:
      src = sample + (src_width * crop_y + crop_x) * 2;
      r = ARGB4444ToI420(src, src_width * 2,
                         y, y_stride,
                         u, u_stride,
                         v, v_stride,
                         crop_width, inv_crop_height);
      break;
    case FOURCC_24BG:
      src = sample + (src_width * crop_y + crop_x) * 3;
      r = RGB24ToI420(src, src_width * 3,
                      y, y_stride,
                      u, u_stride,
                      v, v_stride,
                      crop_width, inv_crop_height);
      break;
    case FOURCC_RAW:
      src = sample + (src_width * crop_y + crop_x) * 3;
      r = RAWToI420(src, src_width * 3,
                    y, y_stride,
                    u, u_stride,
                    v, v_stride,
                    crop_width, inv_crop_height);
      break;
    case FOURCC_ARGB:
      src = sample + (src_width * crop_y + crop_x) * 4;
      r = ARGBToI420(src, src_width * 4,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    case FOURCC_BGRA:
      src = sample + (src_width * crop_y + crop_x) * 4;
      r = BGRAToI420(src, src_width * 4,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    case FOURCC_ABGR:
      src = sample + (src_width * crop_y + crop_x) * 4;
      r = ABGRToI420(src, src_width * 4,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    case FOURCC_RGBA:
      src = sample + (src_width * crop_y + crop_x) * 4;
      r = RGBAToI420(src, src_width * 4,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    // TODO(fbarchard): Support cropping Bayer by odd numbers
    // by adjusting fourcc.
    case FOURCC_BGGR:
      src = sample + (src_width * crop_y + crop_x);
      r = BayerBGGRToI420(src, src_width,
                          y, y_stride,
                          u, u_stride,
                          v, v_stride,
                          crop_width, inv_crop_height);
      break;
    case FOURCC_GBRG:
      src = sample + (src_width * crop_y + crop_x);
      r = BayerGBRGToI420(src, src_width,
                          y, y_stride,
                          u, u_stride,
                          v, v_stride,
                          crop_width, inv_crop_height);
      break;
    case FOURCC_GRBG:
      src = sample + (src_width * crop_y + crop_x);
      r = BayerGRBGToI420(src, src_width,
                          y, y_stride,
                          u, u_stride,
                          v, v_stride,
                          crop_width, inv_crop_height);
      break;
    case FOURCC_RGGB:
      src = sample + (src_width * crop_y + crop_x);
      r = BayerRGGBToI420(src, src_width,
                          y, y_stride,
                          u, u_stride,
                          v, v_stride,
                          crop_width, inv_crop_height);
      break;
    case FOURCC_I400:
      src = sample + src_width * crop_y + crop_x;
      r = I400ToI420(src, src_width,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    // Biplanar formats
    case FOURCC_NV12:
      src = sample + (src_width * crop_y + crop_x);
      // The UV plane follows the Y plane; use the absolute height so a
      // negative (flip) src_height does not place src_uv before the sample.
      src_uv = sample + aligned_src_width * (abs_src_height + crop_y / 2) +
          crop_x;
      r = NV12ToI420Rotate(src, src_width,
                           src_uv, aligned_src_width,
                           y, y_stride,
                           u, u_stride,
                           v, v_stride,
                           crop_width, inv_crop_height, rotation);
      break;
    case FOURCC_NV21:
      src = sample + (src_width * crop_y + crop_x);
      src_uv = sample + aligned_src_width * (abs_src_height + crop_y / 2) +
          crop_x;
      // Call NV12 but with u and v parameters swapped.
      r = NV12ToI420Rotate(src, src_width,
                           src_uv, aligned_src_width,
                           y, y_stride,
                           v, v_stride,
                           u, u_stride,
                           crop_width, inv_crop_height, rotation);
      break;
    case FOURCC_M420:
      src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
      r = M420ToI420(src, src_width,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    case FOURCC_Q420:
      src = sample + (src_width + aligned_src_width * 2) * crop_y + crop_x;
      src_uv = sample + (src_width + aligned_src_width * 2) * crop_y +
          src_width + crop_x * 2;
      r = Q420ToI420(src, src_width * 3,
                     src_uv, src_width * 3,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    // Triplanar formats
    case FOURCC_I420:
    case FOURCC_YU12:
    case FOURCC_YV12: {
      const uint8* src_y = sample + (src_width * crop_y + crop_x);
      const uint8* src_u;
      const uint8* src_v;
      int halfwidth = (src_width + 1) / 2;
      int halfheight = (abs_src_height + 1) / 2;
      // YV12 stores the V plane before the U plane.
      if (format == FOURCC_YV12) {
        src_v = sample + src_width * abs_src_height +
            (halfwidth * crop_y + crop_x) / 2;
        src_u = sample + src_width * abs_src_height +
            halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
      } else {
        src_u = sample + src_width * abs_src_height +
            (halfwidth * crop_y + crop_x) / 2;
        src_v = sample + src_width * abs_src_height +
            halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
      }
      r = I420Rotate(src_y, src_width,
                     src_u, halfwidth,
                     src_v, halfwidth,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height, rotation);
      break;
    }
    case FOURCC_I422:
    case FOURCC_YV16: {
      const uint8* src_y = sample + src_width * crop_y + crop_x;
      const uint8* src_u;
      const uint8* src_v;
      int halfwidth = (src_width + 1) / 2;
      // YV16 stores the V plane before the U plane.
      if (format == FOURCC_YV16) {
        src_v = sample + src_width * abs_src_height +
            halfwidth * crop_y + crop_x / 2;
        src_u = sample + src_width * abs_src_height +
            halfwidth * (abs_src_height + crop_y) + crop_x / 2;
      } else {
        src_u = sample + src_width * abs_src_height +
            halfwidth * crop_y + crop_x / 2;
        src_v = sample + src_width * abs_src_height +
            halfwidth * (abs_src_height + crop_y) + crop_x / 2;
      }
      r = I422ToI420(src_y, src_width,
                     src_u, halfwidth,
                     src_v, halfwidth,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    }
    case FOURCC_I444:
    case FOURCC_YV24: {
      const uint8* src_y = sample + src_width * crop_y + crop_x;
      const uint8* src_u;
      const uint8* src_v;
      // YV24 stores the V plane before the U plane.
      if (format == FOURCC_YV24) {
        src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
        src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
      } else {
        src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
        src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
      }
      r = I444ToI420(src_y, src_width,
                     src_u, src_width,
                     src_v, src_width,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    }
    case FOURCC_I411: {
      int quarterwidth = (src_width + 3) / 4;
      const uint8* src_y = sample + src_width * crop_y + crop_x;
      const uint8* src_u = sample + src_width * abs_src_height +
          quarterwidth * crop_y + crop_x / 4;
      const uint8* src_v = sample + src_width * abs_src_height +
          quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
      r = I411ToI420(src_y, src_width,
                     src_u, quarterwidth,
                     src_v, quarterwidth,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    }
#ifdef HAVE_JPEG
    case FOURCC_MJPG:
      r = MJPGToI420(sample, sample_size,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     src_width, abs_src_height, crop_width, inv_crop_height);
      break;
#endif
    default:
      r = -1;  // unknown fourcc - return failure code.
  }

  if (need_buf) {
    // Rotate (or copy) from the temporary planes into the caller's
    // destination, but only when the conversion itself succeeded.
    if (!r) {
      r = I420Rotate(y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     tmp_y, tmp_y_stride,
                     tmp_u, tmp_u_stride,
                     tmp_v, tmp_v_stride,
                     crop_width, abs_crop_height, rotation);
    }
    free(rotate_buffer);
  }
  return r;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View file

@ -1,300 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/cpu_id.h"
#ifdef _ANDROID //libtheoraplayer addition for cpu feature detection
#include "cpu-features.h"
#endif
#ifdef _MSC_VER
#include <intrin.h> // For __cpuidex()
#endif
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
!defined(__native_client__) && defined(_M_X64) && \
defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
#include <immintrin.h> // For _xgetbv()
#endif
#if !defined(__native_client__)
#include <stdlib.h> // For getenv()
#endif
// For ArmCpuCaps() but unittested on all platforms
#include <stdio.h>
#include <string.h>
#include "libyuv/basic_types.h" // For CPU_X86
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// For functions that use the stack and have runtime checks for overflow,
// use SAFEBUFFERS to avoid additional check.
#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
#define SAFEBUFFERS __declspec(safebuffers)
#else
#define SAFEBUFFERS
#endif
// Low level cpuid for X86. Returns zeros on other CPUs.
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
(defined(_M_IX86) || defined(_M_X64) || \
defined(__i386__) || defined(__x86_64__))
// Executes the x86 CPUID instruction with EAX = info_eax and ECX = info_ecx
// and stores the resulting EAX, EBX, ECX, EDX registers in cpu_info[0..3].
// Three MSVC code paths exist (intrinsic with sub-leaf, 32-bit inline asm,
// and plain __cpuid); everything else uses GCC-style inline asm.
LIBYUV_API
void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
#if defined(_MSC_VER)
#if (_MSC_FULL_VER >= 160040219)
// Compiler provides the intrinsic that accepts a sub-leaf (ECX) value.
__cpuidex((int*)(cpu_info), info_eax, info_ecx);
#elif defined(_M_IX86)
__asm {
mov eax, info_eax
mov ecx, info_ecx
mov edi, cpu_info
cpuid
mov [edi], eax
mov [edi + 4], ebx
mov [edi + 8], ecx
mov [edi + 12], edx
}
#else
// __cpuid takes no ECX argument here, so only sub-leaf 0 can be served;
// any other sub-leaf request reports all zeros.
if (info_ecx == 0) {
__cpuid((int*)(cpu_info), info_eax);
} else {
cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0;
}
#endif
#else // defined(_MSC_VER)
uint32 info_ebx, info_edx;
asm volatile ( // NOLINT
#if defined( __i386__) && defined(__PIC__)
// Preserve ebx for fpic 32 bit.
"mov %%ebx, %%edi \n"
"cpuid \n"
"xchg %%edi, %%ebx \n"
: "=D" (info_ebx),
#else
"cpuid \n"
: "=b" (info_ebx),
#endif // defined( __i386__) && defined(__PIC__)
"+a" (info_eax), "+c" (info_ecx), "=d" (info_edx));
// Copy the register results out in EAX, EBX, ECX, EDX order.
cpu_info[0] = info_eax;
cpu_info[1] = info_ebx;
cpu_info[2] = info_ecx;
cpu_info[3] = info_edx;
#endif // defined(_MSC_VER)
}
#if !defined(__native_client__)
#define HAS_XGETBV
// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
// Reads extended control register 0 (ECX = 0) and returns non-zero when both
// bit 1 and bit 2 (mask 6) are set, zero otherwise. On targets matching none
// of the branches below xcr0 stays 0 and the function returns 0.
// NOTE(review): callers are expected to have checked the CPUID OSXSAVE bit
// first, as InitCpuFlags does - confirm for any new caller.
int TestOsSaveYmm() {
uint32 xcr0 = 0u;
#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required.
#elif defined(_M_IX86)
__asm {
xor ecx, ecx // xcr 0
_asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier.
mov xcr0, eax
}
#elif defined(__i386__) || defined(__x86_64__)
// The xgetbv opcode is emitted as raw bytes; edx is listed as clobbered
// because the instruction also writes it.
asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
#endif // defined(_MSC_VER)
return((xcr0 & 6) == 6); // Is ymm saved?
}
#endif // !defined(__native_client__)
#else
// Fallback for targets without an x86 cpuid path: the requested leaf and
// sub-leaf are ignored and all four result words are reported as zero.
LIBYUV_API
void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
  int reg;
  for (reg = 0; reg < 4; ++reg) {
    cpu_info[reg] = 0;
  }
}
#endif
// based on libvpx arm_cpudetect.c
// For Arm, but public to allow testing on any CPU
//
// Scans the text file |cpuinfo_name| (normally "/proc/cpuinfo") for a line
// beginning with "Features" that lists the token "neon".
// Returns kCpuHasNEON when the token is found or when the file cannot be
// opened, and 0 when the file was read without finding it.
LIBYUV_API SAFEBUFFERS
int ArmCpuCaps(const char* cpuinfo_name) {
  char cpuinfo_line[512];
  FILE* f = fopen(cpuinfo_name, "r");
  if (!f) {
    // Assume Neon if /proc/cpuinfo is unavailable.
    // This will occur for Chrome sandbox for Pepper or Render process.
    return kCpuHasNEON;
  }
  while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
    // strncmp stops at the terminator, so lines shorter than 8 characters do
    // not cause stale/uninitialized buffer bytes to be compared.
    if (strncmp(cpuinfo_line, "Features", 8) == 0) {
      char* p = strstr(cpuinfo_line, " neon");
      // Require a token boundary after "neon" so e.g. " neonfoo" is rejected.
      // '\0' covers a final line that has no trailing newline.
      if (p && (p[5] == ' ' || p[5] == '\n' || p[5] == '\0')) {
        fclose(f);
        return kCpuHasNEON;
      }
    }
  }
  fclose(f);
  return 0;
}
#if defined(__mips__) && defined(__linux__)
// Looks for |search_string| anywhere in /proc/cpuinfo.
// Returns kCpuHasMIPS_DSP when a line contains it or when the file cannot be
// opened, and 0 when the whole file was read without a match.
static int MipsCpuCaps(const char* search_string) {
  char line[512];
  int caps = 0;
  FILE* cpuinfo = fopen("/proc/cpuinfo", "r");
  if (cpuinfo == NULL) {
    // Assume DSP if /proc/cpuinfo is unavailable.
    // This will occur for Chrome sandbox for Pepper or Render process.
    return kCpuHasMIPS_DSP;
  }
  while (fgets(line, sizeof(line) - 1, cpuinfo) != NULL) {
    if (strstr(line, search_string) != NULL) {
      caps = kCpuHasMIPS_DSP;
      break;  // First match is enough.
    }
  }
  fclose(cpuinfo);
  return caps;
}
#endif
// Cached CPU feature flags used to select SIMD code paths.
// Starts as kCpuInit and is overwritten by InitCpuFlags() / MaskCpuFlags().
LIBYUV_API
int cpu_info_ = kCpuInit; // cpu_info is not initialized yet.
// Reports whether the environment variable |name| asks for a CPU feature to
// be disabled. Any value whose first character is not '0' counts as "disable";
// an unset variable or one starting with '0' is ignored, which makes it easy
// to toggle the variable on/off.
#if !defined(__native_client__) && !defined(_M_ARM)
static LIBYUV_BOOL TestEnv(const char* name) {
#if !defined(_WINRT) && !defined(ORBIS_ENABLED)
  const char* value = getenv(name);
  if (value != NULL && value[0] != '0') {
    return LIBYUV_TRUE;
  }
  return LIBYUV_FALSE;
#else
  // No environment lookup on these platforms: never disable anything.
  return LIBYUV_FALSE;
#endif
}
#else  // nacl does not support getenv().
static LIBYUV_BOOL TestEnv(const char*) {
  return LIBYUV_FALSE;
}
#endif
// Detects the CPU's SIMD capabilities, stores them in cpu_info_ and returns
// the resulting flag set.
//
// x86: flags come from CPUID leaves 1 and 7 plus an XGETBV check for AVX.
// MIPS/Linux: DSP support is read from /proc/cpuinfo.
// ARM: NEON is assumed or read from /proc/cpuinfo depending on the build.
// On any other target cpu_info_ is left as it was.
//
// Each LIBYUV_DISABLE_* environment variable (see TestEnv) clears the
// corresponding flag; LIBYUV_DISABLE_ASM clears all flags.
LIBYUV_API SAFEBUFFERS
int InitCpuFlags(void) {
#if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86)
  uint32 cpu_info0[4] = { 0, 0, 0, 0 };
  uint32 cpu_info1[4] = { 0, 0, 0, 0 };
  uint32 cpu_info7[4] = { 0, 0, 0, 0 };
  CpuId(0, 0, cpu_info0);
  CpuId(1, 0, cpu_info1);
  // Leaf 0 reports the highest supported basic leaf in EAX. Only query leaf 7
  // when it exists; otherwise cpu_info7 stays zero instead of holding
  // whatever an out-of-range CPUID request returns.
  if (cpu_info0[0] >= 7) {
    CpuId(7, 0, cpu_info7);
  }
  cpu_info_ = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
              ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
              ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
              ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
              ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) |
              ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
              kCpuHasX86;
#ifdef HAS_XGETBV
  if ((cpu_info1[2] & 0x18000000) == 0x18000000 &&  // AVX and OSSave
      TestOsSaveYmm()) {  // Saves YMM.
    cpu_info_ |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
                 kCpuHasAVX;
  }
#endif
  // Environment variable overrides for testing.
  if (TestEnv("LIBYUV_DISABLE_X86")) {
    cpu_info_ &= ~kCpuHasX86;
  }
  if (TestEnv("LIBYUV_DISABLE_SSE2")) {
    cpu_info_ &= ~kCpuHasSSE2;
  }
  if (TestEnv("LIBYUV_DISABLE_SSSE3")) {
    cpu_info_ &= ~kCpuHasSSSE3;
  }
  if (TestEnv("LIBYUV_DISABLE_SSE41")) {
    cpu_info_ &= ~kCpuHasSSE41;
  }
  if (TestEnv("LIBYUV_DISABLE_SSE42")) {
    cpu_info_ &= ~kCpuHasSSE42;
  }
  if (TestEnv("LIBYUV_DISABLE_AVX")) {
    cpu_info_ &= ~kCpuHasAVX;
  }
  if (TestEnv("LIBYUV_DISABLE_AVX2")) {
    cpu_info_ &= ~kCpuHasAVX2;
  }
  if (TestEnv("LIBYUV_DISABLE_ERMS")) {
    cpu_info_ &= ~kCpuHasERMS;
  }
  if (TestEnv("LIBYUV_DISABLE_FMA3")) {
    cpu_info_ &= ~kCpuHasFMA3;
  }
#elif defined(__mips__) && defined(__linux__)
  // Linux mips parse text file for dsp detect.
  cpu_info_ = MipsCpuCaps("dsp");  // set kCpuHasMIPS_DSP.
#if defined(__mips_dspr2)
  cpu_info_ |= kCpuHasMIPS_DSPR2;
#endif
  cpu_info_ |= kCpuHasMIPS;
  // Use TestEnv like every other platform so a value of "0" does not disable
  // the feature and builds without getenv() still compile.
  if (TestEnv("LIBYUV_DISABLE_MIPS")) {
    cpu_info_ &= ~kCpuHasMIPS;
  }
  if (TestEnv("LIBYUV_DISABLE_MIPS_DSP")) {
    cpu_info_ &= ~kCpuHasMIPS_DSP;
  }
  if (TestEnv("LIBYUV_DISABLE_MIPS_DSPR2")) {
    cpu_info_ &= ~kCpuHasMIPS_DSPR2;
  }
#elif defined(__arm__)
// gcc -mfpu=neon defines __ARM_NEON__
// __ARM_NEON__ generates code that requires Neon.  NaCL also requires Neon.
// For Linux, /proc/cpuinfo can be tested but without that assume Neon.
#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
#ifdef _ANDROID
  // libtheoraplayer #ifdef addition, just in case, android gave us troubles
  cpu_info_ = ArmCpuCaps("/proc/cpuinfo");
#else
  cpu_info_ = kCpuHasNEON;
#endif
#else
  // Linux arm parse text file for neon detect.
  cpu_info_ = ArmCpuCaps("/proc/cpuinfo");
#endif
  cpu_info_ |= kCpuHasARM;
  if (TestEnv("LIBYUV_DISABLE_NEON")) {
    cpu_info_ &= ~kCpuHasNEON;
  }
#ifdef _ANDROID
  // libtheoraplayer addition to disable NEON support on android devices that
  // don't support it, once again, just in case
  if ((android_getCpuFeaturesExt() & ANDROID_CPU_ARM_FEATURE_NEON) == 0)
  {
    cpu_info_ = kCpuHasARM;
  }
#endif
#endif  // __arm__
  if (TestEnv("LIBYUV_DISABLE_ASM")) {
    cpu_info_ = 0;
  }
  return cpu_info_;
}
// Re-runs CPU detection and keeps only the detected flags that are also set
// in |enable_flags|.
LIBYUV_API
void MaskCpuFlags(int enable_flags) {
  const int detected_flags = InitCpuFlags();
  cpu_info_ = detected_flags & enable_flags;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View file

@ -1,552 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/format_conversion.h"
#include "libyuv/basic_types.h"
#include "libyuv/cpu_id.h"
#include "libyuv/video_common.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Builds a 4-byte selector mask useful for pshufb.
// The bytes, from lowest to highest, are select0, select1 + 4, select0 + 8
// and select1 + 12, i.e. the two channel offsets alternated across four
// consecutive 4-byte pixels.
static uint32 GenerateSelector(int select0, int select1) {
  const uint32 pixel0 = (uint32)(select0);
  const uint32 pixel1 = (uint32)((select1 + 4) << 8);
  const uint32 pixel2 = (uint32)((select0 + 8) << 16);
  const uint32 pixel3 = (uint32)((select1 + 12) << 24);
  return pixel0 | pixel1 | pixel2 | pixel3;
}
// Fills index_map[0] and index_map[1] with the selectors for the even and odd
// rows of the 2x2 Bayer grid named by |dst_fourcc_bayer|, given the byte
// offsets of the blue, green and red channels in the source pixel.
// Returns 0 on success, or -1 (leaving index_map untouched) for a fourcc that
// is not one of the four Bayer layouts.
static int MakeSelectors(const int blue_index,
                         const int green_index,
                         const int red_index,
                         uint32 dst_fourcc_bayer,
                         uint32* index_map) {
  // Channel offsets for the two pixels of the even row and of the odd row.
  int even_first, even_second;
  int odd_first, odd_second;
  switch (dst_fourcc_bayer) {
    case FOURCC_BGGR:
      even_first = blue_index;
      even_second = green_index;
      odd_first = green_index;
      odd_second = red_index;
      break;
    case FOURCC_GBRG:
      even_first = green_index;
      even_second = blue_index;
      odd_first = red_index;
      odd_second = green_index;
      break;
    case FOURCC_RGGB:
      even_first = red_index;
      even_second = green_index;
      odd_first = green_index;
      odd_second = blue_index;
      break;
    case FOURCC_GRBG:
      even_first = green_index;
      even_second = red_index;
      odd_first = blue_index;
      odd_second = green_index;
      break;
    default:
      return -1;  // Bad FourCC
  }
  index_map[0] = GenerateSelector(even_first, even_second);
  index_map[1] = GenerateSelector(odd_first, odd_second);
  return 0;
}
// Converts a 32 bit ARGB image to one of the Bayer RGB formats.
// A negative |height| reads the source bottom-up (vertical flip).
// Returns 0 on success or -1 when |dst_fourcc_bayer| is not a Bayer fourcc.
LIBYUV_API
int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
                uint8* dst_bayer, int dst_stride_bayer,
                int width, int height,
                uint32 dst_fourcc_bayer) {
  int row;
  // Byte offsets of the channels inside an ARGB pixel.
  const int kBlueOffset = 0;
  const int kGreenOffset = 1;
  const int kRedOffset = 2;
  uint32 selectors[2];
  // Row function; starts with the C version and may be upgraded below.
  void (*convert_row)(const uint8* src_argb, uint8* dst_bayer,
                      uint32 selector, int pix) = ARGBToBayerRow_C;

  if (height < 0) {
    // Start at the last row and walk backwards.
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
    convert_row = IS_ALIGNED(width, 8) ? ARGBToBayerRow_SSSE3
                                       : ARGBToBayerRow_Any_SSSE3;
  }
#elif defined(HAS_ARGBTOBAYERROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    convert_row = IS_ALIGNED(width, 8) ? ARGBToBayerRow_NEON
                                       : ARGBToBayerRow_Any_NEON;
  }
#endif
  if (MakeSelectors(kBlueOffset, kGreenOffset, kRedOffset,
                    dst_fourcc_bayer, selectors) != 0) {
    return -1;  // Bad FourCC
  }
  // Even and odd rows use different selectors (the two rows of the 2x2 grid).
  for (row = 0; row < height; ++row) {
    convert_row(src_argb, dst_bayer, selectors[row & 1], width);
    src_argb += src_stride_argb;
    dst_bayer += dst_stride_bayer;
  }
  return 0;
}
// Average of two values, rounded down (sum shifted right by one bit).
#define AVG(a, b) (((a) + (b)) >> 1)
// Expands one Bayer row laid out blue, green, blue, green... into ARGB.
// src_bayer0 is the row being converted; src_bayer1 (src_stride_bayer bytes
// away) is the neighbouring row that supplies the red samples.
// Each output pixel is written as 4 bytes: blue, green, red, 255.
// pix is the number of pixels to produce.
// NOTE(review): for odd pix the tail still reads src_bayer0[1], one sample
// past the last pixel of the row - confirm callers provide readable padding.
static void BayerRowBG(const uint8* src_bayer0, int src_stride_bayer,
uint8* dst_argb, int pix) {
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
// Previous green/red samples, used for averaging at the even pixel. They
// start as the first samples, so the first average is of a value with itself.
uint8 g = src_bayer0[1];
uint8 r = src_bayer1[1];
int x;
// Two pixels per iteration; the final one or two pixels are handled below
// because the second pixel here reads the next blue sample (src_bayer0[2]).
for (x = 0; x < pix - 2; x += 2) {
// Even pixel: blue is native, green and red are averaged left/right.
dst_argb[0] = src_bayer0[0];
dst_argb[1] = AVG(g, src_bayer0[1]);
dst_argb[2] = AVG(r, src_bayer1[1]);
dst_argb[3] = 255U;
// Odd pixel: green and red are taken directly, blue is averaged.
dst_argb[4] = AVG(src_bayer0[0], src_bayer0[2]);
dst_argb[5] = src_bayer0[1];
dst_argb[6] = src_bayer1[1];
dst_argb[7] = 255U;
g = src_bayer0[1];
r = src_bayer1[1];
src_bayer0 += 2;
src_bayer1 += 2;
dst_argb += 8;
}
dst_argb[0] = src_bayer0[0];
dst_argb[1] = AVG(g, src_bayer0[1]);
dst_argb[2] = AVG(r, src_bayer1[1]);
dst_argb[3] = 255U;
// For even pix emit the last odd pixel; there is no blue sample to its right,
// so blue is copied instead of averaged.
if (!(pix & 1)) {
dst_argb[4] = src_bayer0[0];
dst_argb[5] = src_bayer0[1];
dst_argb[6] = src_bayer1[1];
dst_argb[7] = 255U;
}
}
// Expands one Bayer row laid out red, green, red, green... into ARGB.
// src_bayer0 is the row being converted; src_bayer1 (src_stride_bayer bytes
// away) is the neighbouring row that supplies the blue samples.
// Each output pixel is written as 4 bytes: blue, green, red, 255.
// pix is the number of pixels to produce.
// NOTE(review): for odd pix the tail still reads src_bayer0[1], one sample
// past the last pixel of the row - confirm callers provide readable padding.
static void BayerRowRG(const uint8* src_bayer0, int src_stride_bayer,
uint8* dst_argb, int pix) {
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
// Previous green/blue samples, used for averaging at the even pixel.
uint8 g = src_bayer0[1];
uint8 b = src_bayer1[1];
int x;
// Two pixels per iteration; the final one or two pixels are handled below
// because the second pixel here reads the next red sample (src_bayer0[2]).
for (x = 0; x < pix - 2; x += 2) {
// Even pixel: red is native, blue and green are averaged left/right.
dst_argb[0] = AVG(b, src_bayer1[1]);
dst_argb[1] = AVG(g, src_bayer0[1]);
dst_argb[2] = src_bayer0[0];
dst_argb[3] = 255U;
// Odd pixel: blue and green are taken directly, red is averaged.
dst_argb[4] = src_bayer1[1];
dst_argb[5] = src_bayer0[1];
dst_argb[6] = AVG(src_bayer0[0], src_bayer0[2]);
dst_argb[7] = 255U;
g = src_bayer0[1];
b = src_bayer1[1];
src_bayer0 += 2;
src_bayer1 += 2;
dst_argb += 8;
}
dst_argb[0] = AVG(b, src_bayer1[1]);
dst_argb[1] = AVG(g, src_bayer0[1]);
dst_argb[2] = src_bayer0[0];
dst_argb[3] = 255U;
// For even pix emit the last odd pixel; red is copied instead of averaged
// because there is no red sample to its right.
if (!(pix & 1)) {
dst_argb[4] = src_bayer1[1];
dst_argb[5] = src_bayer0[1];
dst_argb[6] = src_bayer0[0];
dst_argb[7] = 255U;
}
}
// Expands one Bayer row laid out green, blue, green, blue... into ARGB.
// src_bayer0 is the row being converted; src_bayer1 (src_stride_bayer bytes
// away) is the neighbouring row that supplies the red samples.
// Each output pixel is written as 4 bytes: blue, green, red, 255.
// pix is the number of pixels to produce.
// NOTE(review): for odd pix the tail still reads src_bayer0[1], one sample
// past the last pixel of the row - confirm callers provide readable padding.
static void BayerRowGB(const uint8* src_bayer0, int src_stride_bayer,
uint8* dst_argb, int pix) {
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
// Previous blue sample, used for averaging at the even (green) pixel.
uint8 b = src_bayer0[1];
int x;
// Two pixels per iteration; the final one or two pixels are handled below
// because the second pixel here reads index 2 of both rows.
for (x = 0; x < pix - 2; x += 2) {
// Even pixel: green is native, red comes from the neighbouring row, blue is
// averaged left/right.
dst_argb[0] = AVG(b, src_bayer0[1]);
dst_argb[1] = src_bayer0[0];
dst_argb[2] = src_bayer1[0];
dst_argb[3] = 255U;
// Odd pixel: blue is native, green and red are averaged left/right.
dst_argb[4] = src_bayer0[1];
dst_argb[5] = AVG(src_bayer0[0], src_bayer0[2]);
dst_argb[6] = AVG(src_bayer1[0], src_bayer1[2]);
dst_argb[7] = 255U;
b = src_bayer0[1];
src_bayer0 += 2;
src_bayer1 += 2;
dst_argb += 8;
}
dst_argb[0] = AVG(b, src_bayer0[1]);
dst_argb[1] = src_bayer0[0];
dst_argb[2] = src_bayer1[0];
dst_argb[3] = 255U;
// For even pix emit the last odd pixel; green and red are copied from the
// left neighbour instead of averaged.
if (!(pix & 1)) {
dst_argb[4] = src_bayer0[1];
dst_argb[5] = src_bayer0[0];
dst_argb[6] = src_bayer1[0];
dst_argb[7] = 255U;
}
}
// Expands one Bayer row laid out green, red, green, red... into ARGB.
// src_bayer0 is the row being converted; src_bayer1 (src_stride_bayer bytes
// away) is the neighbouring row that supplies the blue samples.
// Each output pixel is written as 4 bytes: blue, green, red, 255.
// pix is the number of pixels to produce.
// NOTE(review): for odd pix the tail still reads src_bayer0[1], one sample
// past the last pixel of the row - confirm callers provide readable padding.
static void BayerRowGR(const uint8* src_bayer0, int src_stride_bayer,
uint8* dst_argb, int pix) {
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
// Previous red sample, used for averaging at the even (green) pixel.
uint8 r = src_bayer0[1];
int x;
// Two pixels per iteration; the final one or two pixels are handled below
// because the second pixel here reads index 2 of both rows.
for (x = 0; x < pix - 2; x += 2) {
// Even pixel: green is native, blue comes from the neighbouring row, red is
// averaged left/right.
dst_argb[0] = src_bayer1[0];
dst_argb[1] = src_bayer0[0];
dst_argb[2] = AVG(r, src_bayer0[1]);
dst_argb[3] = 255U;
// Odd pixel: red is native, blue and green are averaged left/right.
dst_argb[4] = AVG(src_bayer1[0], src_bayer1[2]);
dst_argb[5] = AVG(src_bayer0[0], src_bayer0[2]);
dst_argb[6] = src_bayer0[1];
dst_argb[7] = 255U;
r = src_bayer0[1];
src_bayer0 += 2;
src_bayer1 += 2;
dst_argb += 8;
}
dst_argb[0] = src_bayer1[0];
dst_argb[1] = src_bayer0[0];
dst_argb[2] = AVG(r, src_bayer0[1]);
dst_argb[3] = 255U;
// For even pix emit the last odd pixel; blue and green are copied from the
// left neighbour instead of averaged.
if (!(pix & 1)) {
dst_argb[4] = src_bayer1[0];
dst_argb[5] = src_bayer0[0];
dst_argb[6] = src_bayer0[1];
dst_argb[7] = 255U;
}
}
// Converts any Bayer RGB format to ARGB.
// Rows are processed in pairs: the first row of each pair borrows its missing
// channel from the row below, the second from the row above. A negative
// height writes the destination bottom-up.
// Returns 0 on success, -1 for an unsupported FourCC.
LIBYUV_API
int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
                uint8* dst_argb, int dst_stride_argb,
                int width, int height,
                uint32 src_fourcc_bayer) {
  int y;
  void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer,
                    uint8* dst_argb, int pix);
  void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
                    uint8* dst_argb, int pix);
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  switch (src_fourcc_bayer) {
    case FOURCC_BGGR:
      BayerRow0 = BayerRowBG;
      BayerRow1 = BayerRowGR;
      break;
    case FOURCC_GBRG:
      BayerRow0 = BayerRowGB;
      BayerRow1 = BayerRowRG;
      break;
    case FOURCC_GRBG:
      BayerRow0 = BayerRowGR;
      BayerRow1 = BayerRowBG;
      break;
    case FOURCC_RGGB:
      BayerRow0 = BayerRowRG;
      BayerRow1 = BayerRowGB;
      break;
    default:
      return -1;  // Bad FourCC
  }

  for (y = 0; y < height - 1; y += 2) {
    BayerRow0(src_bayer, src_stride_bayer, dst_argb, width);
    // The odd row looks upwards (negative stride) for its neighbour.
    BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
              dst_argb + dst_stride_argb, width);
    src_bayer += src_stride_bayer * 2;
    dst_argb += dst_stride_argb * 2;
  }
  if (height & 1) {
    // Last row of an odd-height image: there is no row below it, so a
    // positive stride would read past the end of the source. Use the row
    // above (same colour layout as the missing row below), or the row itself
    // when the image is a single row high.
    const int last_row_stride = (height > 1) ? -src_stride_bayer : 0;
    BayerRow0(src_bayer, last_row_stride, dst_argb, width);
  }
  return 0;
}
// Converts any Bayer RGB format to I420.
// Two Bayer rows at a time are expanded into a temporary pair of ARGB rows,
// which are then converted to two Y rows and one row each of U and V.
// A negative height writes the destination planes bottom-up.
// Returns 0 on success, -1 for an unsupported FourCC.
LIBYUV_API
int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
                uint8* dst_y, int dst_stride_y,
                uint8* dst_u, int dst_stride_u,
                uint8* dst_v, int dst_stride_v,
                int width, int height,
                uint32 src_fourcc_bayer) {
  void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer,
                    uint8* dst_argb, int pix);
  void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
                    uint8* dst_argb, int pix);
  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
      ARGBToYRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    int halfheight;
    height = -height;
    halfheight = (height + 1) >> 1;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_u = dst_u + (halfheight - 1) * dst_stride_u;
    dst_v = dst_v + (halfheight - 1) * dst_stride_v;
    dst_stride_y = -dst_stride_y;
    dst_stride_u = -dst_stride_u;
    dst_stride_v = -dst_stride_v;
  }
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
    ARGBToYRow = ARGBToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
      ARGBToUVRow = ARGBToUVRow_SSSE3;
      if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
        ARGBToYRow = ARGBToYRow_SSSE3;
      }
    }
  }
#elif defined(HAS_ARGBTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ARGBToYRow = ARGBToYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBToYRow = ARGBToYRow_NEON;
    }
    if (width >= 16) {
      ARGBToUVRow = ARGBToUVRow_Any_NEON;
      if (IS_ALIGNED(width, 16)) {
        ARGBToUVRow = ARGBToUVRow_NEON;
      }
    }
  }
#endif

  switch (src_fourcc_bayer) {
    case FOURCC_BGGR:
      BayerRow0 = BayerRowBG;
      BayerRow1 = BayerRowGR;
      break;
    case FOURCC_GBRG:
      BayerRow0 = BayerRowGB;
      BayerRow1 = BayerRowRG;
      break;
    case FOURCC_GRBG:
      BayerRow0 = BayerRowGR;
      BayerRow1 = BayerRowBG;
      break;
    case FOURCC_RGGB:
      BayerRow0 = BayerRowRG;
      BayerRow1 = BayerRowGB;
      break;
    default:
      return -1;  // Bad FourCC
  }

  {
    // Allocate 2 rows of ARGB. Each row is padded to a multiple of 16 bytes.
    const int kRowSize = (width * 4 + 15) & ~15;
    align_buffer_64(row, kRowSize * 2);
    int y;
    for (y = 0; y < height - 1; y += 2) {
      BayerRow0(src_bayer, src_stride_bayer, row, width);
      // The odd row looks upwards (negative stride) for its neighbour.
      BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
                row + kRowSize, width);
      ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
      ARGBToYRow(row, dst_y, width);
      ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
      src_bayer += src_stride_bayer * 2;
      dst_y += dst_stride_y * 2;
      dst_u += dst_stride_u;
      dst_v += dst_stride_v;
    }
    if (height & 1) {
      // Last row of an odd-height image: there is no row below it, so a
      // positive stride would read past the end of the source. Use the row
      // above (same colour layout as the missing row below), or the row
      // itself when the image is a single row high.
      const int last_row_stride = (height > 1) ? -src_stride_bayer : 0;
      BayerRow0(src_bayer, last_row_stride, row, width);
      // Stride 0 makes the UV kernel use the single ARGB row twice.
      ARGBToUVRow(row, 0, dst_u, dst_v, width);
      ARGBToYRow(row, dst_y, width);
    }
    free_aligned_buffer_64(row);
  }
  return 0;
}
// Convert I420 to Bayer.
// Every output row is produced in two steps: the Y/U/V rows are converted to
// a temporary ARGB row, which is then reduced to Bayer using the selector for
// that row's parity. A negative height reads the source planes bottom-up.
// Returns 0 on success, -1 when MakeSelectors rejects the FourCC.
LIBYUV_API
int I420ToBayer(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height,
uint32 dst_fourcc_bayer) {
// Both kernels default to the C versions; SIMD versions may replace them.
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I422ToARGBRow_C;
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
uint32 selector, int pix) = ARGBToBayerRow_C;
const int blue_index = 0; // Offsets for ARGB format
const int green_index = 1;
const int red_index = 2;
uint32 index_map[2];
// Negative height means invert the image.
if (height < 0) {
int halfheight;
height = -height;
halfheight = (height + 1) >> 1;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (halfheight - 1) * src_stride_u;
src_v = src_v + (halfheight - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
// The blocks below are independent #if sections, so a later match overrides
// an earlier one.
#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToARGBRow = I422ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
}
}
#elif defined(HAS_ARGBTOBAYERROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToBayerRow = ARGBToBayerRow_NEON;
}
}
#endif
// Validate the FourCC before allocating the temporary row.
if (MakeSelectors(blue_index, green_index, red_index,
dst_fourcc_bayer, index_map)) {
return -1; // Bad FourCC
}
{
// Allocate a row of ARGB.
align_buffer_64(row, width * 4);
int y;
for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToBayerRow(row, dst_bayer, index_map[y & 1], width);
dst_bayer += dst_stride_bayer;
src_y += src_stride_y;
// U and V rows are shared by two Y rows; advance them after each odd row.
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
free_aligned_buffer_64(row);
}
return 0;
}
// Generates four thin wrappers for one Bayer layout (BAYER is the FourCC
// suffix, e.g. BGGR): Bayer<X>ToI420, I420ToBayer<X>, ARGBToBayer<X> and
// Bayer<X>ToARGB. Each wrapper forwards its arguments unchanged to the
// generic converter and appends FOURCC_<X> as the format argument.
#define MAKEBAYERFOURCC(BAYER) \
LIBYUV_API \
int Bayer##BAYER##ToI420(const uint8* src_bayer, int src_stride_bayer, \
uint8* dst_y, int dst_stride_y, \
uint8* dst_u, int dst_stride_u, \
uint8* dst_v, int dst_stride_v, \
int width, int height) { \
return BayerToI420(src_bayer, src_stride_bayer, \
dst_y, dst_stride_y, \
dst_u, dst_stride_u, \
dst_v, dst_stride_v, \
width, height, \
FOURCC_##BAYER); \
} \
\
LIBYUV_API \
int I420ToBayer##BAYER(const uint8* src_y, int src_stride_y, \
const uint8* src_u, int src_stride_u, \
const uint8* src_v, int src_stride_v, \
uint8* dst_bayer, int dst_stride_bayer, \
int width, int height) { \
return I420ToBayer(src_y, src_stride_y, \
src_u, src_stride_u, \
src_v, src_stride_v, \
dst_bayer, dst_stride_bayer, \
width, height, \
FOURCC_##BAYER); \
} \
\
LIBYUV_API \
int ARGBToBayer##BAYER(const uint8* src_argb, int src_stride_argb, \
uint8* dst_bayer, int dst_stride_bayer, \
int width, int height) { \
return ARGBToBayer(src_argb, src_stride_argb, \
dst_bayer, dst_stride_bayer, \
width, height, \
FOURCC_##BAYER); \
} \
\
LIBYUV_API \
int Bayer##BAYER##ToARGB(const uint8* src_bayer, int src_stride_bayer, \
uint8* dst_argb, int dst_stride_argb, \
int width, int height) { \
return BayerToARGB(src_bayer, src_stride_bayer, \
dst_argb, dst_stride_argb, \
width, height, \
FOURCC_##BAYER); \
}
// Instantiate the wrappers for the four supported Bayer layouts.
MAKEBAYERFOURCC(BGGR)
MAKEBAYERFOURCC(GBRG)
MAKEBAYERFOURCC(GRBG)
MAKEBAYERFOURCC(RGGB)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View file

@ -1,558 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/mjpeg_decoder.h"
#ifdef HAVE_JPEG
#include <assert.h>
#if !defined(__pnacl__) && !defined(__CLR_VER) && !defined(COVERAGE_ENABLED) &&\
!defined(TARGET_IPHONE_SIMULATOR)
// Must be included before jpeglib.
#include <setjmp.h>
#define HAVE_SETJMP
#endif
struct FILE; // For jpeglib.h.
// C++ build requires extern C for jpeg internals.
#ifdef __cplusplus
extern "C" {
#endif
#include <jpeglib.h>
#ifdef __cplusplus
} // extern "C"
#endif
#include "libyuv/planar_functions.h" // For CopyPlane().
namespace libyuv {
#ifdef HAVE_SETJMP
// libjpeg error manager extended with a jump buffer. ErrorHandler casts the
// jpeg_error_mgr pointer it receives back to SetJmpErrorMgr, which is why
// `base` has to be the first member.
struct SetJmpErrorMgr {
jpeg_error_mgr base; // Must be at the top
jmp_buf setjmp_buffer; // Target of the longjmp() issued by ErrorHandler.
};
#endif
// Color space constants returned by GetColorSpace(); each one mirrors the
// libjpeg JCS_* value of the same name.
const int MJpegDecoder::kColorSpaceUnknown = JCS_UNKNOWN;
const int MJpegDecoder::kColorSpaceGrayscale = JCS_GRAYSCALE;
const int MJpegDecoder::kColorSpaceRgb = JCS_RGB;
const int MJpegDecoder::kColorSpaceYCbCr = JCS_YCbCr;
const int MJpegDecoder::kColorSpaceCMYK = JCS_CMYK;
const int MJpegDecoder::kColorSpaceYCCK = JCS_YCCK;
// Creates the libjpeg decompressor and wires it to an in-memory data source
// backed by buf_ / buf_vec_. No output buffers are allocated here; LoadFrame
// does that once the frame header is known.
MJpegDecoder::MJpegDecoder()
: has_scanline_padding_(LIBYUV_FALSE),
num_outbufs_(0),
scanlines_(NULL),
scanlines_sizes_(NULL),
databuf_(NULL),
databuf_strides_(NULL) {
decompress_struct_ = new jpeg_decompress_struct;
source_mgr_ = new jpeg_source_mgr;
// NOTE(review): when HAVE_SETJMP is not defined, decompress_struct_->err is
// never assigned before jpeg_create_decompress() below - confirm that
// configuration is supported.
#ifdef HAVE_SETJMP
error_mgr_ = new SetJmpErrorMgr;
decompress_struct_->err = jpeg_std_error(&error_mgr_->base);
// Override standard exit()-based error handler.
error_mgr_->base.error_exit = &ErrorHandler;
#endif
decompress_struct_->client_data = NULL;
// Source manager callbacks implemented by this class (resync uses libjpeg's
// default implementation).
source_mgr_->init_source = &init_source;
source_mgr_->fill_input_buffer = &fill_input_buffer;
source_mgr_->skip_input_data = &skip_input_data;
source_mgr_->resync_to_restart = &jpeg_resync_to_restart;
source_mgr_->term_source = &term_source;
jpeg_create_decompress(decompress_struct_);
decompress_struct_->src = source_mgr_;
// The buffer vector exposes the single buffer buf_ to fill_input_buffer.
buf_vec_.buffers = &buf_;
buf_vec_.len = 1;
}
// Destroys the libjpeg decompressor first, then frees the structs it was
// using and finally the output buffers.
MJpegDecoder::~MJpegDecoder() {
jpeg_destroy_decompress(decompress_struct_);
delete decompress_struct_;
delete source_mgr_;
#ifdef HAVE_SETJMP
delete error_mgr_;
#endif
DestroyOutputBuffers();
}
// Validates the JPEG in [src, src + src_len), reads its header and sizes the
// per-component scanline tables and data buffers for decoding.
// The caller's memory is referenced, not copied, so it must stay valid until
// decoding is finished.
// Returns LIBYUV_TRUE on success; LIBYUV_FALSE if validation fails, the
// header cannot be read, or libjpeg reports an error.
LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) {
  if (!ValidateJpeg(src, src_len)) {
    return LIBYUV_FALSE;
  }

  buf_.data = src;
  buf_.len = (int)(src_len);
  buf_vec_.pos = 0;
  decompress_struct_->client_data = &buf_vec_;
#ifdef HAVE_SETJMP
  if (setjmp(error_mgr_->setjmp_buffer)) {
    // We called jpeg_read_header, it experienced an error, and we called
    // longjmp() and rewound the stack to here. Return error.
    return LIBYUV_FALSE;
  }
#endif
  if (jpeg_read_header(decompress_struct_, TRUE) != JPEG_HEADER_OK) {
    // ERROR: Bad MJPEG header
    return LIBYUV_FALSE;
  }
  AllocOutputBuffers(GetNumComponents());
  for (int i = 0; i < num_outbufs_; ++i) {
    int scanlines_size = GetComponentScanlinesPerImcuRow(i);
    const bool scanlines_changed = scanlines_sizes_[i] != scanlines_size;
    if (scanlines_changed) {
      // Allocated with new[], so it must be released with delete[].
      // Deleting a NULL pointer is a no-op.
      delete [] scanlines_[i];
      scanlines_[i] = new uint8* [scanlines_size];
      scanlines_sizes_[i] = scanlines_size;
    }

    // We allocate padding for the final scanline to pad it up to DCTSIZE bytes
    // to avoid memory errors, since jpeglib only reads full MCUs blocks. For
    // the preceding scanlines, the padding is not needed/wanted because the
    // following addresses will already be valid (they are the initial bytes of
    // the next scanline) and will be overwritten when jpeglib writes out that
    // next scanline.
    int databuf_stride = GetComponentStride(i);
    int databuf_size = scanlines_size * databuf_stride;
    // The buffer size depends on both the stride and the scanline count, so
    // reallocate when either one changed. Checking the stride alone would
    // keep a too-small buffer when only the vertical sampling differs.
    if (scanlines_changed || databuf_strides_[i] != databuf_stride) {
      delete [] databuf_[i];
      databuf_[i] = new uint8[databuf_size];
      databuf_strides_[i] = databuf_stride;
    }

    if (GetComponentStride(i) != GetComponentWidth(i)) {
      has_scanline_padding_ = LIBYUV_TRUE;
    }
  }
  return LIBYUV_TRUE;
}
// Integer division that rounds the quotient up (for the non-negative operands
// used in this file).
static int DivideAndRoundUp(int numerator, int denominator) {
  const int bias = denominator - 1;
  return (numerator + bias) / denominator;
}
// Plain integer division; named to mirror DivideAndRoundUp at call sites.
static int DivideAndRoundDown(int numerator, int denominator) {
  const int quotient = numerator / denominator;
  return quotient;
}
// Width, in pixels, of the most recently loaded frame.
int MJpegDecoder::GetWidth() {
  const int frame_width = decompress_struct_->image_width;
  return frame_width;
}
// Height, in pixels, of the most recently loaded frame.
int MJpegDecoder::GetHeight() {
  const int frame_height = decompress_struct_->image_height;
  return frame_height;
}
// Color space of the most recently loaded frame, expressed as one of the
// kColorSpace* constants.
int MJpegDecoder::GetColorSpace() {
  const int color_space = decompress_struct_->jpeg_color_space;
  return color_space;
}
// How many color components the frame's color space has.
int MJpegDecoder::GetNumComponents() {
  const int component_count = decompress_struct_->num_components;
  return component_count;
}
// Horizontal sampling factor of the given component, as stored by libjpeg.
int MJpegDecoder::GetHorizSampFactor(int component) {
  const int h_factor = decompress_struct_->comp_info[component].h_samp_factor;
  return h_factor;
}
// Vertical sampling factor of the given component, as stored by libjpeg.
int MJpegDecoder::GetVertSampFactor(int component) {
  const int v_factor = decompress_struct_->comp_info[component].v_samp_factor;
  return v_factor;
}
// Horizontal subsampling of the component relative to the image: the largest
// horizontal sampling factor divided by this component's factor.
int MJpegDecoder::GetHorizSubSampFactor(int component) {
  const int max_factor = decompress_struct_->max_h_samp_factor;
  const int own_factor = GetHorizSampFactor(component);
  return max_factor / own_factor;
}
// Vertical subsampling of the component relative to the image: the largest
// vertical sampling factor divided by this component's factor.
int MJpegDecoder::GetVertSubSampFactor(int component) {
  const int max_factor = decompress_struct_->max_v_samp_factor;
  const int own_factor = GetVertSampFactor(component);
  return max_factor / own_factor;
}
// Number of image scanlines covered by one iMCU row: DCTSIZE lines for each
// unit of the largest vertical sampling factor.
int MJpegDecoder::GetImageScanlinesPerImcuRow() {
  const int max_v_factor = decompress_struct_->max_v_samp_factor;
  return max_v_factor * DCTSIZE;
}
// Number of scanlines one iMCU row yields for the given component: the image
// scanline count divided by the component's vertical subsampling, rounded up.
int MJpegDecoder::GetComponentScanlinesPerImcuRow(int component) {
  const int vert_subsample = GetVertSubSampFactor(component);
  const int image_lines = GetImageScanlinesPerImcuRow();
  return DivideAndRoundUp(image_lines, vert_subsample);
}
// Width of the component plane: the frame width divided by the component's
// horizontal subsampling, rounded up.
int MJpegDecoder::GetComponentWidth(int component) {
  const int horiz_subsample = GetHorizSubSampFactor(component);
  const int frame_width = GetWidth();
  return DivideAndRoundUp(frame_width, horiz_subsample);
}
// Height of the component plane: the frame height divided by the component's
// vertical subsampling, rounded up.
int MJpegDecoder::GetComponentHeight(int component) {
  const int vert_subsample = GetVertSubSampFactor(component);
  const int frame_height = GetHeight();
  return DivideAndRoundUp(frame_height, vert_subsample);
}
// Component width in bytes, rounded up to the next multiple of DCTSIZE.
int MJpegDecoder::GetComponentStride(int component) {
  const int align_mask = DCTSIZE - 1;
  const int plane_width = GetComponentWidth(component);
  return (plane_width + align_mask) & ~align_mask;
}
// Unpadded size of the component plane: component width times component
// height.
int MJpegDecoder::GetComponentSize(int component) {
  const int plane_width = GetComponentWidth(component);
  const int plane_height = GetComponentHeight(component);
  return plane_width * plane_height;
}
// Aborts the current decompression so the decoder can accept another frame.
// Returns LIBYUV_FALSE only if libjpeg reports an error through the setjmp
// handler; otherwise LIBYUV_TRUE.
LIBYUV_BOOL MJpegDecoder::UnloadFrame() {
#ifdef HAVE_SETJMP
if (setjmp(error_mgr_->setjmp_buffer)) {
// We called jpeg_abort_decompress, it experienced an error, and we called
// longjmp() and rewound the stack to here. Return error.
return LIBYUV_FALSE;
}
#endif
jpeg_abort_decompress(decompress_struct_);
return LIBYUV_TRUE;
}
// Decodes the loaded frame into the caller's planes, one plane per component.
// dst_width must equal the frame width; dst_height may be smaller than the
// frame height, in which case (GetHeight() - dst_height) / 2 lines are
// skipped at the top and decoding stops after dst_height lines.
// Rows are written tightly packed (row pitch = component width), and each
// planes[i] pointer is advanced past the rows written to it.
// Returns LIBYUV_FALSE on bad dimensions or any libjpeg failure.
// TODO(fbarchard): Allow rectangle to be specified: x, y, width, height.
LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(
uint8** planes, int dst_width, int dst_height) {
if (dst_width != GetWidth() ||
dst_height > GetHeight()) {
// ERROR: Bad dimensions
return LIBYUV_FALSE;
}
#ifdef HAVE_SETJMP
if (setjmp(error_mgr_->setjmp_buffer)) {
// We called into jpeglib, it experienced an error sometime during this
// function call, and we called longjmp() and rewound the stack to here.
// Return error.
return LIBYUV_FALSE;
}
#endif
if (!StartDecode()) {
return LIBYUV_FALSE;
}
// Point libjpeg's scanline table at our intermediate buffers.
SetScanlinePointers(databuf_);
int lines_left = dst_height;
// Compute amount of lines to skip to implement vertical crop.
// TODO(fbarchard): Ensure skip is a multiple of maximum component
// subsample. ie 2
int skip = (GetHeight() - dst_height) / 2;
if (skip > 0) {
// There is no API to skip lines in the output data, so we read them
// into the temp buffer.
while (skip >= GetImageScanlinesPerImcuRow()) {
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
skip -= GetImageScanlinesPerImcuRow();
}
if (skip > 0) {
// Have a partial iMCU row left over to skip. Must read it and then
// copy the parts we want into the destination.
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
for (int i = 0; i < num_outbufs_; ++i) {
// TODO(fbarchard): Compute skip to avoid this
assert(skip % GetVertSubSampFactor(i) == 0);
// Convert the image-line skip into rows of this component.
int rows_to_skip =
DivideAndRoundDown(skip, GetVertSubSampFactor(i));
int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i) -
rows_to_skip;
int data_to_skip = rows_to_skip * GetComponentStride(i);
CopyPlane(databuf_[i] + data_to_skip, GetComponentStride(i),
planes[i], GetComponentWidth(i),
GetComponentWidth(i), scanlines_to_copy);
planes[i] += scanlines_to_copy * GetComponentWidth(i);
}
lines_left -= (GetImageScanlinesPerImcuRow() - skip);
}
}
// Read full MCUs but cropped horizontally
// (the loop leaves one last full or partial iMCU row for the block below).
for (; lines_left > GetImageScanlinesPerImcuRow();
lines_left -= GetImageScanlinesPerImcuRow()) {
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
for (int i = 0; i < num_outbufs_; ++i) {
int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i);
CopyPlane(databuf_[i], GetComponentStride(i),
planes[i], GetComponentWidth(i),
GetComponentWidth(i), scanlines_to_copy);
planes[i] += scanlines_to_copy * GetComponentWidth(i);
}
}
if (lines_left > 0) {
// Have a partial iMCU row left over to decode.
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
for (int i = 0; i < num_outbufs_; ++i) {
// Only the rows that belong to the remaining image lines are copied.
int scanlines_to_copy =
DivideAndRoundUp(lines_left, GetVertSubSampFactor(i));
CopyPlane(databuf_[i], GetComponentStride(i),
planes[i], GetComponentWidth(i),
GetComponentWidth(i), scanlines_to_copy);
planes[i] += scanlines_to_copy * GetComponentWidth(i);
}
}
return FinishDecode();
}
// Decodes the loaded frame one iMCU row at a time and hands each decoded
// chunk to fn(opaque, databuf_, databuf_strides_, rows), where rows is the
// number of image lines in that chunk.
// dst_width must equal the frame width; dst_height may be smaller than the
// frame height, in which case (GetHeight() - dst_height) / 2 lines are
// skipped at the top and decoding stops after dst_height lines.
// Returns LIBYUV_FALSE on bad dimensions or any libjpeg failure.
LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn, void* opaque,
int dst_width, int dst_height) {
if (dst_width != GetWidth() ||
dst_height > GetHeight()) {
// ERROR: Bad dimensions
return LIBYUV_FALSE;
}
#ifdef HAVE_SETJMP
if (setjmp(error_mgr_->setjmp_buffer)) {
// We called into jpeglib, it experienced an error sometime during this
// function call, and we called longjmp() and rewound the stack to here.
// Return error.
return LIBYUV_FALSE;
}
#endif
if (!StartDecode()) {
return LIBYUV_FALSE;
}
// Point libjpeg's scanline table at our intermediate buffers.
SetScanlinePointers(databuf_);
int lines_left = dst_height;
// TODO(fbarchard): Compute amount of lines to skip to implement vertical crop
int skip = (GetHeight() - dst_height) / 2;
if (skip > 0) {
// Decode and discard whole iMCU rows that lie entirely above the crop.
while (skip >= GetImageScanlinesPerImcuRow()) {
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
skip -= GetImageScanlinesPerImcuRow();
}
if (skip > 0) {
// Have a partial iMCU row left over to skip.
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
for (int i = 0; i < num_outbufs_; ++i) {
// TODO(fbarchard): Compute skip to avoid this
assert(skip % GetVertSubSampFactor(i) == 0);
int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
int data_to_skip = rows_to_skip * GetComponentStride(i);
// Change our own data buffer pointers so we can pass them to the
// callback.
databuf_[i] += data_to_skip;
}
int scanlines_to_copy = GetImageScanlinesPerImcuRow() - skip;
(*fn)(opaque, databuf_, databuf_strides_, scanlines_to_copy);
// Now change them back.
for (int i = 0; i < num_outbufs_; ++i) {
int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
int data_to_skip = rows_to_skip * GetComponentStride(i);
databuf_[i] -= data_to_skip;
}
lines_left -= scanlines_to_copy;
}
}
// Read full MCUs until we get to the crop point.
for (; lines_left >= GetImageScanlinesPerImcuRow();
lines_left -= GetImageScanlinesPerImcuRow()) {
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
(*fn)(opaque, databuf_, databuf_strides_, GetImageScanlinesPerImcuRow());
}
if (lines_left > 0) {
// Have a partial iMCU row left over to decode.
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
// Only the first lines_left lines of this chunk belong to the output.
(*fn)(opaque, databuf_, databuf_strides_, lines_left);
}
return FinishDecode();
}
// Source manager callback installed as init_source: primes the input by
// fetching the first buffer. The result of fill_input_buffer is ignored.
void MJpegDecoder::init_source(j_decompress_ptr cinfo) {
fill_input_buffer(cinfo);
}
// Source manager callback: hands libjpeg the next buffer from the
// BufferVector stored in cinfo->client_data.
// Returns TRUE after supplying a buffer, or FALSE when every buffer has
// already been consumed. Running out of data is an input error (a truncated
// or corrupt stream), not a programming error, so it is reported to the
// caller instead of aborting through assert().
boolean MJpegDecoder::fill_input_buffer(j_decompress_ptr cinfo) {
  BufferVector* buf_vec = (BufferVector*)(cinfo->client_data);
  if (buf_vec->pos >= buf_vec->len) {
    // ERROR: No more data
    return FALSE;
  }
  cinfo->src->next_input_byte = buf_vec->buffers[buf_vec->pos].data;
  cinfo->src->bytes_in_buffer = buf_vec->buffers[buf_vec->pos].len;
  ++buf_vec->pos;
  return TRUE;
}
// Source manager callback: discards num_bytes of compressed input.
// Both the read pointer and the remaining-byte count are updated so libjpeg
// never believes more data is available than the buffer holds. A non-positive
// request is ignored. A request larger than what is buffered consumes the
// whole buffer, so the next read goes through fill_input_buffer.
void MJpegDecoder::skip_input_data(j_decompress_ptr cinfo,
                                   long num_bytes) {  // NOLINT
  if (num_bytes <= 0) {
    return;
  }
  jpeg_source_mgr* src = cinfo->src;
  const size_t wanted = (size_t)(num_bytes);
  if (wanted > src->bytes_in_buffer) {
    src->next_input_byte += src->bytes_in_buffer;
    src->bytes_in_buffer = 0;
  } else {
    src->next_input_byte += wanted;
    src->bytes_in_buffer -= wanted;
  }
}
// Source manager callback installed as term_source. The input buffers are not
// owned by the source manager, so there is nothing to release here.
void MJpegDecoder::term_source(j_decompress_ptr cinfo) {
// Nothing to do.
}
#ifdef HAVE_SETJMP
// Installed as libjpeg's error_exit hook by the constructor. Never returns:
// it jumps back to the most recent setjmp() on the decoder's jump buffer.
void MJpegDecoder::ErrorHandler(j_common_ptr cinfo) {
// This is called when a jpeglib command experiences an error. Unfortunately
// jpeglib's error handling model is not very flexible, because it expects the
// error handler to not return--i.e., it wants the program to terminate. To
// recover from errors we use setjmp() as shown in their example. setjmp() is
// C's implementation for the "call with current continuation" functionality
// seen in some functional programming languages.
// A formatted message can be output, but is unsafe for release.
#ifdef DEBUG
char buf[JMSG_LENGTH_MAX];
(*cinfo->err->format_message)(cinfo, buf);
// ERROR: Error in jpeglib: buf
#endif
// cinfo->err points at SetJmpErrorMgr::base, the struct's first member, so
// the cast recovers the enclosing SetJmpErrorMgr.
SetJmpErrorMgr* mgr = (SetJmpErrorMgr*)(cinfo->err);
// This rewinds the call stack to the point of the corresponding setjmp()
// and causes it to return (for a second time) with value 1.
longjmp(mgr->setjmp_buffer, 1);
}
#endif
// Makes sure the per-component bookkeeping arrays are sized for num_outbufs
// components. When the count is unchanged nothing happens; otherwise all
// existing buffers are destroyed and fresh, empty tables are created (the
// per-component buffers themselves are allocated later).
void MJpegDecoder::AllocOutputBuffers(int num_outbufs) {
  if (num_outbufs == num_outbufs_) {
    return;  // Tables already match the component count.
  }
  // We could perhaps optimize this case to resize the output buffers without
  // necessarily having to delete and recreate each one, but it's not worth
  // it.
  DestroyOutputBuffers();

  scanlines_ = new uint8** [num_outbufs];
  scanlines_sizes_ = new int[num_outbufs];
  databuf_ = new uint8* [num_outbufs];
  databuf_strides_ = new int[num_outbufs];

  // Start every slot empty.
  for (int comp = 0; comp < num_outbufs; ++comp) {
    scanlines_[comp] = NULL;
    scanlines_sizes_[comp] = 0;
    databuf_[comp] = NULL;
    databuf_strides_[comp] = 0;
  }
  num_outbufs_ = num_outbufs;
}
// Frees every per-component buffer and the tables that index them, then
// resets the decoder to the "no output buffers" state.
void MJpegDecoder::DestroyOutputBuffers() {
  // Per-component storage first, while the tables are still valid.
  for (int comp = 0; comp < num_outbufs_; ++comp) {
    delete [] scanlines_[comp];
    delete [] databuf_[comp];
  }

  delete [] scanlines_;
  scanlines_ = NULL;
  delete [] databuf_;
  databuf_ = NULL;
  delete [] scanlines_sizes_;
  scanlines_sizes_ = NULL;
  delete [] databuf_strides_;
  databuf_strides_ = NULL;

  num_outbufs_ = 0;
}
// Configures the decompressor for raw output and starts decompression.
// The settings favour speed: the fast integer DCT (JDCT_IFAST) is selected
// and block smoothing is switched off.
// Returns LIBYUV_FALSE if jpeg_start_decompress() fails.
LIBYUV_BOOL MJpegDecoder::StartDecode() {
decompress_struct_->raw_data_out = TRUE;
decompress_struct_->dct_method = JDCT_IFAST; // JDCT_ISLOW is default
decompress_struct_->dither_mode = JDITHER_NONE;
// Not applicable to 'raw':
decompress_struct_->do_fancy_upsampling = LIBYUV_FALSE;
// Only for buffered mode:
decompress_struct_->enable_2pass_quant = LIBYUV_FALSE;
// Blocky but fast:
decompress_struct_->do_block_smoothing = LIBYUV_FALSE;
if (!jpeg_start_decompress(decompress_struct_)) {
// ERROR: Couldn't start JPEG decompressor
return LIBYUV_FALSE;
}
return LIBYUV_TRUE;
}
// Ends the current decode pass. Always returns LIBYUV_TRUE.
LIBYUV_BOOL MJpegDecoder::FinishDecode() {
// jpeglib considers it an error if we finish without decoding the whole
// image, so we call "abort" rather than "finish".
jpeg_abort_decompress(decompress_struct_);
return LIBYUV_TRUE;
}
// Fills each component's scanline table with row pointers into data[i]:
// entry j points j component strides past the start of that buffer.
void MJpegDecoder::SetScanlinePointers(uint8** data) {
  for (int comp = 0; comp < num_outbufs_; ++comp) {
    uint8* row_start = data[comp];
    int row = 0;
    while (row < scanlines_sizes_[comp]) {
      scanlines_[comp][row] = row_start;
      row_start += GetComponentStride(comp);
      ++row;
    }
  }
}
// Decodes one iMCU row of raw data into the scanline buffers. Succeeds only
// when libjpeg reports exactly one full iMCU row of lines read.
inline LIBYUV_BOOL MJpegDecoder::DecodeImcuRow() {
  const unsigned int lines_wanted =
      (unsigned int)(GetImageScanlinesPerImcuRow());
  const unsigned int lines_read =
      jpeg_read_raw_data(decompress_struct_,
                         scanlines_,
                         GetImageScanlinesPerImcuRow());
  return lines_wanted == lines_read;
}
// Classifies the JPEG chroma subsampling from per-component subsampling
// factors. subsample_x / subsample_y hold one entry per component.
// Returns kJpegYuv400 for a single full-resolution component, kJpegYuv420 /
// kJpegYuv422 / kJpegYuv444 for the matching three-component layouts, and
// kJpegUnknown for anything else.
JpegSubsamplingType MJpegDecoder::JpegSubsamplingTypeHelper(
    int* subsample_x, int* subsample_y, int number_of_components) {
  if (number_of_components == 1) {  // Grey-scale images.
    if (subsample_x[0] == 1 && subsample_y[0] == 1) {
      return kJpegYuv400;
    }
    return kJpegUnknown;
  }
  if (number_of_components != 3) {
    return kJpegUnknown;
  }

  // Color images. The first component must be at full resolution.
  if (subsample_x[0] != 1 || subsample_y[0] != 1) {
    return kJpegUnknown;
  }
  // Every recognised layout subsamples the other two components identically.
  const int chroma_x = subsample_x[1];
  const int chroma_y = subsample_y[1];
  if (subsample_x[2] != chroma_x || subsample_y[2] != chroma_y) {
    return kJpegUnknown;
  }
  if (chroma_x == 2 && chroma_y == 2) {
    return kJpegYuv420;
  }
  if (chroma_x == 2 && chroma_y == 1) {
    return kJpegYuv422;
  }
  if (chroma_x == 1 && chroma_y == 1) {
    return kJpegYuv444;
  }
  return kJpegUnknown;
}
} // namespace libyuv
#endif // HAVE_JPEG

View file

@ -1,47 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/mjpeg_decoder.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Checks that a buffer looks like a complete JPEG: at least 64 bytes long,
// starting with the Start Of Image marker (ff d8) and containing an End Of
// Image marker (ff d9), searched for from the end of the buffer backwards.
// TODO(fbarchard): Optimize case where SOI is found but EOI is not.
LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) {
  if (sample_size < 64) {
    // ERROR: Invalid jpeg size: sample_size
    return LIBYUV_FALSE;
  }
  if (sample[0] != 0xff || sample[1] != 0xd8) {  // Start Of Image
    // ERROR: Invalid jpeg initial start code
    return LIBYUV_FALSE;
  }

  size_t pos = sample_size - 2;
  while (pos > 1) {
    if (sample[pos] == 0xd9) {
      // This byte could be the second half of a marker; examine the byte
      // just before it next.
      pos -= 1;
      continue;
    }
    if (sample[pos] == 0xff && sample[pos + 1] == 0xd9) {  // End Of Image
      return LIBYUV_TRUE;  // Success: Valid jpeg.
    }
    // Neither this byte nor the one before it can end a marker here, so
    // step back two positions.
    pos -= 2;
  }
  // ERROR: Invalid jpeg end code not found. Size sample_size
  return LIBYUV_FALSE;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,209 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/rotate.h"
#include "libyuv/cpu_id.h"
#include "libyuv/convert.h"
#include "libyuv/planar_functions.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// ARGBScale has a function to copy pixels to a row, striding each source
// pixel by a constant.
// The declarations below make that row function visible in this file: an SSE2
// variant on x86 builds, a NEON variant on ARM NEON builds, and the portable
// C variant unconditionally. The HAS_* macros record which SIMD variants are
// available.
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || \
(defined(__x86_64__) && !defined(__native_client__)) || defined(__i386__))
#define HAS_SCALEARGBROWDOWNEVEN_SSE2
void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride,
int src_stepx,
uint8* dst_ptr, int dst_width);
#endif
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON))
#define HAS_SCALEARGBROWDOWNEVEN_NEON
void ScaleARGBRowDownEven_NEON(const uint8* src_ptr, int src_stride,
int src_stepx,
uint8* dst_ptr, int dst_width);
#endif
void ScaleARGBRowDownEven_C(const uint8* src_ptr, int,
int src_stepx,
uint8* dst_ptr, int dst_width);
// Transposes an ARGB image: each source column becomes one destination row.
// width and height are the source dimensions, so the destination has `width`
// rows of `height` pixels each.
static void ARGBTranspose(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height) {
int i;
// Source stride expressed in 4 byte pixels; used as the pixel step when
// walking down a source column.
// NOTE(review): ARGBRotate90 passes a negative stride, so this right-shifts a
// negative value - confirm the targeted compilers use an arithmetic shift.
int src_pixel_step = src_stride >> 2;
void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride,
int src_step, uint8* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4) && // Width of dest.
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2;
}
#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(height, 4) && // Width of dest.
IS_ALIGNED(src, 4)) {
ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON;
}
#endif
for (i = 0; i < width; ++i) { // column of source to row of dest.
// Gather `height` pixels, one per source row, into a single dest row.
ScaleARGBRowDownEven(src, 0, src_pixel_step, dst, height);
dst += dst_stride;
src += 4; // Next source column (4 bytes per ARGB pixel).
}
}
void ARGBRotate90(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height) {
// Rotate by 90 is a ARGBTranspose with the source read
// from bottom to top. So set the source pointer to the end
// of the buffer and flip the sign of the source stride.
src += src_stride * (height - 1);
src_stride = -src_stride;
ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
}
void ARGBRotate270(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height) {
// Rotate by 270 is a ARGBTranspose with the destination written
// from bottom to top. So set the destination pointer to the end
// of the buffer and flip the sign of the destination stride.
dst += dst_stride * (width - 1);
dst_stride = -dst_stride;
ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
}
void ARGBRotate180(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height) {
// Swap first and last row and mirror the content. Uses a temporary row.
align_buffer_64(row, width * 4);
const uint8* src_bot = src + src_stride * (height - 1);
uint8* dst_bot = dst + dst_stride * (height - 1);
int half_height = (height + 1) >> 1;
int y;
void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
ARGBMirrorRow_C;
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
#if defined(HAS_ARGBMIRRORROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
ARGBMirrorRow = ARGBMirrorRow_SSSE3;
}
#endif
#if defined(HAS_ARGBMIRRORROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
ARGBMirrorRow = ARGBMirrorRow_AVX2;
}
#endif
#if defined(HAS_ARGBMIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
ARGBMirrorRow = ARGBMirrorRow_NEON;
}
#endif
#if defined(HAS_COPYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width * 4, 32)) {
CopyRow = CopyRow_NEON;
}
#endif
#if defined(HAS_COPYROW_X86)
if (TestCpuFlag(kCpuHasX86)) {
CopyRow = CopyRow_X86;
}
#endif
#if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width * 4, 32) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
CopyRow = CopyRow_SSE2;
}
#endif
#if defined(HAS_COPYROW_ERMS)
if (TestCpuFlag(kCpuHasERMS)) {
CopyRow = CopyRow_ERMS;
}
#endif
#if defined(HAS_COPYROW_MIPS)
if (TestCpuFlag(kCpuHasMIPS)) {
CopyRow = CopyRow_MIPS;
}
#endif
// Odd height will harmlessly mirror the middle row twice.
for (y = 0; y < half_height; ++y) {
ARGBMirrorRow(src, row, width); // Mirror first row into a buffer
ARGBMirrorRow(src_bot, dst, width); // Mirror last row into first row
CopyRow(row, dst_bot, width * 4); // Copy first mirrored row into last
src += src_stride;
dst += dst_stride;
src_bot -= src_stride;
dst_bot -= dst_stride;
}
free_aligned_buffer_64(row);
}
// Rotates an ARGB image by the angle selected with |mode|.
//   src_argb, src_stride_argb: source image and its row stride in bytes.
//   dst_argb, dst_stride_argb: destination image and its row stride in bytes.
//   width, height: source size in pixels; a negative height requests a
//                  vertically inverted source.
// Returns -1 for null pointers, non-positive width, zero height or an
// unknown mode. Returns the result of ARGBCopy for kRotate0 and 0 for the
// other supported modes.
LIBYUV_API
int ARGBRotate(const uint8* src_argb, int src_stride_argb,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height,
               enum RotationMode mode) {
  int result = -1;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return result;
  }
  if (height < 0) {
    // Inverted image: read the source from its last row upwards.
    height = -height;
    src_argb += (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  switch (mode) {
    case kRotate0:
      // No rotation: plain frame copy.
      result = ARGBCopy(src_argb, src_stride_argb,
                        dst_argb, dst_stride_argb,
                        width, height);
      break;
    case kRotate90:
      ARGBRotate90(src_argb, src_stride_argb,
                   dst_argb, dst_stride_argb,
                   width, height);
      result = 0;
      break;
    case kRotate270:
      ARGBRotate270(src_argb, src_stride_argb,
                    dst_argb, dst_stride_argb,
                    width, height);
      result = 0;
      break;
    case kRotate180:
      ARGBRotate180(src_argb, src_stride_argb,
                    dst_argb, dst_stride_argb,
                    width, height);
      result = 0;
      break;
    default:
      break;
  }
  return result;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View file

@ -1,486 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/row.h"
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_MIPS) && \
defined(__mips_dsp) && (__mips_dsp_rev >= 2)
// Transposes a strip of 8 source rows, |width| bytes wide.
// Each loop iteration reads one byte from each of the 8 rows (offsets
// 0..7 * src_stride from src), packs them into two words and stores them as
// 8 consecutive bytes at dst; src then advances by 1 and dst by dst_stride.
// When dst and dst_stride are both word aligned the stores use sw, otherwise
// the swr/swl pairs of the second loop are used.
// The loop tests |width| only after decrementing it, so callers must pass
// width >= 1.
void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
                             uint8* dst, int dst_stride,
                             int width) {
  __asm__ __volatile__ (
      ".set push \n"
      ".set noreorder \n"
      "sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
      "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
      "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
      "addu $t3, $t2, %[src_stride] \n" // src_stride x 3
      "addu $t5, $t4, %[src_stride] \n" // src_stride x 5
      "addu $t6, $t2, $t4 \n" // src_stride x 6
      "andi $t0, %[dst], 0x3 \n"
      "andi $t1, %[dst_stride], 0x3 \n"
      "or $t0, $t0, $t1 \n"
      "bnez $t0, 11f \n"
      " subu $t7, $t9, %[src_stride] \n" // delay slot: src_stride x 7
      //dst + dst_stride word aligned
      "1: \n"
      "lbu $t0, 0(%[src]) \n"
      "lbux $t1, %[src_stride](%[src]) \n"
      "lbux $t8, $t2(%[src]) \n"
      "lbux $t9, $t3(%[src]) \n"
      "sll $t1, $t1, 16 \n"
      "sll $t9, $t9, 16 \n"
      "or $t0, $t0, $t1 \n"
      "or $t8, $t8, $t9 \n"
      "precr.qb.ph $s0, $t8, $t0 \n"
      "lbux $t0, $t4(%[src]) \n"
      "lbux $t1, $t5(%[src]) \n"
      "lbux $t8, $t6(%[src]) \n"
      "lbux $t9, $t7(%[src]) \n"
      "sll $t1, $t1, 16 \n"
      "sll $t9, $t9, 16 \n"
      "or $t0, $t0, $t1 \n"
      "or $t8, $t8, $t9 \n"
      "precr.qb.ph $s1, $t8, $t0 \n"
      "sw $s0, 0(%[dst]) \n"
      "addiu %[width], -1 \n"
      "addiu %[src], 1 \n"
      "sw $s1, 4(%[dst]) \n"
      "bnez %[width], 1b \n"
      " addu %[dst], %[dst], %[dst_stride] \n"
      "b 2f \n"
      // Explicit delay-slot nop: under .set noreorder the instruction after
      // the branch always executes, and without this it would be the first
      // load of the unaligned loop, reading past the processed columns.
      " nop \n"
      //dst + dst_stride unaligned
      "11: \n"
      "lbu $t0, 0(%[src]) \n"
      "lbux $t1, %[src_stride](%[src]) \n"
      "lbux $t8, $t2(%[src]) \n"
      "lbux $t9, $t3(%[src]) \n"
      "sll $t1, $t1, 16 \n"
      "sll $t9, $t9, 16 \n"
      "or $t0, $t0, $t1 \n"
      "or $t8, $t8, $t9 \n"
      "precr.qb.ph $s0, $t8, $t0 \n"
      "lbux $t0, $t4(%[src]) \n"
      "lbux $t1, $t5(%[src]) \n"
      "lbux $t8, $t6(%[src]) \n"
      "lbux $t9, $t7(%[src]) \n"
      "sll $t1, $t1, 16 \n"
      "sll $t9, $t9, 16 \n"
      "or $t0, $t0, $t1 \n"
      "or $t8, $t8, $t9 \n"
      "precr.qb.ph $s1, $t8, $t0 \n"
      "swr $s0, 0(%[dst]) \n"
      "swl $s0, 3(%[dst]) \n"
      "addiu %[width], -1 \n"
      "addiu %[src], 1 \n"
      "swr $s1, 4(%[dst]) \n"
      "swl $s1, 7(%[dst]) \n"
      "bnez %[width], 11b \n"
      "addu %[dst], %[dst], %[dst_stride] \n" // delay slot
      "2: \n"
      ".set pop \n"
      :[src] "+r" (src),
       [dst] "+r" (dst),
       [width] "+r" (width)
      :[src_stride] "r" (src_stride),
       [dst_stride] "r" (dst_stride)
      // "memory": the asm reads src and writes dst through pointers that are
      // not expressed as memory operands.
      : "memory",
        "t0", "t1", "t2", "t3", "t4", "t5",
        "t6", "t7", "t8", "t9",
        "s0", "s1"
  );
}
// Transposes a strip of 8 source rows, |width| bytes wide, four columns per
// loop iteration.
// Each iteration loads one word from each of the 8 rows, rearranges the
// bytes with precr/precrq and writes four destination rows of 8 bytes each;
// src then advances by 4 and dst by 4 * dst_stride. The loop counter is
// width >> 2 (kept in $AT), and width == 0 returns immediately.
// When dst and dst_stride are both word aligned the stores use sw, otherwise
// the swr/swl pairs of the second loop are used.
// NOTE(review): the word loads (lw/lwx) presumably require src and
// src_stride to be word aligned, and width to be a non-zero multiple of 4 --
// confirm against the caller.
void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride,
                                  uint8* dst, int dst_stride,
                                  int width) {
  __asm__ __volatile__ (
      ".set noat \n"
      ".set push \n"
      ".set noreorder \n"
      "beqz %[width], 2f \n"
      " sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
      "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
      "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
      "addu $t3, $t2, %[src_stride] \n" // src_stride x 3
      "addu $t5, $t4, %[src_stride] \n" // src_stride x 5
      "addu $t6, $t2, $t4 \n" // src_stride x 6
      "srl $AT, %[width], 0x2 \n" // loop count = width / 4
      "andi $t0, %[dst], 0x3 \n"
      "andi $t1, %[dst_stride], 0x3 \n"
      "or $t0, $t0, $t1 \n"
      "bnez $t0, 11f \n"
      " subu $t7, $t9, %[src_stride] \n" // delay slot: src_stride x 7
      //dst + dst_stride word aligned
      "1: \n"
      "lw $t0, 0(%[src]) \n"
      "lwx $t1, %[src_stride](%[src]) \n"
      "lwx $t8, $t2(%[src]) \n"
      "lwx $t9, $t3(%[src]) \n"
      // t0 = | 30 | 20 | 10 | 00 |
      // t1 = | 31 | 21 | 11 | 01 |
      // t8 = | 32 | 22 | 12 | 02 |
      // t9 = | 33 | 23 | 13 | 03 |
      "precr.qb.ph $s0, $t1, $t0 \n"
      "precr.qb.ph $s1, $t9, $t8 \n"
      "precrq.qb.ph $s2, $t1, $t0 \n"
      "precrq.qb.ph $s3, $t9, $t8 \n"
      // s0 = | 21 | 01 | 20 | 00 |
      // s1 = | 23 | 03 | 22 | 02 |
      // s2 = | 31 | 11 | 30 | 10 |
      // s3 = | 33 | 13 | 32 | 12 |
      "precr.qb.ph $s4, $s1, $s0 \n"
      "precrq.qb.ph $s5, $s1, $s0 \n"
      "precr.qb.ph $s6, $s3, $s2 \n"
      "precrq.qb.ph $s7, $s3, $s2 \n"
      // s4 = | 03 | 02 | 01 | 00 |
      // s5 = | 23 | 22 | 21 | 20 |
      // s6 = | 13 | 12 | 11 | 10 |
      // s7 = | 33 | 32 | 31 | 30 |
      "lwx $t0, $t4(%[src]) \n"
      "lwx $t1, $t5(%[src]) \n"
      "lwx $t8, $t6(%[src]) \n"
      "lwx $t9, $t7(%[src]) \n"
      // t0 = | 34 | 24 | 14 | 04 |
      // t1 = | 35 | 25 | 15 | 05 |
      // t8 = | 36 | 26 | 16 | 06 |
      // t9 = | 37 | 27 | 17 | 07 |
      "precr.qb.ph $s0, $t1, $t0 \n"
      "precr.qb.ph $s1, $t9, $t8 \n"
      "precrq.qb.ph $s2, $t1, $t0 \n"
      "precrq.qb.ph $s3, $t9, $t8 \n"
      // s0 = | 25 | 05 | 24 | 04 |
      // s1 = | 27 | 07 | 26 | 06 |
      // s2 = | 35 | 15 | 34 | 14 |
      // s3 = | 37 | 17 | 36 | 16 |
      "precr.qb.ph $t0, $s1, $s0 \n"
      "precrq.qb.ph $t1, $s1, $s0 \n"
      "precr.qb.ph $t8, $s3, $s2 \n"
      "precrq.qb.ph $t9, $s3, $s2 \n"
      // t0 = | 07 | 06 | 05 | 04 |
      // t1 = | 27 | 26 | 25 | 24 |
      // t8 = | 17 | 16 | 15 | 14 |
      // t9 = | 37 | 36 | 35 | 34 |
      "addu $s0, %[dst], %[dst_stride] \n"
      "addu $s1, $s0, %[dst_stride] \n"
      "addu $s2, $s1, %[dst_stride] \n"
      "sw $s4, 0(%[dst]) \n"
      "sw $t0, 4(%[dst]) \n"
      "sw $s6, 0($s0) \n"
      "sw $t8, 4($s0) \n"
      "sw $s5, 0($s1) \n"
      "sw $t1, 4($s1) \n"
      "sw $s7, 0($s2) \n"
      "sw $t9, 4($s2) \n"
      "addiu $AT, -1 \n"
      "addiu %[src], 4 \n"
      "bnez $AT, 1b \n"
      " addu %[dst], $s2, %[dst_stride] \n"
      "b 2f \n"
      // Explicit delay-slot nop: under .set noreorder the instruction after
      // the branch always executes, and without this it would be the first
      // word load of the unaligned loop, reading past the processed columns.
      " nop \n"
      //dst + dst_stride unaligned
      "11: \n"
      "lw $t0, 0(%[src]) \n"
      "lwx $t1, %[src_stride](%[src]) \n"
      "lwx $t8, $t2(%[src]) \n"
      "lwx $t9, $t3(%[src]) \n"
      // t0 = | 30 | 20 | 10 | 00 |
      // t1 = | 31 | 21 | 11 | 01 |
      // t8 = | 32 | 22 | 12 | 02 |
      // t9 = | 33 | 23 | 13 | 03 |
      "precr.qb.ph $s0, $t1, $t0 \n"
      "precr.qb.ph $s1, $t9, $t8 \n"
      "precrq.qb.ph $s2, $t1, $t0 \n"
      "precrq.qb.ph $s3, $t9, $t8 \n"
      // s0 = | 21 | 01 | 20 | 00 |
      // s1 = | 23 | 03 | 22 | 02 |
      // s2 = | 31 | 11 | 30 | 10 |
      // s3 = | 33 | 13 | 32 | 12 |
      "precr.qb.ph $s4, $s1, $s0 \n"
      "precrq.qb.ph $s5, $s1, $s0 \n"
      "precr.qb.ph $s6, $s3, $s2 \n"
      "precrq.qb.ph $s7, $s3, $s2 \n"
      // s4 = | 03 | 02 | 01 | 00 |
      // s5 = | 23 | 22 | 21 | 20 |
      // s6 = | 13 | 12 | 11 | 10 |
      // s7 = | 33 | 32 | 31 | 30 |
      "lwx $t0, $t4(%[src]) \n"
      "lwx $t1, $t5(%[src]) \n"
      "lwx $t8, $t6(%[src]) \n"
      "lwx $t9, $t7(%[src]) \n"
      // t0 = | 34 | 24 | 14 | 04 |
      // t1 = | 35 | 25 | 15 | 05 |
      // t8 = | 36 | 26 | 16 | 06 |
      // t9 = | 37 | 27 | 17 | 07 |
      "precr.qb.ph $s0, $t1, $t0 \n"
      "precr.qb.ph $s1, $t9, $t8 \n"
      "precrq.qb.ph $s2, $t1, $t0 \n"
      "precrq.qb.ph $s3, $t9, $t8 \n"
      // s0 = | 25 | 05 | 24 | 04 |
      // s1 = | 27 | 07 | 26 | 06 |
      // s2 = | 35 | 15 | 34 | 14 |
      // s3 = | 37 | 17 | 36 | 16 |
      "precr.qb.ph $t0, $s1, $s0 \n"
      "precrq.qb.ph $t1, $s1, $s0 \n"
      "precr.qb.ph $t8, $s3, $s2 \n"
      "precrq.qb.ph $t9, $s3, $s2 \n"
      // t0 = | 07 | 06 | 05 | 04 |
      // t1 = | 27 | 26 | 25 | 24 |
      // t8 = | 17 | 16 | 15 | 14 |
      // t9 = | 37 | 36 | 35 | 34 |
      "addu $s0, %[dst], %[dst_stride] \n"
      "addu $s1, $s0, %[dst_stride] \n"
      "addu $s2, $s1, %[dst_stride] \n"
      "swr $s4, 0(%[dst]) \n"
      "swl $s4, 3(%[dst]) \n"
      "swr $t0, 4(%[dst]) \n"
      "swl $t0, 7(%[dst]) \n"
      "swr $s6, 0($s0) \n"
      "swl $s6, 3($s0) \n"
      "swr $t8, 4($s0) \n"
      "swl $t8, 7($s0) \n"
      "swr $s5, 0($s1) \n"
      "swl $s5, 3($s1) \n"
      "swr $t1, 4($s1) \n"
      "swl $t1, 7($s1) \n"
      "swr $s7, 0($s2) \n"
      "swl $s7, 3($s2) \n"
      "swr $t9, 4($s2) \n"
      "swl $t9, 7($s2) \n"
      "addiu $AT, -1 \n"
      "addiu %[src], 4 \n"
      "bnez $AT, 11b \n"
      " addu %[dst], $s2, %[dst_stride] \n"
      "2: \n"
      ".set pop \n"
      ".set at \n"
      :[src] "+r" (src),
       [dst] "+r" (dst),
       [width] "+r" (width)
      :[src_stride] "r" (src_stride),
       [dst_stride] "r" (dst_stride)
      // "memory": the asm reads src and writes dst through pointers that are
      // not expressed as memory operands.
      : "memory",
        "t0", "t1", "t2", "t3", "t4", "t5",
        "t6", "t7", "t8", "t9",
        "s0", "s1", "s2", "s3", "s4",
        "s5", "s6", "s7"
  );
}
// Transposes a strip of 8 rows of interleaved two-channel data (labelled a/b
// in the lane comments below), splitting the channels into dst_a and dst_b.
// Each loop iteration loads one word (two a/b pairs) from each of the 8
// source rows and writes two 8-byte rows to dst_a and two to dst_b; src then
// advances by 4 bytes and each destination by twice its stride. The loop
// counter is width >> 1, and width == 0 returns immediately.
// When both destinations and both destination strides are word aligned the
// stores use sw, otherwise the swr/swl pairs of the second loop are used.
// NOTE(review): the word loads presumably require src and src_stride to be
// word aligned, and width to be a non-zero multiple of 2 -- confirm against
// the caller.
void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
                               uint8* dst_a, int dst_stride_a,
                               uint8* dst_b, int dst_stride_b,
                               int width) {
  __asm__ __volatile__ (
      ".set push \n"
      ".set noreorder \n"
      "beqz %[width], 2f \n"
      " sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
      "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
      "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
      "addu $t3, $t2, %[src_stride] \n" // src_stride x 3
      "addu $t5, $t4, %[src_stride] \n" // src_stride x 5
      "addu $t6, $t2, $t4 \n" // src_stride x 6
      "subu $t7, $t9, %[src_stride] \n" // src_stride x 7
      "srl $t1, %[width], 1 \n" // loop count = width / 2
      // check word alignment for dst_a, dst_b, dst_stride_a and dst_stride_b
      "andi $t0, %[dst_a], 0x3 \n"
      "andi $t8, %[dst_b], 0x3 \n"
      "or $t0, $t0, $t8 \n"
      "andi $t8, %[dst_stride_a], 0x3 \n"
      "andi $s5, %[dst_stride_b], 0x3 \n"
      "or $t8, $t8, $s5 \n"
      "or $t0, $t0, $t8 \n"
      "bnez $t0, 11f \n"
      " nop \n"
      // dst + dst_stride word aligned (both, a & b dst addresses)
      "1: \n"
      "lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
      "lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
      "addu $s5, %[dst_a], %[dst_stride_a] \n"
      "lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
      "lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
      "addu $s6, %[dst_b], %[dst_stride_b] \n"
      "precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
      "precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
      "precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
      "precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
      "sll $t0, $t0, 16 \n"
      "packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
      "sll $t9, $t9, 16 \n"
      "packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
      "sw $s3, 0($s5) \n"
      "sw $s4, 0($s6) \n"
      "precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
      "precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
      "lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
      "lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
      "lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
      "lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
      "sw $s3, 0(%[dst_a]) \n"
      "sw $s4, 0(%[dst_b]) \n"
      "precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
      "precrq.ph.w $s2, $s0, $t9 \n" // |B7|A7|B6|A6|
      "precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
      "precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
      "sll $t0, $t0, 16 \n"
      "packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
      "sll $t9, $t9, 16 \n"
      "packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
      "sw $s3, 4($s5) \n"
      "sw $s4, 4($s6) \n"
      "precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
      "precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
      "addiu %[src], 4 \n"
      "addiu $t1, -1 \n"
      "sll $t0, %[dst_stride_a], 1 \n"
      "sll $t8, %[dst_stride_b], 1 \n"
      "sw $s3, 4(%[dst_a]) \n"
      "sw $s4, 4(%[dst_b]) \n"
      "addu %[dst_a], %[dst_a], $t0 \n"
      "bnez $t1, 1b \n"
      " addu %[dst_b], %[dst_b], $t8 \n"
      "b 2f \n"
      " nop \n"
      // dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned
      "11: \n"
      "lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
      "lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
      "addu $s5, %[dst_a], %[dst_stride_a] \n"
      "lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
      "lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
      "addu $s6, %[dst_b], %[dst_stride_b] \n"
      "precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
      "precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
      "precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
      "precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
      "sll $t0, $t0, 16 \n"
      "packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
      "sll $t9, $t9, 16 \n"
      "packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
      "swr $s3, 0($s5) \n"
      "swl $s3, 3($s5) \n"
      "swr $s4, 0($s6) \n"
      "swl $s4, 3($s6) \n"
      "precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
      "precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
      "lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
      "lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
      "lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
      "lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
      "swr $s3, 0(%[dst_a]) \n"
      "swl $s3, 3(%[dst_a]) \n"
      "swr $s4, 0(%[dst_b]) \n"
      "swl $s4, 3(%[dst_b]) \n"
      "precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
      "precrq.ph.w $s2, $s0, $t9 \n" // |B7|A7|B6|A6|
      "precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
      "precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
      "sll $t0, $t0, 16 \n"
      "packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
      "sll $t9, $t9, 16 \n"
      "packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
      "swr $s3, 4($s5) \n"
      "swl $s3, 7($s5) \n"
      "swr $s4, 4($s6) \n"
      "swl $s4, 7($s6) \n"
      "precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
      "precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
      "addiu %[src], 4 \n"
      "addiu $t1, -1 \n"
      "sll $t0, %[dst_stride_a], 1 \n"
      "sll $t8, %[dst_stride_b], 1 \n"
      "swr $s3, 4(%[dst_a]) \n"
      "swl $s3, 7(%[dst_a]) \n"
      "swr $s4, 4(%[dst_b]) \n"
      "swl $s4, 7(%[dst_b]) \n"
      "addu %[dst_a], %[dst_a], $t0 \n"
      "bnez $t1, 11b \n"
      " addu %[dst_b], %[dst_b], $t8 \n"
      "2: \n"
      ".set pop \n"
      : [src] "+r" (src),
        [dst_a] "+r" (dst_a),
        [dst_b] "+r" (dst_b),
        [width] "+r" (width),
        [src_stride] "+r" (src_stride)
      : [dst_stride_a] "r" (dst_stride_a),
        [dst_stride_b] "r" (dst_stride_b)
      // "memory": the asm reads src and writes dst_a/dst_b through pointers
      // that are not expressed as memory operands.
      : "memory",
        "t0", "t1", "t2", "t3", "t4", "t5",
        "t6", "t7", "t8", "t9",
        "s0", "s1", "s2", "s3",
        "s4", "s5", "s6"
  );
}
#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View file

@ -1,412 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/row.h"
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
// Byte-index table loaded into q3 and used with vtbl.8 by the 4x8 residual
// path of TransposeWx8_NEON below.
static uvec8 kVTbl4x4Transpose =
{ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
// Transposes a strip of 8 source rows, |width| bytes wide, into dst.
// The main loop handles 8x8 blocks (src += 8, dst += 8 * dst_stride per
// iteration); any remaining 1..7 columns are handled by the 4x8, 2x8 and 1x8
// paths that follow it.
// Operand map: %0 scratch pointer, %1 src, %2 src_stride, %3 dst,
// %4 dst_stride, %5 width (used as the column counter), %6 table address.
void TransposeWx8_NEON(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width) {
// Scratch pointer used inside the asm to walk the rows of the current block.
const uint8* src_temp = NULL;
asm volatile (
// loops are on blocks of 8. loop will stop when
// counter gets to or below 0. starting the counter
// at w-8 allow for this
#ifdef _ANDROID
".fpu neon\n"
#endif
"sub %5, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane
".p2align 2 \n"
"1: \n"
"mov %0, %1 \n"
// load 8 bytes from each of the 8 source rows
"vld1.8 {d0}, [%0], %2 \n"
"vld1.8 {d1}, [%0], %2 \n"
"vld1.8 {d2}, [%0], %2 \n"
"vld1.8 {d3}, [%0], %2 \n"
"vld1.8 {d4}, [%0], %2 \n"
"vld1.8 {d5}, [%0], %2 \n"
"vld1.8 {d6}, [%0], %2 \n"
"vld1.8 {d7}, [%0] \n"
// transpose the 8x8 byte block held in d0-d7
"vtrn.8 d1, d0 \n"
"vtrn.8 d3, d2 \n"
"vtrn.8 d5, d4 \n"
"vtrn.8 d7, d6 \n"
"vtrn.16 d1, d3 \n"
"vtrn.16 d0, d2 \n"
"vtrn.16 d5, d7 \n"
"vtrn.16 d4, d6 \n"
"vtrn.32 d1, d5 \n"
"vtrn.32 d0, d4 \n"
"vtrn.32 d3, d7 \n"
"vtrn.32 d2, d6 \n"
"vrev16.8 q0, q0 \n"
"vrev16.8 q1, q1 \n"
"vrev16.8 q2, q2 \n"
"vrev16.8 q3, q3 \n"
// store the 8 transposed rows
"mov %0, %3 \n"
"vst1.8 {d1}, [%0], %4 \n"
"vst1.8 {d0}, [%0], %4 \n"
"vst1.8 {d3}, [%0], %4 \n"
"vst1.8 {d2}, [%0], %4 \n"
"vst1.8 {d5}, [%0], %4 \n"
"vst1.8 {d4}, [%0], %4 \n"
"vst1.8 {d7}, [%0], %4 \n"
"vst1.8 {d6}, [%0] \n"
"add %1, #8 \n" // src += 8
"add %3, %3, %4, lsl #3 \n" // dst += 8 * dst_stride
"subs %5, #8 \n" // w -= 8
"bge 1b \n"
// add 8 back to counter. if the result is 0 there are
// no residuals.
"adds %5, #8 \n"
"beq 4f \n"
// some residual, so between 1 and 7 lines left to transpose
"cmp %5, #2 \n"
"blt 3f \n"
"cmp %5, #4 \n"
"blt 2f \n"
// 4x8 block
"mov %0, %1 \n"
"vld1.32 {d0[0]}, [%0], %2 \n"
"vld1.32 {d0[1]}, [%0], %2 \n"
"vld1.32 {d1[0]}, [%0], %2 \n"
"vld1.32 {d1[1]}, [%0], %2 \n"
"vld1.32 {d2[0]}, [%0], %2 \n"
"vld1.32 {d2[1]}, [%0], %2 \n"
"vld1.32 {d3[0]}, [%0], %2 \n"
"vld1.32 {d3[1]}, [%0] \n"
"mov %0, %3 \n"
// load the shuffle table (kVTbl4x4Transpose) into q3 = d6/d7
"vld1.8 {q3}, [%6] \n"
"vtbl.8 d4, {d0, d1}, d6 \n"
"vtbl.8 d5, {d0, d1}, d7 \n"
"vtbl.8 d0, {d2, d3}, d6 \n"
"vtbl.8 d1, {d2, d3}, d7 \n"
// TODO(frkoenig): Rework shuffle above to
// write out with 4 instead of 8 writes.
"vst1.32 {d4[0]}, [%0], %4 \n"
"vst1.32 {d4[1]}, [%0], %4 \n"
"vst1.32 {d5[0]}, [%0], %4 \n"
"vst1.32 {d5[1]}, [%0] \n"
"add %0, %3, #4 \n"
"vst1.32 {d0[0]}, [%0], %4 \n"
"vst1.32 {d0[1]}, [%0], %4 \n"
"vst1.32 {d1[0]}, [%0], %4 \n"
"vst1.32 {d1[1]}, [%0] \n"
"add %1, #4 \n" // src += 4
"add %3, %3, %4, lsl #2 \n" // dst += 4 * dst_stride
"subs %5, #4 \n" // w -= 4
"beq 4f \n"
// some residual, check to see if it includes a 2x8 block,
// or less
"cmp %5, #2 \n"
"blt 3f \n"
// 2x8 block
"2: \n"
"mov %0, %1 \n"
"vld1.16 {d0[0]}, [%0], %2 \n"
"vld1.16 {d1[0]}, [%0], %2 \n"
"vld1.16 {d0[1]}, [%0], %2 \n"
"vld1.16 {d1[1]}, [%0], %2 \n"
"vld1.16 {d0[2]}, [%0], %2 \n"
"vld1.16 {d1[2]}, [%0], %2 \n"
"vld1.16 {d0[3]}, [%0], %2 \n"
"vld1.16 {d1[3]}, [%0] \n"
"vtrn.8 d0, d1 \n"
"mov %0, %3 \n"
"vst1.64 {d0}, [%0], %4 \n"
"vst1.64 {d1}, [%0] \n"
"add %1, #2 \n" // src += 2
"add %3, %3, %4, lsl #1 \n" // dst += 2 * dst_stride
"subs %5, #2 \n" // w -= 2
"beq 4f \n"
// 1x8 block
"3: \n"
"vld1.8 {d0[0]}, [%1], %2 \n"
"vld1.8 {d0[1]}, [%1], %2 \n"
"vld1.8 {d0[2]}, [%1], %2 \n"
"vld1.8 {d0[3]}, [%1], %2 \n"
"vld1.8 {d0[4]}, [%1], %2 \n"
"vld1.8 {d0[5]}, [%1], %2 \n"
"vld1.8 {d0[6]}, [%1], %2 \n"
"vld1.8 {d0[7]}, [%1] \n"
"vst1.64 {d0}, [%3] \n"
"4: \n"
: "+r"(src_temp), // %0
"+r"(src), // %1
"+r"(src_stride), // %2
"+r"(dst), // %3
"+r"(dst_stride), // %4
"+r"(width) // %5
: "r"(&kVTbl4x4Transpose) // %6
: "memory", "cc", "q0", "q1", "q2", "q3"
);
}
// Byte-index table loaded into q15 and used with vtbl.8 by the 4x8 residual
// path of TransposeUVWx8_NEON below.
static uvec8 kVTbl4x4TransposeDi =
  { 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 };
// Transposes a strip of 8 rows of interleaved two-channel data, |width|
// pairs wide, writing one channel to dst_a and the other to dst_b.
// The main loop handles 8x8 blocks of pairs (src += 16, each destination
// advances by 8 * its stride); any remaining 1..7 columns are handled by the
// 4x8, 2x8 and 1x8 paths that follow it.
// Operand map: %0 scratch pointer, %1 src, %2 src_stride, %3 dst_a,
// %4 dst_stride_a, %5 dst_b, %6 dst_stride_b, %7 width (column counter),
// %8 table address.
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
                         uint8* dst_a, int dst_stride_a,
                         uint8* dst_b, int dst_stride_b,
                         int width) {
  // Scratch pointer used inside the asm to walk the rows of the current block.
  const uint8* src_temp = NULL;
  asm volatile (
    // loops are on blocks of 8. loop will stop when
    // counter gets to or below 0. starting the counter
    // at w-8 allow for this
    "sub %7, #8 \n"
    // handle 8x8 blocks. this should be the majority of the plane
    ".p2align 2 \n"
    "1: \n"
    "mov %0, %1 \n"
    // de-interleaving loads: 8 pairs from each of the 8 source rows
    "vld2.8 {d0, d1}, [%0], %2 \n"
    "vld2.8 {d2, d3}, [%0], %2 \n"
    "vld2.8 {d4, d5}, [%0], %2 \n"
    "vld2.8 {d6, d7}, [%0], %2 \n"
    "vld2.8 {d16, d17}, [%0], %2 \n"
    "vld2.8 {d18, d19}, [%0], %2 \n"
    "vld2.8 {d20, d21}, [%0], %2 \n"
    "vld2.8 {d22, d23}, [%0] \n"
    "vtrn.8 q1, q0 \n"
    "vtrn.8 q3, q2 \n"
    "vtrn.8 q9, q8 \n"
    "vtrn.8 q11, q10 \n"
    "vtrn.16 q1, q3 \n"
    "vtrn.16 q0, q2 \n"
    "vtrn.16 q9, q11 \n"
    "vtrn.16 q8, q10 \n"
    "vtrn.32 q1, q9 \n"
    "vtrn.32 q0, q8 \n"
    "vtrn.32 q3, q11 \n"
    "vtrn.32 q2, q10 \n"
    "vrev16.8 q0, q0 \n"
    "vrev16.8 q1, q1 \n"
    "vrev16.8 q2, q2 \n"
    "vrev16.8 q3, q3 \n"
    "vrev16.8 q8, q8 \n"
    "vrev16.8 q9, q9 \n"
    "vrev16.8 q10, q10 \n"
    "vrev16.8 q11, q11 \n"
    // store the 8 transposed rows of the first channel to dst_a
    "mov %0, %3 \n"
    "vst1.8 {d2}, [%0], %4 \n"
    "vst1.8 {d0}, [%0], %4 \n"
    "vst1.8 {d6}, [%0], %4 \n"
    "vst1.8 {d4}, [%0], %4 \n"
    "vst1.8 {d18}, [%0], %4 \n"
    "vst1.8 {d16}, [%0], %4 \n"
    "vst1.8 {d22}, [%0], %4 \n"
    "vst1.8 {d20}, [%0] \n"
    // store the 8 transposed rows of the second channel to dst_b
    "mov %0, %5 \n"
    "vst1.8 {d3}, [%0], %6 \n"
    "vst1.8 {d1}, [%0], %6 \n"
    "vst1.8 {d7}, [%0], %6 \n"
    "vst1.8 {d5}, [%0], %6 \n"
    "vst1.8 {d19}, [%0], %6 \n"
    "vst1.8 {d17}, [%0], %6 \n"
    "vst1.8 {d23}, [%0], %6 \n"
    "vst1.8 {d21}, [%0] \n"
    "add %1, #8*2 \n" // src += 8*2
    "add %3, %3, %4, lsl #3 \n" // dst_a += 8 * dst_stride_a
    "add %5, %5, %6, lsl #3 \n" // dst_b += 8 * dst_stride_b
    "subs %7, #8 \n" // w -= 8
    "bge 1b \n"
    // add 8 back to counter. if the result is 0 there are
    // no residuals.
    "adds %7, #8 \n"
    "beq 4f \n"
    // some residual, so between 1 and 7 lines left to transpose
    "cmp %7, #2 \n"
    "blt 3f \n"
    "cmp %7, #4 \n"
    "blt 2f \n"
    //TODO(frkoenig): Clean this up
    // 4x8 block
    "mov %0, %1 \n"
    "vld1.64 {d0}, [%0], %2 \n"
    "vld1.64 {d1}, [%0], %2 \n"
    "vld1.64 {d2}, [%0], %2 \n"
    "vld1.64 {d3}, [%0], %2 \n"
    "vld1.64 {d4}, [%0], %2 \n"
    "vld1.64 {d5}, [%0], %2 \n"
    "vld1.64 {d6}, [%0], %2 \n"
    "vld1.64 {d7}, [%0] \n"
    // load the shuffle table (kVTbl4x4TransposeDi) into q15 = d30/d31
    "vld1.8 {q15}, [%8] \n"
    "vtrn.8 q0, q1 \n"
    "vtrn.8 q2, q3 \n"
    "vtbl.8 d16, {d0, d1}, d30 \n"
    "vtbl.8 d17, {d0, d1}, d31 \n"
    "vtbl.8 d18, {d2, d3}, d30 \n"
    "vtbl.8 d19, {d2, d3}, d31 \n"
    "vtbl.8 d20, {d4, d5}, d30 \n"
    "vtbl.8 d21, {d4, d5}, d31 \n"
    "vtbl.8 d22, {d6, d7}, d30 \n"
    "vtbl.8 d23, {d6, d7}, d31 \n"
    "mov %0, %3 \n"
    "vst1.32 {d16[0]}, [%0], %4 \n"
    "vst1.32 {d16[1]}, [%0], %4 \n"
    "vst1.32 {d17[0]}, [%0], %4 \n"
    "vst1.32 {d17[1]}, [%0], %4 \n"
    "add %0, %3, #4 \n"
    "vst1.32 {d20[0]}, [%0], %4 \n"
    "vst1.32 {d20[1]}, [%0], %4 \n"
    "vst1.32 {d21[0]}, [%0], %4 \n"
    "vst1.32 {d21[1]}, [%0] \n"
    "mov %0, %5 \n"
    "vst1.32 {d18[0]}, [%0], %6 \n"
    "vst1.32 {d18[1]}, [%0], %6 \n"
    "vst1.32 {d19[0]}, [%0], %6 \n"
    "vst1.32 {d19[1]}, [%0], %6 \n"
    "add %0, %5, #4 \n"
    "vst1.32 {d22[0]}, [%0], %6 \n"
    "vst1.32 {d22[1]}, [%0], %6 \n"
    "vst1.32 {d23[0]}, [%0], %6 \n"
    "vst1.32 {d23[1]}, [%0] \n"
    "add %1, #4*2 \n" // src += 4 * 2
    "add %3, %3, %4, lsl #2 \n" // dst_a += 4 * dst_stride_a
    "add %5, %5, %6, lsl #2 \n" // dst_b += 4 * dst_stride_b
    "subs %7, #4 \n" // w -= 4
    "beq 4f \n"
    // some residual, check to see if it includes a 2x8 block,
    // or less
    "cmp %7, #2 \n"
    "blt 3f \n"
    // 2x8 block
    "2: \n"
    "mov %0, %1 \n"
    "vld2.16 {d0[0], d2[0]}, [%0], %2 \n"
    "vld2.16 {d1[0], d3[0]}, [%0], %2 \n"
    "vld2.16 {d0[1], d2[1]}, [%0], %2 \n"
    "vld2.16 {d1[1], d3[1]}, [%0], %2 \n"
    "vld2.16 {d0[2], d2[2]}, [%0], %2 \n"
    "vld2.16 {d1[2], d3[2]}, [%0], %2 \n"
    "vld2.16 {d0[3], d2[3]}, [%0], %2 \n"
    "vld2.16 {d1[3], d3[3]}, [%0] \n"
    "vtrn.8 d0, d1 \n"
    "vtrn.8 d2, d3 \n"
    "mov %0, %3 \n"
    "vst1.64 {d0}, [%0], %4 \n"
    "vst1.64 {d2}, [%0] \n"
    "mov %0, %5 \n"
    "vst1.64 {d1}, [%0], %6 \n"
    "vst1.64 {d3}, [%0] \n"
    "add %1, #2*2 \n" // src += 2 * 2
    "add %3, %3, %4, lsl #1 \n" // dst_a += 2 * dst_stride_a
    "add %5, %5, %6, lsl #1 \n" // dst_b += 2 * dst_stride_b
    "subs %7, #2 \n" // w -= 2
    "beq 4f \n"
    // 1x8 block
    "3: \n"
    "vld2.8 {d0[0], d1[0]}, [%1], %2 \n"
    "vld2.8 {d0[1], d1[1]}, [%1], %2 \n"
    "vld2.8 {d0[2], d1[2]}, [%1], %2 \n"
    "vld2.8 {d0[3], d1[3]}, [%1], %2 \n"
    "vld2.8 {d0[4], d1[4]}, [%1], %2 \n"
    "vld2.8 {d0[5], d1[5]}, [%1], %2 \n"
    "vld2.8 {d0[6], d1[6]}, [%1], %2 \n"
    "vld2.8 {d0[7], d1[7]}, [%1] \n"
    "vst1.64 {d0}, [%3] \n"
    "vst1.64 {d1}, [%5] \n"
    "4: \n"
    : "+r"(src_temp), // %0
      "+r"(src), // %1
      "+r"(src_stride), // %2
      "+r"(dst_a), // %3
      "+r"(dst_stride_a), // %4
      "+r"(dst_b), // %5
      "+r"(dst_stride_b), // %6
      "+r"(width) // %7
    : "r"(&kVTbl4x4TransposeDi) // %8
    // q15 is listed because the 4x8 path overwrites it with the table.
    : "memory", "cc",
      "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q15"
  );
}
#endif
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View file

@ -1,542 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/row.h"
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// TODO(fbarchard): Consider 'any' functions handling any quantity of pixels.
// TODO(fbarchard): Consider 'any' functions handling odd alignment.
// Planar YUV to packed-pixel "Any" wrappers.
// The SIMD row function converts the largest prefix whose length is a
// multiple of MASK + 1 pixels; the C row function converts the remainder.
//   UV_SHIFT: right shift applied to the pixel offset to get the u/v offset.
//   BPP:      bytes per destination pixel.
//   MASK:     SIMD step in pixels minus one.
#define YANY(NAMEANY, I420TORGB_SIMD, I420TORGB_C, UV_SHIFT, BPP, MASK)       \
    void NAMEANY(const uint8* y_buf, const uint8* u_buf,                      \
                 const uint8* v_buf, uint8* rgb_buf, int width) {             \
      int simd_width = width & ~MASK;                                         \
      int tail_width = width & MASK;                                          \
      I420TORGB_SIMD(y_buf, u_buf, v_buf, rgb_buf, simd_width);               \
      I420TORGB_C(y_buf + simd_width,                                         \
                  u_buf + (simd_width >> UV_SHIFT),                           \
                  v_buf + (simd_width >> UV_SHIFT),                           \
                  rgb_buf + simd_width * BPP, tail_width);                    \
    }

#ifdef HAS_I422TOARGBROW_SSSE3
YANY(I444ToARGBRow_Any_SSSE3,
     I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C, 0, 4, 7)
YANY(I422ToARGBRow_Any_SSSE3,
     I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C, 1, 4, 7)
YANY(I411ToARGBRow_Any_SSSE3,
     I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C, 2, 4, 7)
YANY(I422ToBGRARow_Any_SSSE3,
     I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C, 1, 4, 7)
YANY(I422ToABGRRow_Any_SSSE3,
     I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C, 1, 4, 7)
YANY(I422ToRGBARow_Any_SSSE3,
     I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C, 1, 4, 7)
// I422ToRGB565Row_SSSE3 is unaligned.
YANY(I422ToARGB4444Row_Any_SSSE3,
     I422ToARGB4444Row_SSSE3, I422ToARGB4444Row_C, 1, 2, 7)
YANY(I422ToARGB1555Row_Any_SSSE3,
     I422ToARGB1555Row_SSSE3, I422ToARGB1555Row_C, 1, 2, 7)
YANY(I422ToRGB565Row_Any_SSSE3,
     I422ToRGB565Row_SSSE3, I422ToRGB565Row_C, 1, 2, 7)
// I422ToRGB24Row_SSSE3 is unaligned.
YANY(I422ToRGB24Row_Any_SSSE3,
     I422ToRGB24Row_SSSE3, I422ToRGB24Row_C, 1, 3, 7)
YANY(I422ToRAWRow_Any_SSSE3,
     I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1, 3, 7)
YANY(I422ToYUY2Row_Any_SSE2,
     I422ToYUY2Row_SSE2, I422ToYUY2Row_C, 1, 2, 15)
YANY(I422ToUYVYRow_Any_SSE2,
     I422ToUYVYRow_SSE2, I422ToUYVYRow_C, 1, 2, 15)
#endif  // HAS_I422TOARGBROW_SSSE3

#ifdef HAS_I422TOARGBROW_AVX2
YANY(I422ToARGBRow_Any_AVX2,
     I422ToARGBRow_AVX2, I422ToARGBRow_C, 1, 4, 15)
#endif  // HAS_I422TOARGBROW_AVX2

#ifdef HAS_I422TOARGBROW_NEON
YANY(I444ToARGBRow_Any_NEON,
     I444ToARGBRow_NEON, I444ToARGBRow_C, 0, 4, 7)
YANY(I422ToARGBRow_Any_NEON,
     I422ToARGBRow_NEON, I422ToARGBRow_C, 1, 4, 7)
YANY(I411ToARGBRow_Any_NEON,
     I411ToARGBRow_NEON, I411ToARGBRow_C, 2, 4, 7)
YANY(I422ToBGRARow_Any_NEON,
     I422ToBGRARow_NEON, I422ToBGRARow_C, 1, 4, 7)
YANY(I422ToABGRRow_Any_NEON,
     I422ToABGRRow_NEON, I422ToABGRRow_C, 1, 4, 7)
YANY(I422ToRGBARow_Any_NEON,
     I422ToRGBARow_NEON, I422ToRGBARow_C, 1, 4, 7)
YANY(I422ToRGB24Row_Any_NEON,
     I422ToRGB24Row_NEON, I422ToRGB24Row_C, 1, 3, 7)
YANY(I422ToRAWRow_Any_NEON,
     I422ToRAWRow_NEON, I422ToRAWRow_C, 1, 3, 7)
YANY(I422ToARGB4444Row_Any_NEON,
     I422ToARGB4444Row_NEON, I422ToARGB4444Row_C, 1, 2, 7)
YANY(I422ToARGB1555Row_Any_NEON,
     I422ToARGB1555Row_NEON, I422ToARGB1555Row_C, 1, 2, 7)
YANY(I422ToRGB565Row_Any_NEON,
     I422ToRGB565Row_NEON, I422ToRGB565Row_C, 1, 2, 7)
YANY(I422ToYUY2Row_Any_NEON,
     I422ToYUY2Row_NEON, I422ToYUY2Row_C, 1, 2, 15)
YANY(I422ToUYVYRow_Any_NEON,
     I422ToUYVYRow_NEON, I422ToUYVYRow_C, 1, 2, 15)
#endif  // HAS_I422TOARGBROW_NEON
#undef YANY
// Wrappers to handle odd width for biplanar (Y plane + interleaved UV plane)
// sources.
// The SIMD row function converts the largest prefix that is a multiple of 8
// pixels; the C row function converts the remaining width & 7 pixels.
//   UV_SHIFT: right shift applied to the pixel offset to get the uv offset.
//   BPP:      bytes per destination pixel.
#define NV2NY(NAMEANY, NV12TORGB_SIMD, NV12TORGB_C, UV_SHIFT, BPP) \
    void NAMEANY(const uint8* y_buf, \
                 const uint8* uv_buf, \
                 uint8* rgb_buf, \
                 int width) { \
      int n = width & ~7; \
      NV12TORGB_SIMD(y_buf, uv_buf, rgb_buf, n); \
      NV12TORGB_C(y_buf + n, \
                  uv_buf + (n >> UV_SHIFT), \
                  rgb_buf + n * BPP, width & 7); \
    }
#ifdef HAS_NV12TOARGBROW_SSSE3
NV2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_Unaligned_SSSE3, NV12ToARGBRow_C,
      0, 4)
NV2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_Unaligned_SSSE3, NV21ToARGBRow_C,
      0, 4)
#endif  // HAS_NV12TOARGBROW_SSSE3
#ifdef HAS_NV12TOARGBROW_NEON
NV2NY(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, NV12ToARGBRow_C, 0, 4)
NV2NY(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, NV21ToARGBRow_C, 0, 4)
#endif  // HAS_NV12TOARGBROW_NEON
#ifdef HAS_NV12TORGB565ROW_SSSE3
NV2NY(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, NV12ToRGB565Row_C,
      0, 2)
NV2NY(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, NV21ToRGB565Row_C,
      0, 2)
#endif  // HAS_NV12TORGB565ROW_SSSE3
#ifdef HAS_NV12TORGB565ROW_NEON
NV2NY(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, NV12ToRGB565Row_C, 0, 2)
NV2NY(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, NV21ToRGB565Row_C, 0, 2)
#endif  // HAS_NV12TORGB565ROW_NEON
// Undefine the helper macro under the name it was actually defined with.
#undef NV2NY
// Packed-format conversion "Any" wrappers.
// The SIMD row function converts the largest prefix whose length is a
// multiple of MASK + 1 pixels; the C row function converts the remainder.
//   MASK: SIMD step in pixels minus one.
//   SBPP: bytes per source pixel.
//   BPP:  bytes per destination pixel.
#define RGBANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP)         \
    void NAMEANY(const uint8* src, uint8* dst, int width) {                   \
      int simd_width = width & ~MASK;                                         \
      int tail_width = width & MASK;                                          \
      ARGBTORGB_SIMD(src, dst, simd_width);                                   \
      ARGBTORGB_C(src + simd_width * SBPP, dst + simd_width * BPP,            \
                  tail_width);                                                \
    }

#if defined(HAS_ARGBTORGB24ROW_SSSE3)
RGBANY(ARGBToRGB24Row_Any_SSSE3,
       ARGBToRGB24Row_SSSE3, ARGBToRGB24Row_C, 15, 4, 3)
RGBANY(ARGBToRAWRow_Any_SSSE3,
       ARGBToRAWRow_SSSE3, ARGBToRAWRow_C, 15, 4, 3)
RGBANY(ARGBToRGB565Row_Any_SSE2,
       ARGBToRGB565Row_SSE2, ARGBToRGB565Row_C, 3, 4, 2)
RGBANY(ARGBToARGB1555Row_Any_SSE2,
       ARGBToARGB1555Row_SSE2, ARGBToARGB1555Row_C, 3, 4, 2)
RGBANY(ARGBToARGB4444Row_Any_SSE2,
       ARGBToARGB4444Row_SSE2, ARGBToARGB4444Row_C, 3, 4, 2)
#endif

#if defined(HAS_I400TOARGBROW_SSE2)
RGBANY(I400ToARGBRow_Any_SSE2,
       I400ToARGBRow_Unaligned_SSE2, I400ToARGBRow_C, 7, 1, 4)
#endif

#if defined(HAS_YTOARGBROW_SSE2)
RGBANY(YToARGBRow_Any_SSE2,
       YToARGBRow_SSE2, YToARGBRow_C, 7, 1, 4)
RGBANY(YUY2ToARGBRow_Any_SSSE3,
       YUY2ToARGBRow_Unaligned_SSSE3, YUY2ToARGBRow_C, 15, 2, 4)
RGBANY(UYVYToARGBRow_Any_SSSE3,
       UYVYToARGBRow_Unaligned_SSSE3, UYVYToARGBRow_C, 15, 2, 4)
// These require alignment on ARGB, so C is used for remainder.
RGBANY(RGB24ToARGBRow_Any_SSSE3,
       RGB24ToARGBRow_SSSE3, RGB24ToARGBRow_C, 15, 3, 4)
RGBANY(RAWToARGBRow_Any_SSSE3,
       RAWToARGBRow_SSSE3, RAWToARGBRow_C, 15, 3, 4)
RGBANY(RGB565ToARGBRow_Any_SSE2,
       RGB565ToARGBRow_SSE2, RGB565ToARGBRow_C, 7, 2, 4)
RGBANY(ARGB1555ToARGBRow_Any_SSE2,
       ARGB1555ToARGBRow_SSE2, ARGB1555ToARGBRow_C, 7, 2, 4)
RGBANY(ARGB4444ToARGBRow_Any_SSE2,
       ARGB4444ToARGBRow_SSE2, ARGB4444ToARGBRow_C, 7, 2, 4)
#endif

#if defined(HAS_ARGBTORGB24ROW_NEON)
RGBANY(ARGBToRGB24Row_Any_NEON,
       ARGBToRGB24Row_NEON, ARGBToRGB24Row_C, 7, 4, 3)
RGBANY(ARGBToRAWRow_Any_NEON,
       ARGBToRAWRow_NEON, ARGBToRAWRow_C, 7, 4, 3)
RGBANY(ARGBToRGB565Row_Any_NEON,
       ARGBToRGB565Row_NEON, ARGBToRGB565Row_C, 7, 4, 2)
RGBANY(ARGBToARGB1555Row_Any_NEON,
       ARGBToARGB1555Row_NEON, ARGBToARGB1555Row_C, 7, 4, 2)
RGBANY(ARGBToARGB4444Row_Any_NEON,
       ARGBToARGB4444Row_NEON, ARGBToARGB4444Row_C, 7, 4, 2)
RGBANY(I400ToARGBRow_Any_NEON,
       I400ToARGBRow_NEON, I400ToARGBRow_C, 7, 1, 4)
RGBANY(YToARGBRow_Any_NEON,
       YToARGBRow_NEON, YToARGBRow_C, 7, 1, 4)
RGBANY(YUY2ToARGBRow_Any_NEON,
       YUY2ToARGBRow_NEON, YUY2ToARGBRow_C, 7, 2, 4)
RGBANY(UYVYToARGBRow_Any_NEON,
       UYVYToARGBRow_NEON, UYVYToARGBRow_C, 7, 2, 4)
#endif
#undef RGBANY
// BAYERANY generates an any-width ARGB -> Bayer wrapper.
// The SIMD routine converts the leading (width & ~MASK) pixels and the C
// routine converts the trailing (width & MASK) pixels; `selector` is passed
// through unchanged to both. SBPP/BPP are the source / destination bytes per
// pixel. Original note: ARGB to Bayer does multiple of 4 pixels, SSSE3
// aligned src, unaligned dst.
#define BAYERANY(NAMEANY, BAYER_SIMD, BAYER_C, MASK, SBPP, BPP)                \
    void NAMEANY(const uint8* src, uint8* dst, uint32 selector, int width) {   \
      const int simd_width = width & ~MASK;                                    \
      const int tail_width = width & MASK;                                     \
      BAYER_SIMD(src, dst, selector, simd_width);                              \
      BAYER_C(src + simd_width * SBPP, dst + simd_width * BPP,                 \
              selector, tail_width);                                           \
    }

#ifdef HAS_ARGBTOBAYERROW_SSSE3
BAYERANY(ARGBToBayerRow_Any_SSSE3, ARGBToBayerRow_SSSE3, ARGBToBayerRow_C,
         7, 4, 1)
#endif
#ifdef HAS_ARGBTOBAYERROW_NEON
BAYERANY(ARGBToBayerRow_Any_NEON, ARGBToBayerRow_NEON, ARGBToBayerRow_C,
         7, 4, 1)
#endif
#ifdef HAS_ARGBTOBAYERGGROW_SSE2
BAYERANY(ARGBToBayerGGRow_Any_SSE2, ARGBToBayerGGRow_SSE2, ARGBToBayerGGRow_C,
         7, 4, 1)
#endif
#ifdef HAS_ARGBTOBAYERGGROW_NEON
BAYERANY(ARGBToBayerGGRow_Any_NEON, ARGBToBayerGGRow_NEON, ARGBToBayerGGRow_C,
         7, 4, 1)
#endif
#undef BAYERANY
// YANY ("last NUM" variant) generates an any-width wrapper that uses only the
// SIMD routine: one call for the first (width - NUM) pixels and a second call
// for the final NUM pixels of the row. SBPP/BPP are the source / destination
// bytes per pixel used to locate that final group.
// NOTE(review): (width - NUM) is negative when width < NUM; callers
// presumably only select these wrappers for width >= NUM - confirm.
#define YANY(NAMEANY, ARGBTOY_SIMD, SBPP, BPP, NUM)                            \
    void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) {             \
      const int last_start = width - NUM;                                      \
      ARGBTOY_SIMD(src_argb, dst_y, last_start);                               \
      ARGBTOY_SIMD(src_argb + last_start * SBPP,                               \
                   dst_y + last_start * BPP, NUM);                             \
    }

#if defined(HAS_ARGBTOYROW_AVX2)
YANY(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 4, 1, 32)
YANY(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 4, 1, 32)
YANY(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 2, 1, 32)
YANY(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 2, 1, 32)
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 1, 16)
#endif
#if defined(HAS_BGRATOYROW_SSSE3)
YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2, 1, 16)
YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2, 1, 16)
#endif
#if defined(HAS_ARGBTOYJROW_SSSE3)
YANY(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_Unaligned_SSSE3, 4, 1, 16)
#endif
#if defined(HAS_ARGBTOYROW_NEON)
YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 4, 1, 8)
YANY(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 4, 1, 8)
YANY(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 4, 1, 8)
YANY(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 4, 1, 8)
YANY(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 4, 1, 8)
YANY(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 3, 1, 8)
YANY(RAWToYRow_Any_NEON, RAWToYRow_NEON, 3, 1, 8)
YANY(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 2, 1, 8)
YANY(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 2, 1, 8)
YANY(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 2, 1, 8)
YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2, 1, 16)
YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 1, 16)
YANY(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 3, 4, 8)
YANY(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 3, 4, 8)
YANY(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 2, 4, 8)
YANY(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 2, 4, 8)
YANY(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 2, 4, 8)
#endif
#undef YANY
// YANY (SIMD + C remainder variant) generates any-width wrappers for the
// attenuate / unattenuate rows. The SIMD routine handles the leading
// (width & ~MASK) pixels, the C routine the trailing (width & MASK) pixels.
// Attenuate is destructive, so the overlapping "last NUM" scheme cannot be
// used for these rows.
#define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_C, SBPP, BPP, MASK)                \
    void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) {             \
      const int simd_width = width & ~MASK;                                    \
      const int tail_width = width & MASK;                                     \
      ARGBTOY_SIMD(src_argb, dst_y, simd_width);                               \
      ARGBTOY_C(src_argb + simd_width * SBPP,                                  \
                dst_y + simd_width * BPP, tail_width);                         \
    }

#if defined(HAS_ARGBATTENUATEROW_SSSE3)
YANY(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, ARGBAttenuateRow_C,
     4, 4, 3)
#endif
#if defined(HAS_ARGBATTENUATEROW_SSE2)
YANY(ARGBAttenuateRow_Any_SSE2, ARGBAttenuateRow_SSE2, ARGBAttenuateRow_C,
     4, 4, 3)
#endif
#if defined(HAS_ARGBUNATTENUATEROW_SSE2)
YANY(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2,
     ARGBUnattenuateRow_C, 4, 4, 3)
#endif
#if defined(HAS_ARGBATTENUATEROW_AVX2)
YANY(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, ARGBAttenuateRow_C,
     4, 4, 7)
#endif
#if defined(HAS_ARGBUNATTENUATEROW_AVX2)
YANY(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2,
     ARGBUnattenuateRow_C, 4, 4, 7)
#endif
#if defined(HAS_ARGBATTENUATEROW_NEON)
YANY(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, ARGBAttenuateRow_C,
     4, 4, 7)
#endif
#undef YANY
// UVANY generates any-width wrappers for RGB/YUV -> separate U and V rows.
// The SIMD routine handles the leading (width & ~MASK) pixels and the C
// routine handles the trailing (width & MASK) pixels. BPP is the source bytes
// per pixel. Each destination pointer advances by half the number of source
// pixels already processed.
#define UVANY(NAMEANY, UV_SIMD, UV_C, BPP, MASK)                               \
    void NAMEANY(const uint8* src_argb, int src_stride_argb,                   \
                 uint8* dst_u, uint8* dst_v, int width) {                      \
      const int simd_width = width & ~MASK;                                    \
      const int uv_offset = simd_width >> 1;                                   \
      UV_SIMD(src_argb, src_stride_argb, dst_u, dst_v, simd_width);            \
      UV_C(src_argb + simd_width * BPP, src_stride_argb,                       \
           dst_u + uv_offset, dst_v + uv_offset, width & MASK);                \
    }

#if defined(HAS_ARGBTOUVROW_AVX2)
UVANY(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, ARGBToUVRow_C, 4, 31)
UVANY(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, YUY2ToUVRow_C, 2, 31)
UVANY(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, UYVYToUVRow_C, 2, 31)
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C,
      4, 15)
UVANY(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_Unaligned_SSSE3, ARGBToUVJRow_C,
      4, 15)
UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C,
      4, 15)
UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C,
      4, 15)
UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_Unaligned_SSSE3, RGBAToUVRow_C,
      4, 15)
UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2, YUY2ToUVRow_C,
      2, 15)
UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C,
      2, 15)
#endif
#if defined(HAS_ARGBTOUVROW_NEON)
UVANY(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, ARGBToUVRow_C, 4, 15)
UVANY(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, ARGBToUVJRow_C, 4, 15)
UVANY(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, BGRAToUVRow_C, 4, 15)
UVANY(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, ABGRToUVRow_C, 4, 15)
UVANY(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, RGBAToUVRow_C, 4, 15)
UVANY(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, RGB24ToUVRow_C, 3, 15)
UVANY(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, RAWToUVRow_C, 3, 15)
UVANY(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, RGB565ToUVRow_C, 2, 15)
UVANY(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, ARGB1555ToUVRow_C,
      2, 15)
UVANY(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, ARGB4444ToUVRow_C,
      2, 15)
UVANY(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, YUY2ToUVRow_C, 2, 15)
UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2, 15)
#endif
#undef UVANY
// UV422ANY generates any-width wrappers for single-row source -> separate
// U and V rows. The SIMD routine handles the leading (width & ~MASK) pixels
// and the C routine the trailing (width & MASK) pixels. BPP is the source
// bytes per pixel. SHIFT is how far the processed pixel count is shifted
// right to get the destination offset (0 for the UV444 rows, 1 for UV422,
// 2 for UV411 in the instantiations below).
#define UV422ANY(NAMEANY, UV_SIMD, UV_C, BPP, MASK, SHIFT)                     \
    void NAMEANY(const uint8* src_uv,                                          \
                 uint8* dst_u, uint8* dst_v, int width) {                      \
      const int simd_width = width & ~MASK;                                    \
      const int uv_offset = simd_width >> SHIFT;                               \
      UV_SIMD(src_uv, dst_u, dst_v, simd_width);                               \
      UV_C(src_uv + simd_width * BPP,                                          \
           dst_u + uv_offset, dst_v + uv_offset, width & MASK);                \
    }

#if defined(HAS_ARGBTOUV444ROW_SSSE3)
UV422ANY(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_Unaligned_SSSE3,
         ARGBToUV444Row_C, 4, 15, 0)
#endif
#if defined(HAS_YUY2TOUV422ROW_AVX2)
UV422ANY(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2,
         YUY2ToUV422Row_C, 2, 31, 1)
UV422ANY(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2,
         UYVYToUV422Row_C, 2, 31, 1)
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
UV422ANY(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_Unaligned_SSSE3,
         ARGBToUV422Row_C, 4, 15, 1)
UV422ANY(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_Unaligned_SSE2,
         YUY2ToUV422Row_C, 2, 15, 1)
UV422ANY(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_Unaligned_SSE2,
         UYVYToUV422Row_C, 2, 15, 1)
#endif
#if defined(HAS_YUY2TOUV422ROW_NEON)
UV422ANY(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON,
         ARGBToUV444Row_C, 4, 7, 0)
UV422ANY(ARGBToUV422Row_Any_NEON, ARGBToUV422Row_NEON,
         ARGBToUV422Row_C, 4, 15, 1)
UV422ANY(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON,
         ARGBToUV411Row_C, 4, 31, 2)
UV422ANY(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON,
         YUY2ToUV422Row_C, 2, 15, 1)
UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON,
         UYVYToUV422Row_C, 2, 15, 1)
#endif
#undef UV422ANY
// SPLITUVROWANY generates any-width wrappers that split an interleaved UV
// row into separate U and V rows. The SIMD routine handles the leading
// (width & ~MASK) pairs, the C routine the trailing (width & MASK) pairs.
// The source advances two bytes per pair, each destination one byte.
#define SPLITUVROWANY(NAMEANY, SPLIT_SIMD, SPLIT_C, MASK)                      \
    void NAMEANY(const uint8* src_uv,                                          \
                 uint8* dst_u, uint8* dst_v, int width) {                      \
      const int simd_width = width & ~MASK;                                    \
      const int tail_width = width & MASK;                                     \
      SPLIT_SIMD(src_uv, dst_u, dst_v, simd_width);                            \
      SPLIT_C(src_uv + simd_width * 2,                                         \
              dst_u + simd_width, dst_v + simd_width, tail_width);             \
    }

#if defined(HAS_SPLITUVROW_SSE2)
SPLITUVROWANY(SplitUVRow_Any_SSE2, SplitUVRow_Unaligned_SSE2, SplitUVRow_C,
              15)
#endif
#if defined(HAS_SPLITUVROW_AVX2)
SPLITUVROWANY(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, SplitUVRow_C, 31)
#endif
#if defined(HAS_SPLITUVROW_NEON)
SPLITUVROWANY(SplitUVRow_Any_NEON, SplitUVRow_NEON, SplitUVRow_C, 15)
#endif
#if defined(HAS_SPLITUVROW_MIPS_DSPR2)
SPLITUVROWANY(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_Unaligned_MIPS_DSPR2,
              SplitUVRow_C, 15)
#endif
#undef SPLITUVROWANY
// MERGEUVROW_ANY generates any-width wrappers that merge separate U and V
// rows into one interleaved UV row. The SIMD routine handles the leading
// (width & ~MASK) pairs, the C routine the trailing (width & MASK) pairs.
// Each source advances one byte per pair, the destination two bytes.
#define MERGEUVROW_ANY(NAMEANY, MERGE_SIMD, MERGE_C, MASK)                     \
    void NAMEANY(const uint8* src_u, const uint8* src_v,                       \
                 uint8* dst_uv, int width) {                                   \
      const int simd_width = width & ~MASK;                                    \
      const int tail_width = width & MASK;                                     \
      MERGE_SIMD(src_u, src_v, dst_uv, simd_width);                            \
      MERGE_C(src_u + simd_width, src_v + simd_width,                          \
              dst_uv + simd_width * 2, tail_width);                            \
    }

#if defined(HAS_MERGEUVROW_SSE2)
MERGEUVROW_ANY(MergeUVRow_Any_SSE2, MergeUVRow_Unaligned_SSE2, MergeUVRow_C,
               15)
#endif
#if defined(HAS_MERGEUVROW_AVX2)
MERGEUVROW_ANY(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, MergeUVRow_C, 31)
#endif
#if defined(HAS_MERGEUVROW_NEON)
MERGEUVROW_ANY(MergeUVRow_Any_NEON, MergeUVRow_NEON, MergeUVRow_C, 15)
#endif
#undef MERGEUVROW_ANY
// MATHROW_ANY generates any-width wrappers for two-source ARGB math rows
// (multiply, add, subtract). The SIMD routine handles the leading
// (width & ~MASK) pixels, the C routine the trailing (width & MASK) pixels.
// All three buffers advance 4 bytes per pixel.
#define MATHROW_ANY(NAMEANY, ARGBMATH_SIMD, ARGBMATH_C, MASK)                  \
    void NAMEANY(const uint8* src_argb0, const uint8* src_argb1,               \
                 uint8* dst_argb, int width) {                                 \
      const int simd_width = width & ~MASK;                                    \
      const int byte_offset = simd_width * 4;                                  \
      ARGBMATH_SIMD(src_argb0, src_argb1, dst_argb, simd_width);               \
      ARGBMATH_C(src_argb0 + byte_offset, src_argb1 + byte_offset,             \
                 dst_argb + byte_offset, width & MASK);                        \
    }

#if defined(HAS_ARGBMULTIPLYROW_SSE2)
MATHROW_ANY(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, ARGBMultiplyRow_C,
            3)
#endif
#if defined(HAS_ARGBADDROW_SSE2)
MATHROW_ANY(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, ARGBAddRow_C, 3)
#endif
#if defined(HAS_ARGBSUBTRACTROW_SSE2)
MATHROW_ANY(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, ARGBSubtractRow_C,
            3)
#endif
#if defined(HAS_ARGBMULTIPLYROW_AVX2)
MATHROW_ANY(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, ARGBMultiplyRow_C,
            7)
#endif
#if defined(HAS_ARGBADDROW_AVX2)
MATHROW_ANY(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, ARGBAddRow_C, 7)
#endif
#if defined(HAS_ARGBSUBTRACTROW_AVX2)
MATHROW_ANY(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, ARGBSubtractRow_C,
            7)
#endif
#if defined(HAS_ARGBMULTIPLYROW_NEON)
MATHROW_ANY(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, ARGBMultiplyRow_C,
            7)
#endif
#if defined(HAS_ARGBADDROW_NEON)
MATHROW_ANY(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, ARGBAddRow_C, 7)
#endif
#if defined(HAS_ARGBSUBTRACTROW_NEON)
MATHROW_ANY(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, ARGBSubtractRow_C,
            7)
#endif
#undef MATHROW_ANY
// YANY (shuffle variant) generates any-width wrappers for ARGB shuffle rows.
// The SIMD routine handles the leading (width & ~MASK) pixels and the C
// routine the trailing (width & MASK) pixels; `shuffler` is passed through
// unchanged to both. Shuffle may be used in place, so the overlapping
// "last NUM" scheme cannot be used here.
#define YANY(NAMEANY, SHUFFLE_SIMD, SHUFFLE_C, SBPP, BPP, MASK)                \
    void NAMEANY(const uint8* src_argb, uint8* dst_argb,                       \
                 const uint8* shuffler, int width) {                           \
      const int simd_width = width & ~MASK;                                    \
      const int tail_width = width & MASK;                                     \
      SHUFFLE_SIMD(src_argb, dst_argb, shuffler, simd_width);                  \
      SHUFFLE_C(src_argb + simd_width * SBPP,                                  \
                dst_argb + simd_width * BPP, shuffler, tail_width);            \
    }

#if defined(HAS_ARGBSHUFFLEROW_SSE2)
YANY(ARGBShuffleRow_Any_SSE2, ARGBShuffleRow_SSE2,
     ARGBShuffleRow_C, 4, 4, 3)
#endif
#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
YANY(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_Unaligned_SSSE3,
     ARGBShuffleRow_C, 4, 4, 7)
#endif
#if defined(HAS_ARGBSHUFFLEROW_AVX2)
YANY(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2,
     ARGBShuffleRow_C, 4, 4, 15)
#endif
#if defined(HAS_ARGBSHUFFLEROW_NEON)
YANY(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON,
     ARGBShuffleRow_C, 4, 4, 3)
#endif
#undef YANY
// NANY generates any-width wrappers for InterpolateRow.
//   NAMEANY    name of the generated function.
//   TERP_SIMD  SIMD routine, called for the leading (width & ~MASK) pixels.
//   TERP_C     C routine, called for the trailing (width & MASK) pixels.
//   SBPP/BPP   source / destination bytes per pixel.
//   MASK       used as a bit mask on width, so it must be one less than a
//              power of two (SIMD step - 1).
// Interpolate may be used in place, so the overlapping "last NUM" scheme
// cannot be used here.
#define NANY(NAMEANY, TERP_SIMD, TERP_C, SBPP, BPP, MASK)                      \
    void NAMEANY(uint8* dst_ptr, const uint8* src_ptr,                         \
                 ptrdiff_t src_stride_ptr, int width,                          \
                 int source_y_fraction) {                                      \
      int n = width & ~MASK;                                                   \
      TERP_SIMD(dst_ptr, src_ptr, src_stride_ptr,                              \
                n, source_y_fraction);                                         \
      TERP_C(dst_ptr + n * BPP,                                                \
             src_ptr + n * SBPP, src_stride_ptr,                               \
             width & MASK, source_y_fraction);                                 \
    }

#ifdef HAS_INTERPOLATEROW_AVX2
// Fixed: the mask was 32, which made (width & ~MASK) clear a single bit
// instead of rounding down to a multiple of 32, so the AVX2 routine could be
// handed a width that is not a multiple of its step. 31 matches the other
// AVX2 byte-row wrappers in this file (SplitUVRow, MergeUVRow).
NANY(InterpolateRow_Any_AVX2, InterpolateRow_AVX2,
     InterpolateRow_C, 1, 1, 31)
#endif
#ifdef HAS_INTERPOLATEROW_SSSE3
NANY(InterpolateRow_Any_SSSE3, InterpolateRow_Unaligned_SSSE3,
     InterpolateRow_C, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_SSE2
NANY(InterpolateRow_Any_SSE2, InterpolateRow_Unaligned_SSE2,
     InterpolateRow_C, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_NEON
NANY(InterpolateRow_Any_NEON, InterpolateRow_NEON,
     InterpolateRow_C, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_MIPS_DSPR2
NANY(InterpolateRow_Any_MIPS_DSPR2, InterpolateRow_MIPS_DSPR2,
     InterpolateRow_C, 1, 1, 3)
#endif
#undef NANY
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

File diff suppressed because it is too large Load diff

View file

@ -1,991 +0,0 @@
/*
* Copyright (c) 2012 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// The following are available on Mips platforms:
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__)
#ifdef HAS_COPYROW_MIPS
// Copies `count` bytes from `src` to `dst` using hand-written MIPS assembly.
//
// Structure of the asm:
//  - count < 8: byte loop at $last8.
//  - src and dst share the same low two address bits: align dst to a word,
//    then copy 64-byte chunks, one optional 32-byte chunk, whole words and
//    finally the leftover bytes, all with lw/sw.
//  - otherwise ("unaligned" path): same staging, but loads use lwr/lwl pairs
//    while stores stay word-aligned.
// `pref` instructions prefetch source lines and prepare destination lines;
// the destination prefetch is skipped once dst passes the address in $t9.
//
// Operand fixes relative to the previous revision:
//  - `count` is written inside the asm, so it is now a read-write ("+r")
//    operand instead of an input-only one.
//  - a "memory" clobber is declared because the asm stores through `dst`.
//
// NOTE(review): both paths finish with `j $ra`, i.e. they return from inside
// the asm statement rather than falling out of it. This appears to rely on
// the compiler emitting no epilogue work for this function - confirm.
// NOTE(review): several labels (leave, unaligned, chk8w, ...) are plain
// symbols rather than numeric local labels, so the function presumably must
// not be inlined or duplicated - confirm.
void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
  __asm__ __volatile__ (
    ".set noreorder \n"
    ".set noat \n"
    "slti $at, %[count], 8 \n"
    "bne $at ,$zero, $last8 \n"
    "xor $t8, %[src], %[dst] \n"
    "andi $t8, $t8, 0x3 \n"
    "bne $t8, $zero, unaligned \n"
    "negu $a3, %[dst] \n"
    // make dst/src aligned
    "andi $a3, $a3, 0x3 \n"
    "beq $a3, $zero, $chk16w \n"
    // word-aligned now count is the remaining bytes count
    "subu %[count], %[count], $a3 \n"
    "lwr $t8, 0(%[src]) \n"
    "addu %[src], %[src], $a3 \n"
    "swr $t8, 0(%[dst]) \n"
    "addu %[dst], %[dst], $a3 \n"
    // Now the dst/src are mutually word-aligned with word-aligned addresses
    "$chk16w: \n"
    "andi $t8, %[count], 0x3f \n" // whole 64-B chunks?
    // t8 is the byte count after 64-byte chunks
    "beq %[count], $t8, chk8w \n"
    // There will be at most 1 32-byte chunk after it
    "subu $a3, %[count], $t8 \n" // the remainder
    // Here a3 counts bytes in 16w chunks
    "addu $a3, %[dst], $a3 \n"
    // Now a3 is the final dst after 64-byte chunks
    "addu $t0, %[dst], %[count] \n"
    // t0 is the "past the end" address
    // When in the loop we exercise "pref 30,x(a1)", the a1+x should not be past
    // the "t0-32" address
    // This means: for x=128 the last "safe" a1 address is "t0-160"
    // Alternatively, for x=64 the last "safe" a1 address is "t0-96"
    // we will use "pref 30,128(a1)", so "t0-160" is the limit
    "subu $t9, $t0, 160 \n"
    // t9 is the "last safe pref 30,128(a1)" address
    "pref 0, 0(%[src]) \n" // first line of src
    "pref 0, 32(%[src]) \n" // second line of src
    "pref 0, 64(%[src]) \n"
    "pref 30, 32(%[dst]) \n"
    // In case the a1 > t9 don't use "pref 30" at all
    "sgtu $v1, %[dst], $t9 \n"
    "bgtz $v1, $loop16w \n"
    "nop \n"
    // otherwise, start with using pref30
    "pref 30, 64(%[dst]) \n"
    "$loop16w: \n"
    "pref 0, 96(%[src]) \n"
    "lw $t0, 0(%[src]) \n"
    "bgtz $v1, $skip_pref30_96 \n" // skip
    "lw $t1, 4(%[src]) \n"
    "pref 30, 96(%[dst]) \n" // continue
    "$skip_pref30_96: \n"
    "lw $t2, 8(%[src]) \n"
    "lw $t3, 12(%[src]) \n"
    "lw $t4, 16(%[src]) \n"
    "lw $t5, 20(%[src]) \n"
    "lw $t6, 24(%[src]) \n"
    "lw $t7, 28(%[src]) \n"
    "pref 0, 128(%[src]) \n"
    // bring the next lines of src, addr 128
    "sw $t0, 0(%[dst]) \n"
    "sw $t1, 4(%[dst]) \n"
    "sw $t2, 8(%[dst]) \n"
    "sw $t3, 12(%[dst]) \n"
    "sw $t4, 16(%[dst]) \n"
    "sw $t5, 20(%[dst]) \n"
    "sw $t6, 24(%[dst]) \n"
    "sw $t7, 28(%[dst]) \n"
    "lw $t0, 32(%[src]) \n"
    "bgtz $v1, $skip_pref30_128 \n" // skip pref 30,128(a1)
    "lw $t1, 36(%[src]) \n"
    "pref 30, 128(%[dst]) \n" // set dest, addr 128
    "$skip_pref30_128: \n"
    "lw $t2, 40(%[src]) \n"
    "lw $t3, 44(%[src]) \n"
    "lw $t4, 48(%[src]) \n"
    "lw $t5, 52(%[src]) \n"
    "lw $t6, 56(%[src]) \n"
    "lw $t7, 60(%[src]) \n"
    "pref 0, 160(%[src]) \n"
    // bring the next lines of src, addr 160
    "sw $t0, 32(%[dst]) \n"
    "sw $t1, 36(%[dst]) \n"
    "sw $t2, 40(%[dst]) \n"
    "sw $t3, 44(%[dst]) \n"
    "sw $t4, 48(%[dst]) \n"
    "sw $t5, 52(%[dst]) \n"
    "sw $t6, 56(%[dst]) \n"
    "sw $t7, 60(%[dst]) \n"
    "addiu %[dst], %[dst], 64 \n" // adding 64 to dest
    "sgtu $v1, %[dst], $t9 \n"
    "bne %[dst], $a3, $loop16w \n"
    " addiu %[src], %[src], 64 \n" // adding 64 to src
    "move %[count], $t8 \n"
    // Here we have src and dest word-aligned but less than 64-bytes to go
    "chk8w: \n"
    "pref 0, 0x0(%[src]) \n"
    "andi $t8, %[count], 0x1f \n" // 32-byte chunk?
    // the t8 is the remainder count past 32-bytes
    "beq %[count], $t8, chk1w \n"
    // count=t8,no 32-byte chunk
    " nop \n"
    "lw $t0, 0(%[src]) \n"
    "lw $t1, 4(%[src]) \n"
    "lw $t2, 8(%[src]) \n"
    "lw $t3, 12(%[src]) \n"
    "lw $t4, 16(%[src]) \n"
    "lw $t5, 20(%[src]) \n"
    "lw $t6, 24(%[src]) \n"
    "lw $t7, 28(%[src]) \n"
    "addiu %[src], %[src], 32 \n"
    "sw $t0, 0(%[dst]) \n"
    "sw $t1, 4(%[dst]) \n"
    "sw $t2, 8(%[dst]) \n"
    "sw $t3, 12(%[dst]) \n"
    "sw $t4, 16(%[dst]) \n"
    "sw $t5, 20(%[dst]) \n"
    "sw $t6, 24(%[dst]) \n"
    "sw $t7, 28(%[dst]) \n"
    "addiu %[dst], %[dst], 32 \n"
    "chk1w: \n"
    "andi %[count], $t8, 0x3 \n"
    // now count is the remainder past 1w chunks
    "beq %[count], $t8, $last8 \n"
    " subu $a3, $t8, %[count] \n"
    // a3 is count of bytes in 1w chunks
    "addu $a3, %[dst], $a3 \n"
    // now a3 is the dst address past the 1w chunks
    // copying in words (4-byte chunks)
    "$wordCopy_loop: \n"
    "lw $t3, 0(%[src]) \n"
    // the first t3 may be equal t0 ... optimize?
    "addiu %[src], %[src],4 \n"
    "addiu %[dst], %[dst],4 \n"
    "bne %[dst], $a3,$wordCopy_loop \n"
    " sw $t3, -4(%[dst]) \n"
    // For the last (<8) bytes
    "$last8: \n"
    "blez %[count], leave \n"
    " addu $a3, %[dst], %[count] \n" // a3 -last dst address
    "$last8loop: \n"
    "lb $v1, 0(%[src]) \n"
    "addiu %[src], %[src], 1 \n"
    "addiu %[dst], %[dst], 1 \n"
    "bne %[dst], $a3, $last8loop \n"
    " sb $v1, -1(%[dst]) \n"
    "leave: \n"
    " j $ra \n"
    " nop \n"
    //
    // UNALIGNED case
    //
    "unaligned: \n"
    // got here with a3="negu a1"
    "andi $a3, $a3, 0x3 \n" // a1 is word aligned?
    "beqz $a3, $ua_chk16w \n"
    " subu %[count], %[count], $a3 \n"
    // bytes left after initial a3 bytes
    "lwr $v1, 0(%[src]) \n"
    "lwl $v1, 3(%[src]) \n"
    "addu %[src], %[src], $a3 \n" // a3 may be 1, 2 or 3
    "swr $v1, 0(%[dst]) \n"
    "addu %[dst], %[dst], $a3 \n"
    // below the dst will be word aligned (NOTE1)
    "$ua_chk16w: \n"
    "andi $t8, %[count], 0x3f \n" // whole 64-B chunks?
    // t8 is the byte count after 64-byte chunks
    "beq %[count], $t8, ua_chk8w \n"
    // if a2==t8, no 64-byte chunks
    // There will be at most 1 32-byte chunk after it
    "subu $a3, %[count], $t8 \n" // the remainder
    // Here a3 counts bytes in 16w chunks
    "addu $a3, %[dst], $a3 \n"
    // Now a3 is the final dst after 64-byte chunks
    "addu $t0, %[dst], %[count] \n" // t0 "past the end"
    "subu $t9, $t0, 160 \n"
    // t9 is the "last safe pref 30,128(a1)" address
    "pref 0, 0(%[src]) \n" // first line of src
    "pref 0, 32(%[src]) \n" // second line addr 32
    "pref 0, 64(%[src]) \n"
    "pref 30, 32(%[dst]) \n"
    // safe, as we have at least 64 bytes ahead
    // In case the a1 > t9 don't use "pref 30" at all
    "sgtu $v1, %[dst], $t9 \n"
    "bgtz $v1, $ua_loop16w \n"
    // skip "pref 30,64(a1)" for too short arrays
    " nop \n"
    // otherwise, start with using pref30
    "pref 30, 64(%[dst]) \n"
    "$ua_loop16w: \n"
    "pref 0, 96(%[src]) \n"
    "lwr $t0, 0(%[src]) \n"
    "lwl $t0, 3(%[src]) \n"
    "lwr $t1, 4(%[src]) \n"
    "bgtz $v1, $ua_skip_pref30_96 \n"
    " lwl $t1, 7(%[src]) \n"
    "pref 30, 96(%[dst]) \n"
    // continue setting up the dest, addr 96
    "$ua_skip_pref30_96: \n"
    "lwr $t2, 8(%[src]) \n"
    "lwl $t2, 11(%[src]) \n"
    "lwr $t3, 12(%[src]) \n"
    "lwl $t3, 15(%[src]) \n"
    "lwr $t4, 16(%[src]) \n"
    "lwl $t4, 19(%[src]) \n"
    "lwr $t5, 20(%[src]) \n"
    "lwl $t5, 23(%[src]) \n"
    "lwr $t6, 24(%[src]) \n"
    "lwl $t6, 27(%[src]) \n"
    "lwr $t7, 28(%[src]) \n"
    "lwl $t7, 31(%[src]) \n"
    "pref 0, 128(%[src]) \n"
    // bring the next lines of src, addr 128
    "sw $t0, 0(%[dst]) \n"
    "sw $t1, 4(%[dst]) \n"
    "sw $t2, 8(%[dst]) \n"
    "sw $t3, 12(%[dst]) \n"
    "sw $t4, 16(%[dst]) \n"
    "sw $t5, 20(%[dst]) \n"
    "sw $t6, 24(%[dst]) \n"
    "sw $t7, 28(%[dst]) \n"
    "lwr $t0, 32(%[src]) \n"
    "lwl $t0, 35(%[src]) \n"
    "lwr $t1, 36(%[src]) \n"
    "bgtz $v1, ua_skip_pref30_128 \n"
    " lwl $t1, 39(%[src]) \n"
    "pref 30, 128(%[dst]) \n"
    // continue setting up the dest, addr 128
    "ua_skip_pref30_128: \n"
    "lwr $t2, 40(%[src]) \n"
    "lwl $t2, 43(%[src]) \n"
    "lwr $t3, 44(%[src]) \n"
    "lwl $t3, 47(%[src]) \n"
    "lwr $t4, 48(%[src]) \n"
    "lwl $t4, 51(%[src]) \n"
    "lwr $t5, 52(%[src]) \n"
    "lwl $t5, 55(%[src]) \n"
    "lwr $t6, 56(%[src]) \n"
    "lwl $t6, 59(%[src]) \n"
    "lwr $t7, 60(%[src]) \n"
    "lwl $t7, 63(%[src]) \n"
    "pref 0, 160(%[src]) \n"
    // bring the next lines of src, addr 160
    "sw $t0, 32(%[dst]) \n"
    "sw $t1, 36(%[dst]) \n"
    "sw $t2, 40(%[dst]) \n"
    "sw $t3, 44(%[dst]) \n"
    "sw $t4, 48(%[dst]) \n"
    "sw $t5, 52(%[dst]) \n"
    "sw $t6, 56(%[dst]) \n"
    "sw $t7, 60(%[dst]) \n"
    "addiu %[dst],%[dst],64 \n" // adding 64 to dest
    "sgtu $v1,%[dst],$t9 \n"
    "bne %[dst],$a3,$ua_loop16w \n"
    " addiu %[src],%[src],64 \n" // adding 64 to src
    "move %[count],$t8 \n"
    // Here we have src and dest word-aligned but less than 64-bytes to go
    "ua_chk8w: \n"
    "pref 0, 0x0(%[src]) \n"
    "andi $t8, %[count], 0x1f \n" // 32-byte chunk?
    // the t8 is the remainder count
    "beq %[count], $t8, $ua_chk1w \n"
    // when count==t8, no 32-byte chunk
    "lwr $t0, 0(%[src]) \n"
    "lwl $t0, 3(%[src]) \n"
    "lwr $t1, 4(%[src]) \n"
    "lwl $t1, 7(%[src]) \n"
    "lwr $t2, 8(%[src]) \n"
    "lwl $t2, 11(%[src]) \n"
    "lwr $t3, 12(%[src]) \n"
    "lwl $t3, 15(%[src]) \n"
    "lwr $t4, 16(%[src]) \n"
    "lwl $t4, 19(%[src]) \n"
    "lwr $t5, 20(%[src]) \n"
    "lwl $t5, 23(%[src]) \n"
    "lwr $t6, 24(%[src]) \n"
    "lwl $t6, 27(%[src]) \n"
    "lwr $t7, 28(%[src]) \n"
    "lwl $t7, 31(%[src]) \n"
    "addiu %[src], %[src], 32 \n"
    "sw $t0, 0(%[dst]) \n"
    "sw $t1, 4(%[dst]) \n"
    "sw $t2, 8(%[dst]) \n"
    "sw $t3, 12(%[dst]) \n"
    "sw $t4, 16(%[dst]) \n"
    "sw $t5, 20(%[dst]) \n"
    "sw $t6, 24(%[dst]) \n"
    "sw $t7, 28(%[dst]) \n"
    "addiu %[dst], %[dst], 32 \n"
    "$ua_chk1w: \n"
    "andi %[count], $t8, 0x3 \n"
    // now count is the remainder past 1w chunks
    "beq %[count], $t8, ua_smallCopy \n"
    "subu $a3, $t8, %[count] \n"
    // a3 is count of bytes in 1w chunks
    "addu $a3, %[dst], $a3 \n"
    // now a3 is the dst address past the 1w chunks
    // copying in words (4-byte chunks)
    "$ua_wordCopy_loop: \n"
    "lwr $v1, 0(%[src]) \n"
    "lwl $v1, 3(%[src]) \n"
    "addiu %[src], %[src], 4 \n"
    "addiu %[dst], %[dst], 4 \n"
    // note: dst=a1 is word aligned here, see NOTE1
    "bne %[dst], $a3, $ua_wordCopy_loop \n"
    " sw $v1,-4(%[dst]) \n"
    // Now less than 4 bytes (value in count) left to copy
    "ua_smallCopy: \n"
    "beqz %[count], leave \n"
    " addu $a3, %[dst], %[count] \n" // a3 = last dst address
    "$ua_smallCopy_loop: \n"
    "lb $v1, 0(%[src]) \n"
    "addiu %[src], %[src], 1 \n"
    "addiu %[dst], %[dst], 1 \n"
    "bne %[dst],$a3,$ua_smallCopy_loop \n"
    " sb $v1, -1(%[dst]) \n"
    "j $ra \n"
    " nop \n"
    ".set at \n"
    ".set reorder \n"
    : [dst] "+r" (dst), [src] "+r" (src), [count] "+r" (count)
    :
    : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
      "t8", "t9", "a3", "v1", "at", "memory"
  );
}
#endif // HAS_COPYROW_MIPS
// MIPS DSPR2 functions
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_dsp) && \
(__mips_dsp_rev >= 2)
// Splits an interleaved UV row into separate U and V rows (MIPS DSPR2).
//
// src_uv  interleaved source, read as U0 V0 U1 V1 ...
// dst_u   receives the even source bytes.
// dst_v   receives the odd source bytes.
// width   number of UV pairs to split.
//
// The main loop handles 16 pairs per iteration using word loads/stores
// (lw/sw) and precr/precrq to separate the bytes; the remaining
// (width & 0xf) pairs are handled one at a time.
// A "memory" clobber is declared because the asm stores through dst_u/dst_v.
// NOTE(review): when width < 16 the code branches straight into the byte
// loop, whose body runs before its counter is tested, so width == 0 would
// still copy one pair; callers presumably pass width > 0 - confirm.
void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                           int width) {
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"
    "srl $t4, %[width], 4 \n" // multiplies of 16
    "blez $t4, 2f \n"
    " andi %[width], %[width], 0xf \n" // residual
    ".p2align 2 \n"
    "1: \n"
    "addiu $t4, $t4, -1 \n"
    "lw $t0, 0(%[src_uv]) \n" // V1 | U1 | V0 | U0
    "lw $t1, 4(%[src_uv]) \n" // V3 | U3 | V2 | U2
    "lw $t2, 8(%[src_uv]) \n" // V5 | U5 | V4 | U4
    "lw $t3, 12(%[src_uv]) \n" // V7 | U7 | V6 | U6
    "lw $t5, 16(%[src_uv]) \n" // V9 | U9 | V8 | U8
    "lw $t6, 20(%[src_uv]) \n" // V11 | U11 | V10 | U10
    "lw $t7, 24(%[src_uv]) \n" // V13 | U13 | V12 | U12
    "lw $t8, 28(%[src_uv]) \n" // V15 | U15 | V14 | U14
    "addiu %[src_uv], %[src_uv], 32 \n"
    "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0
    "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0
    "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4
    "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4
    "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8
    "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8
    "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12
    "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12
    "sw $t9, 0(%[dst_v]) \n"
    "sw $t0, 0(%[dst_u]) \n"
    "sw $t1, 4(%[dst_v]) \n"
    "sw $t2, 4(%[dst_u]) \n"
    "sw $t3, 8(%[dst_v]) \n"
    "sw $t5, 8(%[dst_u]) \n"
    "sw $t6, 12(%[dst_v]) \n"
    "sw $t7, 12(%[dst_u]) \n"
    "addiu %[dst_v], %[dst_v], 16 \n"
    "bgtz $t4, 1b \n"
    " addiu %[dst_u], %[dst_u], 16 \n"
    "beqz %[width], 3f \n"
    " nop \n"
    "2: \n"
    "lbu $t0, 0(%[src_uv]) \n"
    "lbu $t1, 1(%[src_uv]) \n"
    "addiu %[src_uv], %[src_uv], 2 \n"
    "addiu %[width], %[width], -1 \n"
    "sb $t0, 0(%[dst_u]) \n"
    "sb $t1, 0(%[dst_v]) \n"
    "addiu %[dst_u], %[dst_u], 1 \n"
    "bgtz %[width], 2b \n"
    " addiu %[dst_v], %[dst_v], 1 \n"
    "3: \n"
    ".set pop \n"
    : [src_uv] "+r" (src_uv),
      [width] "+r" (width),
      [dst_u] "+r" (dst_u),
      [dst_v] "+r" (dst_v)
    :
    : "t0", "t1", "t2", "t3",
      "t4", "t5", "t6", "t7", "t8", "t9", "memory"
  );
}
// Splits an interleaved UV row into separate U and V rows (MIPS DSPR2),
// using unaligned word accesses.
//
// src_uv  interleaved source, read as U0 V0 U1 V1 ...
// dst_u   receives the even source bytes.
// dst_v   receives the odd source bytes.
// width   number of UV pairs to split.
//
// Same structure as SplitUVRow_MIPS_DSPR2, but every word load is an
// lwr/lwl pair and every word store is an swr/swl pair. The main loop
// handles 16 pairs per iteration; the remaining (width & 0xf) pairs are
// handled one at a time.
// A "memory" clobber is declared because the asm stores through dst_u/dst_v.
// NOTE(review): when width < 16 the code branches straight into the byte
// loop, whose body runs before its counter is tested, so width == 0 would
// still copy one pair; callers presumably pass width > 0 - confirm.
void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
                                     uint8* dst_v, int width) {
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"
    "srl $t4, %[width], 4 \n" // multiplies of 16
    "blez $t4, 2f \n"
    " andi %[width], %[width], 0xf \n" // residual
    ".p2align 2 \n"
    "1: \n"
    "addiu $t4, $t4, -1 \n"
    "lwr $t0, 0(%[src_uv]) \n"
    "lwl $t0, 3(%[src_uv]) \n" // V1 | U1 | V0 | U0
    "lwr $t1, 4(%[src_uv]) \n"
    "lwl $t1, 7(%[src_uv]) \n" // V3 | U3 | V2 | U2
    "lwr $t2, 8(%[src_uv]) \n"
    "lwl $t2, 11(%[src_uv]) \n" // V5 | U5 | V4 | U4
    "lwr $t3, 12(%[src_uv]) \n"
    "lwl $t3, 15(%[src_uv]) \n" // V7 | U7 | V6 | U6
    "lwr $t5, 16(%[src_uv]) \n"
    "lwl $t5, 19(%[src_uv]) \n" // V9 | U9 | V8 | U8
    "lwr $t6, 20(%[src_uv]) \n"
    "lwl $t6, 23(%[src_uv]) \n" // V11 | U11 | V10 | U10
    "lwr $t7, 24(%[src_uv]) \n"
    "lwl $t7, 27(%[src_uv]) \n" // V13 | U13 | V12 | U12
    "lwr $t8, 28(%[src_uv]) \n"
    "lwl $t8, 31(%[src_uv]) \n" // V15 | U15 | V14 | U14
    "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0
    "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0
    "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4
    "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4
    "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8
    "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8
    "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12
    "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12
    "addiu %[src_uv], %[src_uv], 32 \n"
    "swr $t9, 0(%[dst_v]) \n"
    "swl $t9, 3(%[dst_v]) \n"
    "swr $t0, 0(%[dst_u]) \n"
    "swl $t0, 3(%[dst_u]) \n"
    "swr $t1, 4(%[dst_v]) \n"
    "swl $t1, 7(%[dst_v]) \n"
    "swr $t2, 4(%[dst_u]) \n"
    "swl $t2, 7(%[dst_u]) \n"
    "swr $t3, 8(%[dst_v]) \n"
    "swl $t3, 11(%[dst_v]) \n"
    "swr $t5, 8(%[dst_u]) \n"
    "swl $t5, 11(%[dst_u]) \n"
    "swr $t6, 12(%[dst_v]) \n"
    "swl $t6, 15(%[dst_v]) \n"
    "swr $t7, 12(%[dst_u]) \n"
    "swl $t7, 15(%[dst_u]) \n"
    "addiu %[dst_u], %[dst_u], 16 \n"
    "bgtz $t4, 1b \n"
    " addiu %[dst_v], %[dst_v], 16 \n"
    "beqz %[width], 3f \n"
    " nop \n"
    "2: \n"
    "lbu $t0, 0(%[src_uv]) \n"
    "lbu $t1, 1(%[src_uv]) \n"
    "addiu %[src_uv], %[src_uv], 2 \n"
    "addiu %[width], %[width], -1 \n"
    "sb $t0, 0(%[dst_u]) \n"
    "sb $t1, 0(%[dst_v]) \n"
    "addiu %[dst_u], %[dst_u], 1 \n"
    "bgtz %[width], 2b \n"
    " addiu %[dst_v], %[dst_v], 1 \n"
    "3: \n"
    ".set pop \n"
    : [src_uv] "+r" (src_uv),
      [width] "+r" (width),
      [dst_u] "+r" (dst_u),
      [dst_v] "+r" (dst_v)
    :
    : "t0", "t1", "t2", "t3",
      "t4", "t5", "t6", "t7", "t8", "t9", "memory"
  );
}
// Writes the bytes of `src` to `dst` in reverse order (MIPS DSPR2).
//
// src    source row of `width` bytes.
// dst    destination row; dst[i] receives src[width - 1 - i].
// width  number of bytes to mirror.
//
// src is first advanced to one past the end of the row and then walked
// backwards. The main loop reverses 16 bytes per iteration (wsbh + rotr
// reverse the bytes inside each word, and the four words are stored in
// reverse order); the remaining (width & 0xf) bytes are copied one at a time.
//
// Fixes relative to the previous revision:
//  - the byte loop ended with `bgez` after decrementing its counter, so it
//    ran residual + 1 times and read/wrote one byte beyond the row; it now
//    uses `bgtz`, like the byte loops in the sibling MIPS functions.
//  - a "memory" clobber is declared because the asm stores through `dst`.
// NOTE(review): when width < 16 the code branches straight into the byte
// loop, whose body runs before its counter is tested, so width == 0 would
// still copy one byte; callers presumably pass width > 0 - confirm.
void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"
    "srl $t4, %[width], 4 \n" // multiplies of 16
    "andi $t5, %[width], 0xf \n"
    "blez $t4, 2f \n"
    " addu %[src], %[src], %[width] \n" // src += width
    ".p2align 2 \n"
    "1: \n"
    "lw $t0, -16(%[src]) \n" // |3|2|1|0|
    "lw $t1, -12(%[src]) \n" // |7|6|5|4|
    "lw $t2, -8(%[src]) \n" // |11|10|9|8|
    "lw $t3, -4(%[src]) \n" // |15|14|13|12|
    "wsbh $t0, $t0 \n" // |2|3|0|1|
    "wsbh $t1, $t1 \n" // |6|7|4|5|
    "wsbh $t2, $t2 \n" // |10|11|8|9|
    "wsbh $t3, $t3 \n" // |14|15|12|13|
    "rotr $t0, $t0, 16 \n" // |0|1|2|3|
    "rotr $t1, $t1, 16 \n" // |4|5|6|7|
    "rotr $t2, $t2, 16 \n" // |8|9|10|11|
    "rotr $t3, $t3, 16 \n" // |12|13|14|15|
    "addiu %[src], %[src], -16 \n"
    "addiu $t4, $t4, -1 \n"
    "sw $t3, 0(%[dst]) \n" // |15|14|13|12|
    "sw $t2, 4(%[dst]) \n" // |11|10|9|8|
    "sw $t1, 8(%[dst]) \n" // |7|6|5|4|
    "sw $t0, 12(%[dst]) \n" // |3|2|1|0|
    "bgtz $t4, 1b \n"
    " addiu %[dst], %[dst], 16 \n"
    "beqz $t5, 3f \n"
    " nop \n"
    "2: \n"
    "lbu $t0, -1(%[src]) \n"
    "addiu $t5, $t5, -1 \n"
    "addiu %[src], %[src], -1 \n"
    "sb $t0, 0(%[dst]) \n"
    "bgtz $t5, 2b \n" // loop while residual bytes remain
    " addiu %[dst], %[dst], 1 \n"
    "3: \n"
    ".set pop \n"
    : [src] "+r" (src), [dst] "+r" (dst)
    : [width] "r" (width)
    : "t0", "t1", "t2", "t3", "t4", "t5", "memory"
  );
}
// Mirrors an interleaved UV row into separate, reversed U and V rows
// (MIPS DSPR2).
//
// src_uv  interleaved source of `width` byte pairs.
// dst_u   receives the first byte of each pair, last pair first.
// dst_v   receives the second byte of each pair, last pair first.
// width   number of UV pairs.
//
// src_uv is first advanced by 2 * width bytes and then walked backwards.
// The main loop handles 16 pairs per iteration (x = width >> 4); the
// remaining y = (width & 0xf) pairs are handled one at a time.
//
// Fixes relative to the previous revision:
//  - $t6 is written by the main loop but was missing from the clobber list.
//  - a "memory" clobber is declared because the asm stores through
//    dst_u/dst_v.
// NOTE(review): when width < 16 the code branches straight into the byte
// loop, whose body runs before its counter is tested, so width == 0 would
// still copy one pair; callers presumably pass width > 0 - confirm.
void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                            int width) {
  int x = 0;
  int y = 0;
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"
    "addu $t4, %[width], %[width] \n"
    "srl %[x], %[width], 4 \n"
    "andi %[y], %[width], 0xf \n"
    "blez %[x], 2f \n"
    " addu %[src_uv], %[src_uv], $t4 \n"
    ".p2align 2 \n"
    "1: \n"
    "lw $t0, -32(%[src_uv]) \n" // |3|2|1|0|
    "lw $t1, -28(%[src_uv]) \n" // |7|6|5|4|
    "lw $t2, -24(%[src_uv]) \n" // |11|10|9|8|
    "lw $t3, -20(%[src_uv]) \n" // |15|14|13|12|
    "lw $t4, -16(%[src_uv]) \n" // |19|18|17|16|
    "lw $t6, -12(%[src_uv]) \n" // |23|22|21|20|
    "lw $t7, -8(%[src_uv]) \n" // |27|26|25|24|
    "lw $t8, -4(%[src_uv]) \n" // |31|30|29|28|
    "rotr $t0, $t0, 16 \n" // |1|0|3|2|
    "rotr $t1, $t1, 16 \n" // |5|4|7|6|
    "rotr $t2, $t2, 16 \n" // |9|8|11|10|
    "rotr $t3, $t3, 16 \n" // |13|12|15|14|
    "rotr $t4, $t4, 16 \n" // |17|16|19|18|
    "rotr $t6, $t6, 16 \n" // |21|20|23|22|
    "rotr $t7, $t7, 16 \n" // |25|24|27|26|
    "rotr $t8, $t8, 16 \n" // |29|28|31|30|
    "precr.qb.ph $t9, $t0, $t1 \n" // |0|2|4|6|
    "precrq.qb.ph $t5, $t0, $t1 \n" // |1|3|5|7|
    "precr.qb.ph $t0, $t2, $t3 \n" // |8|10|12|14|
    "precrq.qb.ph $t1, $t2, $t3 \n" // |9|11|13|15|
    "precr.qb.ph $t2, $t4, $t6 \n" // |16|18|20|22|
    "precrq.qb.ph $t3, $t4, $t6 \n" // |17|19|21|23|
    "precr.qb.ph $t4, $t7, $t8 \n" // |24|26|28|30|
    "precrq.qb.ph $t6, $t7, $t8 \n" // |25|27|29|31|
    "addiu %[src_uv], %[src_uv], -32 \n"
    "addiu %[x], %[x], -1 \n"
    "swr $t4, 0(%[dst_u]) \n"
    "swl $t4, 3(%[dst_u]) \n" // |30|28|26|24|
    "swr $t6, 0(%[dst_v]) \n"
    "swl $t6, 3(%[dst_v]) \n" // |31|29|27|25|
    "swr $t2, 4(%[dst_u]) \n"
    "swl $t2, 7(%[dst_u]) \n" // |22|20|18|16|
    "swr $t3, 4(%[dst_v]) \n"
    "swl $t3, 7(%[dst_v]) \n" // |23|21|19|17|
    "swr $t0, 8(%[dst_u]) \n"
    "swl $t0, 11(%[dst_u]) \n" // |14|12|10|8|
    "swr $t1, 8(%[dst_v]) \n"
    "swl $t1, 11(%[dst_v]) \n" // |15|13|11|9|
    "swr $t9, 12(%[dst_u]) \n"
    "swl $t9, 15(%[dst_u]) \n" // |6|4|2|0|
    "swr $t5, 12(%[dst_v]) \n"
    "swl $t5, 15(%[dst_v]) \n" // |7|5|3|1|
    "addiu %[dst_v], %[dst_v], 16 \n"
    "bgtz %[x], 1b \n"
    " addiu %[dst_u], %[dst_u], 16 \n"
    "beqz %[y], 3f \n"
    " nop \n"
    "b 2f \n"
    " nop \n"
    "2: \n"
    "lbu $t0, -2(%[src_uv]) \n"
    "lbu $t1, -1(%[src_uv]) \n"
    "addiu %[src_uv], %[src_uv], -2 \n"
    "addiu %[y], %[y], -1 \n"
    "sb $t0, 0(%[dst_u]) \n"
    "sb $t1, 0(%[dst_v]) \n"
    "addiu %[dst_u], %[dst_u], 1 \n"
    "bgtz %[y], 2b \n"
    " addiu %[dst_v], %[dst_v], 1 \n"
    "3: \n"
    ".set pop \n"
    : [src_uv] "+r" (src_uv),
      [dst_u] "+r" (dst_u),
      [dst_v] "+r" (dst_v),
      [x] "=&r" (x),
      [y] "+r" (y)
    : [width] "r" (width)
    : "t0", "t1", "t2", "t3", "t4",
      "t5", "t6", "t7", "t8", "t9", "memory"
  );
}
// Convert (4 Y and 2 VU) I422 and arrange RGB values into
// t5 = | 0 | B0 | 0 | b0 |
// t4 = | 0 | B1 | 0 | b1 |
// t9 = | 0 | G0 | 0 | g0 |
// t8 = | 0 | G1 | 0 | g1 |
// t2 = | 0 | R0 | 0 | r0 |
// t1 = | 0 | R1 | 0 | r1 |
//
// Asm fragment meant to be pasted inside an inline-asm loop body.
// Reads:    one word (4 Y bytes) at 0(y_buf), one halfword (2 bytes) at
//           0(u_buf) and one halfword at 0(v_buf).
// Advances: u_buf and v_buf by 2. y_buf is NOT advanced here; the enclosing
//           loop has to do that.
// Expects:  $s0 = Y gain, $s1 = U->G gain, $s2 = V->G gain, $s3 = V->R gain,
//           $s4 = Y offset, $s5 = chroma offset (also used for clipping),
//           each replicated into both halfwords by the caller.
// Scratch:  $t0-$t6, $t8 and $t9 are overwritten ($t7 is not touched).
//
// Per halfword lane the arithmetic is, with Y' = (Y - $s4) * $s0,
// U' = U - $s5 and V' = V - $s5:
//   B = (127 * U' + Y') >> 6          (127 * U' is formed as (U' << 7) - U')
//   G = ($s1 * U' + $s2 * V' + Y') >> 6
//   R = ($s3 * V' + Y') >> 6
// The trailing subu / shll_s / shra / addu sequence then brings each lane back
// into byte range: subtract $s5, saturating shift left by 8, arithmetic shift
// right by 8, add $s5.
//
// Do not add "//" comments inside the macro body: every line ends in a
// backslash continuation, so a line comment would swallow the next line.
#define I422ToTransientMipsRGB \
  "lw $t0, 0(%[y_buf]) \n" \
  "lhu $t1, 0(%[u_buf]) \n" \
  "lhu $t2, 0(%[v_buf]) \n" \
  "preceu.ph.qbr $t1, $t1 \n" \
  "preceu.ph.qbr $t2, $t2 \n" \
  "preceu.ph.qbra $t3, $t0 \n" \
  "preceu.ph.qbla $t0, $t0 \n" \
  "subu.ph $t1, $t1, $s5 \n" \
  "subu.ph $t2, $t2, $s5 \n" \
  "subu.ph $t3, $t3, $s4 \n" \
  "subu.ph $t0, $t0, $s4 \n" \
  "mul.ph $t3, $t3, $s0 \n" \
  "mul.ph $t0, $t0, $s0 \n" \
  "shll.ph $t4, $t1, 0x7 \n" \
  "subu.ph $t4, $t4, $t1 \n" \
  "mul.ph $t6, $t1, $s1 \n" \
  "mul.ph $t1, $t2, $s2 \n" \
  "addq_s.ph $t5, $t4, $t3 \n" \
  "addq_s.ph $t4, $t4, $t0 \n" \
  "shra.ph $t5, $t5, 6 \n" \
  "shra.ph $t4, $t4, 6 \n" \
  "addiu %[u_buf], 2 \n" \
  "addiu %[v_buf], 2 \n" \
  "addu.ph $t6, $t6, $t1 \n" \
  "mul.ph $t1, $t2, $s3 \n" \
  "addu.ph $t9, $t6, $t3 \n" \
  "addu.ph $t8, $t6, $t0 \n" \
  "shra.ph $t9, $t9, 6 \n" \
  "shra.ph $t8, $t8, 6 \n" \
  "addu.ph $t2, $t1, $t3 \n" \
  "addu.ph $t1, $t1, $t0 \n" \
  "shra.ph $t2, $t2, 6 \n" \
  "shra.ph $t1, $t1, 6 \n" \
  "subu.ph $t5, $t5, $s5 \n" \
  "subu.ph $t4, $t4, $s5 \n" \
  "subu.ph $t9, $t9, $s5 \n" \
  "subu.ph $t8, $t8, $s5 \n" \
  "subu.ph $t2, $t2, $s5 \n" \
  "subu.ph $t1, $t1, $s5 \n" \
  "shll_s.ph $t5, $t5, 8 \n" \
  "shll_s.ph $t4, $t4, 8 \n" \
  "shll_s.ph $t9, $t9, 8 \n" \
  "shll_s.ph $t8, $t8, 8 \n" \
  "shll_s.ph $t2, $t2, 8 \n" \
  "shll_s.ph $t1, $t1, 8 \n" \
  "shra.ph $t5, $t5, 8 \n" \
  "shra.ph $t4, $t4, 8 \n" \
  "shra.ph $t9, $t9, 8 \n" \
  "shra.ph $t8, $t8, 8 \n" \
  "shra.ph $t2, $t2, 8 \n" \
  "shra.ph $t1, $t1, 8 \n" \
  "addu.ph $t5, $t5, $s5 \n" \
  "addu.ph $t4, $t4, $s5 \n" \
  "addu.ph $t9, $t9, $s5 \n" \
  "addu.ph $t8, $t8, $s5 \n" \
  "addu.ph $t2, $t2, $s5 \n" \
  "addu.ph $t1, $t1, $s5 \n"
// Converts a row of planar Y/U/V samples into 32-bit pixels whose register
// layout (most significant byte first) is |ff|R|G|B|.
//
// Each loop iteration runs I422ToTransientMipsRGB (4 Y bytes, 2 U bytes,
// 2 V bytes in), then stores four words (16 bytes) to rgb_buf and advances
// y_buf by 4 and rgb_buf by 16.
//
// width is decremented by 4 per iteration and the loop only exits when it is
// exactly zero (bnez), so width must be a multiple of 4. width == 0 returns
// without touching any buffer.
void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
                              uint8* rgb_buf,
                              int width) {
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"
    "beqz %[width], 2f \n"
    // Delay slot of the branch above (noreorder): runs even when width == 0.
    " repl.ph $s0, 74 \n" // |YG|YG| = |74|74|
    "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
    "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
    "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
    "repl.ph $s4, 16 \n" // |0|16|0|16|
    "repl.ph $s5, 128 \n" // |128|128| // clipping
    // $s6 = 0xff00ff00: alpha mask OR-ed into the upper byte of each halfword.
    "lui $s6, 0xff00 \n"
    "ori $s6, 0xff00 \n" // |ff|00|ff|00|
    ".p2align 2 \n"
    "1: \n"
    I422ToTransientMipsRGB
    // Arranging into argb format
    "precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1|
    "precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0|
    "addiu %[width], -4 \n"
    "precrq.qb.ph $t8, $t4, $t5 \n" // |G1|B1|G0|B0|
    "precr.qb.ph $t9, $t4, $t5 \n" // |g1|b1|g0|b0|
    "precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
    "addiu %[y_buf], 4 \n"
    "preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
    "preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
    "or $t1, $t1, $s6 \n" // |ff|R1|ff|R0|
    "or $t2, $t2, $s6 \n" // |ff|r1|ff|r0|
    "precrq.ph.w $t0, $t2, $t9 \n" // |ff|r1|g1|b1|
    "precrq.ph.w $t3, $t1, $t8 \n" // |ff|R1|G1|B1|
    "sll $t9, $t9, 16 \n"
    "sll $t8, $t8, 16 \n"
    "packrl.ph $t2, $t2, $t9 \n" // |ff|r0|g0|b0|
    "packrl.ph $t1, $t1, $t8 \n" // |ff|R0|G0|B0|
    // Store results.
    "sw $t2, 0(%[rgb_buf]) \n"
    "sw $t0, 4(%[rgb_buf]) \n"
    "sw $t1, 8(%[rgb_buf]) \n"
    "sw $t3, 12(%[rgb_buf]) \n"
    "bnez %[width], 1b \n"
    // Delay slot: the output pointer is bumped on every iteration.
    " addiu %[rgb_buf], 16 \n"
    "2: \n"
    ".set pop \n"
    :[y_buf] "+r" (y_buf),
     [u_buf] "+r" (u_buf),
     [v_buf] "+r" (v_buf),
     [width] "+r" (width),
     [rgb_buf] "+r" (rgb_buf)
    :
    : "t0", "t1", "t2", "t3", "t4", "t5",
      "t6", "t7", "t8", "t9",
      "s0", "s1", "s2", "s3",
      "s4", "s5", "s6"
  );
}
// Converts a row of planar Y/U/V samples into 32-bit pixels whose register
// layout (most significant byte first) is |ff|B|G|R|.
//
// Each loop iteration runs I422ToTransientMipsRGB (4 Y bytes, 2 U bytes,
// 2 V bytes in), then stores four words (16 bytes) to rgb_buf and advances
// y_buf by 4 and rgb_buf by 16.
//
// width is decremented by 4 per iteration and the loop only exits when it is
// exactly zero (bnez), so width must be a multiple of 4. width == 0 returns
// without touching any buffer.
void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
                              uint8* rgb_buf,
                              int width) {
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"
    "beqz %[width], 2f \n"
    // Delay slot of the branch above (noreorder): runs even when width == 0.
    " repl.ph $s0, 74 \n" // |YG|YG| = |74|74|
    "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
    "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
    "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
    "repl.ph $s4, 16 \n" // |0|16|0|16|
    "repl.ph $s5, 128 \n" // |128|128|
    // $s6 = 0xff00ff00: alpha mask OR-ed into the upper byte of each halfword.
    "lui $s6, 0xff00 \n"
    "ori $s6, 0xff00 \n" // |ff|00|ff|00|
    ".p2align 2 \n"
    "1: \n"
    I422ToTransientMipsRGB
    // Arranging into abgr format
    "precr.qb.ph $t0, $t8, $t1 \n" // |G1|g1|R1|r1|
    "precr.qb.ph $t3, $t9, $t2 \n" // |G0|g0|R0|r0|
    "precrq.qb.ph $t8, $t0, $t3 \n" // |G1|R1|G0|R0|
    "precr.qb.ph $t9, $t0, $t3 \n" // |g1|r1|g0|r0|
    "precr.qb.ph $t2, $t4, $t5 \n" // |B1|b1|B0|b0|
    "addiu %[width], -4 \n"
    "addiu %[y_buf], 4 \n"
    "preceu.ph.qbla $t1, $t2 \n" // |0 |B1|0 |B0|
    "preceu.ph.qbra $t2, $t2 \n" // |0 |b1|0 |b0|
    "or $t1, $t1, $s6 \n" // |ff|B1|ff|B0|
    "or $t2, $t2, $s6 \n" // |ff|b1|ff|b0|
    "precrq.ph.w $t0, $t2, $t9 \n" // |ff|b1|g1|r1|
    "precrq.ph.w $t3, $t1, $t8 \n" // |ff|B1|G1|R1|
    "sll $t9, $t9, 16 \n"
    "sll $t8, $t8, 16 \n"
    "packrl.ph $t2, $t2, $t9 \n" // |ff|b0|g0|r0|
    "packrl.ph $t1, $t1, $t8 \n" // |ff|B0|G0|R0|
    // Store results.
    "sw $t2, 0(%[rgb_buf]) \n"
    "sw $t0, 4(%[rgb_buf]) \n"
    "sw $t1, 8(%[rgb_buf]) \n"
    "sw $t3, 12(%[rgb_buf]) \n"
    "bnez %[width], 1b \n"
    // Delay slot: the output pointer is bumped on every iteration.
    " addiu %[rgb_buf], 16 \n"
    "2: \n"
    ".set pop \n"
    :[y_buf] "+r" (y_buf),
     [u_buf] "+r" (u_buf),
     [v_buf] "+r" (v_buf),
     [width] "+r" (width),
     [rgb_buf] "+r" (rgb_buf)
    :
    : "t0", "t1", "t2", "t3", "t4", "t5",
      "t6", "t7", "t8", "t9",
      "s0", "s1", "s2", "s3",
      "s4", "s5", "s6"
  );
}
// Converts a row of planar Y/U/V samples into 32-bit pixels whose register
// layout (most significant byte first) is |B|G|R|ff|.
//
// Each loop iteration runs I422ToTransientMipsRGB (4 Y bytes, 2 U bytes,
// 2 V bytes in), then stores four words (16 bytes) to rgb_buf and advances
// y_buf by 4 and rgb_buf by 16.
//
// width is decremented by 4 per iteration and the loop only exits when it is
// exactly zero (bnez), so width must be a multiple of 4. width == 0 returns
// without touching any buffer.
void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
                              uint8* rgb_buf,
                              int width) {
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"
    "beqz %[width], 2f \n"
    // Delay slot of the branch above (noreorder): runs even when width == 0.
    " repl.ph $s0, 74 \n" // |YG|YG| = |74 |74 |
    "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
    "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
    "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
    "repl.ph $s4, 16 \n" // |0|16|0|16|
    "repl.ph $s5, 128 \n" // |128|128|
    // $s6 = 0x00ff00ff: alpha mask OR-ed into the lower byte of each halfword
    // (alpha ends up in the least significant byte for this format).
    "lui $s6, 0xff \n"
    "ori $s6, 0xff \n" // |00|ff|00|ff|
    ".p2align 2 \n"
    "1: \n"
    I422ToTransientMipsRGB
    // Arranging into bgra format
    "precr.qb.ph $t4, $t4, $t8 \n" // |B1|b1|G1|g1|
    "precr.qb.ph $t5, $t5, $t9 \n" // |B0|b0|G0|g0|
    "precrq.qb.ph $t8, $t4, $t5 \n" // |B1|G1|B0|G0|
    "precr.qb.ph $t9, $t4, $t5 \n" // |b1|g1|b0|g0|
    "precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
    "addiu %[width], -4 \n"
    "addiu %[y_buf], 4 \n"
    "preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
    "preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
    "sll $t1, $t1, 8 \n" // |R1|0 |R0|0 |
    "sll $t2, $t2, 8 \n" // |r1|0 |r0|0 |
    "or $t1, $t1, $s6 \n" // |R1|ff|R0|ff|
    "or $t2, $t2, $s6 \n" // |r1|ff|r0|ff|
    "precrq.ph.w $t0, $t9, $t2 \n" // |b1|g1|r1|ff|
    "precrq.ph.w $t3, $t8, $t1 \n" // |B1|G1|R1|ff|
    "sll $t1, $t1, 16 \n"
    "sll $t2, $t2, 16 \n"
    "packrl.ph $t2, $t9, $t2 \n" // |b0|g0|r0|ff|
    "packrl.ph $t1, $t8, $t1 \n" // |B0|G0|R0|ff|
    // Store results.
    "sw $t2, 0(%[rgb_buf]) \n"
    "sw $t0, 4(%[rgb_buf]) \n"
    "sw $t1, 8(%[rgb_buf]) \n"
    "sw $t3, 12(%[rgb_buf]) \n"
    "bnez %[width], 1b \n"
    // Delay slot: the output pointer is bumped on every iteration.
    " addiu %[rgb_buf], 16 \n"
    "2: \n"
    ".set pop \n"
    :[y_buf] "+r" (y_buf),
     [u_buf] "+r" (u_buf),
     [v_buf] "+r" (v_buf),
     [width] "+r" (width),
     [rgb_buf] "+r" (rgb_buf)
    :
    : "t0", "t1", "t2", "t3", "t4", "t5",
      "t6", "t7", "t8", "t9",
      "s0", "s1", "s2", "s3",
      "s4", "s5", "s6"
  );
}
// Bilinear filter 8x2 -> 8x1
//
// Blends the row at src_ptr with the row at src_ptr + src_stride and writes
// the result to dst_ptr. Per byte the asm computes
//   (row0 * (256 - source_y_fraction) + row1 * source_y_fraction) >> 8
// using the DSP halfword multiply/add instructions below.
//
// Each iteration reads 8 bytes from both rows with word loads, writes 8 bytes
// with word stores, and subtracts 8 from dst_width. There is no check before
// the first iteration, so one 8-byte group is always processed; the loop
// repeats while dst_width is still > 0.
//
// NOTE(review): the row-function pointers assigned in the plane scalers use
// the name InterpolateRow_MIPS_DSPR2 (no trailing 's' on "Row") - confirm
// which spelling the header declares.
void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
                                ptrdiff_t src_stride, int dst_width,
                                int source_y_fraction) {
  // Weight applied to the first row; the second row gets source_y_fraction.
  int y0_fraction = 256 - source_y_fraction;
  // Second source row.
  const uint8* src_ptr1 = src_ptr + src_stride;
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"
    // Replicate both weights into the two halfwords of $t0 / $t1.
    "replv.ph $t0, %[y0_fraction] \n"
    "replv.ph $t1, %[source_y_fraction] \n"
    ".p2align 2 \n"
    "1: \n"
    "lw $t2, 0(%[src_ptr]) \n"
    "lw $t3, 0(%[src_ptr1]) \n"
    "lw $t4, 4(%[src_ptr]) \n"
    "lw $t5, 4(%[src_ptr1]) \n"
    // Multiply the left/right byte pairs of each word by the row weight.
    "muleu_s.ph.qbl $t6, $t2, $t0 \n"
    "muleu_s.ph.qbr $t7, $t2, $t0 \n"
    "muleu_s.ph.qbl $t8, $t3, $t1 \n"
    "muleu_s.ph.qbr $t9, $t3, $t1 \n"
    "muleu_s.ph.qbl $t2, $t4, $t0 \n"
    "muleu_s.ph.qbr $t3, $t4, $t0 \n"
    "muleu_s.ph.qbl $t4, $t5, $t1 \n"
    "muleu_s.ph.qbr $t5, $t5, $t1 \n"
    // Sum the two weighted rows, then drop the 8 fractional bits.
    "addq.ph $t6, $t6, $t8 \n"
    "addq.ph $t7, $t7, $t9 \n"
    "addq.ph $t2, $t2, $t4 \n"
    "addq.ph $t3, $t3, $t5 \n"
    "shra.ph $t6, $t6, 8 \n"
    "shra.ph $t7, $t7, 8 \n"
    "shra.ph $t2, $t2, 8 \n"
    "shra.ph $t3, $t3, 8 \n"
    // Repack the halfword results into bytes.
    "precr.qb.ph $t6, $t6, $t7 \n"
    "precr.qb.ph $t2, $t2, $t3 \n"
    "addiu %[src_ptr], %[src_ptr], 8 \n"
    "addiu %[src_ptr1], %[src_ptr1], 8 \n"
    "addiu %[dst_width], %[dst_width], -8 \n"
    "sw $t6, 0(%[dst_ptr]) \n"
    "sw $t2, 4(%[dst_ptr]) \n"
    "bgtz %[dst_width], 1b \n"
    // Delay slot: the output pointer is bumped on every iteration.
    " addiu %[dst_ptr], %[dst_ptr], 8 \n"
    ".set pop \n"
    : [dst_ptr] "+r" (dst_ptr),
      [src_ptr1] "+r" (src_ptr1),
      [src_ptr] "+r" (src_ptr),
      [dst_width] "+r" (dst_width)
    : [source_y_fraction] "r" (source_y_fraction),
      [y0_fraction] "r" (y0_fraction),
      // src_stride is passed in but never referenced by the asm template;
      // it is only used above to compute src_ptr1.
      [src_stride] "r" (src_stride)
    : "t0", "t1", "t2", "t3", "t4", "t5",
      "t6", "t7", "t8", "t9"
  );
}
#endif // __mips_dsp_rev >= 2
#endif // defined(__mips__)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,146 +0,0 @@
;
; Copyright 2012 The LibYuv Project Authors. All rights reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01020000h
%error AVX2 is supported only by yasm 1.2.0 or later.
%endif
%endif
%include "x86inc.asm"
SECTION .text
; cglobal numeric constants are parameters, gpr regs, mm regs
; void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix)
;
; Extracts the Y bytes from a packed YUY2 or UYVY row.
;   %1 = source format: YUY2 (Y in even bytes) or UYVY (Y in odd bytes)
;   %2 = 'a' or 'u', appended to "mov" to pick the aligned/unaligned move
;   %3 = optional suffix appended to the generated function name
; Each iteration reads 2 * mmsize source bytes, writes mmsize Y bytes and
; subtracts mmsize from pix; the loop repeats while the result is > 0.
%macro YUY2TOYROW 2-3
cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix
%ifidn %1,YUY2
    pcmpeqb m2, m2, m2 ; generate mask 0x00ff00ff
    psrlw m2, m2, 8
%endif
    ALIGN 4
.convertloop:
    mov%2 m0, [src_yuy2q]
    mov%2 m1, [src_yuy2q + mmsize]
    lea src_yuy2q, [src_yuy2q + mmsize * 2]
%ifidn %1,YUY2
    pand m0, m0, m2 ; YUY2 even bytes are Y
    pand m1, m1, m2
%else
    psrlw m0, m0, 8 ; UYVY odd bytes are Y
    psrlw m1, m1, 8
%endif
    packuswb m0, m0, m1 ; narrow the 16-bit lanes back to bytes
%if cpuflag(AVX2)
    ; 0xd8 selects the 64-bit quarters in the order 0,2,1,3 - presumably to
    ; undo the per-128-bit-lane packing of packuswb on ymm registers.
    vpermq m0, m0, 0xd8
%endif
    sub pixd, mmsize ; sets the flags tested by jg below
    mov%2 [dst_yq], m0 ; mov/lea do not modify the flags
    lea dst_yq, [dst_yq + mmsize]
    jg .convertloop
    REP_RET
%endmacro
; TODO(fbarchard): Remove MMX. Add SSSE3 pshufb version.
; Instantiate aligned and unaligned variants for each instruction set.
INIT_MMX MMX
YUY2TOYROW YUY2,a,
YUY2TOYROW YUY2,u,_Unaligned
YUY2TOYROW UYVY,a,
YUY2TOYROW UYVY,u,_Unaligned
INIT_XMM SSE2
YUY2TOYROW YUY2,a,
YUY2TOYROW YUY2,u,_Unaligned
YUY2TOYROW UYVY,a,
YUY2TOYROW UYVY,u,_Unaligned
; Only the aligned variants are generated for AVX2.
INIT_YMM AVX2
YUY2TOYROW YUY2,a,
YUY2TOYROW UYVY,a,
; void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix)
;
; De-interleaves a row of byte pairs: even source bytes go to dst_u, odd
; source bytes go to dst_v.
;   %1 = 'a' or 'u', appended to "mov" to pick the aligned/unaligned move
;   %2 = optional suffix appended to the generated function name
; Each iteration reads 2 * mmsize source bytes, writes mmsize bytes to each
; destination and subtracts mmsize from pix; it repeats while the result > 0.
%macro SplitUVRow 1-2
cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix
    pcmpeqb m4, m4, m4 ; generate mask 0x00ff00ff
    psrlw m4, m4, 8
    ; Turn dst_v into an offset from dst_u so only dst_u has to be advanced;
    ; the V store below addresses [dst_uq + dst_vq].
    sub dst_vq, dst_uq
    ALIGN 4
.convertloop:
    mov%1 m0, [src_uvq]
    mov%1 m1, [src_uvq + mmsize]
    lea src_uvq, [src_uvq + mmsize * 2]
    psrlw m2, m0, 8 ; odd bytes
    psrlw m3, m1, 8
    pand m0, m0, m4 ; even bytes
    pand m1, m1, m4
    packuswb m0, m0, m1
    packuswb m2, m2, m3
%if cpuflag(AVX2)
    ; 0xd8 selects the 64-bit quarters in the order 0,2,1,3 - presumably to
    ; undo the per-128-bit-lane packing of packuswb on ymm registers.
    vpermq m0, m0, 0xd8
    vpermq m2, m2, 0xd8
%endif
    mov%1 [dst_uq], m0
    mov%1 [dst_uq + dst_vq], m2
    lea dst_uq, [dst_uq + mmsize] ; lea leaves the flags alone
    sub pixd, mmsize
    jg .convertloop
    REP_RET
%endmacro
; Instantiate aligned and unaligned variants for each instruction set.
INIT_MMX MMX
SplitUVRow a,
SplitUVRow u,_Unaligned
INIT_XMM SSE2
SplitUVRow a,
SplitUVRow u,_Unaligned
; Only the aligned variant is generated for AVX2.
INIT_YMM AVX2
SplitUVRow a,
; void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
; int width);
;
; Interleaves a row of U bytes with a row of V bytes into UV byte pairs.
;   %1 = 'a' or 'u', appended to "mov" to pick the aligned/unaligned move
;   %2 = optional suffix appended to the generated function name
; Each iteration reads mmsize bytes from each source, writes 2 * mmsize bytes
; and subtracts mmsize from pix; it repeats while the result is > 0.
%macro MergeUVRow_ 1-2
cglobal MergeUVRow_%2, 4, 4, 3, src_u, src_v, dst_uv, pix
    ; Turn src_v into an offset from src_u so only src_u has to be advanced.
    sub src_vq, src_uq
    ALIGN 4
.convertloop:
    mov%1 m0, [src_uq]
    ; src_vq is an offset after the sub above, so it must be added back to
    ; src_uq to address the V row (same scheme as SplitUVRow's V store).
    mov%1 m1, [src_uq + src_vq]
    lea src_uq, [src_uq + mmsize]
    punpcklbw m2, m0, m1 ; first 8 UV pairs
    punpckhbw m0, m0, m1 ; next 8 UV pairs
%if cpuflag(AVX2)
    vperm2i128 m1, m2, m0, 0x20 ; low 128 of ymm2 and low 128 of ymm0
    vperm2i128 m2, m2, m0, 0x31 ; high 128 of ymm2 and high 128 of ymm0
    mov%1 [dst_uvq], m1
    mov%1 [dst_uvq + mmsize], m2
%else
    mov%1 [dst_uvq], m2
    mov%1 [dst_uvq + mmsize], m0
%endif
    lea dst_uvq, [dst_uvq + mmsize * 2] ; lea leaves the flags alone
    sub pixd, mmsize
    jg .convertloop
    REP_RET
%endmacro
; Instantiate aligned and unaligned variants for each instruction set.
INIT_MMX MMX
MergeUVRow_ a,
MergeUVRow_ u,_Unaligned
INIT_XMM SSE2
MergeUVRow_ a,
MergeUVRow_ u,_Unaligned
; Only the aligned variant is generated for AVX2.
INIT_YMM AVX2
MergeUVRow_ a,

View file

@ -1,926 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/scale.h"
#include <assert.h>
#include <string.h>
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h" // For CopyPlane
#include "libyuv/row.h"
#include "libyuv/scale_row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Remove this macro if OVERREAD is safe.
#define AVOID_OVERREAD 1
// Returns the magnitude of v (v itself when it is non-negative, -v otherwise).
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
// Sign-symmetric subsample: computes (|v| + a) >> s and gives the result the
// sign of v, so negative sizes round the same way as positive ones.
// The whole expansion and every argument are parenthesised so the macro can
// be used inside larger expressions and with expression arguments.
// Note: v is evaluated more than once; do not pass expressions with side
// effects.
#define SUBSAMPLE(v, a, s) \
  (((v) < 0) ? (-((-(v) + (a)) >> (s))) : (((v) + (a)) >> (s)))
// Scale plane, 1/2
// Specialised path that shrinks a plane to half its size. A row function is
// chosen from the filter mode and the available CPU features, then called
// once per destination row while the source advances two rows at a time.
static void ScalePlaneDown2(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  // Source advance per destination row; taken before src_stride is modified.
  const int row_stride = src_stride << 1;
  void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width);
  int rows_left;

  // Portable C implementation for the requested filter.
  if (filtering == kFilterNone) {
    ScaleRowDown2 = ScaleRowDown2_C;
  } else if (filtering == kFilterLinear) {
    ScaleRowDown2 = ScaleRowDown2Linear_C;
  } else {
    ScaleRowDown2 = ScaleRowDown2Box_C;
  }

  if (!filtering) {
    src_ptr += src_stride;  // Point to odd rows.
    src_stride = 0;
  }

#if defined(HAS_SCALEROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
    if (filtering) {
      ScaleRowDown2 = ScaleRowDown2Box_NEON;
    } else {
      ScaleRowDown2 = ScaleRowDown2_NEON;
    }
  }
#elif defined(HAS_SCALEROWDOWN2_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
    // The aligned variants need every pointer and stride to be 16-aligned.
    const int fully_aligned =
        IS_ALIGNED(src_ptr, 16) &&
        IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16);
    if (filtering == kFilterNone) {
      ScaleRowDown2 = fully_aligned ? ScaleRowDown2_SSE2 :
                                      ScaleRowDown2_Unaligned_SSE2;
    } else if (filtering == kFilterLinear) {
      ScaleRowDown2 = fully_aligned ? ScaleRowDown2Linear_SSE2 :
                                      ScaleRowDown2Linear_Unaligned_SSE2;
    } else {
      ScaleRowDown2 = fully_aligned ? ScaleRowDown2Box_SSE2 :
                                      ScaleRowDown2Box_Unaligned_SSE2;
    }
  }
#elif defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (filtering) {
      ScaleRowDown2 = ScaleRowDown2Box_MIPS_DSPR2;
    } else {
      ScaleRowDown2 = ScaleRowDown2_MIPS_DSPR2;
    }
  }
#endif

  // Linear filtering works within a single row, so pass a zero stride.
  if (filtering == kFilterLinear) {
    src_stride = 0;
  }

  // TODO(fbarchard): Loop through source height to allow odd height.
  for (rows_left = dst_height; rows_left > 0; --rows_left) {
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
    dst_ptr += dst_stride;
    src_ptr += row_stride;
  }
}
// Scale plane, 1/4
// Specialised path that shrinks a plane to a quarter of its size. A row
// function is chosen from the filter mode and the available CPU features,
// then called once per destination row while the source advances four rows.
static void ScalePlaneDown4(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  // Source advance per destination row; taken before src_stride is modified.
  const int row_stride = src_stride << 2;
  void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width);
  int rows_left;

  if (filtering) {
    ScaleRowDown4 = ScaleRowDown4Box_C;
  } else {
    ScaleRowDown4 = ScaleRowDown4_C;
    src_ptr += src_stride * 2;  // Point to row 2.
    src_stride = 0;
  }

#if defined(HAS_SCALEROWDOWN4_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
    if (filtering) {
      ScaleRowDown4 = ScaleRowDown4Box_NEON;
    } else {
      ScaleRowDown4 = ScaleRowDown4_NEON;
    }
  }
#elif defined(HAS_SCALEROWDOWN4_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    if (filtering) {
      ScaleRowDown4 = ScaleRowDown4Box_SSE2;
    } else {
      ScaleRowDown4 = ScaleRowDown4_SSE2;
    }
  }
#elif defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (filtering) {
      ScaleRowDown4 = ScaleRowDown4Box_MIPS_DSPR2;
    } else {
      ScaleRowDown4 = ScaleRowDown4_MIPS_DSPR2;
    }
  }
#endif

  // Linear filtering works within a single row, so pass a zero stride.
  if (filtering == kFilterLinear) {
    src_stride = 0;
  }

  for (rows_left = dst_height; rows_left > 0; --rows_left) {
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
    dst_ptr += dst_stride;
    src_ptr += row_stride;
  }
}
// Scale plane down, 3/4
// Produces three destination rows from every four source rows. Two row
// functions are used: the "_0" flavour for the first and third row of each
// group (the third is called with a negated stride from the row below), the
// "_1" flavour for the middle row.
static void ScalePlaneDown34(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint8* src_ptr, uint8* dst_ptr,
                             enum FilterMode filtering) {
  void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  // Stride handed to the row functions: zero for linear filtering so that
  // no second row is blended in.
  int filter_stride = src_stride;
  int y;
  if (filtering == kFilterLinear) {
    filter_stride = 0;
  }
  assert(dst_width % 3 == 0);

  // Portable C implementations.
  if (filtering) {
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
  } else {
    ScaleRowDown34_0 = ScaleRowDown34_C;
    ScaleRowDown34_1 = ScaleRowDown34_C;
  }
#if defined(HAS_SCALEROWDOWN34_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
    if (filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_NEON;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    if (filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_MIPS_DSPR2;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2;
    }
  }
#endif

  // Full groups: 4 source rows in, 3 destination rows out.
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    dst_ptr += dst_stride;
    src_ptr += src_stride;

    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
    dst_ptr += dst_stride;
    src_ptr += src_stride;

    // Third row starts one source row further down and filters upwards.
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
                     dst_ptr, dst_width);
    dst_ptr += dst_stride;
    src_ptr += src_stride * 2;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  switch (dst_height % 3) {
    case 2:
      ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
      src_ptr += src_stride;
      dst_ptr += dst_stride;
      ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
      break;
    case 1:
      ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
      break;
    default:
      break;
  }
}
// Scale plane, 3/8
// This is an optimized version for scaling down a plane to 3/8
// of its original size.
//
// Uses box filter arranges like this
// aaabbbcc -> abc
// aaabbbcc    def
// aaabbbcc    ghi
// dddeeeff
// dddeeeff
// dddeeeff
// ggghhhii
// ggghhhii
// Boxes are 3x3, 2x3, 3x2 and 2x2
//
// Every group of eight source rows yields three destination rows: two from
// the "_3" row function (advancing three source rows each) and one from the
// "_2" row function (advancing two source rows).
static void ScalePlaneDown38(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint8* src_ptr, uint8* dst_ptr,
                             enum FilterMode filtering) {
  void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  // Stride handed to the row functions: zero for linear filtering so that
  // no further rows are blended in.
  int filter_stride = src_stride;
  int y;
  if (filtering == kFilterLinear) {
    filter_stride = 0;
  }
  assert(dst_width % 3 == 0);

  // Portable C implementations.
  if (filtering) {
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
  } else {
    ScaleRowDown38_3 = ScaleRowDown38_C;
    ScaleRowDown38_2 = ScaleRowDown38_C;
  }
#if defined(HAS_SCALEROWDOWN38_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
    if (filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_NEON;
    }
  }
#elif defined(HAS_SCALEROWDOWN38_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    if (filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
    }
  }
#elif defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_MIPS_DSPR2;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_MIPS_DSPR2;
    }
  }
#endif

  // Full groups: 8 source rows in (3 + 3 + 2), 3 destination rows out.
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    dst_ptr += dst_stride;
    src_ptr += src_stride * 3;

    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    dst_ptr += dst_stride;
    src_ptr += src_stride * 3;

    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
    dst_ptr += dst_stride;
    src_ptr += src_stride * 2;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  switch (dst_height % 3) {
    case 2:
      ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
      src_ptr += src_stride * 3;
      dst_ptr += dst_stride;
      ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
      break;
    case 1:
      ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
      break;
    default:
      break;
  }
}
// Adds up every byte of an iboxwidth x iboxheight rectangle whose top-left
// corner is src_ptr and whose rows are src_stride bytes apart.
static __inline uint32 SumBox(int iboxwidth, int iboxheight,
                              ptrdiff_t src_stride, const uint8* src_ptr) {
  uint32 sum = 0u;
  int rows_left;
  assert(iboxwidth > 0);
  assert(iboxheight > 0);
  for (rows_left = iboxheight; rows_left > 0; --rows_left) {
    const uint8* pixel = src_ptr;
    int cols_left;
    for (cols_left = iboxwidth; cols_left > 0; --cols_left) {
      sum += *pixel++;
    }
    src_ptr += src_stride;
  }
  return sum;
}
static void ScalePlaneBoxRow_C(int dst_width, int boxheight,
int x, int dx, ptrdiff_t src_stride,
const uint8* src_ptr, uint8* dst_ptr) {
int i;
int boxwidth;
for (i = 0; i < dst_width; ++i) {
int ix = x >> 16;
x += dx;
boxwidth = (x >> 16) - ix;
*dst_ptr++ = SumBox(boxwidth, boxheight, src_stride, src_ptr + ix) /
(boxwidth * boxheight);
}
}
// Adds up iboxwidth consecutive 16-bit values starting at src_ptr.
static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
  uint32 sum = 0u;
  int remaining;
  assert(iboxwidth > 0);
  for (remaining = iboxwidth; remaining > 0; --remaining) {
    sum += *src_ptr++;
  }
  return sum;
}
// Averages horizontal runs of a row of 16-bit column sums into bytes when the
// horizontal step dx (16.16 fixed point) has a fractional part, so the run
// width alternates between (dx >> 16) and (dx >> 16) + 1.
//
// Division is replaced by a multiply with a 16.16 reciprocal of the box area;
// scaletbl[0] is for the narrow box and scaletbl[1] for the wide one.
static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
                            const uint16* src_ptr, uint8* dst_ptr) {
  int i;
  int scaletbl[2];
  const int minboxwidth = (dx >> 16);
  int boxwidth;
  scaletbl[0] = 65536 / (minboxwidth * boxheight);
  scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
  for (i = 0; i < dst_width; ++i) {
    int ix = x >> 16;
    x += dx;
    boxwidth = (x >> 16) - ix;
    // Index the table by (boxwidth - minboxwidth) instead of biasing the
    // table pointer by -minboxwidth: forming a pointer before the start of
    // the array is undefined behaviour even if it is only dereferenced at
    // in-range offsets.
    *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) *
        scaletbl[boxwidth - minboxwidth] >> 16;
  }
}
// Averages fixed-width horizontal runs of a row of 16-bit column sums into
// bytes. The run width is the integer part of dx; the division by the box
// area is done as a multiply with a 16.16 reciprocal.
// Note that x is used directly as an element index here and is advanced by
// the integer box width, not by dx.
static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
                            const uint16* src_ptr, uint8* dst_ptr) {
  const int boxwidth = (dx >> 16);
  const int scaleval = 65536 / (boxwidth * boxheight);
  int out;
  for (out = 0; out < dst_width; ++out) {
    dst_ptr[out] = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
    x += boxwidth;
  }
}
// Scale plane down to any dimensions, with interpolation.
// (boxfilter).
//
// Same method as SimpleScale, which is fixed point, outputting
// one pixel of destination using fixed point (16.16) to step
// through source, sampling a box of pixel with simple
// averaging.
//
// Two paths: a plain C per-pixel box sum when the source width is not a
// multiple of 16 or the vertical reduction is less than 2x, and otherwise a
// two-stage path that first sums rows into a 16-bit buffer and then averages
// columns out of that buffer.
static void ScalePlaneBox(int src_width, int src_height,
                          int dst_width, int dst_height,
                          int src_stride, int dst_stride,
                          const uint8* src_ptr, uint8* dst_ptr) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // y is clamped to the bottom of the source after every step.
  const int max_y = (src_height << 16);
  int j;
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
  if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
    for (j = 0; j < dst_height; ++j) {
      const int iy = y >> 16;
      int boxheight;
      y += dy;
      if (y > max_y) {
        y = max_y;
      }
      boxheight = (y >> 16) - iy;
      ScalePlaneBoxRow_C(dst_width, boxheight,
                         x, dx, src_stride,
                         src_ptr + iy * src_stride, dst_ptr);
      dst_ptr += dst_stride;
    }
    return;
  }

  {
    // Allocate a row buffer of uint16.
    align_buffer_64(row16, src_width * 2);
    // A fractional horizontal step needs the variable-width column averager.
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
        const uint16* src_ptr, uint8* dst_ptr) = ScaleAddCols1_C;
    void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride,
        uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;
    if (dx & 0xffff) {
      ScaleAddCols = ScaleAddCols2_C;
    }
#if defined(HAS_SCALEADDROWS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) &&
#ifdef AVOID_OVERREAD
        IS_ALIGNED(src_width, 16) &&
#endif
        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
      ScaleAddRows = ScaleAddRows_SSE2;
    }
#endif

    for (j = 0; j < dst_height; ++j) {
      const int iy = y >> 16;
      int boxheight;
      y += dy;
      if (y > max_y) {
        y = max_y;
      }
      boxheight = (y >> 16) - iy;
      // Stage 1: vertical sums of boxheight rows into the 16-bit buffer.
      ScaleAddRows(src_ptr + iy * src_stride, src_stride, (uint16*)(row16),
                   src_width, boxheight);
      // Stage 2: horizontal averaging out of the buffer.
      ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16),
                   dst_ptr);
      dst_ptr += dst_stride;
    }
    free_aligned_buffer_64(row16);
  }
}
// Scale plane down with bilinear interpolation.
//
// For every destination row the two nearest source rows are blended into a
// temporary row with InterpolateRow (skipped for kFilterLinear, which samples
// a single source row), and that row is then resampled horizontally with
// ScaleFilterCols. x/y/dx/dy are 16.16 fixed point values from ScaleSlope.
void ScalePlaneBilinearDown(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row buffer.
  // src_width is only normalised with Abs() further down, after ScaleSlope
  // has seen its sign, so it may still be negative at this point (presumably
  // a mirrored source - confirm against ScaleSlope). Size the buffer and pick
  // the 64-bit column filter from the magnitude, not the signed value.
  align_buffer_64(row, Abs(src_width));
  // Last source row that can be sampled, as 16.16 fixed point.
  const int max_y = (src_height - 1) << 16;
  int j;
  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
      int dst_width, int x, int dx) =
      (Abs(src_width) >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
        InterpolateRow = InterpolateRow_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
        InterpolateRow = InterpolateRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
    if (IS_ALIGNED(src_width, 4)) {
      InterpolateRow = InterpolateRow_MIPS_DSPR2;
    }
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif
  // Keep the starting row inside the source.
  if (y > max_y) {
    y = max_y;
  }
  for (j = 0; j < dst_height; ++j) {
    int yi = y >> 16;
    const uint8* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      // Horizontal-only filtering: sample the source row directly.
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
      // Top 8 bits of the fractional part of y select the vertical blend.
      int yf = (y >> 8) & 255;
      InterpolateRow(row, src, src_stride, src_width, yf);
      ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
    }
    dst_ptr += dst_stride;
    y += dy;
    if (y > max_y) {
      y = max_y;
    }
  }
  free_aligned_buffer_64(row);
}
// Scale plane up with bilinear interpolation.
//
// Works the other way round from the down-scaler: source rows are first
// resampled horizontally to dst_width with ScaleFilterCols into a two-row
// cache, and each destination row is then produced by blending the two cached
// rows vertically with InterpolateRow. x/y/dx/dy are 16.16 fixed point values
// from ScaleSlope.
void ScalePlaneBilinearUp(int src_width, int src_height,
                          int dst_width, int dst_height,
                          int src_stride, int dst_stride,
                          const uint8* src_ptr, uint8* dst_ptr,
                          enum FilterMode filtering) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // Last source row that can be sampled, as 16.16 fixed point.
  const int max_y = (src_height - 1) << 16;
  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
      int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_C : ScaleCols_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);
  // InterpolateRow runs on rows that are already dst_width wide, so the SIMD
  // selection below is keyed on the destination geometry.
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
        InterpolateRow = InterpolateRow_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
        InterpolateRow = InterpolateRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_MIPS_DSPR2;
    }
  }
#endif
  // Very wide sources use the 64-bit column filter.
  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_C;
  }
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif
  // Unfiltered exact 2x horizontal upscale gets a dedicated column function.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleFilterCols = ScaleColsUp2_SSE2;
    }
#endif
  }
  // Keep the starting row inside the source.
  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint8* src = src_ptr + yi * src_stride;
    // Allocate 2 row buffers.
    const int kRowSize = (dst_width + 15) & ~15;
    align_buffer_64(row, kRowSize * 2);
    // rowptr is the cached row for source row lasty; rowptr + rowstride is
    // the cached row below it. rowstride flips sign each time the cache
    // advances so the two halves of the buffer swap roles.
    uint8* rowptr = row;
    int rowstride = kRowSize;
    int lasty = yi;
    // Prime the cache with the first two source rows (the same row twice
    // when the source has a single row).
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;
    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        // Moved to a new source row; clamp to the last row first.
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_ptr + yi * src_stride;
        }
        if (yi != lasty) {
          // Overwrite the older cached row with the next source row, then
          // swap which half is considered "current".
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        // Horizontal-only filtering: copy the current cached row unblended.
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
      } else {
        // Top 8 bits of the fractional part of y select the vertical blend.
        int yf = (y >> 8) & 255;
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
// Scale Plane to/from any dimensions, without interpolation.
// Fixed point math is used for performance: The upper 16 bits
// of x and dx is the integer part of the source position and
// the lower 16 bits are the fixed decimal part.
static void ScalePlaneSimple(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_ptr, uint8* dst_ptr) {
int i;
void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx) = ScaleCols_C;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
int dx = 0;
int dy = 0;
ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
if (src_width * 2 == dst_width && x < 0x8000) {
ScaleCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleCols = ScaleColsUp2_SSE2;
}
#endif
}
for (i = 0; i < dst_height; ++i) {
ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
dst_width, x, dx);
dst_ptr += dst_stride;
y += dy;
}
}
// Scale a single plane.
// Dispatches to the most specialised scaler that matches the requested
// size ratio and filter mode; the generic point-sampling scaler is the
// final fallback.
LIBYUV_API
void ScalePlane(const uint8* src, int src_stride,
                int src_width, int src_height,
                uint8* dst, int dst_stride,
                int dst_width, int dst_height,
                enum FilterMode filtering) {
  // Let ScaleFilterReduce() substitute a simpler filter mode where it can.
  filtering = ScaleFilterReduce(src_width, src_height,
                                dst_width, dst_height,
                                filtering);
  // A negative source height inverts the image: start at the last source
  // row and walk the plane backwards.
  if (src_height < 0) {
    src_height = -src_height;
    src += (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }
  if (dst_width == src_width) {
    if (dst_height == src_height) {
      // Identical dimensions: straight copy.
      CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
    } else {
      // Arbitrary vertical scale, unscaled horizontally.
      const int dy = FixedDiv(src_height, dst_height);
      ScalePlaneVertical(src_height,
                         dst_width, dst_height,
                         src_stride, dst_stride, src, dst,
                         0, 0, dy, 1, filtering);
    }
    return;
  }
  // Specialised down-scalers for common ratios (3/4, 1/2, 3/8, 1/4).
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
    if (4 * dst_width == 3 * src_width &&
        4 * dst_height == 3 * src_height) {
      // 3/4 in both directions.
      ScalePlaneDown34(src_width, src_height, dst_width, dst_height,
                       src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
      // 1/2 in both directions.
      ScalePlaneDown2(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst, filtering);
      return;
    }
    // 3/8; the height test rounds up so odd sized chroma heights match.
    if (8 * dst_width == 3 * src_width &&
        dst_height == ((src_height * 3 + 7) / 8)) {
      ScalePlaneDown38(src_width, src_height, dst_width, dst_height,
                       src_stride, dst_stride, src, dst, filtering);
      return;
    }
    // 1/4 in both directions; not used for bilinear filtering.
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
        filtering != kFilterBilinear) {
      ScalePlaneDown4(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst, filtering);
      return;
    }
  }
  // Generic paths.
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
    ScalePlaneBox(src_width, src_height, dst_width, dst_height,
                  src_stride, dst_stride, src, dst);
    return;
  }
  if (filtering) {
    if (dst_height > src_height) {
      ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
                           src_stride, dst_stride, src, dst, filtering);
    } else {
      ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
                             src_stride, dst_stride, src, dst, filtering);
    }
    return;
  }
  ScalePlaneSimple(src_width, src_height, dst_width, dst_height,
                   src_stride, dst_stride, src, dst);
}
// Scale an I420 image.
// The Y, U and V planes are each scaled by a separate ScalePlane() call.
// Returns 0 on success, -1 when an argument is rejected.
LIBYUV_API
int I420Scale(const uint8* src_y, int src_stride_y,
              const uint8* src_u, int src_stride_u,
              const uint8* src_v, int src_stride_v,
              int src_width, int src_height,
              uint8* dst_y, int dst_stride_y,
              uint8* dst_u, int dst_stride_u,
              uint8* dst_v, int dst_stride_v,
              int dst_width, int dst_height,
              enum FilterMode filtering) {
  // Chroma (U/V) plane dimensions, computed by SUBSAMPLE(dim, 1, 1).
  int src_uv_width = SUBSAMPLE(src_width, 1, 1);
  int src_uv_height = SUBSAMPLE(src_height, 1, 1);
  int dst_uv_width = SUBSAMPLE(dst_width, 1, 1);
  int dst_uv_height = SUBSAMPLE(dst_height, 1, 1);
  // Every plane pointer must be present.
  if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v) {
    return -1;
  }
  // The source may have negative dimensions but not zero ones; the
  // destination must be strictly positive.
  if (src_width == 0 || src_height == 0 ||
      dst_width <= 0 || dst_height <= 0) {
    return -1;
  }
  ScalePlane(src_y, src_stride_y, src_width, src_height,
             dst_y, dst_stride_y, dst_width, dst_height,
             filtering);
  ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height,
             dst_u, dst_stride_u, dst_uv_width, dst_uv_height,
             filtering);
  ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height,
             dst_v, dst_stride_v, dst_uv_width, dst_uv_height,
             filtering);
  return 0;
}
// Deprecated api
// Thin wrapper over I420Scale() that takes the plane pointers and strides in
// a different order and a boolean instead of a filter mode: a non-zero
// `interpolate` selects kFilterBox, zero selects kFilterNone.
LIBYUV_API
int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
          int src_stride_y, int src_stride_u, int src_stride_v,
          int src_width, int src_height,
          uint8* dst_y, uint8* dst_u, uint8* dst_v,
          int dst_stride_y, int dst_stride_u, int dst_stride_v,
          int dst_width, int dst_height,
          LIBYUV_BOOL interpolate) {
  enum FilterMode mode = kFilterNone;
  if (interpolate) {
    mode = kFilterBox;
  }
  return I420Scale(src_y, src_stride_y,
                   src_u, src_stride_u,
                   src_v, src_stride_v,
                   src_width, src_height,
                   dst_y, dst_stride_y,
                   dst_u, dst_stride_u,
                   dst_v, dst_stride_v,
                   dst_width, dst_height,
                   mode);
}
// Deprecated api
// Scales a contiguous I420 buffer (Y plane, then U, then V, each stored with
// stride equal to its width) into a contiguous I420 destination buffer,
// starting dst_yoffset rows down (rounded down to an even row) and producing
// dst_height - 2 * offset rows.
// A non-zero `interpolate` selects kFilterBox, zero selects kFilterNone.
// Returns -1 when an argument is rejected, otherwise the I420Scale() result.
LIBYUV_API
int ScaleOffset(const uint8* src, int src_width, int src_height,
                uint8* dst, int dst_width, int dst_height, int dst_yoffset,
                LIBYUV_BOOL interpolate) {
  // Chroma requires offset to multiple of 2.
  int dst_yoffset_even = dst_yoffset & ~1;
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
  int aheight = dst_height - dst_yoffset_even * 2;  // actual output height
  const uint8* src_y;
  const uint8* src_u;
  const uint8* src_v;
  uint8* dst_y;
  uint8* dst_u;
  uint8* dst_v;
  // Validate before touching the pointers: deriving plane addresses from a
  // NULL base or from negative sizes/offsets is undefined behaviour even if
  // the result is never dereferenced.
  if (!src || src_width <= 0 || src_height <= 0 ||
      !dst || dst_width <= 0 || dst_height <= 0 || dst_yoffset_even < 0 ||
      dst_yoffset_even >= dst_height) {
    return -1;
  }
  // Source planes are packed back to back: Y, then U, then V.
  src_y = src;
  src_u = src + src_width * src_height;
  src_v = src + src_width * src_height +
      src_halfwidth * src_halfheight;
  // Destination planes use the same layout; each plane pointer is advanced
  // by the (even) row offset, halved for the chroma planes.
  dst_y = dst + dst_yoffset_even * dst_width;
  dst_u = dst + dst_width * dst_height +
      (dst_yoffset_even >> 1) * dst_halfwidth;
  dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight +
      (dst_yoffset_even >> 1) * dst_halfwidth;
  return I420Scale(src_y, src_width,
                   src_u, src_halfwidth,
                   src_v, src_halfwidth,
                   src_width, src_height,
                   dst_y, dst_width,
                   dst_u, dst_halfwidth,
                   dst_v, dst_halfwidth,
                   dst_width, aheight,
                   interpolate ? kFilterBox : kFilterNone);
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View file

@ -1,809 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/scale.h"
#include <assert.h>
#include <string.h>
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h" // For CopyARGB
#include "libyuv/row.h"
#include "libyuv/scale_row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Returns the magnitude of v (v itself when it is zero or positive).
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
// ScaleARGB ARGB, 1/2
// This is an optimized version for scaling down a ARGB to 1/2 of
// its original size.
static void ScaleARGBDown2(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int dx, int y, int dy,
enum FilterMode filtering) {
int j;
int row_stride = src_stride * (dy >> 16);
void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width) =
filtering == kFilterNone ? ScaleARGBRowDown2_C :
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
ScaleARGBRowDown2Box_C);
assert(dx == 65536 * 2); // Test scale factor of 2.
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
// Advance to odd row, even column.
if (filtering == kFilterBilinear) {
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
} else {
src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
}
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
ScaleARGBRowDown2Box_SSE2);
}
#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON :
ScaleARGBRowDown2_NEON;
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
}
for (j = 0; j < dst_height; ++j) {
ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
src_argb += row_stride;
dst_argb += dst_stride;
}
}
// ScaleARGB ARGB, 1/4
// This is an optimized version for scaling down a ARGB to 1/4 of
// its original size.
static void ScaleARGBDown4Box(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int dx, int y, int dy) {
int j;
// Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 2 * 4 + 15) & ~15;
align_buffer_64(row, kRowSize * 2);
int row_stride = src_stride * (dy >> 16);
void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C;
// Advance to odd row, even column.
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
assert(dx == 65536 * 4); // Test scale factor of 4.
assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
}
#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
}
#endif
for (j = 0; j < dst_height; ++j) {
ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
row + kRowSize, dst_width * 2);
ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
src_argb += row_stride;
dst_argb += dst_stride;
}
free_aligned_buffer_64(row);
}
// ScaleARGB ARGB Even
// This is an optimized version for scaling down a ARGB to even
// multiple of its original size.
static void ScaleARGBDownEven(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int dx, int y, int dy,
enum FilterMode filtering) {
int j;
int col_step = dx >> 16;
int row_stride = (dy >> 16) * src_stride;
void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
int src_step, uint8* dst_argb, int dst_width) =
filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
assert(IS_ALIGNED(src_width, 2));
assert(IS_ALIGNED(src_height, 2));
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
ScaleARGBRowDownEven_SSE2;
}
#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) &&
IS_ALIGNED(src_argb, 4)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
ScaleARGBRowDownEven_NEON;
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
}
for (j = 0; j < dst_height; ++j) {
ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
src_argb += row_stride;
dst_argb += dst_stride;
}
}
// Scale ARGB down with bilinear interpolation.
// For every destination row the source row at the integer part of y is
// blended with the following source row by InterpolateRow into a scratch
// row, which ScaleARGBFilterCols then resamples horizontally into dst_argb.
// With kFilterLinear the vertical blend is skipped and the source row is
// resampled directly.
// x/dx and y/dy are the source start position and step in 16.16 fixed point.
static void ScaleARGBBilinearDown(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int dx, int y, int dy,
enum FilterMode filtering) {
int j;
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_C;
// The 64 variant of the column filter is used for sources 32768 pixels or
// wider.
void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
// Work out which span of source columns is actually sampled, so that only
// that span has to be interpolated vertically. xlast is the source position
// of the last destination pixel; xl/xr order the two ends for either sign
// of dx.
int64 xlast = x + (int64)(dst_width - 1) * dx;
int64 xl = (dx >= 0) ? x : xlast;
int64 xr = (dx >= 0) ? xlast : x;
int clip_src_width;
xl = (xl >> 16) & ~3; // Left edge aligned.
xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel.
if (xr > src_width) {
xr = src_width;
}
// clip_src_width is a byte count (pixels * 4). The source pointer and x are
// rebased so that column xl becomes column 0.
clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
src_argb += xl * 4;
x -= (int)(xl << 16);
// Pick the InterpolateRow implementation; later blocks override earlier
// ones when several are compiled in.
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && clip_src_width >= 32) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(clip_src_width, 32)) {
InterpolateRow = InterpolateRow_AVX2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_NEON;
}
}
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
if (IS_ALIGNED(clip_src_width, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2;
}
}
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
}
#endif
// TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
// Allocate a row of ARGB.
{
// NOTE(review): clip_src_width is already in bytes, so "* 4" appears to
// reserve four times what InterpolateRow is asked to write below - looks
// like harmless over-allocation; confirm before changing.
align_buffer_64(row, clip_src_width * 4);
// Largest valid y: the last source row, in 16.16 fixed point.
const int max_y = (src_height - 1) << 16;
if (y > max_y) {
y = max_y;
}
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
const uint8* src = src_argb + yi * src_stride;
if (filtering == kFilterLinear) {
ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
} else {
// yf is the top 8 bits of the fractional part of y, used as the blend
// weight between row yi and the next row.
int yf = (y >> 8) & 255;
InterpolateRow(row, src, src_stride, clip_src_width, yf);
ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
}
dst_argb += dst_stride;
y += dy;
// Keep y on the last source row once it would run past it.
if (y > max_y) {
y = max_y;
}
}
free_aligned_buffer_64(row);
}
}
// Scale ARGB up with bilinear interpolation.
// Source rows are first resampled horizontally by ScaleARGBFilterCols into a
// two-row scratch buffer; each destination row is then produced by
// InterpolateRow blending those two rows vertically. Because several
// destination rows map to the same pair of source rows when scaling up, a
// source row is only resampled when the integer part of y changes.
// x/dx and y/dy are the source start position and step in 16.16 fixed point.
static void ScaleARGBBilinearUp(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int dx, int y, int dy,
enum FilterMode filtering) {
int j;
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_C;
// Despite the name, this pointer holds the unfiltered column scaler when
// filtering is kFilterNone (0).
void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) =
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
// Largest valid y: the last source row, in 16.16 fixed point.
const int max_y = (src_height - 1) << 16;
// Pick the InterpolateRow implementation; later blocks override earlier
// ones when several are compiled in. Widths here are in pixels, while the
// InterpolateRow calls below pass dst_width * 4.
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(dst_width, 8)) {
InterpolateRow = InterpolateRow_AVX2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_NEON;
}
}
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2;
}
#endif
// Pick the column scaler: the 64 variants for sources 32768 pixels or
// wider, SIMD variants below that, and the Up2 routine for an exact 2x
// unfiltered enlargement.
if (src_width >= 32768) {
ScaleARGBFilterCols = filtering ?
ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
}
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
}
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBCols_SSE2;
}
#endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
}
#endif
}
if (y > max_y) {
y = max_y;
}
{
int yi = y >> 16;
const uint8* src = src_argb + yi * src_stride;
// Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 4 + 15) & ~15;
align_buffer_64(row, kRowSize * 2);
// rowptr addresses the older of the two cached rows and rowptr + rowstride
// the newer one; rowstride changes sign each time a row is replaced so the
// two slots swap roles without copying.
uint8* rowptr = row;
int rowstride = kRowSize;
int lasty = yi;
// Prime the cache with source row yi and the row after it (the same row
// again when the source is only one row tall).
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
if (src_height > 1) {
src += src_stride;
}
ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
src += src_stride;
for (j = 0; j < dst_height; ++j) {
yi = y >> 16;
if (yi != lasty) {
// Clamp to the last source row, then re-check whether the row index
// really changed.
if (y > max_y) {
y = max_y;
yi = y >> 16;
src = src_argb + yi * src_stride;
}
if (yi != lasty) {
// Overwrite the older cached row with the next source row and swap the
// roles of the two slots.
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
rowptr += rowstride;
rowstride = -rowstride;
lasty = yi;
src += src_stride;
}
}
if (filtering == kFilterLinear) {
// No vertical blend: zero stride and zero fraction.
InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
} else {
// yf is the top 8 bits of the fractional part of y, used as the blend
// weight between the two cached rows.
int yf = (y >> 8) & 255;
InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
}
dst_argb += dst_stride;
y += dy;
}
free_aligned_buffer_64(row);
}
}
#ifdef YUVSCALEUP
// Scale YUV to ARGB up with bilinear interpolation.
// Only compiled when YUVSCALEUP is defined. Each needed source row is
// converted to ARGB by I422ToARGBRow into a scratch row, resampled
// horizontally by ScaleARGBFilterCols into a two-row cache, and the two
// cached rows are blended vertically by InterpolateRow into dst_argb.
// x/dx and y/dy are the source start position and step in 16.16 fixed point.
static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
                                     int dst_width, int dst_height,
                                     int src_stride_y,
                                     int src_stride_u,
                                     int src_stride_v,
                                     int dst_stride_argb,
                                     const uint8* src_y,
                                     const uint8* src_u,
                                     const uint8* src_v,
                                     uint8* dst_argb,
                                     int x, int dx, int y, int dy,
                                     enum FilterMode filtering) {
  int j;
  // Pick the YUV -> ARGB row converter; later blocks override earlier ones
  // when several are compiled in.
  void (*I422ToARGBRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) {
    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        I422ToARGBRow = I422ToARGBRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 16) {
    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 16)) {
      I422ToARGBRow = I422ToARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && src_width >= 8) {
    I422ToARGBRow = I422ToARGBRow_Any_NEON;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_NEON;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
  }
#endif
  // Pick the vertical blend routine.
  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        InterpolateRow = InterpolateRow_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        InterpolateRow = InterpolateRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
    InterpolateRow = InterpolateRow_MIPS_DSPR2;
  }
#endif
  // Pick the column scaler. Despite the name, this pointer holds the
  // unfiltered scaler when filtering is kFilterNone (0).
  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
      int dst_width, int x, int dx) =
      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
  if (src_width >= 32768) {
    ScaleARGBFilterCols = filtering ?
        ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
  }
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
  }
#endif
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    // NOTE(review): src_argb, src_stride and dst_stride are not parameters
    // or locals of this function, so this condition presumably fails to
    // compile when both YUVSCALEUP and HAS_SCALEARGBCOLSUP2_SSE2 are
    // defined - left unchanged here; confirm the intended operands.
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
        IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
        IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
    }
#endif
  }
  // Largest valid y: the last source row, in 16.16 fixed point.
  const int max_y = (src_height - 1) << 16;
  if (y > max_y) {
    y = max_y;
  }
  const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
  int yi = y >> 16;
  int uv_yi = yi >> kYShift;
  const uint8* src_row_y = src_y + yi * src_stride_y;
  const uint8* src_row_u = src_u + uv_yi * src_stride_u;
  const uint8* src_row_v = src_v + uv_yi * src_stride_v;
  // Allocate 2 rows of ARGB.
  const int kRowSize = (dst_width * 4 + 15) & ~15;
  align_buffer_64(row, kRowSize * 2);
  // Allocate 1 row of ARGB for source conversion.
  align_buffer_64(argb_row, src_width * 4);
  // rowptr addresses the older of the two cached rows and rowptr + rowstride
  // the newer one; rowstride changes sign each time a row is replaced.
  uint8* rowptr = row;
  int rowstride = kRowSize;
  int lasty = yi;
  // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
  // NOTE(review): until that TODO is done, the two priming calls below hand
  // the raw Y row to the ARGB column scaler instead of a converted row.
  ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
  if (src_height > 1) {
    src_row_y += src_stride_y;
    // The chroma rows advance once for every two luma rows.
    if (yi & 1) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }
  ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
  if (src_height > 2) {
    src_row_y += src_stride_y;
    if (!(yi & 1)) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }
  for (j = 0; j < dst_height; ++j) {
    yi = y >> 16;
    if (yi != lasty) {
      // Clamp to the last source row, then re-check whether the row index
      // really changed.
      if (y > max_y) {
        y = max_y;
        yi = y >> 16;
        uv_yi = yi >> kYShift;
        src_row_y = src_y + yi * src_stride_y;
        src_row_u = src_u + uv_yi * src_stride_u;
        src_row_v = src_v + uv_yi * src_stride_v;
      }
      if (yi != lasty) {
        // TODO(fbarchard): Convert the clipped region of row.
        I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
        // Overwrite the older cached row and swap the roles of the slots.
        ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
        rowptr += rowstride;
        rowstride = -rowstride;
        lasty = yi;
        src_row_y += src_stride_y;
        if (yi & 1) {
          src_row_u += src_stride_u;
          src_row_v += src_stride_v;
        }
      }
    }
    if (filtering == kFilterLinear) {
      // No vertical blend: zero stride and zero fraction.
      InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
    } else {
      // yf is the top 8 bits of the fractional part of y.
      int yf = (y >> 8) & 255;
      InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
    }
    dst_argb += dst_stride_argb;
    y += dy;
  }
  free_aligned_buffer_64(row);
  // Release the conversion row under the name it was allocated with
  // (argb_row); the previous spelling "row_argb" named no buffer.
  free_aligned_buffer_64(argb_row);
}
#endif
// Scale ARGB to/from any dimensions, without interpolation.
// Fixed point math is used for performance: The upper 16 bits
// of x and dx is the integer part of the source position and
// the lower 16 bits are the fixed decimal part.
static void ScaleARGBSimple(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int dx, int y, int dy) {
int j;
void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
#if defined(HAS_SCALEARGBCOLS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
ScaleARGBCols = ScaleARGBCols_SSE2;
}
#endif
if (src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBCols = ScaleARGBColsUp2_SSE2;
}
#endif
}
for (j = 0; j < dst_height; ++j) {
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride,
dst_width, x, dx);
dst_argb += dst_stride;
y += dy;
}
}
// Scale an ARGB image.
// Computes the 16.16 fixed point start position and step for the requested
// scale, applies the clip rectangle (given in destination pixels), and then
// hands off to the most specialised scaler that matches the step values and
// filter mode. Only the clip_width x clip_height region of the destination
// is written.
static void ScaleARGB(const uint8* src, int src_stride,
int src_width, int src_height,
uint8* dst, int dst_stride,
int dst_width, int dst_height,
int clip_x, int clip_y, int clip_width, int clip_height,
enum FilterMode filtering) {
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
int dx = 0;
int dy = 0;
// ARGB does not support box filter yet, but allow the user to pass it.
// Simplify filtering when possible.
filtering = ScaleFilterReduce(src_width, src_height,
dst_width, dst_height,
filtering);
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src = src + (src_height - 1) * src_stride;
src_stride = -src_stride;
}
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
// Skip to the clip origin: the whole-pixel part of clip * step advances the
// source pointer, the fractional part is folded into the start coordinate,
// and the destination pointer moves by the clip offset itself.
if (clip_x) {
int64 clipf = (int64)(clip_x) * dx;
x += (clipf & 0xffff);
src += (clipf >> 16) * 4;
dst += clip_x * 4;
}
if (clip_y) {
int64 clipf = (int64)(clip_y) * dy;
y += (clipf & 0xffff);
src += (clipf >> 16) * src_stride;
dst += clip_y * dst_stride;
}
// Special case for integer step values.
if (((dx | dy) & 0xffff) == 0) {
if (!dx || !dy) { // 1 pixel wide and/or tall.
filtering = kFilterNone;
} else {
// Optimized even scale down. ie 2, 4, 6, 8, 10x.
if (!(dx & 0x10000) && !(dy & 0x10000)) {
if (dx == 0x20000) {
// Optimized 1/2 downsample.
ScaleARGBDown2(src_width, src_height,
clip_width, clip_height,
src_stride, dst_stride, src, dst,
x, dx, y, dy, filtering);
return;
}
if (dx == 0x40000 && filtering == kFilterBox) {
// Optimized 1/4 box downsample.
ScaleARGBDown4Box(src_width, src_height,
clip_width, clip_height,
src_stride, dst_stride, src, dst,
x, dx, y, dy);
return;
}
ScaleARGBDownEven(src_width, src_height,
clip_width, clip_height,
src_stride, dst_stride, src, dst,
x, dx, y, dy, filtering);
return;
}
// Optimized odd scale down. ie 3, 5, 7, 9x.
// Filtering is turned off for odd integer steps; a step of exactly 1 in
// both directions is a plain copy.
if ((dx & 0x10000) && (dy & 0x10000)) {
filtering = kFilterNone;
if (dx == 0x10000 && dy == 0x10000) {
// Straight copy.
ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
dst, dst_stride, clip_width, clip_height);
return;
}
}
}
}
if (dx == 0x10000 && (x & 0xffff) == 0) {
// Arbitrary scale vertically, but unscaled horizontally.
// The 4 passed below is the number of bytes per ARGB pixel.
ScalePlaneVertical(src_height,
clip_width, clip_height,
src_stride, dst_stride, src, dst,
x, y, dy, 4, filtering);
return;
}
// A vertical step below 1.0 (65536) means the image grows vertically.
if (filtering && dy < 65536) {
ScaleARGBBilinearUp(src_width, src_height,
clip_width, clip_height,
src_stride, dst_stride, src, dst,
x, dx, y, dy, filtering);
return;
}
if (filtering) {
ScaleARGBBilinearDown(src_width, src_height,
clip_width, clip_height,
src_stride, dst_stride, src, dst,
x, dx, y, dy, filtering);
return;
}
// No filtering requested or left: point sample.
ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
src_stride, dst_stride, src, dst,
x, dx, y, dy);
}
// Scale an ARGB image, writing only a clip rectangle of the destination.
// The clip rectangle (clip_x, clip_y, clip_width, clip_height) is expressed
// in destination pixels and must lie inside the destination image.
// Returns 0 on success, -1 when an argument is rejected: a NULL image
// pointer, a zero source dimension, a non-positive destination dimension, a
// negative clip origin or size, or a clip rectangle that extends past the
// destination.
LIBYUV_API
int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
                  int src_width, int src_height,
                  uint8* dst_argb, int dst_stride_argb,
                  int dst_width, int dst_height,
                  int clip_x, int clip_y, int clip_width, int clip_height,
                  enum FilterMode filtering) {
  // A negative clip size would otherwise pass the bounds test below and be
  // handed to the scalers as their output width/height, where it feeds
  // scratch-buffer size calculations.
  if (!src_argb || src_width == 0 || src_height == 0 ||
      !dst_argb || dst_width <= 0 || dst_height <= 0 ||
      clip_x < 0 || clip_y < 0 ||
      clip_width < 0 || clip_height < 0 ||
      (clip_x + clip_width) > dst_width ||
      (clip_y + clip_height) > dst_height) {
    return -1;
  }
  ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
            dst_argb, dst_stride_argb, dst_width, dst_height,
            clip_x, clip_y, clip_width, clip_height, filtering);
  return 0;
}
// Scale an ARGB image.
// Equivalent to a clipped scale whose clip rectangle covers the whole
// destination. Returns 0 on success, -1 when an argument is rejected.
LIBYUV_API
int ARGBScale(const uint8* src_argb, int src_stride_argb,
              int src_width, int src_height,
              uint8* dst_argb, int dst_stride_argb,
              int dst_width, int dst_height,
              enum FilterMode filtering) {
  // The source may have negative dimensions but not zero ones.
  if (!src_argb || src_width == 0 || src_height == 0) {
    return -1;
  }
  // The destination must exist and be strictly positive in size.
  if (!dst_argb || dst_width <= 0 || dst_height <= 0) {
    return -1;
  }
  ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
            dst_argb, dst_stride_argb, dst_width, dst_height,
            0, 0, dst_width, dst_height, filtering);
  return 0;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View file

@ -1,145 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// This module is for GCC Neon
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
// NEON row kernel: halves a row of 32-bit pixels by point sampling.
// Each loop iteration loads 16 source pixels split into even-indexed and
// odd-indexed pixels, and stores the 8 odd-indexed ones to dst.
// src_ptr: source row; advanced by the loads.
// (second parameter, src_stride): unnamed and unused.
// dst: destination row; advanced by the stores.
// dst_width: number of destination pixels; decremented by 8 per iteration.
// NOTE(review): the loop body runs before the count is tested, so dst_width
// is presumably expected to be a positive multiple of 8 - confirm with
// callers.
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t /* src_stride */,
uint8* dst, int dst_width) {
asm volatile (
#ifdef _ANDROID
".fpu neon\n"
#endif
"1: \n"
// load even pixels into q0, odd into q1
"vld2.32 {q0, q1}, [%0]! \n"
"vld2.32 {q2, q3}, [%0]! \n"
"subs %2, %2, #8 \n" // 8 processed per loop
"vst1.8 {q1}, [%1]! \n" // store odd pixels
"vst1.8 {q3}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst), // %1
"+r"(dst_width) // %2
:
: "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
);
}
// Box-filters an ARGB image down by 2 in both directions: every output pixel
// is the rounded average, per channel, of a 2x2 block taken from the row at
// src_ptr and the row at src_ptr + src_stride.
// Each loop iteration reads 16 pixels from each row and writes 8 pixels; the
// loop ends once the remaining count drops to <= 0.
// NOTE(review): dst_width is presumably a positive multiple of 8; confirm
// against the callers.
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
// change the stride to row 2 pointer
"add %1, %1, %0 \n"
"1: \n"
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
"subs %3, %3, #8 \n" // 8 processed per loop.
"vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
"vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
"vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts.
"vld4.8 {d16, d18, d20, d22}, [%1]! \n" // load 8 more ARGB pixels.
"vld4.8 {d17, d19, d21, d23}, [%1]! \n" // load last 8 ARGB pixels.
// add the second row's horizontal pair sums onto the first row's sums
"vpadal.u8 q0, q8 \n" // B 16 bytes -> 8 shorts.
"vpadal.u8 q1, q9 \n" // G 16 bytes -> 8 shorts.
"vpadal.u8 q2, q10 \n" // R 16 bytes -> 8 shorts.
"vpadal.u8 q3, q11 \n" // A 16 bytes -> 8 shorts.
"vrshrn.u16 d0, q0, #2 \n" // downshift, round and pack
"vrshrn.u16 d1, q1, #2 \n"
"vrshrn.u16 d2, q2, #2 \n"
"vrshrn.u16 d3, q3, #2 \n"
"vst4.8 {d0, d1, d2, d3}, [%2]! \n"
"bgt 1b \n"
: "+r"(src_ptr), // %0
"+r"(src_stride), // %1
"+r"(dst), // %2
"+r"(dst_width) // %3
:
: "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
);
}
// Point-samples an ARGB row, taking one 32-bit pixel every src_stepx pixels.
// Reads 4 pixels at a time.
// Alignment requirement: src_argb 4 byte aligned.
// The stride parameter is unnamed and unused. The loop writes 4 pixels per
// iteration and ends once the remaining count drops to <= 0.
// NOTE(review): dst_width is presumably a positive multiple of 4; confirm
// against the callers.
void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t, int src_stepx,
uint8* dst_argb, int dst_width) {
asm volatile (
// r12 = src_stepx * 4: the byte distance between sampled pixels.
"mov r12, %3, lsl #2 \n"
".p2align 2 \n"
"1: \n"
// Gather four pixels, one per lane, advancing the source by r12 each time.
"vld1.32 {d0[0]}, [%0], r12 \n"
"vld1.32 {d0[1]}, [%0], r12 \n"
"vld1.32 {d1[0]}, [%0], r12 \n"
"vld1.32 {d1[1]}, [%0], r12 \n"
"subs %2, %2, #4 \n" // 4 pixels per loop.
"vst1.8 {q0}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(dst_width) // %2
: "r"(src_stepx) // %3
: "memory", "cc", "r12", "q0"
);
}
// Samples an ARGB image every src_stepx pixels and box-filters each sample:
// an output pixel is the rounded per-channel average of the 2x2 block found at
// the sample position in the row at src_argb and the row at
// src_argb + src_stride.
// Reads 4 pixels at a time.
// Alignment requirement: src_argb 4 byte aligned.
// The loop writes 4 pixels per iteration and ends once the remaining count
// drops to <= 0.
// NOTE(review): dst_width is presumably a positive multiple of 4; confirm
// against the callers.
void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width) {
asm volatile (
// r12 = src_stepx * 4: the byte distance between sampled blocks.
"mov r12, %4, lsl #2 \n"
// turn the stride into a pointer to the second row
"add %1, %1, %0 \n"
".p2align 2 \n"
"1: \n"
"vld1.8 {d0}, [%0], r12 \n" // Read 4 2x2 blocks -> 2x1
"vld1.8 {d1}, [%1], r12 \n"
"vld1.8 {d2}, [%0], r12 \n"
"vld1.8 {d3}, [%1], r12 \n"
"vld1.8 {d4}, [%0], r12 \n"
"vld1.8 {d5}, [%1], r12 \n"
"vld1.8 {d6}, [%0], r12 \n"
"vld1.8 {d7}, [%1], r12 \n"
// vertical add: first-row bytes + second-row bytes, widened to 16 bits
"vaddl.u8 q0, d0, d1 \n"
"vaddl.u8 q1, d2, d3 \n"
"vaddl.u8 q2, d4, d5 \n"
"vaddl.u8 q3, d6, d7 \n"
"vswp.8 d1, d2 \n" // ab_cd -> ac_bd
"vswp.8 d5, d6 \n" // ef_gh -> eg_fh
"vadd.u16 q0, q0, q1 \n" // (a+b)_(c+d)
"vadd.u16 q2, q2, q3 \n" // (e+f)_(g+h)
"vrshrn.u16 d0, q0, #2 \n" // first 2 pixels.
"vrshrn.u16 d1, q2, #2 \n" // next 2 pixels.
"subs %3, %3, #4 \n" // 4 pixels per loop.
"vst1.8 {q0}, [%2]! \n"
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(src_stride), // %1
"+r"(dst_argb), // %2
"+r"(dst_width) // %3
: "r"(src_stepx) // %4
: "memory", "cc", "r12", "q0", "q1", "q2", "q3"
);
}
#endif // __ARM_NEON__
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View file

@ -1,772 +0,0 @@
/*
* Copyright 2013 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/scale.h"
#include <assert.h>
#include <string.h>
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h" // For CopyARGB
#include "libyuv/row.h"
#include "libyuv/scale_row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Absolute value of a signed int.
static __inline int Abs(int v) {
  return (v < 0) ? -v : v;
}
// CPU agnostic row functions
// Point-samples a row down by 2 horizontally: each output byte is the odd
// (second) byte of a source pair. src_stride is unused.
void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
                     uint8* dst, int dst_width) {
  int i = 0;
  // Two output bytes per pass.
  while (i < dst_width - 1) {
    dst[0] = src_ptr[1];
    dst[1] = src_ptr[3];
    src_ptr += 4;
    dst += 2;
    i += 2;
  }
  // Odd width: one trailing output byte.
  if ((dst_width & 1) != 0) {
    dst[0] = src_ptr[1];
  }
}
void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
const uint8* s = src_ptr;
int x;
for (x = 0; x < dst_width - 1; x += 2) {
dst[0] = (s[0] + s[1] + 1) >> 1;
dst[1] = (s[2] + s[3] + 1) >> 1;
dst += 2;
s += 4;
}
if (dst_width & 1) {
dst[0] = (s[0] + s[1] + 1) >> 1;
}
}
void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
const uint8* s = src_ptr;
const uint8* t = src_ptr + src_stride;
int x;
for (x = 0; x < dst_width - 1; x += 2) {
dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
dst += 2;
s += 4;
t += 4;
}
if (dst_width & 1) {
dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
}
}
// Point-samples a row down by 4 horizontally: each output byte is the byte at
// offset 2 of a group of four source bytes. src_stride is unused.
void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
                     uint8* dst, int dst_width) {
  int i = 0;
  // Two output bytes (eight source bytes) per pass.
  while (i < dst_width - 1) {
    dst[0] = src_ptr[2];
    dst[1] = src_ptr[6];
    src_ptr += 8;
    dst += 2;
    i += 2;
  }
  if ((dst_width & 1) != 0) {
    dst[0] = src_ptr[2];
  }
}
// Scales down by 4 in both directions; each output byte is the rounded
// average ((sum + 8) >> 4) of a 4x4 block spanning four consecutive rows
// starting at src_ptr, src_stride bytes apart.
void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst, int dst_width) {
  const intptr_t stride = src_stride;
  int x;
  int out;
  int row;
  int col;
  // Two output bytes per pass; each is finished and stored before the next
  // one is summed.
  for (x = 0; x < dst_width - 1; x += 2) {
    for (out = 0; out < 2; ++out) {
      int sum = 8;  // rounding bias
      for (row = 0; row < 4; ++row) {
        for (col = 0; col < 4; ++col) {
          sum += src_ptr[stride * row + out * 4 + col];
        }
      }
      dst[out] = sum >> 4;
    }
    dst += 2;
    src_ptr += 8;
  }
  // Odd width: one trailing 4x4 block.
  if (dst_width & 1) {
    int sum = 8;  // rounding bias
    for (row = 0; row < 4; ++row) {
      for (col = 0; col < 4; ++col) {
        sum += src_ptr[stride * row + col];
      }
    }
    dst[0] = sum >> 4;
  }
}
// Point-samples a row down to 3/4 width: from every four source bytes, bytes
// 0, 1 and 3 are kept. dst_width must be a positive multiple of 3.
// src_stride is unused.
void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
                      uint8* dst, int dst_width) {
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[1];
    dst[2] = src_ptr[3];
    src_ptr += 4;
    dst += 3;
  }
}
// Filter rows 0 and 1 together, 3 : 1
void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width) {
const uint8* s = src_ptr;
const uint8* t = src_ptr + src_stride;
int x;
assert((dst_width % 3 == 0) && (dst_width > 0));
for (x = 0; x < dst_width; x += 3) {
uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
d[0] = (a0 * 3 + b0 + 2) >> 2;
d[1] = (a1 * 3 + b1 + 2) >> 2;
d[2] = (a2 * 3 + b2 + 2) >> 2;
d += 3;
s += 4;
t += 4;
}
}
// Filter rows 1 and 2 together, 1 : 1
void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width) {
const uint8* s = src_ptr;
const uint8* t = src_ptr + src_stride;
int x;
assert((dst_width % 3 == 0) && (dst_width > 0));
for (x = 0; x < dst_width; x += 3) {
uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
d[0] = (a0 + b0 + 1) >> 1;
d[1] = (a1 + b1 + 1) >> 1;
d[2] = (a2 + b2 + 1) >> 1;
d += 3;
s += 4;
t += 4;
}
}
// Point-samples one row of bytes. x is the starting source position and dx
// the per-output step; the source index is x >> 16.
void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
                 int dst_width, int x, int dx) {
  int pairs;
  // Two output bytes per pass.
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    dst_ptr[0] = src_ptr[x >> 16];
    x += dx;
    dst_ptr[1] = src_ptr[x >> 16];
    x += dx;
    dst_ptr += 2;
  }
  // Odd width: one trailing sample.
  if ((dst_width & 1) != 0) {
    dst_ptr[0] = src_ptr[x >> 16];
  }
}
// Doubles one row of bytes horizontally by writing every source byte twice.
// x and dx are unused.
void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
                    int dst_width, int x, int dx) {
  int pairs;
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    dst_ptr[0] = src_ptr[0];
    dst_ptr[1] = dst_ptr[0];
    ++src_ptr;
    dst_ptr += 2;
  }
  // Odd width: the last source byte is written once.
  if ((dst_width & 1) != 0) {
    dst_ptr[0] = src_ptr[0];
  }
}
// Linear blend of a and b by the 16-bit fraction f, truncated to uint8.
// Uses a + f(b-a) in place of (1-f)a + fb.
#define BLENDER(a, b, f) \
    (uint8)((int)(a) + (((int)(f) * ((int)(b) - (int)(a))) >> 16))
// Scales one row of bytes horizontally with linear filtering. x is the
// starting source position and dx the per-output step; x >> 16 selects the
// left sample and the low 16 bits of x are the blend fraction toward the
// sample to its right.
void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
                       int dst_width, int x, int dx) {
  int pairs;
  // Two output bytes per pass.
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    int idx = x >> 16;
    int left = src_ptr[idx];
    int right = src_ptr[idx + 1];
    dst_ptr[0] = BLENDER(left, right, x & 0xffff);
    x += dx;
    idx = x >> 16;
    left = src_ptr[idx];
    right = src_ptr[idx + 1];
    dst_ptr[1] = BLENDER(left, right, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  // Odd width: one trailing sample.
  if ((dst_width & 1) != 0) {
    int idx = x >> 16;
    int left = src_ptr[idx];
    int right = src_ptr[idx + 1];
    dst_ptr[0] = BLENDER(left, right, x & 0xffff);
  }
}
// Same linear-filtered horizontal scale as the 32-bit variant, but the source
// position is accumulated in a 64-bit integer. x32 is the starting position
// and dx the per-output step.
void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
                         int dst_width, int x32, int dx) {
  int64 x = (int64)(x32);
  int pairs;
  // Two output bytes per pass.
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    int64 idx = x >> 16;
    int left = src_ptr[idx];
    int right = src_ptr[idx + 1];
    dst_ptr[0] = BLENDER(left, right, x & 0xffff);
    x += dx;
    idx = x >> 16;
    left = src_ptr[idx];
    right = src_ptr[idx + 1];
    dst_ptr[1] = BLENDER(left, right, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  // Odd width: one trailing sample.
  if ((dst_width & 1) != 0) {
    int64 idx = x >> 16;
    int left = src_ptr[idx];
    int right = src_ptr[idx + 1];
    dst_ptr[0] = BLENDER(left, right, x & 0xffff);
  }
}
#undef BLENDER
// Point-samples a row down to 3/8 width: from every eight source bytes, bytes
// 0, 3 and 6 are kept. dst_width must be a multiple of 3. src_stride is
// unused.
void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
                      uint8* dst, int dst_width) {
  int remaining;
  assert(dst_width % 3 == 0);
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[3];
    dst[2] = src_ptr[6];
    src_ptr += 8;
    dst += 3;
  }
}
// 8x3 -> 3x1
void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
intptr_t stride = src_stride;
int i;
assert((dst_width % 3 == 0) && (dst_width > 0));
for (i = 0; i < dst_width; i += 3) {
dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
src_ptr[stride + 0] + src_ptr[stride + 1] +
src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
(65536 / 9) >> 16;
dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
src_ptr[stride + 3] + src_ptr[stride + 4] +
src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
(65536 / 9) >> 16;
dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
src_ptr[stride + 6] + src_ptr[stride + 7] +
src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
(65536 / 6) >> 16;
src_ptr += 8;
dst_ptr += 3;
}
}
// 8x2 -> 3x1
void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
intptr_t stride = src_stride;
int i;
assert((dst_width % 3 == 0) && (dst_width > 0));
for (i = 0; i < dst_width; i += 3) {
dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
src_ptr[stride + 0] + src_ptr[stride + 1] +
src_ptr[stride + 2]) * (65536 / 6) >> 16;
dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
src_ptr[stride + 3] + src_ptr[stride + 4] +
src_ptr[stride + 5]) * (65536 / 6) >> 16;
dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
src_ptr[stride + 6] + src_ptr[stride + 7]) *
(65536 / 4) >> 16;
src_ptr += 8;
dst_ptr += 3;
}
}
void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height) {
int x;
assert(src_width > 0);
assert(src_height > 0);
for (x = 0; x < src_width; ++x) {
const uint8* s = src_ptr + x;
unsigned int sum = 0u;
int y;
for (y = 0; y < src_height; ++y) {
sum += s[0];
s += src_stride;
}
// TODO(fbarchard): Consider limitting height to 256 to avoid overflow.
dst_ptr[x] = sum < 65535u ? sum : 65535u;
}
}
void ScaleARGBRowDown2_C(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width) {
const uint32* src = (const uint32*)(src_argb);
uint32* dst = (uint32*)(dst_argb);
int x;
for (x = 0; x < dst_width - 1; x += 2) {
dst[0] = src[1];
dst[1] = src[3];
src += 4;
dst += 2;
}
if (dst_width & 1) {
dst[0] = src[1];
}
}
// Scales an ARGB row down by 2 horizontally; every output channel is the
// rounded average of the same channel in two adjacent source pixels.
// src_stride is unused.
void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
                               ptrdiff_t src_stride,
                               uint8* dst_argb, int dst_width) {
  int pixel;
  int ch;
  for (pixel = 0; pixel < dst_width; ++pixel) {
    // Channel ch of the second pixel sits 4 bytes after channel ch of the
    // first.
    for (ch = 0; ch < 4; ++ch) {
      dst_argb[ch] = (src_argb[ch] + src_argb[ch + 4] + 1) >> 1;
    }
    src_argb += 8;
    dst_argb += 4;
  }
}
// Scales ARGB down by 2 in both directions; every output channel is the
// rounded average of that channel over a 2x2 pixel block taken from the row
// at src_argb and the row src_stride bytes below it.
void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
                            uint8* dst_argb, int dst_width) {
  int pixel;
  int ch;
  for (pixel = 0; pixel < dst_width; ++pixel) {
    for (ch = 0; ch < 4; ++ch) {
      dst_argb[ch] = (src_argb[ch] + src_argb[ch + 4] +
                      src_argb[src_stride + ch] +
                      src_argb[src_stride + ch + 4] + 2) >> 2;
    }
    src_argb += 8;
    dst_argb += 4;
  }
}
void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width) {
const uint32* src = (const uint32*)(src_argb);
uint32* dst = (uint32*)(dst_argb);
int x;
for (x = 0; x < dst_width - 1; x += 2) {
dst[0] = src[0];
dst[1] = src[src_stepx];
src += src_stepx * 2;
dst += 2;
}
if (dst_width & 1) {
dst[0] = src[0];
}
}
// Samples ARGB every src_stepx pixels and box-filters each sample: every
// output channel is the rounded average of that channel over the 2x2 pixel
// block at the sample position (row at src_argb and the row src_stride bytes
// below it).
void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
                               ptrdiff_t src_stride,
                               int src_stepx,
                               uint8* dst_argb, int dst_width) {
  int pixel;
  int ch;
  for (pixel = 0; pixel < dst_width; ++pixel) {
    for (ch = 0; ch < 4; ++ch) {
      dst_argb[ch] = (src_argb[ch] + src_argb[ch + 4] +
                      src_argb[src_stride + ch] +
                      src_argb[src_stride + ch + 4] + 2) >> 2;
    }
    src_argb += src_stepx * 4;
    dst_argb += 4;
  }
}
// Point-samples one row of 32-bit ARGB pixels. x is the starting source
// position and dx the per-output step; the source pixel index is x >> 16.
void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
                     int dst_width, int x, int dx) {
  const uint32* in = (const uint32*)(src_argb);
  uint32* out = (uint32*)(dst_argb);
  int pairs;
  // Two output pixels per pass.
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    out[0] = in[x >> 16];
    x += dx;
    out[1] = in[x >> 16];
    x += dx;
    out += 2;
  }
  if ((dst_width & 1) != 0) {
    out[0] = in[x >> 16];
  }
}
// Point-samples one row of 32-bit ARGB pixels with the source position
// accumulated in a 64-bit integer. x32 is the starting position and dx the
// per-output step.
void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
                       int dst_width, int x32, int dx) {
  int64 x = (int64)(x32);
  const uint32* in = (const uint32*)(src_argb);
  uint32* out = (uint32*)(dst_argb);
  int pairs;
  // Two output pixels per pass.
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    out[0] = in[x >> 16];
    x += dx;
    out[1] = in[x >> 16];
    x += dx;
    out += 2;
  }
  if ((dst_width & 1) != 0) {
    out[0] = in[x >> 16];
  }
}
// Doubles one row of 32-bit ARGB pixels horizontally by writing every source
// pixel twice. x and dx are unused.
void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
                        int dst_width, int x, int dx) {
  const uint32* in = (const uint32*)(src_argb);
  uint32* out = (uint32*)(dst_argb);
  int pairs;
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    out[0] = in[0];
    out[1] = out[0];
    ++in;
    out += 2;
  }
  // Odd width: the last source pixel is written once.
  if ((dst_width & 1) != 0) {
    out[0] = in[0];
  }
}
// Mimics SSSE3 blender
// BLENDER1 blends two 8-bit channel values with a 7-bit fraction f:
//   (a * (0x7f ^ f) + b * f) >> 7
// BLENDERC extracts the channel at bit offset s from packed pixels a and b,
// blends it and shifts the result back into place.
// BLENDER blends all four channels of two packed 32-bit pixels.
// Every argument and every expansion is parenthesized so the macros stay
// correct when given compound expressions or used inside larger expressions.
#define BLENDER1(a, b, f) (((a) * (0x7f ^ (f)) + (b) * (f)) >> 7)
#define BLENDERC(a, b, f, s) (uint32)( \
    BLENDER1(((a) >> (s)) & 255, ((b) >> (s)) & 255, f) << (s))
#define BLENDER(a, b, f) \
    (BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \
     BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0))
// Scales one row of 32-bit ARGB pixels horizontally with linear filtering.
// x is the starting source position and dx the per-output step; x >> 16
// selects the left pixel and bits 9..15 of x form the 7-bit blend fraction
// toward the pixel to its right.
void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
                           int dst_width, int x, int dx) {
  const uint32* in = (const uint32*)(src_argb);
  uint32* out = (uint32*)(dst_argb);
  int pairs;
  // Two output pixels per pass.
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    int idx = x >> 16;
    int frac = (x >> 9) & 0x7f;
    uint32 left = in[idx];
    uint32 right = in[idx + 1];
    out[0] = BLENDER(left, right, frac);
    x += dx;
    idx = x >> 16;
    frac = (x >> 9) & 0x7f;
    left = in[idx];
    right = in[idx + 1];
    out[1] = BLENDER(left, right, frac);
    x += dx;
    out += 2;
  }
  // Odd width: one trailing pixel.
  if ((dst_width & 1) != 0) {
    int idx = x >> 16;
    int frac = (x >> 9) & 0x7f;
    uint32 left = in[idx];
    uint32 right = in[idx + 1];
    out[0] = BLENDER(left, right, frac);
  }
}
// Same linear-filtered ARGB horizontal scale as the 32-bit variant, but the
// source position is accumulated in a 64-bit integer. x32 is the starting
// position and dx the per-output step.
void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
                             int dst_width, int x32, int dx) {
  int64 x = (int64)(x32);
  const uint32* in = (const uint32*)(src_argb);
  uint32* out = (uint32*)(dst_argb);
  int pairs;
  // Two output pixels per pass.
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    int64 idx = x >> 16;
    int frac = (x >> 9) & 0x7f;
    uint32 left = in[idx];
    uint32 right = in[idx + 1];
    out[0] = BLENDER(left, right, frac);
    x += dx;
    idx = x >> 16;
    frac = (x >> 9) & 0x7f;
    left = in[idx];
    right = in[idx + 1];
    out[1] = BLENDER(left, right, frac);
    x += dx;
    out += 2;
  }
  // Odd width: one trailing pixel.
  if ((dst_width & 1) != 0) {
    int64 idx = x >> 16;
    int frac = (x >> 9) & 0x7f;
    uint32 left = in[idx];
    uint32 right = in[idx + 1];
    out[0] = BLENDER(left, right, frac);
  }
}
#undef BLENDER1
#undef BLENDERC
#undef BLENDER
// Scale plane vertically with bilinear interpolation.
// Produces dst_height output rows of dst_width pixels (bpp bytes each) without
// any horizontal scaling: x only selects the starting source column
// ((x >> 16) * bpp bytes in). y is the starting source row position and dy the
// per-output-row step; y >> 16 selects the source row and, when filtering is
// non-zero, bits 8..15 of y are passed to the row interpolator as the blend
// fraction (otherwise the fraction is 0).
// Asserted preconditions: 1 <= bpp <= 4, src_height != 0, dst_width > 0 and
// dst_height > 0.
void ScalePlaneVertical(int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int y, int dy,
int bpp, enum FilterMode filtering) {
// TODO(fbarchard): Allow higher bpp.
int dst_width_bytes = dst_width * bpp;
// Row interpolator; starts as the C version and is replaced below by the
// best variant the build and the running CPU support.
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_C;
// Largest y allowed: just below the last source row, so y >> 16 is at most
// src_height - 2 when there is more than one row.
// NOTE(review): presumably this keeps the row after yi readable by the
// interpolator -- confirm against InterpolateRow.
const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
int j;
assert(bpp >= 1 && bpp <= 4);
assert(src_height != 0);
assert(dst_width > 0);
assert(dst_height > 0);
// Skip to the starting source column.
src_argb += (x >> 16) * bpp;
// The blocks below run in order, so a later match overrides an earlier one.
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(dst_width_bytes, 32)) {
InterpolateRow = InterpolateRow_AVX2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_NEON;
}
}
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
if (IS_ALIGNED(dst_width_bytes, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2;
}
}
#endif
// One output row per iteration.
for (j = 0; j < dst_height; ++j) {
int yi;
int yf;
// Clamp the source position; once clamped, y stays at max_y for every
// remaining row because dy is added after the clamp.
if (y > max_y) {
y = max_y;
}
yi = y >> 16;
yf = filtering ? ((y >> 8) & 255) : 0;
InterpolateRow(dst_argb, src_argb + yi * src_stride,
src_stride, dst_width_bytes, yf);
dst_argb += dst_stride;
y += dy;
}
}
// Picks the cheapest filter mode that is sufficient for the given scale
// factors. Negative source dimensions are treated by their magnitude.
// Box is reduced to Bilinear, Bilinear to Linear or None, and Linear to None,
// each stage feeding the next.
enum FilterMode ScaleFilterReduce(int src_width, int src_height,
                                  int dst_width, int dst_height,
                                  enum FilterMode filtering) {
  const int abs_width = (src_width < 0) ? -src_width : src_width;
  const int abs_height = (src_height < 0) ? -src_height : src_height;

  // Box -> Bilinear when both axes scale to 0.5 or larger, or when either
  // axis is not being reduced at all.
  if (filtering == kFilterBox &&
      ((dst_width * 2 >= abs_width && dst_height * 2 >= abs_height) ||
       dst_width >= abs_width || dst_height >= abs_height)) {
    filtering = kFilterBilinear;
  }

  if (filtering == kFilterBilinear) {
    if (abs_width == 1) {
      // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
      // avoid reading 2 pixels horizontally that causes memory exception.
      filtering = kFilterNone;
    } else if (abs_height == 1 ||
               // TODO(fbarchard): Detect any odd scale factor and reduce to
               // Linear.
               dst_height == abs_height || dst_height * 3 == abs_height) {
      filtering = kFilterLinear;
    }
  }

  // TODO(fbarchard): Detect any odd scale factor and reduce to None.
  if (filtering == kFilterLinear &&
      (abs_width == 1 ||
       dst_width == abs_width || dst_width * 3 == abs_width)) {
    filtering = kFilterNone;
  }
  return filtering;
}
// Divide num by div and return as 16.16 fixed point result.
int FixedDiv_C(int num, int div) {
return (int)(((int64)(num) << 16) / div);
}
// Divide num by div and return as 16.16 fixed point result.
int FixedDiv1_C(int num, int div) {
return (int)((((int64)(num) << 16) - 0x00010001) /
(div - 1));
}
// Starting position for a step of dx: half the step magnitude plus offset s,
// carrying the sign of dx. Arguments and the result are fully parenthesized
// so the macro is safe with compound arguments and inside larger expressions.
#define CENTERSTART(dx, s) \
    (((dx) < 0) ? -((-(dx) >> 1) + (s)) : (((dx) >> 1) + (s)))
// Computes the 16.16 fixed point start positions (*x, *y) and per-output
// steps (*dx, *dy) used to walk the source while scaling, according to the
// filter mode. A negative src_width requests horizontal mirroring: *x is
// moved to the far end and *dx is negated.
void ScaleSlope(int src_width, int src_height,
                int dst_width, int dst_height,
                enum FilterMode filtering,
                int* x, int* y, int* dx, int* dy) {
  assert(x != NULL);
  assert(y != NULL);
  assert(dx != NULL);
  assert(dy != NULL);
  assert(src_width != 0);
  assert(src_height != 0);
  assert(dst_width > 0);
  assert(dst_height > 0);
  // Check for 1 pixel and avoid FixedDiv overflow.
  if (dst_width == 1 && src_width >= 32768) {
    dst_width = src_width;
  }
  if (dst_height == 1 && src_height >= 32768) {
    dst_height = src_height;
  }
  switch (filtering) {
    case kFilterBox:
      // Scale step for point sampling duplicates all pixels equally.
      *dx = FixedDiv(Abs(src_width), dst_width);
      *dy = FixedDiv(src_height, dst_height);
      *x = 0;
      *y = 0;
      break;
    case kFilterBilinear:
    case kFilterLinear:
      // Horizontal step, shared by both modes.
      // Scale step for bilinear sampling renders last pixel once for
      // upsample.
      if (dst_width <= Abs(src_width)) {
        *dx = FixedDiv(Abs(src_width), dst_width);
        *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
      } else if (dst_width > 1) {
        *dx = FixedDiv1(Abs(src_width), dst_width);
        *x = 0;
      }
      if (filtering == kFilterBilinear) {
        // Vertical step is filtered as well.
        if (dst_height <= src_height) {
          *dy = FixedDiv(src_height, dst_height);
          *y = CENTERSTART(*dy, -32768);  // Subtract 0.5 (32768) to center filter.
        } else if (dst_height > 1) {
          *dy = FixedDiv1(src_height, dst_height);
          *y = 0;
        }
      } else {
        // Linear: vertical axis is point sampled from the middle of the step.
        *dy = FixedDiv(src_height, dst_height);
        *y = *dy >> 1;
      }
      break;
    default:
      // Scale step for point sampling duplicates all pixels equally.
      *dx = FixedDiv(Abs(src_width), dst_width);
      *dy = FixedDiv(src_height, dst_height);
      *x = CENTERSTART(*dx, 0);
      *y = CENTERSTART(*dy, 0);
      break;
  }
  // Negative src_width means horizontally mirror.
  if (src_width < 0) {
    *x += (dst_width - 1) * *dx;
    *dx = -*dx;
    // src_width = -src_width;   // Caller must do this.
  }
}
#undef CENTERSTART
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View file

@ -1,653 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// This module is for GCC MIPS DSPR2
#if !defined(LIBYUV_DISABLE_MIPS) && \
defined(__mips_dsp) && (__mips_dsp_rev >= 2)
// Point-samples a row down by 2 horizontally using MIPS DSPR2 instructions.
// This version keeps the even byte of every source pair (see the TODO below).
// The main loop handles 16 output bytes per iteration with word loads and
// stores; the remaining dst_width & 0xf bytes are handled one at a time.
// src_stride is not referenced.
// NOTE(review): the word accesses presumably require 4-byte aligned src_ptr
// and dst -- confirm against the callers. The asm stores through dst but has
// no "memory" clobber.
void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"srl $t9, %[dst_width], 4 \n" // iterations -> by 16
"beqz $t9, 2f \n"
" nop \n"
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
"lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
"lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
"lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16|
"lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20|
"lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
"lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
// TODO(fbarchard): Use odd pixels instead of even.
"precr.qb.ph $t8, $t1, $t0 \n" // |6|4|2|0|
"precr.qb.ph $t0, $t3, $t2 \n" // |14|12|10|8|
"precr.qb.ph $t1, $t5, $t4 \n" // |22|20|18|16|
"precr.qb.ph $t2, $t7, $t6 \n" // |30|28|26|24|
"addiu %[src_ptr], %[src_ptr], 32 \n"
"addiu $t9, $t9, -1 \n"
"sw $t8, 0(%[dst]) \n"
"sw $t0, 4(%[dst]) \n"
"sw $t1, 8(%[dst]) \n"
"sw $t2, 12(%[dst]) \n"
"bgtz $t9, 1b \n"
" addiu %[dst], %[dst], 16 \n" // branch delay slot
"2: \n"
"andi $t9, %[dst_width], 0xf \n" // residue
"beqz $t9, 3f \n"
" nop \n"
"21: \n"
// one output byte per pass: copy the even byte, skip the odd one
"lbu $t0, 0(%[src_ptr]) \n"
"addiu %[src_ptr], %[src_ptr], 2 \n"
"addiu $t9, $t9, -1 \n"
"sb $t0, 0(%[dst]) \n"
"bgtz $t9, 21b \n"
" addiu %[dst], %[dst], 1 \n" // branch delay slot
"3: \n"
".set pop \n"
: [src_ptr] "+r" (src_ptr),
[dst] "+r" (dst)
: [dst_width] "r" (dst_width)
: "t0", "t1", "t2", "t3", "t4", "t5",
"t6", "t7", "t8", "t9"
);
}
// Scales down by 2 in both directions using MIPS DSPR2 instructions: every
// output byte is the rounded average of a 2x2 block taken from the row at
// src_ptr and the row at src_ptr + src_stride.
// The main loop produces 8 output bytes per iteration; the remaining
// dst_width & 7 bytes are produced two at a time by the residue loop.
// The main loop is skipped when dst_width < 8. The guard used to be
// "bltz $t9", which can never be taken after a logical right shift, so the
// 8-pixel body always ran once and overran short rows.
// NOTE(review): the residue loop writes two bytes per pass, so an odd residue
// writes one byte past dst_width; the asm also stores through dst without a
// "memory" clobber. Both are left unchanged here -- confirm against callers.
void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
                                 uint8* dst, int dst_width) {
  const uint8* t = src_ptr + src_stride;
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"
    "srl $t9, %[dst_width], 3 \n" // iterations -> step 8
    "beqz $t9, 2f \n" // no full group of 8: go to residue
    " nop \n"
    ".p2align 2 \n"
    "1: \n"
    "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
    "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
    "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
    "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
    "lw $t4, 0(%[t]) \n" // |19|18|17|16|
    "lw $t5, 4(%[t]) \n" // |23|22|21|20|
    "lw $t6, 8(%[t]) \n" // |27|26|25|24|
    "lw $t7, 12(%[t]) \n" // |31|30|29|28|
    "addiu $t9, $t9, -1 \n"
    "srl $t8, $t0, 16 \n" // |X|X|3|2|
    "ins $t0, $t4, 16, 16 \n" // |17|16|1|0|
    "ins $t4, $t8, 0, 16 \n" // |19|18|3|2|
    "raddu.w.qb $t0, $t0 \n" // |17+16+1+0|
    "raddu.w.qb $t4, $t4 \n" // |19+18+3+2|
    "shra_r.w $t0, $t0, 2 \n" // |t0+2|>>2
    "shra_r.w $t4, $t4, 2 \n" // |t4+2|>>2
    "srl $t8, $t1, 16 \n" // |X|X|7|6|
    "ins $t1, $t5, 16, 16 \n" // |21|20|5|4|
    "ins $t5, $t8, 0, 16 \n" // |23|22|7|6|
    "raddu.w.qb $t1, $t1 \n" // |21+20+5+4|
    "raddu.w.qb $t5, $t5 \n" // |23+22+7+6|
    "shra_r.w $t1, $t1, 2 \n" // |t1+2|>>2
    "shra_r.w $t5, $t5, 2 \n" // |t5+2|>>2
    "srl $t8, $t2, 16 \n" // |X|X|11|10|
    "ins $t2, $t6, 16, 16 \n" // |25|24|9|8|
    "ins $t6, $t8, 0, 16 \n" // |27|26|11|10|
    "raddu.w.qb $t2, $t2 \n" // |25+24+9+8|
    "raddu.w.qb $t6, $t6 \n" // |27+26+11+10|
    "shra_r.w $t2, $t2, 2 \n" // |t2+2|>>2
    "shra_r.w $t6, $t6, 2 \n" // |t6+2|>>2
    "srl $t8, $t3, 16 \n" // |X|X|15|14|
    "ins $t3, $t7, 16, 16 \n" // |29|28|13|12|
    "ins $t7, $t8, 0, 16 \n" // |31|30|15|14|
    "raddu.w.qb $t3, $t3 \n" // |29+28+13+12|
    "raddu.w.qb $t7, $t7 \n" // |31+30+15+14|
    "shra_r.w $t3, $t3, 2 \n" // |t3+2|>>2
    "shra_r.w $t7, $t7, 2 \n" // |t7+2|>>2
    "addiu %[src_ptr], %[src_ptr], 16 \n"
    "addiu %[t], %[t], 16 \n"
    "sb $t0, 0(%[dst]) \n"
    "sb $t4, 1(%[dst]) \n"
    "sb $t1, 2(%[dst]) \n"
    "sb $t5, 3(%[dst]) \n"
    "sb $t2, 4(%[dst]) \n"
    "sb $t6, 5(%[dst]) \n"
    "sb $t3, 6(%[dst]) \n"
    "sb $t7, 7(%[dst]) \n"
    "bgtz $t9, 1b \n"
    " addiu %[dst], %[dst], 8 \n" // branch delay slot
    "2: \n"
    "andi $t9, %[dst_width], 0x7 \n" // x = residue
    "beqz $t9, 3f \n"
    " nop \n"
    "21: \n"
    // Unaligned 4-byte loads from each row, then the same 2x2 averaging as
    // above for two output bytes.
    "lwr $t1, 0(%[src_ptr]) \n"
    "lwl $t1, 3(%[src_ptr]) \n"
    "lwr $t2, 0(%[t]) \n"
    "lwl $t2, 3(%[t]) \n"
    "srl $t8, $t1, 16 \n"
    "ins $t1, $t2, 16, 16 \n"
    "ins $t2, $t8, 0, 16 \n"
    "raddu.w.qb $t1, $t1 \n"
    "raddu.w.qb $t2, $t2 \n"
    "shra_r.w $t1, $t1, 2 \n"
    "shra_r.w $t2, $t2, 2 \n"
    "sb $t1, 0(%[dst]) \n"
    "sb $t2, 1(%[dst]) \n"
    "addiu %[src_ptr], %[src_ptr], 4 \n"
    "addiu $t9, $t9, -2 \n"
    "addiu %[t], %[t], 4 \n"
    "bgtz $t9, 21b \n"
    " addiu %[dst], %[dst], 2 \n" // branch delay slot
    "3: \n"
    ".set pop \n"
    : [src_ptr] "+r" (src_ptr),
      [dst] "+r" (dst), [t] "+r" (t)
    : [dst_width] "r" (dst_width)
    : "t0", "t1", "t2", "t3", "t4", "t5",
      "t6", "t7", "t8", "t9"
  );
}
// Point-samples a row down by 4 horizontally using MIPS DSPR2 instructions.
// This version keeps byte 0 of every group of four source bytes.
// The main loop handles 8 output bytes (32 source bytes) per iteration with
// word loads and stores; the remaining dst_width & 7 bytes are handled one at
// a time. src_stride is not referenced.
// NOTE(review): the word accesses presumably require 4-byte aligned src_ptr
// and dst -- confirm against the callers. The asm stores through dst but has
// no "memory" clobber.
void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
"srl $t9, %[dst_width], 3 \n" // iterations -> by 8
"beqz $t9, 2f \n"
" nop \n"
".p2align 2 \n"
"1: \n"
"lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
"lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8|
"lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12|
"lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16|
"lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20|
"lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24|
"lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28|
"precr.qb.ph $t1, $t2, $t1 \n" // |6|4|2|0|
"precr.qb.ph $t2, $t4, $t3 \n" // |14|12|10|8|
"precr.qb.ph $t5, $t6, $t5 \n" // |22|20|18|16|
"precr.qb.ph $t6, $t8, $t7 \n" // |30|28|26|24|
"precr.qb.ph $t1, $t2, $t1 \n" // |12|8|4|0|
"precr.qb.ph $t5, $t6, $t5 \n" // |28|24|20|16|
"addiu %[src_ptr], %[src_ptr], 32 \n"
"addiu $t9, $t9, -1 \n"
"sw $t1, 0(%[dst]) \n"
"sw $t5, 4(%[dst]) \n"
"bgtz $t9, 1b \n"
" addiu %[dst], %[dst], 8 \n" // branch delay slot
"2: \n"
"andi $t9, %[dst_width], 7 \n" // residue
"beqz $t9, 3f \n"
" nop \n"
"21: \n"
// one output byte per pass: copy byte 0, skip the next three
"lbu $t1, 0(%[src_ptr]) \n"
"addiu %[src_ptr], %[src_ptr], 4 \n"
"addiu $t9, $t9, -1 \n"
"sb $t1, 0(%[dst]) \n"
"bgtz $t9, 21b \n"
" addiu %[dst], %[dst], 1 \n" // branch delay slot
"3: \n"
".set pop \n"
: [src_ptr] "+r" (src_ptr),
[dst] "+r" (dst)
: [dst_width] "r" (dst_width)
: "t1", "t2", "t3", "t4", "t5",
"t6", "t7", "t8", "t9"
);
}
// Scales down by 4 in both directions using MIPS DSPR2 instructions: every
// output byte is the rounded average (shift right by 4 with rounding) of a
// 4x4 block spanning the four rows src_ptr, s1, s2 and s3, each src_stride
// bytes apart.
// The main loop produces two output bytes per iteration ($t9 = dst_width / 2
// iterations); $t8 = dst_width & 1 selects one extra trailing output byte.
// NOTE(review): there is no branch around the main loop, so its body runs
// once even when dst_width < 2 ($t9 == 0) -- presumably callers never pass
// such widths; confirm. The word loads presumably require 4-byte aligned
// rows, and the asm stores through dst without a "memory" clobber.
void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
intptr_t stride = src_stride;
// Pointers to the second, third and fourth source rows.
const uint8* s1 = src_ptr + stride;
const uint8* s2 = s1 + stride;
const uint8* s3 = s2 + stride;
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
"srl $t9, %[dst_width], 1 \n"
"andi $t8, %[dst_width], 1 \n"
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 0(%[s1]) \n" // |7|6|5|4|
"lw $t2, 0(%[s2]) \n" // |11|10|9|8|
"lw $t3, 0(%[s3]) \n" // |15|14|13|12|
"lw $t4, 4(%[src_ptr]) \n" // |19|18|17|16|
"lw $t5, 4(%[s1]) \n" // |23|22|21|20|
"lw $t6, 4(%[s2]) \n" // |27|26|25|24|
"lw $t7, 4(%[s3]) \n" // |31|30|29|28|
"raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0|
"raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4|
"raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8|
"raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12|
"raddu.w.qb $t4, $t4 \n" // |19 + 18 + 17 + 16|
"raddu.w.qb $t5, $t5 \n" // |23 + 22 + 21 + 20|
"raddu.w.qb $t6, $t6 \n" // |27 + 26 + 25 + 24|
"raddu.w.qb $t7, $t7 \n" // |31 + 30 + 29 + 28|
// $t0 = sum of the first 4x4 block, $t4 = sum of the second
"add $t0, $t0, $t1 \n"
"add $t1, $t2, $t3 \n"
"add $t0, $t0, $t1 \n"
"add $t4, $t4, $t5 \n"
"add $t6, $t6, $t7 \n"
"add $t4, $t4, $t6 \n"
"shra_r.w $t0, $t0, 4 \n"
"shra_r.w $t4, $t4, 4 \n"
"sb $t0, 0(%[dst]) \n"
"sb $t4, 1(%[dst]) \n"
"addiu %[src_ptr], %[src_ptr], 8 \n"
"addiu %[s1], %[s1], 8 \n"
"addiu %[s2], %[s2], 8 \n"
"addiu %[s3], %[s3], 8 \n"
"addiu $t9, $t9, -1 \n"
"bgtz $t9, 1b \n"
" addiu %[dst], %[dst], 2 \n" // branch delay slot
// odd width: one more 4x4 block
"beqz $t8, 2f \n"
" nop \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 0(%[s1]) \n" // |7|6|5|4|
"lw $t2, 0(%[s2]) \n" // |11|10|9|8|
"lw $t3, 0(%[s3]) \n" // |15|14|13|12|
"raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0|
"raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4|
"raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8|
"raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12|
"add $t0, $t0, $t1 \n"
"add $t1, $t2, $t3 \n"
"add $t0, $t0, $t1 \n"
"shra_r.w $t0, $t0, 4 \n"
"sb $t0, 0(%[dst]) \n"
"2: \n"
".set pop \n"
: [src_ptr] "+r" (src_ptr),
[dst] "+r" (dst),
[s1] "+r" (s1),
[s2] "+r" (s2),
[s3] "+r" (s3)
: [dst_width] "r" (dst_width)
: "t0", "t1", "t2", "t3", "t4", "t5",
"t6","t7", "t8", "t9"
);
}
// Scales one row to 3/4 width by dropping pixels (no filtering).
//
// Each loop iteration loads 32 source bytes and stores 24 destination bytes.
// Per the lane comments below (which put byte 0 in the least significant lane,
// i.e. they assume little-endian loads), the bytes kept from every group of
// four are 0, 1 and 3; byte 2 is discarded.
//
// src_ptr:    source row, read with word loads, advanced 32 bytes per pass.
// src_stride: not referenced by this function.
// dst:        destination row, written with word stores, 24 bytes per pass.
// dst_width:  number of output pixels; decremented by 24 per pass.
//
// NOTE(review): the loop body runs before dst_width is tested and only exits
// when dst_width is exactly 0, so presumably callers pass a positive multiple
// of 24 -- confirm against the dispatch code.
// NOTE(review): the asm reads and writes memory through src_ptr/dst but the
// clobber list names only temporaries (no "memory") -- confirm this is safe
// for the way the function is built and called.
void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
".p2align 2 \n"
"1: \n"
// Load 32 consecutive source bytes into t1..t8.
"lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
"lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8|
"lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12|
"lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16|
"lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20|
"lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24|
"lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28|
// Gather the odd-indexed bytes of two words at a time.
"precrq.qb.ph $t0, $t2, $t4 \n" // |7|5|15|13|
"precrq.qb.ph $t9, $t6, $t8 \n" // |23|21|31|29|
"addiu %[dst_width], %[dst_width], -24 \n"
// Shuffle bytes within registers so the unwanted byte ends up in a lane
// that the later prepend/packrl steps overwrite or shift out.
"ins $t1, $t1, 8, 16 \n" // |3|1|0|X|
"ins $t4, $t0, 8, 16 \n" // |X|15|13|12|
"ins $t5, $t5, 8, 16 \n" // |19|17|16|X|
"ins $t8, $t9, 8, 16 \n" // |X|31|29|28|
"addiu %[src_ptr], %[src_ptr], 32 \n"
"packrl.ph $t0, $t3, $t0 \n" // |9|8|7|5|
"packrl.ph $t9, $t7, $t9 \n" // |25|24|23|21|
"prepend $t1, $t2, 8 \n" // |4|3|1|0|
"prepend $t3, $t4, 24 \n" // |15|13|12|11|
"prepend $t5, $t6, 8 \n" // |20|19|17|16|
"prepend $t7, $t8, 24 \n" // |31|29|28|27|
// Store the six packed output words (24 bytes) in ascending pixel order.
"sw $t1, 0(%[dst]) \n"
"sw $t0, 4(%[dst]) \n"
"sw $t3, 8(%[dst]) \n"
"sw $t5, 12(%[dst]) \n"
"sw $t9, 16(%[dst]) \n"
"sw $t7, 20(%[dst]) \n"
// Loop until dst_width hits zero; the dst advance sits in the branch delay
// slot (noreorder is in effect), so it executes on every pass.
"bnez %[dst_width], 1b \n"
" addiu %[dst], %[dst], 24 \n"
".set pop \n"
: [src_ptr] "+r" (src_ptr),
[dst] "+r" (dst),
[dst_width] "+r" (dst_width)
:
: "t0", "t1", "t2", "t3", "t4", "t5",
"t6","t7", "t8", "t9"
);
}
// Scales two adjacent rows to one row at 3/4 width with filtering, weighting
// the first row 3 and the second row 1.
//
// Each loop iteration reads 4 bytes from the row at src_ptr (S0..S3) and 4
// bytes from the row at src_ptr + src_stride (T0..T3) and writes 3 bytes.
// Horizontally, each row is first reduced to three values with rounding:
//   a = (3*x0 + x1 + 2) >> 2,  b = (x1 + x2 + 1) >> 1,  c = (x2 + 3*x3 + 2) >> 2
// The two rows are then combined per output as (3*S + T + 2) >> 2.
//
// src_ptr:    first source row; advanced 4 bytes per pass.
// src_stride: byte offset from the first row to the second row (used as the
//             index register of lwx). Declared "+r" but only read here.
// d:          destination row; advanced 3 bytes per pass.
// dst_width:  number of output pixels; decremented by 3 per pass.
//
// NOTE(review): the body runs once before dst_width is tested, so a
// non-positive dst_width still writes 3 bytes -- confirm callers never pass it.
// NOTE(review): no "memory" clobber is declared although the asm loads and
// stores through pointers -- confirm this is intentional.
void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
// t3 holds the constant multiplier 3 in both halfwords for the whole loop.
"repl.ph $t3, 3 \n" // 0x00030003
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
// Rotate so the two outer pixels (0 and 3) occupy the upper halfword and
// the two inner pixels (1 and 2) occupy the lower halfword.
"rotr $t2, $t0, 8 \n" // |S0|S3|S2|S1|
"rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1|
"muleu_s.ph.qbl $t4, $t2, $t3 \n" // |S0*3|S3*3|
"muleu_s.ph.qbl $t5, $t6, $t3 \n" // |T0*3|T3*3|
// Middle output of each row: rounded average of pixels 1 and 2.
"andi $t0, $t2, 0xFFFF \n" // |0|0|S2|S1|
"andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1|
"raddu.w.qb $t0, $t0 \n"
"raddu.w.qb $t1, $t1 \n"
"shra_r.w $t0, $t0, 1 \n"
"shra_r.w $t1, $t1, 1 \n"
// Outer outputs of each row: widen pixels 1 and 2 to halfwords, swap them
// so they line up with 3*pixel0 / 3*pixel3, add, then round-shift by 2.
"preceu.ph.qbr $t2, $t2 \n" // |0|S2|0|S1|
"preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1|
"rotr $t2, $t2, 16 \n" // |0|S1|0|S2|
"rotr $t6, $t6, 16 \n" // |0|T1|0|T2|
"addu.ph $t2, $t2, $t4 \n"
"addu.ph $t6, $t6, $t5 \n"
// t0 = 3 * (middle value of the first row).
"sll $t5, $t0, 1 \n"
"add $t0, $t5, $t0 \n"
"shra_r.ph $t2, $t2, 2 \n"
"shra_r.ph $t6, $t6, 2 \n"
// t4 = 3 * (outer values of the first row), per halfword.
"shll.ph $t4, $t2, 1 \n"
"addq.ph $t4, $t4, $t2 \n"
// Vertical blend: 3 * first row + second row, rounded shift right by 2.
"addu $t0, $t0, $t1 \n"
"addiu %[src_ptr], %[src_ptr], 4 \n"
"shra_r.w $t0, $t0, 2 \n"
"addu.ph $t6, $t6, $t4 \n"
"shra_r.ph $t6, $t6, 2 \n"
// Upper halfword of t6 is output 0, lower halfword is output 2.
"srl $t1, $t6, 16 \n"
"addiu %[dst_width], %[dst_width], -3 \n"
"sb $t1, 0(%[d]) \n"
"sb $t0, 1(%[d]) \n"
"sb $t6, 2(%[d]) \n"
// Loop while output pixels remain; the destination advance is in the
// branch delay slot and executes on every pass.
"bgtz %[dst_width], 1b \n"
" addiu %[d], %[d], 3 \n"
// Label 3 is not referenced by any branch in this block.
"3: \n"
".set pop \n"
: [src_ptr] "+r" (src_ptr),
[src_stride] "+r" (src_stride),
[d] "+r" (d),
[dst_width] "+r" (dst_width)
:
: "t0", "t1", "t2", "t3",
"t4", "t5", "t6"
);
}
// Scales two adjacent rows to one row at 3/4 width with filtering, weighting
// both rows equally.
//
// Each loop iteration reads 4 bytes from the row at src_ptr (S0..S3) and 4
// bytes from the row at src_ptr + src_stride (T0..T3) and writes 3 bytes.
// Horizontally, each row is first reduced to three values with rounding:
//   a = (3*x0 + x1 + 2) >> 2,  b = (x1 + x2 + 1) >> 1,  c = (x2 + 3*x3 + 2) >> 2
// The two rows are then combined per output as (S + T + 1) >> 1.
//
// src_ptr:    first source row; advanced 4 bytes per pass.
// src_stride: byte offset from the first row to the second row (used as the
//             index register of lwx). Declared "+r" but only read here.
// d:          destination row; advanced 3 bytes per pass.
// dst_width:  number of output pixels; decremented by 3 per pass.
//
// NOTE(review): the body runs once before dst_width is tested, so a
// non-positive dst_width still writes 3 bytes -- confirm callers never pass it.
// NOTE(review): no "memory" clobber is declared although the asm loads and
// stores through pointers -- confirm this is intentional.
void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
// t2 holds the constant multiplier 3 in both halfwords for the whole loop.
"repl.ph $t2, 3 \n" // 0x00030003
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
// Rotate so the two outer pixels (0 and 3) occupy the upper halfword and
// the two inner pixels (1 and 2) occupy the lower halfword.
"rotr $t4, $t0, 8 \n" // |S0|S3|S2|S1|
"rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1|
"muleu_s.ph.qbl $t3, $t4, $t2 \n" // |S0*3|S3*3|
"muleu_s.ph.qbl $t5, $t6, $t2 \n" // |T0*3|T3*3|
// Middle output of each row: rounded average of pixels 1 and 2.
"andi $t0, $t4, 0xFFFF \n" // |0|0|S2|S1|
"andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1|
"raddu.w.qb $t0, $t0 \n"
"raddu.w.qb $t1, $t1 \n"
"shra_r.w $t0, $t0, 1 \n"
"shra_r.w $t1, $t1, 1 \n"
// Outer outputs of each row: widen pixels 1 and 2 to halfwords, swap them
// so they line up with 3*pixel0 / 3*pixel3, add, then round-shift by 2.
"preceu.ph.qbr $t4, $t4 \n" // |0|S2|0|S1|
"preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1|
"rotr $t4, $t4, 16 \n" // |0|S1|0|S2|
"rotr $t6, $t6, 16 \n" // |0|T1|0|T2|
"addu.ph $t4, $t4, $t3 \n"
"addu.ph $t6, $t6, $t5 \n"
"shra_r.ph $t6, $t6, 2 \n"
"shra_r.ph $t4, $t4, 2 \n"
// Vertical blend: rounded average of the two rows.
"addu.ph $t6, $t6, $t4 \n"
"addiu %[src_ptr], %[src_ptr], 4 \n"
"shra_r.ph $t6, $t6, 1 \n"
"addu $t0, $t0, $t1 \n"
"addiu %[dst_width], %[dst_width], -3 \n"
"shra_r.w $t0, $t0, 1 \n"
// Upper halfword of t6 is output 0, lower halfword is output 2.
"srl $t1, $t6, 16 \n"
"sb $t1, 0(%[d]) \n"
"sb $t0, 1(%[d]) \n"
"sb $t6, 2(%[d]) \n"
// Loop while output pixels remain; the destination advance is in the
// branch delay slot and executes on every pass.
"bgtz %[dst_width], 1b \n"
" addiu %[d], %[d], 3 \n"
// Label 3 is not referenced by any branch in this block.
"3: \n"
".set pop \n"
: [src_ptr] "+r" (src_ptr),
[src_stride] "+r" (src_stride),
[d] "+r" (d),
[dst_width] "+r" (dst_width)
:
: "t0", "t1", "t2", "t3",
"t4", "t5", "t6"
);
}
// Scales one row to 3/8 width by dropping pixels (no filtering).
//
// Each loop iteration loads 32 source bytes and stores 12 destination bytes.
// Per the lane comments below (which put byte 0 in the least significant lane,
// i.e. they assume little-endian loads), the source bytes kept from each
// 32-byte group are 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27 and 30.
//
// src_ptr:    source row, read with word loads, advanced 32 bytes per pass.
// src_stride: not referenced by this function.
// dst:        destination row, written with word stores, 12 bytes per pass.
// dst_width:  number of output pixels; decremented by 12 per pass.
//
// NOTE(review): the body runs once before any test, and the loop repeats only
// while at least 12 more outputs remain after the decrement, so presumably
// callers pass a positive multiple of 12 -- confirm against the dispatch code.
// NOTE(review): no "memory" clobber is declared although the asm loads and
// stores through pointers -- confirm this is intentional.
void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
".p2align 2 \n"
"1: \n"
// Load 32 consecutive source bytes into t0..t7.
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
"lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
"lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
"lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16|
"lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20|
"lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
"lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
// Byte-swap and shift so the wanted bytes sit in lanes that the ins /
// prepend steps below can merge into three packed output words.
"wsbh $t0, $t0 \n" // |2|3|0|1|
"wsbh $t6, $t6 \n" // |26|27|24|25|
"srl $t0, $t0, 8 \n" // |X|2|3|0|
"srl $t3, $t3, 16 \n" // |X|X|15|14|
"srl $t5, $t5, 16 \n" // |X|X|23|22|
"srl $t7, $t7, 16 \n" // |X|X|31|30|
"ins $t1, $t2, 24, 8 \n" // |8|6|5|4|
"ins $t6, $t5, 0, 8 \n" // |26|27|24|22|
"ins $t1, $t0, 0, 16 \n" // |8|6|3|0|
"ins $t6, $t7, 24, 8 \n" // |30|27|24|22|
"prepend $t2, $t3, 24 \n" // |X|15|14|11|
"ins $t4, $t4, 16, 8 \n" // |19|16|17|X|
"ins $t4, $t2, 0, 16 \n" // |19|16|14|11|
"addiu %[src_ptr], %[src_ptr], 32 \n"
"addiu %[dst_width], %[dst_width], -12 \n"
// t8 = outputs remaining after the next pass; negative means stop.
"addiu $t8,%[dst_width], -12 \n"
// Store the three packed output words (12 bytes).
"sw $t1, 0(%[dst]) \n"
"sw $t4, 4(%[dst]) \n"
"sw $t6, 8(%[dst]) \n"
// The destination advance is in the branch delay slot and executes on
// every pass, including the last.
"bgez $t8, 1b \n"
" addiu %[dst], %[dst], 12 \n"
".set pop \n"
: [src_ptr] "+r" (src_ptr),
[dst] "+r" (dst),
[dst_width] "+r" (dst_width)
:
: "t0", "t1", "t2", "t3", "t4",
"t5", "t6", "t7", "t8"
);
}
// Scales two adjacent rows to one row at 3/8 width using box averages.
//
// Each loop iteration reads 8 bytes from the row at src_ptr (S0..S7) and 8
// bytes from the row at src_ptr + src_stride (T0..T7) and writes 3 bytes:
//   dst[0] = ((S0+S1+S2 + T0+T1+T2) * 0x2AAA) >> 16   (3x2 box)
//   dst[1] = ((S3+S4+S5 + T3+T4+T5) * 0x2AAA) >> 16   (3x2 box)
//   dst[2] =  (S6+S7 + T6+T7) >> 2                    (2x2 box)
// 0x2AAA is 10922, i.e. roughly 65536/6, so the multiply-and-shift
// approximates a division by 6. The shifts here are plain (truncating) srl.
//
// src_ptr:    first source row; advanced 8 bytes per pass.
// src_stride: byte offset from the first row to the second row.
// dst_ptr:    destination row; advanced 3 bytes per pass.
// dst_width:  number of output pixels; decremented by 3 per pass.
//
// NOTE(review): the body runs once before dst_width is tested, so a
// non-positive dst_width still writes 3 bytes -- confirm callers never pass it.
// NOTE(review): no "memory" clobber is declared although the asm loads and
// stores through pointers -- confirm this is intentional.
void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
intptr_t stride = src_stride;
// t walks the second source row in step with src_ptr.
const uint8* t = src_ptr + stride;
// Fixed-point reciprocal used for the six-pixel boxes (see header comment).
const int c = 0x2AAA;
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
"lw $t2, 0(%[t]) \n" // |T3|T2|T1|T0|
"lw $t3, 4(%[t]) \n" // |T7|T6|T5|T4|
// Regroup the upper words so pixels 6-7 of both rows share one register
// and pixels 4-5 of both rows share another, then sum each register.
"rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6|
"packrl.ph $t4, $t1, $t3 \n" // |S7|S6|T7|T6|
"packrl.ph $t5, $t3, $t1 \n" // |T5|T4|S5|S4|
"raddu.w.qb $t4, $t4 \n" // S7+S6+T7+T6
"raddu.w.qb $t5, $t5 \n" // T5+T4+S5+S4
// Isolate pixel 3 of both rows and add it to the pixel 4-5 sum.
"precrq.qb.ph $t6, $t0, $t2 \n" // |S3|S1|T3|T1|
"precrq.qb.ph $t6, $t6, $t6 \n" // |S3|T3|S3|T3|
"srl $t4, $t4, 2 \n" // t4 / 4
"srl $t6, $t6, 16 \n" // |0|0|S3|T3|
"raddu.w.qb $t6, $t6 \n" // 0+0+S3+T3
"addu $t6, $t5, $t6 \n"
"mul $t6, $t6, %[c] \n" // t6 * 0x2AAA
// Shift pixel 3 out so only pixels 0-2 of each row are summed.
"sll $t0, $t0, 8 \n" // |S2|S1|S0|0|
"sll $t2, $t2, 8 \n" // |T2|T1|T0|0|
"raddu.w.qb $t0, $t0 \n" // S2+S1+S0+0
"raddu.w.qb $t2, $t2 \n" // T2+T1+T0+0
"addu $t0, $t0, $t2 \n"
"mul $t0, $t0, %[c] \n" // t0 * 0x2AAA
"addiu %[src_ptr], %[src_ptr], 8 \n"
"addiu %[t], %[t], 8 \n"
"addiu %[dst_width], %[dst_width], -3 \n"
// dst_ptr is advanced before the stores, hence the negative offsets below.
"addiu %[dst_ptr], %[dst_ptr], 3 \n"
"srl $t6, $t6, 16 \n"
"srl $t0, $t0, 16 \n"
"sb $t4, -1(%[dst_ptr]) \n"
"sb $t6, -2(%[dst_ptr]) \n"
// The final store sits in the branch delay slot and executes on every pass.
"bgtz %[dst_width], 1b \n"
" sb $t0, -3(%[dst_ptr]) \n"
".set pop \n"
: [src_ptr] "+r" (src_ptr),
[dst_ptr] "+r" (dst_ptr),
[t] "+r" (t),
[dst_width] "+r" (dst_width)
: [c] "r" (c)
: "t0", "t1", "t2", "t3", "t4", "t5", "t6"
);
}
// Scales three adjacent rows to one row at 3/8 width using box averages.
//
// Each loop iteration reads 8 bytes from each of the rows at src_ptr (S0..S7),
// src_ptr + src_stride (T0..T7) and src_ptr + 2*src_stride (R0..R7), and
// writes 3 bytes:
//   dst[0] = (sum of pixels 0-2 over the three rows * 0x1C71) >> 16  (3x3 box)
//   dst[1] = (sum of pixels 3-5 over the three rows * 0x1C71) >> 16  (3x3 box)
//   dst[2] = (sum of pixels 6-7 over the three rows * 0x2AAA) >> 16  (2x3 box)
// 0x1C71 is 7281 (roughly 65536/9) and 0x2AAA is 10922 (roughly 65536/6), so
// the multiply-and-shift approximates division by 9 and by 6 respectively.
// The shifts here are plain (truncating) srl.
//
// src_ptr:    first source row; advanced 8 bytes per pass.
// src_stride: byte offset between consecutive source rows.
// dst_ptr:    destination row; advanced 3 bytes per pass.
// dst_width:  number of output pixels; decremented by 3 per pass.
//
// NOTE(review): the body runs once before dst_width is tested, so a
// non-positive dst_width still writes 3 bytes -- confirm callers never pass it.
// NOTE(review): no "memory" clobber is declared although the asm loads and
// stores through pointers -- confirm this is intentional.
void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
intptr_t stride = src_stride;
// s1 and s2 walk the second and third source rows in step with src_ptr.
const uint8* s1 = src_ptr + stride;
stride += stride;
const uint8* s2 = src_ptr + stride;
// Fixed-point reciprocals: c1 for nine-pixel boxes, c2 for six-pixel boxes.
const int c1 = 0x1C71;
const int c2 = 0x2AAA;
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
"lw $t2, 0(%[s1]) \n" // |T3|T2|T1|T0|
"lw $t3, 4(%[s1]) \n" // |T7|T6|T5|T4|
"lw $t4, 0(%[s2]) \n" // |R3|R2|R1|R0|
"lw $t5, 4(%[s2]) \n" // |R7|R6|R5|R4|
// Regroup the upper words of the first two rows so pixels 6-7 share one
// register and pixels 4-5 share another, then sum each register.
"rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6|
"packrl.ph $t6, $t1, $t3 \n" // |S7|S6|T7|T6|
"raddu.w.qb $t6, $t6 \n" // S7+S6+T7+T6
"packrl.ph $t7, $t3, $t1 \n" // |T5|T4|S5|S4|
"raddu.w.qb $t7, $t7 \n" // T5+T4+S5+S4
// Fold in the third row's pixels 4-5 (into t7) and 6-7 (into t6).
"sll $t8, $t5, 16 \n" // |R5|R4|0|0|
"raddu.w.qb $t8, $t8 \n" // R5+R4
"addu $t7, $t7, $t8 \n"
"srl $t8, $t5, 16 \n" // |0|0|R7|R6|
"raddu.w.qb $t8, $t8 \n" // R7 + R6
"addu $t6, $t6, $t8 \n"
"mul $t6, $t6, %[c2] \n" // t6 * 0x2AAA
// Isolate pixel 3 of all three rows and add it to the pixel 4-5 sum.
"precrq.qb.ph $t8, $t0, $t2 \n" // |S3|S1|T3|T1|
"precrq.qb.ph $t8, $t8, $t4 \n" // |S3|T3|R3|R1|
"srl $t8, $t8, 8 \n" // |0|S3|T3|R3|
"raddu.w.qb $t8, $t8 \n" // S3 + T3 + R3
"addu $t7, $t7, $t8 \n"
"mul $t7, $t7, %[c1] \n" // t7 * 0x1C71
// Shift pixel 3 out so only pixels 0-2 of each row are summed.
"sll $t0, $t0, 8 \n" // |S2|S1|S0|0|
"sll $t2, $t2, 8 \n" // |T2|T1|T0|0|
"sll $t4, $t4, 8 \n" // |R2|R1|R0|0|
"raddu.w.qb $t0, $t0 \n"
"raddu.w.qb $t2, $t2 \n"
"raddu.w.qb $t4, $t4 \n"
"addu $t0, $t0, $t2 \n"
"addu $t0, $t0, $t4 \n"
"mul $t0, $t0, %[c1] \n" // t0 * 0x1C71
"addiu %[src_ptr], %[src_ptr], 8 \n"
"addiu %[s1], %[s1], 8 \n"
"addiu %[s2], %[s2], 8 \n"
"addiu %[dst_width], %[dst_width], -3 \n"
// dst_ptr is advanced before the stores, hence the negative offsets below.
"addiu %[dst_ptr], %[dst_ptr], 3 \n"
"srl $t6, $t6, 16 \n"
"srl $t7, $t7, 16 \n"
"srl $t0, $t0, 16 \n"
"sb $t6, -1(%[dst_ptr]) \n"
"sb $t7, -2(%[dst_ptr]) \n"
// The final store sits in the branch delay slot and executes on every pass.
"bgtz %[dst_width], 1b \n"
" sb $t0, -3(%[dst_ptr]) \n"
".set pop \n"
: [src_ptr] "+r" (src_ptr),
[dst_ptr] "+r" (dst_ptr),
[s1] "+r" (s1),
[s2] "+r" (s2),
[dst_width] "+r" (dst_width)
: [c1] "r" (c1), [c2] "r" (c2)
: "t0", "t1", "t2", "t3", "t4",
"t5", "t6", "t7", "t8"
);
}
#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

Some files were not shown because too many files have changed in this diff Show more