Mirror of https://github.com/RPCS3/rpcs3
Synced 2024-11-05 16:05:48 +00:00
cellRec: implement audio encoding (no mixing yet)

Commit 51d0df97d3 (parent 4c14290694)
2 changed files with 133 additions and 42 deletions
@@ -155,7 +155,8 @@ struct rec_param
     }
 };
 
-constexpr u32 rec_framerate = 30; // Always 30 fps
+static constexpr u32 rec_framerate = 30; // Always 30 fps
+static constexpr u32 rec_channels = 2; // Always 2 channels
 
 class rec_video_sink : public utils::video_sink
 {
@@ -219,11 +220,21 @@ struct rec_info
     vm::bptr<u8> video_input_buffer{}; // Used by the game to inject a frame right before it would render a frame to the screen.
     vm::bptr<u8> audio_input_buffer{}; // Used by the game to inject audio: 2-channel interleaved (left-right) * 256 samples * sizeof(f32) at 48000 kHz
 
+    // Wrapper for our audio data
+    struct audio_block
+    {
+        // 2-channel interleaved (left-right), 256 samples, float
+        static constexpr usz block_size = rec_channels * CELL_REC_AUDIO_BLOCK_SAMPLES * sizeof(f32);
+        std::array<u8, block_size> block{};
+        s64 pts{};
+    };
+
     std::vector<utils::video_sink::encoder_frame> video_ringbuffer;
-    std::vector<u8> audio_ringbuffer;
+    std::vector<audio_block> audio_ringbuffer;
     usz video_ring_pos = 0;
     usz video_ring_frame_count = 0;
-    usz audio_ring_step = 0;
+    usz audio_ring_pos = 0;
+    usz audio_ring_block_count = 0;
 
     usz next_video_ring_pos()
     {
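For orientation: with the fixed cellRec audio parameters, one audio_block is 2 channels * 256 samples * sizeof(f32) = 2048 bytes, and 48 kHz input yields 48000 / 256 = 187.5 blocks per second. A standalone sketch of that arithmetic (plain standard C++ types instead of RPCS3's aliases):

    // Standalone sketch of the block arithmetic above (fixed cellRec parameters:
    // 48 kHz, 2 channels, 256 samples per block).
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    constexpr std::uint32_t sample_rate = 48000;
    constexpr std::uint32_t channels = 2;
    constexpr std::uint32_t samples_per_block = 256; // CELL_REC_AUDIO_BLOCK_SAMPLES

    constexpr std::size_t block_size = channels * samples_per_block * sizeof(float);

    int main()
    {
        std::printf("bytes per block: %zu\n", block_size);                                   // 2048
        std::printf("blocks per second: %f\n", sample_rate / double(samples_per_block));     // 187.5
    }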
@@ -232,6 +243,13 @@ struct rec_info
         return pos;
     }
 
+    usz next_audio_ring_pos()
+    {
+        const usz pos = audio_ring_pos;
+        audio_ring_pos = (audio_ring_pos + 1) % audio_ringbuffer.size();
+        return pos;
+    }
+
     std::shared_ptr<rec_video_sink> ringbuffer_sink;
     std::shared_ptr<utils::video_encoder> encoder;
     std::unique_ptr<named_thread<std::function<void()>>> video_provider_thread;
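next_audio_ring_pos() mirrors the existing video helper: it returns the current slot and advances the index modulo the buffer size, so once the ring is full the oldest block is overwritten. A self-contained sketch of that wrap-around behaviour (hypothetical buffer size, not RPCS3 code):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    int main()
    {
        std::vector<int> ring(4); // pretend each element is one audio_block
        std::size_t pos = 0;

        for (int block = 0; block < 10; block++)
        {
            const std::size_t write_pos = pos;
            pos = (pos + 1) % ring.size(); // same scheme as next_audio_ring_pos()
            ring[write_pos] = block;       // block 4 overwrites block 0, and so on
        }

        // Only the newest ring.size() blocks survive; oldest first: 6 7 8 9
        for (std::size_t i = 0; i < ring.size(); i++)
            std::printf("%d ", ring[(pos + i) % ring.size()]);
    }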
@@ -245,13 +263,13 @@ struct rec_info
     u32 video_bps = 512000;
     s32 video_codec_id = 12; // AV_CODEC_ID_MPEG4
     s32 max_b_frames = 2;
-    const u32 fps = rec_framerate; // Always 30 fps
+    static constexpr u32 fps = rec_framerate; // Always 30 fps
 
     // Audio parameters
     u32 sample_rate = 48000;
     u32 audio_bps = 64000;
     s32 audio_codec_id = 86018; // AV_CODEC_ID_AAC
-    const u32 channels = 2; // Always 2 channels
+    static constexpr u32 channels = rec_channels; // Always 2 channels
 
     // Recording duration
     atomic_t<u64> recording_time_start = 0;
@@ -588,8 +606,7 @@ void rec_info::start_video_provider()
             }
 
             // We only care for new video frames or audio samples that can be properly encoded, so we check the timestamps and pts.
-            const usz timestamp_us = get_system_time() - recording_time_start - pause_time_total;
-            const usz timestamp_ms = timestamp_us / 1000;
+            const usz timestamp_ms = (get_system_time() - recording_time_start - pause_time_total) / 1000;
 
             /////////////////
             //    VIDEO    //
@@ -632,7 +649,7 @@ void rec_info::start_video_provider()
                 // The video frames originate from our render pipeline and are stored in a ringbuffer.
                 utils::video_sink::encoder_frame frame = ringbuffer_sink->get_frame();
 
-                if (const s64 pts = encoder->get_pts(frame.timestamp_ms); pts > last_video_pts && frame.data.size() > 0)
+                if (const s64 pts = encoder->get_pts(frame.timestamp_ms); pts > last_video_pts && !frame.data.empty())
                 {
                     ensure(frame.data.size() == frame_size);
                     utils::video_sink::encoder_frame& frame_data = video_ringbuffer[next_video_ring_pos()];
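The `pts > last_video_pts` check is what drops frames that arrive faster than the 30 fps target: get_pts() quantizes a millisecond timestamp onto the frame grid. A sketch of that dedup logic, assuming get_pts() is simply round(ms * fps / 1000), i.e. the inverse of the get_timestamp_ms() shown further down (the real encoder may differ in detail):

    #include <cmath>
    #include <cstdio>

    // Assumed pts mapping for a 30 fps stream; illustrative only.
    long long get_pts(unsigned long long timestamp_ms, unsigned fps = 30)
    {
        return static_cast<long long>(std::round((timestamp_ms * fps) / 1000.0));
    }

    int main()
    {
        // Frames sampled every 10 ms: only roughly one per ~33 ms produces a new pts,
        // the rest fail the "pts > last_video_pts" check and are skipped.
        long long last_video_pts = -1;
        for (unsigned long long ms = 0; ms <= 100; ms += 10)
        {
            const long long pts = get_pts(ms);
            const bool keep = pts > last_video_pts;
            std::printf("t=%3llu ms pts=%lld %s\n", ms, pts, keep ? "keep" : "skip");
            if (keep)
                last_video_pts = pts;
        }
    }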
@@ -647,34 +664,75 @@ void rec_info::start_video_provider()
             // The video frames originate from our render pipeline and are directly encoded by the encoder video sink itself.
             //}
 
-            if (use_internal_audio)
-            {
-                // TODO: fetch audio
-            }
+            /////////////////
+            //    AUDIO    //
+            /////////////////
 
-            if (use_external_audio && audio_input_buffer)
-            {
-                // 2-channel interleaved (left-right), 256 samples, float
-                std::array<f32, 2 * CELL_REC_AUDIO_BLOCK_SAMPLES> audio_data{};
-                std::memcpy(audio_data.data(), audio_input_buffer.get_ptr(), audio_data.size() * sizeof(f32));
-
-                // TODO: mix audio with param.audio_input_mix_vol
-            }
+            const usz timestamp_us = get_system_time() - recording_time_start - pause_time_total;
 
-            if (use_ring_buffer)
-            {
-                // TODO: add audio properly
-                //std::memcpy(&ringbuffer[get_ring_pos(pts) + ring_audio_offset], audio_data.data(), audio_data.size());
-            }
-            else
-            {
-                // TODO: add audio to encoder
-            }
+            // TODO: mix external and internal audio with param.audio_input_mix_vol
+            // TODO: mix channels if necessary
+            if (use_external_audio)
+            {
+                // The audio samples originate from cellRec instead of our render pipeline.
+                // TODO: This needs to be synchronized with the game somehow if possible.
+                if (const s64 pts = encoder->get_audio_pts(timestamp_us); pts > last_audio_pts)
+                {
+                    if (audio_input_buffer)
+                    {
+                        if (use_ring_buffer)
+                        {
+                            // The audio samples originate from cellRec and are stored in a ringbuffer.
+                            audio_block& sample_block = audio_ringbuffer[next_audio_ring_pos()];
+                            std::memcpy(sample_block.block.data(), audio_input_buffer.get_ptr(), sample_block.block.size());
+                            sample_block.pts = pts;
+                            audio_ring_block_count++;
+                        }
+                        else
+                        {
+                            // The audio samples originate from cellRec and are pushed to the encoder immediately.
+                            encoder->add_audio_samples(audio_input_buffer.get_ptr(), CELL_REC_AUDIO_BLOCK_SAMPLES, channels, timestamp_us);
+                        }
+                    }
+
+                    last_audio_pts = pts;
+                }
+            }
+            else if (use_ring_buffer && ringbuffer_sink && use_internal_audio)
+            {
+                // The audio samples originate from cellAudio and are stored in a ringbuffer.
+                utils::video_sink::encoder_sample sample = ringbuffer_sink->get_sample();
+
+                if (!sample.data.empty() && sample.channels >= 2 && sample.sample_count >= CELL_REC_AUDIO_BLOCK_SAMPLES)
+                {
+                    s64 pts = encoder->get_audio_pts(sample.timestamp_us);
+
+                    // Each encoder_sample can have more than one block
+                    for (usz i = 0; i < sample.sample_count; i += CELL_REC_AUDIO_BLOCK_SAMPLES)
+                    {
+                        if (pts > last_audio_pts)
+                        {
+                            audio_block& sample_block = audio_ringbuffer[next_audio_ring_pos()];
+                            std::memcpy(sample_block.block.data(), &sample.data[i * channels * sizeof(f32)], sample_block.block.size());
+                            sample_block.pts = pts;
+                            last_audio_pts = pts;
+                            audio_ring_block_count++;
+                        }
+
+                        // Increase pts for each sample block
+                        pts++;
+                    }
+                }
+            }
+            //else
+            //{
+                // The audio samples originate from cellAudio and are directly encoded by the encoder video sink itself.
+            //}
 
             // Update recording time
             recording_time_total = encoder->get_timestamp_ms(encoder->last_video_pts());
 
-            thread_ctrl::wait_for(100);
+            thread_ctrl::wait_for(1);
         }
     });
 }
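In the internal-audio path above, one encoder_sample coming from the cellAudio sink can carry several 256-sample blocks, which the loop slices at byte offsets of i * channels * sizeof(f32), advancing the pts by one per block. A standalone sketch of that slicing (hypothetical sample count, plain C++ types):

    #include <cstddef>
    #include <cstdio>

    int main()
    {
        constexpr std::size_t samples_per_block = 256; // CELL_REC_AUDIO_BLOCK_SAMPLES
        constexpr std::size_t channels = 2;
        constexpr std::size_t sample_count = 768;      // e.g. one sink sample holding 3 blocks

        long long pts = 100; // whatever the audio pts of the first block was

        for (std::size_t i = 0; i < sample_count; i += samples_per_block)
        {
            const std::size_t byte_offset = i * channels * sizeof(float);
            const std::size_t byte_size = samples_per_block * channels * sizeof(float);
            std::printf("block pts=%lld: bytes [%zu, %zu)\n", pts, byte_offset, byte_offset + byte_size);
            pts++; // one pts step per 256-sample block
        }
        // Prints byte ranges [0, 2048), [2048, 4096), [4096, 6144) for pts 100..102.
    }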
@@ -705,7 +763,7 @@ void rec_info::stop_video_provider(bool flush)
     // Flush the ringbuffer if necessary.
     // This should only happen if the video sink is not the encoder itself.
     // In this case the encoder should have been idle until now.
-    if (flush && param.ring_sec > 0 && !video_ringbuffer.empty())
+    if (flush && param.ring_sec > 0 && (!video_ringbuffer.empty() || !audio_ringbuffer.empty()))
     {
         cellRec.notice("Flushing video ringbuffer.");
 
@@ -714,19 +772,51 @@ void rec_info::stop_video_provider(bool flush)
         ensure(encoder);
 
         const usz frame_count = std::min(video_ringbuffer.size(), video_ring_frame_count);
-        const usz start_offset = video_ring_frame_count < video_ringbuffer.size() ? 0 : video_ring_frame_count;
-        const s64 start_pts = video_ringbuffer[start_offset % video_ringbuffer.size()].pts;
+        const usz video_start_offset = video_ring_frame_count < video_ringbuffer.size() ? 0 : video_ring_frame_count;
+        const s64 video_start_pts = video_ringbuffer.empty() ? 0 : video_ringbuffer[video_start_offset % video_ringbuffer.size()].pts;
 
-        for (usz i = 0; i < frame_count; i++)
+        const usz block_count = std::min(audio_ringbuffer.size(), audio_ring_block_count);
+        const usz audio_start_offset = audio_ring_block_count < audio_ringbuffer.size() ? 0 : audio_ring_block_count;
+        const s64 audio_start_pts = audio_ringbuffer.empty() ? 0 : audio_ringbuffer[audio_start_offset % audio_ringbuffer.size()].pts;
+
+        cellRec.error("Flushing video ringbuffer: block_count=%d, audio_ringbuffer.size=%d", block_count, audio_ringbuffer.size());
+        cellRec.error("Flushing video ringbuffer: video_start_pts=%d, audio_start_pts=%d", video_start_pts, audio_start_pts);
+
+        // Try to add the frames and samples in proper order
+        for (usz sync_timestamp_us = 0, frame = 0, block = 0; frame < frame_count || block < block_count; frame++)
         {
-            const usz pos = (start_offset + i) % video_ringbuffer.size();
-            utils::video_sink::encoder_frame& frame_data = video_ringbuffer[pos];
-            encoder->add_frame(frame_data.data, frame_data.pitch, frame_data.width, frame_data.height, frame_data.av_pixel_format, encoder->get_timestamp_ms(frame_data.pts - start_pts));
-
-            // TODO: add audio data to encoder
+            // Add one frame
+            if (frame < frame_count)
+            {
+                const usz pos = (video_start_offset + frame) % video_ringbuffer.size();
+                utils::video_sink::encoder_frame& frame_data = video_ringbuffer[pos];
+                const usz timestamp_ms = encoder->get_timestamp_ms(frame_data.pts - video_start_pts);
+                encoder->add_frame(frame_data.data, frame_data.pitch, frame_data.width, frame_data.height, frame_data.av_pixel_format, timestamp_ms);
+
+                // Increase sync timestamp
+                sync_timestamp_us = timestamp_ms * 1000;
+            }
+
+            // Add all the samples that fit into the last frame
+            for (usz i = block; i < block_count; i++)
+            {
+                const usz pos = (audio_start_offset + i) % audio_ringbuffer.size();
+                const audio_block& sample_block = audio_ringbuffer[pos];
+                const usz timestamp_us = encoder->get_audio_timestamp_us(sample_block.pts - audio_start_pts);
+
+                // Stop adding new samples if the sync timestamp is exceeded, unless we already added all the frames.
+                if (timestamp_us > sync_timestamp_us && frame < frame_count)
+                {
+                    break;
+                }
+
+                encoder->add_audio_samples(sample_block.block.data(), CELL_REC_AUDIO_BLOCK_SAMPLES, channels, timestamp_us);
+                block++;
+            }
         }
 
         video_ringbuffer.clear();
+        audio_ringbuffer.clear();
     }
 }
 
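The flush loop above interleaves the two ringbuffers: each video frame advances a sync timestamp, and audio blocks are drained until they would run ahead of it (unless all frames are already written). A simplified, self-contained sketch of that ordering with made-up timestamps (one block per 256 samples at 48 kHz, roughly 5333 us apart; not RPCS3 code):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    int main()
    {
        const std::vector<unsigned long long> frame_ts_ms = {0, 33, 67, 100}; // pretend 4 buffered frames
        const std::size_t audio_blocks = 20;                                   // pretend 20 buffered blocks

        unsigned long long sync_timestamp_us = 0;
        std::size_t block = 0;

        for (std::size_t frame = 0; frame < frame_ts_ms.size() || block < audio_blocks; frame++)
        {
            if (frame < frame_ts_ms.size())
            {
                std::printf("frame @ %llu us\n", frame_ts_ms[frame] * 1000);
                sync_timestamp_us = frame_ts_ms[frame] * 1000;
            }

            for (std::size_t i = block; i < audio_blocks; i++)
            {
                const unsigned long long block_us = i * 1000000ull * 256 / 48000; // ~5333 us per block
                if (block_us > sync_timestamp_us && frame < frame_ts_ms.size())
                    break; // wait for the next frame to advance the sync point

                std::printf("audio @ %llu us\n", block_us);
                block++;
            }
        }
    }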
@@ -1093,6 +1183,8 @@ error_code cellRecOpen(vm::cptr<char> pDirName, vm::cptr<char> pFileName, vm::cp
     rec.cbUserData = cbUserData;
     rec.last_video_pts = -1;
     rec.audio_ringbuffer.clear();
+    rec.audio_ring_block_count = 0;
+    rec.audio_ring_pos = 0;
     rec.video_ringbuffer.clear();
     rec.video_ring_frame_count = 0;
     rec.video_ring_pos = 0;
@@ -1103,16 +1195,13 @@ error_code cellRecOpen(vm::cptr<char> pDirName, vm::cptr<char> pFileName, vm::cp
 
     if (rec.param.ring_sec > 0)
     {
-        const u32 audio_size_per_sample = rec.channels * sizeof(float);
-        const u32 audio_size_per_second = rec.sample_rate * audio_size_per_sample;
-        const usz audio_ring_buffer_size = rec.param.ring_sec * audio_size_per_second;
+        const usz audio_ring_buffer_size = static_cast<usz>(std::ceil((rec.param.ring_sec * rec.sample_rate) / static_cast<f32>(CELL_REC_AUDIO_BLOCK_SAMPLES)));
         const usz video_ring_buffer_size = rec.param.ring_sec * rec.fps;
 
         cellRec.notice("Preparing ringbuffer for %d seconds. video_ring_buffer_size=%d, audio_ring_buffer_size=%d, pitch=%d, width=%d, height=%d", rec.param.ring_sec, video_ring_buffer_size, audio_ring_buffer_size, rec.input_format.pitch, rec.input_format.width, rec.input_format.height);
 
         rec.audio_ringbuffer.resize(audio_ring_buffer_size);
-        rec.audio_ring_step = audio_size_per_sample;
-        rec.video_ringbuffer.resize(video_ring_buffer_size, {});
+        rec.video_ringbuffer.resize(video_ring_buffer_size);
 
         rec.ringbuffer_sink = std::make_shared<rec_video_sink>();
         rec.ringbuffer_sink->use_internal_audio = rec.param.use_internal_audio();
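The audio ring is now sized in 256-sample blocks rather than bytes. As a worked example with a hypothetical ring_sec of 10: ceil(10 * 48000 / 256) = 1875 audio blocks, next to 10 * 30 = 300 video frames. A standalone check of that sizing:

    #include <cmath>
    #include <cstddef>
    #include <cstdio>

    int main()
    {
        const unsigned ring_sec = 10;           // hypothetical value, chosen by the game
        const unsigned sample_rate = 48000;
        const unsigned samples_per_block = 256; // CELL_REC_AUDIO_BLOCK_SAMPLES
        const unsigned fps = 30;

        const auto audio_blocks = static_cast<std::size_t>(std::ceil((ring_sec * sample_rate) / static_cast<float>(samples_per_block)));
        const std::size_t video_frames = ring_sec * fps;

        std::printf("audio blocks: %zu, video frames: %zu\n", audio_blocks, video_frames); // 1875, 300
    }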
@@ -26,7 +26,7 @@ namespace utils
             m_frames_to_encode.emplace_back(timestamp_ms, pitch, width, height, pixel_format, std::move(frame));
         }
 
-        void add_audio_samples(u8* buf, u32 sample_count, u16 channels, usz timestamp_us)
+        void add_audio_samples(const u8* buf, u32 sample_count, u16 channels, usz timestamp_us)
         {
             // Do not allow new samples while flushing
             if (m_flush || !buf || !sample_count || !channels)
@@ -51,12 +51,14 @@ namespace utils
 
         usz get_timestamp_ms(s64 pts) const
         {
-            return static_cast<usz>(std::round((pts * 1000) / static_cast<float>(m_framerate)));
+            return static_cast<usz>(std::round((pts * 1000) / static_cast<f32>(m_framerate)));
         }
 
         usz get_audio_timestamp_us(s64 pts) const
         {
-            return static_cast<usz>(std::round((pts * 1000) / static_cast<float>(m_sample_rate)));
+            static constexpr f32 us_per_sec = 1000000.0f;
+            const f32 us_per_block = us_per_sec / (m_sample_rate / static_cast<f32>(m_samples_per_block));
+            return static_cast<usz>(pts * us_per_block);
         }
 
         atomic_t<bool> has_error{false};
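get_audio_timestamp_us() now treats pts as a block index: at 48 kHz with 256 samples per block there are 187.5 blocks per second, so one block corresponds to 1000000 / 187.5 ≈ 5333.33 us. A quick standalone check of that mapping (m_samples_per_block assumed to be 256 here):

    #include <cstdio>

    int main()
    {
        const float us_per_sec = 1000000.0f;
        const unsigned sample_rate = 48000;
        const unsigned samples_per_block = 256; // assumed value of m_samples_per_block

        const float us_per_block = us_per_sec / (sample_rate / static_cast<float>(samples_per_block));

        // Prints approximately 0, 5333, 10666, 16000 us for pts 0..3.
        for (long long pts = 0; pts < 4; pts++)
            std::printf("pts %lld -> %llu us\n", pts, static_cast<unsigned long long>(pts * us_per_block));
    }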