Ring Daemon
Loading...
Searching...
No Matches
audio_input.cpp
Go to the documentation of this file.
1/*
2 * Copyright (C) 2004-2026 Savoir-faire Linux Inc.
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18#include "audio_frame_resizer.h"
19#include "audio_input.h"
20#include "jami/media_const.h"
21#include "manager.h"
22#include "media_decoder.h"
23#include "resampler.h"
24#include "logger.h"
25#include "ringbufferpool.h"
26#include "tracepoint.h"
27#include "video/video_device.h"
28
29#include <future>
30#include <memory>
31
32namespace jami {
33
// Duration of one audio packet. Used in this file to derive the frame size from the
// sample rate and to pace the wake-up time of the capture loop.
34static constexpr auto MS_PER_PACKET = std::chrono::milliseconds(20);
35
// Primary constructor.
// Stores the identifier, adopts the ring buffer pool's internal audio format, derives the
// number of samples per packet from MS_PER_PACKET, and creates the resampler, the frame
// resizer (whose callback hands each resized frame to frameResized()) and the worker loop
// (whose second callable invokes process()).
// NOTE(review): listing line 48 is absent from this listing, so part of the constructor
// body is not visible here.
36AudioInput::AudioInput(const std::string& id)
37 : id_(id)
38 , format_(Manager::instance().getRingBufferPool().getInternalAudioFormat())
// Samples per packet: sample_rate * packet duration (ms) / 1000.
39 , frameSize_(static_cast<int>(format_.sample_rate * MS_PER_PACKET.count()) / 1000)
40 , resampler_(new Resampler)
41 , resizer_(new AudioFrameResizer(format_,
42 frameSize_,
43 [this](std::shared_ptr<AudioFrame>&& f) { frameResized(std::move(f)); }))
44 , deviceGuard_()
// presumably (setup, iteration, cleanup) callables — confirm against the loop class
45 , loop_([] { return true; }, [this] { process(); }, [] {})
46{
47 JAMI_DEBUG("Creating audio input with id: {}", id_);
49}
50
// Convenience constructor: delegates to AudioInput(id).
// NOTE(review): `resource` is not used in the visible lines; the body (listing line 54) is
// absent from this listing — presumably it selects `resource` as the input. Confirm against
// the original file.
51AudioInput::AudioInput(const std::string& id, const std::string& resource)
52 : AudioInput(id)
53{
55}
56
68
// Iteration body of the worker loop (see the loop_ initializer in the constructor):
// performs a single readFromDevice() pass.
69void
70AudioInput::process()
71{
72 readFromDevice();
73}
74
// Forwards `start` to the decoder's updateStartTime() when a decoder exists; otherwise a no-op.
// NOTE(review): the signature line (listing line 76) is absent from this listing; the name
// is presumably AudioInput::updateStartTime(int64_t start) — confirm.
75void
77{
78 if (decoder_) {
79 decoder_->updateStartTime(start);
80 }
81}
82
83void
84AudioInput::frameResized(std::shared_ptr<AudioFrame>&& ptr)
85{
86 std::shared_ptr<AudioFrame> frame = std::move(ptr);
87 frame->pointer()->pts = static_cast<int64_t>(sent_samples);
88 sent_samples += frame->pointer()->nb_samples;
89
90 notify(std::static_pointer_cast<MediaFrame>(std::move(frame)));
91}
92
// Forwards `time` to the decoder's setSeekTime() when a decoder exists; otherwise a no-op.
// NOTE(review): the signature line (listing line 94) is absent from this listing; the name
// is presumably AudioInput::setSeekTime(int64_t time) — confirm.
93void
95{
96 if (decoder_) {
97 decoder_->setSeekTime(time);
98 }
99}
100
// One capture iteration: refill the ring buffer from the file decoder if needed, wait for
// data addressed to id_, then apply mute handling, resample to format_ if required, and
// enqueue the frame into the resizer.
// NOTE(review): listing lines 115, 125 and 138 are absent from this listing. `bufferPool`
// is declared on the missing line 115 — presumably a reference to the manager's ring
// buffer pool; confirm.
101void
102AudioInput::readFromDevice()
103{
104 {
// Source selection state is guarded by resourceMutex_ for the duration of the refill.
105 std::lock_guard lk(resourceMutex_);
// NOTE(review): readFromFile()/readFromQueue() return immediately when decoder_ is null,
// so these loops would spin while the buffer stays empty — confirm that cannot happen.
106 if (decodingFile_)
107 while (ringBuf_ && ringBuf_->isEmpty())
108 readFromFile();
109 if (playingFile_) {
110 while (ringBuf_ && ringBuf_->getLength(id_) == 0)
111 readFromQueue();
112 }
113 }
114
// If the wait reports no data, sleep out the rest of the period; then schedule the next wake-up.
116 if (not bufferPool.waitForDataAvailable(id_, wakeUp_))
117 std::this_thread::sleep_until(wakeUp_);
118 wakeUp_ += MS_PER_PACKET;
119
120 auto audioFrame = bufferPool.getData(id_);
121 if (not audioFrame)
122 return;
123
124 if (muteState_) {
// NOTE(review): listing line 125 is absent — presumably the frame is filled with silence here.
126 audioFrame->has_voice = false; // force no voice activity when muted
127 }
128
// format_ is read under fmtMutex_ for the remainder of the function.
129 std::lock_guard lk(fmtMutex_);
130 if (bufferPool.getInternalAudioFormat() != format_)
131 audioFrame = resampler_->resample(std::move(audioFrame), format_);
132 resizer_->enqueue(std::move(audioFrame));
133
// exchange(false) clears the flag, so the recorder is told about the stream once per request.
134 if (recorderCallback_ && settingMS_.exchange(false)) {
135 recorderCallback_(MediaStream("a:local", format_, static_cast<int64_t>(sent_samples)));
136 }
137
139}
140
141void
142AudioInput::readFromQueue()
143{
144 if (!decoder_)
145 return;
146 if (paused_ || !decoder_->emitFrame(true)) {
147 std::this_thread::sleep_for(MS_PER_PACKET);
148 }
149}
150
// Decodes from the current file decoder and reacts to the returned status.
// Does nothing when there is no decoder.
// NOTE(review): every `case` label of this switch (listing lines 158, 160, 163, 166, 169,
// 170) is absent from this listing, so the status value that selects each branch cannot be
// read here. Visible branches, in order: no action; re-create the decoder via
// createDecoder(); log a decode failure; log a read buffer overflow; no action.
151void
152AudioInput::readFromFile()
153{
154 if (!decoder_)
155 return;
156 const auto ret = decoder_->decode();
157 switch (ret) {
159 break;
161 createDecoder();
162 break;
164 JAMI_ERR() << "Failed to decode frame";
165 break;
167 JAMI_ERR() << "Read buffer overflow detected";
168 break;
171 break;
172 }
173}
174
// Prepares capture of window/desktop audio described by `device`.
// Fills devOpts_ from the target id and the current format, and returns false (after
// logging) when deviceGuard_ is empty; on success records the source ring buffer id and
// marks the input as playing a device.
// NOTE(review): listing lines 195, 201, 204, 214 and 225 are absent from this listing.
// Line 214 presumably assigns deviceGuard_ from the manager's capture stream and line 225
// presumably binds the ring buffers — confirm against the original file.
175bool
176AudioInput::initCapture(const std::string& device)
177{
178 std::string targetId = device;
179#if defined(_WIN32)
180 // There are two possible formats for device:
181 // 1. A string containing "window-id:hwnd=XXXX" where XXXX is the HWND of the window to capture
182 // 2. A string that does not contain a window handle, in which case we capture desktop audio
183 std::string pattern = "window-id:hwnd=";
184 size_t winHandlePos = device.find(pattern);
185
186 if (winHandlePos != std::string::npos) {
187 // Get HWND from device URI
188 size_t startPos = winHandlePos + pattern.size();
// The handle runs up to the next space, or to the end of the string if there is none.
189 size_t endPos = device.find(' ', startPos);
190 if (endPos == std::string::npos) {
191 endPos = device.size();
192 }
193 targetId = device.substr(startPos, endPos - startPos);
194 } else {
// NOTE(review): the else body (listing line 195) is absent from this listing.
196 }
197#elif defined(__linux__)
198 // On Linux, we always capture desktop audio because window-specific audio capture is not yet implemented
199 // Possible to implement window audio capture on X11 specifically in the future, but not Wayland as of Jan 2026
200 // See https://github.com/flatpak/xdg-desktop-portal/issues/957
202#elif defined(__APPLE__)
203 // As of Jan 2026, audio capture has not been implemented for macOS (TODO)
205#endif
206
207 devOpts_ = {};
208 devOpts_.input = targetId;
209 devOpts_.channel = format_.nb_channels;
210 devOpts_.framerate = format_.sample_rate;
211
212 // This will cause the audio layer to create a ring buffer with id=targetId
213 // The audio layer will then fill it with the audio from the captured window/desktop
215 if (!deviceGuard_) {
// An empty targetId is reported as a desktop-audio capture failure.
216 if (!targetId.empty())
217 JAMI_ERROR("Failed to start capture stream for window-id: {}", targetId);
218 else
219 JAMI_ERROR("Failed to start capture stream for desktop audio");
220 return false;
221 }
222
223 // We want the audio input's ring buffer to read the captured audio from the audio layer
224 // Then the audio RTP session will handle sending the audio over the network
226
227 sourceRingBufferId_ = targetId;
228 playingDevice_ = true;
229 return true;
230}
231
// Prepares capture from the audio device named `device`: resets devOpts_, fills it with the
// device name and the current channel count / sample rate, and marks the input as playing
// a device. Always returns true in the visible code.
// NOTE(review): listing line 239 is absent from this listing — presumably it acquires the
// device guard for the capture stream; confirm.
232bool
233AudioInput::initDevice(const std::string& device)
234{
235 devOpts_ = {};
236 devOpts_.input = device;
237 devOpts_.channel = format_.nb_channels;
238 devOpts_.framerate = format_.sample_rate;
240 playingDevice_ = true;
241 return true;
242}
243
// Sets this input up to play audio from an already-opened demuxer stream.
// Replaces the decoder with one bound to `demuxer`/`index` whose frames are pushed into
// ringBuf_, records `path` as the current resource, and starts the worker loop.
// NOTE(review): listing lines 253, 262, 264 and 267 are absent from this listing
// (presumably the muted-frame handling and the ring buffer bind calls) — confirm.
244void
245AudioInput::configureFilePlayback(const std::string& path, std::shared_ptr<MediaDemuxer>& demuxer, int index)
246{
247 decoder_.reset();
248 devOpts_ = {};
249 devOpts_.input = path;
250 devOpts_.name = path;
251 auto decoder = std::make_unique<MediaDecoder>(demuxer, index, [this](std::shared_ptr<MediaFrame>&& frame) {
// NOTE(review): the statement controlled by this mute check (listing line 253) is absent;
// as printed here the `if` would appear to govern the ring buffer check below.
252 if (muteState_)
254 if (ringBuf_)
255 ringBuf_->put(std::static_pointer_cast<AudioFrame>(frame));
256 });
257 decoder->emulateRate();
// The interrupt callback returns non-zero once this input is no longer capturing.
258 decoder->setInterruptCallback([](void* data) -> int { return not static_cast<AudioInput*>(data)->isCapturing(); },
259 this);
260
261 // have file audio mixed into the local buffer so it gets played
263 // Bind to itself to be able to read from the ringbuffer
265
266 sourceRingBufferId_ = id_;
268
269 wakeUp_ = std::chrono::steady_clock::now() + MS_PER_PACKET;
270 playingFile_ = true;
271 decoder_ = std::move(decoder);
272 resource_ = path;
273 loop_.start();
274}
275
276void
288
// Calls flushBuffers() on the decoder when one exists; otherwise a no-op.
// NOTE(review): the signature line (listing line 290) is absent from this listing; the
// function name is presumably AudioInput::flushBuffers() — confirm.
289void
291{
292 if (decoder_) {
293 decoder_->flushBuffers();
294 }
295}
296
// Prepares playback of the audio file at `path`.
// Returns false when the file is not readable. When no decoder can be created for it,
// falls back to initDevice("") and returns that result. Otherwise marks the input as
// decoding a file and returns true.
// NOTE(review): listing lines 317 and 320 are absent from this listing (presumably ring
// buffer bind calls) — confirm against the original file.
297bool
298AudioInput::initFile(const std::string& path)
299{
300 if (access(path.c_str(), R_OK) != 0) {
301 JAMI_ERROR("File '{}' not available", path);
302 return false;
303 }
304
305 devOpts_ = {};
306 devOpts_.input = path;
307 devOpts_.name = path;
// presumably requests looped playback of the file — confirm against DeviceParams
308 devOpts_.loop = "1";
309 // sets devOpts_'s sample rate and number of channels
310 if (!createDecoder()) {
311 JAMI_WARN() << "Unable to decode audio from file, switching back to default device";
312 return initDevice("");
313 }
314 wakeUp_ = std::chrono::steady_clock::now() + MS_PER_PACKET;
315
316 // have file audio mixed into the local buffer so it gets played
318 sourceRingBufferId_ = id_;
319 decodingFile_ = true;
321 return true;
322}
323
// Switches the audio source to `resource` and returns a shared future for the resulting
// device parameters. An empty resource selects initDevice(""); otherwise the resource is
// split at the protocol separator into a prefix and a suffix, and the suffix is passed to
// one of initFile() / initCapture() / initDevice().
// NOTE(review): the signature line (listing line 325) and listing lines 337, 364 and 366
// are absent from this listing.
324std::shared_future<DeviceParams>
326{
327 // Always switch inputs, even if it's the same resource, so audio will be in sync with video
328 std::unique_lock lk(resourceMutex_);
329
330 JAMI_DEBUG("Switching audio source from [{}] to [{}]", resource_, resource);
331
// The previous guard is moved into a local, so it lives until this function returns.
332 auto oldGuard = std::move(deviceGuard_);
333
334 decoder_.reset();
335 if (decodingFile_) {
336 decodingFile_ = false;
338 }
339
340 playingDevice_ = false;
341 resource_ = resource;
342 sourceRingBufferId_.clear();
343 devOptsFound_ = false;
344
// Install a fresh promise; the old one is swapped into `p` and destroyed at scope exit.
345 std::promise<DeviceParams> p;
346 foundDevOpts_.swap(p);
347
348 if (resource_.empty()) {
349 if (initDevice(""))
350 foundDevOpts(devOpts_);
351 } else {
352 static const std::string& sep = libjami::Media::VideoProtocolPrefix::SEPARATOR;
353 const auto pos = resource_.find(sep);
// NOTE(review): both early returns below yield a default-constructed (invalid)
// shared_future and skip updating futureDevOpts_ and starting the loop — confirm that
// callers check valid() before using the result.
354 if (pos == std::string::npos)
355 return {};
356
357 const auto prefix = resource_.substr(0, pos);
358 if ((pos + sep.size()) >= resource_.size())
359 return {};
360
361 const auto suffix = resource_.substr(pos + sep.size());
362
// NOTE(review): the conditions selecting initFile() and initCapture() (listing lines 364
// and 366) are absent; presumably they compare `prefix` with the file/display prefixes.
363 bool ready = false;
365 ready = initFile(suffix);
367 ready = initCapture(suffix);
368 else
369 ready = initDevice(suffix);
370
371 if (ready)
372 foundDevOpts(devOpts_);
373 }
374
375 futureDevOpts_ = foundDevOpts_.get_future().share();
376 wakeUp_ = std::chrono::steady_clock::now() + MS_PER_PACKET;
// resourceMutex_ is released before the loop is started and the setup callback is invoked.
377 lk.unlock();
378 if (not loop_.isRunning())
379 loop_.start();
// NOTE(review): invoked whether or not any init*() call reported success.
380 if (onSuccessfulSetup_)
381 onSuccessfulSetup_(MEDIA_AUDIO, 0);
382 return futureDevOpts_;
383}
384
385void
386AudioInput::foundDevOpts(const DeviceParams& params)
387{
388 if (!devOptsFound_) {
389 devOptsFound_ = true;
390 foundDevOpts_.set_value(params);
391 }
392}
393
394void
395AudioInput::setRecorderCallback(const std::function<void(const MediaStream& ms)>& cb)
396{
397 settingMS_.exchange(true);
398 recorderCallback_ = cb;
399 if (decoder_)
400 decoder_->setContextCallback([this]() {
401 if (recorderCallback_)
402 recorderCallback_(getInfo());
403 });
404}
405
406bool
407AudioInput::createDecoder()
408{
409 decoder_.reset();
410 if (devOpts_.input.empty()) {
411 foundDevOpts(devOpts_);
412 return false;
413 }
414
415 auto decoder = std::make_unique<MediaDecoder>([this](std::shared_ptr<MediaFrame>&& frame) {
416 if (ringBuf_)
417 ringBuf_->put(std::static_pointer_cast<AudioFrame>(frame));
418 });
419
420 // NOTE don't emulate rate, file is read as frames are needed
421
422 decoder->setInterruptCallback([](void* data) -> int { return not static_cast<AudioInput*>(data)->isCapturing(); },
423 this);
424
425 if (decoder->openInput(devOpts_) < 0) {
426 JAMI_ERR() << "Unable to open input '" << devOpts_.input << "'";
427 foundDevOpts(devOpts_);
428 return false;
429 }
430
431 if (decoder->setupAudio() < 0) {
432 JAMI_ERR() << "Unable to setup decoder for '" << devOpts_.input << "'";
433 foundDevOpts(devOpts_);
434 return false;
435 }
436
437 auto ms = decoder->getStream(devOpts_.input);
438 devOpts_.channel = ms.nbChannels;
439 devOpts_.framerate = ms.sampleRate;
440 JAMI_DBG() << "Created audio decoder: " << ms;
441
442 decoder_ = std::move(decoder);
443 foundDevOpts(devOpts_);
444 decoder_->setContextCallback([this]() {
445 if (recorderCallback_)
446 recorderCallback_(getInfo());
447 });
448 return true;
449}
450
// Replaces format_ with `fmt` under fmtMutex_ and reconfigures the resizer with the new
// format and the matching samples-per-packet count (sample_rate * packet ms / 1000).
// NOTE(review): the signature line (listing line 452) is absent from this listing; the name
// is presumably AudioInput::setFormat(const AudioFormat& fmt) — confirm.
451void
453{
454 std::lock_guard lk(fmtMutex_);
455 format_ = fmt;
456 resizer_->setFormat(format_, static_cast<int>(format_.sample_rate * MS_PER_PACKET.count()) / 1000);
457}
458
// Logs the requested mute state and stores it in muteState_.
// NOTE(review): the signature line (listing line 460) is absent from this listing; the name
// is presumably AudioInput::setMuted(bool isMuted) — confirm.
459void
461{
462 JAMI_WARN("Audio Input muted [%s]", isMuted ? "YES" : "NO");
463 muteState_ = isMuted;
464}
465
// Returns a MediaStream named "a:local" built from the current format and the number of
// samples sent so far; format_ is read under fmtMutex_.
// NOTE(review): the return type and signature lines (listing lines 466-467) are absent from
// this listing; this is presumably `MediaStream AudioInput::getInfo() const` — confirm.
468{
469 std::lock_guard lk(fmtMutex_);
470 return MediaStream("a:local", format_, static_cast<int64_t>(sent_samples));
471}
472
// Same as the parameterless getInfo(), but the returned MediaStream is named `name`
// instead of "a:local". format_ is read under fmtMutex_.
// NOTE(review): the return-type line (listing line 473) is absent from this listing.
474AudioInput::getInfo(const std::string& name) const
475{
476 std::lock_guard lk(fmtMutex_);
477 auto ms = MediaStream(name, format_, static_cast<int64_t>(sent_samples));
478 return ms;
479}
480
481} // namespace jami
Buffers extra samples.
void updateStartTime(int64_t start)
void setRecorderCallback(const std::function< void(const MediaStream &ms)> &cb)
void setMuted(bool isMuted)
void setPaused(bool paused)
void setSeekTime(int64_t time)
void configureFilePlayback(const std::string &path, std::shared_ptr< MediaDemuxer > &demuxer, int index)
void setFormat(const AudioFormat &fmt)
std::shared_future< DeviceParams > switchInput(const std::string &resource)
AudioInput(const std::string &id)
MediaStream getInfo() const
bool isCapturing() const
Definition audio_input.h:51
Manager (controller) of daemon.
Definition manager.h:66
static LIBJAMI_TEST_EXPORT Manager & instance()
Definition manager.cpp:694
std::unique_ptr< AudioDeviceGuard > startCaptureStream(const std::string &captureDevice)
Start a capture stream on the given device.
Definition manager.h:155
RingBufferPool & getRingBufferPool()
Return a pointer to the instance of the RingBufferPool.
Definition manager.cpp:3197
std::unique_ptr< AudioDeviceGuard > startAudioStream(AudioDeviceType stream)
Definition manager.h:143
void notify(std::shared_ptr< MediaFrame > data)
Definition observer.h:117
Wrapper class for libswresample.
Definition resampler.h:36
std::shared_ptr< RingBuffer > createRingBuffer(const std::string &id)
Create a new ringbuffer with a default readoffset.
void bindHalfDuplexOut(const std::string &readerBufferId, const std::string &sourceBufferId)
Attaches a reader to the specified source.
static const char *const DEFAULT_ID
void flush(const std::string &ringbufferId)
void unBindHalfDuplexOut(const std::string &readerBufferId, const std::string &sourceBufferId)
Detaches a reader from the specified source.
bool isRunning() const noexcept
#define JAMI_ERR(...)
Definition logger.h:230
#define JAMI_ERROR(formatstr,...)
Definition logger.h:243
#define JAMI_DBG(...)
Definition logger.h:228
#define JAMI_DEBUG(formatstr,...)
Definition logger.h:238
#define JAMI_WARN(...)
Definition logger.h:229
void fillWithSilence(AVFrame *frame)
static constexpr const char DEVICE_DESKTOP[]
static constexpr auto MS_PER_PACKET
void emitSignal(Args... args)
Definition jami_signal.h:64
@ MEDIA_AUDIO
Definition media_codec.h:46
static constexpr const char * DISPLAY
Definition media_const.h:29
static constexpr const char * SEPARATOR
Definition media_const.h:32
static constexpr const char * FILE
Definition media_const.h:30
Structure to hold sample rate and channel number associated with audio data.
DeviceParams Parameters used by MediaDecoder and MediaEncoder to open a LibAV device/stream.
rational< double > framerate
std::string input
#define jami_tracepoint(...)
Definition tracepoint.h:48