/*
 * Example: Audio/Video Player using the LT API client
 *
 * This example demonstrates how to:
 *      1. Connect to an LT310 device and retrieve audio/video data from a given source URL.
 *      2. Split the work across three threads:
 *          - Main thread for displaying video frames using OpenGL/GLFW.
 *          - Background thread for fetching video frames into a bounded queue.
 *          - Background thread for fetching and playing audio using SDL3 or WaveOut (fallback).
 *      3. Handle synchronization between video rendering and audio playback.
 *      4. Track dropped/missed frames and display performance statistics.
 *      5. Gracefully shut down resources when the application exits.
 *
 * Usage:
 *      av_player.exe -source <inputSource> [-format <format>]
 *
 * Arguments:
 *      -source, -s   URL of a valid LT input, for example:
 *                        lt310:/0/hdmi-in/0
 *                        lt310:/0/sdi-in/1
 *      -format, -f   Video format: nv12 (default) or yuyv
 *
 * Key points:
 *      - A source URL is required; the program stops with a fatal error if that input is not locked.
 *      - Audio is handled by SDL3 with WaveOut fallback for maximum compatibility.
 *      - Video rendering uses GLFW and OpenGL for hardware-accelerated display.
 *      - Performance statistics track FPS and missed frames.
 *      - Audio and video processing are handled in separate threads.
 *      - The LT client API fetches raw frames in selected media formats.
 *
 * Memory Management:
 *      - All resources are properly cleaned up in reverse order of allocation
 *      - RAII principles are used where possible
 *      - Thread-safe cleanup with atomic flags and proper joining
 *        (see "Comprehensive cleanup" section in main() for implementation)
 *
 * Note:
 *      This code is intended for demonstration purposes. For production use,
 *      ensure proper error handling and resource cleanup.
 *
 * © 2025 Enciris Technologies. All rights reserved.
 */

#include <algorithm>
#include <atomic>
#include <chrono>
#include <cmath>
#include <condition_variable>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <vector>
#ifdef _WIN32
#include <windows.h>
#include <mmsystem.h>
#else
#include <csignal>
#endif

#include "SDL3/SDL.h"
#include "glad/gl.h"
#include "glfw/glfw3.h"
#include "../../lt.h"
#include "yuyv.h"
#include "nv12.h"


/**
 * Prints command-line usage to standard output.
 *
 * The default video format advertised here has to match the default picked by
 * the argument parser in main(), which is nv12.
 */
void printHelp() {
    std::cout << R"(
Usage:
    av_player.exe -source <sourceURL> [-format <format>]

Arguments:
    -source, -s   URL of a valid LT input (e.g. sdi-in/0)
    -format, -f   Video format: nv12 (default) or yuyv

This example connects to the given source, retrieves audio and video frames
from the LT API, and plays them in real time using SDL3 and OpenGL.
)" << std::endl;
}


//=============================================================================
// Performance tracking & statistics
//=============================================================================

// Render-loop bookkeeping. Within this file these are read and written by main() only.
static int cnt{0};                                           // Frames presented since the last FPS report
static int missed{0};                                        // Frames inferred missing from timestamp gaps
static uint64_t prevTS{0};                                   // Timestamp of the previously rendered frame (0 = none yet)
static std::chrono::steady_clock::time_point startTime{};    // Assigned at the top of main()

/**
 * Resource usage and performance statistics tracker.
 * Monitors audio throughput and video FPS.
 *
 * Threading: the audio thread adds to audioBytesProcessed and stores queueSize
 * while the main thread reads and resets them, so both are atomics, as is
 * audioMissed. The remaining fields (audioMeasureStart, audioKBps, avgFPS) are
 * only touched through updateAudioThroughput()/print() and the render loop,
 * i.e. from a single thread.
 */
struct ResourceStats {
    // Audio performance metrics
    std::atomic<size_t> queueSize{0};                       // Current audio buffer queue size
    std::atomic<size_t> audioBytesProcessed{0};             // Bytes processed in the current measurement period
    std::chrono::steady_clock::time_point audioMeasureStart = std::chrono::steady_clock::now();
    double audioKBps = 0.0;                                 // Audio throughput in KB/s
    std::atomic<int> audioMissed{0};                        // Audio missed/dropped counter

    // Video performance metrics
    double avgFPS = 0.0;                                    // Average frames per second

    /**
     * Recomputes the audio throughput once at least one second has elapsed
     * since the previous measurement, then starts a new measurement period.
     * Does nothing when called earlier than that.
     */
    void updateAudioThroughput()
    {
        const auto now = std::chrono::steady_clock::now();
        const std::chrono::duration<double> elapsed = now - audioMeasureStart;

        if (elapsed.count() >= 1.0) {
            // Read and reset in one atomic step so bytes added concurrently by
            // the audio thread are carried into the next period, not lost.
            const size_t bytes = audioBytesProcessed.exchange(0);
            audioKBps = (bytes / 1024.0) / elapsed.count();
            audioMeasureStart = now;
        }
    }

    /**
     * Prints current resource statistics to console.
     * @param videoMissed  Video frames counted as missed by the caller
     * @param videoDropped Video frames dropped by the frame queue
     */
    void print(int videoMissed = 0, size_t videoDropped = 0)
    {
        updateAudioThroughput();
        printf("Video: %.2fFPS - missed %d - dropped %zu | Audio: %.0fkb/s - missed %d\n",
            avgFPS, videoMissed, videoDropped, audioKBps, audioMissed.load());
    }

    /**
     * Resets audio missed/dropped counters after statistics display.
     */
    void resetAudioCounters()
    {
        audioMissed = 0;
    }
};

static ResourceStats resourceStats;


//=============================================================================
// Audio subsystem
//=============================================================================

// Global audio control and state
static std::atomic<bool> audioRunning{true};    // Cleared to stop audioThread(); videoThread() and the render loop also test it
static SDL_AudioStream* audioStream = nullptr;  // SDL3 playback stream; assigned by initializeSDLAudio(), null when SDL audio is unused
#ifdef _WIN32
static bool useWaveOut = false;                 // Set by main() when SDL3 audio init fails and WaveOut is tried instead (no CLI switch)
#endif


//=============================================================================
// Video subsystem
//=============================================================================

// Bounded, mutex-protected FIFO of video packets with a drop-oldest policy
struct VideoFrameQueue {
    std::queue<lt::Packet> frames;
    std::mutex mtx;
    std::condition_variable cv;
    std::atomic<bool> running{true};
    static const size_t MAX_QUEUE_SIZE = 4; // Capacity; older frames are discarded beyond this
    std::atomic<size_t> droppedFrames{0};   // Number of frames discarded by push()

    // Appends a frame, discarding the oldest entries first if the queue is full,
    // then wakes one waiter.
    void push(lt::Packet frame)
    {
        std::lock_guard<std::mutex> guard(mtx);

        while (!(frames.size() < MAX_QUEUE_SIZE)) {
            frames.pop();
            droppedFrames.fetch_add(1);
        }

        frames.push(frame);
        cv.notify_one();
    }

    // Removes the oldest frame into `frame`. Waits up to timeoutMs for one to
    // arrive; returns false on timeout or when the queue is empty after being
    // told to stop (running == false).
    bool pop(lt::Packet& frame, int timeoutMs = 100)
    {
        std::unique_lock<std::mutex> guard(mtx);

        // The predicate is evaluated before any blocking, so a non-empty queue
        // or a stopped queue falls straight through without waiting.
        const bool ready = cv.wait_for(guard, std::chrono::milliseconds(timeoutMs),
            [this] { return !frames.empty() || !running; });
        if (!ready) {
            return false; // Timed out with nothing queued
        }
        if (frames.empty()) {
            return false; // Woken because the queue was stopped
        }

        frame = frames.front();
        frames.pop();
        return true;
    }

    // Current number of buffered frames.
    size_t size() {
        std::lock_guard<std::mutex> guard(mtx);
        return frames.size();
    }

    // Frames discarded by push() since the last reset.
    size_t getDroppedFrames() {
        return droppedFrames;
    }

    // Zeroes the dropped-frame counter.
    void resetDroppedFrames() {
        droppedFrames.store(0);
    }
};

// Global video control and state
static VideoFrameQueue videoQueue;              // Frames handed from videoThread() to the render loop in main()
static std::atomic<bool> videoRunning{true};    // Cleared to stop videoThread() and the render loop

// Forward declarations
static void videoThread(lt::Client& client, const std::string& videoWorkerURL);  // Defined after main()

#ifdef _WIN32
/**
 * Windows WaveOut audio player for fallback compatibility.
 * Provides low-level audio output when SDL3 is unavailable or fails.
 *
 * A fixed pool of kBufferCount buffers is prepared once in init(). A buffer is
 * considered free when its header carries WHDR_DONE: init() sets that flag on
 * every successfully prepared header, queue() clears it before handing the
 * buffer to waveOutWrite(), and the driver sets it again when playback of that
 * buffer has finished.
 *
 * queue() and shutdown() are serialised by mtx.
 */
struct WaveOutPlayer {
    HWAVEOUT hWave = nullptr;                  // WaveOut device handle
    WAVEFORMATEX wfx{};                        // Audio format specification

    // Buffer management constants
    static constexpr int kBufferCount = 12;     // Number of pooled buffers
    static constexpr int kMillisPerBuffer = 80; // Nominal duration of one buffer in ms

    std::vector<std::vector<uint8_t>> buffers;  // PCM storage; buffers[i].size() is the capacity of headers[i]
    std::vector<WAVEHDR> headers;               // WaveOut buffer headers
    std::mutex mtx;                             // Serialises queue() and shutdown()
    bool initialized = false;                   // Initialization status flag

    /**
     * Initializes WaveOut audio system with specified format.
     * @param samplerate Sample rate in Hz (e.g., 48000)
     * @param channels Number of audio channels (1=mono, 2=stereo)
     * @param bits Bit depth (16 or 32)
     * @return true if the device was opened and at least one buffer prepared
     */
    bool init(int samplerate, int channels, int bits)
    {
        if (initialized) return true;

        if (samplerate <= 0 || channels <= 0 || bits <= 0) {
            printf("WaveOut: invalid format (%d Hz, %d channels, %d-bit)\n", samplerate, channels, bits);
            return false;
        }

        // Configure audio format
        wfx.wFormatTag = WAVE_FORMAT_PCM;
        wfx.nChannels = (WORD)channels;
        wfx.nSamplesPerSec = samplerate;
        wfx.wBitsPerSample = (WORD)bits;
        wfx.nBlockAlign = (WORD)((wfx.nChannels * wfx.wBitsPerSample) / 8);
        wfx.nAvgBytesPerSec = wfx.nSamplesPerSec * wfx.nBlockAlign;
        wfx.cbSize = 0;

        if (wfx.nBlockAlign == 0) {
            printf("WaveOut: invalid format (%d Hz, %d channels, %d-bit)\n", samplerate, channels, bits);
            return false;
        }

        // Open WaveOut device
        MMRESULT mm = waveOutOpen(&hWave, WAVE_MAPPER, &wfx, 0, 0, CALLBACK_NULL);
        if (mm != MMSYSERR_NOERROR) {
            printf("waveOutOpen failed (%u)\n", (unsigned)mm);
            hWave = nullptr;
            return false;
        }

        // Size each buffer for ~kMillisPerBuffer of audio, rounded down to a
        // whole number of sample frames so a buffer never ends mid-frame.
        size_t bytesPerBuffer = (size_t)wfx.nAvgBytesPerSec * kMillisPerBuffer / 1000;
        bytesPerBuffer -= bytesPerBuffer % wfx.nBlockAlign;
        if (bytesPerBuffer == 0) bytesPerBuffer = wfx.nBlockAlign;

        buffers.assign(kBufferCount, std::vector<uint8_t>(bytesPerBuffer));
        headers.assign(kBufferCount, WAVEHDR{});

        int preparedCount = 0;
        for (int i = 0; i < kBufferCount; i++) {
            WAVEHDR& hdr = headers[i];
            hdr.lpData = (LPSTR)buffers[i].data();
            hdr.dwBufferLength = (DWORD)bytesPerBuffer;
            hdr.dwFlags = 0;  // waveOutPrepareHeader expects the flags to be zero on entry

            // Prepare buffer for WaveOut (required by Windows API)
            MMRESULT prepResult = waveOutPrepareHeader(hWave, &hdr, sizeof(WAVEHDR));
            if (prepResult != MMSYSERR_NOERROR) {
                printf("waveOutPrepareHeader failed for buffer %d (%u)\n", i, (unsigned)prepResult);
                // Leave WHDR_DONE clear so queue() never selects this buffer;
                // the remaining buffers may still work.
                continue;
            }

            hdr.dwFlags |= WHDR_DONE;  // Prepared and idle: available to queue()
            preparedCount++;
        }

        if (preparedCount == 0) {
            printf("WaveOut: no buffers could be prepared\n");
            waveOutClose(hWave);
            hWave = nullptr;
            buffers.clear();
            headers.clear();
            return false;
        }

        initialized = true;
        printf("WaveOut initialized: %d Hz, %d channels, %d-bit\n", samplerate, channels, bits);
        return true;
    }

    /**
     * Queues PCM audio data for playback.
     * The data is copied into free buffers in index order; input larger than
     * one buffer is split across as many free buffers as needed.
     * @param data Pointer to PCM audio data
     * @param len Size of data in bytes
     * @return true if all of the data was queued, false if any part of it was
     *         dropped (player not initialized, no free buffer left, or
     *         waveOutWrite failed)
     */
    bool queue(const void* data, size_t len)
    {
        std::lock_guard<std::mutex> lk(mtx);
        if (!initialized || data == nullptr) return false;

        const uint8_t* src = static_cast<const uint8_t*>(data);
        size_t remaining = len;

        for (int i = 0; i < kBufferCount && remaining > 0; i++) {
            WAVEHDR& hdr = headers[i];
            if (!(hdr.dwFlags & WHDR_DONE)) continue;  // Still playing, or never prepared

            // Capacity comes from the backing vector, not from dwBufferLength:
            // dwBufferLength holds the length of whatever was queued last and
            // would otherwise shrink the buffer permanently.
            const size_t chunk = std::min(buffers[i].size(), remaining);
            std::memcpy(hdr.lpData, src, chunk);
            hdr.dwBufferLength = (DWORD)chunk;
            hdr.dwFlags &= ~WHDR_DONE;

            // Queue buffer for playback
            MMRESULT mm = waveOutWrite(hWave, &hdr, sizeof(WAVEHDR));
            if (mm != MMSYSERR_NOERROR) {
                printf("waveOutWrite failed (%u)\n", (unsigned)mm);
                hdr.dwFlags |= WHDR_DONE; // Mark as done so it can be reused
                return false;
            }

            src += chunk;
            remaining -= chunk;
        }

        // Anything left over found no free buffer and is dropped.
        return remaining == 0;
    }

    /**
     * Shuts down WaveOut audio system and releases all resources.
     * Safe to call when not initialized (does nothing).
     */
    void shutdown()
    {
        std::lock_guard<std::mutex> lk(mtx);
        if (!initialized) return;

        printf("Shutting down WaveOut audio...\n");

        // Stop playback and return all buffers to the application
        if (hWave != nullptr) {
            waveOutReset(hWave);
        }

        // Unprepare the headers that were successfully prepared
        for (int i = 0; i < kBufferCount; i++) {
            if (headers[i].dwFlags & WHDR_PREPARED) {
                MMRESULT unprepResult = waveOutUnprepareHeader(hWave, &headers[i], sizeof(WAVEHDR));
                if (unprepResult != MMSYSERR_NOERROR) {
                    printf("Warning: waveOutUnprepareHeader failed for buffer %d (%u)\n",
                           i, (unsigned)unprepResult);
                }
            }
        }

        // Close WaveOut device
        if (hWave != nullptr) {
            MMRESULT closeResult = waveOutClose(hWave);
            if (closeResult != MMSYSERR_NOERROR)
                printf("Warning: waveOutClose failed (%u)\n", (unsigned)closeResult);
            hWave = nullptr;
        }

        // Clear all data structures
        buffers.clear();
        headers.clear();
        initialized = false;
        printf("WaveOut shutdown complete.\n");
    }
};

static WaveOutPlayer waveOutPlayer;
#endif

/**
 * Audio processing thread function.
 * Repeatedly fetches a batch of packets from the audio worker and hands every
 * "audio/..." packet to the active playback path (WaveOut when the fallback is
 * in use, otherwise the SDL3 stream). Packets that could not be queued are
 * counted in resourceStats.audioMissed. Runs until audioRunning is cleared.
 *
 * @param client Reference to LT API client
 * @param workerURL Audio worker endpoint URL
 * @param audioSignal Audio signal configuration (channels and depth are used
 *                    to compute the byte size of each packet)
 */
static void audioThread(lt::Client& client, const std::string& workerURL, const lt::AudioSignal& audioSignal)
{
    const int bytesPerSample = (audioSignal.depth + 7) / 8;  // Convert bit depth to bytes
    size_t totalPacketsProcessed = 0;

    while (audioRunning) {
        // Fetch next batch of audio packets from LT device
        lt::Worker worker;
        std::string err = client.Get(workerURL, worker);
        if (!err.empty()) {
            printf("Audio worker error: %s\n", err.c_str());
            std::this_thread::sleep_for(std::chrono::milliseconds(100));
            continue;
        }

        // Process all audio packets in this batch
        for (auto& packet : worker->packets) {
            // Filter for audio packets
            if (packet->media.rfind("audio/", 0) != 0) continue;

            // Extract audio metadata
            lt::AudioMetadata m = packet->meta.template get<lt::AudioMetadata>();
            size_t dataSize = m.samples * audioSignal.channels * bytesPerSample;
            if (dataSize == 0) continue;  // Skip empty packets

            // Update throughput statistics
            resourceStats.audioBytesProcessed += dataSize;
            totalPacketsProcessed++;

            // Route audio to appropriate playback system
            bool audioQueued = false;
#ifdef _WIN32
            if (useWaveOut && waveOutPlayer.initialized) {
                // Use WaveOut fallback
                audioQueued = waveOutPlayer.queue(packet->data, dataSize);
            }
            else
#endif
            if (audioStream) {
                // SDL 3.2 API: SDL_PutAudioStreamData returns true on success.
                // The stream is deliberately not flushed per packet:
                // SDL_FlushAudioStream is meant to mark the end of input and
                // can introduce gaps when more data follows.
                audioQueued = SDL_PutAudioStreamData(audioStream, packet->data, (int)dataSize);
                if (audioQueued) {
                    const int queuedBytes = SDL_GetAudioStreamQueued(audioStream);
                    if (queuedBytes >= 0) {  // -1 signals an error; keep the previous value
                        resourceStats.queueSize = queuedBytes / (audioSignal.channels * bytesPerSample);
                    }
                }
            }

            if (!audioQueued) {
                resourceStats.audioMissed++;
            }
        }

        // Small delay to prevent excessive CPU usage
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
    }

    printf("Audio thread terminated. Processed %zu packets total.\n", totalPacketsProcessed);
}

/**
 * Initializes SDL3 audio subsystem using modern API.
 * This is the preferred audio method with automatic format conversion and device management.
 *
 * @param sampleRate Audio sample rate in Hz
 * @param channels Number of audio channels
 * @param bitsPerSample Bit depth (16 or 32)
 * @return true if initialization successful, false otherwise
 */
static bool initializeSDLAudio(int sampleRate, int channels, int bitsPerSample)
{
    // Initialize SDL audio subsystem if not already done
    if ((SDL_WasInit(SDL_INIT_AUDIO) & SDL_INIT_AUDIO) == 0) {
        int init_result = SDL_Init(SDL_INIT_AUDIO);
        if (init_result < 0) {
            printf("SDL_Init(SDL_INIT_AUDIO) failed: %s\n", SDL_GetError());
            return false;
        }
    }

    // Configure audio specification
    SDL_AudioSpec spec = {};
    spec.freq = sampleRate;
    spec.channels = (Uint8)channels;
    spec.format = (bitsPerSample == 32) ? SDL_AUDIO_S32 : SDL_AUDIO_S16;

    // Create audio device stream in single call (modern SDL3 API)
    // This replaces the old SDL2 pattern of separate device + stream + binding
    audioStream = SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, &spec, nullptr, nullptr);
    if (!audioStream) {
        printf("SDL_OpenAudioDeviceStream failed: %s\n", SDL_GetError());
        return false;
    }

    // Start audio playback (SDL3 streams start paused by default)
    SDL_ResumeAudioStreamDevice(audioStream);
    return true;
}


//=============================================================================
// Main application
//=============================================================================

/**
 * Fatal error handler - logs message and exits application cleanly.
 */
auto logFatal = [](const std::string& msg)
{
    printf("FATAL ERROR: %s\n", msg.c_str());

    // Attempt basic cleanup before exit
    audioRunning = false;
    if (audioStream) {
        SDL_DestroyAudioStream(audioStream);
        SDL_QuitSubSystem(SDL_INIT_AUDIO);
    }
#ifdef _WIN32
    if (waveOutPlayer.initialized)
        waveOutPlayer.shutdown();
#endif

    exit(EXIT_FAILURE);
};

/**
 * Main application entry point.
 * Initializes all subsystems, creates worker threads, and manages the main rendering loop.
 */
int main(int argc, char** argv)
{
    // Print command line usage if no arguments provided
    if (argc < 2) {
        printHelp();
        return 1;
    }

    startTime = std::chrono::steady_clock::now();

#ifdef _WIN32
    // Windows console Ctrl+C handler
    SetConsoleCtrlHandler([](DWORD dwCtrlType) -> BOOL {
        if (dwCtrlType == CTRL_C_EVENT || dwCtrlType == CTRL_BREAK_EVENT || dwCtrlType == CTRL_CLOSE_EVENT) {
            printf("\nConsole Ctrl+C detected, exiting...\n");
            
            // Signal threads (optional, as we are about to force exit)
            audioRunning = false;
            videoRunning = false;
            videoQueue.running = false;
            videoQueue.cv.notify_all();

            // Return FALSE to pass the signal to the next handler (default Windows handler),
            // which will terminate the process immediately.
            return FALSE;
        }
        return FALSE;
    }, TRUE);

    // Request high-resolution timing (1ms) from Windows
    timeBeginPeriod(1);
#else
    // Unix signal handler for Ctrl+C
    std::signal(SIGINT, [](int) {
        printf("\nCtrl+C caught, shutting down...\n");
        audioRunning = false;
        videoRunning = false;
        videoQueue.running = false;
        videoQueue.cv.notify_all();
    });
    std::signal(SIGTERM, [](int) {
        printf("\nSIGTERM caught, shutting down...\n");
        audioRunning = false;
        videoRunning = false;
        videoQueue.running = false;
        videoQueue.cv.notify_all();
    });
#endif

    // Parse command line options
    std::string sourceURL;
    std::string videoFormat = "nv12"; // Default format

    // Command-line parsing
    for (int i = 1; i < argc; i++) {
        if ((strcmp(argv[i], "-source") == 0 || strcmp(argv[i], "-s") == 0) && i + 1 < argc) {
            sourceURL = argv[++i];
        }
        else if ((strcmp(argv[i], "-format") == 0 || strcmp(argv[i], "-f") == 0) && i + 1 < argc) {
            videoFormat = argv[++i];
            // Validate format
            if (videoFormat != "yuyv" && videoFormat != "nv12") {
                printf("Invalid format '%s'. Supported formats: yuyv, nv12\n", videoFormat.c_str());
                return 1;
            }
        }
    }

    // Require source URL
    if (sourceURL.empty()) {
        printHelp();
        return 1;
    }

    //-------------------------------------------------------------------------
    // LT client initialization & check if source is active
    //-------------------------------------------------------------------------

    lt::Client client;
    std::string err;

    // Check if input selected is active
    lt::Input input;

    err = client.Get(sourceURL, input);
    if (!err.empty()) {
        logFatal("Failed to get input: " + err);
    }

    if (input.video.signal != "locked") {
        logFatal("Input source is not locked");
    }

	// Video parameters
    lt::AudioSignal audioSignal = input.audio;
    lt::VideoSignal videoSignal = input.video;

    // Print input information
    printf("Video: %dx%d %c %.2f Hz\n", videoSignal.size[0], videoSignal.size[1], videoSignal.interlaced ? 'i' : 'p', videoSignal.framerate);
    printf("Audio: %d Hz %d channels %d-bit pcm\n", audioSignal.samplerate, audioSignal.channels, audioSignal.depth);

    //-------------------------------------------------------------------------
    // Audio subsystem initialization
    //-------------------------------------------------------------------------

    // Create audio data worker
    std::string audioWorkerURL;
    if (audioSignal.signal == "locked") {
        lt::AudioDataWorker aw;
        aw.media = "audio/pcm";
        aw.channels = audioSignal.channels;
        aw.samplerate = audioSignal.samplerate;
        aw.depth = audioSignal.depth;

        err = client.Post(sourceURL + "/data", aw, nullptr);
        if (!lt::ErrorIs(err, lt::ErrRedirect)) {
            logFatal("Audio worker creation failed: " + err);
        }
        audioWorkerURL = err.substr(std::string("redirect: ").length());
    }
    else {
        printf("No audio signal detected, skipping audio worker creation\n");
    }

    std::thread audioProcessor;  // Audio processing thread handle
    if (!audioWorkerURL.empty()) {
        // Try SDL3 first (preferred), fallback to WaveOut automatically if needed
        if (initializeSDLAudio(audioSignal.samplerate, audioSignal.channels, audioSignal.depth)) {
            audioProcessor = std::thread(audioThread, std::ref(client), audioWorkerURL, audioSignal);
        }
        else {
#ifdef _WIN32
            printf("SDL3 audio failed, attempting WaveOut fallback...\n");
            SDL_QuitSubSystem(SDL_INIT_AUDIO);
            useWaveOut = true;
            if (waveOutPlayer.init(audioSignal.samplerate, audioSignal.channels, audioSignal.depth)) {
                printf("WaveOut fallback successful\n");
                audioProcessor = std::thread(audioThread, std::ref(client), audioWorkerURL, audioSignal);
            }
            else {
                printf("Both SDL3 and WaveOut failed - no audio playback available\n");
            }
#else
            printf("SDL3 audio failed - no audio playback available\n");
#endif
        }
    }
    else {
        printf("No audio worker available - skipping audio initialization\n");
    }

    //-------------------------------------------------------------------------
    // Video subsystem initialization
    //-------------------------------------------------------------------------

    // Create video data worker with selected format
    std::string videoWorkerURL;
    std::string mediaType = "video/" + videoFormat;
    printf("Using video format: %s\n", videoFormat.c_str());
    err = client.Post(sourceURL + "/data", lt::VideoDataWorker{mediaType}, nullptr);
    if (!lt::ErrorIs(err, lt::ErrRedirect)) {
        logFatal("Video worker creation failed: " + err);
    }
    videoWorkerURL = err.substr(std::string("redirect: ").length());

    std::thread videoProcessor;  // Video processing thread handle
    if (!videoWorkerURL.empty()) {
        videoRunning = true;
        videoProcessor = std::thread(videoThread, std::ref(client), videoWorkerURL);
    }
    else {
        printf("No video worker available - skipping video initialization\n");
    }

    //-------------------------------------------------------------------------
    // Main execution loop
    //-------------------------------------------------------------------------

    // Initialize GLFW and create window
    if (!glfwInit()) {
        logFatal("GLFW initialization failed");
    }

    GLFWwindow* window = glfwCreateWindow(640, 360, "Audio/Video Player", NULL, NULL);
    if (!window) {
        glfwTerminate();
        logFatal("GLFW window creation failed");
    }

    glfwMakeContextCurrent(window);
    if (!gladLoadGL(glfwGetProcAddress)) {
        glfwTerminate();
        logFatal("OpenGL loader initialization failed");
    }

    // Disable VSync to allow frame rate above monitor refresh rate
    // Use 0 for unlimited FPS, or 1 to sync with monitor refresh rate
    glfwSwapInterval(0);

    // Set up key callback to handle Escape key and Ctrl+C for window closing
    glfwSetKeyCallback(window, [](GLFWwindow* win, int key, int scancode, int action, int mods) {
        if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS) {
            glfwSetWindowShouldClose(win, GLFW_TRUE);
        }
        if (key == GLFW_KEY_C && action == GLFW_PRESS && (mods & GLFW_MOD_CONTROL)) {
            glfwSetWindowShouldClose(win, GLFW_TRUE);
        }
    });

    // Initialize video renderer and run the main rendering loop inside a scope
    {
        // Create appropriate surface based on format
        YUYV yuyvSurface;
        NV12 nv12Surface;
        
        std::string surfaceErr = (videoFormat == "nv12") ? nv12Surface.err : yuyvSurface.err;
        if (surfaceErr != "") {
            printf("Video surface initialization failed: %s\n", surfaceErr.c_str());
        }
        else {
            // Main video rendering loop
            auto fpsStart = std::chrono::system_clock::now();

            while (audioRunning && videoRunning && !glfwWindowShouldClose(window)) {
                // Fetch next frame from queue with short timeout (16ms ~= 60fps frame time)
                lt::Packet packet;
                bool hasFrame = videoQueue.pop(packet, 16);

                if (hasFrame) {
                    // Process video frame
                    lt::VideoMetadata meta = packet->meta.template get<lt::VideoMetadata>();
                    uint64_t ts = packet->timestamp;

                    // Calculate dropped frames based on timestamp gaps
                    if (prevTS != 0) {
                        int expectedFrames = int(std::llround(double(ts - prevTS) * meta.framerate / 1000000.0));
                        if (expectedFrames > 1) {
                            missed += (expectedFrames - 1);
                        }
                    }

                    // Render frame with appropriate surface
                    prevTS = ts;
                    if (videoFormat == "nv12") {
                        nv12Surface.Draw(meta.size[0], meta.size[1], reinterpret_cast<char*>(packet->data));
                    } else {
                        yuyvSurface.Draw(meta.size[0], meta.size[1], reinterpret_cast<char*>(packet->data));
                    }

                    // Update window title
                    if ((videoSignal.size[0] != meta.size[0]) || (videoSignal.size[1] != meta.size[1]) ||
                        (std::abs(videoSignal.framerate - meta.framerate) > 0.01) || (videoSignal.interlaced != meta.interlaced)) {
                        std::string title = sourceURL + " " + std::to_string(meta.size[0]) + "x" + std::to_string(meta.size[1]) +
                            (meta.interlaced ? " i " : " p ") + std::to_string(meta.framerate) + "Hz";
                        glfwSetWindowTitle(window, title.c_str());
                        videoSignal.size[0] = meta.size[0];
                        videoSignal.size[1] = meta.size[1];
                        videoSignal.framerate = meta.framerate;
                        videoSignal.interlaced = meta.interlaced;
                    }

                    // Update viewport and present frame (only when we have a new frame)
                    int w, h;
                    glfwGetFramebufferSize(window, &w, &h);
                    glViewport(0, 0, w, h);
                    glfwSwapBuffers(window);
                    cnt++;
                }

                // Always poll events to keep window responsive
                glfwPollEvents();

                // Calculate and display performance statistics
                auto now = std::chrono::system_clock::now();
                std::chrono::duration<double> dur = now - fpsStart;
                if (dur.count() >= 1.0) {
                    resourceStats.avgFPS = double(cnt) / dur.count();
                    size_t dropped = videoQueue.getDroppedFrames();
                    resourceStats.updateAudioThroughput(); // Update audio stats
                    printf("Video: %.2fFPS - missed %d | Audio: %.0fkb/s - missed %d\n",
                        resourceStats.avgFPS, missed,
                        resourceStats.audioKBps, resourceStats.audioMissed.load());
                    cnt = 0;
                    fpsStart = now;
                }
            }
        }
    } // YUYV resources released here while context is valid

    // CRITICAL: Clean up GLFW resources after GPU resources are freed
    glfwDestroyWindow(window);
    glfwTerminate();

    //-------------------------------------------------------------------------
    // Comprehensive cleanup (critical for memory leak prevention)
    //-------------------------------------------------------------------------

    // 1. Stop video processing thread
    videoRunning = false;
    videoQueue.running = false;
    videoQueue.cv.notify_all(); // Wake up any waiting pop() calls
    if (videoProcessor.joinable())
        videoProcessor.join();  // Wait for thread to complete

    // 2. Stop audio processing thread
    audioRunning = false;
    if (audioProcessor.joinable())
        audioProcessor.join();  // Wait for thread to complete

    // 3. Shutdown WaveOut system (if used)
#ifdef _WIN32
    if (useWaveOut && waveOutPlayer.initialized)
        waveOutPlayer.shutdown();  // Releases all WaveOut resources
#endif

    // 4. Shutdown SDL3 audio system (if used)
    if (audioStream) {
        SDL_DestroyAudioStream(audioStream);  // Release audio stream
        audioStream = nullptr;
        SDL_QuitSubSystem(SDL_INIT_AUDIO);    // Shutdown SDL audio subsystem
    }

    printf("Application terminated successfully\n");

    return 0;
}

/**
 * Video processing thread function.
 * Repeatedly fetches a batch of packets from the video worker and pushes the
 * "video/..." packets into videoQueue for the render loop. When the queue
 * already holds two or more frames only the newest video packet of the batch
 * is kept, so the renderer does not fall further behind. Runs until
 * videoRunning or audioRunning is cleared.
 *
 * @param client Reference to LT API client
 * @param videoWorkerURL Video worker endpoint URL
 */
static void videoThread(lt::Client& client, const std::string& videoWorkerURL)
{
    size_t totalPacketsProcessed = 0;

    while (videoRunning && audioRunning) {
        // Fetch next batch of video packets from LT device
        lt::Worker worker;
        std::string err = client.Get(videoWorkerURL, worker);
        if (!err.empty()) {
            printf("Video worker error: %s\n", err.c_str());
            std::this_thread::sleep_for(std::chrono::milliseconds(16)); // ~60fps retry
            continue;
        }

        // Process video packets in this batch
        if (!worker->packets.empty()) {
            if (videoQueue.size() >= 2) {
                // Queue has frames buffered: keep only the newest *video*
                // packet. The batch is scanned rather than taking back(), so a
                // trailing non-video packet does not discard the whole batch.
                const lt::Packet* latestVideo = nullptr;
                for (auto& packet : worker->packets) {
                    if (packet->media.rfind("video/", 0) == 0) {
                        latestVideo = &packet;
                    }
                }
                if (latestVideo != nullptr) {
                    videoQueue.push(*latestVideo);
                    totalPacketsProcessed++;
                }
            } else {
                // Queue is low, push all frames to catch up
                for (auto& packet : worker->packets) {
                    if (packet->media.rfind("video/", 0) == 0) {
                        videoQueue.push(packet);
                        totalPacketsProcessed++;
                    }
                }
            }
        }

        // Small delay only if queue is getting full to let rendering catch up
        if (videoQueue.size() >= 3) {
            std::this_thread::sleep_for(std::chrono::milliseconds(5));
        }
    }

    printf("Video thread terminated. Processed %zu packets total.\n", totalPacketsProcessed);
}
