/**
 * Samples the frequency spectrum of an audio stream on every animation frame
 * and reports it to `callback` once every 10 frames.
 *
 * The same `Uint8Array` instance is refilled and handed to `callback` each
 * time; copy it if the values must be retained across invocations.
 *
 * @param stream - Audio stream to analyse.
 * @param frameIdRef - Receives the id of the currently pending animation
 *   frame. Cancelling that id stops the sampling loop, but it does NOT release
 *   the audio resources; call the returned function for a full teardown.
 * @param callback - Invoked with the latest byte frequency data.
 * @returns An idempotent cleanup function that cancels the pending frame,
 *   disconnects the stream source and closes the `AudioContext`.
 * @throws Error if neither `AudioContext` nor `webkitAudioContext` exists.
 */
export const detectSpeaking = (
    stream: MediaStream,
    frameIdRef: React.MutableRefObject<number | undefined>,
    callback: (data: Uint8Array) => void
): (() => void) => {
    // Number of animation frames between two consecutive callback invocations.
    const framesPerCallback = 10;

    // Older WebKit builds only expose the prefixed constructor. Resolve it
    // locally instead of overwriting the global `window.AudioContext`.
    const AudioContextCtor: typeof AudioContext | undefined =
        window.AudioContext ??
        (window as unknown as { webkitAudioContext?: typeof AudioContext }).webkitAudioContext;

    if (!AudioContextCtor) {
        throw new Error('Web Audio API (AudioContext) is not available in this environment');
    }

    const ctx = new AudioContextCtor();
    const analyser = ctx.createAnalyser();
    const streamNode = ctx.createMediaStreamSource(stream);

    streamNode.connect(analyser);

    // Reused for every sample to avoid allocating on each frame.
    const data = new Uint8Array(analyser.frequencyBinCount);

    let frameCounter = 0;
    let frameId: number | undefined;
    let stopped = false;

    const loop = () => {
        if (stopped) {
            return;
        }

        // Schedule the next tick first so that a throwing callback cannot
        // stall the loop. Ticks follow the display refresh rate.
        frameId = requestAnimationFrame(loop);
        frameIdRef.current = frameId;

        analyser.getByteFrequencyData(data);

        // Wrapping counter: hits 0 on the 10th, 20th, ... frame.
        frameCounter = (frameCounter + 1) % framesPerCallback;
        if (frameCounter === 0) {
            callback(data);
        }
    };

    loop();

    return () => {
        if (stopped) {
            return;
        }
        stopped = true;

        if (frameId !== undefined) {
            cancelAnimationFrame(frameId);
            // Only clear the shared ref if it still points at our frame.
            if (frameIdRef.current === frameId) {
                frameIdRef.current = undefined;
            }
        }

        streamNode.disconnect();
        // The context is owned by this call, so closing it here is safe;
        // the `stopped` guard ensures close() is never called twice.
        void ctx.close();
    };
};
