/* Developed by Inventives, Inc. <https://inventives.ai> */
/* See LICENSE.md file in project root directory */

import wav from 'audiobuffer-to-wav';
import { FIFO } from 'util/fifo';

/**
 * Event type for when we have a transcription.
 *
 * Snapshots the transcript text, confidence and duration of the parent
 * Transcription at construction time and flattens the audio chunks it has
 * recorded so far into a single WAV payload.
 */
export class TranscriptionEvent extends Event {
    readonly transcript: string
    readonly confidence: number
    readonly duration: number
    readonly wav: ArrayBuffer // 16-bit PCM uncompressed WAV (empty when nothing was recorded)
    readonly parent: Transcription

    /**
     * @param t Transcription whose current state is captured by this event
     */
    constructor(t: Transcription) {
        super('transcript');
        this.parent = t;
        this.transcript = t.transcript;
        this.confidence = t.confidence;
        this.duration = t.duration;

        // Total number of samples across every recorded chunk
        const length = t.recording.reduce((total, chunk) => total + chunk.length, 0);

        if (length === 0) {
            // Nothing was recorded: there is no first chunk to read the sample rate
            // from and an AudioBuffer cannot be zero-length, so expose an empty payload
            // instead of throwing.
            this.wav = new ArrayBuffer(0);
            return;
        }

        // Compile recorded AudioBuffers into a single mono buffer
        const recording = new AudioBuffer({
            length,
            numberOfChannels: 1,
            sampleRate: t.recording[0].sampleRate
        });

        let offset = 0;
        for (const chunk of t.recording) {
            // Only the first channel of each chunk is kept
            recording.copyToChannel(chunk.getChannelData(0), 0, offset);
            offset += chunk.length;
        }

        this.wav = wav(recording);
    }
}

/**
 * Event type for when the transcription encounters an error.
 *
 * Always dispatched with the event type `error`; the value that caused it is
 * carried untouched on the `error` property.
 */
export class TranscriptionError extends Event {
    /**
     * @param error The underlying failure, stored as-is on the event
     */
    constructor(readonly error: any) {
        super('error');
    }
}

/**
 * Transcription class constructor options.
 *
 * Every field is optional; Transcription applies its own defaults for
 * anything that is left out.
 */
export interface TranscriptionOptions {
    /**
     * Audio sample rate in samples per second. Sent to the service as the
     * `sample_rate` query parameter when `encoding` is also provided.
     * Transcription falls back to 48000 when omitted.
     */
    sampleRate?: number
    /**
     * VAD turn-off period in milliseconds, forwarded as the `vad_turnoff`
     * query parameter when set. Transcription assumes 500 when omitted and
     * uses the value to size the trailing silence sent on close.
     */
    vadTurnoff?: number
    /**
     * Audio encoding name forwarded as the `encoding` query parameter.
     * `linear16` is requested when omitted.
     */
    encoding?: string
    /** Print development output via console.info? Defaults to false. */
    logging?: boolean
    /** "Expected" keywords; each entry is added to the request URL as a `keywords` query parameter. */
    keywords?: string[]

    // Minimum thresholds for a valid transcription
    /** Minimum accumulated duration (seconds) a transcript needs before it is dispatched. Defaults to 1. */
    minDuration?: number
    /** Minimum transcript length (characters) needed before it is dispatched. Defaults to 2. */
    minCharacters?: number
}

/**
 * Transcription Record class.
 *
 * Opens a WebSocket to the Deepgram streaming endpoint, transmits queued audio
 * chunks as raw PCM (WAV data with the 44-byte header removed) and accumulates
 * the finalized results it receives. Once a result marks the end of speech, a
 * `transcript` event (TranscriptionEvent) is dispatched if the minimum
 * thresholds are met and the socket is closed. Failures are reported as
 * `error` events (TranscriptionError).
 */
export class Transcription extends EventTarget {
    private readonly ws: WebSocket       // Websocket to DG/ASR
    transcript: string                   // Finalized transcript text accumulated so far
    duration: number                     // Summed duration of the finalized results (compared against minDuration)
    confidence: number                   // Duration-weighted average confidence of the finalized results

    // We use an internal buffer for transcription to prevent race conditions and to preserve the order of transcription
    private buffer: FIFO<AudioBuffer>
    closed: boolean     // Set to true when Transcription.close() is called
                        // The transmission is truly closed at the next buffer runthrough

    // These options are passed in to initialize the socket, but we also use them later to create an empty buffer of zeros to close out the connection
    readonly sampleRate: number
    readonly vadTurnoff: number

    logging: boolean    // Print out stuff for development?

    minDuration: number
    minCharacters: number

    recording: AudioBuffer[]    // Every chunk that has been transmitted, in order

    /**
     * Construct the Deepgram ASR streaming transcription request URL.
     *
     * @param opts Constructor options; only encoding, sampleRate, vadTurnoff and keywords are read
     * @returns The full `wss://` URL including the query string
     */
    private static buildSocketUrl(opts?: TranscriptionOptions): string {
        let wsUrl = "wss://api.deepgram.com/v1/listen?punctuate=true";

        // Encoding & sample-rate (dependent)
        if (opts?.encoding) {
            wsUrl += "&encoding=" + opts.encoding;
            if (opts.sampleRate)
                wsUrl += "&sample_rate=" + String(opts.sampleRate);
        }
        else {
            // Honor a caller-supplied sample rate even when the encoding is defaulted, so
            // the URL always agrees with this.sampleRate (used for the trailing silence)
            wsUrl += "&encoding=linear16&sample_rate=" + String(opts?.sampleRate ?? 48000);
        }

        // VAD turnoff period (ms)
        if (opts?.vadTurnoff)
            wsUrl += "&vad_turnoff=" + String(opts.vadTurnoff);

        // Any "expected" keywords to look for?
        // Percent-encode them so characters such as '&', '#' or '+' cannot corrupt the query string
        for (const kw of opts?.keywords ?? [])
            wsUrl += "&keywords=" + encodeURIComponent(kw);

        return wsUrl;
    }

    /**
     * @param token Credential passed to the WebSocket as the `['token', token]` sub-protocol pair
     * @param opts  Optional settings; see TranscriptionOptions
     */
    constructor(token: string, opts?: TranscriptionOptions) {
        super();

        this.transcript = "";
        this.duration = 0;
        this.confidence = 0;
        this.buffer = new FIFO<AudioBuffer>();
        this.closed = false;

        this.sampleRate = opts?.sampleRate ?? 48000; // samples/s
        this.vadTurnoff = opts?.vadTurnoff ?? 500; // ms
        this.logging = opts?.logging ?? false;

        this.minDuration = opts?.minDuration ?? 1; // s
        this.minCharacters = opts?.minCharacters ?? 2;

        this.recording = [];

        // Create socket connection
        this.ws = new WebSocket(Transcription.buildSocketUrl(opts), [ 'token', token ]);

        // FOR DEBUGGING
        // Set up a test voice server that accepts single-channel 16-bit PCM uncompressed audio data and saves it to a file (or otherwise inspects it) to make sure the audio is being transmitted correctly.
        // This was EXTREMELY useful for testing: terrible transcriptions turned out to be caused by a race condition. We were saving references to audio buffers which the AudioContext was changing by the time we transmitted them. The data within the buffers is now deep cloned.
        // this.ws = new WebSocket("ws://localhost:8081");

        // Forward error handler
        this.ws.onerror = (e) => {
            console.error("Transcription socket error");
            console.error(e);
            this.dispatchEvent(new TranscriptionError(e));
        }

        // Transcription result handler
        this.ws.onmessage = (msg) => {
            try {
                const rcv = JSON.parse(msg.data) as {
                    duration: number,
                    start: number,
                    is_final: boolean,
                    speech_final?: boolean,
                    channel: {
                        alternatives: {
                            transcript: string,
                            confidence: number
                        }[]
                    }
                }; // Only the properties we need...

                if (rcv.is_final) {
                    const result = rcv.channel.alternatives[0];
                    // Each segment is prefixed with a space; the leading one is stripped on finalization
                    this.transcript += ' ' + result.transcript;

                    // Compute weighted average for confidence
                    if (this.confidence > 0) {
                        this.confidence = ((rcv.duration * result.confidence) + (this.duration * this.confidence)) / (this.duration + rcv.duration);
                    }
                    else this.confidence = result.confidence;

                    this.duration += rcv.duration;
                }

                if (this.logging)
                    console.info("Transcription result:", rcv);

                // Ensure this is the final transcript after speech has ended
                if (!rcv.speech_final) return;

                if (this.logging)
                    console.info("Transcript finalized. Dispatching event.");

                this.transcript = this.transcript.substring(1); // Ignore the first space

                // Does this transcription match the minimum threshold to be considered valid?
                if (this.transcript.length >= this.minCharacters && this.duration >= this.minDuration)
                    this.dispatchEvent(new TranscriptionEvent(this));

                // We could additionally wait until transcripts covering (roughly) the full duration we sent have come back before closing.
                // TODO - by setting vad_turnoff, it looks like we don't actually have to do this.
                // Come back later in case that isn't good enough.

                // Close the socket
                this.ws.close();
            }
            catch (e) {
                console.error("Transcription result parse error");
                console.error(e);
                this.dispatchEvent(new TranscriptionError(e));
            }
        }

        // Buffer run-through timeout - we're using a timeout instead of an interval to prevent overruns or race conditions: the next run is only scheduled once the current one has finished.
        const bufferProcessor = () => {
            // Process current buffer
            if (this.ws.readyState === 0) {
                // Not yet ready - try again in a bit
                setTimeout(bufferProcessor, 250);
                return;
            }

            if (this.ws.readyState > 1) {
                // Closing/closed - uh oh!
                this.dispatchEvent(new TranscriptionError("The Websocket connection to the transcription service has been closed. Create a new transcription object to continue."));
                return;
            }

            try {
                while (!this.buffer.isEmpty) {
                    const samples = this.buffer.out();
                    if (!samples) continue; // Should never happen, but to appease the Typescript gods...

                    const data = wav(samples).slice(44); // Remove header

                    // Stream to Deepgram ASR for transcription
                    if (this.logging)
                        console.info("Transmitting chunk:", data);

                    this.ws.send(data);

                    // Save to recording for logging later
                    this.recording.push(samples); // Convert raw samples to WAV later
                }
            }
            catch (err) {
                this.dispatchEvent(new TranscriptionError(err));
            }

            if (this.closed) return; // Done
            setTimeout(bufferProcessor, 250);
        };

        // Start when the socket has been opened
        this.ws.onopen = () => setTimeout(bufferProcessor, 250);
    }

    /**
     * Send chunks of audio data for transcription.
     *
     * The chunks are only queued here; they are transmitted in order on the next buffer run-through.
     */
    send = (...chunks: AudioBuffer[]) => {
        for (const chunk of chunks) {
            this.buffer.in(chunk);
            // this.ws.send(wav(chunk).slice(44)); // Test sending directly without buffer
        }
    }

    /**
     * Close transcription - last chunk of audio sent.
     *
     * @param force When true, discard any queued audio and close the socket immediately.
     *              Otherwise queue a stretch of silence (1.2x the VAD turn-off period)
     *              and let the next buffer run-through transmit it and stop.
     */
    close = (force: boolean = false) => {
        const alreadyClosing = this.closed;
        this.closed = true;     // Queue for true closing

        if (force) {
            try {
                this.buffer.clear();    // Clear buffer
                this.ws.close();        // Close websocket
            }
            catch (err) {
                // Best effort - nothing useful to do if the forced teardown fails
            }
            return;
        }

        // The trailing silence has already been queued by an earlier close()
        if (alreadyClosing) return;

        // Send a bunch of zeros...
        // Round up to a whole number of samples: typed arrays reject fractional lengths
        const length = Math.ceil(1.2 * this.vadTurnoff * this.sampleRate / 1000);
        if (!(length >= 1)) return; // Nothing to send (e.g. vadTurnoff of 0); an AudioBuffer cannot be empty

        const trailing = new AudioBuffer({
            length,
            sampleRate: this.sampleRate
        });

        trailing.copyToChannel(new Float32Array(length), 0);
        this.buffer.in(trailing);
    }

    /** Event listener promotions to super */
    addEventListener = (type: 'transcript' | 'error', callback: (event: Event) => void) => {
        super.addEventListener(type, callback);
    }

    /** Remove a listener previously registered with addEventListener */
    removeEventListener = (type: 'transcript' | 'error', callback: (event: Event) => void) => {
        super.removeEventListener(type, callback);
    }

    /**
     * @deprecated Misspelled name kept for backward compatibility; use removeEventListener.
     */
    remoteEventListener = (type: 'transcript' | 'error', callback: (event: Event) => void) => {
        super.removeEventListener(type, callback);
    }

    /** Dispatch a transcript or error event to the registered listeners */
    dispatchEvent = (event: TranscriptionEvent | TranscriptionError) => {
        return super.dispatchEvent(event);
    }
}