#!/usr/bin/env node
/**
 * multimodal/speech.mjs — Text-to-speech and speech-to-text utilities.
 *
 * Wraps platform-native TTS (macOS `say`, Linux `espeak`, or `piper`)
 * and STT (openai-whisper's `whisper` or whisper.cpp's `whisper-cli`)
 * with availability detection and caching of the detection result.
 *
 * Pure ESM. No build step. Uses only Node.js standard APIs.
 */

import { execSync, spawn } from 'child_process';
import { existsSync, mkdtempSync, readdirSync, readFileSync, rmSync, unlinkSync } from 'fs';
import { tmpdir } from 'os';
import { resolve, dirname, basename, join } from 'path';
import { fileURLToPath } from 'url';

// ES modules have no built-in __filename / __dirname, so derive them from
// this module's URL. __dirname is used to locate the bundled whisper.cpp model.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// ── Availability caches ──────────────────────────────────────────
// Module-level memo of the backend probes. isTTSAvailable() and
// isSTTAvailable() fill these in on their first call and reuse them afterwards.

let _ttsAvailable = null;   // null = not yet checked; true/false once probed
let _ttsBackend = null;     // 'say' | 'espeak' | 'piper' | null (null until probed, or when none was found)
let _sttAvailable = null;   // null = not yet checked; true/false once probed

/**
 * Tells whether `which` can locate the given command.
 *
 * The command name is interpolated into a shell command line, so callers
 * must pass only trusted, hard-coded names.
 *
 * @param {string} cmd  Command name to look up.
 * @returns {boolean}   true when `which <cmd>` exits successfully, false otherwise.
 */
function commandExists(cmd) {
  let found = true;
  try {
    // execSync throws on a non-zero exit status; output is discarded.
    execSync(`which ${cmd}`, { stdio: 'ignore' });
  } catch {
    found = false;
  }
  return found;
}

// ── TTS ──────────────────────────────────────────────────────────

/**
 * Reports whether a supported TTS command is installed.
 *
 * On the first call the candidates are probed in priority order
 * (`say`, then `espeak`, then `piper`); the first hit is remembered in
 * `_ttsBackend` and the boolean outcome in `_ttsAvailable`. Later calls
 * return the remembered outcome without probing again.
 *
 * @returns {boolean} true when at least one TTS backend was found.
 */
export function isTTSAvailable() {
  if (_ttsAvailable === null) {
    // find() stops at the first match, so lower-priority commands are not probed.
    const detected = ['say', 'espeak', 'piper'].find((name) => commandExists(name));
    _ttsBackend = detected === undefined ? null : detected;
    _ttsAvailable = detected !== undefined;
  }
  return _ttsAvailable;
}

/**
 * Convert text to speech using the backend selected by isTTSAvailable().
 *
 * Per-backend behavior:
 *  - `say` / `espeak`: the text is passed as the last command-line argument.
 *    `outputPath` maps to `-o` (say) or `-w` (espeak); `voice` maps to `-v`.
 *  - `piper`: the text is written to the child's stdin. `voice` is passed as
 *    `--model`, and audio goes to `--output_file`; when no `outputPath` is
 *    given the output file is `/dev/null`.
 *
 * @param {string} text       The text to speak (must contain non-whitespace).
 * @param {object} [opts]     Options.
 * @param {string} [opts.voice]      Voice name (backend-specific).
 * @param {string} [opts.outputPath] If provided, write audio to this file instead of playing.
 * @returns {Promise<{success: boolean, outputPath?: string}>}
 *   Resolves once the backend exits with code 0. `outputPath` is echoed back
 *   only when it was provided.
 * @throws {Error} (as a rejection) when the text is empty, no backend is
 *   available, the backend cannot be spawned, it exits unsuccessfully, or
 *   the text could not be delivered on stdin.
 */
export async function textToSpeech(text, opts = {}) {
  if (!text || typeof text !== 'string' || text.trim().length === 0) {
    throw new Error('textToSpeech requires non-empty text');
  }

  if (!isTTSAvailable()) {
    throw new Error('No TTS backend available (need say, espeak, or piper)');
  }

  const { voice, outputPath } = opts;
  const backend = _ttsBackend;
  const readsStdin = backend === 'piper';

  const args = [];
  switch (backend) {
    case 'say': {
      // macOS say
      if (voice) args.push('-v', voice);
      if (outputPath) args.push('-o', outputPath);
      args.push(text);
      break;
    }
    case 'espeak': {
      if (voice) args.push('-v', voice);
      if (outputPath) args.push('-w', outputPath);
      args.push(text);
      break;
    }
    case 'piper': {
      // piper reads text from stdin
      args.push('--output_file', outputPath || '/dev/null');
      if (voice) args.push('--model', voice);
      break;
    }
    default:
      throw new Error('No TTS backend resolved');
  }

  return new Promise((resolvePromise, reject) => {
    // stdout is never consumed, so it is discarded rather than piped: an
    // unread pipe can fill up and stall a backend that prints a lot.
    const child = spawn(backend, args, {
      stdio: [readsStdin ? 'pipe' : 'ignore', 'ignore', 'pipe'],
    });

    let stderr = '';
    if (child.stderr) {
      child.stderr.on('data', (chunk) => { stderr += chunk.toString(); });
    }

    // A stdin failure (e.g. EPIPE when the child is gone) is recorded here and
    // reported from the 'close' handler, where exit status and stderr are known.
    let stdinError = null;

    child.on('error', (err) => reject(new Error(`TTS spawn error: ${err.message}`)));

    child.on('close', (code, signal) => {
      if (code !== 0) {
        // code is null when the child was terminated by a signal.
        const how = code === null ? `signal ${signal}` : `code ${code}`;
        return reject(new Error(`TTS exited with ${how}: ${stderr.trim()}`));
      }
      if (stdinError) {
        return reject(new Error(`TTS stdin error: ${stdinError.message}`));
      }
      const result = { success: true };
      if (outputPath) result.outputPath = outputPath;
      resolvePromise(result);
    });

    if (readsStdin) {
      // Without this listener a failed write would surface as an unhandled
      // 'error' event and take down the whole process.
      child.stdin.on('error', (err) => { stdinError = err; });
      child.stdin.end(text);
    }
  });
}

// ── STT ──────────────────────────────────────────────────────────

// ── STT backend resolution ──
// Command name chosen by isSTTAvailable(); speechToText() spawns it and
// treats 'whisper-cli' as whisper.cpp, anything else as openai-whisper.
let _sttBackend = null;  // 'whisper' | 'whisper-cli' | null

/**
 * Reports whether a whisper command-line tool is installed.
 *
 * On the first call `whisper` (openai-whisper, pip) is probed before
 * `whisper-cli` (whisper.cpp, as installed by homebrew); the first hit is
 * remembered in `_sttBackend` and the boolean outcome in `_sttAvailable`.
 * Later calls return the remembered outcome without probing again.
 *
 * @returns {boolean} true when an STT backend was found.
 */
export function isSTTAvailable() {
  if (_sttAvailable === null) {
    // find() stops at the first match, so `whisper` wins when both exist.
    const detected = ['whisper', 'whisper-cli'].find((name) => commandExists(name));
    _sttBackend = detected === undefined ? null : detected;
    _sttAvailable = detected !== undefined;
  }
  return _sttAvailable;
}

/**
 * Transcribe an audio file to text with the backend selected by isSTTAvailable().
 *
 * Two CLI flavors are handled:
 *  - `whisper-cli` (whisper.cpp): run with `-f <file> --no-timestamps`; the
 *    transcript is read from the process's stdout. If a model file exists at
 *    `../data/whisper-models/ggml-base.en.bin` (relative to this module) it
 *    is passed with `-m`; otherwise no model argument is given.
 *  - anything else (openai-whisper): run with `--output_format txt` and
 *    `--output_dir` pointing at a private temporary directory; the `.txt`
 *    file written there is read and the directory is removed afterwards.
 *    Nothing is created or deleted next to the audio file.
 *
 * @param {string} audioPath  Path to the audio file (wav, mp3, etc.)
 * @returns {Promise<string>} The transcribed text, trimmed.
 * @throws {Error} (as a rejection) when the path is missing or not a string,
 *   the file does not exist, no whisper CLI is available, the CLI cannot be
 *   spawned or exits unsuccessfully, or no transcript file was produced.
 */
export async function speechToText(audioPath) {
  if (!audioPath || typeof audioPath !== 'string') {
    throw new Error('speechToText requires a valid audio file path');
  }

  if (!existsSync(audioPath)) {
    throw new Error(`Audio file not found: ${audioPath}`);
  }

  if (!isSTTAvailable()) {
    throw new Error('Whisper CLI is not available (install with: pip install openai-whisper  OR  brew install whisper-cpp)');
  }

  const backend = _sttBackend;
  // whisper-cli (whisper.cpp) outputs to stdout; openai-whisper writes to files.
  const isWhisperCpp = backend === 'whisper-cli';

  // An absolute path cannot be mistaken for a command-line option, even when
  // the file name itself begins with "-".
  const audioFile = resolve(audioPath);

  // Scratch directory for openai-whisper's transcript file. Writing into the
  // audio file's own directory could overwrite (and, on cleanup, delete) an
  // unrelated "<name>.txt" that already lives there.
  const outDir = isWhisperCpp ? null : mkdtempSync(join(tmpdir(), 'whisper-stt-'));
  const removeOutDir = () => {
    if (outDir === null) return;
    try {
      rmSync(outDir, { recursive: true, force: true });
    } catch {
      /* best-effort cleanup; a leftover temp dir must not mask the real result */
    }
  };

  let args;
  if (isWhisperCpp) {
    // whisper.cpp via homebrew: whisper-cli -f <file> -m <model> --no-timestamps
    // Look for the model in our data directory, fall back to whisper.cpp default.
    args = ['-f', audioFile, '--no-timestamps'];
    const modelPath = join(__dirname, '..', 'data', 'whisper-models', 'ggml-base.en.bin');
    if (existsSync(modelPath)) {
      args.push('-m', modelPath);
    }
  } else {
    // openai-whisper (Python): whisper <file> --output_format txt --output_dir <dir>
    args = [audioFile, '--output_format', 'txt', '--output_dir', outDir];
  }

  return new Promise((resolvePromise, reject) => {
    let child;
    try {
      // stdout is only needed for whisper.cpp; otherwise discard it so it is
      // neither buffered in memory nor left unread.
      child = spawn(backend, args, {
        stdio: ['ignore', isWhisperCpp ? 'pipe' : 'ignore', 'pipe'],
      });
    } catch (err) {
      removeOutDir();
      throw err;
    }

    let stdout = '';
    let stderr = '';
    if (child.stdout) {
      child.stdout.on('data', (chunk) => { stdout += chunk.toString(); });
    }
    if (child.stderr) {
      child.stderr.on('data', (chunk) => { stderr += chunk.toString(); });
    }

    child.on('error', (err) => {
      removeOutDir();
      reject(new Error(`Whisper spawn error: ${err.message}`));
    });

    child.on('close', (code, signal) => {
      // Everything below runs inside try/finally so that a filesystem error
      // becomes a rejection (not an uncaught exception) and the scratch
      // directory is always removed.
      try {
        if (code !== 0) {
          // code is null when the child was terminated by a signal.
          const how = code === null ? `signal ${signal}` : `code ${code}`;
          throw new Error(`Whisper exited with ${how}: ${stderr.trim()}`);
        }

        if (isWhisperCpp) {
          // whisper.cpp outputs transcript to stdout
          resolvePromise(stdout.trim());
          return;
        }

        // The scratch directory is ours alone, so the only .txt in it is the
        // transcript, whatever file name the CLI derived from the input.
        const txtName = readdirSync(outDir).find((name) => name.endsWith('.txt'));
        if (txtName === undefined) {
          throw new Error(`Whisper output not found in ${outDir}`);
        }
        resolvePromise(readFileSync(join(outDir, txtName), 'utf-8').trim());
      } catch (err) {
        reject(err);
      } finally {
        removeOutDir();
      }
    });
  });
}
