package com.ewaytek.deepseek.service.dify.tts;

import com.alibaba.nls.client.protocol.InputFormatEnum;
import com.alibaba.nls.client.protocol.NlsClient;
import com.alibaba.nls.client.protocol.SampleRateEnum;
import com.alibaba.nls.client.protocol.asr.SpeechRecognizer;
import com.alibaba.nls.client.protocol.asr.SpeechRecognizerListener;
import com.alibaba.nls.client.protocol.asr.SpeechRecognizerResponse;
import com.ewaytek.deepseek.config.AliTtsConfig;
import lombok.Data;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import javax.annotation.Resource;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;

/**
 * Spring component that performs one-shot speech recognition through the Alibaba NLS
 * {@link SpeechRecognizer} client.
 *
 * <p>{@link #process(byte[])} streams an in-memory audio buffer to the recognizer in small
 * chunks and blocks until the final transcript arrives, the recognizer reports a failure, or
 * a timeout elapses.
 *
 * @author yangtq
 * @date 2025/3/28
 */
@Data
@Component
public class SpeechRecognizerService {

    private static final Logger logger = LoggerFactory.getLogger(SpeechRecognizerService.class);

    /** Number of audio bytes handed to the recognizer per {@code send} call. */
    private static final int CHUNK_SIZE_BYTES = 3200;

    /** Pause between two consecutive chunks, in milliseconds, to simulate real-time streaming. */
    private static final long SEND_INTERVAL_MILLIS = 10L;

    /** Maximum time, in seconds, to wait for the final outcome once the recognizer was stopped. */
    private static final long RESULT_TIMEOUT_SECONDS = 10L;

    @Autowired
    private AliTtsConfig aliTtsConfig;

    /**
     * Receives the outcome of a recognition session.
     */
    public interface RecognitionCallback {
        /**
         * Called with the recognized text once recognition has completed.
         *
         * @param result the recognized text as reported by the recognizer
         */
        void onResult(String result);

        /**
         * Called when the recognizer reports a failure.
         *
         * @param errorMessage the status text of the failed response
         */
        void onError(String errorMessage);
    }

    /**
     * Builds an SDK listener that forwards the final outcome of a session to {@code callback}.
     *
     * <p>Only the completed result and failures are forwarded. Intermediate hypotheses are
     * merely logged at debug level: they are not final, and reporting them through
     * {@link RecognitionCallback#onResult(String)} would make a partial transcript
     * indistinguishable from the finished one.
     *
     * @param callback receiver of the final text or the error; may be {@code null}, in which
     *                 case events are dropped
     * @return a listener to pass to the {@link SpeechRecognizer} constructor
     */
    private static SpeechRecognizerListener getRecognizerListener(final RecognitionCallback callback) {
        return new SpeechRecognizerListener() {
            @Override
            public void onRecognitionResultChanged(SpeechRecognizerResponse response) {
                // Partial text only; the authoritative transcript arrives in onRecognitionCompleted.
                logger.debug("ASR intermediate result: {}", response.getRecognizedText());
            }

            @Override
            public void onRecognitionCompleted(SpeechRecognizerResponse response) {
                if (callback != null) {
                    callback.onResult(response.getRecognizedText());
                }
            }

            @Override
            public void onStarted(SpeechRecognizerResponse speechRecognizerResponse) {
                // Nothing to do when the session starts.
            }

            @Override
            public void onFail(SpeechRecognizerResponse response) {
                if (callback != null) {
                    callback.onError(response.getStatusText());
                }
            }
        };
    }

    /**
     * Recognizes the speech contained in {@code audioData} and returns the final transcript.
     *
     * <p>The recognizer is configured for PCM input at an 8 kHz sample rate. The buffer is sent
     * in chunks of {@value #CHUNK_SIZE_BYTES} bytes with a short pause between chunks. After the
     * recognizer has been stopped, the method waits at most {@value #RESULT_TIMEOUT_SECONDS}
     * seconds for the final result or a failure notification.
     *
     * <p>Errors are logged rather than thrown. If the calling thread is interrupted, its
     * interrupt flag is restored before returning.
     *
     * @param audioData raw audio bytes to recognize
     * @return the final recognized text, or {@code null} if {@code audioData} is {@code null} or
     *         empty, recognition failed, the wait timed out, or the thread was interrupted
     */
    public String process(byte[] audioData) {
        // Reject unusable input before opening a recognition session.
        if (audioData == null || audioData.length == 0) {
            logger.warn("Speech recognition skipped: audio data is null or empty");
            return null;
        }

        // Written by the SDK callback thread, read by the calling thread.
        final AtomicReference<String> result = new AtomicReference<String>();
        // Released exactly when the session reaches a terminal state (completed or failed).
        final CountDownLatch finished = new CountDownLatch(1);

        SpeechRecognizer recognizer = null;
        try {
            SpeechRecognizerListener listener = getRecognizerListener(new RecognitionCallback() {
                @Override
                public void onResult(String text) {
                    result.set(text);
                    finished.countDown();
                }

                @Override
                public void onError(String errorMessage) {
                    logger.error("Recognition error: {}", errorMessage);
                    finished.countDown();
                }
            });

            recognizer = new SpeechRecognizer(AliTtsConfig.getNlsClient(), listener);
            recognizer.setAppKey(aliTtsConfig.getAppKey());
            recognizer.setFormat(InputFormatEnum.PCM);
            recognizer.setSampleRate(SampleRateEnum.SAMPLE_RATE_8K);
            recognizer.setEnableIntermediateResult(true);
            recognizer.addCustomedParam("enable_voice_detection", true);

            long startedAt = System.currentTimeMillis();
            recognizer.start();
            logger.info("ASR start latency : {} ms", System.currentTimeMillis() - startedAt);

            // One reusable buffer; only the first `length` bytes are valid for each send.
            byte[] chunk = new byte[CHUNK_SIZE_BYTES];
            for (int offset = 0; offset < audioData.length; offset += chunk.length) {
                int length = Math.min(chunk.length, audioData.length - offset);
                System.arraycopy(audioData, offset, chunk, 0, length);
                recognizer.send(chunk, length);
                Thread.sleep(SEND_INTERVAL_MILLIS); // simulate real-time sending
            }

            startedAt = System.currentTimeMillis();
            recognizer.stop();
            logger.info("ASR stop latency : {} ms", System.currentTimeMillis() - startedAt);

            // Bounded wait so a missing completion/failure callback cannot block the caller forever.
            if (!finished.await(RESULT_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
                logger.warn("Timed out after {} s waiting for the final recognition result",
                        RESULT_TIMEOUT_SECONDS);
            }

        } catch (InterruptedException e) {
            // Preserve the interrupt so callers further up the stack can react to it.
            Thread.currentThread().interrupt();
            logger.error("Speech recognition interrupted", e);
        } catch (Exception e) {
            logger.error("Error during speech recognition", e);
        } finally {
            if (recognizer != null) {
                recognizer.close();
            }
        }

        return result.get();
    }
}
