Ghost Socket: 크롬 브라우저에서 음성인식 SpeechRecognition API 구현

이것은 현재 구글 크롬 브라우저에서만 가능한 것 같다. 다른 브라우저(파이어폭스, 익스플로러)에서는 동작하지 않는다. 간단히 예제를 만들어 보자.

페이지에 접속하면 이런 화면이다. Start 버튼을 누르면 SpeechRecognition을 이용한 음성인식이 시작된다. 인식된 데이터를 왼쪽 필드에 출력해 준다. 이어서 speechSynthesis를 이용해서 음성으로 출력해 주게 된다. 일단 여기까지만 구현해 보자.

<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Speech to Text</title>
    <style>
        /* Center the inline content of the page (heading, buttons, status line). */
        body {
            text-align: center;
        }

        button {
            padding: 8px;
        }

        /* Status line shown under the buttons. */
        #message {
            color: #996600;
        }

        /* Fixed-width, horizontally centered flex row that holds the text panes. */
        .textWrapper {
            width: 800px;
            margin: 0 auto;
            display: flex;
            flex-direction: row;
        }

        /* A single bordered text pane; panes grow equally inside the row. */
        .textbox {
            height: 100px;
            border: 1px solid #d3d3d3;
            flex: 1;
            margin: 5px 15px;
            border-radius: 6px;
            text-align: left;
            padding: 16px;
        }
    </style>
</head>
<body>

<h1>Speech to Text Example!!!</h1>

    <button id="speech" onclick="speech_to_text()">Start STT</button>
    <button id="stop" onclick="stop()">Stop</button>
    <p id="message">버튼을 누르고 아무말이나 하세요.</p>

    <!-- Output panes: the script looks these up by id (#korea, #english) and
         the stylesheet lays them out through .textWrapper / .textbox. -->
    <div class="textWrapper">
        <div id="korea" class="textbox"></div>
        <div id="english" class="textbox"></div>
    </div>

    <!-- Placed after the elements above so the querySelector calls at the top
         of the script find them. -->
    <script>
        // DOM nodes updated by the handlers defined further down in this script.
        var message = document.querySelector("#message");
        var button = document.querySelector("#speech");
        var korea = document.querySelector("#korea");
        var english = document.querySelector("#english");
        // Set to true by speech_to_text() and back to false when the session ends or stop() runs.
        var isRecognizing = false;

        // Resolve the constructor through every vendor prefix: checking only the
        // unprefixed name reports "unsupported" on browsers that ship the
        // prefixed implementation.
        var SpeechRecognitionImpl = window.SpeechRecognition ||
            window.webkitSpeechRecognition ||
            window.mozSpeechRecognition ||
            window.msSpeechRecognition;

        // Stays null when the API is missing or construction fails, so nothing
        // below dereferences an undefined recognizer.
        var recognition = null;

        if (SpeechRecognitionImpl) {
            // Speech recognition support. Talk to your apps!
            console.log("음성인식을 지원하는 브라우저입니다.");
            try {
                recognition = new SpeechRecognitionImpl();
            } catch (e) {
                console.error(e);
            }
        }

        if (recognition) {
            recognition.lang = 'ko-KR'; // TODO: should probably let the user choose the language.
            recognition.interimResults = false;
            recognition.maxAlternatives = 5;
            // Continuous mode is left disabled (recognition.continuous is not set).
        } else {
            // Without a recognizer the button handlers would throw, so say why
            // and take the controls out of play.
            console.error("SpeechRecognition is not available in this browser.");
            message.innerHTML = "이 브라우저는 음성인식을 지원하지 않습니다.";
            [button, document.querySelector("#stop")].forEach(function (control) {
                if (control) {
                    control.disabled = true;
                }
            });
        }

         /**
          * Starts one speech-recognition session on the shared `recognition`
          * object and wires the handlers that drive the UI.
          *
          * Does nothing when a session is already marked active, because
          * calling start() on a recognizer that is already running throws
          * InvalidStateError.
          *
          * Reads/writes the script-level `recognition`, `message`, `button`,
          * `korea` and `isRecognizing`; forwards each transcript to
          * text_to_speech().
          */
         function speech_to_text(){
             if (isRecognizing) {
                 return;
             }

             // Puts the status line and the start button back into their idle state.
             function showIdle(){
                 message.innerHTML = "버튼을 누르고 아무말이나 하세요.";
                 button.disabled = false;
                 button.innerHTML = "Start STT";
             }

             // Handlers are attached before start() so no event can be missed.
             recognition.onstart = function(){
                 console.log("음성인식이 시작 되었습니다. 이제 마이크에 무슨 말이든 하세요.")
                 message.innerHTML = "음성인식 시작...";
                 button.innerHTML = "Listening...";
                 button.disabled = true;
             };

             recognition.onspeechend = function(){
                 showIdle();
             };

             recognition.onresult = function(event) {
                 // First alternative of the first result.
                 var resText = event.results[0][0].transcript;
                 console.log('You said: ', resText);
                 // The transcript is plain text; textContent keeps it from
                 // being parsed as markup.
                 korea.textContent = resText;

                 // Speak the recognized text back.
                 text_to_speech(resText);
             };

             recognition.onend = function(){
                 showIdle();
                 isRecognizing = false;
             };

             recognition.start();
             isRecognizing = true;
         }

        /**
         * Stops the shared recognizer and returns the UI to its idle state:
         * idle status text, start button enabled and relabelled, and the
         * `isRecognizing` flag cleared.
         */
        function stop(){
            // Stop first; the UI is only reset if this call does not throw.
            recognition.stop();

            isRecognizing = false;
            Object.assign(button, {
                disabled: false,
                innerHTML: "Start STT"
            });
            message.innerHTML = "버튼을 누르고 아무말이나 하세요.";
        }

        // Text to speech
         /**
          * Speaks `txt` through the Web Speech synthesis API and, once the
          * utterance has finished, starts the shared recognizer again unless a
          * session is already marked active.
          *
          * Logs an error and returns without speaking when `speechSynthesis`
          * is not available on `window`.
          *
          * @param {string} txt - Text to speak (spoken with lang 'ko-KR').
          */
         function text_to_speech(txt){
             // Web Speech API - speech synthesis
             if (!('speechSynthesis' in window)) {
                 console.error("speechSynthesis is not available in this browser.");
                 return;
             }
             // Synthesis support. Make your web apps talk!
             console.log("음성합성을 지원하는 브라우저입니다.");

             var msg = new SpeechSynthesisUtterance();
             // No explicit voice is selected; per the original author's note,
             // picking entries from getVoices() gave a foreign-sounding accent.
             msg.voiceURI = 'native'; // NOTE(review): kept from the original; confirm the browser still honors this property.
             msg.volume = 1; // 0 to 1
             msg.rate = 1.3; // 0.1 to 10
             msg.text = txt;
             msg.lang = 'ko-KR';

             msg.onend = function(e) {
                 if (!isRecognizing) {
                     recognition.start();
                     // Keep the flag in step with the recognizer, as speech_to_text() does.
                     isRecognizing = true;
                 }
                 // Read the handler's own argument rather than the deprecated global `event`.
                 console.log('Finished in ' + e.elapsedTime + ' seconds.');
             };
             window.speechSynthesis.speak(msg);
         }

</script>

</body>
</html>

전체 코드인데 제대로 된 건지는 아직 모르겠다. 일단 작동은 하니깐. 여러 가지 메소드들이 있는데 정확한 사용법은 앞으로 예제를 더 만들어 보면서 바로잡도록 하자. 예제는 단순히 앞 음성인식이 끝이 나면 자동으로 다시 음성인식을 시작하도록 하였다. 다른 방법도 있을 것 같은데 … 자세한 내용은 아래 사이트를 참조하자.

https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition

앞으로 계획은 한글로 인식된 음성을 gTTS를 이용해서 영어로 번역해서 음성으로 출력하게 하면 된다. 자동 영어 번역기 ㅋㅋ 한글 -> 영어 혹은 영어 -> 한글을 옵션으로 선택하게 해주는 버튼도 추가를 해줘야 할 것 같다.

데모 사이트 : https://timbuktu031.github.io/voice_recogintion/

크롬 브라우저에서만 테스트 가능하다. 이게 문제네…

Ghost Socket

top drop menu

Recent Post

월요일, 11월 19

크롬 브라우저에서 음성인식 SpeechRecognition API 구현