How to implement the Web Speech API in HTML5

The JavaScript Web Speech API allows developers to add speech recognition functionality to web applications, making it easier for users to interact with a website using their voice; it transforms spoken words into text on the page.

Here’s a brief overview of how you can implement this using JavaScript:

  1. We have a button that, when clicked, starts the speech recognition process.
  2. When speech is recognized (onresult event), interim results are shown in the span with ID “interim_output” and finalized text in the span with ID “final_output”.
  3. If there’s an error during speech recognition (onerror event), an error message is logged to the console.
  4. The startSpeechRecognition() function handles creating the SpeechRecognition instance, setting the language, and starting the recognition process when the button is clicked.
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Speech Recognition Example</title>
</head>
<body>

<button onclick="startSpeechRecognition()">Start Speech Recognition</button>

<div id="results"> 
 <span id="interim_output"></span>  
 <span id="final_output"></span> 
</div>

<script type="text/javascript">
  // True while a recognition session is active (toggled by onstart/onend).
  var recognizing = false;
  // Accumulates finalized transcript text across onresult events.
  var final_output_transcript = '';

  // Feature-detect the Web Speech API (Chrome ships it prefixed as
  // webkitSpeechRecognition).
  if ('SpeechRecognition' in window || 'webkitSpeechRecognition' in window) {
      var recognition = new(window.SpeechRecognition || window.webkitSpeechRecognition)();

      // Emit partial (interim) hypotheses while the user is still speaking.
      recognition.interimResults = true;
      // Keep listening across pauses instead of stopping after one phrase.
      recognition.continuous = true;

      recognition.onerror = function(event) {
          console.error('Speech recognition error:', event.error);
      };

      recognition.onstart = function() {
          recognizing = true;
      };

      recognition.onend = function() {
          recognizing = false;
      };

      // Event handler for when speech is recognized
      recognition.onresult = function(event) {
          var interim_output_transcript = '';
          // Loop through speech recognition results.
          // event.resultIndex marks the first result that changed in this event;
          // earlier final results were already folded into final_output_transcript.
          for (var i = event.resultIndex; i < event.results.length; ++i) {
              // Check if the result is final or interim
              if (event.results[i].isFinal) {
                  // Concatenate final transcript
                  final_output_transcript += event.results[i][0].transcript;
              } else {
                  // Concatenate interim transcript
                  interim_output_transcript += event.results[i][0].transcript;
              }
          }
          // Capitalize the final transcript
          final_output_transcript = capitalize(final_output_transcript);

          // Update HTML elements with line breaks for final and interim transcripts.
          // NOTE(review): `final_output`/`interim_output` rely on the browser
          // exposing element IDs as globals; innerHTML is used so linebreak()'s
          // <p>/<br> markup renders — if untrusted strings could ever reach
          // here, switch to sanitized insertion.
          final_output.innerHTML = linebreak(final_output_transcript);
          interim_output.innerHTML = linebreak(interim_output_transcript);
      };
  } else {
      console.error('Speech recognition not supported in this browser.');
  }

  /**
   * Click handler for the start button: toggles a recognition session.
   * First click starts listening; a second click while active stops it.
   * @param {Event} [event] - The click event (unused).
   */
  function startSpeechRecognition(event) {
      // `recognition` is hoisted but never assigned when the feature check
      // above failed — without this guard a click throws a TypeError in
      // unsupported browsers.
      if (!recognition) {
          console.error('Speech recognition not supported in this browser.');
          return;
      }
      // Second click while listening acts as a stop toggle.
      if (recognizing) {
          recognition.stop();
          return;
      }
      // Reset state from any previous session before starting a new one.
      final_output_transcript = '';
      recognition.lang = 'en-US';
      recognition.start();
      final_output.innerHTML = '';
      interim_output.innerHTML = '';
  }

  // Patterns for paragraph breaks (blank line) and single line breaks.
  var two_line = /\n\n/g;
  var one_line = /\n/g;

  /**
   * Converts plain-text newlines into HTML: a blank line becomes an empty
   * paragraph and any remaining single newline becomes a <br>.
   * @param {string} s - Plain text possibly containing newlines.
   * @returns {string} HTML-ready text.
   */
  function linebreak(s) {
      var withParagraphs = s.replace(two_line, '<p></p>');
      var withBreaks = withParagraphs.replace(one_line, '<br>');
      return withBreaks;
  }

  /**
   * Uppercases the first character of a string.
   * The original used s.replace(s.substr(0, 1), fn), which searches for the
   * first *occurrence* of the leading character instead of addressing
   * position 0 directly; this form is the straightforward equivalent and
   * handles the empty string the same way (returns '').
   * @param {string} s - Input string.
   * @returns {string} The string with its first character uppercased.
   */
  function capitalize(s) {
      return s.charAt(0).toUpperCase() + s.slice(1);
  }
</script>

</body>
</html>
Scroll to Top