templates/record.html

<!DOCTYPE html>
<html lang="en">
  <!--
      webrec: Record and Upload Audio with Stop on Silence
  
      This Python Flask HTML template, part of the webrec application, 
      serves as the main user interface for recording and uploading audio
      with an additional feature to stop recording on silence detection. 
      The page includes styles, scripts for voice activity detection, and
      handling of audio recording functionalities.

      The interface consists of controls for recording, visual indicators 
      for audio levels and the length of the recording, and options for 
      voice activity detection settings. JavaScript is extensively used 
      for handling audio recording, processing, and uploading functions,
      using the current WebRTC GetUserMedia and AudioWorklet APIs.

      The application uses external libraries for enhanced features like 
      voice activity detection in WebAssembly and audio processing in Pysox.

      This application is released under the MIT License, November 24, 2023.

      run: https://webrec.replit.app

      dev: https://replit.com/@jsalsman/webrec

      github: https://github.com/jsalsman/webrec
  -->
<head>
  <meta charset="UTF-8">
  <title>webrec: Record and Upload Audio</title>
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <link rel="icon" href="/static/microphone.png">

  <!-- <script src="/socket.io/socket.io.js"></script> -->
  <script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.7.2/socket.io.min.js"></script>

  <!-- Voice Activity Detection: @ricky0123/vad-web, see below -->
  <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.7/dist/bundle.min.js"></script>

  <style>
    body {
      font-family: 'Verdana', sans-serif;
      background-color: #fafafa; /* very light gray */
      -webkit-text-size-adjust: 100%;
    }

    a {
      text-decoration: none; /* Removes underlining from links */
    }

    h1, h2, h3 {
      font-family: 'Georgia', serif;
    }

    button {
      font-family: 'Verdana', sans-serif;
      font-size: 16px;
      padding: 10px 20px; 
      border: 0; /* No border */
      border-radius: 5px; /* Rounds the corners */
      background-color: lightgreen; /* background */
      color: black; /* text */
      margin: 0; /* No margins */
      box-sizing: border-box; /* Include padding in total width and height */
      -webkit-appearance: none; /* Removes default styling on iOS */
      appearance: none; /* Removes default styling on other browsers */
    }

    button:disabled {
      background-color: gray;
      color: white;
    }

    /* Flex container for meters */
    .meter-container {
      display: flex;
      flex-direction: row;
      align-items: center; /* Centers items vertically */
      gap: 15px;
      max-width: 500px; /* Maximum width of the container */
      margin-bottom: 20px; /* Adds some space below each meter */
    }

    meter {
      flex: 1; /* Allows the meter to grow */
    }

    /* Flex container for buttons */
    .button-container {
      display: flex;
      flex-direction: row;
      gap: 15px;
      width: 100%; /* The container takes up 100% of the parent's width */
      max-width: 500px; /* Maximum width of the container */
      justify-content: space-between;
      margin-top: 20px; /* Adds some space above */
    }

    /* Styles for the buttons */
    #start, #startOver {
      flex: 1; /* Allows the buttons to grow and fill the container */
    }

    /* Style for the Safari help link container */
    #safari-help {
      margin-top: 20px; /* Adds some space above the help link */
    }
  </style>

</head>
<body>
  <h2>Record and Upload Audio with Stop on Silence</h2>

  <!-- VAD toggle and delay selection -->
  <div style="font-size: 120%;">
    <label><input type="checkbox" id="vad" checked> Stop</label> after:
    <label><input type="radio" name="sil_dur" value="1">&nbsp;1,</label>
    <label><input type="radio" name="sil_dur" value="2" checked>&nbsp;2,</label> or
    <label><input type="radio" name="sil_dur" value="3">&nbsp;3 seconds</label>
    of silence.
  </div>
  <br>
  
  <!-- Meter for audio level -->
  <div class="meter-container">
    <label for="level">Audio level:</label>
    <meter id="level" min="0" max="1" low="0.05" high="0.7" value="0"></meter>
    <span id="vadSpan">Silence</span>
  </div>

  <!-- Meter for recording time -->
  <div class="meter-container">
    <label for="full"><span id="secs">00.0</span> of up to 60 seconds:</label>
    <meter id="full" min="0" max="60" high="55" value="0"></meter>
  </div>

  <!-- Buttons -->
  <div class="button-container">
    <button id="start" disabled>Initializing...</button>
    <button id="startOver" style="display: none;">Start Over</button>
  </div>

  <!-- Safari users' help link -->
  <div id="safari-help" style="display: none;">
    <h3>Apple Safari users: <a 
      href="https://support.apple.com/guide/mac-help/control-access-to-the-microphone-on-mac-mchla1b1e1fe/mac"
      >please see here if you are having trouble.</a></h3>
  </div>
  <br>

  <!-- WebRTC noise supression toggle -->
  <div style="font-size: 120%">
    <span id="nsSpan"><label><input type="checkbox" id="suppressNoise">
      Noise supression (may degrade quality)</label></span>
  </div>

  <!-- Socket.IO status and data -->
  <span id='socketSpan'></span><span id='socketData'></span>

  <script>
    // Global variables
    var context, gotContext = false;  // Audio context for managing audio
    // Stream for audio data, AudioWorkletNode from recording-processing.js
    var stream, recorder = null, isRecording = false;  // and recording flag
    // vad.MicVAD() Voice Activity Detection and Timeout for silence
    var myvad, vad_timeout = null;
    // Flag for if Socket.IO is streaming, the socket, and the buffer list
    var socket = false, mySocket, chunkBuffer;
    // How many chunks and kilobytes have been streamed so far
    var chunksStreamed = 0, streamedKB = 0;
  
    // Check if the Web Audio API is supported by the browser
    if (!window.audioContext ) {
      try {
        // Polyfill for AudioContext for wider browser support
        window.AudioContext = 
          window.AudioContext || window.webkitAudioContext;
        // Create a new AudioContext with a specific sample rate
        context = new AudioContext({ sampleRate: 16000 });
        // Check if the AudioContext is active
        if (context.state === 'running') {
          gotContext = true;
        } else {            
          // If suspended, AudioContext requires a user click to activate
          document.getElementById('start').textContent = 'Detect Audio Levels';
          document.getElementById('start').disabled = false;
        }
      } catch (e) {
        // Handle the case where Web Audio API is not supported by the browser
        document.getElementById('start').disabled = true;
        alert('Web audio is not supported on this browser.');
      }
    } else {
      // Initialize the AudioContext for browsers that support it natively
      context = new AudioContext({ sampleRate: 16000 });
      // Check and update context status
      if (context.state === 'running') {
        gotContext = true;
      } else {
        // If AudioContext is suspended, enable it with user interaction
        document.getElementById('start').textContent = 'Detect Audio Levels';
        document.getElementById('start').disabled = false;
      }
    }
    
    // Use WebRTC API to check for microphone access
    navigator.mediaDevices.enumerateDevices().then(async devices => {
      // Check if any of the media devices are microphones
      if (devices.some(device => device.kind === 'audioinput'
                       && device.label)) {
        // The user has granted microphone access
        if ( {{ force_click|tojson }} ) {  // |tojson lowercases booleans
          document.getElementById('start').textContent = 'Detect Audio Levels';
          document.getElementById('start').disabled = false;
        } else if (context.state === 'running') {
          await initRecorder();
        }
      } else {
        // Microphone access is not granted or no mic is available
        if (/^((?!chrome|android).)*safari/i.test(navigator.userAgent)) {
          // Special help for Apple Safari users on microphone permissions
          document.getElementById('safari-help').style.display = 'block';
        }
        document.getElementById('start').textContent = 'Allow Microphone Use';
        document.getElementById('start').disabled = false;
      }
    });
    
    // Add event listener for the 'start' button
    document.getElementById('start').addEventListener('click', async () => {
      if (!recorder) {
        // If the recorder is not initialized, initialize it
        document.getElementById('start').disabled = true;
        document.getElementById('start').textContent = 'Initializing...';
        context.resume();
        await initRecorder();
      } else if (!isRecording) {
        // If not currently recording, start recording
        startRecording();
      } else {
        // If currently recording, stop recording
        stopRecording();
      }
    });
    
    // Event listener for the 'startOver' button
    document.getElementById('startOver').onclick = function() {
      if (recorder && isRecording) { 
        clearTimeout(vad_timeout);
        // Send a message to reset the recording if already in progress
        recorder.port.postMessage({ message: 'UPDATE_RECORDING_STATE', 
                       setRecording: true });  // Reset samples to zero
        // Tell the streaming server to start over too
        if (socket) {
          chunkBuffer = [];
          mySocket.emit('start_over');
        }
      }
    }

    // Event listener for the noise suppression checkbox
    document.getElementById('suppressNoise').addEventListener('change', 
                                               setNoiseSuppression);
  
    // Function to initialize the audio recorder
    async function initRecorder() {
      try {
        // Use the WebRTC getUserMedia API to access the user's microphone
        stream = await navigator.mediaDevices.getUserMedia(
          { audio: {  // Set noise supression from the checkbox
              noiseSuppression: document.getElementById('suppressNoise').checked,
              autoGainControl: false,    // Optional automatic gain control
              latency: 0,                // Ask for minimum latency
              echoCancellation: false }, // Echo cancellation unnecessary here
           video: false });  // No video
  
        // Create a MediaStreamAudioSourceNode from the audio stream
        const micSourceNode = new MediaStreamAudioSourceNode(context,
            {mediaStream: stream});
  
        // Hide help for Apple Safari browser microphone permissions
        document.getElementById('safari-help').style.display = 'none';
        
        // Add a custom audio processor script to the AudioContext
        await context.audioWorklet.addModule('/static/recording-processor.js');
        // Create an AudioWorkletNode as the recorder
        recorder = new AudioWorkletNode(context, 'recording-processor',
          { processorOptions: {
            numberOfChannels: 1,
            sampleRate: 16000,
            maxFrameCount: 16000 * 60 // Maximum frames for 1 minute of audio
        }});
  
        // Connect the microphone source node to the recorder
        await micSourceNode.connect(recorder);

        try {
          mySocket = io();
          mySocket.on('connect', function() {
              socket = true;
              document.getElementById('socketSpan').innerHTML =
                '<br> Socket.IO available.';
          });
        } catch (e) {
          console.log('Socket.IO failed to connect:', e)
        }

        // Handle messages from audio worklet: /static/recording-processor.js
        chunkBuffer = [];
        const chunkBufferSizeLimit = 8192;  // 8 KB

        recorder.port.onmessage = event => {

          // Set the audio level meter and try to stream Socket.IO upload
          if (event.data.message === 'UPDATE_RECORDING') {
            let seconds = event.data.recordingLength / 16000
            document.getElementById('full').value = seconds.toFixed(2);
            document.getElementById('secs').textContent = 
              seconds.toFixed(1).padStart(4, '0');

            if (socket) {
              // Add new samples to the chunk buffer
              chunkBuffer.push(...event.data.bufferSlice);
  
              // Send the chunk if the buffer reaches the size limit
              if (chunkBuffer.length >= chunkBufferSizeLimit) {
                mySocket.emit('audio_chunk', new Uint8Array(chunkBuffer), 
                              (status, error) => {
                  if (status === 'fail') {
                    socket = false; // Disable the socket on failure
                    document.getElementById('socketSpan').innerHTML =
                      '<br> Socket.IO failed.';
                    document.getElementById('socketData').textContent =
                      ` After ${streamedKB.toFixed(0)} KB in ${chunksStreamed} chunks.`;
                    log('audio_chunk socket error: ' + error)
                  }
                });
                
                // Update and display progress information
                chunksStreamed+= 1;
                streamedKB += chunkBuffer.length / 1024; // Bytes to KB
                document.getElementById('socketData').textContent =
                  ` Sent ${streamedKB.toFixed(0)} KB in ${chunksStreamed} chunks.`;

                chunkBuffer = []; // Clear the buffer after sending
              }
            }

          // Recording ended, close socket or upload file
          } else if (event.data.message === 'SHARE_RECORDING_BUFFER') { 
            let length = event.data.recordingLength;
            console.log('Seconds Recorded: ' + length / 16000);

            // send the final bufferSlice
            if (socket) {
              if (chunkBuffer.length) {
                mySocket.emit('audio_chunk', new Uint8Array(chunkBuffer), 
                              (status, error) => {
                  if (status === 'fail') {
                    socket = false; // Disable the socket on failure
                    log('audio_chunk socket error: ' + error)
                  } else {
                    // Update and display progress information
                    chunksStreamed+= 1;
                    streamedKB += chunkBuffer.length / 1024; // Convert bytes to KB
                    document.getElementById('socketData').textContent =
                      ` Finished with ${streamedKB.toFixed(0)} KB in ${chunksStreamed} chunks.`;
                  }
                })
              }
              mySocket.emit('end_recording', (status, error) => {
                if (status === 'fail') {
                  console.log('end_recording socket error: ' + error)
                  socket = false; // Disable the socket on failure
                  mySocket.disconnect();
                  alert('Error ending recording: ' + error)
                } else if (status.startsWith('/playback/')) {
                  mySocket.disconnect();
                  window.location = status;
                }
              })
            }

            if (!socket) {
              document.getElementById('socketSpan').innerHTML =
                '<br> Socket.IO failed.';
              document.getElementById('socketData').textContent =
                ` After ${streamedKB.toFixed(0)} KB in ${chunksStreamed} chunks.`;
  
              // Put the raw PCM data in a blob and upload it
              let formData = new FormData();
              let blob = new Blob([event.data.buffer.slice(0, length * 2)],
                                  { type: 'audio/l16' });
              formData.append('audio', blob, 'audio.raw');            
              fetch('/upload-audio', {
                method: 'POST',
                body: formData
              }).then(response => {
                if (response.ok) {
                  window.location.href = response.url;
                }
              }).catch(error => {
                console.error('Upload error: ', error);
                alert('Upload error: ' + error);
                // Disallow further interaction
                document.getElementById('start').disabled = true;
                document.getElementById('startOver').disabled = true;
              });
            }
    
          // Stop the recording if the buffer is full
          } else if (event.data.message === 'MAX_RECORDING_LENGTH_REACHED') {
            stopRecording();
  
          // Set the audio level meter
          } else if (event.data.message === 'UPDATE_VISUALIZERS') {
            let expKx = Math.exp(20 * event.data.gain);    // Don't ask me;
            document.getElementById('level').value =       // ChatGPT-4 came
              ((expKx - 1) / (expKx + Math.E)).toFixed(2); // up with this.
          }
        }
    
        // Voice Activity Detection: @ricky0123/vad-web
        // https://www.vad.ricky0123.com/docs/browser/
  
        // Suppress sub-warning "errors" in the console log from ort.js
        const origConsoleError = console.error; // Save original console.error()
        console.error = (...args) => { 
          // Check if the error message contains this specific text
          if (!args[0].includes("not used by any node and should be removed")) {
            origConsoleError(...args); // Otherwise call regular console.error
          }
        };
        myvad = await vad.MicVAD.new({
          additionalAudioConstraints: { audio: stream },
          onSpeechStart: () => {
            //console.log('VAD: Speaking started');
            document.getElementById('vadSpan').textContent = 'Speech';
            document.getElementById('vadSpan').style.color = 'darkgreen';
            clearTimeout(vad_timeout);
          },
          onSpeechEnd: (audio) => {
            //console.log('VAD: Speaking stopped');
            document.getElementById('vadSpan').textContent = 'Silence';
            document.getElementById('vadSpan').style.color = 'darkgray';
            if (isRecording && document.getElementById('vad').checked) {
              silence_secs = document.querySelector(
                                        'input[name="sil_dur"]:checked').value;
              vad_timeout = setTimeout(stopRecording, silence_secs * 1000);
            } else {
              clearTimeout(vad_timeout);
            }
          }
        });
        console.error = origConsoleError; // Restore original console.error()
        myvad.start();
  
        document.getElementById('start').textContent = 'Start Recording';
        document.getElementById('start').disabled = false;
  
        } catch (e) {
          let contextInfo = context ? 
            `AudioContext state: ${context.state}, rate: ${context.sampleRate}`
            : 'AudioContext not initialized';
          let streamInfo = stream ?
            `MediaStream active: ${stream.active}, tracks: ${stream.getTracks().length}` 
            : 'MediaStream not initialized';
          let errorMessage = `Call stack: ${Error().stack}\n` +
                             `${contextInfo}\n${streamInfo}\n` +
                             `Browser: ${navigator.userAgent}`;
          console.error('Error initializing recorder:', e, '\n', errorMessage);
          alert('Error initializing recorder: ' + e + '\n' + errorMessage);
          location.reload();
        }
      }
  
      function startRecording() {
        setNoiseSuppression();
        recorder.port.postMessage({ message: 'UPDATE_RECORDING_STATE', 
                       setRecording: true });  // Begin recording
        isRecording = true;
        
        document.getElementById('start').textContent = 'End Recording';
        document.getElementById('startOver').style.display = 'inline';
        document.getElementById('vadSpan').textContent = 'Sensing';
      }
  
      function stopRecording() {
        clearTimeout(vad_timeout);
        document.getElementById('start').disabled = true;
        document.getElementById('startOver').disabled = true;
        recorder.port.postMessage({ message: 'UPDATE_RECORDING_STATE', 
                       setRecording: false });  // Halt recording
        isRecording = false;
      }
  
    // Setter for the noise suppression checkbox
    function setNoiseSuppression() {
      if (stream) {
        let audioTrack = stream.getAudioTracks()[0];
        if (audioTrack) {
          audioTrack.applyConstraints({ noiseSuppression:
                       document.getElementById('suppressNoise').checked })
            .then(() => {
              let currentSettings = audioTrack.getSettings();
              if (currentSettings.noiseSuppression
                  !== document.getElementById('suppressNoise').checked) {
                console.warn('Noise suppression toggle did not apply.');
                // Replace the checkbox and label with a message and reload link
                document.getElementById('nsSpan').innerHTML =
                  'Noise suppression can no longer be changed without ' +
                  '<a href="/record?forceClick">reloading</a>.';
                }
            })
            .catch(err => {
              console.error('Failed to toggle noise suppression:', err);
            });
        }
      }
    }
  
  </script>

  <br><br>
  
  Python Flask and JavaScript source code is
  <a href="https://replit.com/@jsalsman/webrec">on Replit</a>
  and <a href="https://github.com/jsalsman/webrec">GitHub.</a>
</body>
</html>