Stuttering / robotic audio in WAV recording via Web Audio API

I am using the Web Audio API to capture WAV audio clips in the browser from app users while they hold down a specific key (like push-to-talk) (update: it also happens when push-to-talk is not used). The sound stutters in many of the recordings; you can hear an example here (start at ~5 seconds) and another example here. What can I do to diagnose (or fix) this problem? (I played around a bit with the buffer size passed to createScriptProcessor, but to no avail.)

Computers using the app are all MacBook Pros running Chrome 36 on OS X 10.8 or 10.9 (update: also Chrome 39/40 on 10.10). Here's the chrome://version output for the machine that recorded the sample linked above:

Google Chrome:   36.0.1985.143 (Official Build 287914) 
OS:              Mac OS X 
Blink:           537.36 (@179211)
JavaScript:      V8 3.26.31.15
Flash:           14.0.0.177
User Agent:      Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36
Command Line:    /Applications/Google Chrome.app/Contents/MacOS/Google Chrome --flag-switches-begin --flag-switches-end
Executable Path: /Applications/Google Chrome.app/Contents/MacOS/Google Chrome
Profile Path:    /Users/jason/Library/Application Support/Google/Chrome/Default
Variations:      e950616e-37fb3cc2
                 8afebf76-771ac34e
                 c70841c8-4866ef6e
                 195ce1b5-d93a0620
                 c4126e6a-ca7d8d80
                 9e5c75f1-ad69ceb0
                 262f996f-7075cd8
                 24dca50e-837c4893
                 ca65a9fe-91ac3782
                 8d790604-9cb2a91c
                 4ea303a6-3d47f4f4
                 d8f57532-3f4a17df
                 b2612322-f8cf70e2
                 5a3c10b5-e1cc0f14
                 244ca1ac-4ad60575
                 f47ae82a-86f22ee5
                 5e29d81-cf4f6ead
                 3ac60855-486e2a9c
                 246fb659-6e597ede
                 f296190c-65255996
                 4442aae2-6e597ede
                 ed1d377-e1cc0f14
                 75f0f0a0-a5822863
                 e2b18481-d7f6b13c
                 e7e71889-4ad60575
                 cbf0c14e-bf3e6cfd

      

The relevant parts of the code that make the recordings look like this (simplified):

/**
 * Requests microphone access and, once it is granted, builds the capture
 * graph (media stream source -> script processor -> destination), starts the
 * encoding worker and begins listening for "audioprocess" events.
 *
 * Expects the receiver (`this`) to provide `workerUrl`, `handleWorkerMessage`
 * and `onAudioProcess`. `audioContext`, `input` and `node` are not declared
 * here; presumably they live in an enclosing scope -- verify.
 *
 * NOTE(review): nothing in this function sets `this.recording` to true, yet
 * onAudioProcess drops every buffer while it is falsy -- confirm it is set
 * by the caller.
 */
function startRecording() {
  // getUserMedia invokes its callbacks without the caller's receiver, so the
  // object this function was called on is captured explicitly.
  var recorder = this;

  navigator.getUserMedia({audio: true, video: false}, function (stream) {
    audioContext = audioContext || new window.webkitAudioContext();
    input = audioContext.createMediaStreamSource(stream);
    node = input.context.createScriptProcessor(4096, 1, 1);

    input.connect(node);
    node.connect(audioContext.destination);

    recorder.worker = new Worker(recorder.workerUrl); // see Web Worker code, below
    recorder.worker.addEventListener("message", recorder.handleWorkerMessage.bind(recorder));
    // The context's real sample rate rides along on the init message; a
    // receiver that does not read the field is unaffected by it.
    recorder.worker.postMessage({command: "init", sampleRate: audioContext.sampleRate});

    // Store the bound listener on the recorder so that a later
    // removeEventListener("audioprocess", recorder.onAudioProcess) is handed
    // the very same function object that is registered here, and so that the
    // listener sees the recorder (not the audio node) as `this`.
    recorder.onAudioProcess = recorder.onAudioProcess.bind(recorder);
    node.addEventListener("audioprocess", recorder.onAudioProcess);
  }, function (err) {
    // Permission denied or no input device: report it instead of failing silently.
    console.error("getUserMedia failed:", err);
  });
}

/**
 * Stops forwarding audio and asks the worker to finish the recording.
 *
 * Clears `this.recording` and posts an "end" command to `this.worker`.
 */
function stopRecording() {
  this.recording = false;

  // The worker is only created inside the asynchronous getUserMedia success
  // callback, so a stop that arrives before then has nothing to notify.
  if (this.worker) {
    this.worker.postMessage({command: "end"});
  }
}

/**
 * "audioprocess" listener: forwards the first input channel of each audio
 * buffer to the worker as an "encode" command.
 *
 * Does nothing while `this.recording` is falsy or `stream.ended` is truthy.
 * `stream` is not defined in this function; presumably it is the captured
 * MediaStream held in an enclosing scope -- verify.
 *
 * @param {Object} evt - event whose `inputBuffer.getChannelData(0)` yields
 *   the samples to forward.
 */
function onAudioProcess(evt) {
  if (!this.recording || stream.ended) return;

  // Work on a private copy of the samples rather than the array handed out
  // by the event (presumably the browser reuses that memory -- verify).
  var samples = new Float32Array(evt.inputBuffer.getChannelData(0));

  // Listing the copy's ArrayBuffer as transferable moves it to the worker
  // instead of cloning it a second time; the copy is not used here afterwards.
  this.worker.postMessage({command: "encode", buffer: samples}, [samples.buffer]);
}

/**
 * Handles messages coming back from the encoding worker.
 *
 * On an "end" message the received bytes are appended to `this.buffer`,
 * wrapped in a Blob of type `this.mimeType` and passed to `this.callback`.
 * The worker is terminated and the "audioprocess" listener removed whether
 * or not building or delivering the Blob throws. Other commands are ignored.
 *
 * NOTE(review): `this.buffer` is not cleared after delivery, so a later
 * "end" handled by the same object would append to the previous bytes --
 * confirm the object is single-use.
 *
 * @param {Object} evt - message event; `evt.data` is `{command, buffer}`.
 */
function handleWorkerMessage(evt) {
  var data = evt.data;
  switch (data.command) {
  case "end":
    this.appendToBuffer(data.buffer);
    try {
      var view = new DataView(this.buffer);
      var blob = new Blob([view], {type: this.mimeType});
      this.callback(blob);
    } finally {
      // Clean up even if the callback (or Blob construction) throws.
      this.worker.terminate();
      node.removeEventListener("audioprocess", this.onAudioProcess);
    }
    break;
  }
}

/**
 * Appends the bytes of `buffer` to `this.buffer`.
 *
 * The first call stores `buffer` itself; later calls replace `this.buffer`
 * with a new ArrayBuffer holding the old bytes followed by the new ones.
 *
 * @param {ArrayBuffer} buffer - bytes to append.
 */
function appendToBuffer(buffer) {
  if (!this.buffer) {
    this.buffer = buffer;
    return;
  }

  var joined = new Uint8Array(this.buffer.byteLength + buffer.byteLength);
  joined.set(new Uint8Array(this.buffer), 0);
  joined.set(new Uint8Array(buffer), this.buffer.byteLength);
  this.buffer = joined.buffer;
}

      

Here is a web worker that saves the buffers passed to it and builds the WAV at the end (this code borrows heavily from RecordRTC):

// Worker-side recording state, shared by the message handler below.

// Float32Array chunks received so far; assigned a fresh array on "init".
var buffers;

// Running total of samples received through "encode" messages.
var length = 0;

// Sample rate written into the WAV header.
var sampleRate = 44100;

/**
 * Joins a list of sample chunks into one contiguous Float32Array.
 *
 * @param {Float32Array[]} buffers - chunks, copied in list order.
 * @param {number} totalLength - length of the returned array; positions
 *   past the last copied chunk stay zero.
 * @returns {Float32Array} a new array holding the chunks back to back.
 */
function concatBuffers(buffers, totalLength) {
  var joined = new Float32Array(totalLength);
  var chunkCount = buffers.length;
  var writePos = 0;
  var idx = 0;

  while (idx < chunkCount) {
    var chunk = buffers[idx];
    joined.set(chunk, writePos);
    writePos += chunk.length;
    idx += 1;
  }

  return joined;
}

/**
 * Writes each character code of `string` into `view` as one unsigned byte,
 * starting at byte position `offset`.
 *
 * @param {DataView} view - destination view.
 * @param {number} offset - byte position of the first character.
 * @param {string} string - text to write, one byte per UTF-16 code unit.
 */
function writeUTFBytes(view, offset, string) {
  string.split("").forEach(function (ch, pos) {
    view.setUint8(offset + pos, ch.charCodeAt(0));
  });
}

/**
 * Encodes float samples as a mono, 16-bit little-endian PCM WAV file.
 *
 * @param {Float32Array} samples - sample values, nominally in [-1, 1].
 * @param {number} rate - sample rate in Hz to write into the header.
 * @returns {ArrayBuffer} 44-byte header followed by the 16-bit samples.
 */
function encodeWav(samples, rate) {
  var HEADER_BYTES = 44;
  var CHANNELS = 1;
  var BYTES_PER_SAMPLE = 2;

  var blockAlign = CHANNELS * BYTES_PER_SAMPLE;
  var dataBytes = samples.length * BYTES_PER_SAMPLE;

  var wavBuffer = new ArrayBuffer(HEADER_BYTES + dataBytes);
  var view = new DataView(wavBuffer);

  // RIFF chunk descriptor
  writeUTFBytes(view, 0, "RIFF");
  // Chunk size counts everything after this field: file size minus 8 bytes.
  view.setUint32(4, HEADER_BYTES - 8 + dataBytes, true);
  writeUTFBytes(view, 8, 'WAVE');

  // FMT sub-chunk
  writeUTFBytes(view, 12, 'fmt ');
  view.setUint32(16, 16, true);                   // fmt sub-chunk size for PCM
  view.setUint16(20, 1, true);                    // audio format 1 = linear PCM
  view.setUint16(22, CHANNELS, true);             // one channel
  view.setUint32(24, rate, true);
  view.setUint32(28, rate * blockAlign, true);    // byte rate
  view.setUint16(32, blockAlign, true);           // bytes per sample frame
  view.setUint16(34, BYTES_PER_SAMPLE * 8, true); // bits per sample

  // data sub-chunk
  writeUTFBytes(view, 36, 'data');
  view.setUint32(40, dataBytes, true);

  // PCM samples
  var index = HEADER_BYTES;
  for (var i = 0; i < samples.length; i++) {
    // Clamp first: setInt16 wraps out-of-range values around instead of
    // saturating, which would turn an overshoot into a loud click.
    var clamped = Math.max(-1, Math.min(1, samples[i]));
    view.setInt16(index, clamped * 0x7FFF, true);
    index += BYTES_PER_SAMPLE;
  }

  return wavBuffer;
}

/**
 * Worker message protocol:
 *   {command: "init", sampleRate?}  start a new recording; `sampleRate`, when
 *                                   given, replaces the default header rate
 *   {command: "encode", buffer}     append one Float32Array of samples
 *   {command: "end"}                build the WAV and post it back as
 *                                   {command: "end", buffer: ArrayBuffer}
 */
this.addEventListener("message", function(evt) {
  var data = evt.data;

  switch (data.command) {
  case "init":
    buffers = [];
    // Reset the running total too, otherwise a second recording would be
    // padded by the length of the first.
    length = 0;
    if (data.sampleRate) {
      sampleRate = data.sampleRate;
    }
    break;
  case "encode":
    buffers.push(new Float32Array(data.buffer));
    length += data.buffer.length;
    break;
  case "end":
    var wavBuffer = encodeWav(concatBuffers(buffers, length), sampleRate);
    // Transfer the finished file instead of cloning it.
    this.postMessage({command: "end", buffer: wavBuffer}, [wavBuffer]);
    break;
  }
});

      

Update

We started using the same technique on another product and we see the same behavior and we are not using the push-to-talk system on this product.

+3


source to share


1 answer


I have a VERY strong feeling, based on the sound and the time delay, that this is actually your key handler - it looks like the code around hold-down-key-to-talk is causing a start/stop storm?



+2


source







All Articles