Stuttering / robotic audio in WAV recording via Web Audio API
I am using the Web Audio API to capture WAV audio clips in the browser from app users while they hold down a specific key, like push-to-talk (update: it also happens when you're not using push-to-talk). The sound stutters in many of the recordings; you can hear an example here (starting at ~5 seconds) and another example here. What can I do to diagnose (or fix) this problem? (I played around a bit with the buffer size passed to createScriptProcessor
but to no avail).
Computers using the app are all MacBook Pros running Chrome 36 on OS X 10.8 or 10.9 (update: also Chrome 39/40 on OS X 10.10). Here's the chrome://version output for the machine that recorded the sample linked above:
Google Chrome: 36.0.1985.143 (Official Build 287914) OS: Mac OS X Blink: 537.36 (@179211) JavaScript: V8 3.26.31.15 Flash: 14.0.0.177 User Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36 Command Line: /Applications/Google Chrome.app/Contents/MacOS/Google Chrome --flag-switches-begin --flag-switches-end Executable Path: /Applications/Google Chrome.app/Contents/MacOS/Google Chrome Profile Path: /Users/jason/Library/Application Support/Google/Chrome/Default Variations: e950616e-37fb3cc2 8afebf76-771ac34e c70841c8-4866ef6e 195ce1b5-d93a0620 c4126e6a-ca7d8d80 9e5c75f1-ad69ceb0 262f996f-7075cd8 24dca50e-837c4893 ca65a9fe-91ac3782 8d790604-9cb2a91c 4ea303a6-3d47f4f4 d8f57532-3f4a17df b2612322-f8cf70e2 5a3c10b5-e1cc0f14 244ca1ac-4ad60575 f47ae82a-86f22ee5 5e29d81-cf4f6ead 3ac60855-486e2a9c 246fb659-6e597ede f296190c-65255996 4442aae2-6e597ede ed1d377-e1cc0f14 75f0f0a0-a5822863 e2b18481-d7f6b13c e7e71889-4ad60575 cbf0c14e-bf3e6cfd
Here are the relevant parts of the code that make the recordings, simplified:
function startRecording() {
  // Success callback is bound so `this` keeps referring to the recorder
  // instance; a plain anonymous function would receive `window` (or
  // undefined in strict mode) as `this`, breaking this.worker,
  // this.workerUrl and this.handleWorkerMessage below.
  navigator.getUserMedia({audio: true, video: false}, function (stream) {
    // Prefer the standard constructor, falling back to the WebKit prefix
    // that older Chrome/Safari builds expose.
    var AudioCtx = window.AudioContext || window.webkitAudioContext;
    audioContext = audioContext || new AudioCtx();
    input = audioContext.createMediaStreamSource(stream);
    node = input.context.createScriptProcessor(4096, 1, 1);
    input.connect(node);
    // The ScriptProcessorNode must be connected to a destination or some
    // engines never fire "audioprocess".
    node.connect(audioContext.destination);
    this.worker = new Worker(this.workerUrl); // see Web Worker code, below
    this.worker.addEventListener("message", this.handleWorkerMessage.bind(this));
    this.worker.postMessage({command: "init"});
    node.addEventListener("audioprocess", this.onAudioProcess);
  }.bind(this), function (err) {
    // The legacy callback-style getUserMedia takes an error callback;
    // surface failures instead of silently never starting the recording.
    console.error("getUserMedia failed:", err);
  });
}
function stopRecording() {
  // Flag the recorder as stopped so onAudioProcess drops any further
  // audio buffers, then ask the worker to assemble the final WAV file.
  this.recording = false;
  this.worker.postMessage({ command: "end" });
}
// Per-buffer tap on the ScriptProcessorNode: snapshots the mono channel
// and ships it to the encoding worker.
// Fixed: `function onAudioProcess = function(evt)` is a syntax error —
// a plain function declaration is used instead.
function onAudioProcess(evt) {
  // NOTE(review): `stream` is a free variable — it is only in scope inside
  // the getUserMedia callback in startRecording. Guard against it being
  // undefined rather than throwing a ReferenceError; TODO: store the
  // stream on `this` and read it from there.
  if (!this.recording || (typeof stream !== "undefined" && stream.ended)) return;
  // Copy the channel data: the underlying Float32Array is owned/reused by
  // the audio engine, so it must be snapshotted before leaving this tick.
  var channelLeft = new Float32Array(evt.inputBuffer.getChannelData(0));
  this.worker.postMessage({command: "encode", buffer: channelLeft});
}
// Handles messages from the encoding worker; on "end" the worker has
// produced the final WAV ArrayBuffer, which is wrapped in a Blob and
// handed to the caller-supplied callback.
// Fixed: `function handleWorkerMessage = function(evt)` is a syntax
// error — a plain function declaration is used instead.
function handleWorkerMessage(evt) {
  var data = evt.data;
  switch (data.command) {
    case "end":
      this.appendToBuffer(data.buffer);
      try {
        var view = new DataView(this.buffer);
        var blob = new Blob([view], {type: this.mimeType});
        this.callback(blob);
      } finally {
        // Always release the worker and detach the audio tap, even if
        // building the Blob or invoking the callback throws.
        this.worker.terminate();
        // NOTE(review): `node` is a free variable — it is only in scope
        // inside the getUserMedia callback in startRecording; confirm it
        // is actually reachable here (consider storing it on `this`).
        node.removeEventListener("audioprocess", this.onAudioProcess);
      }
      break;
  }
}
// Accumulates an incoming ArrayBuffer onto this.buffer: the first chunk
// is adopted as-is, later chunks are concatenated byte-wise.
// Fixed: `function appendToBuffer = function(buffer)` is a syntax error —
// a plain function declaration is used instead.
function appendToBuffer(buffer) {
  if (!this.buffer) {
    this.buffer = buffer;
  } else {
    var tmp = new Uint8Array(this.buffer.byteLength + buffer.byteLength);
    tmp.set(new Uint8Array(this.buffer), 0);
    tmp.set(new Uint8Array(buffer), this.buffer.byteLength);
    this.buffer = tmp.buffer;
  }
}
Here is a web worker that saves the buffers passed to it and builds the WAV at the end (this code borrows heavily from RecordRTC):
// Per-recording state for the encoding worker.
var buffers;            // Float32Array chunks received so far
var length = 0;         // running total of samples across all chunks
var sampleRate = 44100; // assumed capture rate — TODO confirm against
                        // audioContext.sampleRate (48 kHz contexts would
                        // make the WAV play back at the wrong speed)
// Flattens an array of Float32Array chunks into a single Float32Array of
// totalLength samples (the caller tracks the running total as chunks
// arrive). Fixed: the original declared `var buf` twice — once unused at
// the top and again inside the loop.
function concatBuffers(buffers, totalLength) {
  var result = new Float32Array(totalLength);
  var offset = 0;
  for (var i = 0; i < buffers.length; i++) {
    result.set(buffers[i], offset);
    offset += buffers[i].length;
  }
  return result;
}
// Writes the character codes of `text` into the DataView one byte per
// character, starting at `offset` (used for the ASCII tags in the WAV
// header: "RIFF", "WAVE", "fmt ", "data").
function writeUTFBytes(view, offset, text) {
  for (var i = 0; i < text.length; i += 1) {
    view.setUint8(offset + i, text.charCodeAt(i));
  }
}
// Worker message loop: collects Float32Array chunks and, on "end",
// serializes them as a 16-bit mono PCM WAV file.
this.addEventListener("message", function(evt) {
  var data = evt.data;
  switch (data.command) {
    case "init":
      buffers = [];
      length = 0; // reset so back-to-back recordings don't accumulate
      break;
    case "encode":
      buffers.push(new Float32Array(data.buffer));
      length += data.buffer.length;
      break;
    case "end":
      var pcmBuffer = concatBuffers(buffers, length);
      // 44-byte canonical WAV header + 2 bytes per 16-bit sample.
      var wavBuffer = new ArrayBuffer(44 + pcmBuffer.length * 2);
      var view = new DataView(wavBuffer);
      // RIFF chunk descriptor. ChunkSize is the file length minus the
      // 8 bytes of the "RIFF" tag and the size field itself, i.e.
      // 36 + data length (the original wrote 44 + data length,
      // overstating the file size by 8 bytes).
      writeUTFBytes(view, 0, "RIFF");
      view.setUint32(4, 36 + pcmBuffer.length * 2, true);
      writeUTFBytes(view, 8, 'WAVE');
      // fmt sub-chunk: PCM, mono, 16 bits per sample.
      writeUTFBytes(view, 12, 'fmt ');
      view.setUint32(16, 16, true); // fmt sub-chunk size
      view.setUint16(20, 1, true);  // audio format: linear PCM
      view.setUint16(22, 1, true);  // one channel
      view.setUint32(24, sampleRate, true);
      // byteRate = sampleRate * channels * bytesPerSample = sampleRate * 2
      // for mono 16-bit; the original used the stereo value (* 4), which
      // makes players believe the data runs at twice the real rate.
      view.setUint32(28, sampleRate * 2, true);
      view.setUint16(32, 2, true);  // blockAlign = channels * bytesPerSample
      view.setUint16(34, 16, true); // bits per sample
      // data sub-chunk
      writeUTFBytes(view, 36, 'data');
      view.setUint32(40, pcmBuffer.length * 2, true);
      // PCM samples: clamp to [-1, 1] before scaling so out-of-range
      // floats can't wrap around in setInt16 and produce loud clicks.
      // (Also drops the original's implicit global `volume = 1`.)
      var index = 44;
      for (var i = 0; i < pcmBuffer.length; i++) {
        var s = Math.max(-1, Math.min(1, pcmBuffer[i]));
        view.setInt16(index, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
        index += 2;
      }
      this.postMessage({command: "end", buffer: wavBuffer});
      break;
  }
});
Update
We started using the same technique in another product, and we see the same behavior there even though that product does not use the push-to-talk system.
source to share