diff --git a/src/@types/global.d.ts b/src/@types/global.d.ts index ee0963e537..41257c21f0 100644 --- a/src/@types/global.d.ts +++ b/src/@types/global.d.ts @@ -129,4 +129,30 @@ declare global { // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Error/columnNumber columnNumber?: number; } + + // https://github.com/microsoft/TypeScript/issues/28308#issuecomment-650802278 + interface AudioWorkletProcessor { + readonly port: MessagePort; + process( + inputs: Float32Array[][], + outputs: Float32Array[][], + parameters: Record + ): boolean; + } + + // https://github.com/microsoft/TypeScript/issues/28308#issuecomment-650802278 + const AudioWorkletProcessor: { + prototype: AudioWorkletProcessor; + new (options?: AudioWorkletNodeOptions): AudioWorkletProcessor; + }; + + // https://github.com/microsoft/TypeScript/issues/28308#issuecomment-650802278 + function registerProcessor( + name: string, + processorCtor: (new ( + options?: AudioWorkletNodeOptions + ) => AudioWorkletProcessor) & { + parameterDescriptors?: AudioParamDescriptor[]; + } + ); } diff --git a/src/components/views/rooms/VoiceRecordComposerTile.tsx b/src/components/views/rooms/VoiceRecordComposerTile.tsx index 1210a44958..9b7f0da472 100644 --- a/src/components/views/rooms/VoiceRecordComposerTile.tsx +++ b/src/components/views/rooms/VoiceRecordComposerTile.tsx @@ -53,9 +53,38 @@ export default class VoiceRecordComposerTile extends React.PureComponent Math.round(v * 1024)), + }, }); await VoiceRecordingStore.instance.disposeRecording(); this.setState({recorder: null}); diff --git a/src/utils/arrays.ts b/src/utils/arrays.ts index 52308937f7..8ab66dfb29 100644 --- a/src/utils/arrays.ts +++ b/src/utils/arrays.ts @@ -54,7 +54,7 @@ export function arraySeed(val: T, length: number): T[] { * @param a The array to clone. Must be defined. * @returns A copy of the array. */ -export function arrayFastClone(a: any[]): any[] { +export function arrayFastClone(a: T[]): T[] { return a.slice(0, a.length); } diff --git a/src/voice/RecorderWorklet.ts b/src/voice/RecorderWorklet.ts new file mode 100644 index 0000000000..7343d37066 --- /dev/null +++ b/src/voice/RecorderWorklet.ts @@ -0,0 +1,67 @@ +/* +Copyright 2021 The Matrix.org Foundation C.I.C. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +import {IAmplitudePayload, ITimingPayload, PayloadEvent, WORKLET_NAME} from "./consts"; +import {percentageOf} from "../utils/numbers"; + +// from AudioWorkletGlobalScope: https://developer.mozilla.org/en-US/docs/Web/API/AudioWorkletGlobalScope +declare const currentTime: number; +// declare const currentFrame: number; +// declare const sampleRate: number; + +class MxVoiceWorklet extends AudioWorkletProcessor { + private nextAmplitudeSecond = 0; + + process(inputs, outputs, parameters) { + // We only fire amplitude updates once a second to avoid flooding the recording instance + // with useless data. Much of the data would end up discarded, so we ratelimit ourselves + // here. + const currentSecond = Math.round(currentTime); + if (currentSecond === this.nextAmplitudeSecond) { + // We're expecting exactly one mono input source, so just grab the very first frame of + // samples for the analysis. + const monoChan = inputs[0][0]; + + // The amplitude of the frame's samples is effectively the loudness of the frame. This + // translates into a bar which can be rendered as part of the whole recording clip's + // waveform. + // + // We translate the amplitude down to 0-1 for sanity's sake. + const minVal = Math.min(...monoChan); + const maxVal = Math.max(...monoChan); + const amplitude = percentageOf(maxVal, -1, 1) - percentageOf(minVal, -1, 1); + + this.port.postMessage({ + ev: PayloadEvent.AmplitudeMark, + amplitude: amplitude, + forSecond: currentSecond, + }); + this.nextAmplitudeSecond++; + } + + // We mostly use this worklet to fire regular clock updates through to components + this.port.postMessage({ev: PayloadEvent.Timekeep, timeSeconds: currentTime}); + + // We're supposed to return false when we're "done" with the audio clip, but seeing as + // we are acting as a passive processor we are never truly "done". The browser will clean + // us up when it is done with us. + return true; + } +} + +registerProcessor(WORKLET_NAME, MxVoiceWorklet); + +export default null; // to appease module loaders (we never use the export) diff --git a/src/voice/VoiceRecording.ts b/src/voice/VoiceRecording.ts index 55775ff786..b0cc3cd407 100644 --- a/src/voice/VoiceRecording.ts +++ b/src/voice/VoiceRecording.ts @@ -23,6 +23,8 @@ import {clamp} from "../utils/numbers"; import EventEmitter from "events"; import {IDestroyable} from "../utils/IDestroyable"; import {Singleflight} from "../utils/Singleflight"; +import {PayloadEvent, WORKLET_NAME} from "./consts"; +import {arrayFastClone} from "../utils/arrays"; const CHANNELS = 1; // stereo isn't important const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality. @@ -49,16 +51,34 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { private recorderSource: MediaStreamAudioSourceNode; private recorderStream: MediaStream; private recorderFFT: AnalyserNode; - private recorderProcessor: ScriptProcessorNode; + private recorderWorklet: AudioWorkletNode; private buffer = new Uint8Array(0); private mxc: string; private recording = false; private observable: SimpleObservable; + private amplitudes: number[] = []; // at each second mark, generated public constructor(private client: MatrixClient) { super(); } + public get finalWaveform(): number[] { + return arrayFastClone(this.amplitudes); + } + + public get contentType(): string { + return "audio/ogg"; + } + + public get contentLength(): number { + return this.buffer.length; + } + + public get durationSeconds(): number { + if (!this.recorder) throw new Error("Duration not available without a recording"); + return this.recorderContext.currentTime; + } + private async makeRecorder() { this.recorderStream = await navigator.mediaDevices.getUserMedia({ audio: { @@ -80,18 +100,34 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { // it makes the time domain less than helpful. this.recorderFFT.fftSize = 64; - // We use an audio processor to get accurate timing information. - // The size of the audio buffer largely decides how quickly we push timing/waveform data - // out of this class. Smaller buffers mean we update more frequently as we can't hold as - // many bytes. Larger buffers mean slower updates. For scale, 1024 gives us about 30Hz of - // updates and 2048 gives us about 20Hz. We use 1024 to get as close to perceived realtime - // as possible. Must be a power of 2. - this.recorderProcessor = this.recorderContext.createScriptProcessor(1024, CHANNELS, CHANNELS); + // Set up our worklet. We use this for timing information and waveform analysis: the + // web audio API prefers this be done async to avoid holding the main thread with math. + const mxRecorderWorkletPath = document.body.dataset.vectorRecorderWorkletScript; + if (!mxRecorderWorkletPath) { + throw new Error("Unable to create recorder: no worklet script registered"); + } + await this.recorderContext.audioWorklet.addModule(mxRecorderWorkletPath); + this.recorderWorklet = new AudioWorkletNode(this.recorderContext, WORKLET_NAME); // Connect our inputs and outputs this.recorderSource.connect(this.recorderFFT); - this.recorderSource.connect(this.recorderProcessor); - this.recorderProcessor.connect(this.recorderContext.destination); + this.recorderSource.connect(this.recorderWorklet); + this.recorderWorklet.connect(this.recorderContext.destination); + + // Dev note: we can't use `addEventListener` for some reason. It just doesn't work. + this.recorderWorklet.port.onmessage = (ev) => { + switch (ev.data['ev']) { + case PayloadEvent.Timekeep: + this.processAudioUpdate(ev.data['timeSeconds']); + break; + case PayloadEvent.AmplitudeMark: + // Sanity check to make sure we're adding about one sample per second + if (ev.data['forSecond'] === this.amplitudes.length) { + this.amplitudes.push(ev.data['amplitude']); + } + break; + } + }; this.recorder = new Recorder({ encoderPath, // magic from webpack @@ -138,7 +174,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { return this.mxc; } - private processAudioUpdate = (ev: AudioProcessingEvent) => { + private processAudioUpdate = (timeSeconds: number) => { if (!this.recording) return; // The time domain is the input to the FFT, which means we use an array of the same @@ -162,12 +198,12 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { this.observable.update({ waveform: translatedData, - timeSeconds: ev.playbackTime, + timeSeconds: timeSeconds, }); // Now that we've updated the data/waveform, let's do a time check. We don't want to // go horribly over the limit. We also emit a warning state if needed. - const secondsLeft = TARGET_MAX_LENGTH - ev.playbackTime; + const secondsLeft = TARGET_MAX_LENGTH - timeSeconds; if (secondsLeft <= 0) { // noinspection JSIgnoredPromiseFromCall - we aren't concerned with it overlapping this.stop(); @@ -191,7 +227,6 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { } this.observable = new SimpleObservable(); await this.makeRecorder(); - this.recorderProcessor.addEventListener("audioprocess", this.processAudioUpdate); await this.recorder.start(); this.recording = true; this.emit(RecordingState.Started); @@ -205,6 +240,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { // Disconnect the source early to start shutting down resources this.recorderSource.disconnect(); + this.recorderWorklet.disconnect(); await this.recorder.stop(); // close the context after the recorder so the recorder doesn't try to @@ -216,7 +252,6 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { // Finally do our post-processing and clean up this.recording = false; - this.recorderProcessor.removeEventListener("audioprocess", this.processAudioUpdate); await this.recorder.close(); this.emit(RecordingState.Ended); @@ -240,7 +275,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { this.emit(RecordingState.Uploading); this.mxc = await this.client.uploadContent(new Blob([this.buffer], { - type: "audio/ogg", + type: this.contentType, }), { onlyContentUri: false, // to stop the warnings in the console }).then(r => r['content_uri']); diff --git a/src/voice/consts.ts b/src/voice/consts.ts new file mode 100644 index 0000000000..c530c60f0b --- /dev/null +++ b/src/voice/consts.ts @@ -0,0 +1,37 @@ +/* +Copyright 2021 The Matrix.org Foundation C.I.C. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +export const WORKLET_NAME = "mx-voice-worklet"; + +export enum PayloadEvent { + Timekeep = "timekeep", + AmplitudeMark = "amplitude_mark", +} + +export interface IPayload { + ev: PayloadEvent; +} + +export interface ITimingPayload extends IPayload { + ev: PayloadEvent.Timekeep; + timeSeconds: number; +} + +export interface IAmplitudePayload extends IPayload { + ev: PayloadEvent.AmplitudeMark; + forSecond: number; + amplitude: number; +}