From 8ddd14e252dec4f3c87e61492561bb83ae8d6e38 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Mon, 22 Mar 2021 20:54:09 -0600 Subject: [PATCH 01/11] Early concept for rendering the frequency waveform --- res/css/_components.scss | 1 + .../views/voice_messages/_FrequencyBars.scss | 34 +++++++++++ .../views/rooms/VoiceRecordComposerTile.tsx | 12 ++-- .../views/voice_messages/FrequencyBars.tsx | 58 +++++++++++++++++++ src/utils/arrays.ts | 35 +++++++++++ src/voice/VoiceRecorder.ts | 4 +- 6 files changed, 136 insertions(+), 8 deletions(-) create mode 100644 res/css/views/voice_messages/_FrequencyBars.scss create mode 100644 src/components/views/voice_messages/FrequencyBars.tsx diff --git a/res/css/_components.scss b/res/css/_components.scss index 9c895490b3..33dc6e72cf 100644 --- a/res/css/_components.scss +++ b/res/css/_components.scss @@ -246,6 +246,7 @@ @import "./views/toasts/_AnalyticsToast.scss"; @import "./views/toasts/_NonUrgentEchoFailureToast.scss"; @import "./views/verification/_VerificationShowSas.scss"; +@import "./views/voice_messages/_FrequencyBars.scss"; @import "./views/voip/_CallContainer.scss"; @import "./views/voip/_CallView.scss"; @import "./views/voip/_DialPad.scss"; diff --git a/res/css/views/voice_messages/_FrequencyBars.scss b/res/css/views/voice_messages/_FrequencyBars.scss new file mode 100644 index 0000000000..b38cdfff92 --- /dev/null +++ b/res/css/views/voice_messages/_FrequencyBars.scss @@ -0,0 +1,34 @@ +/* +Copyright 2021 The Matrix.org Foundation C.I.C. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +.mx_FrequencyBars { + position: relative; + height: 30px; // tallest bar can only be 30px + + display: flex; + align-items: center; // so the bars grow from the middle + + .mx_FrequencyBars_bar { + width: 2px; + margin-left: 1px; + margin-right: 1px; + background-color: $muted-fg-color; + display: inline-block; + min-height: 2px; + max-height: 100%; + border-radius: 2px; // give them soft endcaps + } +} diff --git a/src/components/views/rooms/VoiceRecordComposerTile.tsx b/src/components/views/rooms/VoiceRecordComposerTile.tsx index 0d381001a1..c57fc79eeb 100644 --- a/src/components/views/rooms/VoiceRecordComposerTile.tsx +++ b/src/components/views/rooms/VoiceRecordComposerTile.tsx @@ -21,6 +21,7 @@ import {VoiceRecorder} from "../../../voice/VoiceRecorder"; import {Room} from "matrix-js-sdk/src/models/room"; import {MatrixClientPeg} from "../../../MatrixClientPeg"; import classNames from "classnames"; +import FrequencyBars from "../voice_messages/FrequencyBars"; interface IProps { room: Room; @@ -57,10 +58,6 @@ export default class VoiceRecordComposerTile extends React.PureComponent { - // console.log('@@ UPDATE', freq); - // }); this.setState({recorder}); }; @@ -71,18 +68,21 @@ export default class VoiceRecordComposerTile extends React.PureComponent; } - return ( + return (<> + {bars} - ); + ); } } diff --git a/src/components/views/voice_messages/FrequencyBars.tsx b/src/components/views/voice_messages/FrequencyBars.tsx new file mode 100644 index 0000000000..73ea7bc862 --- /dev/null +++ b/src/components/views/voice_messages/FrequencyBars.tsx @@ -0,0 +1,58 @@ +/* +Copyright 2021 The Matrix.org Foundation C.I.C. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +import React from "react"; +import {IFrequencyPackage, VoiceRecorder} from "../../../voice/VoiceRecorder"; +import {replaceableComponent} from "../../../utils/replaceableComponent"; +import {arrayFastResample, arraySeed} from "../../../utils/arrays"; +import {percentageOf} from "../../../utils/numbers"; + +interface IProps { + recorder: VoiceRecorder +} + +interface IState { + heights: number[]; +} + +const DOWNSAMPLE_TARGET = 35; // number of bars + +@replaceableComponent("views.voice_messages.FrequencyBars") +export default class FrequencyBars extends React.PureComponent { + public constructor(props) { + super(props); + + this.state = {heights: arraySeed(0, DOWNSAMPLE_TARGET)}; + this.props.recorder.frequencyData.onUpdate(this.onFrequencyData); + } + + private onFrequencyData = (freq: IFrequencyPackage) => { + // We're downsampling from about 1024 points to about 35, so this function is fine (see docs/impl) + const bars = arrayFastResample(Array.from(freq.dbBars), DOWNSAMPLE_TARGET); + this.setState({ + // Values are somewhat arbitrary, but help decide what shape the graph should be + heights: bars.map(b => percentageOf(b, -150, -70) * 100), + }); + }; + + public render() { + return
+ {this.state.heights.map((h, i) => { + return ; + })} +
; + } +} diff --git a/src/utils/arrays.ts b/src/utils/arrays.ts index fa5515878f..52308937f7 100644 --- a/src/utils/arrays.ts +++ b/src/utils/arrays.ts @@ -14,6 +14,41 @@ See the License for the specific language governing permissions and limitations under the License. */ +/** + * Quickly resample an array to have fewer data points. This isn't a perfect representation, + * though this does work best if given a large array to downsample to a much smaller array. + * @param {number[]} input The input array to downsample. + * @param {number} points The number of samples to end up with. + * @returns {number[]} The downsampled array. + */ +export function arrayFastResample(input: number[], points: number): number[] { + // Heavily inspired by matrix-media-repo (used with permission) + // https://github.com/turt2live/matrix-media-repo/blob/abe72c87d2e29/util/util_audio/fastsample.go#L10 + const everyNth = Math.round(input.length / points); + const samples: number[] = []; + for (let i = 0; i < input.length; i += everyNth) { + samples.push(input[i]); + } + while (samples.length < points) { + samples.push(input[input.length - 1]); + } + return samples; +} + +/** + * Creates an array of the given length, seeded with the given value. + * @param {T} val The value to seed the array with. + * @param {number} length The length of the array to create. + * @returns {T[]} The array. + */ +export function arraySeed(val: T, length: number): T[] { + const a: T[] = []; + for (let i = 0; i < length; i++) { + a.push(val); + } + return a; +} + /** * Clones an array as fast as possible, retaining references of the array's values. * @param a The array to clone. Must be defined. 
diff --git a/src/voice/VoiceRecorder.ts b/src/voice/VoiceRecorder.ts index 06c0d939fc..4bdd0b0af3 100644 --- a/src/voice/VoiceRecorder.ts +++ b/src/voice/VoiceRecorder.ts @@ -23,7 +23,7 @@ import {SimpleObservable} from "matrix-widget-api"; const CHANNELS = 1; // stereo isn't important const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality. const BITRATE = 24000; // 24kbps is pretty high quality for our use case in opus. -const FREQ_SAMPLE_RATE = 4; // Target rate of frequency data (samples / sec). We don't need this super often. +const FREQ_SAMPLE_RATE = 10; // Target rate of frequency data (samples / sec). We don't need this super often. export interface IFrequencyPackage { dbBars: Float32Array; @@ -60,7 +60,7 @@ export class VoiceRecorder { }, }); this.recorderContext = new AudioContext({ - latencyHint: "interactive", + // latencyHint: "interactive", // we don't want a latency hint (this causes data smoothing) sampleRate: SAMPLE_RATE, // once again, the browser will resample for us }); this.recorderSource = this.recorderContext.createMediaStreamSource(this.recorderStream); From 449e028bbd1d537ded2cf21d2ba4581529153e67 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 24 Mar 2021 23:31:02 -0600 Subject: [PATCH 02/11] Actually use a waveform instead of the frequency data --- res/css/_components.scss | 2 +- .../{_FrequencyBars.scss => _Waveform.scss} | 4 +- .../views/rooms/VoiceRecordComposerTile.tsx | 8 +-- .../views/voice_messages/FrequencyBars.tsx | 58 ----------------- .../voice_messages/LiveRecordingWaveform.tsx | 64 +++++++++++++++++++ .../views/voice_messages/Waveform.tsx | 48 ++++++++++++++ src/voice/VoiceRecorder.ts | 55 +++++++++++----- 7 files changed, 159 insertions(+), 80 deletions(-) rename res/css/views/voice_messages/{_FrequencyBars.scss => _Waveform.scss} (95%) delete mode 100644 src/components/views/voice_messages/FrequencyBars.tsx create mode 100644 
src/components/views/voice_messages/LiveRecordingWaveform.tsx create mode 100644 src/components/views/voice_messages/Waveform.tsx diff --git a/res/css/_components.scss b/res/css/_components.scss index 33dc6e72cf..1eabd6f5c6 100644 --- a/res/css/_components.scss +++ b/res/css/_components.scss @@ -246,7 +246,7 @@ @import "./views/toasts/_AnalyticsToast.scss"; @import "./views/toasts/_NonUrgentEchoFailureToast.scss"; @import "./views/verification/_VerificationShowSas.scss"; -@import "./views/voice_messages/_FrequencyBars.scss"; +@import "./views/voice_messages/_Waveform.scss"; @import "./views/voip/_CallContainer.scss"; @import "./views/voip/_CallView.scss"; @import "./views/voip/_DialPad.scss"; diff --git a/res/css/views/voice_messages/_FrequencyBars.scss b/res/css/views/voice_messages/_Waveform.scss similarity index 95% rename from res/css/views/voice_messages/_FrequencyBars.scss rename to res/css/views/voice_messages/_Waveform.scss index b38cdfff92..23eedf2dbd 100644 --- a/res/css/views/voice_messages/_FrequencyBars.scss +++ b/res/css/views/voice_messages/_Waveform.scss @@ -14,14 +14,14 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -.mx_FrequencyBars { +.mx_Waveform { position: relative; height: 30px; // tallest bar can only be 30px display: flex; align-items: center; // so the bars grow from the middle - .mx_FrequencyBars_bar { + .mx_Waveform_bar { width: 2px; margin-left: 1px; margin-right: 1px; diff --git a/src/components/views/rooms/VoiceRecordComposerTile.tsx b/src/components/views/rooms/VoiceRecordComposerTile.tsx index c57fc79eeb..061daab915 100644 --- a/src/components/views/rooms/VoiceRecordComposerTile.tsx +++ b/src/components/views/rooms/VoiceRecordComposerTile.tsx @@ -21,7 +21,7 @@ import {VoiceRecorder} from "../../../voice/VoiceRecorder"; import {Room} from "matrix-js-sdk/src/models/room"; import {MatrixClientPeg} from "../../../MatrixClientPeg"; import classNames from "classnames"; -import FrequencyBars from "../voice_messages/FrequencyBars"; +import LiveRecordingWaveform from "../voice_messages/LiveRecordingWaveform"; interface IProps { room: Room; @@ -68,16 +68,16 @@ export default class VoiceRecordComposerTile extends React.PureComponent; + waveform = ; } return (<> - {bars} + {waveform} { - public constructor(props) { - super(props); - - this.state = {heights: arraySeed(0, DOWNSAMPLE_TARGET)}; - this.props.recorder.frequencyData.onUpdate(this.onFrequencyData); - } - - private onFrequencyData = (freq: IFrequencyPackage) => { - // We're downsampling from about 1024 points to about 35, so this function is fine (see docs/impl) - const bars = arrayFastResample(Array.from(freq.dbBars), DOWNSAMPLE_TARGET); - this.setState({ - // Values are somewhat arbitrary, but help decide what shape the graph should be - heights: bars.map(b => percentageOf(b, -150, -70) * 100), - }); - }; - - public render() { - return
- {this.state.heights.map((h, i) => { - return ; - })} -
; - } -} diff --git a/src/components/views/voice_messages/LiveRecordingWaveform.tsx b/src/components/views/voice_messages/LiveRecordingWaveform.tsx new file mode 100644 index 0000000000..506532744a --- /dev/null +++ b/src/components/views/voice_messages/LiveRecordingWaveform.tsx @@ -0,0 +1,64 @@ +/* +Copyright 2021 The Matrix.org Foundation C.I.C. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +import React from "react"; +import {IRecordingUpdate, VoiceRecorder} from "../../../voice/VoiceRecorder"; +import {replaceableComponent} from "../../../utils/replaceableComponent"; +import {arrayFastResample, arraySeed} from "../../../utils/arrays"; +import {clamp, percentageOf} from "../../../utils/numbers"; +import Waveform from "./Waveform"; + +interface IProps { + recorder: VoiceRecorder; +} + +interface IState { + heights: number[]; +} + +const DOWNSAMPLE_TARGET = 35; // number of bars we want + +/** + * A waveform which shows the waveform of a live recording + */ +@replaceableComponent("views.voice_messages.LiveRecordingWaveform") +export default class LiveRecordingWaveform extends React.PureComponent { + public constructor(props) { + super(props); + + this.state = {heights: arraySeed(0, DOWNSAMPLE_TARGET)}; + this.props.recorder.liveData.onUpdate(this.onRecordingUpdate); + } + + private onRecordingUpdate = (update: IRecordingUpdate) => { + // The waveform and the downsample target are pretty close, so we should be fine to + // do this, despite the docs on arrayFastResample. 
+ const bars = arrayFastResample(Array.from(update.waveform), DOWNSAMPLE_TARGET); + this.setState({ + // The incoming data is between zero and one, but typically even screaming into a + // microphone won't send you over 0.6, so we "cap" the graph at about 0.4 for a + // point where the average user can still see feedback and be perceived as peaking + // when talking "loudly". + // + // We multiply by 100 because the Waveform component wants values in 0-100 (percentages) + heights: bars.map(b => percentageOf(b, 0, 0.40) * 100), + }); + }; + + public render() { + return ; + } +} diff --git a/src/components/views/voice_messages/Waveform.tsx b/src/components/views/voice_messages/Waveform.tsx new file mode 100644 index 0000000000..9736db54d1 --- /dev/null +++ b/src/components/views/voice_messages/Waveform.tsx @@ -0,0 +1,48 @@ +/* +Copyright 2021 The Matrix.org Foundation C.I.C. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +import React from "react"; +import {IRecordingUpdate, VoiceRecorder} from "../../../voice/VoiceRecorder"; +import {replaceableComponent} from "../../../utils/replaceableComponent"; +import {arrayFastResample, arraySeed} from "../../../utils/arrays"; +import {percentageOf} from "../../../utils/numbers"; + +interface IProps { + heights: number[]; // percentages as integers (0-100) +} + +interface IState { +} + +/** + * A simple waveform component. This renders bars (centered vertically) for each + * height provided in the component properties. 
Updating the properties will update + * the rendered waveform. + */ +@replaceableComponent("views.voice_messages.Waveform") +export default class Waveform extends React.PureComponent { + public constructor(props) { + super(props); + } + + public render() { + return
+ {this.props.heights.map((h, i) => { + return ; + })} +
; + } +} diff --git a/src/voice/VoiceRecorder.ts b/src/voice/VoiceRecorder.ts index 4bdd0b0af3..a85c3acad3 100644 --- a/src/voice/VoiceRecorder.ts +++ b/src/voice/VoiceRecorder.ts @@ -25,10 +25,8 @@ const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose const BITRATE = 24000; // 24kbps is pretty high quality for our use case in opus. const FREQ_SAMPLE_RATE = 10; // Target rate of frequency data (samples / sec). We don't need this super often. -export interface IFrequencyPackage { - dbBars: Float32Array; - dbMin: number; - dbMax: number; +export interface IRecordingUpdate { + waveform: number[]; // floating points between 0 (low) and 1 (high). // TODO: @@ TravisR: Generalize this for a timing package? } @@ -38,11 +36,11 @@ export class VoiceRecorder { private recorderContext: AudioContext; private recorderSource: MediaStreamAudioSourceNode; private recorderStream: MediaStream; - private recorderFreqNode: AnalyserNode; + private recorderFFT: AnalyserNode; private buffer = new Uint8Array(0); private mxc: string; private recording = false; - private observable: SimpleObservable; + private observable: SimpleObservable; private freqTimerId: number; public constructor(private client: MatrixClient) { @@ -64,8 +62,16 @@ export class VoiceRecorder { sampleRate: SAMPLE_RATE, // once again, the browser will resample for us }); this.recorderSource = this.recorderContext.createMediaStreamSource(this.recorderStream); - this.recorderFreqNode = this.recorderContext.createAnalyser(); - this.recorderSource.connect(this.recorderFreqNode); + this.recorderFFT = this.recorderContext.createAnalyser(); + + // Bring the FFT time domain down a bit. The default is 2048, and this must be a power + // of two. We use 64 points because we happen to know down the line we need less than + // that, but 32 would be too few. 
Large numbers are not helpful here and do not add + // precision: they introduce higher precision outputs of the FFT (frequency data), but + // it makes the time domain less than helpful. + this.recorderFFT.fftSize = 64; + + this.recorderSource.connect(this.recorderFFT); this.recorder = new Recorder({ encoderPath, // magic from webpack encoderSampleRate: SAMPLE_RATE, @@ -91,7 +97,7 @@ export class VoiceRecorder { }; } - public get frequencyData(): SimpleObservable { + public get liveData(): SimpleObservable { if (!this.recording) throw new Error("No observable when not recording"); return this.observable; } @@ -121,16 +127,35 @@ export class VoiceRecorder { if (this.observable) { this.observable.close(); } - this.observable = new SimpleObservable(); + this.observable = new SimpleObservable(); await this.makeRecorder(); this.freqTimerId = setInterval(() => { if (!this.recording) return; - const data = new Float32Array(this.recorderFreqNode.frequencyBinCount); - this.recorderFreqNode.getFloatFrequencyData(data); + + // The time domain is the input to the FFT, which means we use an array of the same + // size. The time domain is also known as the audio waveform. We're ignoring the + // output of the FFT here (frequency data) because we're not interested in it. + // + // We use bytes out of the analyser because floats have weird precision problems + // and are slightly more difficult to work with. The bytes are easy to work with, + // which is why we pick them (they're also more precise, but we care less about that). + const data = new Uint8Array(this.recorderFFT.fftSize); + this.recorderFFT.getByteTimeDomainData(data); + + // Because we're dealing with a uint array we need to do math a bit differently. + // If we just `Array.from()` the uint array, we end up with 1s and 0s, which aren't + // what we're after. 
Instead, we have to use a bit of manual looping to correctly end + // up with the right values + const translatedData: number[] = []; + for (let i = 0; i < data.length; i++) { + // All we're doing here is inverting the amplitude and putting the metric somewhere + // between zero and one. Without the inversion, lower values are "louder", which is + // not super helpful. + translatedData.push(1 - (data[i] / 128.0)); + } + this.observable.update({ - dbBars: data, - dbMin: this.recorderFreqNode.minDecibels, - dbMax: this.recorderFreqNode.maxDecibels, + waveform: translatedData, }); }, 1000 / FREQ_SAMPLE_RATE) as any as number; // XXX: Linter doesn't understand timer environment await this.recorder.start(); From 1419ac6b69ee24cb0a59526c5f7b0e14f3f48aa0 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Thu, 25 Mar 2021 17:12:26 -0600 Subject: [PATCH 03/11] Hook up a clock and implement proper design --- .../views/rooms/_VoiceRecordComposerTile.scss | 40 +++++++++ res/css/views/voice_messages/_Waveform.scss | 20 +++-- .../legacy-light/css/_legacy-light.scss | 3 + res/themes/light/css/_light.scss | 3 + .../views/rooms/VoiceRecordComposerTile.tsx | 19 ++++- src/components/views/voice_messages/Clock.tsx | 42 ++++++++++ .../voice_messages/LiveRecordingClock.tsx | 55 +++++++++++++ .../voice_messages/LiveRecordingWaveform.tsx | 4 +- src/voice/VoiceRecorder.ts | 82 +++++++++++-------- 9 files changed, 222 insertions(+), 46 deletions(-) create mode 100644 src/components/views/voice_messages/Clock.tsx create mode 100644 src/components/views/voice_messages/LiveRecordingClock.tsx diff --git a/res/css/views/rooms/_VoiceRecordComposerTile.scss b/res/css/views/rooms/_VoiceRecordComposerTile.scss index bb36991b4f..2fb112a38c 100644 --- a/res/css/views/rooms/_VoiceRecordComposerTile.scss +++ b/res/css/views/rooms/_VoiceRecordComposerTile.scss @@ -34,3 +34,43 @@ limitations under the License. 
 background-color: $voice-record-stop-symbol-color; } } + +.mx_VoiceRecordComposerTile_waveformContainer { + padding: 5px; + padding-right: 4px; // there's 1px from the waveform itself, so account for that + padding-left: 15px; // +10px for the live circle, +5px for regular padding + background-color: $voice-record-waveform-bg-color; + border-radius: 12px; + margin-right: 12px; // isolate from stop button + + // Cheat at alignment a bit + display: flex; + align-items: center; + + position: relative; // important for the live circle + + color: $voice-record-waveform-fg-color; + font-size: $font-14px; + + &::before { + // TODO: @@ TravisR: Animate + content: ''; + background-color: $voice-record-live-circle-color; + width: 10px; + height: 10px; + position: absolute; + left: 8px; + top: 16px; // vertically center + border-radius: 10px; + } + + .mx_Waveform_bar { + background-color: $voice-record-waveform-fg-color; + } + + .mx_Clock { + padding-right: 8px; // isolate from waveform + padding-left: 10px; // isolate from live circle + width: 42px; // we're not using a monospace font, so fake it + } +} diff --git a/res/css/views/voice_messages/_Waveform.scss b/res/css/views/voice_messages/_Waveform.scss index 23eedf2dbd..cf03c84601 100644 --- a/res/css/views/voice_messages/_Waveform.scss +++ b/res/css/views/voice_messages/_Waveform.scss @@ -17,18 +17,24 @@ limitations under the License. .mx_Waveform { position: relative; height: 30px; // tallest bar can only be 30px + top: 1px; // because of our border trick (see below), we're off by 1px of alignment display: flex; align-items: center; // so the bars grow from the middle + overflow: hidden; // this is cheaper than a `max-height: calc(100% - 4px)` in the bar's CSS. + + // A bar is meant to be a 2x2 circle when at zero height, and otherwise a 2px wide line + // with rounded caps. 
.mx_Waveform_bar { - width: 2px; - margin-left: 1px; + width: 0; // 0px width means we'll end up using the border as our width + border: 1px solid transparent; // transparent means we'll use the background colour + border-radius: 2px; // rounded end caps, based on the border + min-height: 0; // like the width, we'll rely on the border to give us height + max-height: 100%; // this makes the `height: 42%` work on the element + margin-left: 1px; // we want 2px between each bar, so 1px on either side for balance margin-right: 1px; - background-color: $muted-fg-color; - display: inline-block; - min-height: 2px; - max-height: 100%; - border-radius: 2px; // give them soft endcaps + + // background color is handled by the parent components } } diff --git a/res/themes/legacy-light/css/_legacy-light.scss b/res/themes/legacy-light/css/_legacy-light.scss index d7ee496d80..c22a8fa2ff 100644 --- a/res/themes/legacy-light/css/_legacy-light.scss +++ b/res/themes/legacy-light/css/_legacy-light.scss @@ -191,6 +191,9 @@ $space-button-outline-color: #E3E8F0; $voice-record-stop-border-color: #E3E8F0; $voice-record-stop-symbol-color: $warning-color; +$voice-record-waveform-bg-color: #E3E8F0; +$voice-record-waveform-fg-color: $muted-fg-color; +$voice-record-live-circle-color: $warning-color; $roomtile-preview-color: #9e9e9e; $roomtile-default-badge-bg-color: #61708b; diff --git a/res/themes/light/css/_light.scss b/res/themes/light/css/_light.scss index 577204ef0c..c778420094 100644 --- a/res/themes/light/css/_light.scss +++ b/res/themes/light/css/_light.scss @@ -182,6 +182,9 @@ $space-button-outline-color: #E3E8F0; $voice-record-stop-border-color: #E3E8F0; $voice-record-stop-symbol-color: $warning-color; +$voice-record-waveform-bg-color: #E3E8F0; +$voice-record-waveform-fg-color: $muted-fg-color; +$voice-record-live-circle-color: $warning-color; $roomtile-preview-color: $secondary-fg-color; $roomtile-default-badge-bg-color: #61708b; diff --git 
a/src/components/views/rooms/VoiceRecordComposerTile.tsx b/src/components/views/rooms/VoiceRecordComposerTile.tsx index 061daab915..b4999ac0df 100644 --- a/src/components/views/rooms/VoiceRecordComposerTile.tsx +++ b/src/components/views/rooms/VoiceRecordComposerTile.tsx @@ -22,6 +22,8 @@ import {Room} from "matrix-js-sdk/src/models/room"; import {MatrixClientPeg} from "../../../MatrixClientPeg"; import classNames from "classnames"; import LiveRecordingWaveform from "../voice_messages/LiveRecordingWaveform"; +import {replaceableComponent} from "../../../utils/replaceableComponent"; +import LiveRecordingClock from "../voice_messages/LiveRecordingClock"; interface IProps { room: Room; @@ -32,6 +34,10 @@ interface IState { recorder?: VoiceRecorder; } +/** + * Container tile for rendering the voice message recorder in the composer. + */ +@replaceableComponent("views.rooms.VoiceRecordComposerTile") export default class VoiceRecordComposerTile extends React.PureComponent { public constructor(props) { super(props); @@ -61,6 +67,15 @@ export default class VoiceRecordComposerTile extends React.PureComponent + + + ; + } + public render() { const classes = classNames({ 'mx_MessageComposer_button': !this.state.recorder, @@ -68,16 +83,14 @@ export default class VoiceRecordComposerTile extends React.PureComponent; } return (<> - {waveform} + {this.renderWaveformArea()} { + public constructor(props) { + super(props); + } + + public render() { + const minutes = Math.floor(this.props.seconds / 60).toFixed(0).padStart(2, '0'); + const seconds = Math.round(this.props.seconds % 60).toFixed(0).padStart(2, '0'); // hide millis + return {minutes}:{seconds}; + } +} diff --git a/src/components/views/voice_messages/LiveRecordingClock.tsx b/src/components/views/voice_messages/LiveRecordingClock.tsx new file mode 100644 index 0000000000..08b50e42c1 --- /dev/null +++ b/src/components/views/voice_messages/LiveRecordingClock.tsx @@ -0,0 +1,55 @@ +/* +Copyright 2021 The Matrix.org Foundation 
C.I.C. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +import React from "react"; +import {IRecordingUpdate, VoiceRecorder} from "../../../voice/VoiceRecorder"; +import {replaceableComponent} from "../../../utils/replaceableComponent"; +import Clock from "./Clock"; + +interface IProps { + recorder: VoiceRecorder; +} + +interface IState { + seconds: number; +} + +/** + * A clock for a live recording. + */ +@replaceableComponent("views.voice_messages.LiveRecordingClock") +export default class LiveRecordingClock extends React.PureComponent { + public constructor(props) { + super(props); + + this.state = {seconds: 0}; + this.props.recorder.liveData.onUpdate(this.onRecordingUpdate); + } + + shouldComponentUpdate(nextProps: Readonly, nextState: Readonly, nextContext: any): boolean { + const currentFloor = Math.floor(this.state.seconds); + const nextFloor = Math.floor(nextState.seconds); + return currentFloor !== nextFloor; + } + + private onRecordingUpdate = (update: IRecordingUpdate) => { + this.setState({seconds: update.timeSeconds}); + }; + + public render() { + return ; + } +} diff --git a/src/components/views/voice_messages/LiveRecordingWaveform.tsx b/src/components/views/voice_messages/LiveRecordingWaveform.tsx index 506532744a..8a2a5ae089 100644 --- a/src/components/views/voice_messages/LiveRecordingWaveform.tsx +++ b/src/components/views/voice_messages/LiveRecordingWaveform.tsx @@ -49,12 +49,12 @@ export default class LiveRecordingWaveform extends React.PureComponent 
percentageOf(b, 0, 0.40) * 100), + heights: bars.map(b => percentageOf(b, 0, 0.35) * 100), }); }; diff --git a/src/voice/VoiceRecorder.ts b/src/voice/VoiceRecorder.ts index a85c3acad3..dec8017b8b 100644 --- a/src/voice/VoiceRecorder.ts +++ b/src/voice/VoiceRecorder.ts @@ -23,12 +23,10 @@ import {SimpleObservable} from "matrix-widget-api"; const CHANNELS = 1; // stereo isn't important const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality. const BITRATE = 24000; // 24kbps is pretty high quality for our use case in opus. -const FREQ_SAMPLE_RATE = 10; // Target rate of frequency data (samples / sec). We don't need this super often. export interface IRecordingUpdate { waveform: number[]; // floating points between 0 (low) and 1 (high). - - // TODO: @@ TravisR: Generalize this for a timing package? + timeSeconds: number; // float } export class VoiceRecorder { @@ -37,11 +35,11 @@ export class VoiceRecorder { private recorderSource: MediaStreamAudioSourceNode; private recorderStream: MediaStream; private recorderFFT: AnalyserNode; + private recorderProcessor: ScriptProcessorNode; private buffer = new Uint8Array(0); private mxc: string; private recording = false; private observable: SimpleObservable; - private freqTimerId: number; public constructor(private client: MatrixClient) { } @@ -71,7 +69,20 @@ export class VoiceRecorder { // it makes the time domain less than helpful. this.recorderFFT.fftSize = 64; + // We use an audio processor to get accurate timing information. + // The size of the audio buffer largely decides how quickly we push timing/waveform data + // out of this class. Smaller buffers mean we update more frequently as we can't hold as + // many bytes. Larger buffers mean slower updates. For scale, 1024 gives us about 30Hz of + // updates and 2048 gives us about 20Hz. We use 2048 because it updates frequently enough + // to feel realtime (~20fps, which is what humans perceive as "realtime"). Must be a power + // of 2. 
+ this.recorderProcessor = this.recorderContext.createScriptProcessor(2048, CHANNELS, CHANNELS); + + // Connect our inputs and outputs this.recorderSource.connect(this.recorderFFT); + this.recorderSource.connect(this.recorderProcessor); + this.recorderProcessor.connect(this.recorderContext.destination); + this.recorder = new Recorder({ encoderPath, // magic from webpack encoderSampleRate: SAMPLE_RATE, @@ -117,6 +128,37 @@ export class VoiceRecorder { return this.mxc; } + private tryUpdateLiveData = (ev: AudioProcessingEvent) => { + if (!this.recording) return; + + // The time domain is the input to the FFT, which means we use an array of the same + // size. The time domain is also known as the audio waveform. We're ignoring the + // output of the FFT here (frequency data) because we're not interested in it. + // + // We use bytes out of the analyser because floats have weird precision problems + // and are slightly more difficult to work with. The bytes are easy to work with, + // which is why we pick them (they're also more precise, but we care less about that). + const data = new Uint8Array(this.recorderFFT.fftSize); + this.recorderFFT.getByteTimeDomainData(data); + + // Because we're dealing with a uint array we need to do math a bit differently. + // If we just `Array.from()` the uint array, we end up with 1s and 0s, which aren't + // what we're after. Instead, we have to use a bit of manual looping to correctly end + // up with the right values + const translatedData: number[] = []; + for (let i = 0; i < data.length; i++) { + // All we're doing here is inverting the amplitude and putting the metric somewhere + // between zero and one. Without the inversion, lower values are "louder", which is + // not super helpful. 
+ translatedData.push(1 - (data[i] / 128.0)); + } + + this.observable.update({ + waveform: translatedData, + timeSeconds: ev.playbackTime, + }); + }; + public async start(): Promise { if (this.mxc || this.hasRecording) { throw new Error("Recording already prepared"); @@ -129,35 +171,7 @@ export class VoiceRecorder { } this.observable = new SimpleObservable(); await this.makeRecorder(); - this.freqTimerId = setInterval(() => { - if (!this.recording) return; - - // The time domain is the input to the FFT, which means we use an array of the same - // size. The time domain is also known as the audio waveform. We're ignoring the - // output of the FFT here (frequency data) because we're not interested in it. - // - // We use bytes out of the analyser because floats have weird precision problems - // and are slightly more difficult to work with. The bytes are easy to work with, - // which is why we pick them (they're also more precise, but we care less about that). - const data = new Uint8Array(this.recorderFFT.fftSize); - this.recorderFFT.getByteTimeDomainData(data); - - // Because we're dealing with a uint array we need to do math a bit differently. - // If we just `Array.from()` the uint array, we end up with 1s and 0s, which aren't - // what we're after. Instead, we have to use a bit of manual looping to correctly end - // up with the right values - const translatedData: number[] = []; - for (let i = 0; i < data.length; i++) { - // All we're doing here is inverting the amplitude and putting the metric somewhere - // between zero and one. Without the inversion, lower values are "louder", which is - // not super helpful. 
- translatedData.push(1 - (data[i] / 128.0)); - } - - this.observable.update({ - waveform: translatedData, - }); - }, 1000 / FREQ_SAMPLE_RATE) as any as number; // XXX: Linter doesn't understand timer environment + this.recorderProcessor.addEventListener("audioprocess", this.tryUpdateLiveData); await this.recorder.start(); this.recording = true; } @@ -179,8 +193,8 @@ export class VoiceRecorder { this.recorderStream.getTracks().forEach(t => t.stop()); // Finally do our post-processing and clean up - clearInterval(this.freqTimerId); this.recording = false; + this.recorderProcessor.removeEventListener("audioprocess", this.tryUpdateLiveData); await this.recorder.close(); return this.buffer; From 101679f64708855b55f947a7d739685a5e996d82 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Thu, 25 Mar 2021 17:30:44 -0600 Subject: [PATCH 04/11] Adjust some settings --- .../views/voice_messages/LiveRecordingWaveform.tsx | 4 ++-- src/voice/VoiceRecorder.ts | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/components/views/voice_messages/LiveRecordingWaveform.tsx b/src/components/views/voice_messages/LiveRecordingWaveform.tsx index 8a2a5ae089..d0048ac9cb 100644 --- a/src/components/views/voice_messages/LiveRecordingWaveform.tsx +++ b/src/components/views/voice_messages/LiveRecordingWaveform.tsx @@ -49,12 +49,12 @@ export default class LiveRecordingWaveform extends React.PureComponent percentageOf(b, 0, 0.35) * 100), + heights: bars.map(b => percentageOf(b, 0, 0.50) * 100), }); }; diff --git a/src/voice/VoiceRecorder.ts b/src/voice/VoiceRecorder.ts index dec8017b8b..6a3d392ce4 100644 --- a/src/voice/VoiceRecorder.ts +++ b/src/voice/VoiceRecorder.ts @@ -73,10 +73,9 @@ export class VoiceRecorder { // The size of the audio buffer largely decides how quickly we push timing/waveform data // out of this class. Smaller buffers mean we update more frequently as we can't hold as // many bytes. Larger buffers mean slower updates. 
For scale, 1024 gives us about 30Hz of - // updates and 2048 gives us about 20Hz. We use 2048 because it updates frequently enough - // to feel realtime (~20fps, which is what humans perceive as "realtime"). Must be a power - // of 2. - this.recorderProcessor = this.recorderContext.createScriptProcessor(2048, CHANNELS, CHANNELS); + // updates and 2048 gives us about 20Hz. We use 1024 to get as close to perceived realtime + // as possible. Must be a power of 2. + this.recorderProcessor = this.recorderContext.createScriptProcessor(1024, CHANNELS, CHANNELS); // Connect our inputs and outputs this.recorderSource.connect(this.recorderFFT); From a848febd3d31d91033f63dc395d9d2f3696ab1ad Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Thu, 25 Mar 2021 17:45:00 -0600 Subject: [PATCH 05/11] Definitely didn't copy/paste these --- src/components/views/voice_messages/LiveRecordingWaveform.tsx | 2 +- src/components/views/voice_messages/Waveform.tsx | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/components/views/voice_messages/LiveRecordingWaveform.tsx b/src/components/views/voice_messages/LiveRecordingWaveform.tsx index d0048ac9cb..b94eae0f56 100644 --- a/src/components/views/voice_messages/LiveRecordingWaveform.tsx +++ b/src/components/views/voice_messages/LiveRecordingWaveform.tsx @@ -18,7 +18,7 @@ import React from "react"; import {IRecordingUpdate, VoiceRecorder} from "../../../voice/VoiceRecorder"; import {replaceableComponent} from "../../../utils/replaceableComponent"; import {arrayFastResample, arraySeed} from "../../../utils/arrays"; -import {clamp, percentageOf} from "../../../utils/numbers"; +import {percentageOf} from "../../../utils/numbers"; import Waveform from "./Waveform"; interface IProps { diff --git a/src/components/views/voice_messages/Waveform.tsx b/src/components/views/voice_messages/Waveform.tsx index 9736db54d1..4c3edcb927 100644 --- a/src/components/views/voice_messages/Waveform.tsx +++ 
b/src/components/views/voice_messages/Waveform.tsx @@ -15,10 +15,7 @@ limitations under the License. */ import React from "react"; -import {IRecordingUpdate, VoiceRecorder} from "../../../voice/VoiceRecorder"; import {replaceableComponent} from "../../../utils/replaceableComponent"; -import {arrayFastResample, arraySeed} from "../../../utils/arrays"; -import {percentageOf} from "../../../utils/numbers"; interface IProps { heights: number[]; // percentages as integers (0-100) From e523ce60360e3da384411d850a18a42a238e4623 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Mon, 29 Mar 2021 21:25:06 -0600 Subject: [PATCH 06/11] Fix float operations to make a little more sense. --- .../voice_messages/LiveRecordingWaveform.tsx | 12 ++++------ .../views/voice_messages/Waveform.tsx | 6 ++--- src/voice/VoiceRecorder.ts | 24 ++++++++----------- 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/src/components/views/voice_messages/LiveRecordingWaveform.tsx b/src/components/views/voice_messages/LiveRecordingWaveform.tsx index b94eae0f56..e7cab4a5cb 100644 --- a/src/components/views/voice_messages/LiveRecordingWaveform.tsx +++ b/src/components/views/voice_messages/LiveRecordingWaveform.tsx @@ -49,16 +49,14 @@ export default class LiveRecordingWaveform extends React.PureComponent percentageOf(b, 0, 0.50) * 100), + // microphone won't send you over 0.6, so we artificially adjust the gain for the + // waveform. This results in a slightly more cinematic/animated waveform for the + // user. 
+ heights: bars.map(b => percentageOf(b, 0, 0.50)), }); }; public render() { - return ; + return ; } } diff --git a/src/components/views/voice_messages/Waveform.tsx b/src/components/views/voice_messages/Waveform.tsx index 4c3edcb927..5fa68dcadc 100644 --- a/src/components/views/voice_messages/Waveform.tsx +++ b/src/components/views/voice_messages/Waveform.tsx @@ -18,7 +18,7 @@ import React from "react"; import {replaceableComponent} from "../../../utils/replaceableComponent"; interface IProps { - heights: number[]; // percentages as integers (0-100) + relHeights: number[]; // relative heights (0-1) } interface IState { @@ -37,8 +37,8 @@ export default class Waveform extends React.PureComponent { public render() { return
- {this.props.heights.map((h, i) => { - return ; + {this.props.relHeights.map((h, i) => { + return ; })}
; } diff --git a/src/voice/VoiceRecorder.ts b/src/voice/VoiceRecorder.ts index 6a3d392ce4..50497438ca 100644 --- a/src/voice/VoiceRecorder.ts +++ b/src/voice/VoiceRecorder.ts @@ -19,6 +19,7 @@ import encoderPath from 'opus-recorder/dist/encoderWorker.min.js'; import {MatrixClient} from "matrix-js-sdk/src/client"; import CallMediaHandler from "../CallMediaHandler"; import {SimpleObservable} from "matrix-widget-api"; +import {percentageOf} from "../utils/numbers"; const CHANNELS = 1; // stereo isn't important const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality. @@ -133,23 +134,18 @@ export class VoiceRecorder { // The time domain is the input to the FFT, which means we use an array of the same // size. The time domain is also known as the audio waveform. We're ignoring the // output of the FFT here (frequency data) because we're not interested in it. - // - // We use bytes out of the analyser because floats have weird precision problems - // and are slightly more difficult to work with. The bytes are easy to work with, - // which is why we pick them (they're also more precise, but we care less about that). - const data = new Uint8Array(this.recorderFFT.fftSize); - this.recorderFFT.getByteTimeDomainData(data); + const data = new Float32Array(this.recorderFFT.fftSize); + this.recorderFFT.getFloatTimeDomainData(data); - // Because we're dealing with a uint array we need to do math a bit differently. - // If we just `Array.from()` the uint array, we end up with 1s and 0s, which aren't - // what we're after. Instead, we have to use a bit of manual looping to correctly end - // up with the right values + // We can't just `Array.from()` the array because we're dealing with 32bit floats + // and the built-in function won't consider that when converting between numbers. + // However, the runtime will convert the float32 to a float64 during the math operations + // which is why the loop works below. 
Note that a `.map()` call also doesn't work + // and will instead return a Float32Array still. const translatedData: number[] = []; for (let i = 0; i < data.length; i++) { - // All we're doing here is inverting the amplitude and putting the metric somewhere - // between zero and one. Without the inversion, lower values are "louder", which is - // not super helpful. - translatedData.push(1 - (data[i] / 128.0)); + // We're clamping the values so we can do that math operation mentioned above. + translatedData.push(percentageOf(data[i], 0, 1)); } this.observable.update({ From 5c685dcf35fbe8c896f22882a6843dc0cc0b835d Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Mon, 29 Mar 2021 21:59:31 -0600 Subject: [PATCH 07/11] Avoid use of deprecated APIs, instead using an AudioWorklet A bit annoying that it is async, but it'll do. --- src/voice/VoiceRecorder.ts | 30 ++++++++++++++++-------------- src/voice/mxVoiceWorklet.js | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 14 deletions(-) create mode 100644 src/voice/mxVoiceWorklet.js diff --git a/src/voice/VoiceRecorder.ts b/src/voice/VoiceRecorder.ts index 50497438ca..319a6c3a37 100644 --- a/src/voice/VoiceRecorder.ts +++ b/src/voice/VoiceRecorder.ts @@ -16,6 +16,7 @@ limitations under the License. 
import * as Recorder from 'opus-recorder'; import encoderPath from 'opus-recorder/dist/encoderWorker.min.js'; +import mxVoiceWorkletPath from './mxVoiceWorklet'; import {MatrixClient} from "matrix-js-sdk/src/client"; import CallMediaHandler from "../CallMediaHandler"; import {SimpleObservable} from "matrix-widget-api"; @@ -36,7 +37,7 @@ export class VoiceRecorder { private recorderSource: MediaStreamAudioSourceNode; private recorderStream: MediaStream; private recorderFFT: AnalyserNode; - private recorderProcessor: ScriptProcessorNode; + private recorderWorklet: AudioWorkletNode; private buffer = new Uint8Array(0); private mxc: string; private recording = false; @@ -70,18 +71,20 @@ export class VoiceRecorder { // it makes the time domain less than helpful. this.recorderFFT.fftSize = 64; - // We use an audio processor to get accurate timing information. - // The size of the audio buffer largely decides how quickly we push timing/waveform data - // out of this class. Smaller buffers mean we update more frequently as we can't hold as - // many bytes. Larger buffers mean slower updates. For scale, 1024 gives us about 30Hz of - // updates and 2048 gives us about 20Hz. We use 1024 to get as close to perceived realtime - // as possible. Must be a power of 2. - this.recorderProcessor = this.recorderContext.createScriptProcessor(1024, CHANNELS, CHANNELS); + await this.recorderContext.audioWorklet.addModule(mxVoiceWorkletPath); + this.recorderWorklet = new AudioWorkletNode(this.recorderContext, "mx-voice-worklet"); // Connect our inputs and outputs this.recorderSource.connect(this.recorderFFT); - this.recorderSource.connect(this.recorderProcessor); - this.recorderProcessor.connect(this.recorderContext.destination); + this.recorderSource.connect(this.recorderWorklet); + this.recorderWorklet.connect(this.recorderContext.destination); + + // Dev note: we can't use `addEventListener` for some reason. It just doesn't work. 
+ this.recorderWorklet.port.onmessage = (ev) => { + if (ev.data['ev'] === 'proc') { + this.tryUpdateLiveData(ev.data['timeMs']); + } + }; this.recorder = new Recorder({ encoderPath, // magic from webpack @@ -128,7 +131,7 @@ export class VoiceRecorder { return this.mxc; } - private tryUpdateLiveData = (ev: AudioProcessingEvent) => { + private tryUpdateLiveData = (timeMillis: number) => { if (!this.recording) return; // The time domain is the input to the FFT, which means we use an array of the same @@ -150,7 +153,7 @@ export class VoiceRecorder { this.observable.update({ waveform: translatedData, - timeSeconds: ev.playbackTime, + timeSeconds: timeMillis / 1000, }); }; @@ -166,7 +169,6 @@ export class VoiceRecorder { } this.observable = new SimpleObservable(); await this.makeRecorder(); - this.recorderProcessor.addEventListener("audioprocess", this.tryUpdateLiveData); await this.recorder.start(); this.recording = true; } @@ -178,6 +180,7 @@ export class VoiceRecorder { // Disconnect the source early to start shutting down resources this.recorderSource.disconnect(); + this.recorderWorklet.disconnect(); await this.recorder.stop(); // close the context after the recorder so the recorder doesn't try to @@ -189,7 +192,6 @@ export class VoiceRecorder { // Finally do our post-processing and clean up this.recording = false; - this.recorderProcessor.removeEventListener("audioprocess", this.tryUpdateLiveData); await this.recorder.close(); return this.buffer; diff --git a/src/voice/mxVoiceWorklet.js b/src/voice/mxVoiceWorklet.js new file mode 100644 index 0000000000..a74f5c17c9 --- /dev/null +++ b/src/voice/mxVoiceWorklet.js @@ -0,0 +1,35 @@ +/* +Copyright 2021 The Matrix.org Foundation C.I.C. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +class MxVoiceWorklet extends AudioWorkletProcessor { + constructor() { + super(); + + this._timeStart = 0; + } + + process(inputs, outputs, parameters) { + const now = (new Date()).getTime(); + if (this._timeStart === 0) { + this._timeStart = now; + } + + this.port.postMessage({ev: 'proc', timeMs: now - this._timeStart}); + return true; + } +} + +registerProcessor('mx-voice-worklet', MxVoiceWorklet); From 9998f18d67067ef2a3f3b4c970c636d42a1563a9 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Mon, 29 Mar 2021 21:59:45 -0600 Subject: [PATCH 08/11] Stop React complaining about componentShouldUpdate() --- src/components/views/voice_messages/LiveRecordingClock.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/views/voice_messages/LiveRecordingClock.tsx b/src/components/views/voice_messages/LiveRecordingClock.tsx index 08b50e42c1..00316d196a 100644 --- a/src/components/views/voice_messages/LiveRecordingClock.tsx +++ b/src/components/views/voice_messages/LiveRecordingClock.tsx @@ -31,7 +31,7 @@ interface IState { * A clock for a live recording. 
*/ @replaceableComponent("views.voice_messages.LiveRecordingClock") -export default class LiveRecordingClock extends React.PureComponent { +export default class LiveRecordingClock extends React.Component { public constructor(props) { super(props); From 9c2d44805d2c7238bd0c339daf3b11006c1e471c Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Mon, 29 Mar 2021 21:59:31 -0600 Subject: [PATCH 09/11] Revert "Avoid use of deprecated APIs, instead using an AudioWorklet" This reverts commit 5c685dcf35fbe8c896f22882a6843dc0cc0b835d. --- src/voice/VoiceRecorder.ts | 30 ++++++++++++++---------------- src/voice/mxVoiceWorklet.js | 35 ----------------------------------- 2 files changed, 14 insertions(+), 51 deletions(-) delete mode 100644 src/voice/mxVoiceWorklet.js diff --git a/src/voice/VoiceRecorder.ts b/src/voice/VoiceRecorder.ts index 319a6c3a37..50497438ca 100644 --- a/src/voice/VoiceRecorder.ts +++ b/src/voice/VoiceRecorder.ts @@ -16,7 +16,6 @@ limitations under the License. import * as Recorder from 'opus-recorder'; import encoderPath from 'opus-recorder/dist/encoderWorker.min.js'; -import mxVoiceWorkletPath from './mxVoiceWorklet'; import {MatrixClient} from "matrix-js-sdk/src/client"; import CallMediaHandler from "../CallMediaHandler"; import {SimpleObservable} from "matrix-widget-api"; @@ -37,7 +36,7 @@ export class VoiceRecorder { private recorderSource: MediaStreamAudioSourceNode; private recorderStream: MediaStream; private recorderFFT: AnalyserNode; - private recorderWorklet: AudioWorkletNode; + private recorderProcessor: ScriptProcessorNode; private buffer = new Uint8Array(0); private mxc: string; private recording = false; @@ -71,20 +70,18 @@ export class VoiceRecorder { // it makes the time domain less than helpful. 
this.recorderFFT.fftSize = 64; - await this.recorderContext.audioWorklet.addModule(mxVoiceWorkletPath); - this.recorderWorklet = new AudioWorkletNode(this.recorderContext, "mx-voice-worklet"); + // We use an audio processor to get accurate timing information. + // The size of the audio buffer largely decides how quickly we push timing/waveform data + // out of this class. Smaller buffers mean we update more frequently as we can't hold as + // many bytes. Larger buffers mean slower updates. For scale, 1024 gives us about 30Hz of + // updates and 2048 gives us about 20Hz. We use 1024 to get as close to perceived realtime + // as possible. Must be a power of 2. + this.recorderProcessor = this.recorderContext.createScriptProcessor(1024, CHANNELS, CHANNELS); // Connect our inputs and outputs this.recorderSource.connect(this.recorderFFT); - this.recorderSource.connect(this.recorderWorklet); - this.recorderWorklet.connect(this.recorderContext.destination); - - // Dev note: we can't use `addEventListener` for some reason. It just doesn't work. 
- this.recorderWorklet.port.onmessage = (ev) => { - if (ev.data['ev'] === 'proc') { - this.tryUpdateLiveData(ev.data['timeMs']); - } - }; + this.recorderSource.connect(this.recorderProcessor); + this.recorderProcessor.connect(this.recorderContext.destination); this.recorder = new Recorder({ encoderPath, // magic from webpack @@ -131,7 +128,7 @@ export class VoiceRecorder { return this.mxc; } - private tryUpdateLiveData = (timeMillis: number) => { + private tryUpdateLiveData = (ev: AudioProcessingEvent) => { if (!this.recording) return; // The time domain is the input to the FFT, which means we use an array of the same @@ -153,7 +150,7 @@ export class VoiceRecorder { this.observable.update({ waveform: translatedData, - timeSeconds: timeMillis / 1000, + timeSeconds: ev.playbackTime, }); }; @@ -169,6 +166,7 @@ export class VoiceRecorder { } this.observable = new SimpleObservable(); await this.makeRecorder(); + this.recorderProcessor.addEventListener("audioprocess", this.tryUpdateLiveData); await this.recorder.start(); this.recording = true; } @@ -180,7 +178,6 @@ export class VoiceRecorder { // Disconnect the source early to start shutting down resources this.recorderSource.disconnect(); - this.recorderWorklet.disconnect(); await this.recorder.stop(); // close the context after the recorder so the recorder doesn't try to @@ -192,6 +189,7 @@ export class VoiceRecorder { // Finally do our post-processing and clean up this.recording = false; + this.recorderProcessor.removeEventListener("audioprocess", this.tryUpdateLiveData); await this.recorder.close(); return this.buffer; diff --git a/src/voice/mxVoiceWorklet.js b/src/voice/mxVoiceWorklet.js deleted file mode 100644 index a74f5c17c9..0000000000 --- a/src/voice/mxVoiceWorklet.js +++ /dev/null @@ -1,35 +0,0 @@ -/* -Copyright 2021 The Matrix.org Foundation C.I.C. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -class MxVoiceWorklet extends AudioWorkletProcessor { - constructor() { - super(); - - this._timeStart = 0; - } - - process(inputs, outputs, parameters) { - const now = (new Date()).getTime(); - if (this._timeStart === 0) { - this._timeStart = now; - } - - this.port.postMessage({ev: 'proc', timeMs: now - this._timeStart}); - return true; - } -} - -registerProcessor('mx-voice-worklet', MxVoiceWorklet); From 60326e359a9c082fd002e262e50921c5f496a2ca Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Tue, 30 Mar 2021 12:01:35 -0600 Subject: [PATCH 10/11] Clarify comment --- src/voice/VoiceRecorder.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/voice/VoiceRecorder.ts b/src/voice/VoiceRecorder.ts index 529781a090..0d890160c3 100644 --- a/src/voice/VoiceRecorder.ts +++ b/src/voice/VoiceRecorder.ts @@ -140,7 +140,9 @@ export class VoiceRecorder { // and will instead return a Float32Array still. const translatedData: number[] = []; for (let i = 0; i < data.length; i++) { - // We're clamping the values so we can do that math operation mentioned above. + // We're clamping the values so we can do that math operation mentioned above, + // and to ensure that we produce consistent data (it's possible for the array + // to exceed the specified range with some audio input devices). 
translatedData.push(percentageOf(data[i], 0, 1)); } From b15412056e834e200f799f2d7edc2436b04cdb97 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Tue, 30 Mar 2021 12:08:11 -0600 Subject: [PATCH 11/11] It helps to use the right function --- src/voice/VoiceRecorder.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/voice/VoiceRecorder.ts b/src/voice/VoiceRecorder.ts index 0d890160c3..077990ac17 100644 --- a/src/voice/VoiceRecorder.ts +++ b/src/voice/VoiceRecorder.ts @@ -19,7 +19,7 @@ import encoderPath from 'opus-recorder/dist/encoderWorker.min.js'; import {MatrixClient} from "matrix-js-sdk/src/client"; import CallMediaHandler from "../CallMediaHandler"; import {SimpleObservable} from "matrix-widget-api"; -import {percentageOf} from "../utils/numbers"; +import {clamp} from "../utils/numbers"; const CHANNELS = 1; // stereo isn't important const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality. @@ -143,7 +143,7 @@ export class VoiceRecorder { // We're clamping the values so we can do that math operation mentioned above, // and to ensure that we produce consistent data (it's possible for the array // to exceed the specified range with some audio input devices). - translatedData.push(percentageOf(data[i], 0, 1)); + translatedData.push(clamp(data[i], 0, 1)); } this.observable.update({