Add more encryption properties to PostHog (#12415)
* report time to decrypt an event if it's slow * add more stats * fix some tests * keep original timestamp, and report non-visible decryption failures * add statistic for whether the user trusts their own device * revert local playwright docker changes * apply changes from review * also remove logout event handler on logout * apply changes from review * make eventDecrypted callback private
This commit is contained in:
parent
0c04ec1def
commit
eed8d1538b
5 changed files with 626 additions and 180 deletions
|
@ -14,38 +14,60 @@ See the License for the specific language governing permissions and
|
|||
limitations under the License.
|
||||
*/
|
||||
|
||||
import { MatrixEvent } from "matrix-js-sdk/src/matrix";
|
||||
import { CryptoEvent, HttpApiEvent, MatrixClient, MatrixEventEvent, MatrixEvent } from "matrix-js-sdk/src/matrix";
|
||||
import { Error as ErrorEvent } from "@matrix-org/analytics-events/types/typescript/Error";
|
||||
import { DecryptionFailureCode } from "matrix-js-sdk/src/crypto-api";
|
||||
|
||||
import { PosthogAnalytics } from "./PosthogAnalytics";
|
||||
|
||||
export class DecryptionFailure {
|
||||
public readonly ts: number;
|
||||
/**
|
||||
* The time between our initial failure to decrypt and our successful
|
||||
* decryption (if we managed to decrypt).
|
||||
*/
|
||||
public timeToDecryptMillis?: number;
|
||||
|
||||
public constructor(
|
||||
public readonly failedEventId: string,
|
||||
public readonly errorCode: DecryptionFailureCode,
|
||||
) {
|
||||
this.ts = Date.now();
|
||||
}
|
||||
/**
|
||||
* The time that we failed to decrypt the event. If we failed to decrypt
|
||||
* multiple times, this will be the time of the first failure.
|
||||
*/
|
||||
public readonly ts: number,
|
||||
/**
|
||||
* Is the sender on a different server from us?
|
||||
*/
|
||||
public readonly isFederated: boolean | undefined,
|
||||
/**
|
||||
* Was the failed event ever visible to the user?
|
||||
*/
|
||||
public wasVisibleToUser: boolean,
|
||||
/**
|
||||
* Has the user verified their own cross-signing identity, as of the most
|
||||
* recent decryption attempt for this event?
|
||||
*/
|
||||
public userTrustsOwnIdentity: boolean | undefined,
|
||||
) {}
|
||||
}
|
||||
|
||||
type ErrorCode = ErrorEvent["name"];
|
||||
type TrackingFn = (count: number, trackedErrCode: ErrorCode, rawError: string) => void;
|
||||
/** Properties associated with decryption errors, for classifying the error. */
|
||||
export type ErrorProperties = Omit<ErrorEvent, "eventName" | "domain" | "name" | "context">;
|
||||
type TrackingFn = (trackedErrCode: ErrorCode, rawError: string, properties: ErrorProperties) => void;
|
||||
export type ErrCodeMapFn = (errcode: DecryptionFailureCode) => ErrorCode;
|
||||
|
||||
export class DecryptionFailureTracker {
|
||||
private static internalInstance = new DecryptionFailureTracker(
|
||||
(total, errorCode, rawError) => {
|
||||
for (let i = 0; i < total; i++) {
|
||||
PosthogAnalytics.instance.trackEvent<ErrorEvent>({
|
||||
eventName: "Error",
|
||||
domain: "E2EE",
|
||||
name: errorCode,
|
||||
context: `mxc_crypto_error_type_${rawError}`,
|
||||
});
|
||||
}
|
||||
(errorCode, rawError, properties) => {
|
||||
const event: ErrorEvent = {
|
||||
eventName: "Error",
|
||||
domain: "E2EE",
|
||||
name: errorCode,
|
||||
context: `mxc_crypto_error_type_${rawError}`,
|
||||
...properties,
|
||||
};
|
||||
PosthogAnalytics.instance.trackEvent<ErrorEvent>(event);
|
||||
},
|
||||
(errorCode) => {
|
||||
// Map JS-SDK error codes to tracker codes for aggregation
|
||||
|
@ -66,51 +88,78 @@ export class DecryptionFailureTracker {
|
|||
},
|
||||
);
|
||||
|
||||
// Map of event IDs to DecryptionFailure items.
|
||||
/** Map of event IDs to `DecryptionFailure` items.
|
||||
*
|
||||
* Every `CHECK_INTERVAL_MS`, this map is checked for failures that happened >
|
||||
* `MAXIMUM_LATE_DECRYPTION_PERIOD` ago (considered undecryptable), or
|
||||
* decryptions that took > `GRACE_PERIOD_MS` (considered late decryptions).
|
||||
* These are accumulated in `failuresToReport`.
|
||||
*/
|
||||
public failures: Map<string, DecryptionFailure> = new Map();
|
||||
|
||||
// Set of event IDs that have been visible to the user.
|
||||
/** Set of event IDs that have been visible to the user.
|
||||
*
|
||||
* This will only contain events that are not already in `failures` or in
|
||||
* `trackedEvents`.
|
||||
*/
|
||||
public visibleEvents: Set<string> = new Set();
|
||||
|
||||
// Map of visible event IDs to `DecryptionFailure`s. Every
|
||||
// `CHECK_INTERVAL_MS`, this map is checked for failures that
|
||||
// happened > `GRACE_PERIOD_MS` ago. Those that did are
|
||||
// accumulated in `failureCounts`.
|
||||
public visibleFailures: Map<string, DecryptionFailure> = new Map();
|
||||
/** The failures that will be reported at the next tracking interval. These are
|
||||
* events that we have decided are undecryptable due to exceeding the
|
||||
* `MAXIMUM_LATE_DECRYPTION_PERIOD`, or that we decrypted but we consider as late
|
||||
* decryptions. */
|
||||
public failuresToReport: Set<DecryptionFailure> = new Set();
|
||||
|
||||
/**
|
||||
* A histogram of the number of failures that will be tracked at the next tracking
|
||||
* interval, split by failure error code.
|
||||
*/
|
||||
private failureCounts: Map<DecryptionFailureCode, number> = new Map();
|
||||
|
||||
// Event IDs of failures that were tracked previously
|
||||
/** Event IDs of failures that were tracked previously */
|
||||
public trackedEvents: Set<string> = new Set();
|
||||
|
||||
// Set to an interval ID when `start` is called
|
||||
/** Set to an interval ID when `start` is called */
|
||||
public checkInterval: number | null = null;
|
||||
public trackInterval: number | null = null;
|
||||
|
||||
// Spread the load on `Analytics` by tracking at a low frequency, `TRACK_INTERVAL_MS`.
|
||||
/** Spread the load on `Analytics` by tracking at a low frequency, `TRACK_INTERVAL_MS`. */
|
||||
public static TRACK_INTERVAL_MS = 60000;
|
||||
|
||||
// Call `checkFailures` every `CHECK_INTERVAL_MS`.
|
||||
/** Call `checkFailures` every `CHECK_INTERVAL_MS`. */
|
||||
public static CHECK_INTERVAL_MS = 40000;
|
||||
|
||||
// Give events a chance to be decrypted by waiting `GRACE_PERIOD_MS` before counting
|
||||
// the failure in `failureCounts`.
|
||||
public static GRACE_PERIOD_MS = 30000;
|
||||
/** If the event is successfully decrypted in less than 4s, we don't report. */
|
||||
public static GRACE_PERIOD_MS = 4000;
|
||||
|
||||
/** Maximum time for an event to be decrypted to be considered a late
|
||||
* decryption. If it takes longer, we consider it undecryptable. */
|
||||
public static MAXIMUM_LATE_DECRYPTION_PERIOD = 60000;
|
||||
|
||||
/** Properties that will be added to all reported events (mainly reporting
|
||||
* information about the Matrix client). */
|
||||
private baseProperties?: ErrorProperties = {};
|
||||
|
||||
/** The user's domain (homeserver name). */
|
||||
private userDomain?: string;
|
||||
|
||||
/** Whether the user has verified their own cross-signing keys. */
|
||||
private userTrustsOwnIdentity: boolean | undefined = undefined;
|
||||
|
||||
/** Whether we are currently checking our own verification status. */
|
||||
private checkingVerificationStatus: boolean = false;
|
||||
|
||||
/** Whether we should retry checking our own verification status after we're
|
||||
* done our current check. i.e. we got notified that our keys changed while
|
||||
* we were already checking, so the result could be out of date. */
|
||||
private retryVerificationStatus: boolean = false;
|
||||
|
||||
/**
|
||||
* Create a new DecryptionFailureTracker.
|
||||
*
|
||||
* Call `eventDecrypted(event, err)` on this instance when an event is decrypted.
|
||||
*
|
||||
* Call `start()` to start the tracker, and `stop()` to stop tracking.
|
||||
* Call `start(client)` to start the tracker. The tracker will listen for
|
||||
* decryption events on the client and track decryption failures, and will
|
||||
* automatically stop tracking when the client logs out.
|
||||
*
|
||||
* @param {function} fn The tracking function, which will be called when failures
|
||||
* are tracked. The function should have a signature `(count, trackedErrorCode) => {...}`,
|
||||
* where `count` is the number of failures and `errorCode` matches the output of `errorCodeMapFn`.
|
||||
* are tracked. The function should have a signature `(trackedErrorCode, rawError, properties) => {...}`,
|
||||
* where `errorCode` matches the output of `errorCodeMapFn`, `rawError` is the original
|
||||
* error (that is, the input to `errorCodeMapFn`), and `properties` is a map of the
|
||||
* error properties for classifying the error.
|
||||
*
|
||||
* @param {function} errorCodeMapFn The function used to map decryption failure reason codes to the
|
||||
* `trackedErrorCode`.
|
||||
|
@ -140,32 +189,62 @@ export class DecryptionFailureTracker {
|
|||
// localStorage.setItem('mx-decryption-failure-event-ids', JSON.stringify([...this.trackedEvents]));
|
||||
// }
|
||||
|
||||
public eventDecrypted(e: MatrixEvent): void {
|
||||
/** Callback for when an event is decrypted.
|
||||
*
|
||||
* This function is called by our `MatrixEventEvent.Decrypted` event
|
||||
* handler after a decryption attempt on an event, whether the decryption
|
||||
* is successful or not.
|
||||
*
|
||||
* @param matrixEvent the event that was decrypted
|
||||
*
|
||||
* @param nowTs the current timestamp
|
||||
*/
|
||||
private eventDecrypted(e: MatrixEvent, nowTs: number): void {
|
||||
// for now we only track megolm decryption failures
|
||||
if (e.getWireContent().algorithm != "m.megolm.v1.aes-sha2") {
|
||||
return;
|
||||
}
|
||||
|
||||
const errCode = e.decryptionFailureReason;
|
||||
if (errCode !== null) {
|
||||
this.addDecryptionFailure(new DecryptionFailure(e.getId()!, errCode));
|
||||
} else {
|
||||
if (errCode === null) {
|
||||
// Could be an event in the failures, remove it
|
||||
this.removeDecryptionFailuresForEvent(e);
|
||||
this.removeDecryptionFailuresForEvent(e, nowTs);
|
||||
return;
|
||||
}
|
||||
|
||||
const eventId = e.getId()!;
|
||||
|
||||
// if we already have a record of this event, use the previously-recorded timestamp
|
||||
const failure = this.failures.get(eventId);
|
||||
const ts = failure ? failure.ts : nowTs;
|
||||
|
||||
const sender = e.getSender();
|
||||
const senderDomain = sender?.replace(/^.*?:/, "");
|
||||
let isFederated: boolean | undefined;
|
||||
if (this.userDomain !== undefined && senderDomain !== undefined) {
|
||||
isFederated = this.userDomain !== senderDomain;
|
||||
}
|
||||
const wasVisibleToUser = this.visibleEvents.has(eventId);
|
||||
this.addDecryptionFailure(
|
||||
new DecryptionFailure(eventId, errCode, ts, isFederated, wasVisibleToUser, this.userTrustsOwnIdentity),
|
||||
);
|
||||
}
|
||||
|
||||
public addVisibleEvent(e: MatrixEvent): void {
|
||||
const eventId = e.getId()!;
|
||||
|
||||
// if it's already reported, we don't need to do anything
|
||||
if (this.trackedEvents.has(eventId)) {
|
||||
return;
|
||||
}
|
||||
|
||||
this.visibleEvents.add(eventId);
|
||||
if (this.failures.has(eventId) && !this.visibleFailures.has(eventId)) {
|
||||
this.visibleFailures.set(eventId, this.failures.get(eventId)!);
|
||||
// if we've already marked the event as a failure, mark it as visible
|
||||
// in the failure object
|
||||
const failure = this.failures.get(eventId);
|
||||
if (failure) {
|
||||
failure.wasVisibleToUser = true;
|
||||
}
|
||||
|
||||
this.visibleEvents.add(eventId);
|
||||
}
|
||||
|
||||
public addDecryptionFailure(failure: DecryptionFailure): void {
|
||||
|
@ -176,21 +255,55 @@ export class DecryptionFailureTracker {
|
|||
}
|
||||
|
||||
this.failures.set(eventId, failure);
|
||||
if (this.visibleEvents.has(eventId) && !this.visibleFailures.has(eventId)) {
|
||||
this.visibleFailures.set(eventId, failure);
|
||||
}
|
||||
|
||||
public removeDecryptionFailuresForEvent(e: MatrixEvent, nowTs: number): void {
|
||||
const eventId = e.getId()!;
|
||||
const failure = this.failures.get(eventId);
|
||||
if (failure) {
|
||||
this.failures.delete(eventId);
|
||||
|
||||
const timeToDecryptMillis = nowTs - failure.ts;
|
||||
if (timeToDecryptMillis < DecryptionFailureTracker.GRACE_PERIOD_MS) {
|
||||
// the event decrypted on time, so we don't need to report it
|
||||
return;
|
||||
} else if (timeToDecryptMillis <= DecryptionFailureTracker.MAXIMUM_LATE_DECRYPTION_PERIOD) {
|
||||
// The event is a late decryption, so store the time it took.
|
||||
// If the time to decrypt is longer than
|
||||
// MAXIMUM_LATE_DECRYPTION_PERIOD, we consider the event as
|
||||
// undecryptable, and leave timeToDecryptMillis undefined
|
||||
failure.timeToDecryptMillis = timeToDecryptMillis;
|
||||
}
|
||||
this.failuresToReport.add(failure);
|
||||
}
|
||||
}
|
||||
|
||||
public removeDecryptionFailuresForEvent(e: MatrixEvent): void {
|
||||
const eventId = e.getId()!;
|
||||
this.failures.delete(eventId);
|
||||
this.visibleFailures.delete(eventId);
|
||||
private async handleKeysChanged(client: MatrixClient): Promise<void> {
|
||||
if (this.checkingVerificationStatus) {
|
||||
// Flag that we'll need to do another check once the current check completes.
|
||||
this.retryVerificationStatus = true;
|
||||
return;
|
||||
}
|
||||
|
||||
this.checkingVerificationStatus = true;
|
||||
try {
|
||||
do {
|
||||
this.retryVerificationStatus = false;
|
||||
this.userTrustsOwnIdentity = (
|
||||
await client.getCrypto()!.getUserVerificationStatus(client.getUserId()!)
|
||||
).isCrossSigningVerified();
|
||||
} while (this.retryVerificationStatus);
|
||||
} finally {
|
||||
this.checkingVerificationStatus = false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Start checking for and tracking failures.
|
||||
*/
|
||||
public start(): void {
|
||||
public start(client: MatrixClient): void {
|
||||
this.calculateClientProperties(client);
|
||||
this.registerHandlers(client);
|
||||
this.checkInterval = window.setInterval(
|
||||
() => this.checkFailures(Date.now()),
|
||||
DecryptionFailureTracker.CHECK_INTERVAL_MS,
|
||||
|
@ -199,63 +312,130 @@ export class DecryptionFailureTracker {
|
|||
this.trackInterval = window.setInterval(() => this.trackFailures(), DecryptionFailureTracker.TRACK_INTERVAL_MS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear state and stop checking for and tracking failures.
|
||||
*/
|
||||
public stop(): void {
|
||||
if (this.checkInterval) clearInterval(this.checkInterval);
|
||||
if (this.trackInterval) clearInterval(this.trackInterval);
|
||||
private async calculateClientProperties(client: MatrixClient): Promise<void> {
|
||||
const baseProperties: ErrorProperties = {};
|
||||
this.baseProperties = baseProperties;
|
||||
|
||||
this.failures = new Map();
|
||||
this.visibleEvents = new Set();
|
||||
this.visibleFailures = new Map();
|
||||
this.failureCounts = new Map();
|
||||
this.userDomain = client.getDomain() ?? undefined;
|
||||
if (this.userDomain === "matrix.org") {
|
||||
baseProperties.isMatrixDotOrg = true;
|
||||
} else if (this.userDomain !== undefined) {
|
||||
baseProperties.isMatrixDotOrg = false;
|
||||
}
|
||||
|
||||
const crypto = client.getCrypto();
|
||||
if (crypto) {
|
||||
const version = crypto.getVersion();
|
||||
if (version.startsWith("Rust SDK")) {
|
||||
baseProperties.cryptoSDK = "Rust";
|
||||
} else {
|
||||
baseProperties.cryptoSDK = "Legacy";
|
||||
}
|
||||
this.userTrustsOwnIdentity = (
|
||||
await crypto.getUserVerificationStatus(client.getUserId()!)
|
||||
).isCrossSigningVerified();
|
||||
}
|
||||
}
|
||||
|
||||
private registerHandlers(client: MatrixClient): void {
|
||||
// After the client attempts to decrypt an event, we examine it to see
|
||||
// if it needs to be reported.
|
||||
const decryptedHandler = (e: MatrixEvent): void => this.eventDecrypted(e, Date.now());
|
||||
// When our keys change, we check if the cross-signing keys are now trusted.
|
||||
const keysChangedHandler = (): void => {
|
||||
this.handleKeysChanged(client).catch((e) => {
|
||||
console.log("Error handling KeysChanged event", e);
|
||||
});
|
||||
};
|
||||
// When logging out, remove our handlers and destroy state
|
||||
const loggedOutHandler = (): void => {
|
||||
client.removeListener(MatrixEventEvent.Decrypted, decryptedHandler);
|
||||
client.removeListener(CryptoEvent.KeysChanged, keysChangedHandler);
|
||||
client.removeListener(HttpApiEvent.SessionLoggedOut, loggedOutHandler);
|
||||
this.stop();
|
||||
};
|
||||
|
||||
client.on(MatrixEventEvent.Decrypted, decryptedHandler);
|
||||
client.on(CryptoEvent.KeysChanged, keysChangedHandler);
|
||||
client.on(HttpApiEvent.SessionLoggedOut, loggedOutHandler);
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark failures that occurred before nowTs - GRACE_PERIOD_MS as failures that should be
|
||||
* tracked. Only mark one failure per event ID.
|
||||
* Clear state and stop checking for and tracking failures.
|
||||
*/
|
||||
private stop(): void {
|
||||
if (this.checkInterval) clearInterval(this.checkInterval);
|
||||
if (this.trackInterval) clearInterval(this.trackInterval);
|
||||
|
||||
this.userTrustsOwnIdentity = undefined;
|
||||
this.failures = new Map();
|
||||
this.visibleEvents = new Set();
|
||||
this.failuresToReport = new Set();
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark failures as undecryptable or late. Only mark one failure per event ID.
|
||||
*
|
||||
* @param {number} nowTs the timestamp that represents the time now.
|
||||
*/
|
||||
public checkFailures(nowTs: number): void {
|
||||
const failuresGivenGrace: Set<DecryptionFailure> = new Set();
|
||||
const failuresNotReady: Map<string, DecryptionFailure> = new Map();
|
||||
for (const [eventId, failure] of this.visibleFailures) {
|
||||
if (nowTs > failure.ts + DecryptionFailureTracker.GRACE_PERIOD_MS) {
|
||||
failuresGivenGrace.add(failure);
|
||||
this.trackedEvents.add(eventId);
|
||||
for (const [eventId, failure] of this.failures) {
|
||||
if (
|
||||
failure.timeToDecryptMillis !== undefined ||
|
||||
nowTs > failure.ts + DecryptionFailureTracker.MAXIMUM_LATE_DECRYPTION_PERIOD
|
||||
) {
|
||||
// we report failures under two conditions:
|
||||
// - if `timeToDecryptMillis` is set, we successfully decrypted
|
||||
// the event, but we got the key late. We report it so that we
|
||||
// have the late decrytion stats.
|
||||
// - we haven't decrypted yet and it's past the time for it to be
|
||||
// considered a "late" decryption, so we count it as
|
||||
// undecryptable.
|
||||
this.addFailure(eventId, failure);
|
||||
} else {
|
||||
// the event isn't old enough, so we still need to keep track of it
|
||||
failuresNotReady.set(eventId, failure);
|
||||
}
|
||||
}
|
||||
this.visibleFailures = failuresNotReady;
|
||||
this.failures = failuresNotReady;
|
||||
|
||||
// Commented out for now for expediency, we need to consider unbound nature of storing
|
||||
// this in localStorage
|
||||
// this.saveTrackedEvents();
|
||||
|
||||
this.aggregateFailures(failuresGivenGrace);
|
||||
}
|
||||
|
||||
private aggregateFailures(failures: Set<DecryptionFailure>): void {
|
||||
for (const failure of failures) {
|
||||
const errorCode = failure.errorCode;
|
||||
this.failureCounts.set(errorCode, (this.failureCounts.get(errorCode) ?? 0) + 1);
|
||||
}
|
||||
private addFailure(eventId: string, failure: DecryptionFailure): void {
|
||||
this.failuresToReport.add(failure);
|
||||
this.trackedEvents.add(eventId);
|
||||
// once we've added it to trackedEvents, we won't check
|
||||
// visibleEvents for it any more
|
||||
this.visibleEvents.delete(eventId);
|
||||
}
|
||||
|
||||
/**
|
||||
* If there are failures that should be tracked, call the given trackDecryptionFailure
|
||||
* function with the number of failures that should be tracked.
|
||||
* function with the failures that should be tracked.
|
||||
*/
|
||||
public trackFailures(): void {
|
||||
for (const [errorCode, count] of this.failureCounts.entries()) {
|
||||
if (count > 0) {
|
||||
const trackedErrorCode = this.errorCodeMapFn(errorCode);
|
||||
|
||||
this.fn(count, trackedErrorCode, errorCode);
|
||||
this.failureCounts.set(errorCode, 0);
|
||||
for (const failure of this.failuresToReport) {
|
||||
const errorCode = failure.errorCode;
|
||||
const trackedErrorCode = this.errorCodeMapFn(errorCode);
|
||||
const properties: ErrorProperties = {
|
||||
timeToDecryptMillis: failure.timeToDecryptMillis ?? -1,
|
||||
wasVisibleToUser: failure.wasVisibleToUser,
|
||||
};
|
||||
if (failure.isFederated !== undefined) {
|
||||
properties.isFederated = failure.isFederated;
|
||||
}
|
||||
if (failure.userTrustsOwnIdentity !== undefined) {
|
||||
properties.userTrustsOwnIdentity = failure.userTrustsOwnIdentity;
|
||||
}
|
||||
if (this.baseProperties) {
|
||||
Object.assign(properties, this.baseProperties);
|
||||
}
|
||||
this.fn(trackedErrorCode, errorCode, properties);
|
||||
}
|
||||
this.failuresToReport = new Set();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,7 +21,6 @@ import {
|
|||
EventType,
|
||||
HttpApiEvent,
|
||||
MatrixClient,
|
||||
MatrixEventEvent,
|
||||
MatrixEvent,
|
||||
RoomType,
|
||||
SyncStateData,
|
||||
|
@ -1592,11 +1591,7 @@ export default class MatrixChat extends React.PureComponent<IProps, IState> {
|
|||
// tracked events across sessions.
|
||||
// dft.loadTrackedEventHashMap();
|
||||
|
||||
dft.start();
|
||||
|
||||
// When logging out, stop tracking failures and destroy state
|
||||
cli.on(HttpApiEvent.SessionLoggedOut, () => dft.stop());
|
||||
cli.on(MatrixEventEvent.Decrypted, (e) => dft.eventDecrypted(e));
|
||||
dft.start(cli);
|
||||
|
||||
cli.on(ClientEvent.Room, (room) => {
|
||||
if (cli.isCryptoEnabled()) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue