Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Remove old R30 because R30v2 supercedes it #10428

Merged
merged 5 commits into from
May 19, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/10428.removal
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Remove the old version of the R30 (30-day retained users) phone-home metric.
4 changes: 0 additions & 4 deletions synapse/app/phone_stats_home.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,6 @@ async def phone_stats_home(hs, stats, stats_process=_stats_process):
daily_sent_messages = await hs.get_datastore().count_daily_sent_messages()
stats["daily_sent_messages"] = daily_sent_messages

r30_results = await hs.get_datastore().count_r30_users()
for name, count in r30_results.items():
stats["r30_users_" + name] = count

r30v2_results = await hs.get_datastore().count_r30_users()
for name, count in r30v2_results.items():
stats["r30v2_users_" + name] = count
Expand Down
83 changes: 0 additions & 83 deletions synapse/storage/databases/main/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,89 +233,6 @@ def _count_users(self, txn, time_from):
(count,) = txn.fetchone()
return count

async def count_r30_users(self) -> Dict[str, int]:
"""
Counts the number of 30 day retained users, defined as:-
* Users who have created their accounts more than 30 days ago
* Where last seen at most 30 days ago
* Where account creation and last_seen are > 30 days apart

Returns:
A mapping of counts globally as well as broken out by platform.
"""

def _count_r30_users(txn):
thirty_days_in_secs = 86400 * 30
now = int(self._clock.time())
thirty_days_ago_in_secs = now - thirty_days_in_secs

sql = """
SELECT platform, COALESCE(count(*), 0) FROM (
SELECT
users.name, platform, users.creation_ts * 1000,
MAX(uip.last_seen)
FROM users
INNER JOIN (
SELECT
user_id,
last_seen,
CASE
WHEN user_agent LIKE '%%Android%%' THEN 'android'
WHEN user_agent LIKE '%%iOS%%' THEN 'ios'
WHEN user_agent LIKE '%%Electron%%' THEN 'electron'
WHEN user_agent LIKE '%%Mozilla%%' THEN 'web'
WHEN user_agent LIKE '%%Gecko%%' THEN 'web'
ELSE 'unknown'
END
AS platform
FROM user_ips
) uip
ON users.name = uip.user_id
AND users.appservice_id is NULL
AND users.creation_ts < ?
AND uip.last_seen/1000 > ?
AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30
GROUP BY users.name, platform, users.creation_ts
) u GROUP BY platform
"""

results = {}
txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs))

for row in txn:
if row[0] == "unknown":
pass
results[row[0]] = row[1]

sql = """
SELECT COALESCE(count(*), 0) FROM (
SELECT users.name, users.creation_ts * 1000,
MAX(uip.last_seen)
FROM users
INNER JOIN (
SELECT
user_id,
last_seen
FROM user_ips
) uip
ON users.name = uip.user_id
AND appservice_id is NULL
AND users.creation_ts < ?
AND uip.last_seen/1000 > ?
AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30
GROUP BY users.name, users.creation_ts
) u
"""

txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs))

(count,) = txn.fetchone()
results["all"] = count

return results

return await self.db_pool.runInteraction("count_r30_users", _count_r30_users)

async def count_r30v2_users(self) -> Dict[str, int]:
"""
Counts the number of 30 day retained users, defined as users that:
Expand Down
152 changes: 0 additions & 152 deletions tests/app/test_phone_stats_home.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,159 +2,12 @@
from synapse.app.phone_stats_home import start_phone_stats_home
from synapse.rest.client.v1 import login, room

from tests import unittest
from tests.unittest import HomeserverTestCase

FIVE_MINUTES_IN_SECONDS = 300
ONE_DAY_IN_SECONDS = 86400


class PhoneHomeTestCase(HomeserverTestCase):
servlets = [
synapse.rest.admin.register_servlets_for_client_rest_resource,
room.register_servlets,
login.register_servlets,
]

# Override the retention time for the user_ips table because otherwise it
# gets pruned too aggressively for our R30 test.
@unittest.override_config({"user_ips_max_age": "365d"})
def test_r30_minimum_usage(self):
"""
Tests the minimum amount of interaction necessary for the R30 metric
to consider a user 'retained'.
"""

# Register a user, log it in, create a room and send a message
user_id = self.register_user("u1", "secret!")
access_token = self.login("u1", "secret!")
room_id = self.helper.create_room_as(room_creator=user_id, tok=access_token)
self.helper.send(room_id, "message", tok=access_token)

# Check the R30 results do not count that user.
r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
self.assertEqual(r30_results, {"all": 0})

# Advance 30 days (+ 1 second, because strict inequality causes issues if we are
# bang on 30 days later).
self.reactor.advance(30 * ONE_DAY_IN_SECONDS + 1)

# (Make sure the user isn't somehow counted by this point.)
r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
self.assertEqual(r30_results, {"all": 0})

# Send a message (this counts as activity)
self.helper.send(room_id, "message2", tok=access_token)

# We have to wait some time for _update_client_ips_batch to get
# called and update the user_ips table.
self.reactor.advance(2 * 60 * 60)

# *Now* the user is counted.
r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
self.assertEqual(r30_results, {"all": 1, "unknown": 1})

# Advance 29 days. The user has now not posted for 29 days.
self.reactor.advance(29 * ONE_DAY_IN_SECONDS)

# The user is still counted.
r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
self.assertEqual(r30_results, {"all": 1, "unknown": 1})

# Advance another day. The user has now not posted for 30 days.
self.reactor.advance(ONE_DAY_IN_SECONDS)

# The user is now no longer counted in R30.
r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
self.assertEqual(r30_results, {"all": 0})

def test_r30_minimum_usage_using_default_config(self):
"""
Tests the minimum amount of interaction necessary for the R30 metric
to consider a user 'retained'.

N.B. This test does not override the `user_ips_max_age` config setting,
which defaults to 28 days.
"""

# Register a user, log it in, create a room and send a message
user_id = self.register_user("u1", "secret!")
access_token = self.login("u1", "secret!")
room_id = self.helper.create_room_as(room_creator=user_id, tok=access_token)
self.helper.send(room_id, "message", tok=access_token)

# Check the R30 results do not count that user.
r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
self.assertEqual(r30_results, {"all": 0})

# Advance 30 days (+ 1 second, because strict inequality causes issues if we are
# bang on 30 days later).
self.reactor.advance(30 * ONE_DAY_IN_SECONDS + 1)

# (Make sure the user isn't somehow counted by this point.)
r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
self.assertEqual(r30_results, {"all": 0})

# Send a message (this counts as activity)
self.helper.send(room_id, "message2", tok=access_token)

# We have to wait some time for _update_client_ips_batch to get
# called and update the user_ips table.
self.reactor.advance(2 * 60 * 60)

# *Now* the user is counted.
r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
self.assertEqual(r30_results, {"all": 1, "unknown": 1})

# Advance 27 days. The user has now not posted for 27 days.
self.reactor.advance(27 * ONE_DAY_IN_SECONDS)

# The user is still counted.
r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
self.assertEqual(r30_results, {"all": 1, "unknown": 1})

# Advance another day. The user has now not posted for 28 days.
self.reactor.advance(ONE_DAY_IN_SECONDS)

# The user is now no longer counted in R30.
# (This is because the user_ips table has been pruned, which by default
# only preserves the last 28 days of entries.)
r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
self.assertEqual(r30_results, {"all": 0})

def test_r30_user_must_be_retained_for_at_least_a_month(self):
"""
Tests that a newly-registered user must be retained for a whole month
before appearing in the R30 statistic, even if they post every day
during that time!
"""
# Register a user and send a message
user_id = self.register_user("u1", "secret!")
access_token = self.login("u1", "secret!")
room_id = self.helper.create_room_as(room_creator=user_id, tok=access_token)
self.helper.send(room_id, "message", tok=access_token)

# Check the user does not contribute to R30 yet.
r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
self.assertEqual(r30_results, {"all": 0})

for _ in range(30):
# This loop posts a message every day for 30 days
self.reactor.advance(ONE_DAY_IN_SECONDS)
self.helper.send(room_id, "I'm still here", tok=access_token)

# Notice that the user *still* does not contribute to R30!
r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
self.assertEqual(r30_results, {"all": 0})

self.reactor.advance(ONE_DAY_IN_SECONDS)
self.helper.send(room_id, "Still here!", tok=access_token)

# *Now* the user appears in R30.
r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
self.assertEqual(r30_results, {"all": 1, "unknown": 1})


class PhoneHomeR30V2TestCase(HomeserverTestCase):
servlets = [
synapse.rest.admin.register_servlets_for_client_rest_resource,
Expand Down Expand Up @@ -356,11 +209,6 @@ def test_r30v2_returning_dormant_users_not_counted(self):
r30_results, {"all": 0, "android": 0, "electron": 0, "ios": 0, "web": 0}
)

# Check that this is a situation where old R30 differs:
# old R30 DOES count this as 'retained'.
r30_results = self.get_success(store.count_r30_users())
self.assertEqual(r30_results, {"all": 1, "ios": 1})

# Now we want to check that the user will still be able to appear in
# R30v2 as long as the user performs some other activity between
# 30 and 60 days later.
Expand Down