perf: Optimize Tag.get_all for large datasets

Important performance optimization for handling large tag datasets:

- When total tags > 4096 (the threshold the code actually checks), limit 'pos' type tags to the top 4096 by count
- Return all non-pos tags without limitation
- Significantly reduces memory usage and load time for large databases
- Maintains functionality while improving performance with massive tag counts
pull/898/head
zanllp 2026-01-22 01:44:11 +08:00
parent 40b8361ed0
commit bc6eb3c6dc
1 changed files with 24 additions and 6 deletions

View File

@@ -764,13 +764,31 @@ class Tag:
@classmethod
def get_all(cls, conn):
with closing(conn.cursor()) as cur:
cur.execute("SELECT * FROM tag")
rows = cur.fetchall()
print("tag")
cur.execute("SELECT COUNT(*) FROM tag")
total_count = cur.fetchone()[0]
tags: list[Tag] = []
for row in rows:
tags.append(cls.from_row(row))
print(f"tag{row}")
if total_count > 4096:
# Get all non-pos tags
cur.execute("SELECT * FROM tag WHERE type != 'pos'")
rows = cur.fetchall()
for row in rows:
tags.append(cls.from_row(row))
# Get top 4096 pos tags ordered by count (descending)
cur.execute("SELECT * FROM tag WHERE type = 'pos' ORDER BY count DESC LIMIT 4096")
pos_rows = cur.fetchall()
for row in pos_rows:
tags.append(cls.from_row(row))
else:
# Get all tags normally
cur.execute("SELECT * FROM tag")
rows = cur.fetchall()
for row in rows:
tags.append(cls.from_row(row))
print(f"tag: loaded {len(tags)} tags (total: {total_count})")
return tags
@classmethod