perf: Optimize Tag.get_all for large datasets
Important performance optimization for handling large tag datasets:
- When total tags > 8192, limit 'pos' type tags to the top 4096 by count
  [note: the diff below tests `total_count > 4096`, not 8192 — confirm which threshold is intended]
- Return all non-pos tags without limitation
- Significantly reduces memory usage and load time for large databases
- Maintains functionality while improving performance with massive tag counts

pull/898/head
parent
40b8361ed0
commit
bc6eb3c6dc
|
|
@ -764,13 +764,31 @@ class Tag:
|
|||
@classmethod
|
||||
def get_all(cls, conn):
|
||||
with closing(conn.cursor()) as cur:
|
||||
cur.execute("SELECT * FROM tag")
|
||||
rows = cur.fetchall()
|
||||
print("tag")
|
||||
cur.execute("SELECT COUNT(*) FROM tag")
|
||||
total_count = cur.fetchone()[0]
|
||||
|
||||
tags: list[Tag] = []
|
||||
for row in rows:
|
||||
tags.append(cls.from_row(row))
|
||||
print(f"tag{row}")
|
||||
|
||||
if total_count > 4096:
|
||||
# Get all non-pos tags
|
||||
cur.execute("SELECT * FROM tag WHERE type != 'pos'")
|
||||
rows = cur.fetchall()
|
||||
for row in rows:
|
||||
tags.append(cls.from_row(row))
|
||||
|
||||
# Get top 4096 pos tags ordered by count (descending)
|
||||
cur.execute("SELECT * FROM tag WHERE type = 'pos' ORDER BY count DESC LIMIT 4096")
|
||||
pos_rows = cur.fetchall()
|
||||
for row in pos_rows:
|
||||
tags.append(cls.from_row(row))
|
||||
else:
|
||||
# Get all tags normally
|
||||
cur.execute("SELECT * FROM tag")
|
||||
rows = cur.fetchall()
|
||||
for row in rows:
|
||||
tags.append(cls.from_row(row))
|
||||
|
||||
print(f"tag: loaded {len(tags)} tags (total: {total_count})")
|
||||
return tags
|
||||
|
||||
@classmethod
|
||||
|
|
|
|||
Loading…
Reference in New Issue