perf: Optimize Tag.get_all for large datasets

Important performance optimization for handling large tag datasets:
- When total tags > 8192, limit 'pos' type tags to the top 4096 by count
- Return all non-pos tags without limitation
- Significantly reduces memory usage and load time for large databases
- Maintains functionality while improving performance with massive tag counts
2026-01-22 01:44:11 +08:00 · 2026-01-22 01:44:11 +08:00 · bc6eb3c6dc
parent 40b8361ed0
commit bc6eb3c6dc
1 changed files with 24 additions and 6 deletions
--- a/scripts/iib/db/datamodel.py
+++ b/scripts/iib/db/datamodel.py
@@ -764,13 +764,31 @@ class Tag:
    @classmethod
    def get_all(cls, conn):
        with closing(conn.cursor()) as cur:
-            cur.execute("SELECT * FROM tag")
-            rows = cur.fetchall()
-            print("tag")
+            cur.execute("SELECT COUNT(*) FROM tag")
+            total_count = cur.fetchone()[0]
+            
            tags: list[Tag] = []
-            for row in rows:
-                tags.append(cls.from_row(row))
-                print(f"tag{row}")
+            
+            if total_count > 8192:
+                # Get all non-pos tags
+                cur.execute("SELECT * FROM tag WHERE type != 'pos'")
+                rows = cur.fetchall()
+                for row in rows:
+                    tags.append(cls.from_row(row))
+                
+                # Get top 4096 pos tags ordered by count (descending)
+                cur.execute("SELECT * FROM tag WHERE type = 'pos' ORDER BY count DESC LIMIT 4096")
+                pos_rows = cur.fetchall()
+                for row in pos_rows:
+                    tags.append(cls.from_row(row))
+            else:
+                # Get all tags normally
+                cur.execute("SELECT * FROM tag")
+                rows = cur.fetchall()
+                for row in rows:
+                    tags.append(cls.from_row(row))
+            
+            print(f"tag: loaded {len(tags)} tags (total: {total_count})")
            return tags

    @classmethod