Merge 5b470171df into 443074eee9

Add OpenAPI 3.1 specification for ComfyUI API (#13397 )
* Add OpenAPI 3.1 specification for ComfyUI API Adds a comprehensive OpenAPI 3.1 spec documenting all HTTP endpoints exposed by ComfyUI's server, including prompt execution, queue management, file uploads, userdata, settings, system stats, object info, assets, and internal routes. The spec was validated against the source code with adversarial review from multiple models, and passes Spectral linting with zero errors. Also removes openapi.yaml from .gitignore so the spec is tracked. * Mark /api/history endpoints as deprecated Address Jacob's review feedback on PR #13397 by explicitly marking the three /api/history operations as deprecated in the OpenAPI spec: * GET /api/history -> superseded by GET /api/jobs * POST /api/history -> superseded by /api/jobs management * GET /api/history/{prompt_id} -> superseded by GET /api/jobs/{job_id} Each operation gains deprecated: true plus a description that names the replacement. A formal sunset timeline (RFC 8594 Deprecation and RFC 8553 Sunset headers, minimum-runway policy) is being defined separately and will be applied as a follow-up. * Address Spectral lint findings in openapi.yaml - Add operation descriptions to 52 endpoints (prompt, queue, upload, view, models, userdata, settings, assets, internal, etc.) - Add schema descriptions to 22 component schemas - Add parameter descriptions to 8 path parameters that were missing them - Remove 6 unused component schemas: TaskOutput, EmbeddingsResponse, ExtensionsResponse, LogRawResponse, UserInfo, UserDataFullInfo No wire/shape changes. Reduces Spectral findings from 92 to 4. The remaining 4 are real issues (WebSocket 101 on /ws, loose error schema, and two snake_case warnings on real wire field names) and are worth addressing separately. * fix(openapi): address jtreminio oneOf review on /api/userdata Restructure the UserData response schemas to address the review feedback on the `oneOf` without a discriminator, and fix two accuracy bugs found while doing it. Changes - GET /api/userdata response: extract the inline `oneOf` to a named schema (`ListUserdataResponse`) and add the missing third variant returned when `split=true` and `full_info=false` (array of `[relative_path, ...path_components]`). Previously only two of the three actual server response shapes were described. - UserDataResponse (POST endpoints): correct the description — this schema is a single item, not a list — and point at the canonical `GetUserDataResponseFullFile` schema instead of the duplicate `UserDataResponseFull`. Also removes the malformed blank line in `UserDataResponseShort`. - Delete the now-unused `UserDataResponseFull` and `UserDataResponseShort` schemas (replaced by reuse of `GetUserDataResponseFullFile` and an inline string variant). - Add an `x-variant-selector` vendor extension to both `oneOf` sites documenting which query-parameter combination selects which branch, since a true OpenAPI `discriminator` is not applicable (the variants are type-disjoint and the selector lives in the request, not the response body). This keeps the shapes the server actually emits (no wire-breaking change) while making the selection rule explicit for SDK generators and readers. --------- Co-authored-by: guill <jacob.e.segal@gmail.com>
2026-04-24 00:06:47 -04:00 · 2026-04-23 21:00:25 -07:00 · 2026-04-23 20:51:34 -07:00 · 2026-04-23 20:42:22 -07:00 · 2026-04-23 22:19:00 -04:00 · 2026-04-23 16:56:13 -07:00
33 changed files with 4939 additions and 89 deletions
--- a/.gitignore
+++ b/.gitignore
@ -21,6 +21,5 @@ venv*/
 *.log
 web_custom_versions/
 .DS_Store
-openapi.yaml
 filtered-openapi.yaml
 uv.lock
--- a/blueprints/.glsl/Glow_30.frag
+++ b/blueprints/.glsl/Glow_30.frag
@ -2,7 +2,6 @@
 precision mediump float;

 uniform sampler2D u_image0;
-uniform vec2 u_resolution;
 uniform int u_int0;      // Blend mode
 uniform int u_int1;      // Color tint
 uniform float u_float0;  // Intensity
@ -75,7 +74,7 @@ void main() {
    float t0 = threshold - 0.15;
    float t1 = threshold + 0.15;
    
-    vec2 texelSize = 1.0 / u_resolution;
+    vec2 texelSize = 1.0 / vec2(textureSize(u_image0, 0));
    float radius2 = radius * radius;
    
    float sampleScale = clamp(radius * 0.75, 0.35, 1.0);
--- a/blueprints/.glsl/Image_Blur_1.frag
+++ b/blueprints/.glsl/Image_Blur_1.frag
@ -12,7 +12,6 @@ const int RADIAL_SAMPLES = 12;
 const float RADIAL_STRENGTH = 0.0003;

 uniform sampler2D u_image0;
-uniform vec2 u_resolution;
 uniform int u_int0;      // Blur type (BLUR_GAUSSIAN, BLUR_BOX, BLUR_RADIAL)
 uniform float u_float0;  // Blur radius/amount
 uniform int u_pass;      // Pass index (0 = horizontal, 1 = vertical)
@ -25,7 +24,7 @@ float gaussian(float x, float sigma) {
 }

 void main() {
-    vec2 texelSize = 1.0 / u_resolution;
+    vec2 texelSize = 1.0 / vec2(textureSize(u_image0, 0));
    float radius = max(u_float0, 0.0);

    // Radial (angular) blur - single pass, doesn't use separable
--- a/blueprints/.glsl/Sharpen_23.frag
+++ b/blueprints/.glsl/Sharpen_23.frag
@ -2,14 +2,13 @@
 precision highp float;

 uniform sampler2D u_image0;
-uniform vec2 u_resolution;
 uniform float u_float0;  // strength [0.0 – 2.0] typical: 0.3–1.0

 in vec2 v_texCoord;
 layout(location = 0) out vec4 fragColor0;

 void main() {
-    vec2 texel = 1.0 / u_resolution;
+    vec2 texel = 1.0 / vec2(textureSize(u_image0, 0));
    
    // Sample center and neighbors
    vec4 center = texture(u_image0, v_texCoord);
--- a/blueprints/.glsl/Unsharp_Mask_26.frag
+++ b/blueprints/.glsl/Unsharp_Mask_26.frag
@ -2,7 +2,6 @@
 precision highp float;

 uniform sampler2D u_image0;
-uniform vec2 u_resolution;
 uniform float u_float0;  // amount    [0.0 - 3.0]  typical: 0.5-1.5
 uniform float u_float1;  // radius    [0.5 - 10.0] blur radius in pixels
 uniform float u_float2;  // threshold [0.0 - 0.1]  min difference to sharpen
@ -19,7 +18,7 @@ float getLuminance(vec3 color) {
 }

 void main() {
-    vec2 texel = 1.0 / u_resolution;
+    vec2 texel = 1.0 / vec2(textureSize(u_image0, 0));
    float radius = max(u_float1, 0.5);
    float amount = u_float0;
    float threshold = u_float2;
--- a/blueprints/Glow.json
+++ b/blueprints/Glow.json
@ -268,7 +268,7 @@
              "Node name for S&R": "GLSLShader"
            },
            "widgets_values": [
-              "#version 300 es\nprecision mediump float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform int u_int0;      // Blend mode\nuniform int u_int1;      // Color tint\nuniform float u_float0;  // Intensity\nuniform float u_float1;  // Radius\nuniform float u_float2;  // Threshold\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nconst int BLEND_ADD      = 0;\nconst int BLEND_SCREEN   = 1;\nconst int BLEND_SOFT     = 2;\nconst int BLEND_OVERLAY  = 3;\nconst int BLEND_LIGHTEN  = 4;\n\nconst float GOLDEN_ANGLE = 2.39996323;\nconst int MAX_SAMPLES = 48;\nconst vec3 LUMA = vec3(0.299, 0.587, 0.114);\n\nfloat hash(vec2 p) {\n    p = fract(p * vec2(123.34, 456.21));\n    p += dot(p, p + 45.32);\n    return fract(p.x * p.y);\n}\n\nvec3 hexToRgb(int h) {\n    return vec3(\n        float((h >> 16) & 255),\n        float((h >> 8) & 255),\n        float(h & 255)\n    ) * (1.0 / 255.0);\n}\n\nvec3 blend(vec3 base, vec3 glow, int mode) {\n    if (mode == BLEND_SCREEN) {\n        return 1.0 - (1.0 - base) * (1.0 - glow);\n    }\n    if (mode == BLEND_SOFT) {\n        return mix(\n            base - (1.0 - 2.0 * glow) * base * (1.0 - base),\n            base + (2.0 * glow - 1.0) * (sqrt(base) - base),\n            step(0.5, glow)\n        );\n    }\n    if (mode == BLEND_OVERLAY) {\n        return mix(\n            2.0 * base * glow,\n            1.0 - 2.0 * (1.0 - base) * (1.0 - glow),\n            step(0.5, base)\n        );\n    }\n    if (mode == BLEND_LIGHTEN) {\n        return max(base, glow);\n    }\n    return base + glow;\n}\n\nvoid main() {\n    vec4 original = texture(u_image0, v_texCoord);\n    \n    float intensity = u_float0 * 0.05;\n    float radius = u_float1 * u_float1 * 0.012;\n    \n    if (intensity < 0.001 || radius < 0.1) {\n        fragColor = original;\n        return;\n    }\n    \n    float threshold = 1.0 - u_float2 * 0.01;\n    float t0 = threshold - 0.15;\n    float t1 = threshold + 0.15;\n    \n    vec2 texelSize = 1.0 / u_resolution;\n    float radius2 = radius * radius;\n    \n    float sampleScale = clamp(radius * 0.75, 0.35, 1.0);\n    int samples = int(float(MAX_SAMPLES) * sampleScale);\n    \n    float noise = hash(gl_FragCoord.xy);\n    float angleOffset = noise * GOLDEN_ANGLE;\n    float radiusJitter = 0.85 + noise * 0.3;\n    \n    float ca = cos(GOLDEN_ANGLE);\n    float sa = sin(GOLDEN_ANGLE);\n    vec2 dir = vec2(cos(angleOffset), sin(angleOffset));\n    \n    vec3 glow = vec3(0.0);\n    float totalWeight = 0.0;\n    \n    // Center tap\n    float centerMask = smoothstep(t0, t1, dot(original.rgb, LUMA));\n    glow += original.rgb * centerMask * 2.0;\n    totalWeight += 2.0;\n    \n    for (int i = 1; i < MAX_SAMPLES; i++) {\n        if (i >= samples) break;\n        \n        float fi = float(i);\n        float dist = sqrt(fi / float(samples)) * radius * radiusJitter;\n        \n        vec2 offset = dir * dist * texelSize;\n        vec3 c = texture(u_image0, v_texCoord + offset).rgb;\n        float mask = smoothstep(t0, t1, dot(c, LUMA));\n        \n        float w = 1.0 - (dist * dist) / (radius2 * 1.5);\n        w = max(w, 0.0);\n        w *= w;\n        \n        glow += c * mask * w;\n        totalWeight += w;\n        \n        dir = vec2(\n            dir.x * ca - dir.y * sa,\n            dir.x * sa + dir.y * ca\n        );\n    }\n    \n    glow *= intensity / max(totalWeight, 0.001);\n    \n    if (u_int1 > 0) {\n        glow *= hexToRgb(u_int1);\n    }\n    \n    vec3 result = blend(original.rgb, glow, u_int0);\n    result += (noise - 0.5) * (1.0 / 255.0);\n    \n    fragColor = vec4(clamp(result, 0.0, 1.0), original.a);\n}",
+              "#version 300 es\nprecision mediump float;\n\nuniform sampler2D u_image0;\nuniform int u_int0;      // Blend mode\nuniform int u_int1;      // Color tint\nuniform float u_float0;  // Intensity\nuniform float u_float1;  // Radius\nuniform float u_float2;  // Threshold\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nconst int BLEND_ADD      = 0;\nconst int BLEND_SCREEN   = 1;\nconst int BLEND_SOFT     = 2;\nconst int BLEND_OVERLAY  = 3;\nconst int BLEND_LIGHTEN  = 4;\n\nconst float GOLDEN_ANGLE = 2.39996323;\nconst int MAX_SAMPLES = 48;\nconst vec3 LUMA = vec3(0.299, 0.587, 0.114);\n\nfloat hash(vec2 p) {\n    p = fract(p * vec2(123.34, 456.21));\n    p += dot(p, p + 45.32);\n    return fract(p.x * p.y);\n}\n\nvec3 hexToRgb(int h) {\n    return vec3(\n        float((h >> 16) & 255),\n        float((h >> 8) & 255),\n        float(h & 255)\n    ) * (1.0 / 255.0);\n}\n\nvec3 blend(vec3 base, vec3 glow, int mode) {\n    if (mode == BLEND_SCREEN) {\n        return 1.0 - (1.0 - base) * (1.0 - glow);\n    }\n    if (mode == BLEND_SOFT) {\n        return mix(\n            base - (1.0 - 2.0 * glow) * base * (1.0 - base),\n            base + (2.0 * glow - 1.0) * (sqrt(base) - base),\n            step(0.5, glow)\n        );\n    }\n    if (mode == BLEND_OVERLAY) {\n        return mix(\n            2.0 * base * glow,\n            1.0 - 2.0 * (1.0 - base) * (1.0 - glow),\n            step(0.5, base)\n        );\n    }\n    if (mode == BLEND_LIGHTEN) {\n        return max(base, glow);\n    }\n    return base + glow;\n}\n\nvoid main() {\n    vec4 original = texture(u_image0, v_texCoord);\n    \n    float intensity = u_float0 * 0.05;\n    float radius = u_float1 * u_float1 * 0.012;\n    \n    if (intensity < 0.001 || radius < 0.1) {\n        fragColor = original;\n        return;\n    }\n    \n    float threshold = 1.0 - u_float2 * 0.01;\n    float t0 = threshold - 0.15;\n    float t1 = threshold + 0.15;\n    \n    vec2 texelSize = 1.0 / vec2(textureSize(u_image0, 0));\n    float radius2 = radius * radius;\n    \n    float sampleScale = clamp(radius * 0.75, 0.35, 1.0);\n    int samples = int(float(MAX_SAMPLES) * sampleScale);\n    \n    float noise = hash(gl_FragCoord.xy);\n    float angleOffset = noise * GOLDEN_ANGLE;\n    float radiusJitter = 0.85 + noise * 0.3;\n    \n    float ca = cos(GOLDEN_ANGLE);\n    float sa = sin(GOLDEN_ANGLE);\n    vec2 dir = vec2(cos(angleOffset), sin(angleOffset));\n    \n    vec3 glow = vec3(0.0);\n    float totalWeight = 0.0;\n    \n    // Center tap\n    float centerMask = smoothstep(t0, t1, dot(original.rgb, LUMA));\n    glow += original.rgb * centerMask * 2.0;\n    totalWeight += 2.0;\n    \n    for (int i = 1; i < MAX_SAMPLES; i++) {\n        if (i >= samples) break;\n        \n        float fi = float(i);\n        float dist = sqrt(fi / float(samples)) * radius * radiusJitter;\n        \n        vec2 offset = dir * dist * texelSize;\n        vec3 c = texture(u_image0, v_texCoord + offset).rgb;\n        float mask = smoothstep(t0, t1, dot(c, LUMA));\n        \n        float w = 1.0 - (dist * dist) / (radius2 * 1.5);\n        w = max(w, 0.0);\n        w *= w;\n        \n        glow += c * mask * w;\n        totalWeight += w;\n        \n        dir = vec2(\n            dir.x * ca - dir.y * sa,\n            dir.x * sa + dir.y * ca\n        );\n    }\n    \n    glow *= intensity / max(totalWeight, 0.001);\n    \n    if (u_int1 > 0) {\n        glow *= hexToRgb(u_int1);\n    }\n    \n    vec3 result = blend(original.rgb, glow, u_int0);\n    result += (noise - 0.5) * (1.0 / 255.0);\n    \n    fragColor = vec4(clamp(result, 0.0, 1.0), original.a);\n}",
              "from_input"
            ]
          },
--- a/blueprints/Image
+++ b/blueprints/Image
@ -331,7 +331,7 @@
              "Node name for S&R": "GLSLShader"
            },
            "widgets_values": [
-              "#version 300 es\n#pragma passes 2\nprecision highp float;\n\n// Blur type constants\nconst int BLUR_GAUSSIAN = 0;\nconst int BLUR_BOX = 1;\nconst int BLUR_RADIAL = 2;\n\n// Radial blur config\nconst int RADIAL_SAMPLES = 12;\nconst float RADIAL_STRENGTH = 0.0003;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform int u_int0;      // Blur type (BLUR_GAUSSIAN, BLUR_BOX, BLUR_RADIAL)\nuniform float u_float0;  // Blur radius/amount\nuniform int u_pass;      // Pass index (0 = horizontal, 1 = vertical)\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nfloat gaussian(float x, float sigma) {\n    return exp(-(x * x) / (2.0 * sigma * sigma));\n}\n\nvoid main() {\n    vec2 texelSize = 1.0 / u_resolution;\n    float radius = max(u_float0, 0.0);\n\n    // Radial (angular) blur - single pass, doesn't use separable\n    if (u_int0 == BLUR_RADIAL) {\n        // Only execute on first pass\n        if (u_pass > 0) {\n            fragColor0 = texture(u_image0, v_texCoord);\n            return;\n        }\n\n        vec2 center = vec2(0.5);\n        vec2 dir = v_texCoord - center;\n        float dist = length(dir);\n\n        if (dist < 1e-4) {\n            fragColor0 = texture(u_image0, v_texCoord);\n            return;\n        }\n\n        vec4 sum = vec4(0.0);\n        float totalWeight = 0.0;\n        float angleStep = radius * RADIAL_STRENGTH;\n\n        dir /= dist;\n\n        float cosStep = cos(angleStep);\n        float sinStep = sin(angleStep);\n\n        float negAngle = -float(RADIAL_SAMPLES) * angleStep;\n        vec2 rotDir = vec2(\n            dir.x * cos(negAngle) - dir.y * sin(negAngle),\n            dir.x * sin(negAngle) + dir.y * cos(negAngle)\n        );\n\n        for (int i = -RADIAL_SAMPLES; i <= RADIAL_SAMPLES; i++) {\n            vec2 uv = center + rotDir * dist;\n            float w = 1.0 - abs(float(i)) / float(RADIAL_SAMPLES);\n            sum += texture(u_image0, uv) * w;\n            totalWeight += w;\n\n            rotDir = vec2(\n                rotDir.x * cosStep - rotDir.y * sinStep,\n                rotDir.x * sinStep + rotDir.y * cosStep\n            );\n        }\n\n        fragColor0 = sum / max(totalWeight, 0.001);\n        return;\n    }\n\n    // Separable Gaussian / Box blur\n    int samples = int(ceil(radius));\n\n    if (samples == 0) {\n        fragColor0 = texture(u_image0, v_texCoord);\n        return;\n    }\n\n    // Direction: pass 0 = horizontal, pass 1 = vertical\n    vec2 dir = (u_pass == 0) ? vec2(1.0, 0.0) : vec2(0.0, 1.0);\n\n    vec4 color = vec4(0.0);\n    float totalWeight = 0.0;\n    float sigma = radius / 2.0;\n\n    for (int i = -samples; i <= samples; i++) {\n        vec2 offset = dir * float(i) * texelSize;\n        vec4 sample_color = texture(u_image0, v_texCoord + offset);\n\n        float weight;\n        if (u_int0 == BLUR_GAUSSIAN) {\n            weight = gaussian(float(i), sigma);\n        } else {\n            // BLUR_BOX\n            weight = 1.0;\n        }\n\n        color += sample_color * weight;\n        totalWeight += weight;\n    }\n\n    fragColor0 = color / totalWeight;\n}\n",
+              "#version 300 es\n#pragma passes 2\nprecision highp float;\n\n// Blur type constants\nconst int BLUR_GAUSSIAN = 0;\nconst int BLUR_BOX = 1;\nconst int BLUR_RADIAL = 2;\n\n// Radial blur config\nconst int RADIAL_SAMPLES = 12;\nconst float RADIAL_STRENGTH = 0.0003;\n\nuniform sampler2D u_image0;\nuniform int u_int0;      // Blur type (BLUR_GAUSSIAN, BLUR_BOX, BLUR_RADIAL)\nuniform float u_float0;  // Blur radius/amount\nuniform int u_pass;      // Pass index (0 = horizontal, 1 = vertical)\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nfloat gaussian(float x, float sigma) {\n    return exp(-(x * x) / (2.0 * sigma * sigma));\n}\n\nvoid main() {\n    vec2 texelSize = 1.0 / vec2(textureSize(u_image0, 0));\n    float radius = max(u_float0, 0.0);\n\n    // Radial (angular) blur - single pass, doesn't use separable\n    if (u_int0 == BLUR_RADIAL) {\n        // Only execute on first pass\n        if (u_pass > 0) {\n            fragColor0 = texture(u_image0, v_texCoord);\n            return;\n        }\n\n        vec2 center = vec2(0.5);\n        vec2 dir = v_texCoord - center;\n        float dist = length(dir);\n\n        if (dist < 1e-4) {\n            fragColor0 = texture(u_image0, v_texCoord);\n            return;\n        }\n\n        vec4 sum = vec4(0.0);\n        float totalWeight = 0.0;\n        float angleStep = radius * RADIAL_STRENGTH;\n\n        dir /= dist;\n\n        float cosStep = cos(angleStep);\n        float sinStep = sin(angleStep);\n\n        float negAngle = -float(RADIAL_SAMPLES) * angleStep;\n        vec2 rotDir = vec2(\n            dir.x * cos(negAngle) - dir.y * sin(negAngle),\n            dir.x * sin(negAngle) + dir.y * cos(negAngle)\n        );\n\n        for (int i = -RADIAL_SAMPLES; i <= RADIAL_SAMPLES; i++) {\n            vec2 uv = center + rotDir * dist;\n            float w = 1.0 - abs(float(i)) / float(RADIAL_SAMPLES);\n            sum += texture(u_image0, uv) * w;\n            totalWeight += w;\n\n            rotDir = vec2(\n                rotDir.x * cosStep - rotDir.y * sinStep,\n                rotDir.x * sinStep + rotDir.y * cosStep\n            );\n        }\n\n        fragColor0 = sum / max(totalWeight, 0.001);\n        return;\n    }\n\n    // Separable Gaussian / Box blur\n    int samples = int(ceil(radius));\n\n    if (samples == 0) {\n        fragColor0 = texture(u_image0, v_texCoord);\n        return;\n    }\n\n    // Direction: pass 0 = horizontal, pass 1 = vertical\n    vec2 dir = (u_pass == 0) ? vec2(1.0, 0.0) : vec2(0.0, 1.0);\n\n    vec4 color = vec4(0.0);\n    float totalWeight = 0.0;\n    float sigma = radius / 2.0;\n\n    for (int i = -samples; i <= samples; i++) {\n        vec2 offset = dir * float(i) * texelSize;\n        vec4 sample_color = texture(u_image0, v_texCoord + offset);\n\n        float weight;\n        if (u_int0 == BLUR_GAUSSIAN) {\n            weight = gaussian(float(i), sigma);\n        } else {\n            // BLUR_BOX\n            weight = 1.0;\n        }\n\n        color += sample_color * weight;\n        totalWeight += weight;\n    }\n\n    fragColor0 = color / totalWeight;\n}\n",
              "from_input"
            ]
          }
--- a/blueprints/Sharpen.json
+++ b/blueprints/Sharpen.json
@ -267,7 +267,7 @@
              "Node name for S&R": "GLSLShader"
            },
            "widgets_values": [
-              "#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0;  // strength [0.0 – 2.0] typical: 0.3–1.0\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nvoid main() {\n    vec2 texel = 1.0 / u_resolution;\n    \n    // Sample center and neighbors\n    vec4 center = texture(u_image0, v_texCoord);\n    vec4 top    = texture(u_image0, v_texCoord + vec2( 0.0, -texel.y));\n    vec4 bottom = texture(u_image0, v_texCoord + vec2( 0.0,  texel.y));\n    vec4 left   = texture(u_image0, v_texCoord + vec2(-texel.x,  0.0));\n    vec4 right  = texture(u_image0, v_texCoord + vec2( texel.x,  0.0));\n    \n    // Edge enhancement (Laplacian)\n    vec4 edges = center * 4.0 - top - bottom - left - right;\n    \n    // Add edges back scaled by strength\n    vec4 sharpened = center + edges * u_float0;\n    \n    fragColor0 = vec4(clamp(sharpened.rgb, 0.0, 1.0), center.a);\n}",
+              "#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform float u_float0;  // strength [0.0 – 2.0] typical: 0.3–1.0\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nvoid main() {\n    vec2 texel = 1.0 / vec2(textureSize(u_image0, 0));\n    \n    // Sample center and neighbors\n    vec4 center = texture(u_image0, v_texCoord);\n    vec4 top    = texture(u_image0, v_texCoord + vec2( 0.0, -texel.y));\n    vec4 bottom = texture(u_image0, v_texCoord + vec2( 0.0,  texel.y));\n    vec4 left   = texture(u_image0, v_texCoord + vec2(-texel.x,  0.0));\n    vec4 right  = texture(u_image0, v_texCoord + vec2( texel.x,  0.0));\n    \n    // Edge enhancement (Laplacian)\n    vec4 edges = center * 4.0 - top - bottom - left - right;\n    \n    // Add edges back scaled by strength\n    vec4 sharpened = center + edges * u_float0;\n    \n    fragColor0 = vec4(clamp(sharpened.rgb, 0.0, 1.0), center.a);\n}",
              "from_input"
            ]
          }
--- a/blueprints/Unsharp
+++ b/blueprints/Unsharp
@ -383,7 +383,7 @@
              "Node name for S&R": "GLSLShader"
            },
            "widgets_values": [
-              "#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0;  // amount    [0.0 - 3.0]  typical: 0.5-1.5\nuniform float u_float1;  // radius    [0.5 - 10.0] blur radius in pixels\nuniform float u_float2;  // threshold [0.0 - 0.1]  min difference to sharpen\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nfloat gaussian(float x, float sigma) {\n    return exp(-(x * x) / (2.0 * sigma * sigma));\n}\n\nfloat getLuminance(vec3 color) {\n    return dot(color, vec3(0.2126, 0.7152, 0.0722));\n}\n\nvoid main() {\n    vec2 texel = 1.0 / u_resolution;\n    float radius = max(u_float1, 0.5);\n    float amount = u_float0;\n    float threshold = u_float2;\n\n    vec4 original = texture(u_image0, v_texCoord);\n\n    // Gaussian blur for the \"unsharp\" mask\n    int samples = int(ceil(radius));\n    float sigma = radius / 2.0;\n\n    vec4 blurred = vec4(0.0);\n    float totalWeight = 0.0;\n\n    for (int x = -samples; x <= samples; x++) {\n        for (int y = -samples; y <= samples; y++) {\n            vec2 offset = vec2(float(x), float(y)) * texel;\n            vec4 sample_color = texture(u_image0, v_texCoord + offset);\n\n            float dist = length(vec2(float(x), float(y)));\n            float weight = gaussian(dist, sigma);\n            blurred += sample_color * weight;\n            totalWeight += weight;\n        }\n    }\n    blurred /= totalWeight;\n\n    // Unsharp mask = original - blurred\n    vec3 mask = original.rgb - blurred.rgb;\n\n    // Luminance-based threshold with smooth falloff\n    float lumaDelta = abs(getLuminance(original.rgb) - getLuminance(blurred.rgb));\n    float thresholdScale = smoothstep(0.0, threshold, lumaDelta);\n    mask *= thresholdScale;\n\n    // Sharpen: original + mask * amount\n    vec3 sharpened = original.rgb + mask * amount;\n\n    fragColor0 = vec4(clamp(sharpened, 0.0, 1.0), original.a);\n}\n",
+              "#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform float u_float0;  // amount    [0.0 - 3.0]  typical: 0.5-1.5\nuniform float u_float1;  // radius    [0.5 - 10.0] blur radius in pixels\nuniform float u_float2;  // threshold [0.0 - 0.1]  min difference to sharpen\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nfloat gaussian(float x, float sigma) {\n    return exp(-(x * x) / (2.0 * sigma * sigma));\n}\n\nfloat getLuminance(vec3 color) {\n    return dot(color, vec3(0.2126, 0.7152, 0.0722));\n}\n\nvoid main() {\n    vec2 texel = 1.0 / vec2(textureSize(u_image0, 0));\n    float radius = max(u_float1, 0.5);\n    float amount = u_float0;\n    float threshold = u_float2;\n\n    vec4 original = texture(u_image0, v_texCoord);\n\n    // Gaussian blur for the \"unsharp\" mask\n    int samples = int(ceil(radius));\n    float sigma = radius / 2.0;\n\n    vec4 blurred = vec4(0.0);\n    float totalWeight = 0.0;\n\n    for (int x = -samples; x <= samples; x++) {\n        for (int y = -samples; y <= samples; y++) {\n            vec2 offset = vec2(float(x), float(y)) * texel;\n            vec4 sample_color = texture(u_image0, v_texCoord + offset);\n\n            float dist = length(vec2(float(x), float(y)));\n            float weight = gaussian(dist, sigma);\n            blurred += sample_color * weight;\n            totalWeight += weight;\n        }\n    }\n    blurred /= totalWeight;\n\n    // Unsharp mask = original - blurred\n    vec3 mask = original.rgb - blurred.rgb;\n\n    // Luminance-based threshold with smooth falloff\n    float lumaDelta = abs(getLuminance(original.rgb) - getLuminance(blurred.rgb));\n    float thresholdScale = smoothstep(0.0, threshold, lumaDelta);\n    mask *= thresholdScale;\n\n    // Sharpen: original + mask * amount\n    vec3 sharpened = original.rgb + mask * amount;\n\n    fragColor0 = vec4(clamp(sharpened, 0.0, 1.0), original.a);\n}\n",
              "from_input"
            ]
          }
--- a/comfy/ldm/modules/attention.py
+++ b/comfy/ldm/modules/attention.py
@ -14,6 +14,8 @@ from .sub_quadratic_attention import efficient_dot_product_attention

 from comfy import model_management

+TORCH_HAS_GQA = model_management.torch_version_numeric >= (2, 5)
+
 if model_management.xformers_enabled():
    import xformers
    import xformers.ops
@ -150,7 +152,12 @@ def attention_basic(q, k, v, heads, mask=None, attn_precision=None, skip_reshape
        b, _, dim_head = q.shape
        dim_head //= heads

-    scale = dim_head ** -0.5
+    if kwargs.get("enable_gqa", False) and q.shape[-3] != k.shape[-3]:
+        n_rep = q.shape[-3] // k.shape[-3]
+        k = k.repeat_interleave(n_rep, dim=-3)
+        v = v.repeat_interleave(n_rep, dim=-3)
+
+    scale = kwargs.get("scale", dim_head ** -0.5)

    h = heads
    if skip_reshape:
@ -219,6 +226,10 @@ def attention_sub_quad(query, key, value, heads, mask=None, attn_precision=None,
        b, _, dim_head = query.shape
        dim_head //= heads

+    if "scale" in kwargs:
+        # Pre-scale query to match requested scale (cancels internal 1/sqrt(dim_head))
+        query = query * (kwargs["scale"] * dim_head ** 0.5)
+
    if skip_reshape:
        query = query.reshape(b * heads, -1, dim_head)
        value = value.reshape(b * heads, -1, dim_head)
@ -290,7 +301,7 @@ def attention_split(q, k, v, heads, mask=None, attn_precision=None, skip_reshape
        b, _, dim_head = q.shape
        dim_head //= heads

-    scale = dim_head ** -0.5
+    scale = kwargs.get("scale", dim_head ** -0.5)

    if skip_reshape:
         q, k, v = map(
@ -500,8 +511,13 @@ def attention_pytorch(q, k, v, heads, mask=None, attn_precision=None, skip_resha
        if mask.ndim == 3:
            mask = mask.unsqueeze(1)

+    # Pass through extra SDPA kwargs (scale, enable_gqa) if provided
+    # enable_gqa requires PyTorch 2.5+; older versions use manual KV expansion above
+    sdpa_keys = ("scale", "enable_gqa") if TORCH_HAS_GQA else ("scale",)
+    sdpa_extra = {k: v for k, v in kwargs.items() if k in sdpa_keys}
+
    if SDP_BATCH_LIMIT >= b:
-        out = comfy.ops.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0.0, is_causal=False)
+        out = comfy.ops.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0.0, is_causal=False, **sdpa_extra)
        if not skip_output_reshape:
            out = (
                out.transpose(1, 2).reshape(b, -1, heads * dim_head)
@ -519,7 +535,7 @@ def attention_pytorch(q, k, v, heads, mask=None, attn_precision=None, skip_resha
                k[i : i + SDP_BATCH_LIMIT],
                v[i : i + SDP_BATCH_LIMIT],
                attn_mask=m,
-                dropout_p=0.0, is_causal=False
+                dropout_p=0.0, is_causal=False, **sdpa_extra
            ).transpose(1, 2).reshape(-1, q.shape[2], heads * dim_head)
    return out

--- a/comfy/ldm/sam3/detector.py
+++ b/comfy/ldm/sam3/detector.py
@ -54,7 +54,7 @@ class SplitMHA(nn.Module):
        if mask is not None and mask.ndim == 2:
            mask = mask[:, None, None, :]  # [B, T] -> [B, 1, 1, T] for SDPA broadcast
        dtype = q.dtype  # manual_cast may produce mixed dtypes
-        out = optimized_attention(q, k.to(dtype), v.to(dtype), self.num_heads, mask=mask)
+        out = optimized_attention(q, k.to(dtype), v.to(dtype), self.num_heads, mask=mask, low_precision_attention=False)
        return self.out_proj(out)


--- a/comfy/ldm/sam3/sam.py
+++ b/comfy/ldm/sam3/sam.py
@ -40,7 +40,7 @@ class SAMAttention(nn.Module):
        q = self.q_proj(q)
        k = self.k_proj(k)
        v = self.v_proj(v)
-        return self.out_proj(optimized_attention(q, k, v, self.num_heads))
+        return self.out_proj(optimized_attention(q, k, v, self.num_heads, low_precision_attention=False))


 class TwoWayAttentionBlock(nn.Module):
@ -179,7 +179,7 @@ class Attention(nn.Module):
        q, k, v = qkv.permute(2, 0, 3, 1, 4).unbind(dim=0)
        if self.use_rope and freqs_cis is not None:
            q, k = apply_rope(q, k, freqs_cis)
-        return self.proj(optimized_attention(q, k, v, self.num_heads, skip_reshape=True))
+        return self.proj(optimized_attention(q, k, v, self.num_heads, skip_reshape=True, low_precision_attention=False))


 class Block(nn.Module):
--- a/comfy/ldm/sam3/tracker.py
+++ b/comfy/ldm/sam3/tracker.py
@ -364,7 +364,7 @@ class SplitAttn(nn.Module):
        v = self.v_proj(v)
        if rope is not None:
            q, k = apply_rope_memory(q, k, rope, self.num_heads, num_k_exclude_rope)
-        out = optimized_attention(q, k, v, self.num_heads)
+        out = optimized_attention(q, k, v, self.num_heads, low_precision_attention=False)
        return self.out_proj(out)


@ -657,7 +657,7 @@ class DecoupledMemoryAttnLayer(nn.Module):
        v = self.self_attn_v_proj(normed)
        if rope is not None:
            q, k = apply_rope_memory(q, k, rope, self.num_heads, 0)
-        x = x + self.self_attn_out_proj(optimized_attention(q, k, v, self.num_heads))
+        x = x + self.self_attn_out_proj(optimized_attention(q, k, v, self.num_heads, low_precision_attention=False))

        # Decoupled cross-attention: fuse image and memory projections
        normed = self.norm2(x)
@ -668,7 +668,7 @@ class DecoupledMemoryAttnLayer(nn.Module):
        v = self.cross_attn_v_proj(memory)
        if rope is not None:
            q, k = apply_rope_memory(q, k, rope, self.num_heads, num_k_exclude_rope)
-        x = x + self.cross_attn_out_proj(optimized_attention(q, k, v, self.num_heads))
+        x = x + self.cross_attn_out_proj(optimized_attention(q, k, v, self.num_heads, low_precision_attention=False))

        # FFN
        x = x + self.linear2(F.gelu(self.linear1(self.norm3(x))))
--- a/comfy/ops.py
+++ b/comfy/ops.py
@ -1159,6 +1159,93 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                        self._buffers[key] = fn(buf)
                return self

+        class Embedding(manual_cast.Embedding):
+            def _load_from_state_dict(self, state_dict, prefix, local_metadata,
+                                    strict, missing_keys, unexpected_keys, error_msgs):
+                weight_key = f"{prefix}weight"
+                layer_conf = state_dict.pop(f"{prefix}comfy_quant", None)
+                if layer_conf is not None:
+                    layer_conf = json.loads(layer_conf.numpy().tobytes())
+
+                # Only fp8 makes sense for embeddings (per-row dequant via index select).
+                # Block-scaled formats (NVFP4, MXFP8) can't do per-row lookup efficiently.
+                quant_format = layer_conf.get("format", None) if layer_conf is not None else None
+                if quant_format in ["float8_e4m3fn", "float8_e5m2"] and weight_key in state_dict:
+                    self.quant_format = quant_format
+                    qconfig = QUANT_ALGOS[quant_format]
+                    layout_cls = get_layout_class(qconfig["comfy_tensor_layout"])
+                    weight = state_dict.pop(weight_key)
+                    manually_loaded_keys = [weight_key]
+
+                    scale_key = f"{prefix}weight_scale"
+                    scale = state_dict.pop(scale_key, None)
+                    if scale is not None:
+                        scale = scale.float()
+                        manually_loaded_keys.append(scale_key)
+
+                    params = layout_cls.Params(
+                        scale=scale if scale is not None else torch.ones((), dtype=torch.float32),
+                        orig_dtype=MixedPrecisionOps._compute_dtype,
+                        orig_shape=(self.num_embeddings, self.embedding_dim),
+                    )
+                    self.weight = torch.nn.Parameter(
+                        QuantizedTensor(weight.to(dtype=qconfig["storage_t"]), qconfig["comfy_tensor_layout"], params),
+                        requires_grad=False)
+
+                    super()._load_from_state_dict(state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs)
+                    for k in manually_loaded_keys:
+                        if k in missing_keys:
+                            missing_keys.remove(k)
+                else:
+                    if layer_conf is not None:
+                        state_dict[f"{prefix}comfy_quant"] = torch.tensor(list(json.dumps(layer_conf).encode('utf-8')), dtype=torch.uint8)
+                    super()._load_from_state_dict(state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs)
+
+            def state_dict(self, *args, destination=None, prefix="", **kwargs):
+                if destination is not None:
+                    sd = destination
+                else:
+                    sd = {}
+
+                if not hasattr(self, 'weight') or self.weight is None:
+                    return sd
+
+                if isinstance(self.weight, QuantizedTensor):
+                    sd_out = self.weight.state_dict("{}weight".format(prefix))
+                    for k in sd_out:
+                        sd[k] = sd_out[k]
+
+                    quant_conf = {"format": self.quant_format}
+                    sd["{}comfy_quant".format(prefix)] = torch.tensor(list(json.dumps(quant_conf).encode('utf-8')), dtype=torch.uint8)
+                else:
+                    sd["{}weight".format(prefix)] = self.weight
+                return sd
+
+            def forward_comfy_cast_weights(self, input, out_dtype=None):
+                weight = self.weight
+
+                # Optimized path: lookup in fp8, dequantize only the selected rows.
+                if isinstance(weight, QuantizedTensor) and len(self.weight_function) == 0:
+                    qdata, _, offload_stream = cast_bias_weight(self, device=input.device, dtype=weight.dtype, offloadable=True)
+                    if isinstance(qdata, QuantizedTensor):
+                        scale = qdata._params.scale
+                        qdata = qdata._qdata
+                    else:
+                        scale = None
+
+                    x = torch.nn.functional.embedding(
+                        input, qdata, self.padding_idx, self.max_norm,
+                        self.norm_type, self.scale_grad_by_freq, self.sparse)
+                    uncast_bias_weight(self, qdata, None, offload_stream)
+                    target_dtype = out_dtype if out_dtype is not None else weight._params.orig_dtype
+                    x = x.to(dtype=target_dtype)
+                    if scale is not None and scale != 1.0:
+                        x = x * scale.to(dtype=target_dtype)
+                    return x
+
+                # Fallback for non-quantized or weight_function (LoRA) case
+                return super().forward_comfy_cast_weights(input, out_dtype=out_dtype)
+
    return MixedPrecisionOps

 def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False, model_config=None):
--- a/comfy/rmsnorm.py
+++ b/comfy/rmsnorm.py
@ -3,7 +3,15 @@ import comfy.model_management

 RMSNorm = torch.nn.RMSNorm

-def rms_norm(x, weight=None, eps=1e-6):
+def rms_norm(x, weight=None, eps=1e-6, fused=True):
+    if not fused: # compatibility mode as torch native rms_norm results are slightly different
+        orig_dtype = x.dtype
+        normed = x.float() * torch.pow(x.float().pow(2).mean(-1, keepdim=True) + eps, -0.5)
+        if weight is not None:
+            weight = comfy.model_management.cast_to(weight, dtype=torch.float32, device=x.device)
+            normed = normed * weight
+        return normed.to(orig_dtype)
+
    if weight is None:
        return torch.nn.functional.rms_norm(x, (x.shape[-1],), eps=eps)
    else:
--- a/comfy/sd.py
+++ b/comfy/sd.py
@ -64,6 +64,7 @@ import comfy.text_encoders.ace15
 import comfy.text_encoders.longcat_image
 import comfy.text_encoders.qwen35
 import comfy.text_encoders.ernie
+import comfy.text_encoders.gemma4

 import comfy.model_patcher
 import comfy.lora
@ -1256,6 +1257,9 @@ class TEModel(Enum):
    QWEN35_9B = 26
    QWEN35_27B = 27
    MINISTRAL_3_3B = 28
+    GEMMA_4_E4B = 29
+    GEMMA_4_E2B = 30
+    GEMMA_4_31B = 31


 def detect_te_model(sd):
@ -1281,6 +1285,12 @@ def detect_te_model(sd):
            return TEModel.BYT5_SMALL_GLYPH
        return TEModel.T5_BASE
    if 'model.layers.0.post_feedforward_layernorm.weight' in sd:
+        if 'model.layers.59.self_attn.q_norm.weight' in sd:
+            return TEModel.GEMMA_4_31B
+        if 'model.layers.41.self_attn.q_norm.weight' in sd and 'model.layers.47.self_attn.q_norm.weight' not in sd:
+            return TEModel.GEMMA_4_E4B
+        if 'model.layers.34.self_attn.q_norm.weight' in sd and 'model.layers.41.self_attn.q_norm.weight' not in sd:
+            return TEModel.GEMMA_4_E2B
        if 'model.layers.47.self_attn.q_norm.weight' in sd:
            return TEModel.GEMMA_3_12B
        if 'model.layers.0.self_attn.q_norm.weight' in sd:
@ -1420,6 +1430,13 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
            else:
                clip_target.clip = comfy.text_encoders.sa_t5.SAT5Model
                clip_target.tokenizer = comfy.text_encoders.sa_t5.SAT5Tokenizer
+        elif te_model in (TEModel.GEMMA_4_E4B, TEModel.GEMMA_4_E2B, TEModel.GEMMA_4_31B):
+            variant = {TEModel.GEMMA_4_E4B: comfy.text_encoders.gemma4.Gemma4_E4B,
+                       TEModel.GEMMA_4_E2B: comfy.text_encoders.gemma4.Gemma4_E2B,
+                       TEModel.GEMMA_4_31B: comfy.text_encoders.gemma4.Gemma4_31B}[te_model]
+            clip_target.clip = comfy.text_encoders.gemma4.gemma4_te(**llama_detect(clip_data), model_class=variant)
+            clip_target.tokenizer = variant.tokenizer
+            tokenizer_data["tokenizer_json"] = clip_data[0].get("tokenizer_json", None)
        elif te_model == TEModel.GEMMA_2_2B:
            clip_target.clip = comfy.text_encoders.lumina2.te(**llama_detect(clip_data))
            clip_target.tokenizer = comfy.text_encoders.lumina2.LuminaTokenizer
--- a/comfy/text_encoders/gemma4.py
+++ b/comfy/text_encoders/gemma4.py
--- a/comfy/text_encoders/llama.py
+++ b/comfy/text_encoders/llama.py
@ -382,18 +382,19 @@ class Gemma3_12B_Config:
    stop_tokens = [1, 106]

 class RMSNorm(nn.Module):
-    def __init__(self, dim: int, eps: float = 1e-5, add=False, device=None, dtype=None):
+    def __init__(self, dim: int, eps: float = 1e-5, add=False, device=None, dtype=None, fused=True):
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.empty(dim, device=device, dtype=dtype))
        self.add = add
+        self.fused = fused

    def forward(self, x: torch.Tensor):
        w = self.weight
        if self.add:
            w = w + 1.0

-        return comfy.ldm.common_dit.rms_norm(x, w, self.eps)
+        return comfy.ldm.common_dit.rms_norm(x, w, self.eps, fused=self.fused)



@ -521,7 +522,7 @@ class Attention(nn.Module):
            else:
                present_key_value = (xk, xv, index + num_tokens)

-            if sliding_window is not None and xk.shape[2] > sliding_window:
+            if sliding_window is not None and xk.shape[2] > sliding_window and seq_length == 1:
                xk = xk[:, :, -sliding_window:]
                xv = xv[:, :, -sliding_window:]
                attention_mask = attention_mask[..., -sliding_window:] if attention_mask is not None else None
@ -533,12 +534,12 @@ class Attention(nn.Module):
        return self.o_proj(output), present_key_value

 class MLP(nn.Module):
-    def __init__(self, config: Llama2Config, device=None, dtype=None, ops: Any = None):
+    def __init__(self, config: Llama2Config, device=None, dtype=None, ops: Any = None, intermediate_size=None):
        super().__init__()
-        ops = ops or nn
-        self.gate_proj = ops.Linear(config.hidden_size, config.intermediate_size, bias=False, device=device, dtype=dtype)
-        self.up_proj = ops.Linear(config.hidden_size, config.intermediate_size, bias=False, device=device, dtype=dtype)
-        self.down_proj = ops.Linear(config.intermediate_size, config.hidden_size, bias=False, device=device, dtype=dtype)
+        intermediate_size = intermediate_size or config.intermediate_size
+        self.gate_proj = ops.Linear(config.hidden_size, intermediate_size, bias=False, device=device, dtype=dtype)
+        self.up_proj = ops.Linear(config.hidden_size, intermediate_size, bias=False, device=device, dtype=dtype)
+        self.down_proj = ops.Linear(intermediate_size, config.hidden_size, bias=False, device=device, dtype=dtype)
        if config.mlp_activation == "silu":
            self.activation = torch.nn.functional.silu
        elif config.mlp_activation == "gelu_pytorch_tanh":
@ -647,6 +648,10 @@ class TransformerBlockGemma2(nn.Module):

        return x, present_key_value

+def _gemma_embed_scale_hook(module, input, output):
+    return (output.to(module._embed_scale.dtype) * module._embed_scale).to(output.dtype)
+
+
 class Llama2_(nn.Module):
    def __init__(self, config, device=None, dtype=None, ops=None):
        super().__init__()
@ -661,10 +666,10 @@ class Llama2_(nn.Module):
        )
        if self.config.transformer_type == "gemma2" or self.config.transformer_type == "gemma3":
            transformer = TransformerBlockGemma2
-            self.normalize_in = True
+            self.embed_tokens.register_buffer("_embed_scale", torch.tensor(config.hidden_size ** 0.5, dtype=dtype or self.embed_tokens.weight.dtype), persistent=False)
+            self.embed_tokens.register_forward_hook(_gemma_embed_scale_hook)
        else:
            transformer = TransformerBlock
-            self.normalize_in = False

        self.layers = nn.ModuleList([
            transformer(config, index=i, device=device, dtype=dtype, ops=ops)
@ -690,15 +695,12 @@ class Llama2_(nn.Module):
                                    self.config.rope_dims,
                                    device=device)

-    def forward(self, x, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=None, position_ids=None, embeds_info=[], past_key_values=None):
+    def forward(self, x, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=None, position_ids=None, embeds_info=[], past_key_values=None, input_ids=None):
        if embeds is not None:
            x = embeds
        else:
            x = self.embed_tokens(x, out_dtype=dtype)

-        if self.normalize_in:
-            x *= self.config.hidden_size ** 0.5
-
        seq_len = x.shape[1]
        past_len = 0
        if past_key_values is not None and len(past_key_values) > 0:
@ -850,7 +852,7 @@ class BaseGenerate:
                                    torch.empty([batch, model_config.num_key_value_heads, max_cache_len, model_config.head_dim], device=device, dtype=execution_dtype), 0))
        return past_key_values

-    def generate(self, embeds=None, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.9, min_p=0.0, repetition_penalty=1.0, seed=42, stop_tokens=None, initial_tokens=[], execution_dtype=None, min_tokens=0, presence_penalty=0.0):
+    def generate(self, embeds=None, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.9, min_p=0.0, repetition_penalty=1.0, seed=42, stop_tokens=None, initial_tokens=[], execution_dtype=None, min_tokens=0, presence_penalty=0.0, initial_input_ids=None):
        device = embeds.device

        if stop_tokens is None:
@ -875,14 +877,16 @@ class BaseGenerate:
        pbar = comfy.utils.ProgressBar(max_length)

        # Generation loop
+        current_input_ids = initial_input_ids
        for step in tqdm(range(max_length), desc="Generating tokens"):
-            x, _, past_key_values = self.model.forward(None, embeds=embeds, attention_mask=None, past_key_values=past_key_values)
+            x, _, past_key_values = self.model.forward(None, embeds=embeds, attention_mask=None, past_key_values=past_key_values, input_ids=current_input_ids)
            logits = self.logits(x)[:, -1]
            next_token = self.sample_token(logits, temperature, top_k, top_p, min_p, repetition_penalty, initial_tokens + generated_token_ids, generator, do_sample=do_sample, presence_penalty=presence_penalty)
            token_id = next_token[0].item()
            generated_token_ids.append(token_id)

            embeds = self.model.embed_tokens(next_token).to(execution_dtype)
+            current_input_ids = next_token if initial_input_ids is not None else None
            pbar.update(1)

            if token_id in stop_tokens:
--- a/comfy/text_encoders/lt.py
+++ b/comfy/text_encoders/lt.py
@ -93,8 +93,7 @@ class Gemma3_12BModel(sd1_clip.SDClipModel):

    def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, presence_penalty):
        tokens_only = [[t[0] for t in b] for b in tokens]
-        embeds, _, _, embeds_info = self.process_tokens(tokens_only, self.execution_device)
-        comfy.utils.normalize_image_embeddings(embeds, embeds_info, self.transformer.model.config.hidden_size ** 0.5)
+        embeds, _, _, _ = self.process_tokens(tokens_only, self.execution_device)
        return self.transformer.generate(embeds, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, stop_tokens=[106], presence_penalty=presence_penalty)  # 106 is <end_of_turn>

 class DualLinearProjection(torch.nn.Module):
--- a/comfy/text_encoders/lumina2.py
+++ b/comfy/text_encoders/lumina2.py
@ -50,8 +50,7 @@ class Gemma3_4B_Vision_Model(sd1_clip.SDClipModel):
        super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_4B_Vision, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)

    def process_tokens(self, tokens, device):
-        embeds, _, _, embeds_info = super().process_tokens(tokens, device)
-        comfy.utils.normalize_image_embeddings(embeds, embeds_info, self.transformer.model.config.hidden_size ** 0.5)
+        embeds, _, _, _ = super().process_tokens(tokens, device)
        return embeds

 class LuminaModel(sd1_clip.SD1ClipModel):
--- a/comfy/text_encoders/qwen35.py
+++ b/comfy/text_encoders/qwen35.py
@ -408,8 +408,6 @@ class Qwen35Transformer(Llama2_):
        nn.Module.__init__(self)
        self.config = config
        self.vocab_size = config.vocab_size
-        self.normalize_in = False
-
        self.embed_tokens = ops.Embedding(config.vocab_size, config.hidden_size, device=device, dtype=dtype)
        self.layers = nn.ModuleList([
            Qwen35TransformerBlock(config, index=i, device=device, dtype=dtype, ops=ops)
--- a/comfy/utils.py
+++ b/comfy/utils.py
@ -1446,10 +1446,3 @@ def deepcopy_list_dict(obj, memo=None):
    memo[obj_id] = res
    return res

-def normalize_image_embeddings(embeds, embeds_info, scale_factor):
-    """Normalize image embeddings to match text embedding scale"""
-    for info in embeds_info:
-        if info.get("type") == "image":
-            start_idx = info["index"]
-            end_idx = start_idx + info["size"]
-            embeds[:, start_idx:end_idx, :] /= scale_factor
--- a/comfy_api/input/init.py
+++ b/comfy_api/input/init.py
@ -9,6 +9,7 @@ from comfy_api.latest._input import (
    CurveInput,
    MonotoneCubicCurve,
    LinearCurve,
+    RangeInput,
 )

 __all__ = [
@ -21,4 +22,5 @@ __all__ = [
    "CurveInput",
    "MonotoneCubicCurve",
    "LinearCurve",
+    "RangeInput",
 ]
--- a/comfy_api/latest/_input/init.py
+++ b/comfy_api/latest/_input/init.py
@ -1,5 +1,6 @@
 from .basic_types import ImageInput, AudioInput, MaskInput, LatentInput
 from .curve_types import CurvePoint, CurveInput, MonotoneCubicCurve, LinearCurve
+from .range_types import RangeInput
 from .video_types import VideoInput

 __all__ = [
@ -12,4 +13,5 @@ __all__ = [
    "CurveInput",
    "MonotoneCubicCurve",
    "LinearCurve",
+    "RangeInput",
 ]
--- a/comfy_api/latest/_input/range_types.py
+++ b/comfy_api/latest/_input/range_types.py
@ -0,0 +1,70 @@
+from __future__ import annotations
+
+import logging
+import math
+import numpy as np
+
+logger = logging.getLogger(__name__)
+
+
+class RangeInput:
+    """Represents a levels/range adjustment: input range [min, max] with
+    optional midpoint (gamma control).
+
+    Generates a 1D LUT identical to GIMP's levels mapping:
+        1. Normalize input to [0, 1] using [min, max]
+        2. Apply gamma correction: pow(value, 1/gamma)
+        3. Clamp to [0, 1]
+
+    The midpoint field is a position in [0, 1] representing where the
+    midtone falls within [min, max]. It maps to gamma via:
+        gamma = -log2(midpoint)
+    So midpoint=0.5 → gamma=1.0 (linear).
+    """
+
+    def __init__(self, min_val: float, max_val: float, midpoint: float | None = None):
+        self.min_val = min_val
+        self.max_val = max_val
+        self.midpoint = midpoint
+
+    @staticmethod
+    def from_raw(data) -> RangeInput:
+        if isinstance(data, RangeInput):
+            return data
+        if isinstance(data, dict):
+            return RangeInput(
+                min_val=float(data.get("min", 0.0)),
+                max_val=float(data.get("max", 1.0)),
+                midpoint=float(data["midpoint"]) if data.get("midpoint") is not None else None,
+            )
+        raise TypeError(f"Cannot convert {type(data)} to RangeInput")
+
+    def to_lut(self, size: int = 256) -> np.ndarray:
+        """Generate a float64 lookup table mapping [0, 1] input through this
+        levels adjustment.
+
+        The LUT maps normalized input values (0..1) to output values (0..1),
+        matching the GIMP levels formula.
+        """
+        xs = np.linspace(0.0, 1.0, size, dtype=np.float64)
+
+        in_range = self.max_val - self.min_val
+        if abs(in_range) < 1e-10:
+            return np.where(xs >= self.min_val, 1.0, 0.0).astype(np.float64)
+
+        # Normalize: map [min, max] → [0, 1]
+        result = (xs - self.min_val) / in_range
+        result = np.clip(result, 0.0, 1.0)
+
+        # Gamma correction from midpoint
+        if self.midpoint is not None and self.midpoint > 0 and self.midpoint != 0.5:
+            gamma = max(-math.log2(self.midpoint), 0.001)
+            inv_gamma = 1.0 / gamma
+            mask = result > 0
+            result[mask] = np.power(result[mask], inv_gamma)
+
+        return result
+
+    def __repr__(self) -> str:
+        mid = f", midpoint={self.midpoint}" if self.midpoint is not None else ""
+        return f"RangeInput(min={self.min_val}, max={self.max_val}{mid})"
--- a/comfy_api/latest/_io.py
+++ b/comfy_api/latest/_io.py
@ -1266,6 +1266,43 @@ class Histogram(ComfyTypeIO):
    Type = list[int]


+@comfytype(io_type="RANGE")
+class Range(ComfyTypeIO):
+    from comfy_api.input import RangeInput
+    if TYPE_CHECKING:
+        Type = RangeInput
+
+    class Input(WidgetInput):
+        def __init__(self, id: str, display_name: str=None, optional=False, tooltip: str=None,
+                     socketless: bool=True, default: dict=None,
+                     display: str=None,
+                     gradient_stops: list=None,
+                     show_midpoint: bool=None,
+                     midpoint_scale: str=None,
+                     value_min: float=None,
+                     value_max: float=None,
+                     advanced: bool=None):
+            super().__init__(id, display_name, optional, tooltip, None, default, socketless, None, None, None, None, advanced)
+            if default is None:
+                self.default = {"min": 0.0, "max": 1.0}
+            self.display = display
+            self.gradient_stops = gradient_stops
+            self.show_midpoint = show_midpoint
+            self.midpoint_scale = midpoint_scale
+            self.value_min = value_min
+            self.value_max = value_max
+
+        def as_dict(self):
+            return super().as_dict() | prune_dict({
+                "display": self.display,
+                "gradient_stops": self.gradient_stops,
+                "show_midpoint": self.show_midpoint,
+                "midpoint_scale": self.midpoint_scale,
+                "value_min": self.value_min,
+                "value_max": self.value_max,
+            })
+
+
 DYNAMIC_INPUT_LOOKUP: dict[str, Callable[[dict[str, Any], dict[str, Any], tuple[str, dict[str, Any]], str, list[str] | None], None]] = {}
 def register_dynamic_input_func(io_type: str, func: Callable[[dict[str, Any], dict[str, Any], tuple[str, dict[str, Any]], str, list[str] | None], None]):
    DYNAMIC_INPUT_LOOKUP[io_type] = func
@ -2276,5 +2313,6 @@ __all__ = [
    "BoundingBox",
    "Curve",
    "Histogram",
+    "Range",
    "NodeReplace",
 ]
--- a/comfy_api_nodes/nodes_kling.py
+++ b/comfy_api_nodes/nodes_kling.py
@ -276,6 +276,7 @@ async def finish_omni_video_task(cls: type[IO.ComfyNode], response: TaskStatusRe
        cls,
        ApiEndpoint(path=f"/proxy/kling/v1/videos/omni-video/{response.data.task_id}"),
        response_model=TaskStatusResponse,
+        max_poll_attempts=280,
        status_extractor=lambda r: (r.data.task_status if r.data else None),
    )
    return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url))
@ -862,7 +863,7 @@ class OmniProTextToVideoNode(IO.ComfyNode):
                ),
                IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "1:1"]),
                IO.Int.Input("duration", default=5, min=3, max=15, display_mode=IO.NumberDisplay.slider),
-                IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True),
+                IO.Combo.Input("resolution", options=["4k", "1080p", "720p"], default="1080p", optional=True),
                IO.DynamicCombo.Input(
                    "storyboards",
                    options=[
@ -904,12 +905,13 @@ class OmniProTextToVideoNode(IO.ComfyNode):
                depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution", "model_name", "generate_audio"]),
                expr="""
                (
-                  $mode := (widgets.resolution = "720p") ? "std" : "pro";
+                  $res := widgets.resolution;
+                  $mode := $res = "4k" ? "4k" : ($res = "720p" ? "std" : "pro");
                  $isV3 := $contains(widgets.model_name, "v3");
                  $audio := $isV3 and widgets.generate_audio;
                  $rates := $audio
-                    ? {"std": 0.112, "pro": 0.14}
-                    : {"std": 0.084, "pro": 0.112};
+                    ? {"std": 0.112, "pro": 0.14, "4k": 0.42}
+                    : {"std": 0.084, "pro": 0.112, "4k": 0.42};
                  {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration}
                )
                """,
@ -934,6 +936,8 @@ class OmniProTextToVideoNode(IO.ComfyNode):
                raise ValueError("kling-video-o1 only supports durations of 5 or 10 seconds.")
            if generate_audio:
                raise ValueError("kling-video-o1 does not support audio generation.")
+            if resolution == "4k":
+                raise ValueError("kling-video-o1 does not support 4k resolution.")
        stories_enabled = storyboards is not None and storyboards["storyboards"] != "disabled"
        if stories_enabled and model_name == "kling-video-o1":
            raise ValueError("kling-video-o1 does not support storyboards.")
@ -963,6 +967,12 @@ class OmniProTextToVideoNode(IO.ComfyNode):
                    f"must equal the global duration ({duration}s)."
                )

+        if resolution == "4k":
+            mode = "4k"
+        elif resolution == "1080p":
+            mode = "pro"
+        else:
+            mode = "std"
        response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"),
@ -972,7 +982,7 @@ class OmniProTextToVideoNode(IO.ComfyNode):
                prompt=prompt,
                aspect_ratio=aspect_ratio,
                duration=str(duration),
-                mode="pro" if resolution == "1080p" else "std",
+                mode=mode,
                multi_shot=multi_shot,
                multi_prompt=multi_prompt_list,
                shot_type="customize" if multi_shot else None,
@ -1014,7 +1024,7 @@ class OmniProFirstLastFrameNode(IO.ComfyNode):
                    optional=True,
                    tooltip="Up to 6 additional reference images.",
                ),
-                IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True),
+                IO.Combo.Input("resolution", options=["4k", "1080p", "720p"], default="1080p", optional=True),
                IO.DynamicCombo.Input(
                    "storyboards",
                    options=[
@ -1061,12 +1071,13 @@ class OmniProFirstLastFrameNode(IO.ComfyNode):
                depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution", "model_name", "generate_audio"]),
                expr="""
                (
-                  $mode := (widgets.resolution = "720p") ? "std" : "pro";
+                  $res := widgets.resolution;
+                  $mode := $res = "4k" ? "4k" : ($res = "720p" ? "std" : "pro");
                  $isV3 := $contains(widgets.model_name, "v3");
                  $audio := $isV3 and widgets.generate_audio;
                  $rates := $audio
-                    ? {"std": 0.112, "pro": 0.14}
-                    : {"std": 0.084, "pro": 0.112};
+                    ? {"std": 0.112, "pro": 0.14, "4k": 0.42}
+                    : {"std": 0.084, "pro": 0.112, "4k": 0.42};
                  {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration}
                )
                """,
@ -1093,6 +1104,8 @@ class OmniProFirstLastFrameNode(IO.ComfyNode):
                raise ValueError("kling-video-o1 does not support durations greater than 10 seconds.")
            if generate_audio:
                raise ValueError("kling-video-o1 does not support audio generation.")
+            if resolution == "4k":
+                raise ValueError("kling-video-o1 does not support 4k resolution.")
        stories_enabled = storyboards is not None and storyboards["storyboards"] != "disabled"
        if stories_enabled and model_name == "kling-video-o1":
            raise ValueError("kling-video-o1 does not support storyboards.")
@ -1161,6 +1174,12 @@ class OmniProFirstLastFrameNode(IO.ComfyNode):
                validate_image_aspect_ratio(i, (1, 2.5), (2.5, 1))
            for i in await upload_images_to_comfyapi(cls, reference_images, wait_label="Uploading reference frame(s)"):
                image_list.append(OmniParamImage(image_url=i))
+        if resolution == "4k":
+            mode = "4k"
+        elif resolution == "1080p":
+            mode = "pro"
+        else:
+            mode = "std"
        response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"),
@ -1170,7 +1189,7 @@ class OmniProFirstLastFrameNode(IO.ComfyNode):
                prompt=prompt,
                duration=str(duration),
                image_list=image_list,
-                mode="pro" if resolution == "1080p" else "std",
+                mode=mode,
                sound="on" if generate_audio else "off",
                multi_shot=multi_shot,
                multi_prompt=multi_prompt_list,
@ -1204,7 +1223,7 @@ class OmniProImageToVideoNode(IO.ComfyNode):
                    "reference_images",
                    tooltip="Up to 7 reference images.",
                ),
-                IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True),
+                IO.Combo.Input("resolution", options=["4k", "1080p", "720p"], default="1080p", optional=True),
                IO.DynamicCombo.Input(
                    "storyboards",
                    options=[
@ -1251,12 +1270,13 @@ class OmniProImageToVideoNode(IO.ComfyNode):
                depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution", "model_name", "generate_audio"]),
                expr="""
                (
-                  $mode := (widgets.resolution = "720p") ? "std" : "pro";
+                  $res := widgets.resolution;
+                  $mode := $res = "4k" ? "4k" : ($res = "720p" ? "std" : "pro");
                  $isV3 := $contains(widgets.model_name, "v3");
                  $audio := $isV3 and widgets.generate_audio;
                  $rates := $audio
-                    ? {"std": 0.112, "pro": 0.14}
-                    : {"std": 0.084, "pro": 0.112};
+                    ? {"std": 0.112, "pro": 0.14, "4k": 0.42}
+                    : {"std": 0.084, "pro": 0.112, "4k": 0.42};
                  {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration}
                )
                """,
@ -1282,6 +1302,8 @@ class OmniProImageToVideoNode(IO.ComfyNode):
                raise ValueError("kling-video-o1 does not support durations greater than 10 seconds.")
            if generate_audio:
                raise ValueError("kling-video-o1 does not support audio generation.")
+            if resolution == "4k":
+                raise ValueError("kling-video-o1 does not support 4k resolution.")
        stories_enabled = storyboards is not None and storyboards["storyboards"] != "disabled"
        if stories_enabled and model_name == "kling-video-o1":
            raise ValueError("kling-video-o1 does not support storyboards.")
@ -1320,6 +1342,12 @@ class OmniProImageToVideoNode(IO.ComfyNode):
        image_list: list[OmniParamImage] = []
        for i in await upload_images_to_comfyapi(cls, reference_images, wait_label="Uploading reference image"):
            image_list.append(OmniParamImage(image_url=i))
+        if resolution == "4k":
+            mode = "4k"
+        elif resolution == "1080p":
+            mode = "pro"
+        else:
+            mode = "std"
        response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"),
@ -1330,7 +1358,7 @@ class OmniProImageToVideoNode(IO.ComfyNode):
                aspect_ratio=aspect_ratio,
                duration=str(duration),
                image_list=image_list,
-                mode="pro" if resolution == "1080p" else "std",
+                mode=mode,
                sound="on" if generate_audio else "off",
                multi_shot=multi_shot,
                multi_prompt=multi_prompt_list,
@ -2860,7 +2888,7 @@ class KlingVideoNode(IO.ComfyNode):
                        IO.DynamicCombo.Option(
                            "kling-v3",
                            [
-                                IO.Combo.Input("resolution", options=["1080p", "720p"]),
+                                IO.Combo.Input("resolution", options=["4k", "1080p", "720p"], default="1080p"),
                                IO.Combo.Input(
                                    "aspect_ratio",
                                    options=["16:9", "9:16", "1:1"],
@ -2913,7 +2941,11 @@ class KlingVideoNode(IO.ComfyNode):
                ),
                expr="""
                (
-                  $rates := {"1080p": {"off": 0.112, "on": 0.168}, "720p": {"off": 0.084, "on": 0.126}};
+                  $rates := {
+                    "4k": {"off": 0.42, "on": 0.42},
+                    "1080p": {"off": 0.112, "on": 0.168},
+                    "720p": {"off": 0.084, "on": 0.126}
+                  };
                  $res := $lookup(widgets, "model.resolution");
                  $audio := widgets.generate_audio ? "on" : "off";
                  $rate := $lookup($lookup($rates, $res), $audio);
@ -2943,7 +2975,12 @@ class KlingVideoNode(IO.ComfyNode):
        start_frame: Input.Image | None = None,
    ) -> IO.NodeOutput:
        _ = seed
-        mode = "pro" if model["resolution"] == "1080p" else "std"
+        if model["resolution"] == "4k":
+            mode = "4k"
+        elif model["resolution"] == "1080p":
+            mode = "pro"
+        else:
+            mode = "std"
        custom_multi_shot = False
        if multi_shot["multi_shot"] == "disabled":
            shot_type = None
@ -3025,6 +3062,7 @@ class KlingVideoNode(IO.ComfyNode):
            cls,
            ApiEndpoint(path=poll_path),
            response_model=TaskStatusResponse,
+            max_poll_attempts=280,
            status_extractor=lambda r: (r.data.task_status if r.data else None),
        )
        return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url))
@ -3057,7 +3095,7 @@ class KlingFirstLastFrameNode(IO.ComfyNode):
                        IO.DynamicCombo.Option(
                            "kling-v3",
                            [
-                                IO.Combo.Input("resolution", options=["1080p", "720p"]),
+                                IO.Combo.Input("resolution", options=["4k", "1080p", "720p"], default="1080p"),
                            ],
                        ),
                    ],
@ -3089,7 +3127,11 @@ class KlingFirstLastFrameNode(IO.ComfyNode):
                ),
                expr="""
                (
-                  $rates := {"1080p": {"off": 0.112, "on": 0.168}, "720p": {"off": 0.084, "on": 0.126}};
+                  $rates := {
+                    "4k": {"off": 0.42, "on": 0.42},
+                    "1080p": {"off": 0.112, "on": 0.168},
+                    "720p": {"off": 0.084, "on": 0.126}
+                  };
                  $res := $lookup(widgets, "model.resolution");
                  $audio := widgets.generate_audio ? "on" : "off";
                  $rate := $lookup($lookup($rates, $res), $audio);
@ -3118,6 +3160,12 @@ class KlingFirstLastFrameNode(IO.ComfyNode):
        validate_image_aspect_ratio(end_frame, (1, 2.5), (2.5, 1))
        image_url = await upload_image_to_comfyapi(cls, first_frame, wait_label="Uploading first frame")
        image_tail_url = await upload_image_to_comfyapi(cls, end_frame, wait_label="Uploading end frame")
+        if model["resolution"] == "4k":
+            mode = "4k"
+        elif model["resolution"] == "1080p":
+            mode = "pro"
+        else:
+            mode = "std"
        response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/kling/v1/videos/image2video", method="POST"),
@ -3127,7 +3175,7 @@ class KlingFirstLastFrameNode(IO.ComfyNode):
                image=image_url,
                image_tail=image_tail_url,
                prompt=prompt,
-                mode="pro" if model["resolution"] == "1080p" else "std",
+                mode=mode,
                duration=str(duration),
                sound="on" if generate_audio else "off",
            ),
@ -3140,6 +3188,7 @@ class KlingFirstLastFrameNode(IO.ComfyNode):
            cls,
            ApiEndpoint(path=f"/proxy/kling/v1/videos/image2video/{response.data.task_id}"),
            response_model=TaskStatusResponse,
+            max_poll_attempts=280,
            status_extractor=lambda r: (r.data.task_status if r.data else None),
        )
        return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url))
--- a/comfy_extras/nodes_lt.py
+++ b/comfy_extras/nodes_lt.py
@ -1,6 +1,7 @@
 import nodes
 import node_helpers
 import torch
+import torchaudio
 import comfy.model_management
 import comfy.model_sampling
 import comfy.samplers
@ -711,7 +712,14 @@ class LTXVReferenceAudio(io.ComfyNode):
    @classmethod
    def execute(cls, model, positive, negative, reference_audio, audio_vae, identity_guidance_scale, start_percent, end_percent) -> io.NodeOutput:
        # Encode reference audio to latents and patchify
-        audio_latents = audio_vae.encode(reference_audio)
+        sample_rate = reference_audio["sample_rate"]
+        vae_sample_rate = getattr(audio_vae, "audio_sample_rate", 44100)
+        if vae_sample_rate != sample_rate:
+            waveform = torchaudio.functional.resample(reference_audio["waveform"], sample_rate, vae_sample_rate)
+        else:
+            waveform = reference_audio["waveform"]
+
+        audio_latents = audio_vae.encode(waveform.movedim(1, -1))
        b, c, t, f = audio_latents.shape
        ref_tokens = audio_latents.permute(0, 2, 1, 3).reshape(b, t, c * f)
        ref_audio = {"tokens": ref_tokens}
--- a/comfy_extras/nodes_preview_any.py
+++ b/comfy_extras/nodes_preview_any.py
@ -1,5 +1,6 @@
 import json
 from comfy.comfy_types.node_typing import IO
+import torch

 # Preview Any - original implement from
 # https://github.com/rgthree/rgthree-comfy/blob/main/py/display_any.py
@ -19,6 +20,7 @@ class PreviewAny():
    SEARCH_ALIASES = ["show output", "inspect", "debug", "print value", "show text"]

    def main(self, source=None):
+        torch.set_printoptions(edgeitems=6)
        value = 'None'
        if isinstance(source, str):
            value = source
@ -33,6 +35,7 @@ class PreviewAny():
                except Exception:
                    value = 'source exists, but could not be serialized.'

+        torch.set_printoptions()
        return {"ui": {"text": (value,)}, "result": (value,)}

 NODE_CLASS_MAPPINGS = {
--- a/comfy_extras/nodes_textgen.py
+++ b/comfy_extras/nodes_textgen.py
@ -32,6 +32,8 @@ class TextGenerate(io.ComfyNode):
                io.Clip.Input("clip"),
                io.String.Input("prompt", multiline=True, dynamic_prompts=True, default=""),
                io.Image.Input("image", optional=True),
+                io.Image.Input("video", optional=True, tooltip="Video frames as image batch. Assumed to be 24 FPS; subsampled to 1 FPS internally."),
+                io.Audio.Input("audio", optional=True),
                io.Int.Input("max_length", default=256, min=1, max=2048),
                io.DynamicCombo.Input("sampling_mode", options=sampling_options, display_name="Sampling Mode"),
                io.Boolean.Input("thinking", optional=True, default=False, tooltip="Operate in thinking mode if the model supports it."),
@ -43,9 +45,9 @@ class TextGenerate(io.ComfyNode):
        )

    @classmethod
-    def execute(cls, clip, prompt, max_length, sampling_mode, image=None, thinking=False, use_default_template=True) -> io.NodeOutput:
+    def execute(cls, clip, prompt, max_length, sampling_mode, image=None, thinking=False, use_default_template=True, video=None, audio=None) -> io.NodeOutput:

-        tokens = clip.tokenize(prompt, image=image, skip_template=not use_default_template, min_length=1, thinking=thinking)
+        tokens = clip.tokenize(prompt, image=image, skip_template=not use_default_template, min_length=1, thinking=thinking, video=video, audio=audio)

        # Get sampling parameters from dynamic combo
        do_sample = sampling_mode.get("sampling_mode") == "on"
@ -70,7 +72,8 @@ class TextGenerate(io.ComfyNode):
            seed=seed
        )

-        generated_text = clip.decode(generated_ids, skip_special_tokens=True)
+        generated_text = clip.decode(generated_ids)
+
        return io.NodeOutput(generated_text)


@ -161,12 +164,12 @@ class TextGenerateLTX2Prompt(TextGenerate):
        )

    @classmethod
-    def execute(cls, clip, prompt, max_length, sampling_mode, image=None, thinking=False, use_default_template=True) -> io.NodeOutput:
+    def execute(cls, clip, prompt, max_length, sampling_mode, image=None, thinking=False, use_default_template=True, video=None, audio=None) -> io.NodeOutput:
        if image is None:
            formatted_prompt = f"<start_of_turn>system\n{LTX2_T2V_SYSTEM_PROMPT.strip()}<end_of_turn>\n<start_of_turn>user\nUser Raw Input Prompt: {prompt}.<end_of_turn>\n<start_of_turn>model\n"
        else:
            formatted_prompt = f"<start_of_turn>system\n{LTX2_I2V_SYSTEM_PROMPT.strip()}<end_of_turn>\n<start_of_turn>user\n\n<image_soft_token>\n\nUser Raw Input Prompt: {prompt}.<end_of_turn>\n<start_of_turn>model\n"
-        return super().execute(clip, formatted_prompt, max_length, sampling_mode, image, thinking, use_default_template)
+        return super().execute(clip, formatted_prompt, max_length, sampling_mode, image=image, thinking=thinking, use_default_template=use_default_template, video=video, audio=audio)


 class TextgenExtension(ComfyExtension):
--- a/execution.py
+++ b/execution.py
@ -811,11 +811,30 @@ class PromptExecutor:
            self._notify_prompt_lifecycle("end", prompt_id)


-async def validate_inputs(prompt_id, prompt, item, validated):
+async def validate_inputs(prompt_id, prompt, item, validated, visiting=None):
+    if visiting is None:
+        visiting = []
+
    unique_id = item
    if unique_id in validated:
        return validated[unique_id]

+    if unique_id in visiting:
+        cycle_path_nodes = visiting[visiting.index(unique_id):] + [unique_id]
+        cycle_nodes = list(dict.fromkeys(cycle_path_nodes))
+        cycle_path = " -> ".join(f"{node_id} ({prompt[node_id]['class_type']})" for node_id in cycle_path_nodes)
+        for node_id in cycle_nodes:
+            validated[node_id] = (False, [{
+                "type": "dependency_cycle",
+                "message": "Dependency cycle detected",
+                "details": cycle_path,
+                "extra_info": {
+                    "node_id": node_id,
+                    "cycle_nodes": cycle_nodes,
+                }
+            }], node_id)
+        return validated[unique_id]
+
    inputs = prompt[unique_id]['inputs']
    class_type = prompt[unique_id]['class_type']
    obj_class = nodes.NODE_CLASS_MAPPINGS[class_type]
@ -899,7 +918,11 @@ async def validate_inputs(prompt_id, prompt, item, validated):
                errors.append(error)
                continue
            try:
-                r = await validate_inputs(prompt_id, prompt, o_id, validated)
+                visiting.append(unique_id)
+                try:
+                    r = await validate_inputs(prompt_id, prompt, o_id, validated, visiting)
+                finally:
+                    visiting.pop()
                if r[0] is False:
                    # `r` will be set in `validated[o_id]` already
                    valid = False
@ -1048,10 +1071,13 @@ async def validate_inputs(prompt_id, prompt, item, validated):
                    errors.append(error)
                    continue

-    if len(errors) > 0 or valid is not True:
-        ret = (False, errors, unique_id)
-    else:
-        ret = (True, [], unique_id)
+    ret = validated.get(unique_id, (True, [], unique_id))
+    # Recursive cycle detection may have already populated an error on us. Join it.
+    ret = (
+        ret[0] and valid is True and not errors,
+        ret[1] + [error for error in errors if error not in ret[1]],
+        unique_id,
+    )

    validated[unique_id] = ret
    return ret
--- a/openapi.yaml
+++ b/openapi.yaml
--- a/requirements.txt
+++ b/requirements.txt
@ -1,6 +1,6 @@
 comfyui-frontend-package==1.42.14
-comfyui-workflow-templates==0.9.61
-comfyui-embedded-docs==0.4.3
+comfyui-workflow-templates==0.9.62
+comfyui-embedded-docs==0.4.4
 torch
 torchsde
 torchvision
@ -23,7 +23,7 @@ SQLAlchemy>=2.0
 filelock
 av>=14.2.0
 comfy-kitchen>=0.2.8
-comfy-aimdo>=0.2.12
+comfy-aimdo==0.2.14
 requests
 simpleeval>=1.0.0
 blake3
Author	SHA1	Message	Date
Jukka Seppänen	3671cfa5af	Merge `5b470171df` into `443074eee9`	2026-04-24 00:06:47 -04:00
Matt Miller	443074eee9	Add OpenAPI 3.1 specification for ComfyUI API (#13397 ) * Add OpenAPI 3.1 specification for ComfyUI API Adds a comprehensive OpenAPI 3.1 spec documenting all HTTP endpoints exposed by ComfyUI's server, including prompt execution, queue management, file uploads, userdata, settings, system stats, object info, assets, and internal routes. The spec was validated against the source code with adversarial review from multiple models, and passes Spectral linting with zero errors. Also removes openapi.yaml from .gitignore so the spec is tracked. * Mark /api/history endpoints as deprecated Address Jacob's review feedback on PR #13397 by explicitly marking the three /api/history operations as deprecated in the OpenAPI spec: * GET /api/history -> superseded by GET /api/jobs * POST /api/history -> superseded by /api/jobs management * GET /api/history/{prompt_id} -> superseded by GET /api/jobs/{job_id} Each operation gains deprecated: true plus a description that names the replacement. A formal sunset timeline (RFC 8594 Deprecation and RFC 8553 Sunset headers, minimum-runway policy) is being defined separately and will be applied as a follow-up. * Address Spectral lint findings in openapi.yaml - Add operation descriptions to 52 endpoints (prompt, queue, upload, view, models, userdata, settings, assets, internal, etc.) - Add schema descriptions to 22 component schemas - Add parameter descriptions to 8 path parameters that were missing them - Remove 6 unused component schemas: TaskOutput, EmbeddingsResponse, ExtensionsResponse, LogRawResponse, UserInfo, UserDataFullInfo No wire/shape changes. Reduces Spectral findings from 92 to 4. The remaining 4 are real issues (WebSocket 101 on /ws, loose error schema, and two snake_case warnings on real wire field names) and are worth addressing separately. * fix(openapi): address jtreminio oneOf review on /api/userdata Restructure the UserData response schemas to address the review feedback on the `oneOf` without a discriminator, and fix two accuracy bugs found while doing it. Changes - GET /api/userdata response: extract the inline `oneOf` to a named schema (`ListUserdataResponse`) and add the missing third variant returned when `split=true` and `full_info=false` (array of `[relative_path, ...path_components]`). Previously only two of the three actual server response shapes were described. - UserDataResponse (POST endpoints): correct the description — this schema is a single item, not a list — and point at the canonical `GetUserDataResponseFullFile` schema instead of the duplicate `UserDataResponseFull`. Also removes the malformed blank line in `UserDataResponseShort`. - Delete the now-unused `UserDataResponseFull` and `UserDataResponseShort` schemas (replaced by reuse of `GetUserDataResponseFullFile` and an inline string variant). - Add an `x-variant-selector` vendor extension to both `oneOf` sites documenting which query-parameter combination selects which branch, since a true OpenAPI `discriminator` is not applicable (the variants are type-disjoint and the selector lives in the request, not the response body). This keeps the shapes the server actually emits (no wire-breaking change) while making the selection rule explicit for SDK generators and readers. --------- Co-authored-by: guill <jacob.e.segal@gmail.com>	2026-04-23 21:00:25 -07:00
Terry Jia	2e0503780d	range type (#13322 ) Co-authored-by: guill <jacob.e.segal@gmail.com>	2026-04-23 20:51:34 -07:00
Terry Jia	00d2f4047d	fix: use textureSize instead of u_resolution for texel size in blur/sharpen shaders (#13347 ) * fix: use textureSize instead of u_resolution for texel size in blur/sharpen shaders * fix: remove unused u_resolution uniform and fix Glow shader texelSize --------- Co-authored-by: guill <jacob.e.segal@gmail.com>	2026-04-23 20:42:22 -07:00
comfyanonymous	c5d9edacd0	Print more tensor values in the preview any node. (#13544 )	2026-04-23 22:19:00 -04:00
Daxiong (Lin)	47ccecaee0	chore: update workflow templates to v0.9.62 (#13539 )	2026-04-23 16:56:13 -07:00
rattus	2327fa1c90	execution: Add anti-cycle validation (#13169 ) Currently if the graph contains a cycle, the just inifitiate recursions, hits a catch all then throws a generic error against the output node that seeded the validation. Instead, fail the offending cycling mode chain and handlng it as an error in its own right. Co-authored-by: guill <jacob.e.segal@gmail.com>	2026-04-23 15:20:24 -07:00
Jukka Seppänen	084e08c6e2	Disable sageattention for SAM3 (#13529 ) Causes Nans	2026-04-23 11:14:42 -07:00
rattus	ef8f3cbcdc	comfy-aimdo 0.2.14: Hotfix async allocator estimations (#13534 ) This was doing an over-estimate of VRAM used by the async allocator when lots of little small tensors were in play. Also change the versioning scheme to == so we can roll forward aimdo without worrying about stable regressions downstream in comfyUI core.	2026-04-23 11:14:13 -07:00
Jukka Seppänen	6fbb6b6f49	Fix LTXV Reference Audio node (#13531 )	2026-04-23 11:13:17 -07:00
Alexander Piskun	abf3d56f27	add 4K resolution to Kling nodes (#13536 ) Signed-off-by: bigcat88 <bigcat88@icloud.com>	2026-04-23 08:49:54 -07:00
Daxiong (Lin)	2a14e1e96a	chore: update embedded docs to v0.4.4 (#13535 )	2026-04-23 08:15:47 -07:00
kijai	5b470171df	Update nodes_textgen.py	2026-04-21 16:55:25 +03:00
kijai	11162f9e74	Merge remote-tracking branch 'upstream/master' into gemma4	2026-04-21 16:53:00 +03:00
kijai	80af032762	Code cleanup	2026-04-21 16:44:27 +03:00
kijai	35987d7061	Merge remote-tracking branch 'upstream/master' into gemma4	2026-04-15 01:10:23 +03:00
kijai	845eb14425	Handle thinking tokens different only for Gemma4	2026-04-13 23:40:52 +03:00
kijai	e0cccbd4c9	Fix image encoder attention mask type So it works with basic attention	2026-04-13 23:29:27 +03:00
kijai	c857b6c657	Translate think tokens	2026-04-12 21:25:19 +03:00
kijai	387e8d8a4c	small fixes	2026-04-12 18:27:26 +03:00
kijai	ba3a484c06	Add fp8 scaled embedding support	2026-04-12 18:16:21 +03:00
kijai	0fc398a821	Various fixes	2026-04-12 18:11:07 +03:00
kijai	abff4cf3d6	Update sd.py	2026-04-12 16:55:37 +03:00
kijai	411b26ec71	Merge remote-tracking branch 'upstream/master' into gemma4	2026-04-12 16:55:23 +03:00
kijai	17ccff25be	Cleanup	2026-04-12 16:53:12 +03:00
kijai	6b803abe5a	update comment	2026-04-12 16:30:49 +03:00
kijai	6718be09ba	cleanup, enable fused rms norm by default	2026-04-10 15:28:26 +03:00
kijai	05eaceafa1	Cleanup, video fixes	2026-04-07 12:37:29 +03:00
kijai	93e8635110	parity with reference implementation outputs can 100% match transformers with same sdpa flags, checkpoint this and then optimize	2026-04-07 01:15:04 +03:00
kijai	832753f497	initial gemma4 support	2026-04-03 03:46:45 +03:00