diff --git a/javascript/orchid-dreams.css b/javascript/orchid-dreams.css
index 915823bb3..a28e894a4 100644
--- a/javascript/orchid-dreams.css
+++ b/javascript/orchid-dreams.css
@@ -14,7 +14,7 @@
--inactive-color: var(--primary--800);
--body-text-color: var(--neutral-100);
--body-text-color-subdued: var(--neutral-300);
- --background-color: var(--primary-100);
+ --background-color: var(--primary-100);
--background-fill-primary: var(--input-background-fill);
--input-padding: 8px;
--input-background-fill: var(--primary-200);
diff --git a/javascript/sdnext.css b/javascript/sdnext.css
index 5e219e195..6f3c4fd53 100644
--- a/javascript/sdnext.css
+++ b/javascript/sdnext.css
@@ -387,7 +387,7 @@ div:has(>#tab-gallery-folders) { flex-grow: 0 !important; background-color: var(
--spacing-md: 4px;
--spacing-lg: 5px;
--spacing-xl: 6px;
- --spacing-xxl: 7px;
+ --spacing-xxl: 7px;
}
@media (hover: none) and (pointer: coarse) { /* Apply different styles for devices with coarse pointers dependant on screen resolution */
@@ -396,7 +396,7 @@ div:has(>#tab-gallery-folders) { flex-grow: 0 !important; background-color: var(
:root, .light, .dark { --left-column: 100%; }
#txt2img_results, #img2img_results, #extras_results { min-width: calc(min(320px, 100%)) !important;} /* maintain single column for from image operations on larger mobile devices */
#txt2img_footer p { text-wrap: wrap; }
- }
+ }
@media (min-width: 400px) { /* Screens larger than 400px wide */
:root, .light, .dark {--left-column: 50%;}
#txt2img_results, #extras_results, #txt2im g_footer p { text-wrap: wrap; max-width: 100% !important; } /* maintain side by side split on larger mobile displays for from text */
@@ -408,7 +408,7 @@ div:has(>#tab-gallery-folders) { flex-grow: 0 !important; background-color: var(
#img2img_actions_column { display: flex; min-width: fit-content !important; flex-direction: row;justify-content: space-evenly; align-items: center;}
#txt2img_generate_box, #img2img_generate_box, #txt2img_enqueue_wrapper,#img2img_enqueue_wrapper {display: flex;flex-direction: column;height: 4em !important;align-items: stretch;justify-content: space-evenly;}
#img2img_interface, #img2img_results, #img2img_footer p { text-wrap: wrap; min-width: 100% !important; max-width: 100% !important;} /* maintain single column for from image operations on larger mobile devices */
- #txt2img_sampler, #txt2img_batch, #txt2img_seed_group, #txt2img_advanced, #txt2img_second_pass, #img2img_sampling_group, #img2img_resize_group, #img2img_batch_group, #img2img_seed_group, #img2img_denoise_group, #img2img_advanced_group { width: 100% !important; } /* fix from text/image UI
+ #txt2img_sampler, #txt2img_batch, #txt2img_seed_group, #txt2img_advanced, #txt2img_second_pass, #img2img_sampling_group, #img2img_resize_group, #img2img_batch_group, #img2img_seed_group, #img2img_denoise_group, #img2img_advanced_group { width: 100% !important; } /* fix from text/image UI
elements to prevent them from moving around within the UI */
#img2img_resize_group .gradio-radio>div { display: flex; flex-direction: column; width: unset !important; }
#inpaint_controls div { display:flex;flex-direction: row;}
@@ -425,5 +425,5 @@ div:has(>#tab-gallery-folders) { flex-grow: 0 !important; background-color: var(
.gradio-slider input[type="number"] { width: 4em; font-size: var(--text-xs); height: 16px; text-align: center; } /* adjust slider input fields as they were too large for mobile devices. */
#txt2img_settings .block .padded:not(.gradio-accordion) { padding: 0 !important;margin-right: 0; min-width: 100% !important; width:100% !important;}
#script_txt2img_prompts_from_file_prompt_txt, #script_img2img_prompts_from_file_prompt_txt, #script_control2img_prompts_from_file_prompt_txt { resize: vertical !important; }
- }
+ }
}
diff --git a/javascript/timeless-beige.css b/javascript/timeless-beige.css
index a8a9c1536..f611dbbeb 100644
--- a/javascript/timeless-beige.css
+++ b/javascript/timeless-beige.css
@@ -14,7 +14,7 @@
--inactive-color: var(--primary--800);
--body-text-color: var(--neutral-100);
--body-text-color-subdued: var(--neutral-300);
- --background-color: var(--primary-100);
+ --background-color: var(--primary-100);
--background-fill-primary: var(--input-background-fill);
--input-padding: 8px;
--input-background-fill: var(--primary-200);
diff --git a/modules/cfgzero/cogview4_pipeline.py b/modules/cfgzero/cogview4_pipeline.py
index 3467a649f..472c2eb72 100644
--- a/modules/cfgzero/cogview4_pipeline.py
+++ b/modules/cfgzero/cogview4_pipeline.py
@@ -41,7 +41,7 @@ def optimized_scale(positive_flat, negative_flat):
# st_star = v_cond^T * v_uncond / ||v_uncond||^2
st_star = dot_product / squared_norm
-
+
return st_star
@@ -662,8 +662,8 @@ class CogView4CFGZeroPipeline(DiffusionPipeline, CogView4LoraLoaderMixin):
return_dict=False,
)[0]
if use_cfg_zero_star:
- positive_flat = noise_pred_cond.view(batch_size, -1)
- negative_flat = noise_pred_uncond.view(batch_size, -1)
+ positive_flat = noise_pred_cond.view(batch_size, -1)
+ negative_flat = noise_pred_uncond.view(batch_size, -1)
alpha = optimized_scale(positive_flat,negative_flat)
alpha = alpha.view(batch_size, *([1] * (len(noise_pred_cond.shape) - 1)))
diff --git a/modules/cfgzero/hunyuan_t2v_pipeline.py b/modules/cfgzero/hunyuan_t2v_pipeline.py
index e6f2f1bde..8494b9b96 100644
--- a/modules/cfgzero/hunyuan_t2v_pipeline.py
+++ b/modules/cfgzero/hunyuan_t2v_pipeline.py
@@ -77,7 +77,7 @@ def optimized_scale(positive_flat, negative_flat):
# st_star = v_cond^T * v_uncond / ||v_uncond||^2
st_star = dot_product / squared_norm
-
+
return st_star
DEFAULT_PROMPT_TEMPLATE = {
diff --git a/modules/cfgzero/sd3_pipeline.py b/modules/cfgzero/sd3_pipeline.py
index d7c8b81ec..946571d81 100644
--- a/modules/cfgzero/sd3_pipeline.py
+++ b/modules/cfgzero/sd3_pipeline.py
@@ -80,7 +80,7 @@ def optimized_scale(positive_flat, negative_flat):
# st_star = v_cond^T * v_uncond / ||v_uncond||^2
st_star = dot_product / squared_norm
-
+
return st_star
# Copied from diffusers.pipelines.flux.pipeline_flux.calculate_shift
@@ -1088,8 +1088,8 @@ class StableDiffusion3CFGZeroPipeline(DiffusionPipeline, SD3LoraLoaderMixin, Fro
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
if use_cfg_zero_star:
- positive_flat = noise_pred_text.view(batch_size, -1)
- negative_flat = noise_pred_uncond.view(batch_size, -1)
+ positive_flat = noise_pred_text.view(batch_size, -1)
+ negative_flat = noise_pred_uncond.view(batch_size, -1)
alpha = optimized_scale(positive_flat,negative_flat)
alpha = alpha.view(batch_size, *([1] * (len(noise_pred_text.shape) - 1)))
diff --git a/modules/cfgzero/wan_t2v_pipeline.py b/modules/cfgzero/wan_t2v_pipeline.py
index e23c20721..7912ea3aa 100644
--- a/modules/cfgzero/wan_t2v_pipeline.py
+++ b/modules/cfgzero/wan_t2v_pipeline.py
@@ -82,9 +82,9 @@ def optimized_scale(positive_flat, negative_flat):
# st_star = v_cond^T * v_uncond / ||v_uncond||^2
st_star = dot_product / squared_norm
-
+
return st_star
-
+
def basic_clean(text):
text = ftfy.fix_text(text)
text = html.unescape(html.unescape(text))
@@ -555,8 +555,8 @@ class WanCFGZeroPipeline(DiffusionPipeline, WanLoraLoaderMixin):
noise_pred_text = noise_pred
if use_cfg_zero_star:
- positive_flat = noise_pred_text.view(batch_size, -1)
- negative_flat = noise_pred_uncond.view(batch_size, -1)
+ positive_flat = noise_pred_text.view(batch_size, -1)
+ negative_flat = noise_pred_uncond.view(batch_size, -1)
alpha = optimized_scale(positive_flat,negative_flat)
alpha = alpha.view(batch_size, *([1] * (len(noise_pred_text.shape) - 1)))
diff --git a/modules/control/proc/leres/leres/LICENSE b/modules/control/proc/leres/leres/LICENSE
index e0f1d07d9..b8e45dffc 100644
--- a/modules/control/proc/leres/leres/LICENSE
+++ b/modules/control/proc/leres/leres/LICENSE
@@ -20,4 +20,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
+SOFTWARE.
diff --git a/modules/control/proc/leres/leres/Resnext_torch.py b/modules/control/proc/leres/leres/Resnext_torch.py
index 9af54fcc3..1a3dac630 100644
--- a/modules/control/proc/leres/leres/Resnext_torch.py
+++ b/modules/control/proc/leres/leres/Resnext_torch.py
@@ -234,4 +234,3 @@ def resnext101_32x8d(pretrained=True, **kwargs):
model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
return model
-
diff --git a/modules/control/proc/leres/leres/network_auxi.py b/modules/control/proc/leres/leres/network_auxi.py
index 44d96423e..34007c9c9 100644
--- a/modules/control/proc/leres/leres/network_auxi.py
+++ b/modules/control/proc/leres/leres/network_auxi.py
@@ -416,4 +416,3 @@ if __name__ == '__main__':
inputs = torch.ones(4,3,128,128)
out = net(inputs)
print(out.size())
-
diff --git a/modules/control/proc/leres/pix2pix/LICENSE b/modules/control/proc/leres/pix2pix/LICENSE
index 38b1a24fd..8126345cc 100644
--- a/modules/control/proc/leres/pix2pix/LICENSE
+++ b/modules/control/proc/leres/pix2pix/LICENSE
@@ -2,10 +2,10 @@ https://github.com/compphoto/BoostingMonocularDepth
Copyright 2021, Seyed Mahdi Hosseini Miangoleh, Sebastian Dille, Computational Photography Laboratory. All rights reserved.
-This software is for academic use only. A redistribution of this
-software, with or without modifications, has to be for academic
-use only, while giving the appropriate credit to the original
-authors of the software. The methods implemented as a part of
+This software is for academic use only. A redistribution of this
+software, with or without modifications, has to be for academic
+use only, while giving the appropriate credit to the original
+authors of the software. The methods implemented as a part of
this software may be covered under patents or patent applications.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ''AS IS'' AND ANY EXPRESS OR IMPLIED
@@ -16,4 +16,4 @@ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/modules/control/proc/midas/api.py b/modules/control/proc/midas/api.py
index b1540cd9e..c08c02cdd 100644
--- a/modules/control/proc/midas/api.py
+++ b/modules/control/proc/midas/api.py
@@ -165,4 +165,3 @@ class MiDaSInference(nn.Module):
def forward(self, x):
prediction = self.model(x)
return prediction
-
diff --git a/modules/control/proc/midas/midas/blocks.py b/modules/control/proc/midas/midas/blocks.py
index cb840ded3..861687fe3 100644
--- a/modules/control/proc/midas/midas/blocks.py
+++ b/modules/control/proc/midas/midas/blocks.py
@@ -339,4 +339,3 @@ class FeatureFusionBlock_custom(nn.Module):
output = self.out_conv(output)
return output
-
diff --git a/modules/control/proc/midas/midas/dpt_depth.py b/modules/control/proc/midas/midas/dpt_depth.py
index 4429b7f94..600a42cd8 100644
--- a/modules/control/proc/midas/midas/dpt_depth.py
+++ b/modules/control/proc/midas/midas/dpt_depth.py
@@ -106,4 +106,3 @@ class DPTDepthModel(DPT):
def forward(self, x):
return super().forward(x).squeeze(dim=1)
-
diff --git a/modules/control/proc/mlsd/LICENSE b/modules/control/proc/mlsd/LICENSE
index d855c6db4..0729d998d 100644
--- a/modules/control/proc/mlsd/LICENSE
+++ b/modules/control/proc/mlsd/LICENSE
@@ -198,4 +198,4 @@
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
- limitations under the License.
\ No newline at end of file
+ limitations under the License.
diff --git a/modules/control/proc/normalbae/LICENSE b/modules/control/proc/normalbae/LICENSE
index 16a9d56a3..59c5465b4 100644
--- a/modules/control/proc/normalbae/LICENSE
+++ b/modules/control/proc/normalbae/LICENSE
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
+SOFTWARE.
diff --git a/modules/control/proc/normalbae/nets/submodules/decoder.py b/modules/control/proc/normalbae/nets/submodules/decoder.py
index 993203d17..316537fce 100644
--- a/modules/control/proc/normalbae/nets/submodules/decoder.py
+++ b/modules/control/proc/normalbae/nets/submodules/decoder.py
@@ -199,4 +199,3 @@ class Decoder(nn.Module):
return [out_res8, out_res4, out_res2, out_res1], \
[out_res8, samples_pred_res4, samples_pred_res2, samples_pred_res1], \
[None, point_coords_res4, point_coords_res2, point_coords_res1]
-
diff --git a/modules/control/proc/normalbae/nets/submodules/efficientnet_repo/README.md b/modules/control/proc/normalbae/nets/submodules/efficientnet_repo/README.md
index 463368280..d8afeb4d1 100644
--- a/modules/control/proc/normalbae/nets/submodules/efficientnet_repo/README.md
+++ b/modules/control/proc/normalbae/nets/submodules/efficientnet_repo/README.md
@@ -319,5 +319,3 @@ export now requires additional args mentioned in the export script (not needed i
2. TF ported models with 'SAME' padding will have the padding fixed at export time to the resolution used for export. Even though dynamic padding is supported in opset >= 11, I can't get it working.
3. ONNX optimize facility doesn't work reliably in PyTorch 1.6 / ONNX 1.7. Fortunately, the onnxruntime based inference is working very well now and includes on the fly optimization.
3. ONNX / Caffe2 export/import frequently breaks with different PyTorch and ONNX version releases. Please check their respective issue trackers before filing issues here.
-
-
diff --git a/modules/control/proc/normalbae/nets/submodules/efficientnet_repo/geffnet/activations/__init__.py b/modules/control/proc/normalbae/nets/submodules/efficientnet_repo/geffnet/activations/__init__.py
index 813421a74..b031fbe2e 100644
--- a/modules/control/proc/normalbae/nets/submodules/efficientnet_repo/geffnet/activations/__init__.py
+++ b/modules/control/proc/normalbae/nets/submodules/efficientnet_repo/geffnet/activations/__init__.py
@@ -133,5 +133,3 @@ def get_act_layer(name='relu'):
if use_jit and name in _ACT_FN_JIT: # jit scripted models should be okay for export/scripting
return _ACT_LAYER_JIT[name]
return _ACT_LAYER_DEFAULT[name]
-
-
diff --git a/modules/control/proc/normalbae/nets/submodules/efficientnet_repo/geffnet/activations/activations.py b/modules/control/proc/normalbae/nets/submodules/efficientnet_repo/geffnet/activations/activations.py
index bdea692d1..eca58933b 100644
--- a/modules/control/proc/normalbae/nets/submodules/efficientnet_repo/geffnet/activations/activations.py
+++ b/modules/control/proc/normalbae/nets/submodules/efficientnet_repo/geffnet/activations/activations.py
@@ -98,5 +98,3 @@ class HardSigmoid(nn.Module):
def forward(self, x):
return hard_sigmoid(x, self.inplace)
-
-
diff --git a/modules/control/proc/normalbae/nets/submodules/efficientnet_repo/utils.py b/modules/control/proc/normalbae/nets/submodules/efficientnet_repo/utils.py
index d327e8bd8..5eb82a298 100644
--- a/modules/control/proc/normalbae/nets/submodules/efficientnet_repo/utils.py
+++ b/modules/control/proc/normalbae/nets/submodules/efficientnet_repo/utils.py
@@ -49,4 +49,3 @@ def get_outdir(path, *paths, inc=False):
outdir = outdir_inc
os.makedirs(outdir)
return outdir
-
diff --git a/modules/control/proc/openpose/LICENSE b/modules/control/proc/openpose/LICENSE
index 6f60b76d3..51a094a4e 100644
--- a/modules/control/proc/openpose/LICENSE
+++ b/modules/control/proc/openpose/LICENSE
@@ -4,15 +4,15 @@ ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT. IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
-This is a license agreement ("Agreement") between your academic institution or non-profit organization or self (called "Licensee" or "You" in this Agreement) and Carnegie Mellon University (called "Licensor" in this Agreement). All rights not specifically granted to you in this Agreement are reserved for Licensor.
+This is a license agreement ("Agreement") between your academic institution or non-profit organization or self (called "Licensee" or "You" in this Agreement) and Carnegie Mellon University (called "Licensor" in this Agreement). All rights not specifically granted to you in this Agreement are reserved for Licensor.
-RESERVATION OF OWNERSHIP AND GRANT OF LICENSE:
-Licensor retains exclusive ownership of any copy of the Software (as defined below) licensed under this Agreement and hereby grants to Licensee a personal, non-exclusive,
+RESERVATION OF OWNERSHIP AND GRANT OF LICENSE:
+Licensor retains exclusive ownership of any copy of the Software (as defined below) licensed under this Agreement and hereby grants to Licensee a personal, non-exclusive,
non-transferable license to use the Software for noncommercial research purposes, without the right to sublicense, pursuant to the terms and conditions of this Agreement. As used in this Agreement, the term "Software" means (i) the actual copy of all or any portion of code for program routines made accessible to Licensee by Licensor pursuant to this Agreement, inclusive of backups, updates, and/or merged copies permitted hereunder or subsequently supplied by Licensor, including all or any file structures, programming instructions, user interfaces and screen formats and sequences as well as any and all documentation and instructions related to it, and (ii) all or any derivatives and/or modifications created or made by You to any of the items specified in (i).
CONFIDENTIALITY: Licensee acknowledges that the Software is proprietary to Licensor, and as such, Licensee agrees to receive all such materials in confidence and use the Software only in accordance with the terms of this Agreement. Licensee agrees to use reasonable effort to protect the Software from unauthorized use, reproduction, distribution, or publication.
-COPYRIGHT: The Software is owned by Licensor and is protected by United
+COPYRIGHT: The Software is owned by Licensor and is protected by United
States copyright laws and applicable international treaties and/or conventions.
PERMITTED USES: The Software may be used for your own noncommercial internal research purposes. You understand and agree that Licensor is not obligated to implement any suggestions and/or feedback you might provide regarding the Software, but to the extent Licensor does so, you are not entitled to any compensation related thereto.
@@ -35,11 +35,11 @@ FEE: Provided Licensee abides completely by the terms and conditions of this Agr
DISCLAIMER OF WARRANTIES: THE SOFTWARE IS PROVIDED "AS-IS" WITHOUT WARRANTY OF ANY KIND INCLUDING ANY WARRANTIES OF PERFORMANCE OR MERCHANTABILITY OR FITNESS FOR A PARTICULAR USE OR PURPOSE OR OF NON-INFRINGEMENT. LICENSEE BEARS ALL RISK RELATING TO QUALITY AND PERFORMANCE OF THE SOFTWARE AND RELATED MATERIALS.
-SUPPORT AND MAINTENANCE: No Software support or training by the Licensor is provided as part of this Agreement.
+SUPPORT AND MAINTENANCE: No Software support or training by the Licensor is provided as part of this Agreement.
EXCLUSIVE REMEDY AND LIMITATION OF LIABILITY: To the maximum extent permitted under applicable law, Licensor shall not be liable for direct, indirect, special, incidental, or consequential damages or lost profits related to Licensee's use of and/or inability to use the Software, even if Licensor is advised of the possibility of such damage.
-EXPORT REGULATION: Licensee agrees to comply with any and all applicable
+EXPORT REGULATION: Licensee agrees to comply with any and all applicable
U.S. export control laws, regulations, and/or other laws related to embargoes and sanction programs administered by the Office of Foreign Assets Control.
SEVERABILITY: If any provision(s) of this Agreement shall be held to be invalid, illegal, or unenforceable by a court or other tribunal of competent jurisdiction, the validity, legality and enforceability of the remaining provisions shall not in any way be affected or impaired thereby.
@@ -57,7 +57,7 @@ ENTIRE AGREEMENT AND AMENDMENTS: This Agreement constitutes the sole and entire
THIRD-PARTY SOFTWARE NOTICES AND INFORMATION
This project incorporates material from the project(s) listed below (collectively, "Third Party Code"). This Third Party Code is licensed to you under their original license terms set forth below. We reserves all other rights not expressly granted, whether by implication, estoppel or otherwise.
-
+
1. Caffe, version 1.0.0, (https://github.com/BVLC/caffe/)
COPYRIGHT
@@ -80,13 +80,13 @@ committed.
LICENSE
Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
+modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
+ list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
+ and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
@@ -105,4 +105,4 @@ By contributing to the BVLC/caffe repository through pull-request, comment,
or otherwise, the contributor releases their content to the
license and copyright terms herein.
-************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION**********
\ No newline at end of file
+************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION**********
diff --git a/modules/control/proc/segment_anything/build_sam.py b/modules/control/proc/segment_anything/build_sam.py
index 9a52c506b..6c64ee1e8 100644
--- a/modules/control/proc/segment_anything/build_sam.py
+++ b/modules/control/proc/segment_anything/build_sam.py
@@ -155,5 +155,3 @@ def _build_sam(
state_dict = torch.load(f)
sam.load_state_dict(state_dict)
return sam
-
-
diff --git a/modules/control/proc/zoe/LICENSE b/modules/control/proc/zoe/LICENSE
index 7a1e90d00..70a206826 100644
--- a/modules/control/proc/zoe/LICENSE
+++ b/modules/control/proc/zoe/LICENSE
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
+SOFTWARE.
diff --git a/modules/control/proc/zoe/zoedepth/models/__init__.py b/modules/control/proc/zoe/zoedepth/models/__init__.py
index 5f2668792..c344f725c 100644
--- a/modules/control/proc/zoe/zoedepth/models/__init__.py
+++ b/modules/control/proc/zoe/zoedepth/models/__init__.py
@@ -21,4 +21,3 @@
# SOFTWARE.
# File author: Shariq Farooq Bhat
-
diff --git a/modules/control/proc/zoe/zoedepth/models/base_models/__init__.py b/modules/control/proc/zoe/zoedepth/models/base_models/__init__.py
index 5f2668792..c344f725c 100644
--- a/modules/control/proc/zoe/zoedepth/models/base_models/__init__.py
+++ b/modules/control/proc/zoe/zoedepth/models/base_models/__init__.py
@@ -21,4 +21,3 @@
# SOFTWARE.
# File author: Shariq Farooq Bhat
-
diff --git a/modules/control/proc/zoe/zoedepth/models/base_models/midas_repo/midas/blocks.py b/modules/control/proc/zoe/zoedepth/models/base_models/midas_repo/midas/blocks.py
index 998a94bda..c480bbb66 100644
--- a/modules/control/proc/zoe/zoedepth/models/base_models/midas_repo/midas/blocks.py
+++ b/modules/control/proc/zoe/zoedepth/models/base_models/midas_repo/midas/blocks.py
@@ -438,4 +438,3 @@ class FeatureFusionBlock_custom(nn.Module):
output = self.out_conv(output)
return output
-
diff --git a/modules/control/proc/zoe/zoedepth/models/zoedepth/config_zoedepth.json b/modules/control/proc/zoe/zoedepth/models/zoedepth/config_zoedepth.json
index 3112ed78c..44779abf4 100644
--- a/modules/control/proc/zoe/zoedepth/models/zoedepth/config_zoedepth.json
+++ b/modules/control/proc/zoe/zoedepth/models/zoedepth/config_zoedepth.json
@@ -18,7 +18,7 @@
"inverse_midas": false,
"img_size": [384, 512]
},
-
+
"train": {
"train_midas": true,
"use_pretrained_midas": true,
@@ -55,4 +55,4 @@
"use_pretrained_midas": false,
"pretrained_resource" : null
}
-}
\ No newline at end of file
+}
diff --git a/modules/control/proc/zoe/zoedepth/models/zoedepth/config_zoedepth_kitti.json b/modules/control/proc/zoe/zoedepth/models/zoedepth/config_zoedepth_kitti.json
index b51802aa4..15140db42 100644
--- a/modules/control/proc/zoe/zoedepth/models/zoedepth/config_zoedepth_kitti.json
+++ b/modules/control/proc/zoe/zoedepth/models/zoedepth/config_zoedepth_kitti.json
@@ -3,7 +3,7 @@
"bin_centers_type": "normed",
"img_size": [384, 768]
},
-
+
"train": {
},
@@ -19,4 +19,4 @@
"use_pretrained_midas": false,
"pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt"
}
-}
\ No newline at end of file
+}
diff --git a/modules/control/proc/zoe/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json b/modules/control/proc/zoe/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json
index 42bab2a3a..b85747297 100644
--- a/modules/control/proc/zoe/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json
+++ b/modules/control/proc/zoe/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json
@@ -15,7 +15,7 @@
"min_depth": 1e-3,
"max_depth": 80.0
}
- ],
+ ],
"bin_embedding_dim": 128,
"bin_centers_type": "softplus",
"n_attractors":[16, 8, 4, 1],
@@ -24,8 +24,8 @@
"attractor_kind" : "mean",
"attractor_type" : "inv",
"min_temp": 0.0212,
- "max_temp": 50.0,
- "memory_efficient": true,
+ "max_temp": 50.0,
+ "memory_efficient": true,
"midas_model_type" : "DPT_BEiT_L_384",
"img_size": [384, 512]
},
@@ -58,10 +58,10 @@
"use_pretrained_midas": false,
"force_keep_ar": true
},
-
+
"eval": {
"train_midas": false,
"pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt",
"use_pretrained_midas": false
}
-}
\ No newline at end of file
+}
diff --git a/modules/control/proc/zoe/zoedepth/utils/__init__.py b/modules/control/proc/zoe/zoedepth/utils/__init__.py
index 5f2668792..c344f725c 100644
--- a/modules/control/proc/zoe/zoedepth/utils/__init__.py
+++ b/modules/control/proc/zoe/zoedepth/utils/__init__.py
@@ -21,4 +21,3 @@
# SOFTWARE.
# File author: Shariq Farooq Bhat
-
diff --git a/modules/flash_attn_triton_amd/fwd_prefill.py b/modules/flash_attn_triton_amd/fwd_prefill.py
index f03d229fc..38589f016 100644
--- a/modules/flash_attn_triton_amd/fwd_prefill.py
+++ b/modules/flash_attn_triton_amd/fwd_prefill.py
@@ -295,7 +295,7 @@ def attn_fwd(Q, K, V, bias, Cache_seqlens, Cache_batch_idx, # pylint: disable=un
# The tensor allocated for L is based on MAX_SEQLENS_Q as that is
# statically known.
l_offset = LSE + off_z * stride_lse_z + off_h_q * stride_lse_h + cu_seqlens_q_start * stride_lse_m
- l_ptrs = l_offset + offs_m * stride_lse_m
+ l_ptrs = l_offset + offs_m * stride_lse_m
l = tl.full([BLOCK_M], value=0.0, dtype=ACCUMULATOR_TYPE)
@@ -450,7 +450,7 @@ def attn_fwd(Q, K, V, bias, Cache_seqlens, Cache_batch_idx, # pylint: disable=un
# write back LSE(Log Sum Exponents), the log of the normalization constant
l_offset = LSE + off_z * stride_lse_z + off_h_q * stride_lse_h + cu_seqlens_q_start * stride_lse_m
- l_ptrs = l_offset + offs_m * stride_lse_m
+ l_ptrs = l_offset + offs_m * stride_lse_m
if USE_EXP2:
RCP_LN2: tl.constexpr = 1.4426950408889634
LN2: tl.constexpr = 0.6931471824645996
@@ -499,9 +499,9 @@ def attention_prefill_forward_triton_impl(
bias: Optional[torch.Tensor],
layout: Literal["bshd", "bhsd", "thd"],
# varlen
- cu_seqlens_q: Optional[torch.Tensor],
+ cu_seqlens_q: Optional[torch.Tensor],
cu_seqlens_k: Optional[torch.Tensor],
- max_seqlens_q: int,
+ max_seqlens_q: int,
max_seqlens_k: int,
# inference
cache_seqlens: Optional[Union[(int, torch.Tensor)]],
@@ -570,7 +570,7 @@ def attention_prefill_forward_triton_impl(
attn_fwd[grid](q, k, v, bias, cache_seqlens, cache_batch_idx,
sm_scale, softmax_lse, o, *q_strides, *k_strides, *v_strides, *o_strides,
*bias_strides, stride_az, stride_ah, *scores_strides, stride_lse_z, stride_lse_h, stride_lse_m, cu_seqlens_q, cu_seqlens_k,
- dropout_p=dropout_p, philox_seed=philox_seed, philox_offset_base=philox_offset, sd_mask=sd_mask, dropout_mask=dropout_mask, alibi_slopes=alibi_slopes,
+ dropout_p=dropout_p, philox_seed=philox_seed, philox_offset_base=philox_offset, sd_mask=sd_mask, dropout_mask=dropout_mask, alibi_slopes=alibi_slopes,
HQ=nheads_q, HK=nheads_k, ACTUAL_BLOCK_DMODEL=head_size, MAX_SEQLENS_Q=max_seqlens_q,
MAX_SEQLENS_K=max_seqlens_k, IS_CAUSAL=causal, IS_VARLEN=is_varlen, IS_INFERENCE=is_inference,
BLOCK_DMODEL=padded_d_model, USE_BIAS=False if bias is None else True,
diff --git a/modules/flash_attn_triton_amd/utils.py b/modules/flash_attn_triton_amd/utils.py
index 9250819b2..09f6a6d78 100644
--- a/modules/flash_attn_triton_amd/utils.py
+++ b/modules/flash_attn_triton_amd/utils.py
@@ -366,7 +366,7 @@ def get_shape_from_layout(
elif layout == 'thd':
total_seqlen, num_heads, head_dim = x.shape
if cu_seqlens is None:
- raise ValueError("cu_seqlens must be provided for varlen (thd) layout")
+ raise ValueError("cu_seqlens must be provided for varlen (thd) layout")
if max_seqlen is None:
raise ValueError("max_seqlen must be provided for varlen (thd) layout")
@@ -389,7 +389,7 @@ def get_shapes_from_layout(q, k, layout, cu_seqlens_q = None, cu_seqlens_k = Non
def get_stride_from_layout(x: torch.Tensor, layout:Literal["bshd", "bhsd", "thd"]):
if layout == 'thd':
- strides = (0, x.stride(1), x.stride(0), x.stride(2))
+ strides = (0, x.stride(1), x.stride(0), x.stride(2))
elif layout == 'bhsd':
strides = (x.stride(0), x.stride(1), x.stride(2), x.stride(3))
elif layout == 'bshd':
diff --git a/modules/schedulers/scheduler_bdia.py b/modules/schedulers/scheduler_bdia.py
index bb3e7f9b2..cffb5cb35 100644
--- a/modules/schedulers/scheduler_bdia.py
+++ b/modules/schedulers/scheduler_bdia.py
@@ -219,7 +219,7 @@ class BDIA_DDIMScheduler(SchedulerMixin, ConfigMixin):
self.betas = rescale_zero_terminal_snr(self.betas)
self.alphas = 1.0 - self.betas #may have to add something for last step
-
+
self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
# At every step in ddim, we are looking into the previous alphas_cumprod
# For the final step, there is no previous alphas_cumprod because we are already at 0
@@ -357,7 +357,7 @@ class BDIA_DDIMScheduler(SchedulerMixin, ConfigMixin):
) -> Union[DDIMSchedulerOutput, Tuple]:
"""
Predict the sample from the previous timestep by reversing the SDE.
-
+
Args:
model_output (torch.Tensor): Direct output from learned diffusion model
timestep (int): Current discrete timestep in the diffusion chain
@@ -458,15 +458,15 @@ class BDIA_DDIMScheduler(SchedulerMixin, ConfigMixin):
alpha_prod_t_next = self.alphas_cumprod[next_timestep]
alpha_i_plus_1 = alpha_prod_t_next ** 0.5
sigma_i_plus_1 = (1 - alpha_prod_t_next) ** 0.5
-
+
if debug:
print(f"alpha_i_plus_1: {alpha_i_plus_1}")
print(f"sigma_i_plus_1: {sigma_i_plus_1}")
-
+
a = alpha_i_plus_1 * pred_original_sample + sigma_i_plus_1 * pred_epsilon
bdia_step = (
- self.config.gamma * self.next_sample[-2] +
- ddim_step -
+ self.config.gamma * self.next_sample[-2] +
+ ddim_step -
(self.config.gamma * a)
)
self.update_next_sample_BDIA(bdia_step)
@@ -477,7 +477,7 @@ class BDIA_DDIMScheduler(SchedulerMixin, ConfigMixin):
if eta > 0:
if debug:
print(f"\nApplying variance noise with eta: {eta}")
-
+
if variance_noise is not None and generator is not None:
raise ValueError(
"Cannot pass both generator and variance_noise. Use either `generator` or `variance_noise`."
@@ -496,7 +496,7 @@ class BDIA_DDIMScheduler(SchedulerMixin, ConfigMixin):
return (prev_sample,)
return DDIMSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample)
-
+
def add_noise(
self,
original_samples: torch.Tensor,
@@ -542,10 +542,10 @@ class BDIA_DDIMScheduler(SchedulerMixin, ConfigMixin):
velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample
return velocity
-
+
def update_next_sample_BDIA(self, new_value):
self.next_sample.append(new_value.clone())
def __len__(self):
- return self.config.num_train_timesteps
\ No newline at end of file
+ return self.config.num_train_timesteps
diff --git a/modules/schedulers/scheduler_dc.py b/modules/schedulers/scheduler_dc.py
index 190588855..8b483f901 100644
--- a/modules/schedulers/scheduler_dc.py
+++ b/modules/schedulers/scheduler_dc.py
@@ -740,7 +740,7 @@ class DCSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
t_ = ratio * (t_prev_list[-1] - t_prev_list[-2]) + t_prev_list[-2]
inter_order = min(self.dc_order + 1, 4)
-
+
if inter_order is not None:
model_t_dc = torch.zeros_like(model_prev_list[-1])
for i in range(inter_order):
@@ -768,7 +768,7 @@ class DCSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
else:
scalar_t = 0
ratio_param = torch.nn.Parameter(torch.tensor([param_initial], device=sample.device), requires_grad=True)
-
+
sample_clone = sample.clone()
index = np.where(self.ddim_gt['ts'] >= scalar_t)[0].max()
diff --git a/modules/schedulers/scheduler_dpm_flowmatch.py b/modules/schedulers/scheduler_dpm_flowmatch.py
index c1f045e8a..2bf5b092a 100644
--- a/modules/schedulers/scheduler_dpm_flowmatch.py
+++ b/modules/schedulers/scheduler_dpm_flowmatch.py
@@ -114,25 +114,25 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
The DPMSolver order which can be `2` or `3`. It is recommended to use `solver_order=2` for guided
sampling, and `solver_order=3` for unconditional sampling.
algorithm_type (`str`, defaults to `dpmsolver++2M`):
- Algorithm type for the solver; can be `dpmsolver2`, `dpmsolver2A`, `dpmsolver++2M`, `dpmsolver++2S`, `dpmsolver++sde`, `dpmsolver++2Msde`,
+ Algorithm type for the solver; can be `dpmsolver2`, `dpmsolver2A`, `dpmsolver++2M`, `dpmsolver++2S`, `dpmsolver++sde`, `dpmsolver++2Msde`,
or `dpmsolver++3Msde`.
solver_type (`str`, defaults to `midpoint`):
Solver type for the second-order solver; can be `midpoint` or `heun`. The solver type slightly affects the
sample quality, especially for a small number of steps. It is recommended to use `midpoint` solvers.
- sigma_schedule (`str`, *optional*, defaults to None (beta)): Sigma schedule to compute the `sigmas`. Optionally, we use
- the schedule "karras" introduced in the EDM paper (https://arxiv.org/abs/2206.00364). Other acceptable values are
- "exponential". The exponential schedule was incorporated in this model: https://huggingface.co/stabilityai/cosxl.
- Other acceptable values are "lambdas". The uniform-logSNR for step sizes proposed by Lu's DPM-Solver in the
+ sigma_schedule (`str`, *optional*, defaults to None (beta)): Sigma schedule to compute the `sigmas`. Optionally, we use
+ the schedule "karras" introduced in the EDM paper (https://arxiv.org/abs/2206.00364). Other acceptable values are
+ "exponential". The exponential schedule was incorporated in this model: https://huggingface.co/stabilityai/cosxl.
+ Other acceptable values are "lambdas". The uniform-logSNR for step sizes proposed by Lu's DPM-Solver in the
noise schedule during the sampling process. The sigmas and time steps are determined according to a sequence of `lambda(t)`.
"betas" for step sizes in the noise schedule during the sampling process. Refer to [Beta
Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information.
use_noise_sampler for BrownianTreeNoiseSampler (only valid for `dpmsolver++2S`, `dpmsolver++sde`, `dpmsolver++2Msde`, or `dpmsolver++3Msde`.
- A noise sampler backed by a torchsde increasing the stability of convergence. Default strategy
+ A noise sampler backed by a torchsde increasing the stability of convergence. Default strategy
(random noise) has it jumping all over the place, but Brownian sampling is more stable. Utilizes the model generation seed provided.
midpoint_ratio (`float`, *optional*, range: 0.4 to 0.6, default=0.5): Only valid for (`dpmsolver++sde`, `dpmsolver++2S`).
Higher values may result in smoothing, more vivid colors and less noise at the expense of more detail and effect.
- s_noise (`float`, *optional*, defaults to 1.0): Sigma noise strength: range 0 - 1.1 (only valid for `dpmsolver++2S`, `dpmsolver++sde`,
- `dpmsolver++2Msde`, or `dpmsolver++3Msde`). The amount of additional noise to counteract loss of detail during sampling. A
+ s_noise (`float`, *optional*, defaults to 1.0): Sigma noise strength: range 0 - 1.1 (only valid for `dpmsolver++2S`, `dpmsolver++sde`,
+ `dpmsolver++2Msde`, or `dpmsolver++3Msde`). The amount of additional noise to counteract loss of detail during sampling. A
reasonable range is [1.000, 1.011]. Defaults to 1.0 from the original implementation.
use_beta_sigmas: (`bool` defaults to False for FLUX and True for SD3). Based on original interpretation of using beta values for determining sigmas.
use_dynamic_shifting (`bool` defaults to False for SD3 and True for FLUX). When `True`, shift is ignored.
diff --git a/modules/shared.py b/modules/shared.py
index 8e37ab09b..0a3588851 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -717,7 +717,7 @@ options_templates.update(options_section(('saving-paths', "Image Paths"), {
"outdir_extras_samples": OptionInfo("outputs/extras", 'Folder for processed images', component_args=hide_dirs, folder=True),
"outdir_save": OptionInfo("outputs/save", "Folder for manually saved images", component_args=hide_dirs, folder=True),
"outdir_video": OptionInfo("outputs/video", "Folder for videos", component_args=hide_dirs, folder=True),
- "outdir_init_images": OptionInfo("outputs/init-images", "Folder for init images", component_args=hide_dirs, folder=True),
+ "outdir_init_images": OptionInfo("outputs/inputs", "Folder for init images", component_args=hide_dirs, folder=True),
"outdir_sep_grids": OptionInfo("
Grids
", "", gr.HTML),
"outdir_grids": OptionInfo("", "Grids folder", component_args=hide_dirs, folder=True),
diff --git a/modules/teacache/teacache_chroma.py b/modules/teacache/teacache_chroma.py
index 102ae7f9b..8b2b22333 100644
--- a/modules/teacache/teacache_chroma.py
+++ b/modules/teacache/teacache_chroma.py
@@ -329,4 +329,4 @@ def teacache_chroma_forward(
if not return_dict:
return (output,)
- return Transformer2DModelOutput(sample=output)
\ No newline at end of file
+ return Transformer2DModelOutput(sample=output)
diff --git a/modules/teacache/teacache_cogvideox.py b/modules/teacache/teacache_cogvideox.py
index 436338b37..33ed865d6 100644
--- a/modules/teacache/teacache_cogvideox.py
+++ b/modules/teacache/teacache_cogvideox.py
@@ -64,10 +64,10 @@ def teacache_cog_forward(
if self.cnt == 0 or self.cnt == self.num_steps-1:
should_calc = True
self.accumulated_rel_l1_distance = 0
- else:
+ else:
if not self.config.use_rotary_positional_embeddings:
# CogVideoX-2B
- coefficients = [-3.10658903e+01, 2.54732368e+01, -5.92380459e+00, 1.75769064e+00, -3.61568434e-03]
+ coefficients = [-3.10658903e+01, 2.54732368e+01, -5.92380459e+00, 1.75769064e+00, -3.61568434e-03]
else:
# CogVideoX-5B and CogvideoX1.5-5B
coefficients = [-1.53880483e+03, 8.43202495e+02, -1.34363087e+02, 7.97131516e+00, -5.23162339e-02]
@@ -81,8 +81,8 @@ def teacache_cog_forward(
self.previous_modulated_input = emb
self.cnt += 1
if self.cnt == self.num_steps:
- self.cnt = 0
-
+ self.cnt = 0
+
if self.enable_teacache:
if not should_calc:
hidden_states += self.previous_residual
diff --git a/modules/teacache/teacache_hidream.py b/modules/teacache/teacache_hidream.py
index eab1b7220..cb3767cab 100644
--- a/modules/teacache/teacache_hidream.py
+++ b/modules/teacache/teacache_hidream.py
@@ -119,7 +119,7 @@ def teacache_hidream_forward(
else:
should_calc = True
self.accumulated_rel_l1_distance = 0
- self.previous_modulated_input = modulated_inp
+ self.previous_modulated_input = modulated_inp
self.cnt += 1
if self.cnt == self.num_steps:
self.cnt = 0
@@ -262,4 +262,4 @@ def teacache_hidream_forward(
if not return_dict:
return (output,)
- return Transformer2DModelOutput(sample=output)
\ No newline at end of file
+ return Transformer2DModelOutput(sample=output)
diff --git a/modules/teacache/teacache_ltx.py b/modules/teacache/teacache_ltx.py
index 8a4e1b392..23d4897c0 100644
--- a/modules/teacache/teacache_ltx.py
+++ b/modules/teacache/teacache_ltx.py
@@ -69,7 +69,7 @@ def teacache_ltx_forward(
if self.cnt == 0 or self.cnt == self.num_steps-1:
should_calc = True
self.accumulated_rel_l1_distance = 0
- else:
+ else:
coefficients = [2.14700694e+01, -1.28016453e+01, 2.31279151e+00, 7.92487521e-01, 9.69274326e-03]
rescale_func = np.poly1d(coefficients)
self.accumulated_rel_l1_distance += rescale_func(((modulated_inp-self.previous_modulated_input).abs().mean() / self.previous_modulated_input.abs().mean()).cpu().item())
@@ -78,11 +78,11 @@ def teacache_ltx_forward(
else:
should_calc = True
self.accumulated_rel_l1_distance = 0
- self.previous_modulated_input = modulated_inp
+ self.previous_modulated_input = modulated_inp
self.cnt += 1
if self.cnt == self.num_steps:
- self.cnt = 0
-
+ self.cnt = 0
+
if self.enable_teacache:
if not should_calc:
hidden_states += self.previous_residual
diff --git a/modules/teacache/teacache_mochi.py b/modules/teacache/teacache_mochi.py
index e899fb164..2be193a77 100644
--- a/modules/teacache/teacache_mochi.py
+++ b/modules/teacache/teacache_mochi.py
@@ -65,7 +65,7 @@ def teacache_mochi_forward(
if self.cnt == 0 or self.cnt == self.num_steps-1:
should_calc = True
self.accumulated_rel_l1_distance = 0
- else:
+ else:
coefficients = [-3.51241319e+03, 8.11675948e+02, -6.09400215e+01, 2.42429681e+00, 3.05291719e-03]
rescale_func = np.poly1d(coefficients)
self.accumulated_rel_l1_distance += rescale_func(((modulated_inp-self.previous_modulated_input).abs().mean() / self.previous_modulated_input.abs().mean()).cpu().item())
@@ -74,11 +74,11 @@ def teacache_mochi_forward(
else:
should_calc = True
self.accumulated_rel_l1_distance = 0
- self.previous_modulated_input = modulated_inp
+ self.previous_modulated_input = modulated_inp
self.cnt += 1
if self.cnt == self.num_steps:
- self.cnt = 0
-
+ self.cnt = 0
+
if self.enable_teacache:
if not should_calc:
hidden_states += self.previous_residual
@@ -112,7 +112,7 @@ def teacache_mochi_forward(
image_rotary_emb=image_rotary_emb,
)
hidden_states = self.norm_out(hidden_states, temb)
- self.previous_residual = hidden_states - ori_hidden_states
+ self.previous_residual = hidden_states - ori_hidden_states
else:
for i, block in enumerate(self.transformer_blocks):
if torch.is_grad_enabled() and self.gradient_checkpointing:
diff --git a/motd b/motd
index 0519ecba6..e69de29bb 100644
--- a/motd
+++ b/motd
@@ -1 +0,0 @@
-
\ No newline at end of file
diff --git a/package.json b/package.json
index 159b76420..05604fb65 100644
--- a/package.json
+++ b/package.json
@@ -22,7 +22,8 @@
"eslint": "eslint javascript/ extensions-builtin/sdnext-modernui/javascript/",
"ruff": ". venv/bin/activate && ruff check",
"pylint": ". venv/bin/activate && pylint *.py modules/ pipelines/ scripts/ extensions-builtin/ | grep -v '^*'",
- "lint": "npm run eslint && npm run ruff && npm run pylint | grep -v TODO",
+ "format": ". venv/bin/activate && pre-commit run -a",
+ "lint": "npm run eslint && npm run format && npm run ruff && npm run pylint | grep -v TODO",
"todo": "npm run pylint | grep W0511 | awk -F'TODO ' '{print \"- \"$NF}' | sed 's/ (fixme)//g' | sort",
"test": ". venv/bin/activate; python launch.py --debug --test"
},
diff --git a/pipelines/meissonic/pipeline.py b/pipelines/meissonic/pipeline.py
index 512eab742..4f1bb05a2 100644
--- a/pipelines/meissonic/pipeline.py
+++ b/pipelines/meissonic/pipeline.py
@@ -54,7 +54,7 @@ class Pipeline(DiffusionPipeline):
vqvae: VQModel
tokenizer: CLIPTokenizer
text_encoder: CLIPTextModelWithProjection
- transformer: Transformer2DModel
+ transformer: Transformer2DModel
scheduler: Scheduler
# tokenizer_t5: T5Tokenizer
# text_encoder_t5: T5ForConditionalGeneration
@@ -66,7 +66,7 @@ class Pipeline(DiffusionPipeline):
vqvae: VQModel,
tokenizer: CLIPTokenizer,
text_encoder: CLIPTextModelWithProjection,
- transformer: Transformer2DModel,
+ transformer: Transformer2DModel,
scheduler: Scheduler,
# tokenizer_t5: T5Tokenizer,
# text_encoder_t5: T5ForConditionalGeneration,
@@ -226,8 +226,8 @@ class Pipeline(DiffusionPipeline):
# truncation=True,
# max_length=512,
# ).input_ids.to(self._execution_device)
-
-
+
+
outputs = self.text_encoder(input_ids, return_dict=True, output_hidden_states=True)
# outputs_t5 = self.text_encoder_t5(input_ids_t5, decoder_input_ids = input_ids_t5 ,return_dict=True, output_hidden_states=True)
prompt_embeds = outputs.text_embeds
@@ -265,8 +265,8 @@ class Pipeline(DiffusionPipeline):
negative_prompt_embeds = outputs.text_embeds
negative_encoder_hidden_states = outputs.hidden_states[-2]
# negative_encoder_hidden_states = outputs_t5.encoder_hidden_states[-2]
-
-
+
+
negative_prompt_embeds = negative_prompt_embeds.repeat(num_images_per_prompt, 1)
negative_encoder_hidden_states = negative_encoder_hidden_states.repeat(num_images_per_prompt, 1, 1)
@@ -370,4 +370,4 @@ class Pipeline(DiffusionPipeline):
if not return_dict:
return (output,)
- return ImagePipelineOutput(output)
\ No newline at end of file
+ return ImagePipelineOutput(output)
diff --git a/pipelines/meissonic/test.py b/pipelines/meissonic/test.py
index 46189c85d..5687cbff0 100644
--- a/pipelines/meissonic/test.py
+++ b/pipelines/meissonic/test.py
@@ -26,7 +26,7 @@ pipe = pipe.to(device)
steps = 64
guidance_scale = 9
-resolution = 1024
+resolution = 1024
negative = "worst quality, low quality, low res, blurry, distortion, watermark, logo, signature, text, jpeg artifacts, signature, sketch, duplicate, ugly, identifying mark"
prompt = "Beautiful young woman posing on a lake with snow covered mountains in the background"
image = pipe(prompt=prompt, negative_prompt=negative, height=resolution, width=resolution, guidance_scale=guidance_scale, num_inference_steps=steps).images[0]
diff --git a/pipelines/meissonic/transformer.py b/pipelines/meissonic/transformer.py
index 543c30108..c2336d323 100644
--- a/pipelines/meissonic/transformer.py
+++ b/pipelines/meissonic/transformer.py
@@ -34,7 +34,7 @@ from diffusers.models.normalization import AdaLayerNormContinuous, AdaLayerNormZ
from diffusers.utils import USE_PEFT_BACKEND, is_torch_version, logging, scale_lora_layers, unscale_lora_layers
from diffusers.utils.torch_utils import maybe_allow_in_graph
from diffusers.models.embeddings import CombinedTimestepGuidanceTextProjEmbeddings, CombinedTimestepTextProjEmbeddings,TimestepEmbedding, get_timestep_embedding #,FluxPosEmbed
-from diffusers.models.modeling_outputs import Transformer2DModelOutput
+from diffusers.models.modeling_outputs import Transformer2DModelOutput
from diffusers.models.resnet import Downsample2D, Upsample2D
from typing import List
@@ -794,8 +794,8 @@ class Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginal
guidance_embeds (`bool`, defaults to False): Whether to use guidance embeddings.
"""
- _supports_gradient_checkpointing = False #True
- # Due to NotImplementedError: DDPOptimizer backend: Found a higher order op in the graph. This is not supported. Please turn off DDP optimizer using torch._dynamo.config.optimize_ddp=False. Note that this can cause performance degradation because there will be one bucket for the entire Dynamo graph.
+ _supports_gradient_checkpointing = False #True
+ # Due to NotImplementedError: DDPOptimizer backend: Found a higher order op in the graph. This is not supported. Please turn off DDP optimizer using torch._dynamo.config.optimize_ddp=False. Note that this can cause performance degradation because there will be one bucket for the entire Dynamo graph.
# Please refer to this issue - https://github.com/pytorch/pytorch/issues/104674.
_no_split_modules = ["TransformerBlock", "SingleTransformerBlock"]
@@ -819,7 +819,7 @@ class Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginal
):
super().__init__()
self.out_channels = in_channels
- self.inner_dim = self.config.num_attention_heads * self.config.attention_head_dim
+ self.inner_dim = self.config.num_attention_heads * self.config.attention_head_dim
self.pos_embed = FluxPosEmbed(theta=10000, axes_dim=axes_dims_rope)
text_time_guidance_cls = (
@@ -830,7 +830,7 @@ class Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginal
)
self.context_embedder = nn.Linear(self.config.joint_attention_dim, self.inner_dim)
-
+
self.transformer_blocks = nn.ModuleList(
[
TransformerBlock(
@@ -856,7 +856,7 @@ class Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginal
self.gradient_checkpointing = False
- in_channels_embed = self.inner_dim
+ in_channels_embed = self.inner_dim
ln_elementwise_affine = True
layer_norm_eps = 1e-06
use_bias = False
@@ -867,7 +867,7 @@ class Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginal
self.mlm_layer = ConvMlmLayer(
self.inner_dim, in_channels_embed, use_bias, ln_elementwise_affine, layer_norm_eps, self.config.codebook_size
)
- self.cond_embed = TimestepEmbedding(
+ self.cond_embed = TimestepEmbedding(
micro_cond_embed_dim + self.config.pooled_projection_dim, self.inner_dim, sample_proj_bias=use_bias
)
self.encoder_proj_layer_norm = RMSNorm(self.inner_dim, layer_norm_eps, ln_elementwise_affine)
@@ -875,9 +875,9 @@ class Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginal
self.project_to_hidden = nn.Linear(in_channels_embed, self.inner_dim, bias=use_bias)
self.project_from_hidden_norm = RMSNorm(self.inner_dim, layer_norm_eps, ln_elementwise_affine)
self.project_from_hidden = nn.Linear(self.inner_dim, in_channels_embed, bias=use_bias)
-
+
self.down_block = Simple_UVitBlock(
- self.inner_dim,
+ self.inner_dim,
ln_elementwise_affine,
layer_norm_eps,
use_bias,
@@ -892,7 +892,7 @@ class Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginal
False,
upsample=upsample,
)
-
+
# self.fuse_qkv_projections()
@property
@@ -1043,26 +1043,26 @@ class Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginal
micro_cond_encode_dim = 256 # same as self.config.micro_cond_encode_dim = 256 from amused
micro_cond_embeds = get_timestep_embedding(
micro_conds.flatten(), micro_cond_encode_dim, flip_sin_to_cos=True, downscale_freq_shift=0
- )
- micro_cond_embeds = micro_cond_embeds.reshape((hidden_states.shape[0], -1))
+ )
+ micro_cond_embeds = micro_cond_embeds.reshape((hidden_states.shape[0], -1))
pooled_projections = torch.cat([pooled_projections, micro_cond_embeds], dim=1)
pooled_projections = pooled_projections.to(dtype=self.dtype)
- pooled_projections = self.cond_embed(pooled_projections).to(encoder_hidden_states.dtype)
-
+ pooled_projections = self.cond_embed(pooled_projections).to(encoder_hidden_states.dtype)
- hidden_states = self.embed(hidden_states)
- encoder_hidden_states = self.context_embedder(encoder_hidden_states)
+ hidden_states = self.embed(hidden_states)
+
+ encoder_hidden_states = self.context_embedder(encoder_hidden_states)
encoder_hidden_states = self.encoder_proj_layer_norm(encoder_hidden_states)
hidden_states = self.down_block(hidden_states)
batch_size, channels, height, width = hidden_states.shape
hidden_states = hidden_states.permute(0, 2, 3, 1).reshape(batch_size, height * width, channels)
- hidden_states = self.project_to_hidden_norm(hidden_states)
+ hidden_states = self.project_to_hidden_norm(hidden_states)
hidden_states = self.project_to_hidden(hidden_states)
-
+
if joint_attention_kwargs is not None:
joint_attention_kwargs = joint_attention_kwargs.copy()
lora_scale = joint_attention_kwargs.pop("scale", 1.0)
@@ -1083,11 +1083,11 @@ class Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginal
guidance = guidance.to(hidden_states.dtype) * 1000
else:
guidance = None
- temb = (
+ temb = (
self.time_text_embed(timestep, pooled_projections)
if guidance is None
else self.time_text_embed(timestep, guidance, pooled_projections)
- )
+ )
if txt_ids.ndim == 3:
logger.warning(
@@ -1102,8 +1102,8 @@ class Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginal
)
img_ids = img_ids[0]
ids = torch.cat((txt_ids, img_ids), dim=0)
-
- image_rotary_emb = self.pos_embed(ids)
+
+ image_rotary_emb = self.pos_embed(ids)
for index_block, block in enumerate(self.transformer_blocks):
if self.training and self.gradient_checkpointing:
@@ -1131,10 +1131,10 @@ class Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginal
encoder_hidden_states, hidden_states = block(
hidden_states=hidden_states,
encoder_hidden_states=encoder_hidden_states,
- temb=temb,
+ temb=temb,
image_rotary_emb=image_rotary_emb,
)
-
+
# controlnet residual
if controlnet_block_samples is not None:
@@ -1181,12 +1181,12 @@ class Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginal
+ controlnet_single_block_samples[index_block // interval_control]
)
- hidden_states = hidden_states[:, encoder_hidden_states.shape[1] :, ...]
+ hidden_states = hidden_states[:, encoder_hidden_states.shape[1] :, ...]
+
-
hidden_states = self.project_from_hidden_norm(hidden_states)
hidden_states = self.project_from_hidden(hidden_states)
-
+
hidden_states = hidden_states.reshape(batch_size, height, width, channels).permute(0, 3, 1, 2)
@@ -1195,11 +1195,11 @@ class Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginal
if USE_PEFT_BACKEND:
# remove `lora_scale` from each PEFT layer
unscale_lora_layers(self, lora_scale)
-
+
output = self.mlm_layer(hidden_states)
# self.unfuse_qkv_projections()
if not return_dict:
return (output,)
-
- return output
\ No newline at end of file
+
+ return output
diff --git a/pipelines/omnigen2/pipeline_omnigen2.py b/pipelines/omnigen2/pipeline_omnigen2.py
index a7d6c2ca3..d2e13ba18 100644
--- a/pipelines/omnigen2/pipeline_omnigen2.py
+++ b/pipelines/omnigen2/pipeline_omnigen2.py
@@ -54,8 +54,8 @@ class FMPipelineOutput(BaseOutput):
Output class for OmniGen2 pipeline.
Args:
- images (Union[List[PIL.Image.Image], np.ndarray]):
- List of denoised PIL images of length `batch_size` or numpy array of shape
+ images (Union[List[PIL.Image.Image], np.ndarray]):
+ List of denoised PIL images of length `batch_size` or numpy array of shape
`(batch_size, height, width, num_channels)`. Contains the generated images.
"""
images: Union[List[PIL.Image.Image], np.ndarray]
diff --git a/repositories/codeformer/LICENSE b/repositories/codeformer/LICENSE
index 44bf750a2..be6c4ed80 100644
--- a/repositories/codeformer/LICENSE
+++ b/repositories/codeformer/LICENSE
@@ -2,34 +2,34 @@ S-Lab License 1.0
Copyright 2022 S-Lab
-Redistribution and use for non-commercial purpose in source and
-binary forms, with or without modification, are permitted provided
+Redistribution and use for non-commercial purpose in source and
+binary forms, with or without modification, are permitted provided
that the following conditions are met:
-1. Redistributions of source code must retain the above copyright
+1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
-2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in
- the documentation and/or other materials provided with the
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
distribution.
-3. Neither the name of the copyright holder nor the names of its
- contributors may be used to endorse or promote products derived
+3. Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-In the event that redistribution and/or use for commercial purpose in
-source or binary forms, with or without modification is required,
-please contact the contributor(s) of the work.
\ No newline at end of file
+In the event that redistribution and/or use for commercial purpose in
+source or binary forms, with or without modification is required,
+please contact the contributor(s) of the work.
diff --git a/repositories/codeformer/basicsr/archs/arcface_arch.py b/repositories/codeformer/basicsr/archs/arcface_arch.py
index fe5afb7bd..e6d3bd97f 100644
--- a/repositories/codeformer/basicsr/archs/arcface_arch.py
+++ b/repositories/codeformer/basicsr/archs/arcface_arch.py
@@ -242,4 +242,4 @@ class ResNetArcFace(nn.Module):
x = self.fc5(x)
x = self.bn5(x)
- return x
\ No newline at end of file
+ return x
diff --git a/repositories/codeformer/basicsr/archs/arch_util.py b/repositories/codeformer/basicsr/archs/arch_util.py
index bad45ab34..11b82a74e 100644
--- a/repositories/codeformer/basicsr/archs/arch_util.py
+++ b/repositories/codeformer/basicsr/archs/arch_util.py
@@ -315,4 +315,4 @@ to_1tuple = _ntuple(1)
to_2tuple = _ntuple(2)
to_3tuple = _ntuple(3)
to_4tuple = _ntuple(4)
-to_ntuple = _ntuple
\ No newline at end of file
+to_ntuple = _ntuple
diff --git a/repositories/codeformer/basicsr/archs/codeformer_arch.py b/repositories/codeformer/basicsr/archs/codeformer_arch.py
index 4d0d8027c..fd633cf2e 100644
--- a/repositories/codeformer/basicsr/archs/codeformer_arch.py
+++ b/repositories/codeformer/basicsr/archs/codeformer_arch.py
@@ -119,7 +119,7 @@ class TransformerSALayer(nn.Module):
tgt_mask: Optional[Tensor] = None,
tgt_key_padding_mask: Optional[Tensor] = None,
query_pos: Optional[Tensor] = None):
-
+
# self attention
tgt2 = self.norm1(tgt)
q = k = self.with_pos_embed(tgt2, query_pos)
@@ -159,7 +159,7 @@ class Fuse_sft_block(nn.Module):
@ARCH_REGISTRY.register()
class CodeFormer(VQAutoEncoder):
- def __init__(self, dim_embd=512, n_head=8, n_layers=9,
+ def __init__(self, dim_embd=512, n_head=8, n_layers=9,
codebook_size=1024, latent_size=256,
connect_list=['32', '64', '128', '256'],
fix_modules=['quantize','generator']):
@@ -179,14 +179,14 @@ class CodeFormer(VQAutoEncoder):
self.feat_emb = nn.Linear(256, self.dim_embd)
# transformer
- self.ft_layers = nn.Sequential(*[TransformerSALayer(embed_dim=dim_embd, nhead=n_head, dim_mlp=self.dim_mlp, dropout=0.0)
+ self.ft_layers = nn.Sequential(*[TransformerSALayer(embed_dim=dim_embd, nhead=n_head, dim_mlp=self.dim_mlp, dropout=0.0)
for _ in range(self.n_layers)])
# logits_predict head
self.idx_pred_layer = nn.Sequential(
nn.LayerNorm(dim_embd),
nn.Linear(dim_embd, codebook_size, bias=False))
-
+
self.channels = {
'16': 512,
'32': 256,
@@ -221,7 +221,7 @@ class CodeFormer(VQAutoEncoder):
enc_feat_dict = {}
out_list = [self.fuse_encoder_block[f_size] for f_size in self.connect_list]
for i, block in enumerate(self.encoder.blocks):
- x = block(x)
+ x = block(x)
if i in out_list:
enc_feat_dict[str(x.shape[-1])] = x.clone()
@@ -266,11 +266,11 @@ class CodeFormer(VQAutoEncoder):
fuse_list = [self.fuse_generator_block[f_size] for f_size in self.connect_list]
for i, block in enumerate(self.generator.blocks):
- x = block(x)
+ x = block(x)
if i in fuse_list: # fuse after i-th block
f_size = str(x.shape[-1])
if w>0:
x = self.fuse_convs_dict[f_size](enc_feat_dict[f_size].detach(), x, w)
out = x
# logits doesn't need softmax before cross_entropy loss
- return out, logits, lq_feat
\ No newline at end of file
+ return out, logits, lq_feat
diff --git a/repositories/codeformer/basicsr/archs/rrdbnet_arch.py b/repositories/codeformer/basicsr/archs/rrdbnet_arch.py
index 49a2d6c20..e1f31bcad 100644
--- a/repositories/codeformer/basicsr/archs/rrdbnet_arch.py
+++ b/repositories/codeformer/basicsr/archs/rrdbnet_arch.py
@@ -116,4 +116,4 @@ class RRDBNet(nn.Module):
feat = self.lrelu(self.conv_up1(F.interpolate(feat, scale_factor=2, mode='nearest')))
feat = self.lrelu(self.conv_up2(F.interpolate(feat, scale_factor=2, mode='nearest')))
out = self.conv_last(self.lrelu(self.conv_hr(feat)))
- return out
\ No newline at end of file
+ return out
diff --git a/repositories/codeformer/basicsr/archs/vqgan_arch.py b/repositories/codeformer/basicsr/archs/vqgan_arch.py
index 5ac692633..54b314877 100644
--- a/repositories/codeformer/basicsr/archs/vqgan_arch.py
+++ b/repositories/codeformer/basicsr/archs/vqgan_arch.py
@@ -13,7 +13,7 @@ from basicsr.utils.registry import ARCH_REGISTRY
def normalize(in_channels):
return torch.nn.GroupNorm(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True)
-
+
@torch.jit.script
def swish(x):
@@ -209,15 +209,15 @@ class AttnBlock(nn.Module):
# compute attention
b, c, h, w = q.shape
q = q.reshape(b, c, h*w)
- q = q.permute(0, 2, 1)
+ q = q.permute(0, 2, 1)
k = k.reshape(b, c, h*w)
- w_ = torch.bmm(q, k)
+ w_ = torch.bmm(q, k)
w_ = w_ * (int(c)**(-0.5))
w_ = F.softmax(w_, dim=2)
# attend to values
v = v.reshape(b, c, h*w)
- w_ = w_.permute(0, 2, 1)
+ w_ = w_.permute(0, 2, 1)
h_ = torch.bmm(v, w_)
h_ = h_.reshape(b, c, h, w)
@@ -269,18 +269,18 @@ class Encoder(nn.Module):
def forward(self, x):
for block in self.blocks:
x = block(x)
-
+
return x
class Generator(nn.Module):
def __init__(self, nf, emb_dim, ch_mult, res_blocks, img_size, attn_resolutions):
super().__init__()
- self.nf = nf
- self.ch_mult = ch_mult
+ self.nf = nf
+ self.ch_mult = ch_mult
self.num_resolutions = len(self.ch_mult)
self.num_res_blocks = res_blocks
- self.resolution = img_size
+ self.resolution = img_size
self.attn_resolutions = attn_resolutions
self.in_channels = emb_dim
self.out_channels = 3
@@ -314,24 +314,24 @@ class Generator(nn.Module):
blocks.append(nn.Conv2d(block_in_ch, self.out_channels, kernel_size=3, stride=1, padding=1))
self.blocks = nn.ModuleList(blocks)
-
+
def forward(self, x):
for block in self.blocks:
x = block(x)
-
+
return x
-
+
@ARCH_REGISTRY.register()
class VQAutoEncoder(nn.Module):
def __init__(self, img_size, nf, ch_mult, quantizer="nearest", res_blocks=2, attn_resolutions=[16], codebook_size=1024, emb_dim=256,
beta=0.25, gumbel_straight_through=False, gumbel_kl_weight=1e-8, model_path=None):
super().__init__()
logger = get_root_logger()
- self.in_channels = 3
- self.nf = nf
- self.n_blocks = res_blocks
+ self.in_channels = 3
+ self.nf = nf
+ self.n_blocks = res_blocks
self.codebook_size = codebook_size
self.embed_dim = emb_dim
self.ch_mult = ch_mult
@@ -362,11 +362,11 @@ class VQAutoEncoder(nn.Module):
self.kl_weight
)
self.generator = Generator(
- self.nf,
+ self.nf,
self.embed_dim,
- self.ch_mult,
- self.n_blocks,
- self.resolution,
+ self.ch_mult,
+ self.n_blocks,
+ self.resolution,
self.attn_resolutions
)
@@ -431,4 +431,4 @@ class VQGANDiscriminator(nn.Module):
raise ValueError(f'Wrong params!')
def forward(self, x):
- return self.main(x)
\ No newline at end of file
+ return self.main(x)
diff --git a/repositories/codeformer/basicsr/losses/losses.py b/repositories/codeformer/basicsr/losses/losses.py
index 71331aa01..f4028c3ff 100644
--- a/repositories/codeformer/basicsr/losses/losses.py
+++ b/repositories/codeformer/basicsr/losses/losses.py
@@ -254,8 +254,8 @@ class PerceptualLoss(nn.Module):
@LOSS_REGISTRY.register()
class LPIPSLoss(nn.Module):
- def __init__(self,
- loss_weight=1.0,
+ def __init__(self,
+ loss_weight=1.0,
use_input_norm=True,
range_norm=False,):
super(LPIPSLoss, self).__init__()
diff --git a/repositories/codeformer/basicsr/train.py b/repositories/codeformer/basicsr/train.py
index a01c0dfcc..2490bdffd 100644
--- a/repositories/codeformer/basicsr/train.py
+++ b/repositories/codeformer/basicsr/train.py
@@ -130,7 +130,7 @@ def train_pipeline(root_path):
# initialize loggers
logger, tb_logger = init_loggers(opt)
-
+
# create train and validation dataloaders
result = create_train_val_dataloader(opt, logger)
train_loader, train_sampler, val_loader, total_epochs, total_iters = result
diff --git a/repositories/codeformer/basicsr/utils/download_util.py b/repositories/codeformer/basicsr/utils/download_util.py
index 2a2679157..9da02ffd4 100644
--- a/repositories/codeformer/basicsr/utils/download_util.py
+++ b/repositories/codeformer/basicsr/utils/download_util.py
@@ -92,4 +92,4 @@ def load_file_from_url(url, model_dir=None, progress=True, file_name=None):
if not os.path.exists(cached_file):
print(f'Downloading: "{url}" to {cached_file}\n')
download_url_to_file(url, cached_file, hash_prefix=None, progress=progress)
- return cached_file
\ No newline at end of file
+ return cached_file
diff --git a/repositories/codeformer/basicsr/utils/img_util.py b/repositories/codeformer/basicsr/utils/img_util.py
index 5aba82ce0..d409a132f 100644
--- a/repositories/codeformer/basicsr/utils/img_util.py
+++ b/repositories/codeformer/basicsr/utils/img_util.py
@@ -168,4 +168,3 @@ def crop_border(imgs, crop_border):
return [v[crop_border:-crop_border, crop_border:-crop_border, ...] for v in imgs]
else:
return imgs[crop_border:-crop_border, crop_border:-crop_border, ...]
-
\ No newline at end of file
diff --git a/repositories/codeformer/basicsr/utils/logger.py b/repositories/codeformer/basicsr/utils/logger.py
index 9714bf59c..c375ddfdd 100644
--- a/repositories/codeformer/basicsr/utils/logger.py
+++ b/repositories/codeformer/basicsr/utils/logger.py
@@ -166,4 +166,4 @@ def get_env_info():
f'\n\tBasicSR: {__version__}'
f'\n\tPyTorch: {torch.__version__}'
f'\n\tTorchVision: {torchvision.__version__}')
- return msg
\ No newline at end of file
+ return msg
diff --git a/repositories/codeformer/basicsr/utils/realesrgan_utils.py b/repositories/codeformer/basicsr/utils/realesrgan_utils.py
index 6b7a8b460..18af7e6ac 100644
--- a/repositories/codeformer/basicsr/utils/realesrgan_utils.py
+++ b/repositories/codeformer/basicsr/utils/realesrgan_utils.py
@@ -209,9 +209,9 @@ class RealESRGANer():
if img_mode == 'L':
output_img = cv2.cvtColor(output_img, cv2.COLOR_BGR2GRAY)
del output_img_t
- torch.cuda.empty_cache()
+ torch.cuda.empty_cache()
except RuntimeError as error:
- print(f"Failed inference for RealESRGAN: {error}")
+ print(f"Failed inference for RealESRGAN: {error}")
# ------------------- process the alpha channel if necessary ------------------- #
if img_mode == 'RGBA':
@@ -296,4 +296,4 @@ class IOConsumer(threading.Thread):
output = msg['output']
save_path = msg['save_path']
cv2.imwrite(save_path, output)
- print(f'IO worker {self.qid} is done.')
\ No newline at end of file
+ print(f'IO worker {self.qid} is done.')
diff --git a/repositories/codeformer/basicsr/utils/video_util.py b/repositories/codeformer/basicsr/utils/video_util.py
index 20a2ff14c..b29df5caf 100644
--- a/repositories/codeformer/basicsr/utils/video_util.py
+++ b/repositories/codeformer/basicsr/utils/video_util.py
@@ -122,4 +122,4 @@ class VideoWriter:
def close(self):
self.stream_writer.stdin.close()
- self.stream_writer.wait()
\ No newline at end of file
+ self.stream_writer.wait()
diff --git a/repositories/codeformer/facelib/detection/yolov5face/face_detector.py b/repositories/codeformer/facelib/detection/yolov5face/face_detector.py
index 79fdba0c9..addb9f703 100644
--- a/repositories/codeformer/facelib/detection/yolov5face/face_detector.py
+++ b/repositories/codeformer/facelib/detection/yolov5face/face_detector.py
@@ -119,9 +119,9 @@ class YoloDetector:
origimgs = copy.deepcopy(images)
images = self._preprocess(images)
-
+
if IS_HIGH_VERSION:
- with torch.inference_mode(): # for pytorch>=1.9
+ with torch.inference_mode(): # for pytorch>=1.9
pred = self.detector(images)[0]
else:
with torch.no_grad(): # for pytorch<1.9
diff --git a/repositories/codeformer/facelib/detection/yolov5face/models/yolov5l.yaml b/repositories/codeformer/facelib/detection/yolov5face/models/yolov5l.yaml
index 0532b0e22..5c8302517 100644
--- a/repositories/codeformer/facelib/detection/yolov5face/models/yolov5l.yaml
+++ b/repositories/codeformer/facelib/detection/yolov5face/models/yolov5l.yaml
@@ -44,4 +44,4 @@ head:
[-1, 3, C3, [1024, False]], # 22 (P5/32-large)
[[16, 19, 22], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
- ]
\ No newline at end of file
+ ]
diff --git a/repositories/codeformer/facelib/detection/yolov5face/utils/extract_ckpt.py b/repositories/codeformer/facelib/detection/yolov5face/utils/extract_ckpt.py
index 4b8b63134..413719ecc 100644
--- a/repositories/codeformer/facelib/detection/yolov5face/utils/extract_ckpt.py
+++ b/repositories/codeformer/facelib/detection/yolov5face/utils/extract_ckpt.py
@@ -2,4 +2,4 @@ import torch
import sys
sys.path.insert(0,'./facelib/detection/yolov5face')
model = torch.load('facelib/detection/yolov5face/yolov5n-face.pt', map_location='cpu')['model']
-torch.save(model.state_dict(),'weights/facelib/yolov5n-face.pth')
\ No newline at end of file
+torch.save(model.state_dict(),'weights/facelib/yolov5n-face.pth')
diff --git a/repositories/codeformer/facelib/utils/__init__.py b/repositories/codeformer/facelib/utils/__init__.py
index f03b1c2ba..23ef0352c 100644
--- a/repositories/codeformer/facelib/utils/__init__.py
+++ b/repositories/codeformer/facelib/utils/__init__.py
@@ -2,6 +2,6 @@ from .face_utils import align_crop_face_landmarks, compute_increased_bbox, get_v
from .misc import img2tensor, load_file_from_url, download_pretrained_models, scandir
__all__ = [
- 'align_crop_face_landmarks', 'compute_increased_bbox', 'get_valid_bboxes', 'load_file_from_url',
+ 'align_crop_face_landmarks', 'compute_increased_bbox', 'get_valid_bboxes', 'load_file_from_url',
'download_pretrained_models', 'paste_face_back', 'img2tensor', 'scandir'
]
diff --git a/repositories/codeformer/facelib/utils/face_restoration_helper.py b/repositories/codeformer/facelib/utils/face_restoration_helper.py
index 5d3fb8f3b..e0cd2710b 100644
--- a/repositories/codeformer/facelib/utils/face_restoration_helper.py
+++ b/repositories/codeformer/facelib/utils/face_restoration_helper.py
@@ -68,7 +68,7 @@ class FaceRestoreHelper(object):
if self.template_3points:
self.face_template = np.array([[192, 240], [319, 240], [257, 371]])
else:
- # standard 5 landmarks for FFHQ faces with 512 x 512
+ # standard 5 landmarks for FFHQ faces with 512 x 512
# facexlib
self.face_template = np.array([[192.98138, 239.94708], [318.90277, 240.1936], [256.63416, 314.01935],
[201.26117, 371.41043], [313.08905, 371.15118]])
@@ -170,7 +170,7 @@ class FaceRestoreHelper(object):
landmark = np.array([[bbox[i], bbox[i + 1]] for i in range(5, 15, 2)])
self.all_landmarks_5.append(landmark)
self.det_faces.append(bbox[0:5])
-
+
if len(self.det_faces) == 0:
return 0
if only_keep_largest:
@@ -317,7 +317,7 @@ class FaceRestoreHelper(object):
assert len(self.restored_faces) == len(
self.inverse_affine_matrices), ('length of restored_faces and affine_matrices are different.')
-
+
inv_mask_borders = []
for restored_face, inverse_affine in zip(self.restored_faces, self.inverse_affine_matrices):
if face_upsampler is not None:
@@ -457,4 +457,4 @@ class FaceRestoreHelper(object):
self.cropped_faces = []
self.inverse_affine_matrices = []
self.det_faces = []
- self.pad_input_imgs = []
\ No newline at end of file
+ self.pad_input_imgs = []
diff --git a/repositories/codeformer/facelib/utils/misc.py b/repositories/codeformer/facelib/utils/misc.py
index 7f5c95506..1f14e6c37 100644
--- a/repositories/codeformer/facelib/utils/misc.py
+++ b/repositories/codeformer/facelib/utils/misc.py
@@ -13,7 +13,7 @@ ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__fil
def download_pretrained_models(file_ids, save_path_root):
import gdown
-
+
os.makedirs(save_path_root, exist_ok=True)
for file_name, file_id in file_ids.items():
diff --git a/repositories/codeformer/inference_codeformer.py b/repositories/codeformer/inference_codeformer.py
index cde1094af..c3fd3b545 100644
--- a/repositories/codeformer/inference_codeformer.py
+++ b/repositories/codeformer/inference_codeformer.py
@@ -52,20 +52,20 @@ if __name__ == '__main__':
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
parser = argparse.ArgumentParser()
- parser.add_argument('-i', '--input_path', type=str, default='./inputs/whole_imgs',
+ parser.add_argument('-i', '--input_path', type=str, default='./inputs/whole_imgs',
help='Input image, video or folder. Default: inputs/whole_imgs')
- parser.add_argument('-o', '--output_path', type=str, default=None,
+ parser.add_argument('-o', '--output_path', type=str, default=None,
help='Output folder. Default: results/_')
- parser.add_argument('-w', '--fidelity_weight', type=float, default=0.5,
+ parser.add_argument('-w', '--fidelity_weight', type=float, default=0.5,
help='Balance the quality and fidelity. Default: 0.5')
- parser.add_argument('-s', '--upscale', type=int, default=2,
+ parser.add_argument('-s', '--upscale', type=int, default=2,
help='The final upsampling scale of the image. Default: 2')
parser.add_argument('--has_aligned', action='store_true', help='Input are cropped and aligned faces. Default: False')
parser.add_argument('--only_center_face', action='store_true', help='Only restore the center face. Default: False')
parser.add_argument('--draw_box', action='store_true', help='Draw the bounding box for the detected faces. Default: False')
# large det_model: 'YOLOv5l', 'retinaface_resnet50'
# small det_model: 'YOLOv5n', 'retinaface_mobile0.25'
- parser.add_argument('--detection_model', type=str, default='retinaface_resnet50',
+ parser.add_argument('--detection_model', type=str, default='retinaface_resnet50',
help='Face detector. Optional: retinaface_resnet50, retinaface_mobile0.25, YOLOv5l, YOLOv5n. \
Default: retinaface_resnet50')
parser.add_argument('--bg_upsampler', type=str, default='None', help='Background upsampler. Optional: realesrgan')
@@ -91,7 +91,7 @@ if __name__ == '__main__':
input_img_list.append(image)
image = vidreader.get_frame()
audio = vidreader.get_audio()
- fps = vidreader.get_fps() if args.save_video_fps is None else args.save_video_fps
+ fps = vidreader.get_fps() if args.save_video_fps is None else args.save_video_fps
video_name = os.path.basename(args.input_path)[:-4]
result_root = f'results/{video_name}_{w}'
input_video = True
@@ -127,11 +127,11 @@ if __name__ == '__main__':
face_upsampler = None
# ------------------ set up CodeFormer restorer -------------------
- net = ARCH_REGISTRY.get('CodeFormer')(dim_embd=512, codebook_size=1024, n_head=8, n_layers=9,
+ net = ARCH_REGISTRY.get('CodeFormer')(dim_embd=512, codebook_size=1024, n_head=8, n_layers=9,
connect_list=['32', '64', '128', '256']).to(device)
-
+
# ckpt_path = 'weights/CodeFormer/codeformer.pth'
- ckpt_path = load_file_from_url(url=pretrain_model_url['restoration'],
+ ckpt_path = load_file_from_url(url=pretrain_model_url['restoration'],
model_dir='weights/CodeFormer', progress=True, file_name=None)
checkpoint = torch.load(ckpt_path)['params_ema']
net.load_state_dict(checkpoint)
@@ -140,9 +140,9 @@ if __name__ == '__main__':
# ------------------ set up FaceRestoreHelper -------------------
# large det_model: 'YOLOv5l', 'retinaface_resnet50'
# small det_model: 'YOLOv5n', 'retinaface_mobile0.25'
- if not args.has_aligned:
+ if not args.has_aligned:
print(f'Face detection model: {args.detection_model}')
- if bg_upsampler is not None:
+ if bg_upsampler is not None:
print(f'Background upsampling: True, Face upsampling: {args.face_upsample}')
else:
print(f'Background upsampling: False, Face upsampling: {args.face_upsample}')
@@ -160,7 +160,7 @@ if __name__ == '__main__':
for i, img_path in enumerate(input_img_list):
# clean all the intermediate results to process the next image
face_helper.clean_all()
-
+
if isinstance(img_path, str):
img_name = os.path.basename(img_path)
basename, ext = os.path.splitext(img_name)
@@ -172,7 +172,7 @@ if __name__ == '__main__':
print(f'[{i+1}/{test_img_num}] Processing: {img_name}')
img = img_path
- if args.has_aligned:
+ if args.has_aligned:
# the input faces are already cropped and aligned
img = cv2.resize(img, (512, 512), interpolation=cv2.INTER_LINEAR)
face_helper.is_gray = is_gray(img, threshold=5)
@@ -218,7 +218,7 @@ if __name__ == '__main__':
bg_img = None
face_helper.get_inverse_affine(None)
# paste each restored face to the input image
- if args.face_upsample and face_upsampler is not None:
+ if args.face_upsample and face_upsampler is not None:
restored_img = face_helper.paste_faces_to_input_image(upsample_img=bg_img, draw_box=args.draw_box, face_upsampler=face_upsampler)
else:
restored_img = face_helper.paste_faces_to_input_image(upsample_img=bg_img, draw_box=args.draw_box)
@@ -226,7 +226,7 @@ if __name__ == '__main__':
# save faces
for idx, (cropped_face, restored_face) in enumerate(zip(face_helper.cropped_faces, face_helper.restored_faces)):
# save cropped face
- if not args.has_aligned:
+ if not args.has_aligned:
save_crop_path = os.path.join(result_root, 'cropped_faces', f'{basename}_{idx:02d}.png')
imwrite(cropped_face, save_crop_path)
# save restored face
@@ -261,7 +261,7 @@ if __name__ == '__main__':
video_name = f'{video_name}_{args.suffix}.png'
save_restore_path = os.path.join(result_root, f'{video_name}.mp4')
vidwriter = VideoWriter(save_restore_path, height, width, fps, audio)
-
+
for f in video_frames:
vidwriter.write_frame(f)
vidwriter.close()
diff --git a/repositories/codeformer/requirements.txt b/repositories/codeformer/requirements.txt
index 7e1950a06..4d67efafb 100644
--- a/repositories/codeformer/requirements.txt
+++ b/repositories/codeformer/requirements.txt
@@ -14,4 +14,4 @@ torchvision
tqdm
yapf
lpips
-gdown # supports downloading the large file from Google Drive
\ No newline at end of file
+gdown # supports downloading the large file from Google Drive
diff --git a/repositories/codeformer/scripts/crop_align_face.py b/repositories/codeformer/scripts/crop_align_face.py
index 31e66266a..125cdbcf7 100644
--- a/repositories/codeformer/scripts/crop_align_face.py
+++ b/repositories/codeformer/scripts/crop_align_face.py
@@ -124,7 +124,7 @@ def align_face(filepath, out_path):
img = img.resize(rsize, PIL.Image.ANTIALIAS)
quad /= shrink
qsize /= shrink
-
+
# Crop.
border = max(int(np.rint(qsize * 0.1)), 3)
crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))),
@@ -187,6 +187,6 @@ if __name__ == '__main__':
img_list = sorted(img_list)
for in_path in img_list:
- out_path = os.path.join(args.out_dir, in_path.split("/")[-1])
+ out_path = os.path.join(args.out_dir, in_path.split("/")[-1])
out_path = out_path.replace('.jpg', '.png')
- size_ = align_face(in_path, out_path)
\ No newline at end of file
+ size_ = align_face(in_path, out_path)
diff --git a/requirements.txt b/requirements.txt
index 87d1b9f9f..4b5ce3f49 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -26,8 +26,6 @@ voluptuous
yapf
fasteners
orjson
-ruff
-pylint
sqlalchemy
invisible-watermark
pi-heif
@@ -70,6 +68,11 @@ scikit-image
seam-carving
sentencepiece
+# lint
+ruff
+pylint
+pre-commit
+
# block
torch!=2.5.0
torchvision!=0.20.0
diff --git a/scripts/consistory/consistory_pipeline.py b/scripts/consistory/consistory_pipeline.py
index b17fb4143..cf020c254 100644
--- a/scripts/consistory/consistory_pipeline.py
+++ b/scripts/consistory/consistory_pipeline.py
@@ -82,7 +82,7 @@ class ConsistoryExtendAttnSDXLPipeline(
clip_skip: Optional[int] = None,
callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
callback_on_step_end_tensor_inputs: List[str] = ["latents"],
-
+
attention_store_kwargs: Optional[Dict] = None,
extended_attn_kwargs: Optional[Dict] = None,
share_queries: bool = False,
@@ -422,8 +422,8 @@ class ConsistoryExtendAttnSDXLPipeline(
t,
encoder_hidden_states=prompt_embeds,
timestep_cond=timestep_cond,
- cross_attention_kwargs={'query_store': query_store,
- 'perform_extend_attn': False,
+ cross_attention_kwargs={'query_store': query_store,
+ 'perform_extend_attn': False,
'record_attention': False},
added_cond_kwargs=added_cond_kwargs,
return_dict=False,
@@ -436,9 +436,9 @@ class ConsistoryExtendAttnSDXLPipeline(
t,
encoder_hidden_states=prompt_embeds,
timestep_cond=timestep_cond,
- cross_attention_kwargs={'query_store': query_store,
- 'perform_extend_attn': True,
- 'record_attention': True,
+ cross_attention_kwargs={'query_store': query_store,
+ 'perform_extend_attn': True,
+ 'record_attention': True,
'feature_injector': feature_injector,
'anchors_cache': anchors_cache},
added_cond_kwargs=added_cond_kwargs,
@@ -483,7 +483,7 @@ class ConsistoryExtendAttnSDXLPipeline(
if XLA_AVAILABLE:
# xm.mark_step()
pass
-
+
# Update attention store mask
self.attention_store.aggregate_last_steps_attention()
@@ -516,4 +516,4 @@ class ConsistoryExtendAttnSDXLPipeline(
if not return_dict:
return (image,)
- return StableDiffusionXLPipelineOutput(images=image)
\ No newline at end of file
+ return StableDiffusionXLPipelineOutput(images=image)
diff --git a/scripts/consistory/consistory_unet_sdxl.py b/scripts/consistory/consistory_unet_sdxl.py
index 940b4ba01..4e6e4b335 100644
--- a/scripts/consistory/consistory_unet_sdxl.py
+++ b/scripts/consistory/consistory_unet_sdxl.py
@@ -1153,7 +1153,7 @@ class ConsistorySDXLUNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditio
upsample_size=upsample_size,
scale=lora_scale,
)
-
+
self.latent_store(sample.detach(), t=timestep, layer_index=i)
# 6. post-process
diff --git a/scripts/consistory/utils/ptp_utils.py b/scripts/consistory/utils/ptp_utils.py
index 0ab2d8d93..5bd4adf65 100644
--- a/scripts/consistory/utils/ptp_utils.py
+++ b/scripts/consistory/utils/ptp_utils.py
@@ -13,9 +13,9 @@
# limitations under the License.
# MIT License
-#
+#
# Copyright (c) 2023 AttendAndExcite
-#
+#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
@@ -175,9 +175,9 @@ class AttentionStore:
attn_mask = self.last_mask_dropout[width]
if attn_mask is None:
return None
-
+
n_patches = width**2
-
+
output_attn_mask = torch.zeros((attn_mask.shape[0] * attn_mask.shape[1],), device=attn_mask.device, dtype=torch.bool)
for j in range(attn_mask.shape[0]):
@@ -191,4 +191,4 @@ class AttentionStore:
raise NotImplementedError('mask_background_query is not supported anymore')
output_attn_mask[0, attn_mask[i], k*n_patches:(k+1)*n_patches] = attn_mask[j].unsqueeze(0).expand(attn_mask[i].sum(), -1)
- return output_attn_mask
\ No newline at end of file
+ return output_attn_mask
diff --git a/scripts/instantir/aggregator.py b/scripts/instantir/aggregator.py
index 1950f9efa..5877340ca 100644
--- a/scripts/instantir/aggregator.py
+++ b/scripts/instantir/aggregator.py
@@ -68,7 +68,7 @@ class SFT(nn.Module):
self.add = nn.Conv2d(nhidden, norm_nc, kernel_size=ks, padding=pw)
def forward(self, hidden_states, mask=False):
-
+
c, h = hidden_states
mask = mask or self.mask
assert mask is False
@@ -750,7 +750,7 @@ class Aggregator(ModelMixin, ConfigMixin, FromOriginalModelMixin):
image_embeds = self.encoder_hid_proj(image_embeds)
encoder_hidden_states = (encoder_hidden_states, image_embeds)
return encoder_hidden_states
-
+
def _set_gradient_checkpointing(self, module, value: bool = False) -> None:
if isinstance(module, (CrossAttnDownBlock2D, DownBlock2D)):
module.gradient_checkpointing = value
diff --git a/scripts/instantir/ip_adapter/attention_processor.py b/scripts/instantir/ip_adapter/attention_processor.py
index 68dce4281..ca57b8754 100644
--- a/scripts/instantir/ip_adapter/attention_processor.py
+++ b/scripts/instantir/ip_adapter/attention_processor.py
@@ -723,7 +723,7 @@ class AdditiveKV_AttnProcessor2_0(torch.nn.Module):
temb=None,
):
assert temb is not None, "Timestep embedding is needed for a time-aware attention processor."
-
+
residual = hidden_states
if attn.spatial_norm is not None:
@@ -834,7 +834,7 @@ class TA_AdditiveKV_AttnProcessor2_0(torch.nn.Module):
temb=None,
):
assert temb is not None, "Timestep embedding is needed for a time-aware attention processor."
-
+
residual = hidden_states
if attn.spatial_norm is not None:
@@ -1163,7 +1163,7 @@ class TA_IPAttnProcessor2_0(torch.nn.Module):
# for ip-adapter
ip_key = self.to_k_ip(ip_hidden_states)
ip_value = self.to_v_ip(ip_hidden_states)
-
+
# time-dependent adaLN
ip_key = self.ln_k_ip(ip_key, temb)
ip_value = self.ln_v_ip(ip_value, temb)
diff --git a/scripts/instantir/ip_adapter/ip_adapter.py b/scripts/instantir/ip_adapter/ip_adapter.py
index 7f4bcbc45..a244cd681 100644
--- a/scripts/instantir/ip_adapter/ip_adapter.py
+++ b/scripts/instantir/ip_adapter/ip_adapter.py
@@ -47,14 +47,14 @@ class MLPProjModel(torch.nn.Module):
"""SD model with image prompt"""
def __init__(self, cross_attention_dim=2048, clip_embeddings_dim=1280):
super().__init__()
-
+
self.proj = torch.nn.Sequential(
torch.nn.Linear(clip_embeddings_dim, clip_embeddings_dim),
torch.nn.GELU(),
torch.nn.Linear(clip_embeddings_dim, cross_attention_dim),
torch.nn.LayerNorm(cross_attention_dim)
)
-
+
def forward(self, image_embeds):
clip_extra_context_tokens = self.proj(image_embeds)
return clip_extra_context_tokens
diff --git a/scripts/instantir/ip_adapter/utils.py b/scripts/instantir/ip_adapter/utils.py
index 64c45cd85..07147c278 100644
--- a/scripts/instantir/ip_adapter/utils.py
+++ b/scripts/instantir/ip_adapter/utils.py
@@ -245,4 +245,4 @@ def prepare_training_image_embeds(
)
image_embeds.append(single_image_embeds)
- return image_embeds
\ No newline at end of file
+ return image_embeds
diff --git a/scripts/instantir/sdxl_instantir.py b/scripts/instantir/sdxl_instantir.py
index b279d8445..8595a4b40 100644
--- a/scripts/instantir/sdxl_instantir.py
+++ b/scripts/instantir/sdxl_instantir.py
@@ -396,7 +396,7 @@ class InstantIRPipeline(
self.unet.disable_adapters()
return lora_alpha
-
+
# Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
def encode_prompt(
self,
diff --git a/scripts/pulid/eva_clip/__init__.py b/scripts/pulid/eva_clip/__init__.py
index fa2d014bb..039e7ce32 100644
--- a/scripts/pulid/eva_clip/__init__.py
+++ b/scripts/pulid/eva_clip/__init__.py
@@ -8,4 +8,4 @@ from .openai import load_openai_model, list_openai_models
from .pretrained import list_pretrained, list_pretrained_models_by_tag, list_pretrained_tags_by_model,\
get_pretrained_url, download_pretrained_from_url, is_pretrained_cfg, get_pretrained_cfg, download_pretrained
from .tokenizer import SimpleTokenizer, tokenize
-from .transform import image_transform
\ No newline at end of file
+from .transform import image_transform
diff --git a/scripts/pulid/eva_clip/eva_vit_model.py b/scripts/pulid/eva_clip/eva_vit_model.py
index 51db88cf0..7d22eeacd 100644
--- a/scripts/pulid/eva_clip/eva_vit_model.py
+++ b/scripts/pulid/eva_clip/eva_vit_model.py
@@ -11,7 +11,7 @@ try:
from timm.models.layers import drop_path, to_2tuple, trunc_normal_
except:
from timm.layers import drop_path, to_2tuple, trunc_normal_
-
+
from .transformer import PatchDropout
from .rope import VisionRotaryEmbedding, VisionRotaryEmbeddingFast
@@ -39,19 +39,19 @@ class DropPath(nn.Module):
def forward(self, x):
return drop_path(x, self.drop_prob, self.training)
-
+
def extra_repr(self) -> str:
return 'p={}'.format(self.drop_prob)
class Mlp(nn.Module):
def __init__(
- self,
- in_features,
- hidden_features=None,
- out_features=None,
- act_layer=nn.GELU,
- norm_layer=nn.LayerNorm,
+ self,
+ in_features,
+ hidden_features=None,
+ out_features=None,
+ act_layer=nn.GELU,
+ norm_layer=nn.LayerNorm,
drop=0.,
subln=False,
@@ -71,7 +71,7 @@ class Mlp(nn.Module):
x = self.fc1(x)
x = self.act(x)
# x = self.drop(x)
- # commit this for the orignal BERT implement
+ # commit this for the orignal BERT implement
x = self.ffn_ln(x)
x = self.fc2(x)
@@ -79,7 +79,7 @@ class Mlp(nn.Module):
return x
class SwiGLU(nn.Module):
- def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.SiLU, drop=0.,
+ def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.SiLU, drop=0.,
norm_layer=nn.LayerNorm, subln=False):
super().__init__()
out_features = out_features or in_features
@@ -91,7 +91,7 @@ class SwiGLU(nn.Module):
self.act = act_layer()
self.ffn_ln = norm_layer(hidden_features) if subln else nn.Identity()
self.w3 = nn.Linear(hidden_features, out_features)
-
+
self.drop = nn.Dropout(drop)
def forward(self, x):
@@ -172,20 +172,20 @@ class Attention(nn.Module):
def forward(self, x, rel_pos_bias=None, attn_mask=None):
B, N, C = x.shape
- if self.subln:
+ if self.subln:
q = F.linear(input=x, weight=self.q_proj.weight, bias=self.q_bias)
k = F.linear(input=x, weight=self.k_proj.weight, bias=None)
v = F.linear(input=x, weight=self.v_proj.weight, bias=self.v_bias)
q = q.reshape(B, N, self.num_heads, -1).permute(0, 2, 1, 3) # B, num_heads, N, C
- k = k.reshape(B, N, self.num_heads, -1).permute(0, 2, 1, 3)
- v = v.reshape(B, N, self.num_heads, -1).permute(0, 2, 1, 3)
- else:
+ k = k.reshape(B, N, self.num_heads, -1).permute(0, 2, 1, 3)
+ v = v.reshape(B, N, self.num_heads, -1).permute(0, 2, 1, 3)
+ else:
qkv_bias = None
if self.q_bias is not None:
qkv_bias = torch.cat((self.q_bias, torch.zeros_like(self.v_bias, requires_grad=False), self.v_bias))
-
+
qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias)
qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) # 3, B, num_heads, N, C
q, k, v = qkv[0], qkv[1], qkv[2]
@@ -232,7 +232,7 @@ class Attention(nn.Module):
if attn_mask is not None:
attn_mask = attn_mask.bool()
attn = attn.masked_fill(~attn_mask[:, None, None, :], float("-inf"))
-
+
attn = attn.softmax(dim=-1)
attn = self.attn_drop(attn)
@@ -262,15 +262,15 @@ class Block(nn.Module):
if naiveswiglu:
self.mlp = SwiGLU(
- in_features=dim,
- hidden_features=mlp_hidden_dim,
+ in_features=dim,
+ hidden_features=mlp_hidden_dim,
subln=subln,
norm_layer=norm_layer,
)
else:
self.mlp = Mlp(
- in_features=dim,
- hidden_features=mlp_hidden_dim,
+ in_features=dim,
+ hidden_features=mlp_hidden_dim,
act_layer=act_layer,
subln=subln,
drop=drop
@@ -407,7 +407,7 @@ class EVAVisionTransformer(nn.Module):
ft_seq_len=hw_seq_len if intp_freq else None,
# patch_dropout=patch_dropout
)
- else:
+ else:
self.rope = None
self.naiveswiglu = naiveswiglu
@@ -469,7 +469,7 @@ class EVAVisionTransformer(nn.Module):
def get_num_layers(self):
return len(self.blocks)
-
+
def lock(self, unlocked_groups=0, freeze_bn_stats=False):
assert unlocked_groups == 0, 'partial locking not currently supported for this model'
for param in self.parameters():
@@ -491,7 +491,7 @@ class EVAVisionTransformer(nn.Module):
self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity()
def forward_features(self, x, return_all_features=False, return_hidden=False, shuffle=False):
-
+
x = self.patch_embed(x)
batch_size, seq_len, _ = x.size()
diff --git a/scripts/pulid/eva_clip/factory.py b/scripts/pulid/eva_clip/factory.py
index ced899999..b33929625 100644
--- a/scripts/pulid/eva_clip/factory.py
+++ b/scripts/pulid/eva_clip/factory.py
@@ -93,7 +93,7 @@ def load_state_dict(checkpoint_path: str, map_location: str='cpu', model_key: st
state_dict = checkpoint
if next(iter(state_dict.items()))[0].startswith('module'):
state_dict = {k[7:]: v for k, v in state_dict.items()}
-
+
for k in skip_list:
if k in list(state_dict.keys()):
logging.info(f"Removing key {k} from pretrained checkpoint")
@@ -181,7 +181,7 @@ def load_pretrained_checkpoint(
visual_state_dict = load_clip_visual_state_dict(visual_checkpoint_path, is_openai=True, skip_list=skip_list)
else:
visual_state_dict = load_state_dict(visual_checkpoint_path, model_key=model_key, is_openai=False, skip_list=skip_list)
-
+
# resize_clip_pos_embed for CLIP and open CLIP
if 'positional_embedding' in visual_state_dict:
resize_visual_pos_embed(visual_state_dict, model)
@@ -202,7 +202,7 @@ def load_pretrained_checkpoint(
text_state_dict = load_state_dict(visual_checkpoint_path, model_key=model_key, is_openai=False, skip_list=skip_list)
text_incompatible_keys = model.text.load_state_dict(text_state_dict, strict=strict)
-
+
logging.info(f"num of loaded text_state_dict keys: {len(text_state_dict.keys())}")
logging.info(f"text_incompatible_keys.missing_keys: {text_incompatible_keys.missing_keys}")
@@ -255,7 +255,7 @@ def create_model(
if force_quick_gelu:
# override for use of QuickGELU on non-OpenAI transformer models
model_cfg["quick_gelu"] = True
-
+
if force_patch_dropout is not None:
# override the default patch dropout value
model_cfg['vision_cfg']["patch_dropout"] = force_patch_dropout
@@ -286,7 +286,7 @@ def create_model(
checkpoint_path,
model_key="model|module|state_dict",
strict=False
- )
+ )
else:
error_str = (
f'Pretrained weights ({pretrained}) not found for model {model_name}.'
@@ -296,7 +296,7 @@ def create_model(
else:
visual_checkpoint_path = ''
text_checkpoint_path = ''
-
+
if pretrained_image:
pretrained_visual_model = pretrained_visual_model.replace('/', '-') # for callers using old naming with / in ViT names
pretrained_image_cfg = get_pretrained_cfg(pretrained_visual_model, pretrained_image)
@@ -321,7 +321,7 @@ def create_model(
else:
logging.warning(f'Pretrained weights ({text_checkpoint_path}) not found for model {model_name}.text.')
raise RuntimeError(f'Pretrained weights ({text_checkpoint_path}) not found for model {model_name}.text.')
-
+
if visual_checkpoint_path:
logging.info(f'Loading pretrained {model_name}.visual weights ({visual_checkpoint_path}).')
if text_checkpoint_path:
@@ -338,7 +338,7 @@ def create_model(
model_key="model|module|state_dict",
skip_list=skip_list
)
-
+
if "fp16" in precision or "bf16" in precision:
logging.info(f'convert precision to {precision}')
model = model.to(torch.bfloat16) if 'bf16' in precision else model.to(torch.float16)
diff --git a/scripts/pulid/eva_clip/hf_model.py b/scripts/pulid/eva_clip/hf_model.py
index 0b9551993..1665ada0b 100644
--- a/scripts/pulid/eva_clip/hf_model.py
+++ b/scripts/pulid/eva_clip/hf_model.py
@@ -62,19 +62,19 @@ class ClsPooler(nn.Module):
self.use_pooler_output = use_pooler_output
def forward(self, x:BaseModelOutput, attention_mask:TensorType):
-
- if (self.use_pooler_output and
+
+ if (self.use_pooler_output and
isinstance(x, (BaseModelOutputWithPooling, BaseModelOutputWithPoolingAndCrossAttentions)) and
(x.pooler_output is not None)
):
return x.pooler_output
-
+
return x.last_hidden_state[:, self.cls_token_position, :]
class HFTextEncoder(nn.Module):
"""HuggingFace model adapter"""
def __init__(
- self,
+ self,
model_name_or_path: str,
output_dim: int,
tokenizer_name: str = None,
@@ -134,10 +134,10 @@ class HFTextEncoder(nn.Module):
self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
# def forward_itm(self, x:TensorType, image_embeds:TensorType) -> TensorType:
- # image_atts = torch.ones(image_embeds.size()[:-1],dtype=torch.long).to(x.device)
+ # image_atts = torch.ones(image_embeds.size()[:-1],dtype=torch.long).to(x.device)
# attn_mask = (x != self.config.pad_token_id).long()
# out = self.transformer(
- # input_ids=x,
+ # input_ids=x,
# attention_mask=attn_mask,
# encoder_hidden_states = image_embeds,
# encoder_attention_mask = image_atts,
@@ -147,14 +147,14 @@ class HFTextEncoder(nn.Module):
# return self.itm_proj(pooled_out)
def mask(self, input_ids, vocab_size, device, targets=None, masked_indices=None, probability_matrix=None):
- if masked_indices is None:
+ if masked_indices is None:
masked_indices = torch.bernoulli(probability_matrix).bool()
-
+
masked_indices[input_ids == self.tokenizer.pad_token_id] = False
masked_indices[input_ids == self.tokenizer.cls_token_id] = False
-
+
if targets is not None:
- targets[~masked_indices] = -100 # We only compute loss on masked tokens
+ targets[~masked_indices] = -100 # We only compute loss on masked tokens
# 80% of the time, we replace masked input tokens with tokenizer.mask_token ([MASK])
indices_replaced = torch.bernoulli(torch.full(input_ids.shape, 0.8)).bool() & masked_indices
@@ -163,9 +163,9 @@ class HFTextEncoder(nn.Module):
# 10% of the time, we replace masked input tokens with random word
indices_random = torch.bernoulli(torch.full(input_ids.shape, 0.5)).bool() & masked_indices & ~indices_replaced
random_words = torch.randint(vocab_size, input_ids.shape, dtype=torch.long).to(device)
- input_ids[indices_random] = random_words[indices_random]
- # The rest of the time (10% of the time) we keep the masked input tokens unchanged
-
+ input_ids[indices_random] = random_words[indices_random]
+ # The rest of the time (10% of the time) we keep the masked input tokens unchanged
+
if targets is not None:
return input_ids, targets
else:
@@ -174,7 +174,7 @@ class HFTextEncoder(nn.Module):
def forward_mlm(self, input_ids, image_embeds, mlm_probability=0.25):
labels = input_ids.clone()
attn_mask = (input_ids != self.config.pad_token_id).long()
- image_atts = torch.ones(image_embeds.size()[:-1],dtype=torch.long).to(input_ids.device)
+ image_atts = torch.ones(image_embeds.size()[:-1],dtype=torch.long).to(input_ids.device)
vocab_size = getattr(self.config, arch_dict[self.config.model_type]["config_names"]["vocab_size"])
probability_matrix = torch.full(labels.shape, mlm_probability)
input_ids, labels = self.mask(input_ids, vocab_size, input_ids.device, targets=labels,
diff --git a/scripts/pulid/eva_clip/loss.py b/scripts/pulid/eva_clip/loss.py
index 473f60d98..13450d838 100644
--- a/scripts/pulid/eva_clip/loss.py
+++ b/scripts/pulid/eva_clip/loss.py
@@ -119,7 +119,7 @@ class ClipLoss(nn.Module):
self.prev_num_logits = num_logits
else:
labels = self.labels[device]
-
+
if self.label_smoothing_cross_entropy:
total_loss = (
self.label_smoothing_cross_entropy(logits_per_image, labels) +
@@ -130,9 +130,9 @@ class ClipLoss(nn.Module):
F.cross_entropy(logits_per_image, labels) +
F.cross_entropy(logits_per_text, labels)
) / 2
-
+
acc = None
i2t_acc = (logits_per_image.argmax(-1) == labels).sum() / len(logits_per_image)
t2i_acc = (logits_per_text.argmax(-1) == labels).sum() / len(logits_per_text)
acc = {"i2t": i2t_acc, "t2i": t2i_acc}
- return total_loss, acc
\ No newline at end of file
+ return total_loss, acc
diff --git a/scripts/pulid/eva_clip/model.py b/scripts/pulid/eva_clip/model.py
index abd8c02db..85a3e5b53 100644
--- a/scripts/pulid/eva_clip/model.py
+++ b/scripts/pulid/eva_clip/model.py
@@ -231,7 +231,7 @@ class CLIP(nn.Module):
def set_grad_checkpointing(self, enable=True):
self.visual.set_grad_checkpointing(enable)
self.transformer.grad_checkpointing = enable
-
+
@torch.jit.ignore
def no_weight_decay(self):
return {'logit_scale'}
@@ -309,7 +309,7 @@ def convert_weights_to_lp(model: nn.Module, dtype=torch.float16):
"""Convert applicable model parameters to low-precision (bf16 or fp16)"""
def _convert_weights(l):
-
+
if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Linear)):
l.weight.data = l.weight.data.to(dtype)
if l.bias is not None:
diff --git a/scripts/pulid/eva_clip/model_configs/EVA01-CLIP-B-16.json b/scripts/pulid/eva_clip/model_configs/EVA01-CLIP-B-16.json
index aad205800..fc684f14e 100644
--- a/scripts/pulid/eva_clip/model_configs/EVA01-CLIP-B-16.json
+++ b/scripts/pulid/eva_clip/model_configs/EVA01-CLIP-B-16.json
@@ -16,4 +16,4 @@
"heads": 8,
"layers": 12
}
-}
\ No newline at end of file
+}
diff --git a/scripts/pulid/eva_clip/model_configs/EVA01-CLIP-g-14-plus.json b/scripts/pulid/eva_clip/model_configs/EVA01-CLIP-g-14-plus.json
index 100279572..b08129178 100644
--- a/scripts/pulid/eva_clip/model_configs/EVA01-CLIP-g-14-plus.json
+++ b/scripts/pulid/eva_clip/model_configs/EVA01-CLIP-g-14-plus.json
@@ -21,4 +21,4 @@
"xattn": false,
"fusedLN": true
}
-}
\ No newline at end of file
+}
diff --git a/scripts/pulid/eva_clip/model_configs/EVA01-CLIP-g-14.json b/scripts/pulid/eva_clip/model_configs/EVA01-CLIP-g-14.json
index 5d338b4e6..6a477be45 100644
--- a/scripts/pulid/eva_clip/model_configs/EVA01-CLIP-g-14.json
+++ b/scripts/pulid/eva_clip/model_configs/EVA01-CLIP-g-14.json
@@ -21,4 +21,4 @@
"xattn": false,
"fusedLN": true
}
-}
\ No newline at end of file
+}
diff --git a/scripts/pulid/eva_clip/model_configs/EVA02-CLIP-B-16.json b/scripts/pulid/eva_clip/model_configs/EVA02-CLIP-B-16.json
index e4a6e723f..533e39afe 100644
--- a/scripts/pulid/eva_clip/model_configs/EVA02-CLIP-B-16.json
+++ b/scripts/pulid/eva_clip/model_configs/EVA02-CLIP-B-16.json
@@ -26,4 +26,4 @@
"xattn": true,
"fusedLN": true
}
-}
\ No newline at end of file
+}
diff --git a/scripts/pulid/eva_clip/model_configs/EVA02-CLIP-L-14-336.json b/scripts/pulid/eva_clip/model_configs/EVA02-CLIP-L-14-336.json
index 3e1d124e1..5feb65755 100644
--- a/scripts/pulid/eva_clip/model_configs/EVA02-CLIP-L-14-336.json
+++ b/scripts/pulid/eva_clip/model_configs/EVA02-CLIP-L-14-336.json
@@ -26,4 +26,4 @@
"xattn": false,
"fusedLN": true
}
-}
\ No newline at end of file
+}
diff --git a/scripts/pulid/eva_clip/model_configs/EVA02-CLIP-L-14.json b/scripts/pulid/eva_clip/model_configs/EVA02-CLIP-L-14.json
index 03b22ad3c..853abf237 100644
--- a/scripts/pulid/eva_clip/model_configs/EVA02-CLIP-L-14.json
+++ b/scripts/pulid/eva_clip/model_configs/EVA02-CLIP-L-14.json
@@ -26,4 +26,4 @@
"xattn": false,
"fusedLN": true
}
-}
\ No newline at end of file
+}
diff --git a/scripts/pulid/eva_clip/model_configs/EVA02-CLIP-bigE-14.json b/scripts/pulid/eva_clip/model_configs/EVA02-CLIP-bigE-14.json
index 747ffccc8..df99893bf 100644
--- a/scripts/pulid/eva_clip/model_configs/EVA02-CLIP-bigE-14.json
+++ b/scripts/pulid/eva_clip/model_configs/EVA02-CLIP-bigE-14.json
@@ -22,4 +22,4 @@
"xattn": false,
"fusedLN": true
}
-}
\ No newline at end of file
+}
diff --git a/scripts/pulid/eva_clip/rope.py b/scripts/pulid/eva_clip/rope.py
index 69030c35e..764cb1976 100644
--- a/scripts/pulid/eva_clip/rope.py
+++ b/scripts/pulid/eva_clip/rope.py
@@ -60,7 +60,7 @@ class VisionRotaryEmbedding(nn.Module):
freqs_w = torch.einsum('..., f -> ... f', t, freqs)
freqs_w = repeat(freqs_w, '... n -> ... (n r)', r = 2)
- freqs = broadcat((freqs_h[:, None, :], freqs_w[None, :, :]), dim = -1)
+ freqs = broadcat((freqs_h[:, None, :], freqs_w[None, :, :]), dim = -1)
self.register_buffer("freqs_cos", freqs.cos())
self.register_buffer("freqs_sin", freqs.sin())
@@ -134,4 +134,4 @@ class VisionRotaryEmbeddingFast(nn.Module):
return t * freqs_cos + rotate_half(t) * freqs_sin
- return t * self.freqs_cos + rotate_half(t) * self.freqs_sin
\ No newline at end of file
+ return t * self.freqs_cos + rotate_half(t) * self.freqs_sin
diff --git a/scripts/pulid/eva_clip/transformer.py b/scripts/pulid/eva_clip/transformer.py
index 1e0a52ceb..f5afdb2a0 100644
--- a/scripts/pulid/eva_clip/transformer.py
+++ b/scripts/pulid/eva_clip/transformer.py
@@ -12,7 +12,7 @@ try:
from timm.models.layers import trunc_normal_
except:
from timm.layers import trunc_normal_
-
+
from .rope import VisionRotaryEmbedding, VisionRotaryEmbeddingFast
from .utils import to_2tuple
@@ -311,7 +311,7 @@ class CustomAttention(nn.Module):
attn = self.attn_drop(attn)
x = torch.bmm(attn, v)
-
+
if self.head_scale is not None:
x = x.view(B_q, self.num_heads, N_q, C_q) * self.head_scale
x = x.view(-1, N_q, C_q)
@@ -411,7 +411,7 @@ class CustomTransformer(nn.Module):
])
def get_cast_dtype(self) -> torch.dtype:
- return self.resblocks[0].mlp.c_fc.weight.dtype
+ return self.resblocks[0].mlp.c_fc.weight.dtype
def forward(self, q: torch.Tensor, k: torch.Tensor = None, v: torch.Tensor = None, attn_mask: Optional[torch.Tensor] = None):
if k is None and v is None:
@@ -532,7 +532,7 @@ class VisionTransformer(nn.Module):
# setting a patch_dropout of 0. would mean it is disabled and this function would be the identity fn
self.patch_dropout = PatchDropout(patch_dropout) if patch_dropout > 0. else nn.Identity()
self.ln_pre = norm_layer(width)
-
+
self.transformer = Transformer(
width,
layers,
@@ -551,7 +551,7 @@ class VisionTransformer(nn.Module):
def lock(self, unlocked_groups=0, freeze_bn_stats=False):
for param in self.parameters():
param.requires_grad = False
-
+
if unlocked_groups != 0:
groups = [
[
@@ -655,7 +655,7 @@ class TextTransformer(nn.Module):
norm_layer=norm_layer,
xattn=xattn
)
-
+
self.xattn = xattn
self.ln_final = norm_layer(width)
self.text_projection = nn.Parameter(torch.empty(width, output_dim))
@@ -686,7 +686,7 @@ class TextTransformer(nn.Module):
@torch.jit.ignore
def set_grad_checkpointing(self, enable=True):
self.transformer.grad_checkpointing = enable
-
+
@torch.jit.ignore
def no_weight_decay(self):
# return {'positional_embedding', 'token_embedding'}
diff --git a/scripts/pulid/eva_clip/utils.py b/scripts/pulid/eva_clip/utils.py
index bdc5a7a45..1c3c06201 100644
--- a/scripts/pulid/eva_clip/utils.py
+++ b/scripts/pulid/eva_clip/utils.py
@@ -135,7 +135,7 @@ def resize_eva_pos_embed(state_dict, model, interpolation: str = 'bicubic', seq_
patch_size = model.visual.patch_embed.patch_size
state_dict['patch_embed.proj.weight'] = torch.nn.functional.interpolate(
patch_embed_proj.float(), size=patch_size, mode='bicubic', align_corners=False)
-
+
def resize_rel_pos_embed(state_dict, model, interpolation: str = 'bicubic', seq_dim=1):
all_keys = list(state_dict.keys())
@@ -323,4 +323,4 @@ class AllGather(torch.autograd.Function):
None
)
-allgather = AllGather.apply
\ No newline at end of file
+allgather = AllGather.apply
diff --git a/scripts/xadapter/pipeline_sd_xl_adapter_controlnet.py b/scripts/xadapter/pipeline_sd_xl_adapter_controlnet.py
index f0f27fa69..a07982925 100644
--- a/scripts/xadapter/pipeline_sd_xl_adapter_controlnet.py
+++ b/scripts/xadapter/pipeline_sd_xl_adapter_controlnet.py
@@ -1824,4 +1824,3 @@ class StableDiffusionXLAdapterControlnetPipeline(DiffusionPipeline, FromSingleFi
init_latents = self.scheduler.add_noise(init_latents, noise, timestep)
return init_latents
-
diff --git a/scripts/xadapter/pipeline_sd_xl_adapter_controlnet_img2img.py b/scripts/xadapter/pipeline_sd_xl_adapter_controlnet_img2img.py
index c2dadfbfc..b2eee115f 100644
--- a/scripts/xadapter/pipeline_sd_xl_adapter_controlnet_img2img.py
+++ b/scripts/xadapter/pipeline_sd_xl_adapter_controlnet_img2img.py
@@ -1910,4 +1910,3 @@ class StableDiffusionXLAdapterControlnetI2IPipeline(DiffusionPipeline, FromSingl
# image.save(f'./test_img/noisy_image_sd1_5_{int(timestep)}.jpg')
return init_latents
-
diff --git a/scripts/xadapter/utils.py b/scripts/xadapter/utils.py
index 02990f645..817491647 100644
--- a/scripts/xadapter/utils.py
+++ b/scripts/xadapter/utils.py
@@ -11,4 +11,4 @@ from safetensors import safe_open
from tqdm import tqdm
from einops import rearrange
from model.convert_from_ckpt import convert_ldm_unet_checkpoint, convert_ldm_clip_checkpoint, convert_ldm_vae_checkpoint
-# from animatediff.utils.convert_lora_safetensor_to_diffusers import convert_lora, convert_motion_lora_ckpt_to_diffusers
\ No newline at end of file
+# from animatediff.utils.convert_lora_safetensor_to_diffusers import convert_lora, convert_motion_lora_ckpt_to_diffusers