initial commit

pull/12/head
pkuliyi2015 2023-05-20 13:39:33 +00:00
commit 97bc1ff0b3
12 changed files with 1711 additions and 0 deletions

7
.gitignore vendored Normal file

@ -0,0 +1,7 @@
# meta
.vscode/
__pycache__/
.DS_Store
# settings
models/

35
LICENSE Normal file

@ -0,0 +1,35 @@
S-Lab License 1.0
Copyright 2022 S-Lab
Redistribution and use for non-commercial purpose in source and
binary forms, with or without modification, are permitted provided
that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
In the event that redistribution and/or use for commercial purpose in
source or binary forms, with or without modification is required,
please contact the contributor(s) of the work.

437
LICENSE2 Normal file

@ -0,0 +1,437 @@
Attribution-NonCommercial-ShareAlike 4.0 International
=======================================================================
Creative Commons Corporation ("Creative Commons") is not a law firm and
does not provide legal services or legal advice. Distribution of
Creative Commons public licenses does not create a lawyer-client or
other relationship. Creative Commons makes its licenses and related
information available on an "as-is" basis. Creative Commons gives no
warranties regarding its licenses, any material licensed under their
terms and conditions, or any related information. Creative Commons
disclaims all liability for damages resulting from their use to the
fullest extent possible.
Using Creative Commons Public Licenses
Creative Commons public licenses provide a standard set of terms and
conditions that creators and other rights holders may use to share
original works of authorship and other material subject to copyright
and certain other rights specified in the public license below. The
following considerations are for informational purposes only, are not
exhaustive, and do not form part of our licenses.
Considerations for licensors: Our public licenses are
intended for use by those authorized to give the public
permission to use material in ways otherwise restricted by
copyright and certain other rights. Our licenses are
irrevocable. Licensors should read and understand the terms
and conditions of the license they choose before applying it.
Licensors should also secure all rights necessary before
applying our licenses so that the public can reuse the
material as expected. Licensors should clearly mark any
material not subject to the license. This includes other CC-
licensed material, or material used under an exception or
limitation to copyright. More considerations for licensors:
wiki.creativecommons.org/Considerations_for_licensors
Considerations for the public: By using one of our public
licenses, a licensor grants the public permission to use the
licensed material under specified terms and conditions. If
the licensor's permission is not necessary for any reason--for
example, because of any applicable exception or limitation to
copyright--then that use is not regulated by the license. Our
licenses grant only permissions under copyright and certain
other rights that a licensor has authority to grant. Use of
the licensed material may still be restricted for other
reasons, including because others have copyright or other
rights in the material. A licensor may make special requests,
such as asking that all changes be marked or described.
Although not required by our licenses, you are encouraged to
respect those requests where reasonable. More considerations
for the public:
wiki.creativecommons.org/Considerations_for_licensees
=======================================================================
Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International
Public License
By exercising the Licensed Rights (defined below), You accept and agree
to be bound by the terms and conditions of this Creative Commons
Attribution-NonCommercial-ShareAlike 4.0 International Public License
("Public License"). To the extent this Public License may be
interpreted as a contract, You are granted the Licensed Rights in
consideration of Your acceptance of these terms and conditions, and the
Licensor grants You such rights in consideration of benefits the
Licensor receives from making the Licensed Material available under
these terms and conditions.
Section 1 -- Definitions.
a. Adapted Material means material subject to Copyright and Similar
Rights that is derived from or based upon the Licensed Material
and in which the Licensed Material is translated, altered,
arranged, transformed, or otherwise modified in a manner requiring
permission under the Copyright and Similar Rights held by the
Licensor. For purposes of this Public License, where the Licensed
Material is a musical work, performance, or sound recording,
Adapted Material is always produced where the Licensed Material is
synched in timed relation with a moving image.
b. Adapter's License means the license You apply to Your Copyright
and Similar Rights in Your contributions to Adapted Material in
accordance with the terms and conditions of this Public License.
c. BY-NC-SA Compatible License means a license listed at
creativecommons.org/compatiblelicenses, approved by Creative
Commons as essentially the equivalent of this Public License.
d. Copyright and Similar Rights means copyright and/or similar rights
closely related to copyright including, without limitation,
performance, broadcast, sound recording, and Sui Generis Database
Rights, without regard to how the rights are labeled or
categorized. For purposes of this Public License, the rights
specified in Section 2(b)(1)-(2) are not Copyright and Similar
Rights.
e. Effective Technological Measures means those measures that, in the
absence of proper authority, may not be circumvented under laws
fulfilling obligations under Article 11 of the WIPO Copyright
Treaty adopted on December 20, 1996, and/or similar international
agreements.
f. Exceptions and Limitations means fair use, fair dealing, and/or
any other exception or limitation to Copyright and Similar Rights
that applies to Your use of the Licensed Material.
g. License Elements means the license attributes listed in the name
of a Creative Commons Public License. The License Elements of this
Public License are Attribution, NonCommercial, and ShareAlike.
h. Licensed Material means the artistic or literary work, database,
or other material to which the Licensor applied this Public
License.
i. Licensed Rights means the rights granted to You subject to the
terms and conditions of this Public License, which are limited to
all Copyright and Similar Rights that apply to Your use of the
Licensed Material and that the Licensor has authority to license.
j. Licensor means the individual(s) or entity(ies) granting rights
under this Public License.
k. NonCommercial means not primarily intended for or directed towards
commercial advantage or monetary compensation. For purposes of
this Public License, the exchange of the Licensed Material for
other material subject to Copyright and Similar Rights by digital
file-sharing or similar means is NonCommercial provided there is
no payment of monetary compensation in connection with the
exchange.
l. Share means to provide material to the public by any means or
process that requires permission under the Licensed Rights, such
as reproduction, public display, public performance, distribution,
dissemination, communication, or importation, and to make material
available to the public including in ways that members of the
public may access the material from a place and at a time
individually chosen by them.
m. Sui Generis Database Rights means rights other than copyright
resulting from Directive 96/9/EC of the European Parliament and of
the Council of 11 March 1996 on the legal protection of databases,
as amended and/or succeeded, as well as other essentially
equivalent rights anywhere in the world.
n. You means the individual or entity exercising the Licensed Rights
under this Public License. Your has a corresponding meaning.
Section 2 -- Scope.
a. License grant.
1. Subject to the terms and conditions of this Public License,
the Licensor hereby grants You a worldwide, royalty-free,
non-sublicensable, non-exclusive, irrevocable license to
exercise the Licensed Rights in the Licensed Material to:
a. reproduce and Share the Licensed Material, in whole or
in part, for NonCommercial purposes only; and
b. produce, reproduce, and Share Adapted Material for
NonCommercial purposes only.
2. Exceptions and Limitations. For the avoidance of doubt, where
Exceptions and Limitations apply to Your use, this Public
License does not apply, and You do not need to comply with
its terms and conditions.
3. Term. The term of this Public License is specified in Section
6(a).
4. Media and formats; technical modifications allowed. The
Licensor authorizes You to exercise the Licensed Rights in
all media and formats whether now known or hereafter created,
and to make technical modifications necessary to do so. The
Licensor waives and/or agrees not to assert any right or
authority to forbid You from making technical modifications
necessary to exercise the Licensed Rights, including
technical modifications necessary to circumvent Effective
Technological Measures. For purposes of this Public License,
simply making modifications authorized by this Section 2(a)
(4) never produces Adapted Material.
5. Downstream recipients.
a. Offer from the Licensor -- Licensed Material. Every
recipient of the Licensed Material automatically
receives an offer from the Licensor to exercise the
Licensed Rights under the terms and conditions of this
Public License.
b. Additional offer from the Licensor -- Adapted Material.
Every recipient of Adapted Material from You
automatically receives an offer from the Licensor to
exercise the Licensed Rights in the Adapted Material
under the conditions of the Adapter's License You apply.
c. No downstream restrictions. You may not offer or impose
any additional or different terms or conditions on, or
apply any Effective Technological Measures to, the
Licensed Material if doing so restricts exercise of the
Licensed Rights by any recipient of the Licensed
Material.
6. No endorsement. Nothing in this Public License constitutes or
may be construed as permission to assert or imply that You
are, or that Your use of the Licensed Material is, connected
with, or sponsored, endorsed, or granted official status by,
the Licensor or others designated to receive attribution as
provided in Section 3(a)(1)(A)(i).
b. Other rights.
1. Moral rights, such as the right of integrity, are not
licensed under this Public License, nor are publicity,
privacy, and/or other similar personality rights; however, to
the extent possible, the Licensor waives and/or agrees not to
assert any such rights held by the Licensor to the limited
extent necessary to allow You to exercise the Licensed
Rights, but not otherwise.
2. Patent and trademark rights are not licensed under this
Public License.
3. To the extent possible, the Licensor waives any right to
collect royalties from You for the exercise of the Licensed
Rights, whether directly or through a collecting society
under any voluntary or waivable statutory or compulsory
licensing scheme. In all other cases the Licensor expressly
reserves any right to collect such royalties, including when
the Licensed Material is used other than for NonCommercial
purposes.
Section 3 -- License Conditions.
Your exercise of the Licensed Rights is expressly made subject to the
following conditions.
a. Attribution.
1. If You Share the Licensed Material (including in modified
form), You must:
a. retain the following if it is supplied by the Licensor
with the Licensed Material:
i. identification of the creator(s) of the Licensed
Material and any others designated to receive
attribution, in any reasonable manner requested by
the Licensor (including by pseudonym if
designated);
ii. a copyright notice;
iii. a notice that refers to this Public License;
iv. a notice that refers to the disclaimer of
warranties;
v. a URI or hyperlink to the Licensed Material to the
extent reasonably practicable;
b. indicate if You modified the Licensed Material and
retain an indication of any previous modifications; and
c. indicate the Licensed Material is licensed under this
Public License, and include the text of, or the URI or
hyperlink to, this Public License.
2. You may satisfy the conditions in Section 3(a)(1) in any
reasonable manner based on the medium, means, and context in
which You Share the Licensed Material. For example, it may be
reasonable to satisfy the conditions by providing a URI or
hyperlink to a resource that includes the required
information.
3. If requested by the Licensor, You must remove any of the
information required by Section 3(a)(1)(A) to the extent
reasonably practicable.
b. ShareAlike.
In addition to the conditions in Section 3(a), if You Share
Adapted Material You produce, the following conditions also apply.
1. The Adapter's License You apply must be a Creative Commons
license with the same License Elements, this version or
later, or a BY-NC-SA Compatible License.
2. You must include the text of, or the URI or hyperlink to, the
Adapter's License You apply. You may satisfy this condition
in any reasonable manner based on the medium, means, and
context in which You Share Adapted Material.
3. You may not offer or impose any additional or different terms
or conditions on, or apply any Effective Technological
Measures to, Adapted Material that restrict exercise of the
rights granted under the Adapter's License You apply.
Section 4 -- Sui Generis Database Rights.
Where the Licensed Rights include Sui Generis Database Rights that
apply to Your use of the Licensed Material:
a. for the avoidance of doubt, Section 2(a)(1) grants You the right
to extract, reuse, reproduce, and Share all or a substantial
portion of the contents of the database for NonCommercial purposes
only;
b. if You include all or a substantial portion of the database
contents in a database in which You have Sui Generis Database
Rights, then the database in which You have Sui Generis Database
Rights (but not its individual contents) is Adapted Material,
including for purposes of Section 3(b); and
c. You must comply with the conditions in Section 3(a) if You Share
all or a substantial portion of the contents of the database.
For the avoidance of doubt, this Section 4 supplements and does not
replace Your obligations under this Public License where the Licensed
Rights include other Copyright and Similar Rights.
Section 5 -- Disclaimer of Warranties and Limitation of Liability.
a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
c. The disclaimer of warranties and limitation of liability provided
above shall be interpreted in a manner that, to the extent
possible, most closely approximates an absolute disclaimer and
waiver of all liability.
Section 6 -- Term and Termination.
a. This Public License applies for the term of the Copyright and
Similar Rights licensed here. However, if You fail to comply with
this Public License, then Your rights under this Public License
terminate automatically.
b. Where Your right to use the Licensed Material has terminated under
Section 6(a), it reinstates:
1. automatically as of the date the violation is cured, provided
it is cured within 30 days of Your discovery of the
violation; or
2. upon express reinstatement by the Licensor.
For the avoidance of doubt, this Section 6(b) does not affect any
right the Licensor may have to seek remedies for Your violations
of this Public License.
c. For the avoidance of doubt, the Licensor may also offer the
Licensed Material under separate terms or conditions or stop
distributing the Licensed Material at any time; however, doing so
will not terminate this Public License.
d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
License.
Section 7 -- Other Terms and Conditions.
a. The Licensor shall not be bound by any additional or different
terms or conditions communicated by You unless expressly agreed.
b. Any arrangements, understandings, or agreements regarding the
Licensed Material not stated herein are separate from and
independent of the terms and conditions of this Public License.
Section 8 -- Interpretation.
a. For the avoidance of doubt, this Public License does not, and
shall not be interpreted to, reduce, limit, restrict, or impose
conditions on any use of the Licensed Material that could lawfully
be made without permission under this Public License.
b. To the extent possible, if any provision of this Public License is
deemed unenforceable, it shall be automatically reformed to the
minimum extent necessary to make it enforceable. If the provision
cannot be reformed, it shall be severed from this Public License
without affecting the enforceability of the remaining terms and
conditions.
c. No term or condition of this Public License will be waived and no
failure to comply consented to unless expressly agreed to by the
Licensor.
d. Nothing in this Public License constitutes or may be interpreted
as a limitation upon, or waiver of, any privileges and immunities
that apply to the Licensor or You, including from the legal
processes of any jurisdiction or authority.
=======================================================================
Creative Commons is not a party to its public
licenses. Notwithstanding, Creative Commons may elect to apply one of
its public licenses to material it publishes and in those instances
will be considered the “Licensor.” The text of the Creative Commons
public licenses is dedicated to the public domain under the CC0 Public
Domain Dedication. Except for the limited purpose of indicating that
material is shared under a Creative Commons public license or as
otherwise permitted by the Creative Commons policies published at
creativecommons.org/policies, Creative Commons does not authorize the
use of the trademark "Creative Commons" or any other trademark or logo
of Creative Commons without its prior written consent including,
without limitation, in connection with any unauthorized modifications
to any of its public licenses or any other arrangements,
understandings, or agreements concerning use of licensed material. For
the avoidance of doubt, this paragraph does not form part of the
public licenses.
Creative Commons may be contacted at creativecommons.org.

122
README.md Normal file

@ -0,0 +1,122 @@
# StableSR for Stable Diffusion WebUI
Licensed under S-Lab License 1.0
[![CC BY-NC-SA 4.0][cc-by-nc-sa-shield]][cc-by-nc-sa]
English | [中文](README_CN.md)
- StableSR is a competitive super-resolution method originally proposed by Jianyi Wang et al.
- This repository is a migration of the StableSR project to the Automatic1111 WebUI.
Relevant Links
> Click to view high-quality official examples!
- [Project Page](https://iceclear.github.io/projects/stablesr/)
- [Official Repository](https://github.com/IceClear/StableSR)
- [Paper on arXiv](https://arxiv.org/abs/2305.07015)
> If you find this project useful, please give me & Jianyi Wang a star! ⭐
---
## Usage
### 1. Installation
⚪ Method 1: URL Install
- Open Automatic1111 WebUI -> Click Tab "Extensions" -> Click Tab "Install from URL" -> type in https://github.com/pkuliyi2015/sd-webui-stablesr.git -> Click "Install"
![installation](https://github.com/pkuliyi2015/multidiffusion-img-demo/blob/master/installation.png?raw=true)
⚪ Method 2: In progress...
> After successful installation, you should see "StableSR" in the img2img Scripts dropdown list.
### 2. Download the main components
- You MUST use the Stable Diffusion V2.1 512 **EMA** checkpoint (~5.21GB) from StabilityAI
- You can download it from [HuggingFace](https://huggingface.co/stabilityai/stable-diffusion-2-1-base)
- Put it into stable-diffusion-webui/models/Stable-Diffusion/
- Download the pruned StableSR module (~400MB)
- Official resources: In Progress
- My resources: <[GoogleDrive](https://drive.google.com/file/d/1tWjkZQhfj07sHDR4r9Ta5Fk4iMp1t3Qw/view?usp=sharing)> <[Baidu Netdisk, code: aguq](https://pan.baidu.com/s/1Nq_6ciGgKnTu0W14QcKKWg?pwd=aguq)>
- Put it into stable-diffusion-webui/extensions/sd-webui-stablesr/models/
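If you are unsure whether the files landed in the right folders, a quick check like the sketch below can help (run it from the stable-diffusion-webui root; the folder names are the ones this section uses):

```python
from pathlib import Path

# Sanity-check sketch: list the contents of the two folders this section fills.
webui = Path('.')  # assumed to be the stable-diffusion-webui root
for folder in (webui / 'models' / 'Stable-Diffusion',
               webui / 'extensions' / 'sd-webui-stablesr' / 'models'):
    names = sorted(f.name for f in folder.iterdir()) if folder.is_dir() else None
    print(folder, '->', names if names is not None else 'MISSING')
```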
### 3. Optional components
- Install the [Tiled Diffusion & VAE](https://github.com/pkuliyi2015/multidiffusion-upscaler-for-automatic1111) extension
- The original StableSR easily runs out of memory (OOM) on images larger than 512.
- For better quality and less VRAM usage, we recommend Tiled Diffusion & VAE.
- Use the Official VQGAN VAE (~700MB)
- Official resources: In Progress
- My resources: <[GoogleDrive](https://drive.google.com/file/d/1ARtDMia3_CbwNsGxxGcZ5UP75W4PeIEI/view?usp=share_link)> <[Baidu Netdisk, code: 83u9](https://pan.baidu.com/s/1YCYmGBethR9JZ8-eypoIiQ?pwd=83u9)>
- Put it in your stable-diffusion-webui/models/VAE
### 4. Extension Usage
- At the top of the WebUI, select the v2-1_512-ema-pruned checkpoint you downloaded.
- Switch to the img2img tab. Find the "Scripts" dropdown at the bottom of the page.
- Select the StableSR script.
- Click the refresh button and select the StableSR checkpoint you have downloaded.
- Choose a scale factor.
- Upload your image and start generation (can work without prompts).
### 5. Useful Tips
- Euler a sampler is recommended. Steps >= 20.
- For output image sizes > 512, we recommend using Tiled Diffusion & VAE; otherwise, the image quality may not be ideal and VRAM usage will be huge.
- Here are the Tiled Diffusion settings that replicate the official behavior in the paper:
- Method = Mixture of Diffusers
- Latent tile size = 64, Latent tile overlap = 32
- Set the latent tile batch size as large as possible without running out of memory.
- The upscaler MUST be None.
- What is "Pure Noise"?
- Pure Noise refers to starting from a fully random noise tensor instead of your image. **This is the default behavior in the StableSR paper.**
- When enabled, the script ignores your denoising strength and gives you much more detailed images, but also changes the color & sharpness significantly.
- When disabled, the script starts by adding some noise to your image. The result will not be fully detailed, even if you set denoising strength = 1 (but it may be aesthetically pleasing). See the [Comparison](https://imgsli.com/MTgwMTMx) and the sketch below.
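A minimal PyTorch sketch of the difference (an illustration only, not the extension's exact code; real samplers weight the image and the noise by the timestep schedule):

```python
import torch

def initial_latent(init_latent: torch.Tensor, noise: torch.Tensor,
                   pure_noise: bool) -> torch.Tensor:
    """Pick the tensor that sampling starts from."""
    if pure_noise:
        # Pure Noise: ignore the image and start txt2img-style from random
        # noise; denoising strength is irrelevant in this mode.
        return noise
    # Default img2img: start from the encoded image with noise added on top
    # (simplified; the real blend depends on the sampler's noise schedule).
    return init_latent + noise

encoded_image = torch.randn(1, 4, 64, 64)  # stand-in for an encoded input image
start = initial_latent(encoded_image, torch.randn_like(encoded_image), pure_noise=True)
```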
### 6. Important Notice
> Why are my results different from the official examples?
- It is neither your fault nor ours.
- If installed correctly, this extension uses the same UNet model weights as StableSR.
- If you install the optional VQVAE, the whole set of model weights will be the same as the official model with fusion weight = 0.
- However, your results will **not be as good as** the official results, because:
- Sampler Difference:
- The official repo does 100 or 200 steps of legacy DDPM sampling with a custom timestep scheduler, and samples without negative prompts.
- However, WebUI doesn't offer such a sampler, and it must sample with negative prompts. **This is the main difference.**
- VQVAE Decoder Difference:
- The official VQVAE Decoder takes some Encoder features as input.
- However, in practice, I found these features are astonishingly huge for large images (>10 GB for 4K images, even in float16!).
- Hence, **I removed the CFW component from the VAE Decoder**. As this leads to inferior detail fidelity, I will try to add it back later as an option.
---
## License
This project is licensed under:
- S-Lab License 1.0.
- [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License][cc-by-nc-sa], due to the use of the NVIDIA SPADE module.
[![CC BY-NC-SA 4.0][cc-by-nc-sa-image]][cc-by-nc-sa]
[cc-by-nc-sa]: http://creativecommons.org/licenses/by-nc-sa/4.0/
[cc-by-nc-sa-image]: https://licensebuttons.net/l/by-nc-sa/4.0/88x31.png
[cc-by-nc-sa-shield]: https://img.shields.io/badge/License-CC%20BY--NC--SA%204.0-lightgrey.svg
### Disclaimer
- All code in this extension is for research purposes only.
- The commercial use of the code and checkpoint is **strictly prohibited**.
### Important Notice for Outcome Images
- Please note that the CC BY-NC-SA 4.0 license in the NVIDIA SPADE module also prohibits the commercial use of outcome images.
- Jianyi Wang may change the SPADE module to a commercial-friendly one, but he is busy.
- If you wish to *speed up* that process for commercial purposes, please contact him via email: iceclearwjy@gmail.com
## Acknowledgments
I would like to thank Jianyi Wang et al. for the original StableSR method.

119
README_CN.md Normal file

@ -0,0 +1,119 @@
# StableSR - Stable Diffusion WebUI
S-Lab License 1.0 & [![CC BY-NC-SA 4.0][cc-by-nc-sa-shield]][cc-by-nc-sa]
[English](README.md) | Chinese
- StableSR is a competitive super-resolution method originally proposed by Jianyi Wang et al.
- This repository is a migration of the StableSR project to the Automatic1111 WebUI.
Relevant Links
> Click to view high-quality official examples!
- [Project Page](https://iceclear.github.io/projects/stablesr/)
- [Official Repository](https://github.com/IceClear/StableSR)
- [Paper on arXiv](https://arxiv.org/abs/2305.07015)
> If you find this project useful, please give me and Jianyi Wang a star! ⭐
---
## Usage
### 1. Installation
⚪ Method 1: URL Install
- Open the Automatic1111 WebUI -> click the "Extensions" tab -> click the "Install from URL" tab -> enter https://github.com/pkuliyi2015/sd-webui-stablesr.git -> click "Install"
![installation](https://github.com/pkuliyi2015/multidiffusion-img-demo/blob/master/installation.png?raw=true)
⚪ Method 2: In progress...
> After successful installation, you should see "StableSR" in the img2img Scripts dropdown list.
### 2. Download the main components
- You MUST use the Stable Diffusion V2.1 512 **EMA** checkpoint (~5.21GB) from StabilityAI
- You can download it from [HuggingFace](https://huggingface.co/stabilityai/stable-diffusion-2-1-base)
- Put it into stable-diffusion-webui/models/Stable-Diffusion/
- Download the pruned StableSR module (~400MB)
- Official resources: in progress
- My resources: <[GoogleDrive](https://drive.google.com/file/d/1tWjkZQhfj07sHDR4r9Ta5Fk4iMp1t3Qw/view?usp=sharing)> <[Baidu Netdisk, code: aguq](https://pan.baidu.com/s/1Nq_6ciGgKnTu0W14QcKKWg?pwd=aguq)>
- Put it into stable-diffusion-webui/extensions/sd-webui-stablesr/models/
### 3. Optional components
- Install the [Tiled Diffusion & VAE](https://github.com/pkuliyi2015/multidiffusion-upscaler-for-automatic1111) extension
- The original StableSR easily runs out of memory (OOM) on images larger than 512.
- For better quality and lower VRAM usage, we recommend Tiled Diffusion & VAE.
- Use the official VQGAN VAE (~700MB)
- Official resources: in progress
- My resources: <[GoogleDrive](https://drive.google.com/file/d/1ARtDMia3_CbwNsGxxGcZ5UP75W4PeIEI/view?usp=share_link)> <[Baidu Netdisk, code: 83u9](https://pan.baidu.com/s/1YCYmGBethR9JZ8-eypoIiQ?pwd=83u9)>
- Put it in your stable-diffusion-webui/models/VAE
### 4. Extension Usage
- At the top of the WebUI, select the v2-1_512-ema-pruned checkpoint you downloaded.
- Switch to the img2img tab. Find the "Scripts" dropdown at the bottom of the page.
- Select the StableSR script.
- Click the refresh button and select the StableSR checkpoint you downloaded.
- Choose a scale factor.
- Upload your image and start generation (it can work without prompts).
### 5. Useful Tips
- The Euler a sampler is recommended. Steps >= 20.
- For output image sizes > 512, we recommend using Tiled Diffusion & VAE; otherwise, the image quality may not be ideal and VRAM usage will be huge.
- Here are the Tiled Diffusion settings that replicate the official behavior in the paper:
- Method = Mixture of Diffusers
- Latent tile size = 64, latent tile overlap = 32
- Set the latent tile batch size as large as possible without running out of memory.
- The upscaler MUST be None.
- What is "Pure Noise"?
- Pure Noise refers to starting from a fully random noise tensor instead of your image. **This is the default behavior in the StableSR paper.**
- When enabled, the script ignores your denoising strength and gives you much more detailed images, but also changes the color & sharpness significantly.
- When disabled, the script starts by adding some noise to your image. The result will not be fully detailed, even if you set denoising strength = 1 (but it may be aesthetically pleasing). See [Comparison](https://imgsli.com/MTgwMTMx).
### 6. Important Notice
> Why are my results different from the official examples?
- It is neither your fault nor ours.
- If installed correctly, this extension uses the same UNet model weights as StableSR.
- If you install the optional VQVAE, the whole set of model weights will be the same as the official model with fusion weight = 0.
- However, your results will **not be as good as** the official results, because:
- Sampler difference:
- The official repo does 100 or 200 steps of legacy DDPM sampling with a custom timestep scheduler, and samples without negative prompts.
- However, the WebUI doesn't offer such a sampler and must sample with negative prompts. **This is the main difference.**
- VQVAE decoder difference:
- The official VQVAE decoder takes some encoder features as input.
- However, in practice, I found these features are astonishingly huge for large images (>10 GB for 4K images, even in float16!).
- Hence, **I removed the CFW component from the VAE decoder**. As this leads to inferior detail fidelity, I will try to add it back later as an option.
---
## License
This project is licensed under:
- S-Lab License 1.0.
- [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License][cc-by-nc-sa], due to the use of the NVIDIA SPADE module.
[![CC BY-NC-SA 4.0][cc-by-nc-sa-image]][cc-by-nc-sa]
[cc-by-nc-sa]: http://creativecommons.org/licenses/by-nc-sa/4.0/
[cc-by-nc-sa-image]: https://licensebuttons.net/l/by-nc-sa/4.0/88x31.png
[cc-by-nc-sa-shield]: https://img.shields.io/badge/License-CC%20BY--NC--SA%204.0-lightgrey.svg
### Disclaimer
- All code in this extension is for research purposes only.
- Commercial use of the code and checkpoints is **strictly prohibited**.
### Important Notice for Outcome Images
- Please note that the CC BY-NC-SA 4.0 license in the NVIDIA SPADE module also prohibits commercial use of the outcome images.
- Jianyi Wang may change the SPADE module to a commercial-friendly one, but he is busy.
- If you wish to *speed up* that process for commercial purposes, please contact him via email: iceclearwjy@gmail.com
## Acknowledgments
I would like to thank Jianyi Wang et al. for the original StableSR method.

238
scripts/stablesr.py Normal file

@ -0,0 +1,238 @@
'''
# --------------------------------------------------------------------------------
#
# StableSR for Automatic1111 WebUI
#
# Introducing the state-of-the-art super-resolution method: StableSR!
# The technique was originally proposed by my schoolmate Jianyi Wang et al.
#
# Project Page: https://iceclear.github.io/projects/stablesr/
# Official Repo: https://github.com/IceClear/StableSR
# Paper: https://arxiv.org/abs/2305.07015
#
# @original author: Jianyi Wang et al.
# @migration: LI YI
# @organization: Nanyang Technological University - Singapore
# @date: 2023-05-20
# @license:
# S-Lab License 1.0 (see LICENSE file)
# CC BY-NC-SA 4.0 (required by NVIDIA SPADE module)
#
# @disclaimer:
# All code in this extension is for research purposes only.
# The commercial use of the code & checkpoint is strictly prohibited.
#
# --------------------------------------------------------------------------------
#
# IMPORTANT NOTICE FOR OUTCOME IMAGES:
# - Please be aware that the CC BY-NC-SA 4.0 license in SPADE module
# also prohibits the commercial use of outcome images.
# - Jianyi Wang may change the SPADE module to a commercial-friendly one.
# If you want to use the outcome images for commercial purposes, please
# contact Jianyi Wang for more information.
#
# Please give me a star (and also Jianyi's repo) if you like this project!
#
# --------------------------------------------------------------------------------
'''
import os
import torch
import gradio as gr
import numpy as np
import PIL.Image as Image
from pathlib import Path
from torch import Tensor
from tqdm import tqdm
from modules import scripts, processing, sd_samplers, devices
from modules.processing import StableDiffusionProcessingImg2Img, Processed
from ldm.modules.diffusionmodules.openaimodel import UNetModel
from srmodule.spade import SPADELayers
from srmodule.struct_cond import EncoderUNetModelWT, build_unetwt
from srmodule.colorfix import fix_color
SD_WEBUI_PATH = Path.cwd()
ME_PATH = SD_WEBUI_PATH / 'extensions' / 'sd-webui-stablesr'
MODEL_PATH = ME_PATH / 'models'
FORWARD_CACHE_NAME = 'org_forward_stablesr'
class StableSR:
def __init__(self, path, dtype, device):
state_dict = torch.load(path, map_location='cpu')
self.struct_cond_model: EncoderUNetModelWT = build_unetwt()
self.spade_layers: SPADELayers = SPADELayers()
self.struct_cond_model.load_from_dict(state_dict)
self.spade_layers.load_from_dict(state_dict)
del state_dict
self.struct_cond_model.apply(lambda x: x.to(dtype=dtype, device=device))
self.spade_layers.apply(lambda x: x.to(dtype=dtype, device=device))
self.latent_image: Tensor = None
self.set_image_hooks = {}
self.struct_cond: Tensor = None
def set_latent_image(self, latent_image):
self.latent_image = latent_image
for hook in self.set_image_hooks.values():
hook(latent_image)
def hook(self, unet: UNetModel):
# hook unet to set the struct_cond
if not hasattr(unet, FORWARD_CACHE_NAME):
setattr(unet, FORWARD_CACHE_NAME, unet.forward)
def unet_forward(x, timesteps=None, context=None, y=None, **kwargs):
self.latent_image = self.latent_image.to(x.device)
self.struct_cond = None # mitigate vram peak
self.struct_cond = self.struct_cond_model(self.latent_image, timesteps.to(x.device)[:self.latent_image.shape[0]])
return getattr(unet, FORWARD_CACHE_NAME)(x, timesteps, context, y, **kwargs)
unet.forward = unet_forward
self.spade_layers.hook(unet, lambda: self.struct_cond)
def unhook(self, unet: UNetModel):
# clean up cache
self.latent_image = None
self.struct_cond = None
self.set_image_hooks = {}
# unhook unet forward
if hasattr(unet, FORWARD_CACHE_NAME):
unet.forward = getattr(unet, FORWARD_CACHE_NAME)
delattr(unet, FORWARD_CACHE_NAME)
# unhook spade layers
self.spade_layers.unhook(unet)
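# --------------------------------------------------------------------------------
# Illustration only (not used by the extension): the cache-and-restore
# monkey-patch pattern that hook()/unhook() above rely on. Stashing the original
# forward on the module itself keeps hooking idempotent and exactly reversible.
def _hook_forward_sketch(module, wrapper, cache=FORWARD_CACHE_NAME):
    if not hasattr(module, cache):  # never overwrite the cached original
        setattr(module, cache, module.forward)
    original = getattr(module, cache)
    module.forward = lambda *args, **kwargs: wrapper(original, *args, **kwargs)

def _unhook_forward_sketch(module, cache=FORWARD_CACHE_NAME):
    if hasattr(module, cache):
        module.forward = getattr(module, cache)
        delattr(module, cache)
# --------------------------------------------------------------------------------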
class Script(scripts.Script):
def __init__(self) -> None:
self.model_list = {}
self.load_model_list()
self.last_path = None
self.stablesr_model: StableSR = None
def load_model_list(self):
# traverse the CFG_PATH and add all files to the model list
self.model_list = {}
for file in MODEL_PATH.iterdir():
if file.is_file():
# save the absolute path
self.model_list[file.name] = str(file.absolute())
self.model_list['None'] = None
def title(self):
return "StableSR"
def show(self, is_img2img):
return is_img2img
def ui(self, is_img2img):
gr.HTML('<p>StableSR is a state-of-the-art super-resolution method.</p>')
gr.HTML('<p>1. You MUST use the SD2.1-512-ema-pruned checkpoint. The Euler a sampler is recommended.</p>')
gr.HTML('<p>2. Use Tiled Diffusion & VAE - Mixture of Diffusers for resolutions > 512.</p>')
gr.HTML('<p>3. When using Tiled Diffusion, you MUST set the upscaler to None!</p>')
with gr.Row():
model = gr.Dropdown(list(self.model_list.keys()), label="SR Model")
refresh = gr.Button(value='', variant='tool')
def refresh_fn(selected):
self.load_model_list()
if selected not in self.model_list:
selected = 'None'
return gr.Dropdown.update(value=selected, choices=list(self.model_list.keys()))
refresh.click(fn=refresh_fn, inputs=model, outputs=model)
with gr.Row():
scale_factor = gr.Slider(minimum=1, maximum=16, step=0.1, value=2, label='Scale Factor', elem_id=f'StableSR-scale')
with gr.Row():
pure_noise = gr.Checkbox(label='Pure Noise', value=True, elem_id=f'StableSR-pure-noise')
color_fix = gr.Checkbox(label='Color Fix', value=True, elem_id=f'StableSR-color-fix')
return [model, scale_factor, pure_noise, color_fix]
def run(self, p: StableDiffusionProcessingImg2Img, model: str, scale_factor: float, pure_noise: bool, color_fix: bool):
if model == 'None':
# do clean up
self.stablesr_model = None
self.last_path = None
return
if model not in self.model_list:
raise gr.Error(f"Model {model} is not in the list! Please refresh your browser!")
if not os.path.exists(self.model_list[model]):
raise gr.Error(f"Model {model} is not on your disk! Please refresh the model list!")
# upscale the image and set the output size
init_img: Image = p.init_images[0]
target_width = int(init_img.width * scale_factor)
target_height = int(init_img.height * scale_factor)
# if the target width is not divisible by 8, then round it up
if target_width % 8 != 0:
target_width = target_width + 8 - target_width % 8
# if the target height is not divisible by 8, then round it up
if target_height % 8 != 0:
target_height = target_height + 8 - target_height % 8
init_img = init_img.resize((target_width, target_height), Image.LANCZOS)
p.init_images[0] = init_img
p.width = init_img.width
p.height = init_img.height
print('[StableSR] Target image size: {}x{}'.format(init_img.width, init_img.height))
unet: UNetModel = p.sd_model.model.diffusion_model
# print(unet.input_blocks)
first_param = next(unet.parameters())
if self.last_path != self.model_list[model]:
# load the model
self.stablesr_model = None
# get the type and the device of the unet model's first parameter
self.stablesr_model = StableSR(self.model_list[model], dtype=first_param.dtype, device=first_param.device)
self.last_path = self.model_list[model]
def sample_custom(conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
self.stablesr_model.set_latent_image(p.init_latent)
x = processing.create_random_tensors(p.init_latent.shape[1:], seeds=seeds, subseeds=subseeds, subseed_strength=p.subseed_strength, seed_resize_from_h=p.seed_resize_from_h, seed_resize_from_w=p.seed_resize_from_w, p=p)
sampler = sd_samplers.create_sampler(p.sampler_name, p.sd_model)
if pure_noise:
# NOTE: use txt2img instead of img2img sampling
samples = sampler.sample(p, x, conditioning, unconditional_conditioning, image_conditioning=p.image_conditioning)
else:
if p.initial_noise_multiplier != 1.0:
p.extra_generation_params["Noise multiplier"] = p.initial_noise_multiplier
x *= p.initial_noise_multiplier
samples = sampler.sample_img2img(p, p.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=p.image_conditioning)
if p.mask is not None:
samples = samples * p.nmask + p.init_latent * p.mask
del x
devices.torch_gc()
return samples
# replace the sample function
p.sample = sample_custom
# Hook the unet, and unhook after processing.
try:
self.stablesr_model.hook(unet)
result: Processed = processing.process_images(p)
if color_fix:
for i in range(len(result.images)):
result.images[i] = fix_color(result.images[i], init_img)
return result
finally:
self.stablesr_model.unhook(unet)

111
srmodule/attn.py Normal file

@ -0,0 +1,111 @@
'''
This file is modified from the Tiled VAE attn.py so that StableSR can save a lot of VRAM.
'''
import math
import torch
from modules import shared, sd_hijack
from modules.sd_hijack_optimizations import get_available_vram, get_xformers_flash_attention_op, sub_quad_attention
try:
import xformers
import xformers.ops
except ImportError:
pass
def get_attn_func():
method = sd_hijack.model_hijack.optimization_method
if method is None:
return attn_forward
method = method.lower()
# The method should be one of the following:
# ['none', 'sdp-no-mem', 'sdp', 'xformers', 'sub-quadratic', 'v1', 'invokeai', 'doggettx']
if method not in ['none', 'sdp-no-mem', 'sdp', 'xformers', 'sub-quadratic', 'v1', 'invokeai', 'doggettx']:
print(f"[StableSR] Warning: Unknown attention optimization method {method}. Please try to update the extension.")
return attn_forward
if method == 'none':
return attn_forward
elif method == 'xformers':
return xformers_attnblock_forward
elif method == 'sdp-no-mem':
return sdp_no_mem_attnblock_forward
elif method == 'sdp':
return sdp_attnblock_forward
elif method == 'sub-quadratic':
return sub_quad_attnblock_forward
elif method == 'doggettx':
return cross_attention_attnblock_forward
return attn_forward
# The following functions are all copied from modules.sd_hijack_optimizations
# However, the residual & normalization are removed and computed separately.
def attn_forward(q, k, v):
# compute attention
# q: b,hw,c
k = k.permute(0, 2, 1) # b,c,hw
c = k.shape[1]
w_ = torch.bmm(q, k) # b,hw,hw w[b,i,j]=sum_c q[b,i,c]k[b,c,j]
w_ = w_ * (int(c)**(-0.5))
w_ = torch.nn.functional.softmax(w_, dim=2)
# attend to values
v = v.permute(0, 2, 1) # b,c,hw
w_ = w_.permute(0, 2, 1) # b,hw,hw (first hw of k, second of q)
# b, c,hw (hw of q) h_[b,c,j] = sum_i v[b,c,i] w_[b,i,j]
h_ = torch.bmm(v, w_)
return h_.permute(0, 2, 1)
def xformers_attnblock_forward(q, k, v):
return xformers.ops.memory_efficient_attention(q, k, v, op=get_xformers_flash_attention_op(q, k, v))
def cross_attention_attnblock_forward(q, k, v):
# compute attention
k = k.permute(0, 2, 1)# b,c,hw
v = v.permute(0, 2, 1)# b,c,hw
c = k.shape[1]
h_ = torch.zeros_like(k, device=q.device)
mem_free_total = get_available_vram()
tensor_size = q.shape[0] * q.shape[1] * k.shape[2] * q.element_size()
mem_required = tensor_size * 2.5
steps = 1
if mem_required > mem_free_total:
steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2)))
slice_size = q.shape[1] // steps if (q.shape[1] % steps) == 0 else q.shape[1]
for i in range(0, q.shape[1], slice_size):
end = i + slice_size
w1 = torch.bmm(q[:, i:end], k) # b,hw,hw w[b,i,j]=sum_c q[b,i,c]k[b,c,j]
w2 = w1 * (int(c)**(-0.5))
del w1
w3 = torch.nn.functional.softmax(w2, dim=2, dtype=q.dtype)
del w2
# attend to values
w4 = w3.permute(0, 2, 1) # b,hw,hw (first hw of k, second of q)
del w3
h_[:, :, i:end] = torch.bmm(v, w4) # b, c,hw (hw of q) h_[b,c,j] = sum_i v[b,c,i] w_[b,i,j]
del w4
return h_.permute(0, 2, 1)
def sdp_no_mem_attnblock_forward(q, k, v):
with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_math=True, enable_mem_efficient=False):
return sdp_attnblock_forward(q, k, v)
def sdp_attnblock_forward(q, k, v):
return torch.nn.functional.scaled_dot_product_attention(q, k, v, dropout_p=0.0, is_causal=False)
def sub_quad_attnblock_forward(q, k, v):
return sub_quad_attention(q, k, v, q_chunk_size=shared.cmd_opts.sub_quad_q_chunk_size, kv_chunk_size=shared.cmd_opts.sub_quad_kv_chunk_size, chunk_threshold=shared.cmd_opts.sub_quad_chunk_threshold, use_checkpoint=True)
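# A quick shape check for the fallback path (a sketch; this module imports WebUI
# internals above, so it only runs inside the WebUI's Python environment).
# Every function here takes q, k, v shaped (batch, hw, channels) and returns a
# tensor of the same shape.
if __name__ == '__main__':
    q = torch.randn(2, 64, 320)
    k = torch.randn(2, 64, 320)
    v = torch.randn(2, 64, 320)
    print(attn_forward(q, k, v).shape)  # expected: torch.Size([2, 64, 320])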

48
srmodule/colorfix.py Normal file

@ -0,0 +1,48 @@
from PIL import Image
from torch import Tensor
from torchvision.transforms import ToTensor, ToPILImage
def fix_color(target: Image, source: Image):
# Convert images to tensors
to_tensor = ToTensor()
target_tensor = to_tensor(target).unsqueeze(0)
source_tensor = to_tensor(source).unsqueeze(0)
# Apply adaptive instance normalization
result_tensor = adaptive_instance_normalization(target_tensor, source_tensor)
# Convert tensor back to image
to_image = ToPILImage()
result_image = to_image(result_tensor.squeeze(0).clamp_(0.0, 1.0))
return result_image
def calc_mean_std(feat: Tensor, eps=1e-5):
"""Calculate mean and std for adaptive_instance_normalization.
Args:
feat (Tensor): 4D tensor.
eps (float): A small value added to the variance to avoid
divide-by-zero. Default: 1e-5.
"""
size = feat.size()
assert len(size) == 4, 'The input feature should be 4D tensor.'
b, c = size[:2]
feat_var = feat.view(b, c, -1).var(dim=2) + eps
feat_std = feat_var.sqrt().view(b, c, 1, 1)
feat_mean = feat.view(b, c, -1).mean(dim=2).view(b, c, 1, 1)
return feat_mean, feat_std
def adaptive_instance_normalization(content_feat:Tensor, style_feat:Tensor):
"""Adaptive instance normalization.
Adjust the reference features to have colors and illumination similar to
those of the degraded features.
Args:
content_feat (Tensor): The reference features.
style_feat (Tensor): The degraded features.
"""
size = content_feat.size()
style_mean, style_std = calc_mean_std(style_feat)
content_mean, content_std = calc_mean_std(content_feat)
normalized_feat = (content_feat - content_mean.expand(size)) / content_std.expand(size)
return normalized_feat * style_std.expand(size) + style_mean.expand(size)
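# Usage sketch (hypothetical file names): transfer the input image's color
# statistics onto the super-resolved output. Only the per-channel mean/std of
# the source are used, so the two images may differ in size, but the channel
# counts must match (hence the .convert('RGB') calls).
if __name__ == '__main__':
    sr_image = Image.open('upscaled.png').convert('RGB')  # detailed but color-shifted
    lr_image = Image.open('input.png').convert('RGB')     # color reference
    fix_color(sr_image, lr_image).save('upscaled_fixed.png')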

201
srmodule/spade.py Normal file

@ -0,0 +1,201 @@
"""
Copyright (C) 2019 NVIDIA Corporation. All rights reserved.
Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
"""
import re
import torch
import torch.nn as nn
from ldm.modules.diffusionmodules.util import normalization, checkpoint
from ldm.modules.diffusionmodules.openaimodel import ResBlock, UNetModel
class SPADE(nn.Module):
def __init__(self, norm_nc, label_nc=256, config_text='spadeinstance3x3'):
super().__init__()
assert config_text.startswith('spade')
parsed = re.search(r'spade(\D+)(\d)x\d', config_text)
ks = int(parsed.group(2))
self.param_free_norm = normalization(norm_nc)
# The dimension of the intermediate embedding space. Yes, hardcoded.
nhidden = 128
pw = ks // 2
self.mlp_shared = nn.Sequential(
nn.Conv2d(label_nc, nhidden, kernel_size=ks, padding=pw),
nn.ReLU()
)
self.mlp_gamma = nn.Conv2d(nhidden, norm_nc, kernel_size=ks, padding=pw)
self.mlp_beta = nn.Conv2d(nhidden, norm_nc, kernel_size=ks, padding=pw)
def forward(self, x_dic, segmap_dic):
return checkpoint(
self._forward, (x_dic, segmap_dic), self.parameters(), True
)
def _forward(self, x_dic, segmap_dic):
segmap = segmap_dic[str(x_dic.size(-1))]
x = x_dic
# Part 1. generate parameter-free normalized activations
normalized = self.param_free_norm(x)
# Part 2. produce scaling and bias conditioned on semantic map
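# SPADE modulation: out = normalized * (1 + mlp_gamma(actv)) + mlp_beta(actv),
# with scale and bias predicted per spatial location from the structure condition.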
# segmap = F.interpolate(segmap, size=x.size()[2:], mode='nearest')
actv = self.mlp_shared(segmap)
repeat_factor = normalized.shape[0]//segmap.shape[0]
if repeat_factor > 1:
out = normalized
out *= (1 + self.mlp_gamma(actv).repeat_interleave(repeat_factor, dim=0))
out += self.mlp_beta(actv).repeat_interleave(repeat_factor, dim=0)
else:
out = normalized
out *= (1 + self.mlp_gamma(actv))
out += self.mlp_beta(actv)
return out
def dual_resblock_forward(self: ResBlock, x, emb, spade: SPADE, get_struct_cond):
if self.updown:
in_rest, in_conv = self.in_layers[:-1], self.in_layers[-1]
h = in_rest(x)
h = self.h_upd(h)
x = self.x_upd(x)
h = in_conv(h)
else:
h = self.in_layers(x)
emb_out = self.emb_layers(emb).type(h.dtype)
while len(emb_out.shape) < len(h.shape):
emb_out = emb_out[..., None]
if self.use_scale_shift_norm:
out_norm, out_rest = self.out_layers[0], self.out_layers[1:]
scale, shift = torch.chunk(emb_out, 2, dim=1)
h = out_norm(h) * (1 + scale) + shift
h = out_rest(h)
else:
h = h + emb_out
h = self.out_layers(h)
h = spade(h, get_struct_cond())
return self.skip_connection(x) + h
class SPADELayers(nn.Module):
def __init__(self):
'''
A container class for fast SPADE layer loading.
The layer parameters are inferred from the official checkpoint.
'''
super().__init__()
self.input_blocks = nn.ModuleList([
nn.Identity(),
SPADE(320),
SPADE(320),
nn.Identity(),
SPADE(640),
SPADE(640),
nn.Identity(),
SPADE(1280),
SPADE(1280),
nn.Identity(),
SPADE(1280),
SPADE(1280),
])
self.middle_block = nn.ModuleList([
SPADE(1280),
nn.Identity(),
SPADE(1280),
])
self.output_blocks = nn.ModuleList([
SPADE(1280),
SPADE(1280),
SPADE(1280),
SPADE(1280),
SPADE(1280),
SPADE(1280),
SPADE(640),
SPADE(640),
SPADE(640),
SPADE(320),
SPADE(320),
SPADE(320),
])
self.input_ids = [1,2,4,5,7,8,10,11]
self.output_ids = list(range(12))
self.mid_ids = [0,2]
self.forward_cache_name = 'org_forward_stablesr'
def hook(self, unet: UNetModel, get_struct_cond):
# hook all resblocks
resblock: ResBlock = None
for i in self.input_ids:
resblock = unet.input_blocks[i][0]
# debug
# assert isinstance(resblock, ResBlock)
if not hasattr(resblock, self.forward_cache_name):
setattr(resblock, self.forward_cache_name, resblock._forward)
resblock._forward = lambda x, timesteps, resblock=resblock, spade=self.input_blocks[i]: dual_resblock_forward(resblock, x, timesteps, spade, get_struct_cond)
for i in self.output_ids:
resblock = unet.output_blocks[i][0]
# debug
# assert isinstance(resblock, ResBlock)
if not hasattr(resblock, self.forward_cache_name):
setattr(resblock, self.forward_cache_name, resblock._forward)
resblock._forward = lambda x, timesteps, resblock=resblock, spade=self.output_blocks[i]: dual_resblock_forward(resblock, x, timesteps, spade, get_struct_cond)
for i in self.mid_ids:
resblock = unet.middle_block[i]
# debug
# assert isinstance(resblock, ResBlock)
if not hasattr(resblock, self.forward_cache_name):
setattr(resblock, self.forward_cache_name, resblock._forward)
resblock._forward = lambda x, timesteps, resblock=resblock, spade=self.middle_block[i]: dual_resblock_forward(resblock, x, timesteps, spade, get_struct_cond)
def unhook(self, unet: UNetModel):
resblock: ResBlock = None
for i in self.input_ids:
resblock = unet.input_blocks[i][0]
if hasattr(resblock, self.forward_cache_name):
resblock._forward = getattr(resblock, self.forward_cache_name)
delattr(resblock, self.forward_cache_name)
for i in self.output_ids:
resblock = unet.output_blocks[i][0]
if hasattr(resblock, self.forward_cache_name):
resblock._forward = getattr(resblock, self.forward_cache_name)
delattr(resblock, self.forward_cache_name)
for i in self.mid_ids:
resblock = unet.middle_block[i]
if hasattr(resblock, self.forward_cache_name):
resblock._forward = getattr(resblock, self.forward_cache_name)
delattr(resblock, self.forward_cache_name)
def load_from_dict(self, state_dict):
"""
Load model weights from a dictionary.
:param state_dict: a dict of parameters.
"""
filtered_dict = {}
for k, v in state_dict.items():
if k.startswith("model.diffusion_model."):
key = k[len("model.diffusion_model.") :]
# remove the '.0.spade' within the key
if 'middle_block' not in key:
key = key.replace('.0.spade', '')
else:
key = key.replace('.spade', '')
filtered_dict[key] = v
self.load_state_dict(filtered_dict)
if __name__ == '__main__':
path = '../models/stablesr_sd21.ckpt'
state_dict = torch.load(path)
model = SPADELayers()
model.load_from_dict(state_dict)
print(model)

353
srmodule/struct_cond.py Normal file

@ -0,0 +1,353 @@
import math
import torch
import torch.nn as nn
from ldm.modules.diffusionmodules.openaimodel import (
TimestepEmbedSequential,
ResBlock,
Downsample,
)
from ldm.modules.diffusionmodules.util import (
conv_nd,
linear,
timestep_embedding,
checkpoint,
normalization,
zero_module,
)
from srmodule.attn import get_attn_func
attn_func = None
class QKVAttentionLegacy(nn.Module):
"""
A module which performs QKV attention. Matches legacy QKVAttention + input/output heads shaping
"""
def __init__(self, n_heads):
super().__init__()
self.n_heads = n_heads
def forward(self, qkv):
"""
Apply QKV attention.
:param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs.
:return: an [N x (H * C) x T] tensor after attention.
"""
bs, width, length = qkv.shape
assert width % (3 * self.n_heads) == 0
ch = width // (3 * self.n_heads)
q, k, v = qkv.reshape(bs * self.n_heads, ch * 3, length).split(ch, dim=1)
# Legacy Attention
# scale = 1 / math.sqrt(math.sqrt(ch))
# weight = torch.einsum(
# "bct,bcs->bts", q * scale, k * scale
# ) # More stable with f16 than dividing afterwards
# weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype)
# a = torch.einsum("bts,bcs->bct", weight, v)
# a = a.reshape(bs, -1, length)
q, k, v = map(lambda t: t.permute(0, 2, 1).contiguous(), (q, k, v))
global attn_func
a = attn_func(q, k, v)
a = a.permute(0, 2, 1).reshape(bs, -1, length)
return a
class AttentionBlock(nn.Module):
"""
An attention block that allows spatial positions to attend to each other.
Originally ported from here, but adapted to the N-d case.
https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66.
"""
def __init__(
self,
channels,
num_heads=1,
num_head_channels=-1,
use_checkpoint=False,
use_new_attention_order=False,
):
super().__init__()
self.channels = channels
if num_head_channels == -1:
self.num_heads = num_heads
else:
assert (
channels % num_head_channels == 0
), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}"
self.num_heads = channels // num_head_channels
self.norm = normalization(channels)
self.qkv = conv_nd(1, channels, channels * 3, 1)
self.attention = QKVAttentionLegacy(self.num_heads)
self.proj_out = zero_module(conv_nd(1, channels, channels, 1))
def forward(self, x):
return checkpoint(self._forward, (x,), self.parameters(), True)  # TODO: check checkpoint usage (flag is True); TODO: fix the .half call
def _forward(self, x):
b, c, *spatial = x.shape
x = x.reshape(b, c, -1)
qkv = self.qkv(self.norm(x))
h = self.attention(qkv)
h = self.proj_out(h)
return (x + h).reshape(b, c, *spatial)
class EncoderUNetModelWT(nn.Module):
"""
The half UNet model with attention and timestep embedding.
For usage, see UNet.
"""
def __init__(
self,
in_channels,
model_channels,
out_channels,
num_res_blocks,
attention_resolutions,
dropout=0,
channel_mult=(1, 2, 4, 8),
conv_resample=True,
dims=2,
use_checkpoint=False,
use_fp16=False,
num_heads=4,
num_head_channels=-1,
num_heads_upsample=-1,
use_scale_shift_norm=False,
resblock_updown=False,
use_new_attention_order=False,
):
super().__init__()
if num_heads_upsample == -1:
num_heads_upsample = num_heads
self.in_channels = in_channels
self.model_channels = model_channels
self.out_channels = out_channels
self.num_res_blocks = num_res_blocks
self.attention_resolutions = attention_resolutions
self.dropout = dropout
self.channel_mult = channel_mult
self.conv_resample = conv_resample
self.use_checkpoint = use_checkpoint
self.dtype = torch.float16 if use_fp16 else torch.float32
self.num_heads = num_heads
self.num_head_channels = num_head_channels
self.num_heads_upsample = num_heads_upsample
time_embed_dim = model_channels * 4
self.time_embed = nn.Sequential(
linear(model_channels, time_embed_dim),
nn.SiLU(),
linear(time_embed_dim, time_embed_dim),
)
self.input_blocks = nn.ModuleList(
[
TimestepEmbedSequential(
conv_nd(dims, in_channels, model_channels, 3, padding=1)
)
]
)
self._feature_size = model_channels
input_block_chans = []
ch = model_channels
ds = 1
for level, mult in enumerate(channel_mult):
for _ in range(num_res_blocks):
layers = [
ResBlock(
ch,
time_embed_dim,
dropout,
out_channels=mult * model_channels,
dims=dims,
use_checkpoint=use_checkpoint,
use_scale_shift_norm=use_scale_shift_norm,
)
]
ch = mult * model_channels
if ds in attention_resolutions:
layers.append(
AttentionBlock(
ch,
use_checkpoint=use_checkpoint,
num_heads=num_heads,
num_head_channels=num_head_channels,
use_new_attention_order=use_new_attention_order,
)
)
self.input_blocks.append(TimestepEmbedSequential(*layers))
self._feature_size += ch
if level != len(channel_mult) - 1:
out_ch = ch
self.input_blocks.append(
TimestepEmbedSequential(
ResBlock(
ch,
time_embed_dim,
dropout,
out_channels=out_ch,
dims=dims,
use_checkpoint=use_checkpoint,
use_scale_shift_norm=use_scale_shift_norm,
down=True,
)
if resblock_updown
else Downsample(
ch, conv_resample, dims=dims, out_channels=out_ch
)
)
)
ch = out_ch
input_block_chans.append(ch)
ds *= 2
self._feature_size += ch
self.middle_block = TimestepEmbedSequential(
ResBlock(
ch,
time_embed_dim,
dropout,
dims=dims,
use_checkpoint=use_checkpoint,
use_scale_shift_norm=use_scale_shift_norm,
),
AttentionBlock(
ch,
use_checkpoint=use_checkpoint,
num_heads=num_heads,
num_head_channels=num_head_channels,
use_new_attention_order=use_new_attention_order,
),
ResBlock(
ch,
time_embed_dim,
dropout,
dims=dims,
use_checkpoint=use_checkpoint,
use_scale_shift_norm=use_scale_shift_norm,
),
)
input_block_chans.append(ch)
self._feature_size += ch
self.input_block_chans = input_block_chans
self.fea_tran = nn.ModuleList([])
for i in range(len(input_block_chans)):
self.fea_tran.append(
ResBlock(
input_block_chans[i],
time_embed_dim,
dropout,
out_channels=out_channels,
dims=dims,
use_checkpoint=use_checkpoint,
use_scale_shift_norm=use_scale_shift_norm,
)
)
@torch.no_grad()
def forward(self, x, timesteps):
"""
Apply the model to an input batch.
:param x: an [N x C x ...] Tensor of inputs.
:param timesteps: a 1-D batch of timesteps.
:return: a dict mapping each feature map's spatial width (as a string) to its [N x out_channels x ...] conditioning Tensor.
"""
emb = self.time_embed(timestep_embedding(timesteps, self.model_channels))
result_list = []
results = {}
h = x.type(self.dtype)
for module in self.input_blocks:
last_h = h
h = module(h, emb)
if h.size(-1) != last_h.size(-1):
result_list.append(last_h)
h = self.middle_block(h, emb)
result_list.append(h)
assert len(result_list) == len(self.fea_tran)
for i in range(len(result_list)):
results[str(result_list[i].size(-1))] = self.fea_tran[i](result_list[i], emb)
return results
def load_from_dict(self, state_dict):
"""
Load model weights from a dictionary.
:param state_dict: a dict of parameters.
"""
filtered_dict = {}
for k, v in state_dict.items():
if k.startswith("structcond_stage_model."):
filtered_dict[k[len("structcond_stage_model.") :]] = v
self.load_state_dict(filtered_dict)
def build_unetwt() -> EncoderUNetModelWT:
"""
Build the structure-condition encoder with the official StableSR settings.
Weights are loaded separately via load_from_dict().
:return: an EncoderUNetModelWT.
"""
# The settings are from the official config yaml file.
# https://github.com/IceClear/StableSR/blob/main/configs/stableSRNew/v2-finetune_text_T_512.yaml
model = EncoderUNetModelWT(
in_channels=4,
model_channels=256,
out_channels=256,
num_res_blocks=2,
attention_resolutions=[ 4, 2, 1 ],
dropout=0.0,
channel_mult=[1, 1, 2, 2],
conv_resample=True,
dims=2,
use_checkpoint=False,
use_fp16=False,
num_heads=4,
num_head_channels=-1,
num_heads_upsample=-1,
use_scale_shift_norm=False,
resblock_updown=False,
use_new_attention_order=False,
)
global attn_func
attn_func = get_attn_func()
return model
if __name__ == "__main__":
'''
Test the lr encoder model.
'''
path = '../models/stablesr_sd21.ckpt'
state_dict = torch.load(path)
for key in state_dict.keys():
print(key)
model = build_unetwt()
model.load_from_dict(state_dict)
model = model.cuda()
test_latent = torch.randn(1, 4, 64, 64).half().cuda()
test_timesteps = torch.tensor([0]).half().cuda()
with torch.no_grad():
test_result = model(test_latent, test_timesteps)
print(test_result.keys())

20
tools/extract_srmodule.py Normal file

@ -0,0 +1,20 @@
'''
This script extracts the spade and structcond modules from the official stablesr_000117.ckpt
'''
import torch
stablesr_path = 'models/stablesr_000117.ckpt'
with open(stablesr_path, 'rb') as f:
stablesr_ckpt = torch.load(f, map_location='cpu')
srmodule = {}
for k, v in stablesr_ckpt['state_dict'].items():
if 'spade' in k or 'structcond' in k:
srmodule[k] = v
# print(k)
# save
torch.save(srmodule, 'models/stablesr_sd21.ckpt')

20
tools/extract_vaecfw.py Normal file

@ -0,0 +1,20 @@
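'''
This script splits the official vqgan_cfw_00011.ckpt into a VAE-only checkpoint
(decoder fusion layers removed) and a CFW-only checkpoint holding those layers.
'''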
import torch
vae_path = 'models/vqgan_cfw_00011.ckpt'
with open(vae_path, 'rb') as f:
vae_ckpt = torch.load(f, map_location='cpu')
prune_keys = []
for k, v in vae_ckpt['state_dict'].items():
if 'decoder.fusion_layer' in k:
prune_keys.append(k)
print(k)
vae_cfw = {}
for k in prune_keys:
vae_cfw[k] = vae_ckpt['state_dict'][k]
del vae_ckpt['state_dict'][k]
torch.save(vae_ckpt, 'models/vqgan_cfw_00011_vae_only.ckpt')
torch.save(vae_cfw, 'models/vqgan_cfw_00011_cfw_only.ckpt')