Add prompt travelling guide; add Riffusion tab to manually convert all images in a folder to wav files
parent
dea297328e
commit
6e65bed6ab
13
README.MD
13
README.MD
|
|
@ -2,6 +2,8 @@
|
|||
|
||||

|
||||
|
||||

|
||||
|
||||
## Installation
|
||||
|
||||
- Clone this repo inside your `/extensions` folder
|
||||
|
|
@ -10,9 +12,16 @@
|
|||
|
||||
Select the `Riffusion Audio Generator` script before generating, and use the riffusion model.
|
||||
|
||||
## TODO
|
||||
## Prompt Travelling
|
||||
|
||||
- Currently does not do the fancy img2img latent-space interpolation described by the authors; it just generates .mp3 files each time you generate an image
|
||||
If you want to prompt travel in the latent space as described by the authors, install this extension:
|
||||
|
||||
https://github.com/Kahsolt/stable-diffusion-webui-prompt-travel
|
||||
|
||||
It will output the results of runs in the `<SD>/outputs/(txt|img)2img-images/prompt_travel/` directory, and you can use the convert folder to audio functionality in the `Riffusion` tab to generate a single stitched-together audio file alongside the individual ones.
|
||||
|
||||
Here is a sample made by travelling in img2img mode from `jamaican rap` to `deep house, techno` with denoise `0.5` for `14` steps, and using the `og_beat.png` provided by the original authors as a base image:
|
||||
[Audio Sample (Jamaican Rap to Deep House, Techno)](/static/sample-0.mp3)
|
||||
|
||||
## Acknowledgements
|
||||
|
||||
|
|
|
|||
|
|
@ -10,11 +10,15 @@ from scipy.io import wavfile
|
|||
import torch
|
||||
import torchaudio
|
||||
import gradio as gr
|
||||
from modules import scripts
|
||||
from modules import scripts, script_callbacks
|
||||
from modules.images import FilenameGenerator
|
||||
from modules.processing import process_images
|
||||
import os
|
||||
import modules.shared as shared
|
||||
from pedalboard.io import AudioFile
|
||||
import glob
|
||||
from datetime import datetime
|
||||
import wave
|
||||
|
||||
base_dir = scripts.basedir()
|
||||
|
||||
|
|
@ -34,7 +38,7 @@ class RiffusionScript(scripts.Script):
|
|||
samplerate = f.samplerate
|
||||
|
||||
filename = wav_file.replace(".wav", ".mp3")
|
||||
self.last_generated_files.append(filename)
|
||||
RiffusionScript.last_generated_files.append(filename)
|
||||
|
||||
with AudioFile(filename, "w", samplerate, audio.shape[0]) as f:
|
||||
f.write(audio)
|
||||
|
|
@ -44,7 +48,6 @@ class RiffusionScript(scripts.Script):
|
|||
|
||||
def ui(self, is_img2img):
|
||||
path = os.path.join(base_dir, "outputs")
|
||||
print("Path to save mp3 files: ", path)
|
||||
|
||||
with gr.Row():
|
||||
riffusion_enabled = gr.Checkbox(label="Riffusion enabled", value=True)
|
||||
|
|
@ -53,12 +56,12 @@ class RiffusionScript(scripts.Script):
|
|||
output_path = gr.Textbox(label="Output path", value=path)
|
||||
|
||||
def update_audio_players():
|
||||
count = len(self.last_generated_files)
|
||||
count = len(RiffusionScript.last_generated_files)
|
||||
updates = [
|
||||
gr.Audio.update(
|
||||
value=self.last_generated_files[i],
|
||||
value=RiffusionScript.last_generated_files[i],
|
||||
visible=True,
|
||||
label=self.last_generated_labels[i],
|
||||
label=RiffusionScript.last_generated_labels[i],
|
||||
)
|
||||
for i in range(count)
|
||||
]
|
||||
|
|
@ -118,8 +121,8 @@ class RiffusionScript(scripts.Script):
|
|||
|
||||
proc = process_images(p)
|
||||
|
||||
self.last_generated_labels = []
|
||||
self.last_generated_files = []
|
||||
RiffusionScript.last_generated_labels = []
|
||||
RiffusionScript.last_generated_files = []
|
||||
try:
|
||||
# try to create output path dir if doesnt exist
|
||||
os.makedirs(output_path)
|
||||
|
|
@ -139,7 +142,7 @@ class RiffusionScript(scripts.Script):
|
|||
f.write(wav_bytes.getbuffer())
|
||||
|
||||
self.process_wav(filename, preserve_wav=save_wav)
|
||||
self.last_generated_labels.append(
|
||||
RiffusionScript.last_generated_labels.append(
|
||||
namegen.apply(f"[seed]-[prompt_spaces]-{i}")
|
||||
)
|
||||
|
||||
|
|
@ -312,3 +315,92 @@ class RiffusionScript(scripts.Script):
|
|||
waveform = griffin_lim(Sxx_torch).cpu().numpy()
|
||||
|
||||
return waveform
|
||||
|
||||
|
||||
def convert_audio_file(image, output_dir):
    """Convert one spectrogram image into a .wav file next to it.

    Args:
        image: Path to the spectrogram image file.
        output_dir: Directory where the resulting .wav file is written.

    Returns:
        Path of the .wav file that was written.
    """
    new_filename = os.path.splitext(os.path.basename(image))[0] + ".wav"
    filename = os.path.join(output_dir, new_filename)
    riffusion = RiffusionScript()
    # Use a context manager so the image handle is closed deterministically;
    # the original opened it and relied on garbage collection to release it.
    with Image.open(image) as image_file:
        # Duration is not needed here, only the raw wav bytes.
        wav_bytes, _duration_s = riffusion.wav_bytes_from_spectrogram_image(image_file)

    with open(filename, "wb") as f:
        f.write(wav_bytes.getbuffer())
    return filename
|
||||
|
||||
|
||||
def convert_audio(image_dir: str, file_regex: str, join_images: bool) -> None:
    """Convert every matching image in *image_dir* to a .wav file.

    Args:
        image_dir: Directory containing spectrogram images; output .wav
            files are written alongside them.
        file_regex: Comma-separated glob patterns, e.g. ``"*.jpg, *.png"``.
        join_images: When True and more than one file was converted, also
            write a single concatenated ``<timestamp>_joined.wav``.
    """
    patterns = [p.strip() for p in file_regex.split(",")]
    images = []
    for pattern in patterns:
        images.extend(glob.glob(os.path.join(image_dir, pattern)))

    print(f"Found {len(images)} images in {image_dir}, pattern {file_regex}")
    output_files = [convert_audio_file(image, image_dir) for image in images]

    if join_images and len(output_files) > 1:
        outfile = os.path.join(
            image_dir,
            f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}_joined.wav",
        )
        # Read all clips first; the joined file reuses the params of the
        # first clip (assumes every clip shares sample rate/width — TODO confirm).
        data = []
        for wav in output_files:
            # "with" closes the reader even if readframes raises; the
            # original leaked the handle on error.
            with wave.open(wav, "rb") as w:
                data.append((w.getparams(), w.readframes(w.getnframes())))
        with wave.open(outfile, "wb") as output:
            output.setparams(data[0][0])
            for _params, frames in data:
                output.writeframes(frames)

    print(f"Converted {len(images)} images to audio")
|
||||
|
||||
|
||||
def on_ui_tabs():
    """Build the standalone "Riffusion" tab for batch image-to-audio conversion.

    Returns the tuple format expected by the WebUI's on_ui_tabs callback:
    ((component, tab title, element id),).
    """
    with gr.Blocks() as riffusion_ui:
        with gr.Row():
            # Left panel: conversion settings.
            with gr.Column(variant="panel"):
                with gr.Row():
                    dir_textbox = gr.Textbox(
                        value="",
                        label="Image Directory",
                        placeholder="Directory containing your image files",
                        interactive=True,
                    )
                with gr.Row():
                    join_checkbox = gr.Checkbox(
                        value=True,
                        label="Also output single joined audio file (will be named <date>_joined.wav)",
                        interactive=True,
                    )
                with gr.Row():
                    patterns_textbox = gr.Textbox(
                        value="*.jpg, *.png",
                        label="GLOB patterns (comma separated)",
                        interactive=True,
                    )
            # Right panel: action button and help text.
            with gr.Column(variant="panel"):
                with gr.Row():
                    convert_btn = gr.Button(
                        "Convert Folder", label="Convert Folder", variant="primary"
                    )
                # Wire the button to the batch converter; it produces no UI outputs.
                convert_btn.click(
                    convert_audio,
                    inputs=[dir_textbox, patterns_textbox, join_checkbox],
                    outputs=[],
                )
                gr.HTML(value="<p>Converts all images in a folder to audio</p>")
    return ((riffusion_ui, "Riffusion", "riffusion_ui"),)
|
||||
# Register the tab with the WebUI so it appears alongside txt2img/img2img.
script_callbacks.on_ui_tabs(on_ui_tabs)
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
|
After Width: | Height: | Size: 44 KiB |
Loading…
Reference in New Issue