Hev832
/

hex-rvc

@@ -95,48 +95,94 @@ if __name__ == '__main__':
 # Gradio Interface
 with gr.Blocks(title="Hex RVC", theme=gr.themes.Base(primary_hue="red", secondary_hue="pink")) as app:
-    gr.Markdown("# Hex RVC")
     gr.Markdown("Join [AIHub](https://discord.gg/aihub) to get the RVC model!")
     with gr.Tab("Inference"):
         with gr.Row():
             MODEL_NAME = gr.Dropdown(
-                label="Select a Model",
                 choices=get_folders(),
-                interactive=True
             )
             SOUND_PATH = gr.Dropdown(
                 choices=load_audio_files(),
-                label="Select an audio file",
-                interactive=True
             )
-            upload_audio = gr.Audio(label="Upload Audio", type='filepath')
-        with gr.Accordion("Hex TTS"):
-            input_text = gr.Textbox(lines=5, label="Input Text")
-            language = gr.Dropdown(choices=list(language_dict.keys()), label="Choose the Voice Model")
-            tts_convert = gr.Button("Convert")
-            tts_output = gr.Audio(label="Generated TTS Audio", type='filepath')
-            tts_convert.click(
-                fn=text_to_speech_edge,
-                inputs=[input_text, language],
-                outputs=tts_output
             )
-        output_audio = gr.Audio(label="Generated Audio", type='filepath')
         with gr.Row():
-            refresh_btn = gr.Button("Refresh")
-            run_button = gr.Button("Convert")
         refresh_btn.click(
             lambda: (refresh_audio_list(), refresh_folders()),
             outputs=[SOUND_PATH, MODEL_NAME]
         )
     with gr.Tab("Download RVC Model"):
-        url = gr.Textbox(label="Your Model URL")
-        dirname = gr.Textbox(label="Your Model Name")
         download_button = gr.Button("Download Model")
         download_output = gr.Textbox(label="Download Status")
@@ -147,17 +193,36 @@ with gr.Blocks(title="Hex RVC", theme=gr.themes.Base(primary_hue="red", secondar
         )
     with gr.Tab("Audio Separation"):
-        input_audio = gr.Audio(type="filepath", label="Upload Audio")
-        roformer_link = gr.Textbox(label="Audio Link")
-        roformer_download_button = gr.Button("Download")
-        separate_button = gr.Button("Separate Audio")
-        separation_output = gr.Textbox(label="Separation Output Path")
-        roformer_download_button.click(download_audio, inputs=[roformer_link], outputs=[input_audio])
 app.launch(
     share=args.share_enabled,
     server_name=None if not args.listen else (args.listen_host or '0.0.0.0'),
-    server_port=args.listen_port,
 )

 # Gradio Interface
 with gr.Blocks(title="Hex RVC", theme=gr.themes.Base(primary_hue="red", secondary_hue="pink")) as app:
+    gr.Markdown("# Hex RVC - AI Audio Inference")
     gr.Markdown("Join [AIHub](https://discord.gg/aihub) to get the RVC model!")
+    # Inference Tab with Priority on Settings
     with gr.Tab("Inference"):
+        gr.Markdown("## Inference Settings")
         with gr.Row():
+            # Model selector; its choices are whatever get_folders() returns.
             MODEL_NAME = gr.Dropdown(
+                label="Select AI Model",
                 choices=get_folders(),
+                interactive=True,
+                info="Choose a pre-trained model for audio processing"
             )
+            # Picker for the audio files reported by load_audio_files().
             SOUND_PATH = gr.Dropdown(
                 choices=load_audio_files(),
+                label="Select Existing Audio File",
+                interactive=True,
+                info="Pick an audio file from the predefined directory"
             )
+            # gr.Audio's documented constructor has no `info` keyword; Gradio
+            # releases without a **kwargs catch-all raise TypeError on it, so
+            # the hint is not passed to this component.
+            upload_audio = gr.Audio(
+                label="Upload Your Own Audio",
+                type='filepath'
             )
+        gr.Markdown("### Conversion Parameters")
+        with gr.Accordion("Conversion Settings", open=True):
+            # First row: pitch offset and F0 method selection.
+            with gr.Row():
+                F0_CHANGE = gr.Number(
+                    value=0,
+                    label="Pitch Change (semitones)",
+                    info="Adjust the pitch of the output audio",
+                )
+                F0_METHOD = gr.Dropdown(
+                    label="F0 Method",
+                    info="Select the fundamental frequency extraction method",
+                    value="fcpe",
+                    choices=[
+                        "crepe",
+                        "harvest",
+                        "mangio-crepe",
+                        "rmvpe",
+                        "rmvpe_legacy",
+                        "fcpe",
+                        "fcpe_legacy",
+                        "hybrid[rmvpe+fcpe]",
+                    ],
+                )
+            # Second row: the remaining numeric conversion controls.
+            with gr.Row():
+                MIN_PITCH = gr.Number(
+                    value=50,
+                    label="Min Pitch",
+                    info="Minimum pitch detection threshold",
+                )
+                MAX_PITCH = gr.Number(
+                    value=1100,
+                    label="Max Pitch",
+                    info="Maximum pitch detection threshold",
+                )
+                CREPE_HOP_LENGTH = gr.Number(
+                    value=120,
+                    label="Crepe Hop Length",
+                    info="Hop length for Crepe method",
+                )
+                INDEX_RATE = gr.Slider(
+                    minimum=0,
+                    maximum=1,
+                    value=0.75,
+                    label="Index Rate",
+                )
+                FILTER_RADIUS = gr.Number(
+                    value=3,
+                    label="Filter Radius",
+                    info="Filter intensity for smoothing",
+                )
+                RMS_MIX_RATE = gr.Slider(
+                    minimum=0,
+                    maximum=1,
+                    value=0.25,
+                    label="RMS Mix Rate",
+                )
+                PROTECT = gr.Slider(
+                    minimum=0,
+                    maximum=1,
+                    value=0.33,
+                    label="Protect Factor",
+                )
+        gr.Markdown("### Advanced Settings")
+        # Collapsed by default (open=False); every control below has a default value.
+        with gr.Accordion("Advanced Settings", open=False):
+            # Split-inference and silence-related controls.
+            SPLIT_INFER = gr.Checkbox(
+                value=False,
+                label="Enable Split Inference",
+            )
+            MIN_SILENCE = gr.Number(
+                value=500,
+                label="Min Silence (ms)",
+            )
+            SILENCE_THRESHOLD = gr.Number(
+                value=-50,
+                label="Silence Threshold (dBFS)",
+            )
+            SEEK_STEP = gr.Slider(
+                minimum=1,
+                maximum=10,
+                value=1,
+                label="Seek Step (ms)",
+            )
+            KEEP_SILENCE = gr.Number(
+                value=200,
+                label="Keep Silence (ms)",
+            )
+            # Formant shift toggle and its two numeric parameters.
+            FORMANT_SHIFT = gr.Checkbox(
+                value=False,
+                label="Enable Formant Shift",
+            )
+            QUEFRENCY = gr.Number(
+                value=0,
+                label="Quefrency",
+            )
+            TIMBRE = gr.Number(
+                value=1,
+                label="Timbre",
+            )
+            F0_AUTOTUNE = gr.Checkbox(
+                value=False,
+                label="Enable F0 Autotune",
+            )
+            # File format choice for the generated audio.
+            OUTPUT_FORMAT = gr.Dropdown(
+                value="wav",
+                label="Output Format",
+                choices=["wav", "flac", "mp3"],
+            )
+        gr.Markdown("## Generate Audio")
+        # Component that receives the result of process_audio.
+        output_audio = gr.Audio(
+            type='filepath',
+            label="Generated Audio Output",
+        )
         with gr.Row():
+            refresh_btn = gr.Button("Refresh Lists")
+            run_button = gr.Button("Run Inference")
+        # Re-populate both dropdowns: the lambda's two return values map, in
+        # order, onto SOUND_PATH and MODEL_NAME.
         refresh_btn.click(
+            fn=lambda: (refresh_audio_list(), refresh_folders()),
+            outputs=[SOUND_PATH, MODEL_NAME],
         )
+        # Hand every setting to process_audio; the order of this list is the
+        # positional argument order of the call.
+        run_button.click(
+            fn=process_audio,
+            inputs=[
+                MODEL_NAME,
+                SOUND_PATH,
+                F0_CHANGE,
+                F0_METHOD,
+                MIN_PITCH,
+                MAX_PITCH,
+                CREPE_HOP_LENGTH,
+                INDEX_RATE,
+                FILTER_RADIUS,
+                RMS_MIX_RATE,
+                PROTECT,
+                SPLIT_INFER,
+                MIN_SILENCE,
+                SILENCE_THRESHOLD,
+                SEEK_STEP,
+                KEEP_SILENCE,
+                FORMANT_SHIFT,
+                QUEFRENCY,
+                TIMBRE,
+                F0_AUTOTUNE,
+                OUTPUT_FORMAT,
+                upload_audio,
+            ],
+            outputs=output_audio,
+        )
+    # Other Tabs (Download Model, Audio Separation)
     with gr.Tab("Download RVC Model"):
+        gr.Markdown("## Download RVC Model")
+        url = gr.Textbox(label="Model URL")
+        dirname = gr.Textbox(label="Model Directory Name")
         download_button = gr.Button("Download Model")
         download_output = gr.Textbox(label="Download Status")
         )
     with gr.Tab("Audio Separation"):
+        gr.Markdown("## Audio Separation")
+        # Audio to separate; also the output target of the link downloader below.
+        input_audio = gr.Audio(
+            label="Upload Audio for Separation",
+            type="filepath",
+        )
+        # Alternative source: fetch the audio from a pasted link.
+        with gr.Accordion("Separation by Link", open=False):
+            with gr.Row():
+                roformer_link = gr.Textbox(
+                    interactive=True,
+                    placeholder="Paste the link here",
+                    label="Link",
+                )
+                with gr.Row():
+                    gr.Markdown("You can paste the link to the video/audio from many sites, check the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)")
+                with gr.Row():
+                    roformer_download_button = gr.Button("Download!", variant="primary")
+            # NOTE(review): the two widgets below live inside the accordion,
+            # which starts collapsed — confirm that placement is intended.
+            separate_button = gr.Button("Separate Audio")
+            separation_output = gr.Textbox(label="Separation Output Path")
+        # The value returned by download_audio is written into input_audio.
+        roformer_download_button.click(
+            fn=download_audio,
+            inputs=[roformer_link],
+            outputs=[input_audio],
+        )
+        # Gradio event `inputs` must be components, not plain strings, so the
+        # three fixed checkpoint names are bound in a wrapper and only the
+        # audio component is wired as an input.
+        # NOTE(review): assumes separate_audio accepts (audio, name, name, name)
+        # positionally in this order, as the previous inputs list implied — confirm.
+        separate_button.click(
+            fn=lambda audio_path: separate_audio(
+                audio_path,
+                "model_bs_roformer_ep_317_sdr_12.9755.ckpt",
+                "UVR-DeEcho-DeReverb.pth",
+                "mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt",
+            ),
+            inputs=[input_audio],
+            outputs=[separation_output]
+        )
 app.launch(
     share=args.share_enabled,
+    # Pass a host only when args.listen is truthy, falling back to
+    # '0.0.0.0' if no explicit host was given; otherwise pass None.
+    server_name=(args.listen_host or '0.0.0.0') if args.listen else None,
+    server_port=args.listen_port,
 )