Oleg Shulyakov committed
Commit · a35310e · 1 Parent(s): 1580529
Format
README.md
CHANGED
@@ -2,7 +2,7 @@
 title: GGUF My Repo
 emoji: 🦙
 colorFrom: gray
-colorTo:
+colorTo: gray
 sdk: docker
 hf_oauth: true
 hf_oauth_scopes:
app.py
CHANGED
@@ -60,7 +60,7 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token:
 
     if oauth_token is None or oauth_token.token is None:
         raise ValueError("You have to be logged in.")
-
+
     split_cmd = [
         "./llama.cpp/llama-gguf-split",
         "--split",
@@ -77,12 +77,12 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token:
     split_cmd.append(model_path)
     split_cmd.append(model_path_prefix)
 
-    print(f"Split command: {split_cmd}")
-
+    print(f"Split command: {split_cmd}")
+
     result = subprocess.run(split_cmd, shell=False, capture_output=True, text=True)
-    print(f"Split command stdout: {result.stdout}")
-    print(f"Split command stderr: {result.stderr}")
-
+    print(f"Split command stdout: {result.stdout}")
+    print(f"Split command stderr: {result.stderr}")
+
     if result.returncode != 0:
         stderr_str = result.stderr.decode("utf-8")
         raise Exception(f"Error splitting the model: {stderr_str}")
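These hunks only normalize blank lines around the split step. For reference, here is a minimal standalone sketch of the pattern that step follows: assemble a llama-gguf-split command and run it with subprocess.run. The helper name, its defaults, and the --split-max-tensors / --split-max-size options are illustrative assumptions, not code taken from this commit.

```python
import subprocess

def run_gguf_split(model_path: str, outdir: str, split_max_tensors: int = 256,
                   split_max_size: str | None = None) -> None:
    """Hypothetical wrapper around llama.cpp's llama-gguf-split tool."""
    # The tool writes shards named <prefix>-00001-of-0000N.gguf next to the prefix.
    model_path_prefix = f"{outdir}/{model_path.split('/')[-1].removesuffix('.gguf')}"

    split_cmd = ["./llama.cpp/llama-gguf-split", "--split"]
    if split_max_size:
        split_cmd.extend(["--split-max-size", split_max_size])      # e.g. "2G"
    else:
        split_cmd.extend(["--split-max-tensors", str(split_max_tensors)])
    split_cmd.append(model_path)         # input GGUF
    split_cmd.append(model_path_prefix)  # output shard prefix
    print(f"Split command: {split_cmd}")

    # text=True already decodes stdout/stderr to str, so no extra .decode() is needed.
    result = subprocess.run(split_cmd, shell=False, capture_output=True, text=True)
    print(f"Split command stdout: {result.stdout}")
    print(f"Split command stderr: {result.stderr}")
    if result.returncode != 0:
        raise Exception(f"Error splitting the model: {result.stderr}")
```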
@@ -93,7 +93,7 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token:
     os.remove(model_path)
 
     model_file_prefix = model_path_prefix.split('/')[-1]
-    print(f"Model file name prefix: {model_file_prefix}")
+    print(f"Model file name prefix: {model_file_prefix}")
     sharded_model_files = [f for f in os.listdir(outdir) if f.startswith(model_file_prefix) and f.endswith(".gguf")]
     if sharded_model_files:
         print(f"Sharded model files: {sharded_model_files}")
@@ -111,7 +111,7 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token:
                 raise Exception(f"Error uploading file {file_path}: {e}")
     else:
         raise Exception("No sharded files found.")
-
+
     print("Sharded model has been uploaded successfully!")
 
 def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_repo, train_data_file, split_model, split_max_tensors, split_max_size, oauth_token: gr.OAuthToken | None):
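The surrounding loop pushes each shard to the Hub before reporting success. A hedged sketch of that loop, assuming the standard huggingface_hub HfApi.upload_file call; the exact arguments the Space passes are not visible in these hunks.

```python
import os
from huggingface_hub import HfApi

def upload_shards(outdir: str, model_file_prefix: str, repo_id: str, token: str) -> None:
    """Upload every shard produced by llama-gguf-split to a Hugging Face model repo."""
    sharded_model_files = [
        f for f in os.listdir(outdir)
        if f.startswith(model_file_prefix) and f.endswith(".gguf")
    ]
    if not sharded_model_files:
        raise Exception("No sharded files found.")

    api = HfApi(token=token)
    for file in sharded_model_files:
        file_path = os.path.join(outdir, file)
        print(f"Uploading file: {file_path}")
        try:
            api.upload_file(
                path_or_fileobj=file_path,
                path_in_repo=file,   # keep the shard name at the repo root
                repo_id=repo_id,
            )
        except Exception as e:
            raise Exception(f"Error uploading file {file_path}: {e}")
    print("Sharded model has been uploaded successfully!")
```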
@@ -184,7 +184,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
     if train_data_file:
         train_data_path = train_data_file.name
     else:
-        train_data_path = "llama.cpp/
+        train_data_path = "llama.cpp/train_data.txt" #fallback calibration dataset
 
     print(f"Training data file path: {train_data_path}")
 
@@ -194,7 +194,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
         generate_importance_matrix(fp16, train_data_path, imatrix_path)
     else:
         print("Not using imatrix quantization.")
-
+
     # Quantize the model
     quantized_gguf_name = f"{model_name.lower()}-{imatrix_q_method.lower()}-imat.gguf" if use_imatrix else f"{model_name.lower()}-{q_method.lower()}.gguf"
     quantized_gguf_path = str(Path(outdir)/quantized_gguf_name)
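generate_importance_matrix and the quantize step themselves are defined outside these hunks. The sketch below shows one plausible shape for that pipeline using llama.cpp's llama-imatrix and llama-quantize tools; the binary paths, flags, and function signature are assumptions rather than code from this commit.

```python
import subprocess
from pathlib import Path

def quantize_with_imatrix(fp16_path: str, train_data_path: str, outdir: str,
                          model_name: str, imatrix_q_method: str = "IQ4_NL") -> str:
    """Illustrative imatrix + quantize pipeline around llama.cpp's CLI tools."""
    imatrix_path = str(Path(outdir) / "imatrix.dat")

    # 1) Build the importance matrix from a calibration text file.
    subprocess.run(
        ["./llama.cpp/llama-imatrix", "-m", fp16_path, "-f", train_data_path, "-o", imatrix_path],
        check=True,
    )

    # 2) Quantize with the imatrix; the output name mirrors the pattern in the diff.
    quantized_gguf_name = f"{model_name.lower()}-{imatrix_q_method.lower()}-imat.gguf"
    quantized_gguf_path = str(Path(outdir) / quantized_gguf_name)
    subprocess.run(
        ["./llama.cpp/llama-quantize", "--imatrix", imatrix_path,
         fp16_path, quantized_gguf_path, imatrix_q_method],
        check=True,
    )
    return quantized_gguf_path
```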
@@ -235,26 +235,26 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
 # {new_repo_id}
 This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id}) using llama.cpp via the ggml.ai's [GGUF-my-repo](https://huggingface.co/spaces/ggml-org/gguf-my-repo) space.
 Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
-
+
 ## Use with llama.cpp
 Install llama.cpp through brew (works on Mac and Linux)
-
+
 ```bash
 brew install llama.cpp
-
+
 ```
 Invoke the llama.cpp server or the CLI.
-
+
 ### CLI:
 ```bash
 llama-cli --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
 ```
-
+
 ### Server:
 ```bash
 llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
 ```
-
+
 Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the Llama.cpp repo as well.
 
 Step 1: Clone llama.cpp from GitHub.
@@ -271,7 +271,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
 ```
 ./llama-cli --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
 ```
-or
+or
 ```
 ./llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
 ```
@@ -292,7 +292,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
         )
     except Exception as e:
         raise Exception(f"Error uploading quantized model: {e}")
-
+
     if os.path.isfile(imatrix_path):
         try:
             print(f"Uploading imatrix.dat: {imatrix_path}")
@@ -343,7 +343,7 @@ imatrix_q_method = gr.Dropdown(
     ["IQ3_M", "IQ3_XXS", "Q4_K_M", "Q4_K_S", "IQ4_NL", "IQ4_XS", "Q5_K_M", "Q5_K_S"],
     label="Imatrix Quantization Method",
     info="GGML imatrix quants type",
-    value="IQ4_NL",
+    value="IQ4_NL",
     filterable=False,
     visible=False
 )
@@ -408,7 +408,7 @@ iface = gr.Interface(
 )
 
 # Create Gradio interface
-with gr.Blocks(css=css) as demo:
+with gr.Blocks(css=css) as demo:
     gr.Markdown("You must be logged in to use GGUF-my-repo.")
     gr.LoginButton(min_width=250)
 
@@ -425,7 +425,7 @@ with gr.Blocks(css=css) as demo:
 
     def update_visibility(use_imatrix):
         return gr.update(visible=not use_imatrix), gr.update(visible=use_imatrix), gr.update(visible=use_imatrix)
-
+
     use_imatrix.change(
         fn=update_visibility,
         inputs=use_imatrix,
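The final hunk only touches a blank line around the visibility toggle, but the Gradio pattern it sits in is worth spelling out: a checkbox's .change event returns one gr.update per controlled component. A minimal, self-contained sketch of that wiring; the component names and the outputs list are assumptions, since they are not shown in this diff.

```python
import gradio as gr

with gr.Blocks() as demo:
    use_imatrix = gr.Checkbox(label="Use imatrix quantization")
    q_method = gr.Dropdown(["Q4_K_M", "Q5_K_M", "Q8_0"], label="Quantization Method", visible=True)
    imatrix_q_method = gr.Dropdown(["IQ4_NL", "IQ4_XS"], label="Imatrix Quantization Method", visible=False)
    train_data_file = gr.File(label="Training Data File", visible=False)

    def update_visibility(use_imatrix):
        # Hide the plain quant dropdown when imatrix is on; show the imatrix controls.
        return (gr.update(visible=not use_imatrix),
                gr.update(visible=use_imatrix),
                gr.update(visible=use_imatrix))

    use_imatrix.change(
        fn=update_visibility,
        inputs=use_imatrix,
        outputs=[q_method, imatrix_q_method, train_data_file],  # assumed wiring
    )

demo.launch()
```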