Oleg Shulyakov committed · 55ecc95
Parent(s): c96815e

Replace model name with UI values
app.py
CHANGED
@@ -220,9 +220,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
             print("Not using imatrix quantization.")

         # Quantize the model
-
-        quantized_gguf_name = f"{model_name.lower()}-{imatrix_q_method.lower()}-imat.gguf" if use_imatrix else f"{model_name.lower()}-{q_method.lower()}.gguf"
-        quantized_gguf_path = str(Path(outdir)/quantized_gguf_name)
+        quantized_gguf_path = str(Path(outdir)/gguf_name)
         if use_imatrix:
             quantise_ggml = [
                 "llama-quantize",
@@ -241,9 +239,6 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
         print(f"Quantized model path: {os.path.abspath(quantized_gguf_path)}")

         # Create empty repo
-        username = whoami(oauth_token.token)["name"]
-
-        repo_name = f"{username}/{model_name}-GGUF"
         api = HfApi(token=oauth_token.token)
         new_repo_url = api.create_repo(repo_id=repo_name, exist_ok=True, private=private_repo)
         new_repo_id = new_repo_url.repo_id
@@ -283,12 +278,12 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep

             ### CLI:
             ```bash
-            llama-cli --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
+            llama-cli --hf-repo {new_repo_id} --hf-file {gguf_name} -p "The meaning to life and the universe is"
             ```

             ### Server:
             ```bash
-            llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
+            llama-server --hf-repo {new_repo_id} --hf-file {gguf_name} -c 2048
             ```

             Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the Llama.cpp repo as well.
@@ -305,11 +300,11 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep

             Step 3: Run inference through the main binary.
             ```
-            ./llama-cli --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
+            ./llama-cli --hf-repo {new_repo_id} --hf-file {gguf_name} -p "The meaning to life and the universe is"
             ```
             or
             ```
-            ./llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
+            ./llama-server --hf-repo {new_repo_id} --hf-file {gguf_name} -c 2048
             ```
             """
         )
@@ -323,7 +318,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
         print(f"Uploading quantized model: {quantized_gguf_path}")
         api.upload_file(
             path_or_fileobj=quantized_gguf_path,
-            path_in_repo=quantized_gguf_name,
+            path_in_repo=gguf_name,
             repo_id=new_repo_id,
         )
     except Exception as e:
@@ -455,18 +450,23 @@ gguf_name = gr.Textbox(
 def update_output_repo(model_id, oauth_token: gr.OAuthToken | None):
     if oauth_token is None or oauth_token.token is None:
         return ""
-
+
     if model_id is None:
         return ""
+
+    username = whoami(oauth_token.token)["name"]
     model_name = model_id.split('/')[-1]
     return f"{username}/{model_name}-GGUF"

 def update_output_filename(model_id, use_imatrix, q_method, imatrix_q_method):
     if model_id is None:
         return ""
+
     model_name = model_id.split('/')[-1]
+
     if use_imatrix:
         return f"{model_name.lower()}-{imatrix_q_method.upper()}-imat.gguf"
+
     return f"{model_name.lower()}-{q_method.upper()}.gguf"

 #####
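In short, after this change `process_model` uses `gguf_name` and `repo_name` directly (presumably passed in from the UI) instead of re-deriving them from the model name, while `update_output_repo` and `update_output_filename` keep those textboxes in sync with the selected model and quantization options. The snippet below is a minimal sketch of how such callbacks are typically wired in a Gradio Blocks app; the component definitions and event bindings are illustrative assumptions, not code from this commit, and it assumes the two update functions above are in scope.

```python
# Hypothetical wiring sketch (assumed, not part of this commit): the UI recomputes
# the output filename and repo name, and process_model reads them instead of
# rebuilding them from the model name.
import gradio as gr

with gr.Blocks() as demo:
    model_id = gr.Textbox(label="Hub Model ID")  # e.g. "org/model"
    q_method = gr.Dropdown(["Q4_K_M", "Q5_K_M", "Q8_0"], value="Q4_K_M", label="Quantization Method")
    use_imatrix = gr.Checkbox(label="Use imatrix quantization")
    imatrix_q_method = gr.Dropdown(["IQ4_NL", "IQ4_XS"], value="IQ4_NL", label="imatrix Quantization Method")
    repo_name = gr.Textbox(label="Output Repo")
    gguf_name = gr.Textbox(label="Output File Name")

    # Recompute the output filename whenever the model or quantization options change.
    for component in (model_id, use_imatrix, q_method, imatrix_q_method):
        component.change(
            update_output_filename,
            inputs=[model_id, use_imatrix, q_method, imatrix_q_method],
            outputs=gguf_name,
        )

    # Recompute the target repo from the model name; the gr.OAuthToken parameter of
    # update_output_repo is injected by Gradio for the logged-in user, so it is not
    # listed in `inputs`.
    model_id.change(update_output_repo, inputs=[model_id], outputs=repo_name)
```

With wiring along these lines, the user can also override either textbox before starting the quantization, which is presumably the point of moving these values into the UI.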