Spaces:
Sleeping
Sleeping
Oleg Shulyakov
committed on
Commit
·
5a54419
1
Parent(s):
e306547
Support leave-output-tensor option
Browse files
app.py
CHANGED
|
@@ -196,6 +196,7 @@ def quantize_model(
|
|
| 196 |
imatrix_path: str,
|
| 197 |
quant_embedding: bool,
|
| 198 |
embedding_tensor_method: str,
|
|
|
|
| 199 |
quant_output: bool,
|
| 200 |
output_tensor_method: str,
|
| 201 |
):
|
|
@@ -220,10 +221,12 @@ def quantize_model(
|
|
| 220 |
if quant_embedding:
|
| 221 |
quantize_cmd.append("--token-embedding-type")
|
| 222 |
quantize_cmd.append(embedding_tensor_method)
|
| 223 |
-
if
|
| 224 |
-
quantize_cmd.append("--output-tensor
|
| 225 |
-
|
| 226 |
-
|
|
|
|
|
|
|
| 227 |
if use_imatrix:
|
| 228 |
quantize_cmd.append("--imatrix")
|
| 229 |
quantize_cmd.append(imatrix_path)
|
|
@@ -329,6 +332,7 @@ def process_model(
|
|
| 329 |
gguf_name: str,
|
| 330 |
quant_embedding: bool,
|
| 331 |
embedding_tensor_method: str,
|
|
|
|
| 332 |
quant_output: bool,
|
| 333 |
output_tensor_method: str,
|
| 334 |
split_model: bool,
|
|
@@ -349,7 +353,7 @@ def process_model(
|
|
| 349 |
with tempfile.TemporaryDirectory(dir=OUTPUT_FOLDER) as outdir:
|
| 350 |
fp16 = download_base_model(token, model_id, outdir)
|
| 351 |
imatrix_path = Path(outdir)/"imatrix.dat"
|
| 352 |
-
quantized_gguf = quantize_model(outdir, gguf_name, fp16, q_method, use_imatrix, imatrix_q_method, imatrix_path, quant_embedding, embedding_tensor_method, quant_output, output_tensor_method)
|
| 353 |
|
| 354 |
# Create empty repo
|
| 355 |
api = HfApi(token=token)
|
|
@@ -449,9 +453,6 @@ train_data_file = gr.File(
|
|
| 449 |
visible=False
|
| 450 |
)
|
| 451 |
|
| 452 |
-
def update_imatrix_visibility(use_imatrix):
|
| 453 |
-
return gr.update(visible=not use_imatrix), gr.update(visible=use_imatrix), gr.update(visible=use_imatrix)
|
| 454 |
-
|
| 455 |
#####
|
| 456 |
# Advanced Options section
|
| 457 |
#####
|
|
@@ -474,13 +475,16 @@ split_max_size = gr.Textbox(
|
|
| 474 |
visible=False
|
| 475 |
)
|
| 476 |
|
| 477 |
-
|
| 478 |
-
|
|
|
|
|
|
|
|
|
|
| 479 |
|
| 480 |
quant_embedding = gr.Checkbox(
|
| 481 |
value=False,
|
| 482 |
label="Quant embeddings tensor",
|
| 483 |
-
info=""
|
| 484 |
)
|
| 485 |
embedding_tensor_method = gr.Dropdown(
|
| 486 |
choices=["Q2_K", "Q3_K", "Q4_K", "Q5_K", "Q6_K", "Q8_0"],
|
|
@@ -494,7 +498,7 @@ embedding_tensor_method = gr.Dropdown(
|
|
| 494 |
quant_output = gr.Checkbox(
|
| 495 |
value=False,
|
| 496 |
label="Quant output tensor",
|
| 497 |
-
info=""
|
| 498 |
)
|
| 499 |
output_tensor_method = gr.Dropdown(
|
| 500 |
choices=["Q2_K", "Q3_K", "Q4_K", "Q5_K", "Q6_K", "Q8_0"],
|
|
@@ -505,13 +509,6 @@ output_tensor_method = gr.Dropdown(
|
|
| 505 |
visible=False
|
| 506 |
)
|
| 507 |
|
| 508 |
-
def update_embedding_tensor_visibility(quant_embedding):
|
| 509 |
-
return gr.update(visible=quant_embedding)
|
| 510 |
-
|
| 511 |
-
def update_output_tensor_visibility(quant_output):
|
| 512 |
-
return gr.update(visible=quant_output)
|
| 513 |
-
|
| 514 |
-
|
| 515 |
#####
|
| 516 |
# Output Settings section
|
| 517 |
#####
|
|
@@ -534,10 +531,10 @@ gguf_name = gr.Textbox(
|
|
| 534 |
)
|
| 535 |
|
| 536 |
def update_output_repo(model_id, oauth_token: gr.OAuthToken | None):
|
| 537 |
-
if oauth_token is None or oauth_token.token
|
| 538 |
return ""
|
| 539 |
|
| 540 |
-
if model_id
|
| 541 |
return ""
|
| 542 |
|
| 543 |
username = whoami(oauth_token.token)["name"]
|
|
@@ -545,7 +542,7 @@ def update_output_repo(model_id, oauth_token: gr.OAuthToken | None):
|
|
| 545 |
return f"{username}/{model_name}-GGUF"
|
| 546 |
|
| 547 |
def update_output_filename(model_id, use_imatrix, q_method, imatrix_q_method):
|
| 548 |
-
if model_id
|
| 549 |
return ""
|
| 550 |
|
| 551 |
model_name = get_model_name(model_id)
|
|
@@ -568,6 +565,7 @@ clear_btn = gr.ClearButton(
|
|
| 568 |
imatrix_q_method,
|
| 569 |
private_repo,
|
| 570 |
train_data_file,
|
|
|
|
| 571 |
quant_embedding,
|
| 572 |
embedding_tensor_method,
|
| 573 |
quant_output,
|
|
@@ -621,6 +619,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 621 |
|
| 622 |
quant_embedding.render()
|
| 623 |
embedding_tensor_method.render()
|
|
|
|
| 624 |
quant_output.render()
|
| 625 |
output_tensor_method.render()
|
| 626 |
|
|
@@ -661,6 +660,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 661 |
gguf_name,
|
| 662 |
quant_embedding,
|
| 663 |
embedding_tensor_method,
|
|
|
|
| 664 |
quant_output,
|
| 665 |
output_tensor_method,
|
| 666 |
split_model,
|
|
@@ -677,27 +677,27 @@ with gr.Blocks(css=css) as demo:
|
|
| 677 |
# OnChange handlers
|
| 678 |
#####
|
| 679 |
use_imatrix.change(
|
| 680 |
-
fn=
|
| 681 |
inputs=use_imatrix,
|
| 682 |
outputs=[q_method, imatrix_q_method, train_data_file]
|
| 683 |
)
|
| 684 |
|
| 685 |
split_model.change(
|
| 686 |
-
fn=
|
| 687 |
inputs=split_model,
|
| 688 |
outputs=[split_max_tensors, split_max_size]
|
| 689 |
)
|
| 690 |
|
| 691 |
quant_embedding.change(
|
| 692 |
-
fn=
|
| 693 |
inputs=quant_embedding,
|
| 694 |
outputs=[embedding_tensor_method]
|
| 695 |
)
|
| 696 |
|
| 697 |
quant_output.change(
|
| 698 |
-
fn=
|
| 699 |
inputs=quant_output,
|
| 700 |
-
outputs=[output_tensor_method]
|
| 701 |
)
|
| 702 |
|
| 703 |
model_id.change(
|
|
|
|
| 196 |
imatrix_path: str,
|
| 197 |
quant_embedding: bool,
|
| 198 |
embedding_tensor_method: str,
|
| 199 |
+
leave_output: bool,
|
| 200 |
quant_output: bool,
|
| 201 |
output_tensor_method: str,
|
| 202 |
):
|
|
|
|
| 221 |
if quant_embedding:
|
| 222 |
quantize_cmd.append("--token-embedding-type")
|
| 223 |
quantize_cmd.append(embedding_tensor_method)
|
| 224 |
+
if leave_output:
|
| 225 |
+
quantize_cmd.append("--leave-output-tensor")
|
| 226 |
+
else:
|
| 227 |
+
if quant_output:
|
| 228 |
+
quantize_cmd.append("--output-tensor-type")
|
| 229 |
+
quantize_cmd.append(output_tensor_method)
|
| 230 |
if use_imatrix:
|
| 231 |
quantize_cmd.append("--imatrix")
|
| 232 |
quantize_cmd.append(imatrix_path)
|
|
|
|
| 332 |
gguf_name: str,
|
| 333 |
quant_embedding: bool,
|
| 334 |
embedding_tensor_method: str,
|
| 335 |
+
leave_output: bool,
|
| 336 |
quant_output: bool,
|
| 337 |
output_tensor_method: str,
|
| 338 |
split_model: bool,
|
|
|
|
| 353 |
with tempfile.TemporaryDirectory(dir=OUTPUT_FOLDER) as outdir:
|
| 354 |
fp16 = download_base_model(token, model_id, outdir)
|
| 355 |
imatrix_path = Path(outdir)/"imatrix.dat"
|
| 356 |
+
quantized_gguf = quantize_model(outdir, gguf_name, fp16, q_method, use_imatrix, imatrix_q_method, imatrix_path, quant_embedding, embedding_tensor_method, leave_output, quant_output, output_tensor_method)
|
| 357 |
|
| 358 |
# Create empty repo
|
| 359 |
api = HfApi(token=token)
|
|
|
|
| 453 |
visible=False
|
| 454 |
)
|
| 455 |
|
|
|
|
|
|
|
|
|
|
| 456 |
#####
|
| 457 |
# Advanced Options section
|
| 458 |
#####
|
|
|
|
| 475 |
visible=False
|
| 476 |
)
|
| 477 |
|
| 478 |
+
leave_output = gr.Checkbox(
|
| 479 |
+
value=False,
|
| 480 |
+
label="Leave output tensor",
|
| 481 |
+
info="Leaves output.weight un(re)quantized"
|
| 482 |
+
)
|
| 483 |
|
| 484 |
quant_embedding = gr.Checkbox(
|
| 485 |
value=False,
|
| 486 |
label="Quant embeddings tensor",
|
| 487 |
+
info="Quantize embeddings tensor separately"
|
| 488 |
)
|
| 489 |
embedding_tensor_method = gr.Dropdown(
|
| 490 |
choices=["Q2_K", "Q3_K", "Q4_K", "Q5_K", "Q6_K", "Q8_0"],
|
|
|
|
| 498 |
quant_output = gr.Checkbox(
|
| 499 |
value=False,
|
| 500 |
label="Quant output tensor",
|
| 501 |
+
info="Quantize output tensor separately"
|
| 502 |
)
|
| 503 |
output_tensor_method = gr.Dropdown(
|
| 504 |
choices=["Q2_K", "Q3_K", "Q4_K", "Q5_K", "Q6_K", "Q8_0"],
|
|
|
|
| 509 |
visible=False
|
| 510 |
)
|
| 511 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 512 |
#####
|
| 513 |
# Output Settings section
|
| 514 |
#####
|
|
|
|
| 531 |
)
|
| 532 |
|
| 533 |
def update_output_repo(model_id, oauth_token: gr.OAuthToken | None):
|
| 534 |
+
if oauth_token is None or not oauth_token.token:
|
| 535 |
return ""
|
| 536 |
|
| 537 |
+
if not model_id:
|
| 538 |
return ""
|
| 539 |
|
| 540 |
username = whoami(oauth_token.token)["name"]
|
|
|
|
| 542 |
return f"{username}/{model_name}-GGUF"
|
| 543 |
|
| 544 |
def update_output_filename(model_id, use_imatrix, q_method, imatrix_q_method):
|
| 545 |
+
if not model_id:
|
| 546 |
return ""
|
| 547 |
|
| 548 |
model_name = get_model_name(model_id)
|
|
|
|
| 565 |
imatrix_q_method,
|
| 566 |
private_repo,
|
| 567 |
train_data_file,
|
| 568 |
+
leave_output,
|
| 569 |
quant_embedding,
|
| 570 |
embedding_tensor_method,
|
| 571 |
quant_output,
|
|
|
|
| 619 |
|
| 620 |
quant_embedding.render()
|
| 621 |
embedding_tensor_method.render()
|
| 622 |
+
leave_output.render()
|
| 623 |
quant_output.render()
|
| 624 |
output_tensor_method.render()
|
| 625 |
|
|
|
|
| 660 |
gguf_name,
|
| 661 |
quant_embedding,
|
| 662 |
embedding_tensor_method,
|
| 663 |
+
leave_output,
|
| 664 |
quant_output,
|
| 665 |
output_tensor_method,
|
| 666 |
split_model,
|
|
|
|
| 677 |
# OnChange handlers
|
| 678 |
#####
|
| 679 |
use_imatrix.change(
|
| 680 |
+
fn=lambda use_imatrix: [gr.update(visible=not use_imatrix), gr.update(visible=use_imatrix), gr.update(visible=use_imatrix)],
|
| 681 |
inputs=use_imatrix,
|
| 682 |
outputs=[q_method, imatrix_q_method, train_data_file]
|
| 683 |
)
|
| 684 |
|
| 685 |
split_model.change(
|
| 686 |
+
fn=lambda split_model: [gr.update(visible=split_model), gr.update(visible=split_model)],
|
| 687 |
inputs=split_model,
|
| 688 |
outputs=[split_max_tensors, split_max_size]
|
| 689 |
)
|
| 690 |
|
| 691 |
quant_embedding.change(
|
| 692 |
+
fn=lambda quant_embedding: gr.update(visible=quant_embedding),
|
| 693 |
inputs=quant_embedding,
|
| 694 |
outputs=[embedding_tensor_method]
|
| 695 |
)
|
| 696 |
|
| 697 |
quant_output.change(
|
| 698 |
+
fn=lambda quant_output: [gr.update(visible=quant_output), gr.update(visible=not quant_output)],
|
| 699 |
inputs=quant_output,
|
| 700 |
+
outputs=[output_tensor_method, leave_output]
|
| 701 |
)
|
| 702 |
|
| 703 |
model_id.change(
|