Oleg Shulyakov committed on
Commit
5a54419
·
1 Parent(s): e306547

Support leave-output-tensor option

Browse files
Files changed (1) hide show
  1. app.py +27 -27
app.py CHANGED
@@ -196,6 +196,7 @@ def quantize_model(
196
  imatrix_path: str,
197
  quant_embedding: bool,
198
  embedding_tensor_method: str,
 
199
  quant_output: bool,
200
  output_tensor_method: str,
201
  ):
@@ -220,10 +221,12 @@ def quantize_model(
220
  if quant_embedding:
221
  quantize_cmd.append("--token-embedding-type")
222
  quantize_cmd.append(embedding_tensor_method)
223
- if quant_output:
224
- quantize_cmd.append("--output-tensor-type")
225
- quantize_cmd.append(output_tensor_method)
226
-
 
 
227
  if use_imatrix:
228
  quantize_cmd.append("--imatrix")
229
  quantize_cmd.append(imatrix_path)
@@ -329,6 +332,7 @@ def process_model(
329
  gguf_name: str,
330
  quant_embedding: bool,
331
  embedding_tensor_method: str,
 
332
  quant_output: bool,
333
  output_tensor_method: str,
334
  split_model: bool,
@@ -349,7 +353,7 @@ def process_model(
349
  with tempfile.TemporaryDirectory(dir=OUTPUT_FOLDER) as outdir:
350
  fp16 = download_base_model(token, model_id, outdir)
351
  imatrix_path = Path(outdir)/"imatrix.dat"
352
- quantized_gguf = quantize_model(outdir, gguf_name, fp16, q_method, use_imatrix, imatrix_q_method, imatrix_path, quant_embedding, embedding_tensor_method, quant_output, output_tensor_method)
353
 
354
  # Create empty repo
355
  api = HfApi(token=token)
@@ -449,9 +453,6 @@ train_data_file = gr.File(
449
  visible=False
450
  )
451
 
452
- def update_imatrix_visibility(use_imatrix):
453
- return gr.update(visible=not use_imatrix), gr.update(visible=use_imatrix), gr.update(visible=use_imatrix)
454
-
455
  #####
456
  # Advanced Options section
457
  #####
@@ -474,13 +475,16 @@ split_max_size = gr.Textbox(
474
  visible=False
475
  )
476
 
477
- def update_split_visibility(split_model):
478
- return gr.update(visible=split_model), gr.update(visible=split_model)
 
 
 
479
 
480
  quant_embedding = gr.Checkbox(
481
  value=False,
482
  label="Quant embeddings tensor",
483
- info=""
484
  )
485
  embedding_tensor_method = gr.Dropdown(
486
  choices=["Q2_K", "Q3_K", "Q4_K", "Q5_K", "Q6_K", "Q8_0"],
@@ -494,7 +498,7 @@ embedding_tensor_method = gr.Dropdown(
494
  quant_output = gr.Checkbox(
495
  value=False,
496
  label="Quant output tensor",
497
- info=""
498
  )
499
  output_tensor_method = gr.Dropdown(
500
  choices=["Q2_K", "Q3_K", "Q4_K", "Q5_K", "Q6_K", "Q8_0"],
@@ -505,13 +509,6 @@ output_tensor_method = gr.Dropdown(
505
  visible=False
506
  )
507
 
508
- def update_embedding_tensor_visibility(quant_embedding):
509
- return gr.update(visible=quant_embedding)
510
-
511
- def update_output_tensor_visibility(quant_output):
512
- return gr.update(visible=quant_output)
513
-
514
-
515
  #####
516
  # Output Settings section
517
  #####
@@ -534,10 +531,10 @@ gguf_name = gr.Textbox(
534
  )
535
 
536
  def update_output_repo(model_id, oauth_token: gr.OAuthToken | None):
537
- if oauth_token is None or oauth_token.token is None:
538
  return ""
539
 
540
- if model_id is None:
541
  return ""
542
 
543
  username = whoami(oauth_token.token)["name"]
@@ -545,7 +542,7 @@ def update_output_repo(model_id, oauth_token: gr.OAuthToken | None):
545
  return f"{username}/{model_name}-GGUF"
546
 
547
  def update_output_filename(model_id, use_imatrix, q_method, imatrix_q_method):
548
- if model_id is None:
549
  return ""
550
 
551
  model_name = get_model_name(model_id)
@@ -568,6 +565,7 @@ clear_btn = gr.ClearButton(
568
  imatrix_q_method,
569
  private_repo,
570
  train_data_file,
 
571
  quant_embedding,
572
  embedding_tensor_method,
573
  quant_output,
@@ -621,6 +619,7 @@ with gr.Blocks(css=css) as demo:
621
 
622
  quant_embedding.render()
623
  embedding_tensor_method.render()
 
624
  quant_output.render()
625
  output_tensor_method.render()
626
 
@@ -661,6 +660,7 @@ with gr.Blocks(css=css) as demo:
661
  gguf_name,
662
  quant_embedding,
663
  embedding_tensor_method,
 
664
  quant_output,
665
  output_tensor_method,
666
  split_model,
@@ -677,27 +677,27 @@ with gr.Blocks(css=css) as demo:
677
  # OnChange handlers
678
  #####
679
  use_imatrix.change(
680
- fn=update_imatrix_visibility,
681
  inputs=use_imatrix,
682
  outputs=[q_method, imatrix_q_method, train_data_file]
683
  )
684
 
685
  split_model.change(
686
- fn=update_split_visibility,
687
  inputs=split_model,
688
  outputs=[split_max_tensors, split_max_size]
689
  )
690
 
691
  quant_embedding.change(
692
- fn=update_embedding_tensor_visibility,
693
  inputs=quant_embedding,
694
  outputs=[embedding_tensor_method]
695
  )
696
 
697
  quant_output.change(
698
- fn=update_output_tensor_visibility,
699
  inputs=quant_output,
700
- outputs=[output_tensor_method]
701
  )
702
 
703
  model_id.change(
 
196
  imatrix_path: str,
197
  quant_embedding: bool,
198
  embedding_tensor_method: str,
199
+ leave_output: bool,
200
  quant_output: bool,
201
  output_tensor_method: str,
202
  ):
 
221
  if quant_embedding:
222
  quantize_cmd.append("--token-embedding-type")
223
  quantize_cmd.append(embedding_tensor_method)
224
+ if leave_output:
225
+ quantize_cmd.append("--leave-output-tensor")
226
+ else:
227
+ if quant_output:
228
+ quantize_cmd.append("--output-tensor-type")
229
+ quantize_cmd.append(output_tensor_method)
230
  if use_imatrix:
231
  quantize_cmd.append("--imatrix")
232
  quantize_cmd.append(imatrix_path)
 
332
  gguf_name: str,
333
  quant_embedding: bool,
334
  embedding_tensor_method: str,
335
+ leave_output: bool,
336
  quant_output: bool,
337
  output_tensor_method: str,
338
  split_model: bool,
 
353
  with tempfile.TemporaryDirectory(dir=OUTPUT_FOLDER) as outdir:
354
  fp16 = download_base_model(token, model_id, outdir)
355
  imatrix_path = Path(outdir)/"imatrix.dat"
356
+ quantized_gguf = quantize_model(outdir, gguf_name, fp16, q_method, use_imatrix, imatrix_q_method, imatrix_path, quant_embedding, embedding_tensor_method, leave_output, quant_output, output_tensor_method)
357
 
358
  # Create empty repo
359
  api = HfApi(token=token)
 
453
  visible=False
454
  )
455
 
 
 
 
456
  #####
457
  # Advanced Options section
458
  #####
 
475
  visible=False
476
  )
477
 
478
+ leave_output = gr.Checkbox(
479
+ value=False,
480
+ label="Leave output tensor",
481
+ info="Leaves output.weight un(re)quantized"
482
+ )
483
 
484
  quant_embedding = gr.Checkbox(
485
  value=False,
486
  label="Quant embeddings tensor",
487
+ info="Quantize embeddings tensor separately"
488
  )
489
  embedding_tensor_method = gr.Dropdown(
490
  choices=["Q2_K", "Q3_K", "Q4_K", "Q5_K", "Q6_K", "Q8_0"],
 
498
  quant_output = gr.Checkbox(
499
  value=False,
500
  label="Quant output tensor",
501
+ info="Quantize output tensor separately"
502
  )
503
  output_tensor_method = gr.Dropdown(
504
  choices=["Q2_K", "Q3_K", "Q4_K", "Q5_K", "Q6_K", "Q8_0"],
 
509
  visible=False
510
  )
511
 
 
 
 
 
 
 
 
512
  #####
513
  # Output Settings section
514
  #####
 
531
  )
532
 
533
  def update_output_repo(model_id, oauth_token: gr.OAuthToken | None):
534
+ if oauth_token is None or not oauth_token.token:
535
  return ""
536
 
537
+ if not model_id:
538
  return ""
539
 
540
  username = whoami(oauth_token.token)["name"]
 
542
  return f"{username}/{model_name}-GGUF"
543
 
544
  def update_output_filename(model_id, use_imatrix, q_method, imatrix_q_method):
545
+ if not model_id:
546
  return ""
547
 
548
  model_name = get_model_name(model_id)
 
565
  imatrix_q_method,
566
  private_repo,
567
  train_data_file,
568
+ leave_output,
569
  quant_embedding,
570
  embedding_tensor_method,
571
  quant_output,
 
619
 
620
  quant_embedding.render()
621
  embedding_tensor_method.render()
622
+ leave_output.render()
623
  quant_output.render()
624
  output_tensor_method.render()
625
 
 
660
  gguf_name,
661
  quant_embedding,
662
  embedding_tensor_method,
663
+ leave_output,
664
  quant_output,
665
  output_tensor_method,
666
  split_model,
 
677
  # OnChange handlers
678
  #####
679
  use_imatrix.change(
680
+ fn=lambda use_imatrix: [gr.update(visible=not use_imatrix), gr.update(visible=use_imatrix), gr.update(visible=use_imatrix)],
681
  inputs=use_imatrix,
682
  outputs=[q_method, imatrix_q_method, train_data_file]
683
  )
684
 
685
  split_model.change(
686
+ fn=lambda split_model: [gr.update(visible=split_model), gr.update(visible=split_model)],
687
  inputs=split_model,
688
  outputs=[split_max_tensors, split_max_size]
689
  )
690
 
691
  quant_embedding.change(
692
+ fn=lambda quant_embedding: gr.update(visible=quant_embedding),
693
  inputs=quant_embedding,
694
  outputs=[embedding_tensor_method]
695
  )
696
 
697
  quant_output.change(
698
+ fn=lambda quant_output: [gr.update(visible=quant_output), gr.update(visible=not quant_output)],
699
  inputs=quant_output,
700
+ outputs=[output_tensor_method, leave_output]
701
  )
702
 
703
  model_id.change(