vdmbrsv committed on
Commit
8cffba5
·
verified ·
1 Parent(s): 9b192e7

Upload vocagno_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. vocagno_config.json +27 -0
vocagno_config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train_file": "data/processed/dolly_train.jsonl,data/processed/gsm8k_train.jsonl,data/processed/dolphin_train.jsonl,data/processed/openhermes_train.jsonl,data/processed/slimorca_train.jsonl,data/processed/metamath_train.jsonl,data/processed/oasst_train.jsonl",
3
+ "valid_file": "data/processed/dolly_train.jsonl",
4
+ "max_seq_length": 1024,
5
+ "prompt_max_length": 512,
6
+ "teacher_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
7
+ "teacher_tokenizer": null,
8
+ "student_checkpoint": "./checkpoints/minillm_enhanced_4b_nothink/final_model/",
9
+ "student_tokenizer": "Qwen/Qwen3-0.6B",
10
+ "output_dir": "checkpoints/vocagno_deepseek",
11
+ "num_steps": 100000,
12
+ "batch_size": 2,
13
+ "gradient_accumulation_steps": 8,
14
+ "learning_rate": 2e-05,
15
+ "weight_decay": 0.01,
16
+ "warmup_ratio": 0.1,
17
+ "max_grad_norm": 1.0,
18
+ "aggregation": "mean",
19
+ "top_k_ratio": 0.4,
20
+ "kd_weight": 1.0,
21
+ "sft_weight": 0.3,
22
+ "full_sequence_mode": true,
23
+ "bf16": true,
24
+ "logging_steps": 25,
25
+ "save_steps": 500,
26
+ "seed": 42
27
+ }