yujiepan committed on
Commit
3d3a17a
·
verified ·
1 Parent(s): d54e14d

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ pipeline_tag: text-generation
4
+ inference: true
5
+ widget:
6
+ - text: Hello!
7
+ example_title: Hello world
8
+ group: Python
9
+ base_model:
10
+ - moonshotai/Kimi-Linear-48B-A3B-Instruct
11
+ ---
12
+
13
+ This tiny model is intended for debugging. It is randomly initialized using the configuration adapted from [moonshotai/Kimi-Linear-48B-A3B-Instruct](https://huggingface.co/moonshotai/Kimi-Linear-48B-A3B-Instruct).
14
+
15
+ ### Example usage:
16
+
17
+ - vLLM
18
+
19
+ ```bash
20
+ vllm serve yujiepan/kimi-linear-tiny-random --trust-remote-code
21
+ ```
22
+
23
+ - Transformers
24
+
25
+ ```python
26
+ # tested on transformers==4.57.1
27
+ import torch
28
+ import transformers
29
+
30
+ model_id = "yujiepan/kimi-linear-tiny-random"
31
+ model = AutoModelForCausalLM.from_pretrained(
32
+ model_id,
33
+ dtype=torch.bfloat16,
34
+ device_map="cuda",
35
+ trust_remote_code=True
36
+ )
37
+ tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
38
+
39
+ messages = [
40
+ {"role": "system", "content": "You are a helpful assistant provided by Moonshot-AI."},
41
+ {"role": "user", "content": "Is 123 a prime?"}
42
+ ]
43
+ input_ids = tokenizer.apply_chat_template(
44
+ messages,
45
+ add_generation_prompt=True,
46
+ return_tensors="pt",
47
+ tokenize=True,
48
+ ).to(model.device)
49
+ print(input_ids)
50
+ generated_ids = model.generate(inputs=input_ids, max_new_tokens=500)
51
+ response = tokenizer.batch_decode(generated_ids)[0]
52
+ print(response)
53
+ ```
54
+
55
+ ### Code to create this repo:
56
+
57
+ ```python
58
+ import json
59
+ from pathlib import Path
60
+
61
+ import accelerate
62
+ import torch
63
+ from huggingface_hub import file_exists, hf_hub_download
64
+ from transformers import (
65
+ AutoConfig,
66
+ AutoModelForCausalLM,
67
+ AutoProcessor,
68
+ AutoTokenizer,
69
+ GenerationConfig,
70
+ set_seed,
71
+ )
72
+
73
+ source_model_id = "moonshotai/Kimi-Linear-48B-A3B-Instruct"
74
+ save_folder = "/tmp/yujiepan/kimi-linear-tiny-random"
75
+ # Step 1: create the output folder and copy the source tokenizer into it.
76
+ Path(save_folder).mkdir(parents=True, exist_ok=True)
77
+ tokenizer = AutoTokenizer.from_pretrained(
78
+ source_model_id, trust_remote_code=True)
79
+ tokenizer.save_pretrained(save_folder)
80
+ with open(hf_hub_download(source_model_id, filename='tokenizer_config.json', repo_type='model'), 'r', encoding='utf-8') as f:
81
+ tokenizer_config_json = json.load(f)
82
+ tokenizer_config_json['auto_map']['AutoTokenizer'][0] = f'{source_model_id}--' + \
83
+ tokenizer_config_json["auto_map"]["AutoTokenizer"][0]  # prefix the tokenizer class path with the source repo id
84
+ with open(f"{save_folder}/tokenizer_config.json", "w", encoding='utf-8') as f:
85
+ json.dump(tokenizer_config_json, f, indent=2)  # overwrites the tokenizer_config.json written by save_pretrained above
86
+ # hf_hub_download(source_model_id, filename='tiktoken.model', repo_type='model',
87
+ # local_dir=save_folder, local_dir_use_symlinks=True, cache_dir='/tmp/')
88
+ # Step 2: load the source config.json, prefix its auto_map entries with the source repo id, and shrink the sizes.
89
+ with open(hf_hub_download(source_model_id, filename='config.json', repo_type='model'), 'r', encoding='utf-8') as f:
90
+ config_json = json.load(f)
91
+ for k, v in config_json['auto_map'].items():
92
+ config_json['auto_map'][k] = f'{source_model_id}--{v}'
93
+ config_json.update({  # keys listed here replace the source values; all other keys are kept as downloaded
94
+ "head_dim": 32,
95
+ "hidden_size": 8,
96
+ "intermediate_size": 32,
97
+ "linear_attn_config": {
98
+ "full_attn_layers": [4],
99
+ "head_dim": 32,
100
+ "kda_layers": [1, 2, 3],
101
+ "num_heads": 8,
102
+ "short_conv_kernel_size": 4,
103
+ },
104
+ "num_attention_heads": 8,
105
+ "num_key_value_heads": 8,
106
+ "moe_intermediate_size": 32,
107
+ "num_hidden_layers": 5,
108
+ })
109
+ with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
110
+ json.dump(config_json, f, indent=2)
111
+ # Step 3: build the model from the tiny config saved above, fill it with random weights, and save it.
112
+ config = AutoConfig.from_pretrained(
113
+ save_folder,
114
+ trust_remote_code=True,
115
+ )
116
+ print(config)
117
+ torch.set_default_dtype(torch.bfloat16)  # bfloat16 is the default dtype only while the model is constructed
118
+ model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
119
+ torch.set_default_dtype(torch.float32)  # switch the default dtype back to float32
120
+ if file_exists(filename="generation_config.json", repo_id=source_model_id, repo_type='model'):
121
+ model.generation_config = GenerationConfig.from_pretrained(
122
+ source_model_id, trust_remote_code=True,
123
+ )
124
+ set_seed(42)  # fixed seed set before the normal_ initialization below
125
+ model = model.cpu()
126
+ n_parms = sum(p.numel() for p in model.parameters())  # total element count, used for the percentages printed below
127
+ with torch.no_grad():
128
+ for name, p in sorted(model.named_parameters()):
129
+ torch.nn.init.normal_(p, 0, 0.1)  # overwrite each parameter in place with normal samples (mean 0, std 0.1)
130
+ print(name, p.shape, (p.numel() / n_parms * 100), '%')  # this parameter's share of all parameters
131
+ model.save_pretrained(save_folder)
132
+ # Step 4: rewrite auto_map in the saved config.json and delete any *.py files from the save folder.
133
+ with open(f"{save_folder}/config.json", "r", encoding='utf-8') as f:
134
+ config_json = json.load(f)
135
+ config_json['auto_map'] = {k: f'{source_model_id}--' + v.split(
136
+ '--')[-1] for k, v in config_json['auto_map'].items()}  # keep only the text after the last '--', then add the prefix once
137
+ with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
138
+ json.dump(config_json, f, indent=2)
139
+ for python_file in Path(save_folder).glob('*.py'):
140
+ python_file.unlink()
141
+ ```
142
+
143
+ ### Printing the model:
144
+
145
+ ```text
146
+ KimiLinearForCausalLM(
147
+ (model): KimiLinearModel(
148
+ (embed_tokens): Embedding(163840, 8, padding_idx=163839)
149
+ (layers): ModuleList(
150
+ (0): KimiDecoderLayer(
151
+ (self_attn): KimiDeltaAttention(
152
+ (q_proj): Linear(in_features=8, out_features=256, bias=False)
153
+ (k_proj): Linear(in_features=8, out_features=256, bias=False)
154
+ (v_proj): Linear(in_features=8, out_features=256, bias=False)
155
+ (q_conv1d): ShortConvolution(256, 256, kernel_size=(4,), stride=(1,), padding=(3,), groups=256, bias=False, activation=silu, backend=triton)
156
+ (k_conv1d): ShortConvolution(256, 256, kernel_size=(4,), stride=(1,), padding=(3,), groups=256, bias=False, activation=silu, backend=triton)
157
+ (v_conv1d): ShortConvolution(256, 256, kernel_size=(4,), stride=(1,), padding=(3,), groups=256, bias=False, activation=silu, backend=triton)
158
+ (f_a_proj): Linear(in_features=8, out_features=32, bias=False)
159
+ (f_b_proj): Linear(in_features=32, out_features=256, bias=False)
160
+ (b_proj): Linear(in_features=8, out_features=8, bias=False)
161
+ (g_a_proj): Linear(in_features=8, out_features=32, bias=False)
162
+ (g_b_proj): Linear(in_features=32, out_features=256, bias=False)
163
+ (o_norm): FusedRMSNormGated(32, eps=1e-05, activation=sigmoid)
164
+ (o_proj): Linear(in_features=256, out_features=8, bias=False)
165
+ )
166
+ (mlp): KimiMLP(
167
+ (gate_proj): Linear(in_features=8, out_features=32, bias=False)
168
+ (up_proj): Linear(in_features=8, out_features=32, bias=False)
169
+ (down_proj): Linear(in_features=32, out_features=8, bias=False)
170
+ (act_fn): SiLUActivation()
171
+ )
172
+ (input_layernorm): KimiRMSNorm()
173
+ (post_attention_layernorm): KimiRMSNorm()
174
+ )
175
+ (1-2): 2 x KimiDecoderLayer(
176
+ (self_attn): KimiDeltaAttention(
177
+ (q_proj): Linear(in_features=8, out_features=256, bias=False)
178
+ (k_proj): Linear(in_features=8, out_features=256, bias=False)
179
+ (v_proj): Linear(in_features=8, out_features=256, bias=False)
180
+ (q_conv1d): ShortConvolution(256, 256, kernel_size=(4,), stride=(1,), padding=(3,), groups=256, bias=False, activation=silu, backend=triton)
181
+ (k_conv1d): ShortConvolution(256, 256, kernel_size=(4,), stride=(1,), padding=(3,), groups=256, bias=False, activation=silu, backend=triton)
182
+ (v_conv1d): ShortConvolution(256, 256, kernel_size=(4,), stride=(1,), padding=(3,), groups=256, bias=False, activation=silu, backend=triton)
183
+ (f_a_proj): Linear(in_features=8, out_features=32, bias=False)
184
+ (f_b_proj): Linear(in_features=32, out_features=256, bias=False)
185
+ (b_proj): Linear(in_features=8, out_features=8, bias=False)
186
+ (g_a_proj): Linear(in_features=8, out_features=32, bias=False)
187
+ (g_b_proj): Linear(in_features=32, out_features=256, bias=False)
188
+ (o_norm): FusedRMSNormGated(32, eps=1e-05, activation=sigmoid)
189
+ (o_proj): Linear(in_features=256, out_features=8, bias=False)
190
+ )
191
+ (block_sparse_moe): KimiSparseMoeBlock(
192
+ (experts): ModuleList(
193
+ (0-255): 256 x KimiBlockSparseMLP(
194
+ (w1): Linear(in_features=8, out_features=32, bias=False)
195
+ (w2): Linear(in_features=32, out_features=8, bias=False)
196
+ (w3): Linear(in_features=8, out_features=32, bias=False)
197
+ (act_fn): SiLUActivation()
198
+ )
199
+ )
200
+ (gate): KimiMoEGate()
201
+ (shared_experts): KimiMLP(
202
+ (gate_proj): Linear(in_features=8, out_features=32, bias=False)
203
+ (up_proj): Linear(in_features=8, out_features=32, bias=False)
204
+ (down_proj): Linear(in_features=32, out_features=8, bias=False)
205
+ (act_fn): SiLUActivation()
206
+ )
207
+ )
208
+ (input_layernorm): KimiRMSNorm()
209
+ (post_attention_layernorm): KimiRMSNorm()
210
+ )
211
+ (3-4): 2 x KimiDecoderLayer(
212
+ (self_attn): KimiMLAAttention(
213
+ (q_proj): Linear(in_features=8, out_features=1536, bias=False)
214
+ (kv_a_proj_with_mqa): Linear(in_features=8, out_features=576, bias=False)
215
+ (kv_a_layernorm): KimiRMSNorm()
216
+ (kv_b_proj): Linear(in_features=512, out_features=2048, bias=False)
217
+ (o_proj): Linear(in_features=1024, out_features=8, bias=False)
218
+ )
219
+ (block_sparse_moe): KimiSparseMoeBlock(
220
+ (experts): ModuleList(
221
+ (0-255): 256 x KimiBlockSparseMLP(
222
+ (w1): Linear(in_features=8, out_features=32, bias=False)
223
+ (w2): Linear(in_features=32, out_features=8, bias=False)
224
+ (w3): Linear(in_features=8, out_features=32, bias=False)
225
+ (act_fn): SiLUActivation()
226
+ )
227
+ )
228
+ (gate): KimiMoEGate()
229
+ (shared_experts): KimiMLP(
230
+ (gate_proj): Linear(in_features=8, out_features=32, bias=False)
231
+ (up_proj): Linear(in_features=8, out_features=32, bias=False)
232
+ (down_proj): Linear(in_features=32, out_features=8, bias=False)
233
+ (act_fn): SiLUActivation()
234
+ )
235
+ )
236
+ (input_layernorm): KimiRMSNorm()
237
+ (post_attention_layernorm): KimiRMSNorm()
238
+ )
239
+ )
240
+ (norm): KimiRMSNorm()
241
+ )
242
+ (lm_head): Linear(in_features=8, out_features=163840, bias=False)
243
+ )
244
+ ```
chat_template.jinja ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% macro render_content(msg) -%}
2
+ {%- set c = msg.get('content') -%}
3
+ {%- if c is string -%}
4
+ {{ c }}
5
+ {%- elif c is not none -%}
6
+ {% for content in c -%}
7
+ {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
8
+ <|media_start|>image<|media_content|><|media_pad|><|media_end|>
9
+ {% else -%}
10
+ {{ content['text'] }}
11
+ {%- endif -%}
12
+ {%- endfor -%}
13
+ {%- endif -%}
14
+ {%- endmacro %}
15
+
16
+
17
+ {%- if tools -%}
18
+ <|im_system|>tool_declare<|im_middle|>{{ tools | tojson(separators=(',', ':')) }}<|im_end|>
19
+ {%- endif -%}
20
+ {% for message in messages %}
21
+ {%- set role_name = message.get('name') or message['role'] -%}
22
+ {%- if message['role'] == 'user' -%}
23
+ <|im_user|>{{role_name}}<|im_middle|>
24
+ {%- elif message['role'] == 'assistant' -%}
25
+ <|im_assistant|>{{role_name}}<|im_middle|>
26
+ {%- else -%}
27
+ <|im_system|>{{role_name}}<|im_middle|>
28
+ {%- endif -%}
29
+
30
+ {%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
31
+ {{render_content(message)}}<|tool_calls_section_begin|>
32
+ {%- for tool_call in message['tool_calls'] -%}
33
+ {%- set formatted_id = tool_call['id'] -%}
34
+ <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
35
+ {%- endfor -%}
36
+ <|tool_calls_section_end|>
37
+ {%- elif message['role'] == 'tool' -%}
38
+ {%- set tool_call_id = message.tool_call_id -%}
39
+ ## Return of {{ tool_call_id }}
40
+ {{render_content(message)}}
41
+ {%- elif message['content'] is not none -%}
42
+ {{render_content(message)}}
43
+ {%- endif -%}
44
+ <|im_end|>
45
+ {%- endfor -%}
46
+ {%- if add_generation_prompt -%}
47
+ <|im_assistant|>assistant<|im_middle|>
48
+ {%- endif -%}
config.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "KimiLinearForCausalLM"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "moonshotai/Kimi-Linear-48B-A3B-Instruct--configuration_kimi.KimiLinearConfig",
7
+ "AutoModel": "moonshotai/Kimi-Linear-48B-A3B-Instruct--modeling_kimi.KimiLinearModel",
8
+ "AutoModelForCausalLM": "moonshotai/Kimi-Linear-48B-A3B-Instruct--modeling_kimi.KimiLinearForCausalLM"
9
+ },
10
+ "bos_token_id": 163584,
11
+ "dtype": "bfloat16",
12
+ "eos_token_id": 163586,
13
+ "first_k_dense_replace": 1,
14
+ "head_dim": 32,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 8,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 32,
19
+ "kv_lora_rank": 512,
20
+ "linear_attn_config": {
21
+ "full_attn_layers": [
22
+ 4
23
+ ],
24
+ "head_dim": 32,
25
+ "kda_layers": [
26
+ 1,
27
+ 2,
28
+ 3
29
+ ],
30
+ "num_heads": 8,
31
+ "short_conv_kernel_size": 4
32
+ },
33
+ "mla_use_nope": true,
34
+ "model_max_length": 1048576,
35
+ "model_type": "kimi_linear",
36
+ "moe_intermediate_size": 32,
37
+ "moe_layer_freq": 1,
38
+ "moe_renormalize": true,
39
+ "moe_router_activation_func": "sigmoid",
40
+ "num_attention_heads": 8,
41
+ "num_expert_group": 1,
42
+ "num_experts": 256,
43
+ "num_experts_per_token": 8,
44
+ "num_hidden_layers": 5,
45
+ "num_key_value_heads": 8,
46
+ "num_nextn_predict_layers": 0,
47
+ "num_shared_experts": 1,
48
+ "pad_token_id": 163839,
49
+ "q_lora_rank": null,
50
+ "qk_nope_head_dim": 128,
51
+ "qk_rope_head_dim": 64,
52
+ "rms_norm_eps": 1e-05,
53
+ "rope_scaling": null,
54
+ "rope_theta": 10000.0,
55
+ "routed_scaling_factor": 2.446,
56
+ "tie_word_embeddings": false,
57
+ "topk_group": 1,
58
+ "transformers_version": "4.57.1",
59
+ "use_cache": true,
60
+ "use_grouped_topk": true,
61
+ "v_head_dim": 128,
62
+ "vocab_size": 163840
63
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 163584,
4
+ "eos_token_id": 163586,
5
+ "pad_token_id": 163839,
6
+ "transformers_version": "4.57.1"
7
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3094fee183ef74dbef84a89e274909566dcd5e98fadaa345e74cdc92e271671a
3
+ size 11691928
special_tokens_map.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_end|>",
4
+ "<|im_user|>",
5
+ "<|im_assistant|>",
6
+ "<|start_header_id|>",
7
+ "<|end_header_id|>",
8
+ "[EOT]",
9
+ "<|im_system|>",
10
+ "<|im_middle|>"
11
+ ],
12
+ "bos_token": {
13
+ "content": "[BOS]",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "eos_token": {
20
+ "content": "[EOS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "pad_token": {
27
+ "content": "[PAD]",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ },
33
+ "unk_token": {
34
+ "content": "[UNK]",
35
+ "lstrip": false,
36
+ "normalized": false,
37
+ "rstrip": false,
38
+ "single_word": false
39
+ }
40
+ }
tiktoken.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6c497a7469b33ced9c38afb1ad6e47f03f5e5dc05f15930799210ec050c5103
3
+ size 2795286
tokenizer_config.json ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "163584": {
4
+ "content": "[BOS]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "163585": {
12
+ "content": "[EOS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "163586": {
20
+ "content": "<|im_end|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "163587": {
28
+ "content": "<|im_user|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "163588": {
36
+ "content": "<|im_assistant|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "163590": {
44
+ "content": "<|start_header_id|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "163591": {
52
+ "content": "<|end_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "163593": {
60
+ "content": "[EOT]",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "163594": {
68
+ "content": "<|im_system|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "163595": {
76
+ "content": "<|tool_calls_section_begin|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": false
82
+ },
83
+ "163596": {
84
+ "content": "<|tool_calls_section_end|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": false
90
+ },
91
+ "163597": {
92
+ "content": "<|tool_call_begin|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": false
98
+ },
99
+ "163598": {
100
+ "content": "<|tool_call_argument_begin|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": false
106
+ },
107
+ "163599": {
108
+ "content": "<|tool_call_end|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": false
114
+ },
115
+ "163601": {
116
+ "content": "<|im_middle|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "163838": {
124
+ "content": "[UNK]",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "163839": {
132
+ "content": "[PAD]",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ }
139
+ },
140
+ "additional_special_tokens": [
141
+ "<|im_end|>",
142
+ "<|im_user|>",
143
+ "<|im_assistant|>",
144
+ "<|start_header_id|>",
145
+ "<|end_header_id|>",
146
+ "[EOT]",
147
+ "<|im_system|>",
148
+ "<|im_middle|>"
149
+ ],
150
+ "bos_token": "[BOS]",
151
+ "clean_up_tokenization_spaces": false,
152
+ "eos_token": "[EOS]",
153
+ "extra_special_tokens": {},
154
+ "model_max_length": 1000000000000000019884624838656,
155
+ "pad_token": "[PAD]",
156
+ "tokenizer_class": "TikTokenTokenizer",
157
+ "unk_token": "[UNK]",
158
+ "auto_map": {
159
+ "AutoTokenizer": [
160
+ "moonshotai/Kimi-Linear-48B-A3B-Instruct--tokenization_kimi.TikTokenTokenizer",
161
+ null
162
+ ]
163
+ }
164
+ }