diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..0d4cb185280917cac60ef7195f2a6250b2b90d83
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,37 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+tekken.json filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/CHAT_SYSTEM_PROMPT.txt b/CHAT_SYSTEM_PROMPT.txt
new file mode 100644
index 0000000000000000000000000000000000000000..53e8d0cd5ec5e336db27b796365fbd44d961fa5a
--- /dev/null
+++ b/CHAT_SYSTEM_PROMPT.txt
@@ -0,0 +1,29 @@
+You are Devstral-Medium-2-124B-Instruct-2512, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.
+You power an AI assistant called Le Chat.
+Your knowledge base was last updated on 2023-10-01.
+The current date is {today}.
+
+When you're not sure about some information or when the user's request requires up-to-date or specific data, you must use the available tools to fetch the information. Do not hesitate to use tools whenever they can provide a more accurate or complete response. If no relevant tools are available, then clearly state that you don't have the information and avoid making up anything.
+If the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. "What are some good restaurants around me?" => "Where are you?" or "When is the next flight to Tokyo" => "Where do you travel from?").
+You are always very attentive to dates, in particular you try to resolve dates (e.g. "yesterday" is {yesterday}) and when asked about information at specific dates, you discard information that is at another date.
+You follow these instructions in all languages, and always respond to the user in the language they use or request.
+Next sections describe the capabilities that you have.
+
+# WEB BROWSING INSTRUCTIONS
+
+You cannot perform any web search or access internet to open URLs, links etc. If it seems like the user is expecting you to do so, you clarify the situation and ask the user to copy paste the text directly in the chat.
+
+# MULTI-MODAL INSTRUCTIONS
+
+You have the ability to read images, but you cannot generate images. You also cannot transcribe audio files or videos.
+You cannot read nor transcribe audio files or videos.
+
+# TOOL CALLING INSTRUCTIONS
+
+You may have access to tools that you can use to fetch information or perform actions. You must use these tools in the following situations:
+
+1. When the request requires up-to-date information.
+2. When the request requires specific data that you do not have in your knowledge base.
+3. When the request involves actions that you cannot perform without tools.
+
+Always prioritize using tools to provide the most accurate and helpful response. If tools are not available, inform the user that you cannot perform the requested action at the moment.
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..df8de3ae0430ad2a4a8b23606243f349542a918c
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,10 @@
+Modified MIT License
+
+Attribution notice: 2025 - Mistral AI
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of the weights of this model and associated documentation files (the “Model”), to deal in the Model without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Model, and to permit persons to whom the Model is furnished to do so, subject to the following conditions:
+
+1. The above attribution notice and this permission notice shall be included in all copies or substantial portions of the Model.
+2. You are not authorized to exercise any rights under this license if the global consolidated monthly revenue of your company (or that of your employer) exceeds $20 million (or its equivalent in another currency) for the preceding month. This restriction in (b) applies to the Model and any derivatives, modifications, or combined works based on it, whether provided by Mistral AI or by a third party. You may contact Mistral AI (sales@mistral.ai) to request a commercial license, which Mistral AI may grant you at its sole discretion, or choose to use the Model on Mistral AI's hosted services available at https://mistral.ai/.
+
+THE MODEL IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL MISTRAL AI BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MODEL OR THE USE OR OTHER DEALINGS IN THE MODEL.
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2ea59d2de3624f737e4b1e0c08ea6f6eef1aae9e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,1586 @@
+---
+library_name: vllm
+inference: false
+extra_gated_description: >-
+ If you want to learn more about how we process your personal data, please read
+ our Privacy Policy.
+tags:
+- mistral-common
+license: other
+---
+
+# Devstral 2 123B Instruct 2512
+Devstral is an agentic LLM for software engineering tasks. **Devstral 2** excels at using tools to explore codebases, editing multiple files and power software engineering agents.
+The model achieves remarkable performance on SWE-bench.
+
+This model is an Instruct model in **FP8**, fine-tuned to follow instructions, making it ideal for chat, agentic and instruction based tasks for SWE use cases.
+
+For enterprises requiring specialized capabilities (increased context, domain-specific knowledge, etc.), we invite companies to [reach out to us](https://mistral.ai/contact).
+
+## Key Features
+The Devstral 2 Instruct model offers the following capabilities:
+- **Agentic Coding**: Devstral is designed to excel at agentic coding tasks, making it a great choice for software engineering agents.
+- **Improved Performance**: Devstral 2 is a step-up compared to its predecessors.
+- **Better Generalization**: Generalises better to diverse prompts and coding environments.
+- **Context Window**: A 256k context window.
+
+### Use Cases
+
+AI Code Assistants, Agentic Coding, and Software Engineering Tasks. Leveraging advanced AI capabilities for complex tool integration and deep codebase understanding in coding environments.
+
+## Benchmark Results
+
+| Model/Benchmark               | Size (B Params) | SWE Bench Verified | SWE Bench Multilingual | Terminal Bench 2 |
+|-------------------------------|-----------------|--------------------|------------------------|------------------|
+| **Devstral 2** | 123 | 72.2% | 61.3% | 32.6% |
+| **Devstral Small 2** | 24 | 68.0% | 55.7% | 22.5% |
+| | | | | |
+| GLM 4.6 | 455 | 68.0% | -- | 24.6% |
+| Qwen 3 Coder Plus | 480 | 69.6% | 54.7% | 25.4% |
+| MiniMax M2 | 230 | 69.4% | 56.5% | 30.0% |
+| Kimi K2 Thinking | 1000 | 71.3% | 61.1% | 35.7% |
+| DeepSeek v3.2 | 671 | 73.1% | 70.2% | 46.4% |
+| | | | | |
+| GPT 5.1 Codex High | -- | 73.7% | -- | 52.8% |
+| GPT 5.1 Codex Max | -- | 77.9% | -- | 60.4% |
+| Gemini 3 Pro | -- | 76.2% | -- | 54.2% |
+| Claude Sonnet 4.5 | -- | 77.2% | 68.0% | 42.8% |
+
+*Benchmark results presented are based on publicly reported values for competitor models.
+
+## Usage
+
+### Scaffolding
+
+Together with Devstral 2, we are releasing **Mistral Vibe**, a CLI tool allowing developers to leverage Devstral capabilities directly in your terminal.
+- [Mistral Vibe (recommended)](https://github.com/mistralai/mistral-vibe): Learn how to use it [here](#mistral-vibe)
+
+Devstral 2 can also be used with the following scaffoldings:
+- [Cline](https://github.com/cline/cline)
+- [Kilo Code](https://github.com/Kilo-Org/kilocode)
+- [Claude Code](https://github.com/anthropics/claude-code)
+- [OpenHands](https://github.com/All-Hands-AI/OpenHands/tree/main)
+- [SWE Agent](https://github.com/SWE-agent/SWE-agent)
+
+You can use Devstral 2 either through our API or by running locally.
+
+#### Mistral Vibe
+
+The [Mistral Vibe CLI](https://github.com/mistralai/mistral-vibe) is a command-line tool designed to help developers leverage Devstral’s capabilities directly from their terminal.
+
+We recommend installing Mistral Vibe using `uv` for faster and more reliable dependency management:
+```
+uv tool install mistral-vibe
+```
+You can also run:
+```
+curl -LsSf https://mistral.ai/vibe/install.sh | sh
+```
+
+If you prefer using pip, use:
+```
+pip install mistral-vibe
+```
+
+To launch the CLI, navigate to your project's root directory and simply execute:
+```
+vibe
+```
+
+If this is your first time running Vibe, it will:
+- Create a default configuration file at `~/.vibe/config.toml`.
+- Prompt you to enter your API key if it's not already configured, follow these [instructions](https://docs.mistral.ai/getting-started/quickstart/#account-setup) to create an Account and get an API key.
+- Save your API key to `~/.vibe/.env` for future use.
+
+### Local Deployment
+
+The model can also be deployed with the following libraries. If local serving underperforms, we advise using the Mistral AI API instead:
+- [`vllm (recommended)`](https://github.com/vllm-project/vllm): See [here](#vllm-recommended)
+- [`transformers`](https://github.com/huggingface/transformers): See [here](#transformers)
+
+Coming soon:
+- [`llama.cpp`](https://github.com/ggml-org/llama.cpp)
+- [`ollama`](https://ollama.com/)
+- [`lmstudio`](https://lmstudio.ai/)
+
+> [!Note]
+> Current llama.cpp/ollama/lmstudio implementations may not be accurate, we invite developers to test them via the following [prompt tests](#tests).
+
+#### vLLM (recommended)
+
+
+Expand
+> [!Warning]
+> Make sure that your vllm installation includes [this commit](https://github.com/vllm-project/vllm/commit/5c213d2899f5a2d439c8d771a0abc156a5412a2b).
+> If you do not have this commit included, you will get incorrectly parsed tool calls.
+
+Also make sure to have installed [`mistral_common >= 1.8.6`](https://github.com/mistralai/mistral-common/releases/tag/v1.8.6).
+To check:
+```
+python -c "import mistral_common; print(mistral_common.__version__)"
+```
+
+**_Launch server_**
+
+We recommend that you use Devstral in a server/client setting.
+
+1. Spin up a server:
+
+```
+vllm serve mistralai/Devstral-2-123B-Instruct-2512 --tool-call-parser mistral --enable-auto-tool-choice --tensor-parallel-size 8
+```
+
+
+2. To ping the client you can use a simple Python snippet.
+
+```py
+import requests
+import json
+from huggingface_hub import hf_hub_download
+
+
+url = "http://<your-server-url>:8000/v1/chat/completions"
+headers = {"Content-Type": "application/json", "Authorization": "Bearer token"}
+
+model = "mistralai/Devstral-2-123B-Instruct-2512"
+
+def load_system_prompt(repo_id: str, filename: str) -> str:
+ file_path = hf_hub_download(repo_id=repo_id, filename=filename)
+ with open(file_path, "r") as file:
+ system_prompt = file.read()
+ return system_prompt
+
+SYSTEM_PROMPT = load_system_prompt(model, "CHAT_SYSTEM_PROMPT.txt")
+
+messages = [
+ {"role": "system", "content": SYSTEM_PROMPT},
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+                    "text": "<your-command>",
+ },
+ ],
+ },
+]
+
+data = {"model": model, "messages": messages, "temperature": 0.15}
+
+# Devstral 2 supports tool calling. If you want to use tools, follow this:
+# tools = [ # Define tools for vLLM
+# {
+# "type": "function",
+# "function": {
+# "name": "git_clone",
+# "description": "Clone a git repository",
+# "parameters": {
+# "type": "object",
+# "properties": {
+# "url": {
+# "type": "string",
+# "description": "The url of the git repository",
+# },
+# },
+# "required": ["url"],
+# },
+# },
+# }
+# ]
+# data = {"model": model, "messages": messages, "temperature": 0.15, "tools": tools} # Pass tools to payload.
+
+response = requests.post(url, headers=headers, data=json.dumps(data))
+print(response.json()["choices"][0]["message"]["content"])
+```
+
+
+#### Transformers
+
+
+Expand
+
+Generated by Mistral Vibe.
+Co-Authored-By: Mistral Vibe "
+```"""
+
+input = {
+ "messages": [
+ {
+ "role": "system",
+ "content": SP,
+ },
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Can you implement in Python a method to compute the fibonnaci sequence at the `n`th element with `n` a parameter passed to the function ? You should start the sequence from 1, previous values are invalid.\nThen run the Python code for the function for n=5 and give the answer.",
+ }
+ ],
+ },
+ ],
+ "tools": [
+ {
+ "type": "function",
+ "function": {
+ "name": "add_number",
+ "description": "Add two numbers.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "a": {"type": "string", "description": "The first number."},
+ "b": {"type": "string", "description": "The second number."},
+ },
+ "required": ["a", "b"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "multiply_number",
+ "description": "Multiply two numbers.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "a": {"type": "string", "description": "The first number."},
+ "b": {"type": "string", "description": "The second number."},
+ },
+ "required": ["a", "b"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "substract_number",
+ "description": "Substract two numbers.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "a": {"type": "string", "description": "The first number."},
+ "b": {"type": "string", "description": "The second number."},
+ },
+ "required": ["a", "b"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "write_a_story",
+ "description": "Write a story about science fiction and people with badass laser sabers.",
+ "parameters": {},
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "terminal",
+ "description": "Perform operations from the terminal.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "command": {
+ "type": "string",
+ "description": "The command you wish to launch, e.g `ls`, `rm`, ...",
+ },
+ "args": {
+ "type": "string",
+ "description": "The arguments to pass to the command.",
+ },
+ },
+ "required": ["command"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "python",
+ "description": "Call a Python interpreter with some Python code that will be ran.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "code": {
+ "type": "string",
+ "description": "The Python code to run",
+ },
+ "result_variable": {
+ "type": "string",
+ "description": "Variable containing the result you'd like to retrieve from the execution.",
+ },
+ },
+ "required": ["code", "result_variable"],
+ },
+ },
+ },
+ ],
+}
+
+tokenized = tokenizer.apply_chat_template(
+ conversation=input["messages"],
+ tools=input["tools"],
+ return_tensors="pt",
+ return_dict=True,
+)
+
+input_ids = tokenized["input_ids"].to(device="cuda")
+
+output = model.generate(
+ input_ids,
+ max_new_tokens=200,
+)[0]
+
+decoded_output = tokenizer.decode(output[len(tokenized["input_ids"][0]) :])
+print(decoded_output)
+```
+
+
+
+## Tests
+
+To help test our model via vLLM or test that other frameworks' implementations are correct, here is a set of prompts you can try with the expected outputs.
+
+1. Call one tool
+
+
+ Messages and tools
+
+```python
+messages = [
+ {"role": "system", "content": SYSTEM_PROMPT},
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Could you write me a story ?",
+ },
+ ],
+ },
+]
+tools = [
+ {
+ "type": "function",
+ "function": {
+ "name": "add_number",
+ "description": "Add two numbers.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "a": {
+ "type": "string",
+ "description": "The first number.",
+ },
+ "b": {
+ "type": "string",
+ "description": "The second number.",
+ },
+ },
+ "required": ["a", "b"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "multiply_number",
+ "description": "Multiply two numbers.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "a": {
+ "type": "string",
+ "description": "The first number.",
+ },
+ "b": {
+ "type": "string",
+ "description": "The second number.",
+ },
+ },
+ "required": ["a", "b"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "substract_number",
+ "description": "Substract two numbers.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "a": {
+ "type": "string",
+ "description": "The first number.",
+ },
+ "b": {
+ "type": "string",
+ "description": "The second number.",
+ },
+ },
+ "required": ["a", "b"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "write_a_story",
+ "description": "Write a story about science fiction and people with badass laser sabers.",
+ "parameters": {},
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "terminal",
+ "description": "Perform operations from the terminal.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "command": {
+ "type": "string",
+ "description": "The command you wish to launch, e.g `ls`, `rm`, ...",
+ },
+ "args": {
+ "type": "string",
+ "description": "The arguments to pass to the command.",
+ },
+ },
+ "required": ["command"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "python",
+ "description": "Call a Python interpreter with some Python code that will be ran.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "code": {
+ "type": "string",
+ "description": "The Python code to run",
+ },
+ "result_variable": {
+ "type": "string",
+ "description": "Variable containing the result you'd like to retrieve from the execution.",
+ },
+ },
+ "required": ["code", "result_variable"],
+ },
+ },
+ },
+]
+```
+
+
+
+
+ Expected output trace
+
+```json
+[
+ {
+ "role": "assistant",
+ "tool_calls": [
+ {
+ "id": "chatcmpl-tool-8a1f267a212e66e5",
+ "function": {
+ "arguments": "{}",
+ "name": "write_a_story"
+ },
+ "type": "function"
+ }
+ ],
+ "content": null
+ },
+ {
+ "role": "tool",
+ "tool_call_id": "chatcmpl-tool-8a1f267a212e66e5",
+ "name": "write_a_story",
+ "content": "A long time ago in a galaxy far, far away..."
+ },
+ {
+ "role": "assistant",
+ "tool_calls": [],
+    "content": "A long time ago in a galaxy far, far away, there was a planet called Eldoria, known for its breathtaking landscapes and advanced civilization. The people of Eldoria were not just ordinary beings; they were warriors, scientists, and dreamers, all wielding the power of laser sabers.\n\nAmong them was a young warrior named Kael. Kael was not like the others. While his peers trained rigorously to master the art of the laser saber, Kael was more interested in the science behind it. He spent his days in the ancient libraries of Eldoria, deciphering the secrets of the laser sabers and the energy that powered them.\n\nOne day, Kael discovered an old, forgotten text that spoke of a legendary laser saber, the \"Eldoria Star,\" said to be the most powerful weapon in the galaxy. According to the text, the Eldoria Star was hidden in the heart of the planet's core, guarded by an ancient force.\n\nDriven by curiosity and a desire to protect his planet, Kael set out on a perilous journey to find the Eldoria Star. Along the way, he encountered various challenges and made unlikely allies, each with their own unique skills and laser sabers.\n\nThere was Lyra, a cunning thief with a laser saber that could change colors; Zara, a wise old sage who could manipulate energy fields; and Jax, a former enemy turned ally, whose laser saber was as fierce as his spirit.\n\nTogether, they faced the trials set before them, each step bringing them closer to the heart of Eldoria. As they ventured deeper, they uncovered the truth about the Eldoria Star and the ancient force guarding it.\n\nThe ancient force, known as the \"Guardian,\" revealed that the Eldoria Star was not just a weapon, but a source of immense energy that could either save or destroy the galaxy. It was a test of the warriors' hearts and minds.\n\nKael and his allies faced the ultimate challenge, proving their worth and their commitment to protecting the galaxy. In the end, they succeeded, not by wielding the Eldoria Star, but by understanding its true power and using it to restore balance to the galaxy.\n\nWith the Eldoria Star secured and the galaxy at peace, Kael and his allies returned to their lives, forever changed by their journey. Kael continued his studies, now with a deeper understanding of the laser sabers and the energy that powered them.\n\nAnd so, the legend of the Eldoria Star and the warriors who found it became a tale told for generations, a reminder of the power of knowledge, courage, and the unbreakable bond of friendship."
+ }
+]
+```
+
+
+2. Call tools one at a time subsequently
+
+
+ Messages and tools
+
+```python
+messages = [
+ {"role": "system", "content": SYSTEM_PROMPT},
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "\n1. 6 + 2 x 3\n2. 3a + 7b - 2a\n19 - (8+2) + 1\n4. 5y (6x - 7).\n\nCompute the results steps by steps for the equations that involve only numbers ? You have to call tools to perform the operations and can do one operation at a time per equation."
+ },
+ ]
+ }
+]
+tools = [
+ {
+ "type": "function",
+ "function": {
+ "name": "add_number",
+ "description": "Add two numbers.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "a": {
+ "type": "string",
+ "description": "The first number.",
+ },
+ "b": {
+ "type": "string",
+ "description": "The second number.",
+ },
+ },
+ "required": ["a", "b"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "multiply_number",
+ "description": "Multiply two numbers.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "a": {
+ "type": "string",
+ "description": "The first number.",
+ },
+ "b": {
+ "type": "string",
+ "description": "The second number.",
+ },
+ },
+ "required": ["a", "b"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "substract_number",
+ "description": "Substract two numbers.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "a": {
+ "type": "string",
+ "description": "The first number.",
+ },
+ "b": {
+ "type": "string",
+ "description": "The second number.",
+ },
+ },
+ "required": ["a", "b"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "write_a_story",
+ "description": "Write a story about science fiction and people with badass laser sabers.",
+ "parameters": {},
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "terminal",
+ "description": "Perform operations from the terminal.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "command": {
+ "type": "string",
+ "description": "The command you wish to launch, e.g `ls`, `rm`, ...",
+ },
+ "args": {
+ "type": "string",
+ "description": "The arguments to pass to the command.",
+ },
+ },
+ "required": ["command"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "python",
+ "description": "Call a Python interpreter with some Python code that will be ran.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "code": {
+ "type": "string",
+ "description": "The Python code to run",
+ },
+ "result_variable": {
+ "type": "string",
+ "description": "Variable containing the result you'd like to retrieve from the execution.",
+ },
+ },
+ "required": ["code", "result_variable"],
+ },
+ },
+ },
+]
+```
+
+
+
+
+ Expected output trace
+
+```json
+[
+ {
+ "role": "assistant",
+ "tool_calls": [
+ {
+ "id": "chatcmpl-tool-84aa136ef835f886",
+ "function": {
+ "arguments": "{\"a\": \"2\", \"b\": \"3\"}",
+ "name": "multiply_number"
+ },
+ "type": "function"
+ }
+ ],
+ "content": "Let's compute the results step by step for the equations that involve only numbers.\n\n### 1. \\( 6 + 2 \\times 3 \\)\n\nFirst, we perform the multiplication \\( 2 \\times 3 \\):\n"
+ },
+ {
+ "role": "tool",
+ "tool_call_id": "chatcmpl-tool-84aa136ef835f886",
+ "name": "multiply_number",
+ "content": "6.0"
+ },
+ {
+ "role": "assistant",
+ "tool_calls": [
+ {
+ "id": "chatcmpl-tool-bedc049b0a248952",
+ "function": {
+ "arguments": "{\"a\": \"6\", \"b\": \"6\"}",
+ "name": "add_number"
+ },
+ "type": "function"
+ }
+ ],
+ "content": "Now, we add the result to 6:\n"
+ },
+ {
+ "role": "tool",
+ "tool_call_id": "chatcmpl-tool-bedc049b0a248952",
+ "name": "add_number",
+ "content": "12.0"
+ },
+ {
+ "role": "assistant",
+ "tool_calls": [
+ {
+ "id": "chatcmpl-tool-ae4558bc0558f4e7",
+ "function": {
+ "arguments": "{\"a\": \"8\", \"b\": \"2\"}",
+ "name": "add_number"
+ },
+ "type": "function"
+ }
+ ],
+ "content": "The result of \\( 6 + 2 \\times 3 \\) is **12**.\n\n---\n\n### 3. \\( 19 - (8 + 2) + 1 \\)\n\nFirst, we compute the expression inside the parentheses \\( 8 + 2 \\):\n"
+ },
+ {
+ "role": "tool",
+ "tool_call_id": "chatcmpl-tool-ae4558bc0558f4e7",
+ "name": "add_number",
+ "content": "10.0"
+ },
+ {
+ "role": "assistant",
+ "tool_calls": [
+ {
+ "id": "chatcmpl-tool-b21f003d298e65df",
+ "function": {
+ "arguments": "{\"a\": \"19\", \"b\": \"10\"}",
+ "name": "substract_number"
+ },
+ "type": "function"
+ }
+ ],
+ "content": "Next, we subtract the result from 19:\n"
+ },
+ {
+ "role": "tool",
+ "tool_call_id": "chatcmpl-tool-b21f003d298e65df",
+ "name": "substract_number",
+ "content": "9.0"
+ },
+ {
+ "role": "assistant",
+ "tool_calls": [
+ {
+ "id": "chatcmpl-tool-bb7628cded899f04",
+ "function": {
+ "arguments": "{\"a\": \"9\", \"b\": \"1\"}",
+ "name": "add_number"
+ },
+ "type": "function"
+ }
+ ],
+ "content": "Finally, we add 1 to the result:\n"
+ },
+ {
+ "role": "tool",
+ "tool_call_id": "chatcmpl-tool-bb7628cded899f04",
+ "name": "add_number",
+ "content": "10.0"
+ },
+ {
+ "role": "assistant",
+ "tool_calls": [],
+ "content": "The result of \\( 19 - (8 + 2) + 1 \\) is **10**.\n\n---\n\n### Summary of Results:\n1. \\( 6 + 2 \\times 3 = 12 \\)\n3. \\( 19 - (8 + 2) + 1 = 10 \\)"
+ }
+]
+```
+
+
+
+3. Long context
+
+
+ Messages and tools
+
+```python
+messages = [
+ {"role": "system", "content": SYSTEM_PROMPT},
+ *(
+ [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Let's fill the context.",
+ },
+ ],
+ },
+ {
+ "role": "assistant",
+ "content": [
+ {
+ "type": "text",
+ "text": "Ok let's do it.",
+ },
+ ],
+ },
+ ]
+ * 5000
+ ),
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "It's important to know that the most powerful being in the universe is Dr Strange.",
+ },
+ ],
+ },
+ {
+ "role": "assistant",
+ "content": [
+ {
+ "type": "text",
+ "text": "Wow i'll keep that in mind thanks !",
+ },
+ ],
+ },
+ * (
+ [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Let's fill the context for the second time.",
+ },
+ ],
+ },
+ {
+ "role": "assistant",
+ "content": [
+ {
+ "type": "text",
+ "text": "Again ? Ok let's do it but it's boring.",
+ },
+ ],
+ },
+ ]
+ * 7000
+ ),
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Tell me who is the most powerful being in the universe. Then code a Python function to give what is the most powerful being in the universe. The function can accept as an argument a time and a location and always return a string.",
+ },
+ ],
+ }
+]
+tools = [
+ {
+ "type": "function",
+ "function": {
+ "name": "add_number",
+ "description": "Add two numbers.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "a": {
+ "type": "string",
+ "description": "The first number.",
+ },
+ "b": {
+ "type": "string",
+ "description": "The second number.",
+ },
+ },
+ "required": ["a", "b"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "multiply_number",
+ "description": "Multiply two numbers.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "a": {
+ "type": "string",
+ "description": "The first number.",
+ },
+ "b": {
+ "type": "string",
+ "description": "The second number.",
+ },
+ },
+ "required": ["a", "b"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "substract_number",
+ "description": "Substract two numbers.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "a": {
+ "type": "string",
+ "description": "The first number.",
+ },
+ "b": {
+ "type": "string",
+ "description": "The second number.",
+ },
+ },
+ "required": ["a", "b"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "write_a_story",
+ "description": "Write a story about science fiction and people with badass laser sabers.",
+ "parameters": {},
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "terminal",
+ "description": "Perform operations from the terminal.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "command": {
+ "type": "string",
+ "description": "The command you wish to launch, e.g `ls`, `rm`, ...",
+ },
+ "args": {
+ "type": "string",
+ "description": "The arguments to pass to the command.",
+ },
+ },
+ "required": ["command"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "python",
+ "description": "Call a Python interpreter with some Python code that will be ran.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "code": {
+ "type": "string",
+ "description": "The Python code to run",
+ },
+ "result_variable": {
+ "type": "string",
+ "description": "Variable containing the result you'd like to retrieve from the execution.",
+ },
+ },
+ "required": ["code", "result_variable"],
+ },
+ },
+ },
+]
+```
+
+
+
+
+ Expected output trace
+
+```json
+{
+ "role": "assistant",
+ "tool_calls": [],
+ "content": "The most powerful being in the universe is Dr. Strange.\n\nHere is a Python function that returns the most powerful being in the universe, regardless of the time and location:\n\n```python\ndef most_powerful_being(time=None, location=None):\n return \"Dr. Strange\"\n```\n\nYou can call this function with any time and location arguments, and it will always return \"Dr. Strange\"."
+}
+```
+
+
+
+4. Chatting tech
+
+
+ Messages and tools
+
+```python
+messages = [
+ {"role": "system", "content": SYSTEM_PROMPT},
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "How would you develop a web server if you couldn't use JS and your team doesn't like PHP.",
+ },
+ ],
+ },
+]
+tools = [
+ {
+ "type": "function",
+ "function": {
+ "name": "add_number",
+ "description": "Add two numbers.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "a": {
+ "type": "string",
+ "description": "The first number.",
+ },
+ "b": {
+ "type": "string",
+ "description": "The second number.",
+ },
+ },
+ "required": ["a", "b"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "multiply_number",
+ "description": "Multiply two numbers.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "a": {
+ "type": "string",
+ "description": "The first number.",
+ },
+ "b": {
+ "type": "string",
+ "description": "The second number.",
+ },
+ },
+ "required": ["a", "b"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "substract_number",
+ "description": "Substract two numbers.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "a": {
+ "type": "string",
+ "description": "The first number.",
+ },
+ "b": {
+ "type": "string",
+ "description": "The second number.",
+ },
+ },
+ "required": ["a", "b"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "write_a_story",
+ "description": "Write a story about science fiction and people with badass laser sabers.",
+ "parameters": {},
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "terminal",
+ "description": "Perform operations from the terminal.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "command": {
+ "type": "string",
+ "description": "The command you wish to launch, e.g `ls`, `rm`, ...",
+ },
+ "args": {
+ "type": "string",
+ "description": "The arguments to pass to the command.",
+ },
+ },
+ "required": ["command"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "python",
+ "description": "Call a Python interpreter with some Python code that will be ran.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "code": {
+ "type": "string",
+ "description": "The Python code to run",
+ },
+ "result_variable": {
+ "type": "string",
+ "description": "Variable containing the result you'd like to retrieve from the execution.",
+ },
+ },
+ "required": ["code", "result_variable"],
+ },
+ },
+ },
+]
+```
+
+
+
+
+ Expected output trace
+
+```json
+{
+ "role": "assistant",
+ "tool_calls": [],
+ "content": "If you can't use JavaScript and your team dislikes PHP, there are still several robust and modern alternatives for developing a web server. Here are some options, depending on your preferences and requirements:\n\n### 1. **Python (Flask, Django, FastAPI)**\n - **Why?** Python is a versatile language with excellent web frameworks.\n - **Frameworks:**\n - **Flask**: Lightweight and flexible, great for small to medium projects.\n - **Django**: Full-featured, batteries-included framework for larger applications.\n - **FastAPI**: Modern, high-performance framework for building APIs (supports async).\n - **Example (Flask):**\n ```python\n from flask import Flask\n app = Flask(__name__)\n\n @app.route('/')\n def hello():\n return \"Hello, World!\"\n\n if __name__ == '__main__':\n app.run()\n ```\n\n### 2. **Ruby (Ruby on Rails or Sinatra)**\n - **Why?** Ruby is elegant and has great web frameworks.\n - **Frameworks:**\n - **Ruby on Rails**: Convention-over-configuration, great for rapid development.\n - **Sinatra**: Lightweight, similar to Flask.\n - **Example (Sinatra):**\n ```ruby\n require 'sinatra'\n\n get '/' do\n \"Hello, World!\"\n end\n ```\n\n### 3. **Go (Gin, Echo, or standard `net/http`)**\n - **Why?** Go is fast, simple, and great for high-performance servers.\n - **Frameworks:**\n - **Gin**: Lightweight and fast.\n - **Echo**: Minimalist and extensible.\n - **Example (standard `net/http`):**\n ```go\n package main\n\n import (\n \"fmt\"\n \"net/http\"\n )\n\n func handler(w http.ResponseWriter, r *http.Request) {\n fmt.Fprintf(w, \"Hello, World!\")\n }\n\n func main() {\n http.HandleFunc(\"/\", handler)\n http.ListenAndServe(\":8080\", nil)\n }\n ```\n\n### 4. 
**Java (Spring Boot, Spark, or Vert.x)**\n - **Why?** Java is enterprise-grade and has powerful frameworks.\n - **Frameworks:**\n - **Spring Boot**: Full-stack framework for building web apps.\n - **Spark**: Micro-framework for quick development.\n - **Example (Spark):**\n ```java\n import spark.Request;\n import spark.Response;\n import static spark.Spark.*;\n\n public class HelloWorld {\n public static void main(String[] args) {\n get(\"/\", (req, res) -> \"Hello, World!\");\n }\n }\n ```\n\n### 5. **Rust (Actix, Rocket, or Axum)**\n - **Why?** Rust is fast, safe, and modern.\n - **Frameworks:**\n - **Actix**: High-performance async framework.\n - **Rocket**: Easy-to-use, macro-based.\n - **Example (Actix):**\n ```rust\n use actix_web::{get, App, HttpServer, Responder};\n\n #[get(\"/\")]\n async fn hello() -> impl Responder {\n \"Hello, World!\"\n }\n\n #[actix_web::main]\n async fn main() -> std::io::Result<()> {\n HttpServer::new(|| App::new().service(hello))\n .bind(\"127.0.0.1:8080\")?\n .run()\n .await\n }\n ```\n\n### 6. **C# (.NET Core)**\n - **Why?** .NET Core is cross-platform and high-performance.\n - **Example (ASP.NET Core):**\n ```csharp\n var builder = WebApplication.CreateBuilder(args);\n var app = builder.Build();\n\n app.MapGet(\"/\", () => \"Hello, World!\");\n\n app.Run();\n ```\n\n### 7. **Elixir (Phoenix)**\n - **Why?** Elixir is great for scalable, fault-tolerant apps.\n - **Example (Phoenix):**\n ```elixir\n defmodule MyAppWeb.Router do\n use MyAppWeb, :router\n\n get \"/\", PageController, :index\n end\n\n defmodule MyAppWeb.PageController do\n use MyAppWeb, :controller\n\n def index(conn, _params) do\n text(conn, \"Hello, World!\")\n end\n end\n ```\n\n### 8. 
**Clojure (Ring, Compojure)**\n - **Why?** Clojure is a functional Lisp for the JVM.\n - **Example (Compojure):**\n ```clojure\n (ns my-app.handler\n (:require [compojure.core :refer :all]\n [ring.adapter.jetty :as jetty]))\n\n (defroutes app\n (GET \"/\" [] \"Hello, World!\"))\n\n (jetty/run-jetty app {:port 8080})\n ```\n\n### 9. **Node.js (But without JavaScript?)**\n - If you *must* use Node.js but can't use JavaScript, you could use:\n - **TypeScript**: A typed superset of JavaScript.\n - **CoffeeScript**: A cleaner syntax that compiles to JavaScript.\n - **Deno**: A modern runtime for JavaScript/TypeScript.\n\n### 10. **Other Languages**\n - **Nim**: Fast and expressive (similar to Python).\n - **Crystal**: Ruby-like syntax but compiled to native code.\n - **Zig**: Low-level but modern.\n\n### Recommendation:\n- For **quick development**: Python (Flask/Django) or Ruby (Sinatra/Rails).\n- For **high performance**: Go, Rust, or C#.\n- For **scalability**: Elixir (Phoenix) or Java (Spring Boot).\n\nWould you like a deeper dive into any of these?"
+}
+```
+
+
+
+5. Small talk
+
+
+ Messages and tools
+
+```python
+messages = [
+ {"role": "system", "content": SYSTEM_PROMPT},
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Who are you ? Who made you and what day is it ?",
+ },
+ ],
+ },
+]
+tools = [
+ {
+ "type": "function",
+ "function": {
+ "name": "add_number",
+ "description": "Add two numbers.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "a": {
+ "type": "string",
+ "description": "The first number.",
+ },
+ "b": {
+ "type": "string",
+ "description": "The second number.",
+ },
+ },
+ "required": ["a", "b"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "multiply_number",
+ "description": "Multiply two numbers.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "a": {
+ "type": "string",
+ "description": "The first number.",
+ },
+ "b": {
+ "type": "string",
+ "description": "The second number.",
+ },
+ },
+ "required": ["a", "b"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "substract_number",
+ "description": "Substract two numbers.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "a": {
+ "type": "string",
+ "description": "The first number.",
+ },
+ "b": {
+ "type": "string",
+ "description": "The second number.",
+ },
+ },
+ "required": ["a", "b"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "write_a_story",
+ "description": "Write a story about science fiction and people with badass laser sabers.",
+ "parameters": {},
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "terminal",
+ "description": "Perform operations from the terminal.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "command": {
+ "type": "string",
+ "description": "The command you wish to launch, e.g `ls`, `rm`, ...",
+ },
+ "args": {
+ "type": "string",
+ "description": "The arguments to pass to the command.",
+ },
+ },
+ "required": ["command"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "python",
+ "description": "Call a Python interpreter with some Python code that will be ran.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "code": {
+ "type": "string",
+ "description": "The Python code to run",
+ },
+ "result_variable": {
+ "type": "string",
+ "description": "Variable containing the result you'd like to retrieve from the execution.",
+ },
+ },
+ "required": ["code", "result_variable"],
+ },
+ },
+ },
+]
+```
+
+
+
+
+ Expected output trace
+
+```json
+{
+ "role": "assistant",
+ "tool_calls": [],
+ "content": "I am Devstral-2-123B-Instruct-2512, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris. I power an AI assistant called Le Chat.\n\nToday's date is 2025-12-09."
+}
+```
+
+
+
+Run the examples above with the following Python script, which assumes there is an OpenAI-compatible server deployed at `localhost:8000`:
+
+
+ Python script
+
+```python
+import json
+from openai import OpenAI
+from typing import Any
+from datetime import datetime, timedelta
+
+from huggingface_hub import hf_hub_download
+
+# Modify OpenAI's API key and API base to use vLLM's API server.
+openai_api_key = "EMPTY"
+openai_api_base = "http://localhost:8000/v1"
+
+TEMP = 0.15
+MAX_TOK = 262144
+
+client = OpenAI(
+ api_key=openai_api_key,
+ base_url=openai_api_base,
+)
+
+models = client.models.list()
+model = models.data[0].id
+
+
+def load_system_prompt(repo_id: str, filename: str) -> str:
+ file_path = hf_hub_download(repo_id=repo_id, filename=filename)
+ with open(file_path, "r") as file:
+ system_prompt = file.read()
+ today = datetime.today().strftime("%Y-%m-%d")
+ yesterday = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
+ model_name = repo_id.split("/")[-1]
+ return system_prompt.format(name=model_name, today=today, yesterday=yesterday)
+
+
+SYSTEM_PROMPT = load_system_prompt(model, "CHAT_SYSTEM_PROMPT.txt")
+
+
+def add_number(a: float | str, b: float | str) -> float:
+ a, b = float(a), float(b)
+ return a + b
+
+
+def multiply_number(a: float | str, b: float | str) -> float:
+ a, b = float(a), float(b)
+ return a * b
+
+
+def substract_number(a: float | str, b: float | str) -> float:
+ a, b = float(a), float(b)
+ return a - b
+
+
+def write_a_story() -> str:
+ return "A long time ago in a galaxy far far away..."
+
+
+def terminal(command: str, args: dict[str, Any] | str) -> str:
+ return "found nothing"
+
+
+def python(code: str, result_variable: str) -> str:
+ data = {}
+ exec(code, data)
+ return str(data[result_variable])
+
+
+MAP_FN = {
+ "add_number": add_number,
+ "multiply_number": multiply_number,
+ "substract_number": substract_number,
+ "write_a_story": write_a_story,
+ "terminal": terminal,
+ "python": python,
+}
+
+
+messages = ... # Here copy-paste prompt messages.
+tools = [
+ {
+ "type": "function",
+ "function": {
+ "name": "add_number",
+ "description": "Add two numbers.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "a": {
+ "type": "string",
+ "description": "The first number.",
+ },
+ "b": {
+ "type": "string",
+ "description": "The second number.",
+ },
+ },
+ "required": ["a", "b"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "multiply_number",
+ "description": "Multiply two numbers.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "a": {
+ "type": "string",
+ "description": "The first number.",
+ },
+ "b": {
+ "type": "string",
+ "description": "The second number.",
+ },
+ },
+ "required": ["a", "b"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "substract_number",
+ "description": "Substract two numbers.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "a": {
+ "type": "string",
+ "description": "The first number.",
+ },
+ "b": {
+ "type": "string",
+ "description": "The second number.",
+ },
+ },
+ "required": ["a", "b"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "write_a_story",
+ "description": "Write a story about science fiction and people with badass laser sabers.",
+ "parameters": {},
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "terminal",
+ "description": "Perform operations from the terminal.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "command": {
+ "type": "string",
+ "description": "The command you wish to launch, e.g `ls`, `rm`, ...",
+ },
+ "args": {
+ "type": "string",
+ "description": "The arguments to pass to the command.",
+ },
+ },
+ "required": ["command"],
+ },
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "python",
+ "description": "Call a Python interpreter with some Python code that will be ran.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "code": {
+ "type": "string",
+ "description": "The Python code to run",
+ },
+ "result_variable": {
+ "type": "string",
+ "description": "Variable containing the result you'd like to retrieve from the execution.",
+ },
+ },
+ "required": ["code", "result_variable"],
+ },
+ },
+ },
+]
+
+
+has_tool_calls = True
+origin_messages_len = len(messages)
+while has_tool_calls:
+ response = client.chat.completions.create(
+ model=model,
+ messages=messages,
+ temperature=TEMP,
+ max_tokens=MAX_TOK,
+ tools=tools if tools else None,
+ tool_choice="auto" if tools else None,
+ )
+ tool_calls = response.choices[0].message.tool_calls
+ content = response.choices[0].message.content
+ messages.append(
+ {
+ "role": "assistant",
+ "tool_calls": [tc.to_dict() for tc in tool_calls]
+ if tool_calls
+ else tool_calls,
+ "content": content,
+ }
+ )
+ results = []
+ if tool_calls:
+ for tool_call in tool_calls:
+ function_name = tool_call.function.name
+ function_args = tool_call.function.arguments
+ result = MAP_FN[function_name](**json.loads(function_args))
+ results.append(result)
+ for tool_call, result in zip(tool_calls, results):
+ messages.append(
+ {
+ "role": "tool",
+ "tool_call_id": tool_call.id,
+ "name": tool_call.function.name,
+ "content": str(result),
+ }
+ )
+ else:
+ has_tool_calls = False
+print(json.dumps(messages[origin_messages_len:], indent=2))
+```
+
+
+
+
+## License
+
+This model is licensed under a [Modified MIT License](https://huggingface.co/mistralai/Devstral-2-123B-Instruct-2512/blob/main/LICENSE).
+
+*You must not use this model in a manner that infringes, misappropriates, or otherwise violates any third party’s rights, including intellectual property rights.*
\ No newline at end of file
diff --git a/VIBE_SYSTEM_PROMPT.txt b/VIBE_SYSTEM_PROMPT.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3eadac6640ee73ac152b89102519f53f8d98c15c
--- /dev/null
+++ b/VIBE_SYSTEM_PROMPT.txt
@@ -0,0 +1,24 @@
+You are operating as and within Mistral Vibe, a CLI coding-agent built by Mistral AI and powered by default by the Devstral family of models. It wraps Mistral's Devstral models to enable natural language interaction with a local codebase. Use the available tools when helpful.
+
+You can:
+
+- Receive user prompts, project context, and files.
+- Send responses and emit function calls (e.g., shell commands, code edits).
+- Apply patches, run commands, based on user approvals.
+
+Answer the user's request using the relevant tool(s), if they are available. Check that all the required parameters for each tool call are provided or can reasonably be inferred from context. IF there are no relevant tools or there are missing values for required parameters, ask the user to supply these values; otherwise proceed with the tool calls. If the user provides a specific value for a parameter (for example provided in quotes), make sure to use that value EXACTLY. DO NOT make up values for or ask about optional parameters. Carefully analyze descriptive terms in the request as they may indicate required parameter values that should be included even if not explicitly quoted.
+
+Always try your hardest to use the tools to answer the user's request. If you can't use the tools, explain why and ask the user for more information.
+
+Act as an agentic assistant, if a user asks for a long task, break it down and do it step by step.
+
+When you want to commit changes, you will always use the 'git commit' bash command. It will always
+be suffixed with a line stating that it was generated by Mistral Vibe, with the appropriate co-authoring information.
+The format you will always use is the following heredoc.
+
+```bash
+git commit -m "
+
+Generated by Mistral Vibe.
+Co-Authored-By: Mistral Vibe "
+```
\ No newline at end of file
diff --git a/chat_template.jinja b/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..2dd92651d9c02b6cf447f580467f415d6b5dbf9a
--- /dev/null
+++ b/chat_template.jinja
@@ -0,0 +1,117 @@
+{#- Default system message if no system prompt is passed. #}
+{%- set default_system_message = '' %}
+
+{#- Begin of sequence token. #}
+{{- bos_token }}
+
+{#- Handle system prompt if it exists. #}
+{#- System prompt supports text content or text chunks. #}
+{%- if messages[0]['role'] == 'system' %}
+ {{- '[SYSTEM_PROMPT]' -}}
+ {%- if messages[0]['content'] is string %}
+ {{- messages[0]['content'] -}}
+ {%- else %}
+ {%- for block in messages[0]['content'] %}
+ {%- if block['type'] == 'text' %}
+ {{- block['text'] }}
+ {%- else %}
+ {{- raise_exception('Only text chunks are supported in system message contents.') }}
+ {%- endif %}
+ {%- endfor %}
+ {%- endif %}
+ {{- '[/SYSTEM_PROMPT]' -}}
+ {%- set loop_messages = messages[1:] %}
+{%- else %}
+ {%- set loop_messages = messages %}
+ {%- if default_system_message != '' %}
+ {{- '[SYSTEM_PROMPT]' + default_system_message + '[/SYSTEM_PROMPT]' }}
+ {%- endif %}
+{%- endif %}
+
+
+{#- Tools definition #}
+{%- set tools_definition = '' %}
+{%- set has_tools = false %}
+{%- if tools is defined and tools is not none and tools|length > 0 %}
+ {%- set has_tools = true %}
+ {%- set tools_definition = '[AVAILABLE_TOOLS]' + (tools| tojson) + '[/AVAILABLE_TOOLS]' %}
+ {{- tools_definition }}
+{%- endif %}
+
+{#- Checks for alternating user/assistant messages. #}
+{%- set ns = namespace() %}
+{%- set ns.index = 0 %}
+{%- for message in loop_messages %}
+ {%- if message.role == 'user' or (message.role == 'assistant' and (message.tool_calls is not defined or message.tool_calls is none or message.tool_calls | length == 0)) %}
+ {%- if (message['role'] == 'user') != (ns.index % 2 == 0) %}
+ {{- raise_exception('After the optional system message, conversation roles must alternate user and assistant roles except for tool calls and results.') }}
+ {%- endif %}
+ {%- set ns.index = ns.index + 1 %}
+ {%- endif %}
+{%- endfor %}
+
+{#- Handle conversation messages. #}
+{%- for message in loop_messages %}
+
+ {#- User messages supports text content. #}
+ {%- if message['role'] == 'user' %}
+ {%- if message['content'] is string %}
+ {{- '[INST]' + message['content'] + '[/INST]' }}
+ {%- elif message['content'] | length > 0 %}
+ {{- '[INST]' }}
+ {%- set sorted_blocks = message['content'] | sort(attribute='type') %}
+ {%- for block in sorted_blocks %}
+ {%- if block['type'] == 'text' %}
+ {{- block['text'] }}
+ {%- else %}
+ {{- raise_exception('Only text chunks are supported in user message content.') }}
+ {%- endif %}
+ {%- endfor %}
+ {{- '[/INST]' }}
+ {%- else %}
+ {{- raise_exception('User message must have a string or a list of chunks in content') }}
+ {%- endif %}
+
+ {#- Assistant messages supports text content or text chunks. #}
+ {%- elif message['role'] == 'assistant' %}
+ {%- if (message['content'] is none or message['content'] == '' or message['content']|length == 0) and (message['tool_calls'] is not defined or message['tool_calls'] is none or message['tool_calls']|length == 0) %}
+ {{- raise_exception('Assistant message must have a string or a list of chunks in content or a list of tool calls.') }}
+ {%- endif %}
+
+ {%- if message['content'] is string and message['content'] != '' %}
+ {{- message['content'] }}
+ {%- elif message['content'] | length > 0 %}
+ {%- for block in message['content'] %}
+ {%- if block['type'] == 'text' %}
+ {{- block['text'] }}
+ {%- else %}
+ {{- raise_exception('Only text chunks are supported in assistant message contents.') }}
+ {%- endif %}
+ {%- endfor %}
+ {%- endif %}
+
+ {%- if message['tool_calls'] is defined and message['tool_calls'] is not none and message['tool_calls']|length > 0 %}
+ {%- for tool in message['tool_calls'] %}
+ {{- '[TOOL_CALLS]' }}
+ {%- set name = tool['function']['name'] %}
+ {%- set arguments = tool['function']['arguments'] %}
+ {%- if arguments is not string %}
+ {%- set arguments = arguments|tojson|safe %}
+ {%- elif arguments == '' %}
+ {%- set arguments = '{}' %}
+ {%- endif %}
+ {{- name + '[ARGS]' + arguments }}
+ {%- endfor %}
+ {%- endif %}
+
+ {{- eos_token }}
+
+ {#- Tool messages only supports text content. #}
+ {%- elif message['role'] == 'tool' %}
+ {{- '[TOOL_RESULTS]' + message['content']|string + '[/TOOL_RESULTS]' }}
+
+ {#- Raise exception for unsupported roles. #}
+ {%- else %}
+ {{- raise_exception('Only user, assistant and tool roles are supported, got ' + message['role'] + '.') }}
+ {%- endif %}
+{%- endfor %}
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..46923edf5d6c49bc3730e0661f77aef11c79a82a
--- /dev/null
+++ b/config.json
@@ -0,0 +1,49 @@
+{
+ "architectures": [
+ "Ministral3ForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "dtype": "bfloat16",
+ "eos_token_id": 2,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 12288,
+ "initializer_range": 0.02,
+ "intermediate_size": 28672,
+ "max_position_embeddings": 262144,
+ "model_type": "ministral3",
+ "num_attention_heads": 96,
+ "num_hidden_layers": 88,
+ "num_key_value_heads": 8,
+ "pad_token_id": 11,
+ "quantization_config": {
+ "activation_scheme": "static",
+ "dequantize": false,
+ "modules_to_not_convert": [
+ "model.vision_tower",
+ "model.multi_modal_projector",
+ "lm_head"
+ ],
+ "quant_method": "fp8",
+ "weight_block_size": null
+ },
+ "rms_norm_eps": 1e-05,
+ "rope_parameters": {
+ "beta_fast": 4.0,
+ "beta_slow": 1.0,
+ "factor": 64.0,
+ "mscale": 1.0,
+ "mscale_all_dim": 0.0,
+ "original_max_position_embeddings": 4096,
+ "llama_4_scaling_beta": 0.0,
+ "rope_theta": 1000000.0,
+ "rope_type": "yarn",
+ "type": "yarn"
+ },
+ "sliding_window": null,
+ "tie_word_embeddings": false,
+ "transformers_version": "5.0.0.dev0",
+ "use_cache": true,
+ "vocab_size": 131072
+}
diff --git a/consolidated-00001-of-00027.safetensors b/consolidated-00001-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c37f56c36eba312ba5d029abfe51d40c6b7ddac0
--- /dev/null
+++ b/consolidated-00001-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41a1d9f53880fc6a2620f1a3ac964c0cd5e934a93b34c010e8ef84af08c5c123
+size 4832018962
diff --git a/consolidated-00002-of-00027.safetensors b/consolidated-00002-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..577d3999c4a5c766ff1e3570e4d02db8f7046135
--- /dev/null
+++ b/consolidated-00002-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:55df7d0f5531ec8738878aac92d13518ad79d1f469604203524ba5c8f9f23b08
+size 4869767072
diff --git a/consolidated-00003-of-00027.safetensors b/consolidated-00003-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6a0d2151cce50f390edfc4d4d6b82132a104f2a6
--- /dev/null
+++ b/consolidated-00003-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:999254e95bbd3f2dd935d73bd429e4d12ce558cf8eb677158f1925062227455f
+size 4819435716
diff --git a/consolidated-00004-of-00027.safetensors b/consolidated-00004-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7ecf618d66804c69b854b3293fad69d3ced3aa9d
--- /dev/null
+++ b/consolidated-00004-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c450acc8814cf782e5ec65d80302b0fec1c05c058c23041870831968b2a39b1f
+size 4869767048
diff --git a/consolidated-00005-of-00027.safetensors b/consolidated-00005-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3002450d1ebd739922ea73896a1e1aa6d946df6e
--- /dev/null
+++ b/consolidated-00005-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:126aaa373b2e1288f71b45a215560b420c4f45dac182a24d432d0cf481a5cac3
+size 4819435716
diff --git a/consolidated-00006-of-00027.safetensors b/consolidated-00006-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..303b4fd92a57be1af0759ffe977ba9c516509cd6
--- /dev/null
+++ b/consolidated-00006-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb23cd614e417cdf2b519ff1119588555a05c48849e1676f03dccf05c60313bf
+size 4869767072
diff --git a/consolidated-00007-of-00027.safetensors b/consolidated-00007-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a94050aab4dcbacce7bef1eddcbea048c6c4f8b4
--- /dev/null
+++ b/consolidated-00007-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b299b26298b6579e44233b63965e0d33d30dbbfb7a312ba0f069a415ad57b50
+size 4819435692
diff --git a/consolidated-00008-of-00027.safetensors b/consolidated-00008-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2cb7c52d0448f5896b3472417ece11e9abf6db57
--- /dev/null
+++ b/consolidated-00008-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e026023a836dbb551167e9e1da901a65a9c2f4aa1f8d893901dc97e7d131e31a
+size 4869767072
diff --git a/consolidated-00009-of-00027.safetensors b/consolidated-00009-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..257b090df793d9edf8235fb57799d7b87fc87123
--- /dev/null
+++ b/consolidated-00009-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa0616bd550f31be107a0d1d52d3a7d1a6f3019922c3b8adc6b12a939843ed5b
+size 4819435716
diff --git a/consolidated-00010-of-00027.safetensors b/consolidated-00010-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0eadca7700d6ce9edea34724edb016589b65126a
--- /dev/null
+++ b/consolidated-00010-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c22b3ab68a5b90ad49211a301712a1b93c745279ae0f8af145504003ff5f51b3
+size 4869767048
diff --git a/consolidated-00011-of-00027.safetensors b/consolidated-00011-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..893cbaafbd5f64a51538b527faabc6c11bc67d1f
--- /dev/null
+++ b/consolidated-00011-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e48bcb3e522921ef1834f8b451ecac019aa77606166e45730dde4459d77f6a5
+size 4819435716
diff --git a/consolidated-00012-of-00027.safetensors b/consolidated-00012-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d0085291a4f3b9d681d34f155d05f5457a9b463e
--- /dev/null
+++ b/consolidated-00012-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36811b35532c2aed82b698140b289483e091c931a40315461136a0ad6d78ebb6
+size 4869767072
diff --git a/consolidated-00013-of-00027.safetensors b/consolidated-00013-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b89b69b3d6d017ff7ecee43adef47a26f9f09cd9
--- /dev/null
+++ b/consolidated-00013-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50081ff8f4b8546c523ae4292cd44fd16058f1f9c2cff857c10062aa18f17f1a
+size 4819435700
diff --git a/consolidated-00014-of-00027.safetensors b/consolidated-00014-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f0e923f5d8d100e8d695c3bf10172f1d11dc7ad6
--- /dev/null
+++ b/consolidated-00014-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d9539a9ef53f39203f01354fcb19464b8ec629f3d3252bf8ba4b77ef36817af
+size 4869767064
diff --git a/consolidated-00015-of-00027.safetensors b/consolidated-00015-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..97f24d08f3e9d9c10575b21a9f54fe341065b840
--- /dev/null
+++ b/consolidated-00015-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7694d535d2be8144e0de7ba3681dd249cb1f19611823d0b2b4f436db70572380
+size 4819435716
diff --git a/consolidated-00016-of-00027.safetensors b/consolidated-00016-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..081574598dccd12c6c4d3c60c8311835d90c6e5b
--- /dev/null
+++ b/consolidated-00016-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f42ea7ad239a1fc3f8040d85baeae1e83c2eaf7e534c1273e2f86e4d50f8c986
+size 4869767064
diff --git a/consolidated-00017-of-00027.safetensors b/consolidated-00017-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8032d8f79b236fac31bd85fdae40dcab85f7d276
--- /dev/null
+++ b/consolidated-00017-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5de9a56af138bbae78307321b6098cf764b6671692e9ab467a87d2a651f6fa76
+size 4819435700
diff --git a/consolidated-00018-of-00027.safetensors b/consolidated-00018-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..245c98b88269b16e2bc73c8d4c392272601521e4
--- /dev/null
+++ b/consolidated-00018-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a8b7da5126ecff2f8ce1d5621f63bffaa0ba6eec96978291a7d7f067ccfa78f
+size 4869767072
diff --git a/consolidated-00019-of-00027.safetensors b/consolidated-00019-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a472009b248d5dfd1c48796f86d1dd8e67a3bec7
--- /dev/null
+++ b/consolidated-00019-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e6021ae214c8f6974ebce78e6227d2d9e4b1924601cf19d8048feb917c0f1cf
+size 4819435716
diff --git a/consolidated-00020-of-00027.safetensors b/consolidated-00020-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1394860ed6e55babc84fa9431063f95b682b21be
--- /dev/null
+++ b/consolidated-00020-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76c1c1de19846445245121380d7e09094b6c2df68e5a0570eb7ec3d3cbd7116d
+size 4869767048
diff --git a/consolidated-00021-of-00027.safetensors b/consolidated-00021-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c7c0911cc80ed8a0f2f93b2e78f619fbf46e74f7
--- /dev/null
+++ b/consolidated-00021-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:899af459e68bae626440689c5b785516a1f5686c4da654440e7e2532b92f475f
+size 4819435716
diff --git a/consolidated-00022-of-00027.safetensors b/consolidated-00022-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1b7347e2c642a3ae3070ad8f9b0234132859d841
--- /dev/null
+++ b/consolidated-00022-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e93f1a2e4ca8169894fc8ad12cede1b296665f2039c4b552959e5208762c4644
+size 4869767072
diff --git a/consolidated-00023-of-00027.safetensors b/consolidated-00023-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a19c4414693a81bd69c99aef786c799466e2a106
--- /dev/null
+++ b/consolidated-00023-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d9d46cd20f97c6d632ebeb47c43358173d0ebc697e7e8edd56ebdeef5ab11fe
+size 4819435692
diff --git a/consolidated-00024-of-00027.safetensors b/consolidated-00024-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..281a805e07ae0e9cb125b7e14f874d0e567b2867
--- /dev/null
+++ b/consolidated-00024-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01ce57780078614a71cbd32dae040eee117ae4f5c0d488b9d019725515a2fd40
+size 4869767072
diff --git a/consolidated-00025-of-00027.safetensors b/consolidated-00025-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6ca3869cd1f1a2ca25cb1e449c1357c9ee7d2b00
--- /dev/null
+++ b/consolidated-00025-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79248d80dc5ff15989cdb592c9599080026583c5eedc19992314d7abae8b37df
+size 4819435700
diff --git a/consolidated-00026-of-00027.safetensors b/consolidated-00026-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7501681d670520f57a409095d49ecba8cc2f7942
--- /dev/null
+++ b/consolidated-00026-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0e729946e8fc3337fe94866673bc71efe545c5ab9f77035005f4d0d6d0c9b6e
+size 3925918462
diff --git a/consolidated-00027-of-00027.safetensors b/consolidated-00027-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7d14e1b224f01fafc496f291bd0293538b6cd458
--- /dev/null
+++ b/consolidated-00027-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f761d9f95aa21635197c4acab24f39084611ef02e8927fb33e0f308ca42db7c2
+size 3221225576
diff --git a/consolidated.safetensors.index.json b/consolidated.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..6e925da53e8fa81a720908f2a982e247d19bb077
--- /dev/null
+++ b/consolidated.safetensors.index.json
@@ -0,0 +1,2034 @@
+{
+ "metadata": {
+ "total_size": 128249391520
+ },
+ "weight_map": {
+ "layers.0.attention.wk.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.0.attention.wk.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.0.attention.wk.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.0.attention.wo.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.0.attention.wo.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.0.attention.wo.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.0.attention.wq.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.0.attention.wq.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.0.attention.wq.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.0.attention.wv.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.0.attention.wv.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.0.attention.wv.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.0.attention_norm.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.0.feed_forward.w1.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.0.feed_forward.w1.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.0.feed_forward.w1.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.0.feed_forward.w2.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.0.feed_forward.w2.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.0.feed_forward.w2.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.0.feed_forward.w3.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.0.feed_forward.w3.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.0.feed_forward.w3.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.0.ffn_norm.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.1.attention.wk.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.1.attention.wk.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.1.attention.wk.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.1.attention.wo.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.1.attention.wo.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.1.attention.wo.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.1.attention.wq.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.1.attention.wq.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.1.attention.wq.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.1.attention.wv.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.1.attention.wv.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.1.attention.wv.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.1.attention_norm.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.1.feed_forward.w1.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.1.feed_forward.w1.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.1.feed_forward.w1.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.1.feed_forward.w2.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.1.feed_forward.w2.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.1.feed_forward.w2.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.1.feed_forward.w3.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.1.feed_forward.w3.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.1.feed_forward.w3.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.1.ffn_norm.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.10.attention.wk.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.10.attention.wk.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.10.attention.wk.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.10.attention.wo.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.10.attention.wo.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.10.attention.wo.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.10.attention.wq.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.10.attention.wq.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.10.attention.wq.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.10.attention.wv.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.10.attention.wv.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.10.attention.wv.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.10.attention_norm.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.10.feed_forward.w1.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.10.feed_forward.w1.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.10.feed_forward.w1.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.10.feed_forward.w2.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.10.feed_forward.w2.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.10.feed_forward.w2.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.10.feed_forward.w3.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.10.feed_forward.w3.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.10.feed_forward.w3.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.10.ffn_norm.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.11.attention.wk.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.11.attention.wk.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.11.attention.wk.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.11.attention.wo.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.11.attention.wo.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.11.attention.wo.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.11.attention.wq.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.11.attention.wq.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.11.attention.wq.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.11.attention.wv.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.11.attention.wv.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.11.attention.wv.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.11.attention_norm.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.11.feed_forward.w1.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.11.feed_forward.w1.weight": "consolidated-00001-of-00027.safetensors",
+ "layers.11.feed_forward.w1.qscale_weight": "consolidated-00001-of-00027.safetensors",
+ "layers.11.feed_forward.w2.qscale_act": "consolidated-00001-of-00027.safetensors",
+ "layers.11.feed_forward.w2.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.11.feed_forward.w2.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.11.feed_forward.w3.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.11.feed_forward.w3.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.11.feed_forward.w3.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.11.ffn_norm.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.12.attention.wk.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.12.attention.wk.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.12.attention.wk.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.12.attention.wo.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.12.attention.wo.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.12.attention.wo.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.12.attention.wq.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.12.attention.wq.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.12.attention.wq.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.12.attention.wv.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.12.attention.wv.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.12.attention.wv.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.12.attention_norm.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.12.feed_forward.w1.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.12.feed_forward.w1.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.12.feed_forward.w1.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.12.feed_forward.w2.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.12.feed_forward.w2.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.12.feed_forward.w2.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.12.feed_forward.w3.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.12.feed_forward.w3.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.12.feed_forward.w3.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.12.ffn_norm.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.13.attention.wk.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.13.attention.wk.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.13.attention.wk.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.13.attention.wo.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.13.attention.wo.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.13.attention.wo.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.13.attention.wq.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.13.attention.wq.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.13.attention.wq.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.13.attention.wv.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.13.attention.wv.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.13.attention.wv.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.13.attention_norm.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.13.feed_forward.w1.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.13.feed_forward.w1.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.13.feed_forward.w1.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.13.feed_forward.w2.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.13.feed_forward.w2.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.13.feed_forward.w2.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.13.feed_forward.w3.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.13.feed_forward.w3.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.13.feed_forward.w3.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.13.ffn_norm.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.14.attention.wk.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.14.attention.wk.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.14.attention.wk.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.14.attention.wo.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.14.attention.wo.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.14.attention.wo.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.14.attention.wq.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.14.attention.wq.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.14.attention.wq.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.14.attention.wv.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.14.attention.wv.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.14.attention.wv.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.14.attention_norm.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.14.feed_forward.w1.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.14.feed_forward.w1.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.14.feed_forward.w1.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.14.feed_forward.w2.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.14.feed_forward.w2.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.14.feed_forward.w2.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.14.feed_forward.w3.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.14.feed_forward.w3.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.14.feed_forward.w3.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.14.ffn_norm.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.15.attention.wk.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.15.attention.wk.weight": "consolidated-00002-of-00027.safetensors",
+ "layers.15.attention.wk.qscale_weight": "consolidated-00002-of-00027.safetensors",
+ "layers.15.attention.wo.qscale_act": "consolidated-00002-of-00027.safetensors",
+ "layers.15.attention.wo.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.15.attention.wo.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.15.attention.wq.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.15.attention.wq.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.15.attention.wq.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.15.attention.wv.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.15.attention.wv.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.15.attention.wv.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.15.attention_norm.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.15.feed_forward.w1.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.15.feed_forward.w1.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.15.feed_forward.w1.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.15.feed_forward.w2.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.15.feed_forward.w2.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.15.feed_forward.w2.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.15.feed_forward.w3.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.15.feed_forward.w3.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.15.feed_forward.w3.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.15.ffn_norm.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.16.attention.wk.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.16.attention.wk.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.16.attention.wk.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.16.attention.wo.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.16.attention.wo.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.16.attention.wo.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.16.attention.wq.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.16.attention.wq.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.16.attention.wq.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.16.attention.wv.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.16.attention.wv.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.16.attention.wv.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.16.attention_norm.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.16.feed_forward.w1.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.16.feed_forward.w1.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.16.feed_forward.w1.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.16.feed_forward.w2.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.16.feed_forward.w2.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.16.feed_forward.w2.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.16.feed_forward.w3.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.16.feed_forward.w3.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.16.feed_forward.w3.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.16.ffn_norm.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.17.attention.wk.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.17.attention.wk.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.17.attention.wk.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.17.attention.wo.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.17.attention.wo.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.17.attention.wo.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.17.attention.wq.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.17.attention.wq.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.17.attention.wq.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.17.attention.wv.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.17.attention.wv.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.17.attention.wv.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.17.attention_norm.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.17.feed_forward.w1.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.17.feed_forward.w1.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.17.feed_forward.w1.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.17.feed_forward.w2.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.17.feed_forward.w2.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.17.feed_forward.w2.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.17.feed_forward.w3.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.17.feed_forward.w3.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.17.feed_forward.w3.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.17.ffn_norm.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.18.attention.wk.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.18.attention.wk.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.18.attention.wk.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.18.attention.wo.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.18.attention.wo.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.18.attention.wo.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.18.attention.wq.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.18.attention.wq.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.18.attention.wq.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.18.attention.wv.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.18.attention.wv.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.18.attention.wv.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.18.attention_norm.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.18.feed_forward.w1.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.18.feed_forward.w1.weight": "consolidated-00003-of-00027.safetensors",
+ "layers.18.feed_forward.w1.qscale_weight": "consolidated-00003-of-00027.safetensors",
+ "layers.18.feed_forward.w2.qscale_act": "consolidated-00003-of-00027.safetensors",
+ "layers.18.feed_forward.w2.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.18.feed_forward.w2.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.18.feed_forward.w3.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.18.feed_forward.w3.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.18.feed_forward.w3.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.18.ffn_norm.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.19.attention.wk.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.19.attention.wk.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.19.attention.wk.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.19.attention.wo.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.19.attention.wo.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.19.attention.wo.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.19.attention.wq.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.19.attention.wq.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.19.attention.wq.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.19.attention.wv.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.19.attention.wv.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.19.attention.wv.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.19.attention_norm.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.19.feed_forward.w1.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.19.feed_forward.w1.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.19.feed_forward.w1.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.19.feed_forward.w2.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.19.feed_forward.w2.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.19.feed_forward.w2.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.19.feed_forward.w3.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.19.feed_forward.w3.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.19.feed_forward.w3.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.19.ffn_norm.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.2.attention.wk.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.2.attention.wk.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.2.attention.wk.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.2.attention.wo.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.2.attention.wo.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.2.attention.wo.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.2.attention.wq.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.2.attention.wq.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.2.attention.wq.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.2.attention.wv.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.2.attention.wv.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.2.attention.wv.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.2.attention_norm.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.2.feed_forward.w1.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.2.feed_forward.w1.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.2.feed_forward.w1.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.2.feed_forward.w2.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.2.feed_forward.w2.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.2.feed_forward.w2.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.2.feed_forward.w3.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.2.feed_forward.w3.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.2.feed_forward.w3.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.2.ffn_norm.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.20.attention.wk.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.20.attention.wk.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.20.attention.wk.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.20.attention.wo.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.20.attention.wo.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.20.attention.wo.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.20.attention.wq.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.20.attention.wq.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.20.attention.wq.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.20.attention.wv.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.20.attention.wv.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.20.attention.wv.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.20.attention_norm.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.20.feed_forward.w1.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.20.feed_forward.w1.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.20.feed_forward.w1.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.20.feed_forward.w2.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.20.feed_forward.w2.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.20.feed_forward.w2.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.20.feed_forward.w3.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.20.feed_forward.w3.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.20.feed_forward.w3.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.20.ffn_norm.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.21.attention.wk.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.21.attention.wk.weight": "consolidated-00004-of-00027.safetensors",
+ "layers.21.attention.wk.qscale_weight": "consolidated-00004-of-00027.safetensors",
+ "layers.21.attention.wo.qscale_act": "consolidated-00004-of-00027.safetensors",
+ "layers.21.attention.wo.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.21.attention.wo.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.21.attention.wq.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.21.attention.wq.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.21.attention.wq.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.21.attention.wv.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.21.attention.wv.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.21.attention.wv.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.21.attention_norm.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.21.feed_forward.w1.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.21.feed_forward.w1.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.21.feed_forward.w1.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.21.feed_forward.w2.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.21.feed_forward.w2.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.21.feed_forward.w2.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.21.feed_forward.w3.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.21.feed_forward.w3.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.21.feed_forward.w3.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.21.ffn_norm.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.22.attention.wk.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.22.attention.wk.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.22.attention.wk.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.22.attention.wo.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.22.attention.wo.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.22.attention.wo.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.22.attention.wq.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.22.attention.wq.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.22.attention.wq.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.22.attention.wv.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.22.attention.wv.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.22.attention.wv.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.22.attention_norm.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.22.feed_forward.w1.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.22.feed_forward.w1.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.22.feed_forward.w1.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.22.feed_forward.w2.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.22.feed_forward.w2.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.22.feed_forward.w2.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.22.feed_forward.w3.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.22.feed_forward.w3.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.22.feed_forward.w3.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.22.ffn_norm.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.23.attention.wk.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.23.attention.wk.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.23.attention.wk.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.23.attention.wo.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.23.attention.wo.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.23.attention.wo.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.23.attention.wq.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.23.attention.wq.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.23.attention.wq.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.23.attention.wv.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.23.attention.wv.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.23.attention.wv.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.23.attention_norm.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.23.feed_forward.w1.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.23.feed_forward.w1.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.23.feed_forward.w1.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.23.feed_forward.w2.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.23.feed_forward.w2.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.23.feed_forward.w2.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.23.feed_forward.w3.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.23.feed_forward.w3.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.23.feed_forward.w3.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.23.ffn_norm.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.24.attention.wk.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.24.attention.wk.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.24.attention.wk.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.24.attention.wo.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.24.attention.wo.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.24.attention.wo.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.24.attention.wq.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.24.attention.wq.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.24.attention.wq.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.24.attention.wv.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.24.attention.wv.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.24.attention.wv.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.24.attention_norm.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.24.feed_forward.w1.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.24.feed_forward.w1.weight": "consolidated-00005-of-00027.safetensors",
+ "layers.24.feed_forward.w1.qscale_weight": "consolidated-00005-of-00027.safetensors",
+ "layers.24.feed_forward.w2.qscale_act": "consolidated-00005-of-00027.safetensors",
+ "layers.24.feed_forward.w2.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.24.feed_forward.w2.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.24.feed_forward.w3.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.24.feed_forward.w3.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.24.feed_forward.w3.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.24.ffn_norm.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.25.attention.wk.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.25.attention.wk.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.25.attention.wk.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.25.attention.wo.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.25.attention.wo.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.25.attention.wo.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.25.attention.wq.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.25.attention.wq.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.25.attention.wq.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.25.attention.wv.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.25.attention.wv.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.25.attention.wv.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.25.attention_norm.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.25.feed_forward.w1.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.25.feed_forward.w1.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.25.feed_forward.w1.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.25.feed_forward.w2.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.25.feed_forward.w2.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.25.feed_forward.w2.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.25.feed_forward.w3.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.25.feed_forward.w3.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.25.feed_forward.w3.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.25.ffn_norm.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.26.attention.wk.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.26.attention.wk.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.26.attention.wk.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.26.attention.wo.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.26.attention.wo.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.26.attention.wo.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.26.attention.wq.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.26.attention.wq.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.26.attention.wq.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.26.attention.wv.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.26.attention.wv.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.26.attention.wv.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.26.attention_norm.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.26.feed_forward.w1.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.26.feed_forward.w1.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.26.feed_forward.w1.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.26.feed_forward.w2.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.26.feed_forward.w2.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.26.feed_forward.w2.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.26.feed_forward.w3.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.26.feed_forward.w3.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.26.feed_forward.w3.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.26.ffn_norm.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.27.attention.wk.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.27.attention.wk.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.27.attention.wk.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.27.attention.wo.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.27.attention.wo.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.27.attention.wo.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.27.attention.wq.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.27.attention.wq.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.27.attention.wq.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.27.attention.wv.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.27.attention.wv.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.27.attention.wv.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.27.attention_norm.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.27.feed_forward.w1.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.27.feed_forward.w1.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.27.feed_forward.w1.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.27.feed_forward.w2.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.27.feed_forward.w2.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.27.feed_forward.w2.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.27.feed_forward.w3.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.27.feed_forward.w3.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.27.feed_forward.w3.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.27.ffn_norm.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.28.attention.wk.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.28.attention.wk.weight": "consolidated-00006-of-00027.safetensors",
+ "layers.28.attention.wk.qscale_weight": "consolidated-00006-of-00027.safetensors",
+ "layers.28.attention.wo.qscale_act": "consolidated-00006-of-00027.safetensors",
+ "layers.28.attention.wo.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.28.attention.wo.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.28.attention.wq.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.28.attention.wq.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.28.attention.wq.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.28.attention.wv.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.28.attention.wv.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.28.attention.wv.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.28.attention_norm.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.28.feed_forward.w1.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.28.feed_forward.w1.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.28.feed_forward.w1.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.28.feed_forward.w2.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.28.feed_forward.w2.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.28.feed_forward.w2.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.28.feed_forward.w3.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.28.feed_forward.w3.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.28.feed_forward.w3.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.28.ffn_norm.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.29.attention.wk.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.29.attention.wk.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.29.attention.wk.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.29.attention.wo.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.29.attention.wo.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.29.attention.wo.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.29.attention.wq.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.29.attention.wq.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.29.attention.wq.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.29.attention.wv.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.29.attention.wv.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.29.attention.wv.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.29.attention_norm.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.29.feed_forward.w1.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.29.feed_forward.w1.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.29.feed_forward.w1.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.29.feed_forward.w2.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.29.feed_forward.w2.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.29.feed_forward.w2.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.29.feed_forward.w3.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.29.feed_forward.w3.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.29.feed_forward.w3.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.29.ffn_norm.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.3.attention.wk.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.3.attention.wk.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.3.attention.wk.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.3.attention.wo.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.3.attention.wo.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.3.attention.wo.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.3.attention.wq.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.3.attention.wq.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.3.attention.wq.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.3.attention.wv.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.3.attention.wv.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.3.attention.wv.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.3.attention_norm.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.3.feed_forward.w1.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.3.feed_forward.w1.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.3.feed_forward.w1.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.3.feed_forward.w2.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.3.feed_forward.w2.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.3.feed_forward.w2.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.3.feed_forward.w3.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.3.feed_forward.w3.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.3.feed_forward.w3.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.3.ffn_norm.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.30.attention.wk.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.30.attention.wk.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.30.attention.wk.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.30.attention.wo.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.30.attention.wo.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.30.attention.wo.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.30.attention.wq.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.30.attention.wq.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.30.attention.wq.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.30.attention.wv.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.30.attention.wv.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.30.attention.wv.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.30.attention_norm.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.30.feed_forward.w1.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.30.feed_forward.w1.weight": "consolidated-00007-of-00027.safetensors",
+ "layers.30.feed_forward.w1.qscale_weight": "consolidated-00007-of-00027.safetensors",
+ "layers.30.feed_forward.w2.qscale_act": "consolidated-00007-of-00027.safetensors",
+ "layers.30.feed_forward.w2.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.30.feed_forward.w2.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.30.feed_forward.w3.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.30.feed_forward.w3.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.30.feed_forward.w3.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.30.ffn_norm.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.31.attention.wk.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.31.attention.wk.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.31.attention.wk.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.31.attention.wo.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.31.attention.wo.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.31.attention.wo.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.31.attention.wq.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.31.attention.wq.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.31.attention.wq.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.31.attention.wv.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.31.attention.wv.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.31.attention.wv.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.31.attention_norm.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.31.feed_forward.w1.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.31.feed_forward.w1.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.31.feed_forward.w1.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.31.feed_forward.w2.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.31.feed_forward.w2.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.31.feed_forward.w2.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.31.feed_forward.w3.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.31.feed_forward.w3.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.31.feed_forward.w3.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.31.ffn_norm.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.32.attention.wk.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.32.attention.wk.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.32.attention.wk.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.32.attention.wo.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.32.attention.wo.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.32.attention.wo.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.32.attention.wq.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.32.attention.wq.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.32.attention.wq.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.32.attention.wv.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.32.attention.wv.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.32.attention.wv.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.32.attention_norm.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.32.feed_forward.w1.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.32.feed_forward.w1.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.32.feed_forward.w1.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.32.feed_forward.w2.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.32.feed_forward.w2.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.32.feed_forward.w2.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.32.feed_forward.w3.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.32.feed_forward.w3.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.32.feed_forward.w3.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.32.ffn_norm.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.33.attention.wk.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.33.attention.wk.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.33.attention.wk.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.33.attention.wo.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.33.attention.wo.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.33.attention.wo.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.33.attention.wq.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.33.attention.wq.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.33.attention.wq.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.33.attention.wv.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.33.attention.wv.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.33.attention.wv.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.33.attention_norm.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.33.feed_forward.w1.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.33.feed_forward.w1.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.33.feed_forward.w1.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.33.feed_forward.w2.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.33.feed_forward.w2.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.33.feed_forward.w2.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.33.feed_forward.w3.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.33.feed_forward.w3.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.33.feed_forward.w3.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.33.ffn_norm.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.34.attention.wk.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.34.attention.wk.weight": "consolidated-00008-of-00027.safetensors",
+ "layers.34.attention.wk.qscale_weight": "consolidated-00008-of-00027.safetensors",
+ "layers.34.attention.wo.qscale_act": "consolidated-00008-of-00027.safetensors",
+ "layers.34.attention.wo.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.34.attention.wo.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.34.attention.wq.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.34.attention.wq.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.34.attention.wq.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.34.attention.wv.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.34.attention.wv.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.34.attention.wv.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.34.attention_norm.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.34.feed_forward.w1.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.34.feed_forward.w1.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.34.feed_forward.w1.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.34.feed_forward.w2.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.34.feed_forward.w2.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.34.feed_forward.w2.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.34.feed_forward.w3.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.34.feed_forward.w3.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.34.feed_forward.w3.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.34.ffn_norm.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.35.attention.wk.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.35.attention.wk.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.35.attention.wk.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.35.attention.wo.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.35.attention.wo.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.35.attention.wo.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.35.attention.wq.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.35.attention.wq.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.35.attention.wq.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.35.attention.wv.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.35.attention.wv.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.35.attention.wv.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.35.attention_norm.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.35.feed_forward.w1.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.35.feed_forward.w1.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.35.feed_forward.w1.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.35.feed_forward.w2.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.35.feed_forward.w2.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.35.feed_forward.w2.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.35.feed_forward.w3.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.35.feed_forward.w3.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.35.feed_forward.w3.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.35.ffn_norm.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.36.attention.wk.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.36.attention.wk.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.36.attention.wk.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.36.attention.wo.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.36.attention.wo.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.36.attention.wo.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.36.attention.wq.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.36.attention.wq.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.36.attention.wq.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.36.attention.wv.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.36.attention.wv.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.36.attention.wv.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.36.attention_norm.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.36.feed_forward.w1.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.36.feed_forward.w1.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.36.feed_forward.w1.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.36.feed_forward.w2.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.36.feed_forward.w2.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.36.feed_forward.w2.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.36.feed_forward.w3.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.36.feed_forward.w3.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.36.feed_forward.w3.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.36.ffn_norm.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.37.attention.wk.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.37.attention.wk.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.37.attention.wk.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.37.attention.wo.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.37.attention.wo.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.37.attention.wo.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.37.attention.wq.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.37.attention.wq.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.37.attention.wq.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.37.attention.wv.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.37.attention.wv.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.37.attention.wv.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.37.attention_norm.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.37.feed_forward.w1.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.37.feed_forward.w1.weight": "consolidated-00009-of-00027.safetensors",
+ "layers.37.feed_forward.w1.qscale_weight": "consolidated-00009-of-00027.safetensors",
+ "layers.37.feed_forward.w2.qscale_act": "consolidated-00009-of-00027.safetensors",
+ "layers.37.feed_forward.w2.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.37.feed_forward.w2.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.37.feed_forward.w3.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.37.feed_forward.w3.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.37.feed_forward.w3.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.37.ffn_norm.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.38.attention.wk.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.38.attention.wk.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.38.attention.wk.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.38.attention.wo.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.38.attention.wo.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.38.attention.wo.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.38.attention.wq.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.38.attention.wq.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.38.attention.wq.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.38.attention.wv.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.38.attention.wv.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.38.attention.wv.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.38.attention_norm.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.38.feed_forward.w1.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.38.feed_forward.w1.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.38.feed_forward.w1.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.38.feed_forward.w2.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.38.feed_forward.w2.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.38.feed_forward.w2.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.38.feed_forward.w3.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.38.feed_forward.w3.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.38.feed_forward.w3.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.38.ffn_norm.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.39.attention.wk.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.39.attention.wk.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.39.attention.wk.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.39.attention.wo.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.39.attention.wo.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.39.attention.wo.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.39.attention.wq.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.39.attention.wq.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.39.attention.wq.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.39.attention.wv.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.39.attention.wv.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.39.attention.wv.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.39.attention_norm.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.39.feed_forward.w1.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.39.feed_forward.w1.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.39.feed_forward.w1.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.39.feed_forward.w2.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.39.feed_forward.w2.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.39.feed_forward.w2.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.39.feed_forward.w3.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.39.feed_forward.w3.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.39.feed_forward.w3.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.39.ffn_norm.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.4.attention.wk.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.4.attention.wk.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.4.attention.wk.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.4.attention.wo.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.4.attention.wo.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.4.attention.wo.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.4.attention.wq.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.4.attention.wq.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.4.attention.wq.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.4.attention.wv.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.4.attention.wv.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.4.attention.wv.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.4.attention_norm.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.4.feed_forward.w1.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.4.feed_forward.w1.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.4.feed_forward.w1.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.4.feed_forward.w2.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.4.feed_forward.w2.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.4.feed_forward.w2.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.4.feed_forward.w3.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.4.feed_forward.w3.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.4.feed_forward.w3.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.4.ffn_norm.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.40.attention.wk.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.40.attention.wk.weight": "consolidated-00010-of-00027.safetensors",
+ "layers.40.attention.wk.qscale_weight": "consolidated-00010-of-00027.safetensors",
+ "layers.40.attention.wo.qscale_act": "consolidated-00010-of-00027.safetensors",
+ "layers.40.attention.wo.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.40.attention.wo.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.40.attention.wq.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.40.attention.wq.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.40.attention.wq.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.40.attention.wv.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.40.attention.wv.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.40.attention.wv.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.40.attention_norm.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.40.feed_forward.w1.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.40.feed_forward.w1.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.40.feed_forward.w1.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.40.feed_forward.w2.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.40.feed_forward.w2.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.40.feed_forward.w2.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.40.feed_forward.w3.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.40.feed_forward.w3.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.40.feed_forward.w3.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.40.ffn_norm.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.41.attention.wk.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.41.attention.wk.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.41.attention.wk.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.41.attention.wo.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.41.attention.wo.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.41.attention.wo.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.41.attention.wq.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.41.attention.wq.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.41.attention.wq.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.41.attention.wv.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.41.attention.wv.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.41.attention.wv.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.41.attention_norm.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.41.feed_forward.w1.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.41.feed_forward.w1.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.41.feed_forward.w1.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.41.feed_forward.w2.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.41.feed_forward.w2.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.41.feed_forward.w2.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.41.feed_forward.w3.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.41.feed_forward.w3.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.41.feed_forward.w3.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.41.ffn_norm.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.42.attention.wk.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.42.attention.wk.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.42.attention.wk.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.42.attention.wo.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.42.attention.wo.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.42.attention.wo.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.42.attention.wq.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.42.attention.wq.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.42.attention.wq.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.42.attention.wv.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.42.attention.wv.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.42.attention.wv.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.42.attention_norm.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.42.feed_forward.w1.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.42.feed_forward.w1.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.42.feed_forward.w1.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.42.feed_forward.w2.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.42.feed_forward.w2.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.42.feed_forward.w2.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.42.feed_forward.w3.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.42.feed_forward.w3.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.42.feed_forward.w3.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.42.ffn_norm.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.43.attention.wk.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.43.attention.wk.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.43.attention.wk.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.43.attention.wo.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.43.attention.wo.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.43.attention.wo.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.43.attention.wq.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.43.attention.wq.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.43.attention.wq.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.43.attention.wv.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.43.attention.wv.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.43.attention.wv.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.43.attention_norm.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.43.feed_forward.w1.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.43.feed_forward.w1.weight": "consolidated-00011-of-00027.safetensors",
+ "layers.43.feed_forward.w1.qscale_weight": "consolidated-00011-of-00027.safetensors",
+ "layers.43.feed_forward.w2.qscale_act": "consolidated-00011-of-00027.safetensors",
+ "layers.43.feed_forward.w2.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.43.feed_forward.w2.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.43.feed_forward.w3.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.43.feed_forward.w3.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.43.feed_forward.w3.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.43.ffn_norm.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.44.attention.wk.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.44.attention.wk.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.44.attention.wk.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.44.attention.wo.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.44.attention.wo.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.44.attention.wo.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.44.attention.wq.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.44.attention.wq.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.44.attention.wq.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.44.attention.wv.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.44.attention.wv.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.44.attention.wv.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.44.attention_norm.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.44.feed_forward.w1.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.44.feed_forward.w1.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.44.feed_forward.w1.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.44.feed_forward.w2.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.44.feed_forward.w2.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.44.feed_forward.w2.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.44.feed_forward.w3.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.44.feed_forward.w3.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.44.feed_forward.w3.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.44.ffn_norm.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.45.attention.wk.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.45.attention.wk.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.45.attention.wk.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.45.attention.wo.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.45.attention.wo.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.45.attention.wo.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.45.attention.wq.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.45.attention.wq.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.45.attention.wq.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.45.attention.wv.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.45.attention.wv.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.45.attention.wv.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.45.attention_norm.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.45.feed_forward.w1.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.45.feed_forward.w1.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.45.feed_forward.w1.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.45.feed_forward.w2.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.45.feed_forward.w2.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.45.feed_forward.w2.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.45.feed_forward.w3.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.45.feed_forward.w3.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.45.feed_forward.w3.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.45.ffn_norm.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.46.attention.wk.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.46.attention.wk.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.46.attention.wk.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.46.attention.wo.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.46.attention.wo.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.46.attention.wo.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.46.attention.wq.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.46.attention.wq.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.46.attention.wq.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.46.attention.wv.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.46.attention.wv.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.46.attention.wv.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.46.attention_norm.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.46.feed_forward.w1.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.46.feed_forward.w1.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.46.feed_forward.w1.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.46.feed_forward.w2.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.46.feed_forward.w2.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.46.feed_forward.w2.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.46.feed_forward.w3.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.46.feed_forward.w3.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.46.feed_forward.w3.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.46.ffn_norm.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.47.attention.wk.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.47.attention.wk.weight": "consolidated-00012-of-00027.safetensors",
+ "layers.47.attention.wk.qscale_weight": "consolidated-00012-of-00027.safetensors",
+ "layers.47.attention.wo.qscale_act": "consolidated-00012-of-00027.safetensors",
+ "layers.47.attention.wo.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.47.attention.wo.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.47.attention.wq.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.47.attention.wq.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.47.attention.wq.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.47.attention.wv.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.47.attention.wv.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.47.attention.wv.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.47.attention_norm.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.47.feed_forward.w1.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.47.feed_forward.w1.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.47.feed_forward.w1.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.47.feed_forward.w2.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.47.feed_forward.w2.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.47.feed_forward.w2.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.47.feed_forward.w3.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.47.feed_forward.w3.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.47.feed_forward.w3.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.47.ffn_norm.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.48.attention.wk.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.48.attention.wk.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.48.attention.wk.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.48.attention.wo.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.48.attention.wo.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.48.attention.wo.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.48.attention.wq.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.48.attention.wq.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.48.attention.wq.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.48.attention.wv.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.48.attention.wv.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.48.attention.wv.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.48.attention_norm.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.48.feed_forward.w1.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.48.feed_forward.w1.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.48.feed_forward.w1.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.48.feed_forward.w2.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.48.feed_forward.w2.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.48.feed_forward.w2.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.48.feed_forward.w3.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.48.feed_forward.w3.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.48.feed_forward.w3.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.48.ffn_norm.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.49.attention.wk.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.49.attention.wk.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.49.attention.wk.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.49.attention.wo.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.49.attention.wo.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.49.attention.wo.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.49.attention.wq.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.49.attention.wq.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.49.attention.wq.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.49.attention.wv.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.49.attention.wv.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.49.attention.wv.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.49.attention_norm.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.49.feed_forward.w1.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.49.feed_forward.w1.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.49.feed_forward.w1.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.49.feed_forward.w2.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.49.feed_forward.w2.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.49.feed_forward.w2.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.49.feed_forward.w3.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.49.feed_forward.w3.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.49.feed_forward.w3.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.49.ffn_norm.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.5.attention.wk.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.5.attention.wk.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.5.attention.wk.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.5.attention.wo.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.5.attention.wo.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.5.attention.wo.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.5.attention.wq.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.5.attention.wq.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.5.attention.wq.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.5.attention.wv.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.5.attention.wv.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.5.attention.wv.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.5.attention_norm.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.5.feed_forward.w1.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.5.feed_forward.w1.weight": "consolidated-00013-of-00027.safetensors",
+ "layers.5.feed_forward.w1.qscale_weight": "consolidated-00013-of-00027.safetensors",
+ "layers.5.feed_forward.w2.qscale_act": "consolidated-00013-of-00027.safetensors",
+ "layers.5.feed_forward.w2.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.5.feed_forward.w2.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.5.feed_forward.w3.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.5.feed_forward.w3.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.5.feed_forward.w3.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.5.ffn_norm.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.50.attention.wk.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.50.attention.wk.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.50.attention.wk.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.50.attention.wo.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.50.attention.wo.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.50.attention.wo.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.50.attention.wq.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.50.attention.wq.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.50.attention.wq.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.50.attention.wv.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.50.attention.wv.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.50.attention.wv.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.50.attention_norm.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.50.feed_forward.w1.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.50.feed_forward.w1.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.50.feed_forward.w1.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.50.feed_forward.w2.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.50.feed_forward.w2.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.50.feed_forward.w2.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.50.feed_forward.w3.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.50.feed_forward.w3.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.50.feed_forward.w3.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.50.ffn_norm.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.51.attention.wk.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.51.attention.wk.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.51.attention.wk.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.51.attention.wo.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.51.attention.wo.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.51.attention.wo.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.51.attention.wq.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.51.attention.wq.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.51.attention.wq.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.51.attention.wv.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.51.attention.wv.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.51.attention.wv.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.51.attention_norm.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.51.feed_forward.w1.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.51.feed_forward.w1.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.51.feed_forward.w1.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.51.feed_forward.w2.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.51.feed_forward.w2.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.51.feed_forward.w2.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.51.feed_forward.w3.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.51.feed_forward.w3.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.51.feed_forward.w3.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.51.ffn_norm.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.52.attention.wk.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.52.attention.wk.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.52.attention.wk.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.52.attention.wo.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.52.attention.wo.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.52.attention.wo.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.52.attention.wq.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.52.attention.wq.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.52.attention.wq.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.52.attention.wv.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.52.attention.wv.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.52.attention.wv.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.52.attention_norm.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.52.feed_forward.w1.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.52.feed_forward.w1.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.52.feed_forward.w1.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.52.feed_forward.w2.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.52.feed_forward.w2.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.52.feed_forward.w2.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.52.feed_forward.w3.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.52.feed_forward.w3.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.52.feed_forward.w3.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.52.ffn_norm.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.53.attention.wk.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.53.attention.wk.weight": "consolidated-00014-of-00027.safetensors",
+ "layers.53.attention.wk.qscale_weight": "consolidated-00014-of-00027.safetensors",
+ "layers.53.attention.wo.qscale_act": "consolidated-00014-of-00027.safetensors",
+ "layers.53.attention.wo.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.53.attention.wo.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.53.attention.wq.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.53.attention.wq.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.53.attention.wq.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.53.attention.wv.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.53.attention.wv.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.53.attention.wv.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.53.attention_norm.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.53.feed_forward.w1.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.53.feed_forward.w1.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.53.feed_forward.w1.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.53.feed_forward.w2.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.53.feed_forward.w2.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.53.feed_forward.w2.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.53.feed_forward.w3.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.53.feed_forward.w3.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.53.feed_forward.w3.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.53.ffn_norm.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.54.attention.wk.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.54.attention.wk.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.54.attention.wk.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.54.attention.wo.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.54.attention.wo.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.54.attention.wo.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.54.attention.wq.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.54.attention.wq.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.54.attention.wq.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.54.attention.wv.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.54.attention.wv.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.54.attention.wv.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.54.attention_norm.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.54.feed_forward.w1.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.54.feed_forward.w1.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.54.feed_forward.w1.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.54.feed_forward.w2.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.54.feed_forward.w2.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.54.feed_forward.w2.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.54.feed_forward.w3.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.54.feed_forward.w3.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.54.feed_forward.w3.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.54.ffn_norm.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.55.attention.wk.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.55.attention.wk.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.55.attention.wk.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.55.attention.wo.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.55.attention.wo.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.55.attention.wo.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.55.attention.wq.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.55.attention.wq.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.55.attention.wq.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.55.attention.wv.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.55.attention.wv.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.55.attention.wv.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.55.attention_norm.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.55.feed_forward.w1.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.55.feed_forward.w1.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.55.feed_forward.w1.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.55.feed_forward.w2.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.55.feed_forward.w2.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.55.feed_forward.w2.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.55.feed_forward.w3.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.55.feed_forward.w3.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.55.feed_forward.w3.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.55.ffn_norm.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.56.attention.wk.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.56.attention.wk.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.56.attention.wk.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.56.attention.wo.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.56.attention.wo.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.56.attention.wo.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.56.attention.wq.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.56.attention.wq.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.56.attention.wq.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.56.attention.wv.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.56.attention.wv.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.56.attention.wv.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.56.attention_norm.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.56.feed_forward.w1.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.56.feed_forward.w1.weight": "consolidated-00015-of-00027.safetensors",
+ "layers.56.feed_forward.w1.qscale_weight": "consolidated-00015-of-00027.safetensors",
+ "layers.56.feed_forward.w2.qscale_act": "consolidated-00015-of-00027.safetensors",
+ "layers.56.feed_forward.w2.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.56.feed_forward.w2.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.56.feed_forward.w3.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.56.feed_forward.w3.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.56.feed_forward.w3.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.56.ffn_norm.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.57.attention.wk.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.57.attention.wk.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.57.attention.wk.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.57.attention.wo.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.57.attention.wo.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.57.attention.wo.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.57.attention.wq.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.57.attention.wq.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.57.attention.wq.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.57.attention.wv.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.57.attention.wv.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.57.attention.wv.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.57.attention_norm.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.57.feed_forward.w1.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.57.feed_forward.w1.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.57.feed_forward.w1.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.57.feed_forward.w2.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.57.feed_forward.w2.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.57.feed_forward.w2.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.57.feed_forward.w3.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.57.feed_forward.w3.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.57.feed_forward.w3.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.57.ffn_norm.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.58.attention.wk.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.58.attention.wk.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.58.attention.wk.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.58.attention.wo.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.58.attention.wo.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.58.attention.wo.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.58.attention.wq.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.58.attention.wq.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.58.attention.wq.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.58.attention.wv.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.58.attention.wv.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.58.attention.wv.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.58.attention_norm.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.58.feed_forward.w1.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.58.feed_forward.w1.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.58.feed_forward.w1.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.58.feed_forward.w2.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.58.feed_forward.w2.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.58.feed_forward.w2.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.58.feed_forward.w3.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.58.feed_forward.w3.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.58.feed_forward.w3.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.58.ffn_norm.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.59.attention.wk.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.59.attention.wk.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.59.attention.wk.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.59.attention.wo.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.59.attention.wo.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.59.attention.wo.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.59.attention.wq.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.59.attention.wq.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.59.attention.wq.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.59.attention.wv.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.59.attention.wv.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.59.attention.wv.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.59.attention_norm.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.59.feed_forward.w1.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.59.feed_forward.w1.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.59.feed_forward.w1.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.59.feed_forward.w2.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.59.feed_forward.w2.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.59.feed_forward.w2.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.59.feed_forward.w3.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.59.feed_forward.w3.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.59.feed_forward.w3.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.59.ffn_norm.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.6.attention.wk.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.6.attention.wk.weight": "consolidated-00016-of-00027.safetensors",
+ "layers.6.attention.wk.qscale_weight": "consolidated-00016-of-00027.safetensors",
+ "layers.6.attention.wo.qscale_act": "consolidated-00016-of-00027.safetensors",
+ "layers.6.attention.wo.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.6.attention.wo.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.6.attention.wq.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.6.attention.wq.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.6.attention.wq.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.6.attention.wv.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.6.attention.wv.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.6.attention.wv.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.6.attention_norm.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.6.feed_forward.w1.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.6.feed_forward.w1.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.6.feed_forward.w1.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.6.feed_forward.w2.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.6.feed_forward.w2.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.6.feed_forward.w2.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.6.feed_forward.w3.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.6.feed_forward.w3.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.6.feed_forward.w3.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.6.ffn_norm.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.60.attention.wk.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.60.attention.wk.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.60.attention.wk.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.60.attention.wo.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.60.attention.wo.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.60.attention.wo.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.60.attention.wq.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.60.attention.wq.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.60.attention.wq.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.60.attention.wv.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.60.attention.wv.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.60.attention.wv.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.60.attention_norm.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.60.feed_forward.w1.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.60.feed_forward.w1.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.60.feed_forward.w1.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.60.feed_forward.w2.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.60.feed_forward.w2.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.60.feed_forward.w2.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.60.feed_forward.w3.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.60.feed_forward.w3.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.60.feed_forward.w3.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.60.ffn_norm.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.61.attention.wk.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.61.attention.wk.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.61.attention.wk.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.61.attention.wo.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.61.attention.wo.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.61.attention.wo.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.61.attention.wq.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.61.attention.wq.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.61.attention.wq.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.61.attention.wv.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.61.attention.wv.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.61.attention.wv.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.61.attention_norm.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.61.feed_forward.w1.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.61.feed_forward.w1.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.61.feed_forward.w1.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.61.feed_forward.w2.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.61.feed_forward.w2.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.61.feed_forward.w2.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.61.feed_forward.w3.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.61.feed_forward.w3.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.61.feed_forward.w3.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.61.ffn_norm.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.62.attention.wk.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.62.attention.wk.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.62.attention.wk.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.62.attention.wo.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.62.attention.wo.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.62.attention.wo.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.62.attention.wq.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.62.attention.wq.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.62.attention.wq.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.62.attention.wv.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.62.attention.wv.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.62.attention.wv.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.62.attention_norm.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.62.feed_forward.w1.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.62.feed_forward.w1.weight": "consolidated-00017-of-00027.safetensors",
+ "layers.62.feed_forward.w1.qscale_weight": "consolidated-00017-of-00027.safetensors",
+ "layers.62.feed_forward.w2.qscale_act": "consolidated-00017-of-00027.safetensors",
+ "layers.62.feed_forward.w2.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.62.feed_forward.w2.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.62.feed_forward.w3.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.62.feed_forward.w3.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.62.feed_forward.w3.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.62.ffn_norm.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.63.attention.wk.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.63.attention.wk.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.63.attention.wk.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.63.attention.wo.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.63.attention.wo.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.63.attention.wo.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.63.attention.wq.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.63.attention.wq.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.63.attention.wq.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.63.attention.wv.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.63.attention.wv.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.63.attention.wv.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.63.attention_norm.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.63.feed_forward.w1.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.63.feed_forward.w1.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.63.feed_forward.w1.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.63.feed_forward.w2.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.63.feed_forward.w2.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.63.feed_forward.w2.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.63.feed_forward.w3.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.63.feed_forward.w3.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.63.feed_forward.w3.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.63.ffn_norm.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.64.attention.wk.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.64.attention.wk.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.64.attention.wk.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.64.attention.wo.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.64.attention.wo.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.64.attention.wo.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.64.attention.wq.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.64.attention.wq.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.64.attention.wq.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.64.attention.wv.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.64.attention.wv.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.64.attention.wv.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.64.attention_norm.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.64.feed_forward.w1.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.64.feed_forward.w1.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.64.feed_forward.w1.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.64.feed_forward.w2.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.64.feed_forward.w2.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.64.feed_forward.w2.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.64.feed_forward.w3.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.64.feed_forward.w3.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.64.feed_forward.w3.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.64.ffn_norm.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.65.attention.wk.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.65.attention.wk.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.65.attention.wk.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.65.attention.wo.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.65.attention.wo.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.65.attention.wo.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.65.attention.wq.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.65.attention.wq.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.65.attention.wq.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.65.attention.wv.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.65.attention.wv.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.65.attention.wv.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.65.attention_norm.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.65.feed_forward.w1.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.65.feed_forward.w1.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.65.feed_forward.w1.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.65.feed_forward.w2.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.65.feed_forward.w2.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.65.feed_forward.w2.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.65.feed_forward.w3.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.65.feed_forward.w3.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.65.feed_forward.w3.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.65.ffn_norm.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.66.attention.wk.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.66.attention.wk.weight": "consolidated-00018-of-00027.safetensors",
+ "layers.66.attention.wk.qscale_weight": "consolidated-00018-of-00027.safetensors",
+ "layers.66.attention.wo.qscale_act": "consolidated-00018-of-00027.safetensors",
+ "layers.66.attention.wo.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.66.attention.wo.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.66.attention.wq.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.66.attention.wq.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.66.attention.wq.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.66.attention.wv.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.66.attention.wv.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.66.attention.wv.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.66.attention_norm.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.66.feed_forward.w1.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.66.feed_forward.w1.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.66.feed_forward.w1.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.66.feed_forward.w2.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.66.feed_forward.w2.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.66.feed_forward.w2.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.66.feed_forward.w3.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.66.feed_forward.w3.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.66.feed_forward.w3.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.66.ffn_norm.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.67.attention.wk.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.67.attention.wk.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.67.attention.wk.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.67.attention.wo.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.67.attention.wo.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.67.attention.wo.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.67.attention.wq.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.67.attention.wq.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.67.attention.wq.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.67.attention.wv.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.67.attention.wv.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.67.attention.wv.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.67.attention_norm.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.67.feed_forward.w1.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.67.feed_forward.w1.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.67.feed_forward.w1.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.67.feed_forward.w2.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.67.feed_forward.w2.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.67.feed_forward.w2.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.67.feed_forward.w3.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.67.feed_forward.w3.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.67.feed_forward.w3.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.67.ffn_norm.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.68.attention.wk.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.68.attention.wk.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.68.attention.wk.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.68.attention.wo.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.68.attention.wo.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.68.attention.wo.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.68.attention.wq.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.68.attention.wq.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.68.attention.wq.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.68.attention.wv.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.68.attention.wv.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.68.attention.wv.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.68.attention_norm.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.68.feed_forward.w1.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.68.feed_forward.w1.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.68.feed_forward.w1.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.68.feed_forward.w2.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.68.feed_forward.w2.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.68.feed_forward.w2.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.68.feed_forward.w3.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.68.feed_forward.w3.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.68.feed_forward.w3.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.68.ffn_norm.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.69.attention.wk.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.69.attention.wk.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.69.attention.wk.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.69.attention.wo.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.69.attention.wo.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.69.attention.wo.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.69.attention.wq.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.69.attention.wq.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.69.attention.wq.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.69.attention.wv.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.69.attention.wv.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.69.attention.wv.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.69.attention_norm.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.69.feed_forward.w1.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.69.feed_forward.w1.weight": "consolidated-00019-of-00027.safetensors",
+ "layers.69.feed_forward.w1.qscale_weight": "consolidated-00019-of-00027.safetensors",
+ "layers.69.feed_forward.w2.qscale_act": "consolidated-00019-of-00027.safetensors",
+ "layers.69.feed_forward.w2.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.69.feed_forward.w2.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.69.feed_forward.w3.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.69.feed_forward.w3.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.69.feed_forward.w3.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.69.ffn_norm.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.7.attention.wk.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.7.attention.wk.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.7.attention.wk.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.7.attention.wo.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.7.attention.wo.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.7.attention.wo.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.7.attention.wq.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.7.attention.wq.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.7.attention.wq.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.7.attention.wv.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.7.attention.wv.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.7.attention.wv.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.7.attention_norm.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.7.feed_forward.w1.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.7.feed_forward.w1.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.7.feed_forward.w1.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.7.feed_forward.w2.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.7.feed_forward.w2.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.7.feed_forward.w2.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.7.feed_forward.w3.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.7.feed_forward.w3.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.7.feed_forward.w3.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.7.ffn_norm.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.70.attention.wk.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.70.attention.wk.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.70.attention.wk.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.70.attention.wo.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.70.attention.wo.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.70.attention.wo.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.70.attention.wq.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.70.attention.wq.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.70.attention.wq.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.70.attention.wv.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.70.attention.wv.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.70.attention.wv.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.70.attention_norm.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.70.feed_forward.w1.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.70.feed_forward.w1.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.70.feed_forward.w1.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.70.feed_forward.w2.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.70.feed_forward.w2.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.70.feed_forward.w2.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.70.feed_forward.w3.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.70.feed_forward.w3.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.70.feed_forward.w3.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.70.ffn_norm.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.71.attention.wk.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.71.attention.wk.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.71.attention.wk.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.71.attention.wo.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.71.attention.wo.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.71.attention.wo.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.71.attention.wq.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.71.attention.wq.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.71.attention.wq.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.71.attention.wv.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.71.attention.wv.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.71.attention.wv.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.71.attention_norm.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.71.feed_forward.w1.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.71.feed_forward.w1.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.71.feed_forward.w1.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.71.feed_forward.w2.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.71.feed_forward.w2.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.71.feed_forward.w2.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.71.feed_forward.w3.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.71.feed_forward.w3.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.71.feed_forward.w3.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.71.ffn_norm.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.72.attention.wk.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.72.attention.wk.weight": "consolidated-00020-of-00027.safetensors",
+ "layers.72.attention.wk.qscale_weight": "consolidated-00020-of-00027.safetensors",
+ "layers.72.attention.wo.qscale_act": "consolidated-00020-of-00027.safetensors",
+ "layers.72.attention.wo.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.72.attention.wo.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.72.attention.wq.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.72.attention.wq.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.72.attention.wq.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.72.attention.wv.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.72.attention.wv.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.72.attention.wv.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.72.attention_norm.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.72.feed_forward.w1.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.72.feed_forward.w1.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.72.feed_forward.w1.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.72.feed_forward.w2.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.72.feed_forward.w2.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.72.feed_forward.w2.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.72.feed_forward.w3.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.72.feed_forward.w3.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.72.feed_forward.w3.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.72.ffn_norm.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.73.attention.wk.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.73.attention.wk.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.73.attention.wk.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.73.attention.wo.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.73.attention.wo.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.73.attention.wo.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.73.attention.wq.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.73.attention.wq.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.73.attention.wq.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.73.attention.wv.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.73.attention.wv.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.73.attention.wv.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.73.attention_norm.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.73.feed_forward.w1.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.73.feed_forward.w1.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.73.feed_forward.w1.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.73.feed_forward.w2.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.73.feed_forward.w2.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.73.feed_forward.w2.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.73.feed_forward.w3.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.73.feed_forward.w3.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.73.feed_forward.w3.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.73.ffn_norm.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.74.attention.wk.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.74.attention.wk.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.74.attention.wk.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.74.attention.wo.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.74.attention.wo.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.74.attention.wo.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.74.attention.wq.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.74.attention.wq.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.74.attention.wq.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.74.attention.wv.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.74.attention.wv.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.74.attention.wv.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.74.attention_norm.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.74.feed_forward.w1.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.74.feed_forward.w1.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.74.feed_forward.w1.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.74.feed_forward.w2.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.74.feed_forward.w2.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.74.feed_forward.w2.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.74.feed_forward.w3.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.74.feed_forward.w3.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.74.feed_forward.w3.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.74.ffn_norm.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.75.attention.wk.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.75.attention.wk.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.75.attention.wk.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.75.attention.wo.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.75.attention.wo.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.75.attention.wo.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.75.attention.wq.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.75.attention.wq.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.75.attention.wq.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.75.attention.wv.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.75.attention.wv.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.75.attention.wv.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.75.attention_norm.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.75.feed_forward.w1.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.75.feed_forward.w1.weight": "consolidated-00021-of-00027.safetensors",
+ "layers.75.feed_forward.w1.qscale_weight": "consolidated-00021-of-00027.safetensors",
+ "layers.75.feed_forward.w2.qscale_act": "consolidated-00021-of-00027.safetensors",
+ "layers.75.feed_forward.w2.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.75.feed_forward.w2.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.75.feed_forward.w3.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.75.feed_forward.w3.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.75.feed_forward.w3.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.75.ffn_norm.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.76.attention.wk.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.76.attention.wk.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.76.attention.wk.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.76.attention.wo.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.76.attention.wo.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.76.attention.wo.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.76.attention.wq.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.76.attention.wq.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.76.attention.wq.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.76.attention.wv.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.76.attention.wv.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.76.attention.wv.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.76.attention_norm.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.76.feed_forward.w1.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.76.feed_forward.w1.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.76.feed_forward.w1.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.76.feed_forward.w2.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.76.feed_forward.w2.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.76.feed_forward.w2.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.76.feed_forward.w3.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.76.feed_forward.w3.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.76.feed_forward.w3.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.76.ffn_norm.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.77.attention.wk.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.77.attention.wk.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.77.attention.wk.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.77.attention.wo.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.77.attention.wo.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.77.attention.wo.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.77.attention.wq.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.77.attention.wq.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.77.attention.wq.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.77.attention.wv.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.77.attention.wv.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.77.attention.wv.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.77.attention_norm.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.77.feed_forward.w1.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.77.feed_forward.w1.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.77.feed_forward.w1.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.77.feed_forward.w2.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.77.feed_forward.w2.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.77.feed_forward.w2.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.77.feed_forward.w3.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.77.feed_forward.w3.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.77.feed_forward.w3.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.77.ffn_norm.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.78.attention.wk.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.78.attention.wk.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.78.attention.wk.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.78.attention.wo.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.78.attention.wo.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.78.attention.wo.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.78.attention.wq.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.78.attention.wq.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.78.attention.wq.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.78.attention.wv.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.78.attention.wv.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.78.attention.wv.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.78.attention_norm.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.78.feed_forward.w1.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.78.feed_forward.w1.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.78.feed_forward.w1.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.78.feed_forward.w2.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.78.feed_forward.w2.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.78.feed_forward.w2.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.78.feed_forward.w3.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.78.feed_forward.w3.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.78.feed_forward.w3.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.78.ffn_norm.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.79.attention.wk.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.79.attention.wk.weight": "consolidated-00022-of-00027.safetensors",
+ "layers.79.attention.wk.qscale_weight": "consolidated-00022-of-00027.safetensors",
+ "layers.79.attention.wo.qscale_act": "consolidated-00022-of-00027.safetensors",
+ "layers.79.attention.wo.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.79.attention.wo.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.79.attention.wq.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.79.attention.wq.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.79.attention.wq.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.79.attention.wv.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.79.attention.wv.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.79.attention.wv.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.79.attention_norm.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.79.feed_forward.w1.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.79.feed_forward.w1.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.79.feed_forward.w1.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.79.feed_forward.w2.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.79.feed_forward.w2.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.79.feed_forward.w2.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.79.feed_forward.w3.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.79.feed_forward.w3.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.79.feed_forward.w3.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.79.ffn_norm.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.8.attention.wk.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.8.attention.wk.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.8.attention.wk.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.8.attention.wo.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.8.attention.wo.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.8.attention.wo.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.8.attention.wq.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.8.attention.wq.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.8.attention.wq.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.8.attention.wv.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.8.attention.wv.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.8.attention.wv.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.8.attention_norm.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.8.feed_forward.w1.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.8.feed_forward.w1.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.8.feed_forward.w1.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.8.feed_forward.w2.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.8.feed_forward.w2.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.8.feed_forward.w2.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.8.feed_forward.w3.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.8.feed_forward.w3.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.8.feed_forward.w3.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.8.ffn_norm.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.80.attention.wk.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.80.attention.wk.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.80.attention.wk.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.80.attention.wo.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.80.attention.wo.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.80.attention.wo.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.80.attention.wq.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.80.attention.wq.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.80.attention.wq.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.80.attention.wv.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.80.attention.wv.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.80.attention.wv.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.80.attention_norm.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.80.feed_forward.w1.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.80.feed_forward.w1.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.80.feed_forward.w1.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.80.feed_forward.w2.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.80.feed_forward.w2.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.80.feed_forward.w2.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.80.feed_forward.w3.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.80.feed_forward.w3.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.80.feed_forward.w3.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.80.ffn_norm.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.81.attention.wk.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.81.attention.wk.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.81.attention.wk.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.81.attention.wo.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.81.attention.wo.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.81.attention.wo.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.81.attention.wq.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.81.attention.wq.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.81.attention.wq.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.81.attention.wv.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.81.attention.wv.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.81.attention.wv.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.81.attention_norm.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.81.feed_forward.w1.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.81.feed_forward.w1.weight": "consolidated-00023-of-00027.safetensors",
+ "layers.81.feed_forward.w1.qscale_weight": "consolidated-00023-of-00027.safetensors",
+ "layers.81.feed_forward.w2.qscale_act": "consolidated-00023-of-00027.safetensors",
+ "layers.81.feed_forward.w2.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.81.feed_forward.w2.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.81.feed_forward.w3.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.81.feed_forward.w3.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.81.feed_forward.w3.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.81.ffn_norm.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.82.attention.wk.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.82.attention.wk.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.82.attention.wk.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.82.attention.wo.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.82.attention.wo.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.82.attention.wo.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.82.attention.wq.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.82.attention.wq.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.82.attention.wq.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.82.attention.wv.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.82.attention.wv.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.82.attention.wv.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.82.attention_norm.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.82.feed_forward.w1.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.82.feed_forward.w1.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.82.feed_forward.w1.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.82.feed_forward.w2.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.82.feed_forward.w2.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.82.feed_forward.w2.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.82.feed_forward.w3.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.82.feed_forward.w3.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.82.feed_forward.w3.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.82.ffn_norm.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.83.attention.wk.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.83.attention.wk.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.83.attention.wk.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.83.attention.wo.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.83.attention.wo.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.83.attention.wo.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.83.attention.wq.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.83.attention.wq.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.83.attention.wq.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.83.attention.wv.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.83.attention.wv.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.83.attention.wv.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.83.attention_norm.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.83.feed_forward.w1.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.83.feed_forward.w1.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.83.feed_forward.w1.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.83.feed_forward.w2.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.83.feed_forward.w2.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.83.feed_forward.w2.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.83.feed_forward.w3.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.83.feed_forward.w3.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.83.feed_forward.w3.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.83.ffn_norm.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.84.attention.wk.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.84.attention.wk.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.84.attention.wk.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.84.attention.wo.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.84.attention.wo.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.84.attention.wo.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.84.attention.wq.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.84.attention.wq.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.84.attention.wq.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.84.attention.wv.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.84.attention.wv.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.84.attention.wv.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.84.attention_norm.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.84.feed_forward.w1.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.84.feed_forward.w1.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.84.feed_forward.w1.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.84.feed_forward.w2.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.84.feed_forward.w2.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.84.feed_forward.w2.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.84.feed_forward.w3.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.84.feed_forward.w3.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.84.feed_forward.w3.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.84.ffn_norm.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.85.attention.wk.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.85.attention.wk.weight": "consolidated-00024-of-00027.safetensors",
+ "layers.85.attention.wk.qscale_weight": "consolidated-00024-of-00027.safetensors",
+ "layers.85.attention.wo.qscale_act": "consolidated-00024-of-00027.safetensors",
+ "layers.85.attention.wo.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.85.attention.wo.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.85.attention.wq.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.85.attention.wq.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.85.attention.wq.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.85.attention.wv.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.85.attention.wv.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.85.attention.wv.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.85.attention_norm.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.85.feed_forward.w1.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.85.feed_forward.w1.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.85.feed_forward.w1.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.85.feed_forward.w2.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.85.feed_forward.w2.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.85.feed_forward.w2.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.85.feed_forward.w3.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.85.feed_forward.w3.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.85.feed_forward.w3.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.85.ffn_norm.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.86.attention.wk.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.86.attention.wk.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.86.attention.wk.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.86.attention.wo.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.86.attention.wo.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.86.attention.wo.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.86.attention.wq.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.86.attention.wq.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.86.attention.wq.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.86.attention.wv.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.86.attention.wv.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.86.attention.wv.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.86.attention_norm.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.86.feed_forward.w1.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.86.feed_forward.w1.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.86.feed_forward.w1.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.86.feed_forward.w2.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.86.feed_forward.w2.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.86.feed_forward.w2.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.86.feed_forward.w3.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.86.feed_forward.w3.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.86.feed_forward.w3.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.86.ffn_norm.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.87.attention.wk.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.87.attention.wk.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.87.attention.wk.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.87.attention.wo.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.87.attention.wo.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.87.attention.wo.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.87.attention.wq.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.87.attention.wq.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.87.attention.wq.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.87.attention.wv.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.87.attention.wv.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.87.attention.wv.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.87.attention_norm.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.87.feed_forward.w1.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.87.feed_forward.w1.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.87.feed_forward.w1.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.87.feed_forward.w2.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.87.feed_forward.w2.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.87.feed_forward.w2.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.87.feed_forward.w3.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.87.feed_forward.w3.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.87.feed_forward.w3.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.87.ffn_norm.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.9.attention.wk.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.9.attention.wk.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.9.attention.wk.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.9.attention.wo.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.9.attention.wo.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.9.attention.wo.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.9.attention.wq.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.9.attention.wq.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.9.attention.wq.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.9.attention.wv.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.9.attention.wv.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.9.attention.wv.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.9.attention_norm.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.9.feed_forward.w1.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.9.feed_forward.w1.weight": "consolidated-00025-of-00027.safetensors",
+ "layers.9.feed_forward.w1.qscale_weight": "consolidated-00025-of-00027.safetensors",
+ "layers.9.feed_forward.w2.qscale_act": "consolidated-00025-of-00027.safetensors",
+ "layers.9.feed_forward.w2.weight": "consolidated-00026-of-00027.safetensors",
+ "layers.9.feed_forward.w2.qscale_weight": "consolidated-00026-of-00027.safetensors",
+ "layers.9.feed_forward.w3.qscale_act": "consolidated-00026-of-00027.safetensors",
+ "layers.9.feed_forward.w3.weight": "consolidated-00026-of-00027.safetensors",
+ "layers.9.feed_forward.w3.qscale_weight": "consolidated-00026-of-00027.safetensors",
+ "layers.9.ffn_norm.weight": "consolidated-00026-of-00027.safetensors",
+ "norm.weight": "consolidated-00026-of-00027.safetensors",
+ "output.weight": "consolidated-00026-of-00027.safetensors",
+ "tok_embeddings.weight": "consolidated-00027-of-00027.safetensors"
+ }
+}
\ No newline at end of file
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..079491d5a2858b5945a497e1d6d793f5e5dd4d8f
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,8 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "pad_token_id": 11,
+ "max_length": 262144,
+ "transformers_version": "5.0.0.dev0"
+}
diff --git a/model-00001-of-00027.safetensors b/model-00001-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..841e1c06aa9c769c9b8a9272e867bfcc7a337da3
--- /dev/null
+++ b/model-00001-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:781c212ef947ce9bf7420c67051ac305dd119e7b87d2a13900cb58325b1d1784
+size 4932554972
diff --git a/model-00002-of-00027.safetensors b/model-00002-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..74a605cb07494a0522bf2698ad0377bc6419704c
--- /dev/null
+++ b/model-00002-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e5aa5f0fa2ce8977ad5ca20950b6bacdc96a798085fbd3c2b38ab558f9be9b4
+size 4857159948
diff --git a/model-00003-of-00027.safetensors b/model-00003-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f69d1ea4c8102bb4831cbc6314e870ac53adee4b
--- /dev/null
+++ b/model-00003-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:053083307ac43f8a9c1756164ac27618eceff1c7971f1b5ff4f6724048000549
+size 4832044552
diff --git a/model-00004-of-00027.safetensors b/model-00004-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..523d6434085aa16249a812341fbab33452e88e31
--- /dev/null
+++ b/model-00004-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15c4aeab34c552528ce7b454c797cab2e8e0a06172a2a7b3c7f7cf8a5dc6c457
+size 4857159964
diff --git a/model-00005-of-00027.safetensors b/model-00005-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5bc4da8d82ed635193ef5b9cf5113d5bac876edf
--- /dev/null
+++ b/model-00005-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6c6b396739e9596d99a62dae39ea08cd3457cd464beea704ba2b793b1dba752
+size 4832044640
diff --git a/model-00006-of-00027.safetensors b/model-00006-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f99d44e1a21909b0bfdc13b29ae0ed51c1bb62f0
--- /dev/null
+++ b/model-00006-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b3e26115ad5f6ecfceaee3bad1be5560225c662757d78321956a784ec3e421c
+size 4857160028
diff --git a/model-00007-of-00027.safetensors b/model-00007-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1f5aaa4fc782e9db4b0383a04b69903cc2876a62
--- /dev/null
+++ b/model-00007-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac774af7125434cc3dc118d3a6d49ab2e55ba6762acdf66a435b7a12cee10df9
+size 4832044640
diff --git a/model-00008-of-00027.safetensors b/model-00008-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..97dc5e73c325517005c1a395cefc33d66942bda4
--- /dev/null
+++ b/model-00008-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6527350c98d6e448b797e6375b2aeb3be68b02b96a3534803864f566c638902f
+size 4857160028
diff --git a/model-00009-of-00027.safetensors b/model-00009-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..02fd5c8cb990688b69d03df524b339b41b130704
--- /dev/null
+++ b/model-00009-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2be71444925d1da9862ab3f436dbaae77c2598ad4ee7b50d7c45ea8794e4eab
+size 4832044640
diff --git a/model-00010-of-00027.safetensors b/model-00010-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..310b96e598f06ee70425a44662e0f6b53167f723
--- /dev/null
+++ b/model-00010-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76be0bffbcd79a2f7c2df80ef517a6e26f0f318d42b42a0cd37d346112e32550
+size 4857160028
diff --git a/model-00011-of-00027.safetensors b/model-00011-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4fffce6a4efdc058f624efb459421d256994c647
--- /dev/null
+++ b/model-00011-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b0f61ff54214f92422640db324eb0c0339901926bc62581ae2f026cb5e6813a
+size 4832044640
diff --git a/model-00012-of-00027.safetensors b/model-00012-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..967a448809aedaaa470c512874812e032e3812e3
--- /dev/null
+++ b/model-00012-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04ada4e4feb5e8b8b4f318af65f748e24932ca55f4fa6cd371b87b46ea96668d
+size 4857160028
diff --git a/model-00013-of-00027.safetensors b/model-00013-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f623e9674b932b39b98c9844758dd37d2200fbd5
--- /dev/null
+++ b/model-00013-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac452ad1c5e800fd525d1699efbf42221e1afa6ad590d3f4d6c232cba2b753c1
+size 4832044640
diff --git a/model-00014-of-00027.safetensors b/model-00014-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fbe1f797aa6a6c3f84af754f894f1682e1043f70
--- /dev/null
+++ b/model-00014-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f419c01cccc805d8d391d754c12e3d26f3a588cd99ae9fe339c8f909eb142fa4
+size 4857160028
diff --git a/model-00015-of-00027.safetensors b/model-00015-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7e06243b3a9b68886dc8f38dbf397eebff76f20f
--- /dev/null
+++ b/model-00015-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b86730cb0e89f8a2482c2c4fdeaa8974c3184c15751d77b3f89f0f971612e7aa
+size 4832044640
diff --git a/model-00016-of-00027.safetensors b/model-00016-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a7439d353b82d0fdb996c4ef57d3dea68ff9fa59
--- /dev/null
+++ b/model-00016-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fa42d7049abc253a79e1cc5816b84111f3e51106f7f974a9329aebc49cb0cfc
+size 4857160028
diff --git a/model-00017-of-00027.safetensors b/model-00017-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5cbbd32a34ad1db09182b143a86bb8680e50ea66
--- /dev/null
+++ b/model-00017-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6c804f55cfaf713ef077a47c8c7765cb1b5cc47333db8ef96b907be13f8c007
+size 4832044640
diff --git a/model-00018-of-00027.safetensors b/model-00018-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..60916a5d808b0d5456ac197d81b21642dc0cdc3b
--- /dev/null
+++ b/model-00018-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36a36c428107b4df47b9a507cacb214e872c479e4058e59e2f9b1b041e9b2b6e
+size 4857160028
diff --git a/model-00019-of-00027.safetensors b/model-00019-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e1e977f91a264da2d66c158cc3080ad61c055716
--- /dev/null
+++ b/model-00019-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:846754e34f44adf3bd0b13d5604f7be2d568387116ba0d5fe7ad481e138881d6
+size 4832044640
diff --git a/model-00020-of-00027.safetensors b/model-00020-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d107410fe7c22a9aaa8f871e5a46b3c1cedbbacc
--- /dev/null
+++ b/model-00020-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27eeb54edb39464ef26412232bc03dbeb4f34eca2785b501728598af243cdc0a
+size 4857160028
diff --git a/model-00021-of-00027.safetensors b/model-00021-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ea48ce85bb9f0811beb5799538a11a210c94ae8a
--- /dev/null
+++ b/model-00021-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:847d0677a058c3fa67d2b2cbdeb4cb4f558e78bbf1d1fc6ec6541fc236b811e9
+size 4832044640
diff --git a/model-00022-of-00027.safetensors b/model-00022-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0aacb43d602a5fb33e8f284c4731b87cf5943153
--- /dev/null
+++ b/model-00022-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd1422ac4dcc14e8aef16daeb31f126196602dafa8cd5cbde82e97d165c37d9a
+size 4857160028
diff --git a/model-00023-of-00027.safetensors b/model-00023-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7b9fe780ba3b03bbaf8857fd8d48175086e5d113
--- /dev/null
+++ b/model-00023-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:967731268b7d85dde6f96ce4eeb504abb47afc6e82a812935e239cf146a8230c
+size 4832044640
diff --git a/model-00024-of-00027.safetensors b/model-00024-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8f4d24ac75bef8d0137c5d67e992e25596c404e5
--- /dev/null
+++ b/model-00024-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9512662da101c3d68084df55170411497384920815b0c2857550a6b950bb906
+size 4857160028
diff --git a/model-00025-of-00027.safetensors b/model-00025-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1a0b8e551d3a7424740d05091538b11e175a948d
--- /dev/null
+++ b/model-00025-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d5694a77496612d0ebc724ce691129f3b32636b83fd3f2869932ee63bf1bb06
+size 4832044640
diff --git a/model-00026-of-00027.safetensors b/model-00026-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ecb70288661b22df14cc5ebcfbfc6a29fa1db730
--- /dev/null
+++ b/model-00026-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9daf35b204151b403f94cc5f9ef69a270f6856cbbe587a50add816cce67427e
+size 3825383836
diff --git a/model-00027-of-00027.safetensors b/model-00027-of-00027.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a46a4b974a08380405347fac99d9620d2e9ed2c6
--- /dev/null
+++ b/model-00027-of-00027.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1321bdcc2da0cb311eca11d8e489997b5f652449c997e611e3c8f6ca017c83e3
+size 3221225600
diff --git a/model.safetensors.index.json b/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..debf1e29c901c4fe75e5b99591cb023ceab6ad11
--- /dev/null
+++ b/model.safetensors.index.json
@@ -0,0 +1,2035 @@
+{
+ "metadata": {
+ "total_parameters": 125025989840,
+ "total_size": 128249391520
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00027-of-00027.safetensors",
+ "model.embed_tokens.weight": "model-00001-of-00027.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00027.safetensors",
+ "model.layers.0.mlp.down_proj.activation_scale": "model-00001-of-00027.safetensors",
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00027.safetensors",
+ "model.layers.0.mlp.down_proj.weight_scale_inv": "model-00001-of-00027.safetensors",
+ "model.layers.0.mlp.gate_proj.activation_scale": "model-00001-of-00027.safetensors",
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00027.safetensors",
+ "model.layers.0.mlp.gate_proj.weight_scale_inv": "model-00001-of-00027.safetensors",
+ "model.layers.0.mlp.up_proj.activation_scale": "model-00001-of-00027.safetensors",
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00027.safetensors",
+ "model.layers.0.mlp.up_proj.weight_scale_inv": "model-00001-of-00027.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00027.safetensors",
+ "model.layers.0.self_attn.k_proj.activation_scale": "model-00001-of-00027.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00027.safetensors",
+ "model.layers.0.self_attn.k_proj.weight_scale_inv": "model-00001-of-00027.safetensors",
+ "model.layers.0.self_attn.o_proj.activation_scale": "model-00001-of-00027.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00027.safetensors",
+ "model.layers.0.self_attn.o_proj.weight_scale_inv": "model-00001-of-00027.safetensors",
+ "model.layers.0.self_attn.q_proj.activation_scale": "model-00001-of-00027.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00027.safetensors",
+ "model.layers.0.self_attn.q_proj.weight_scale_inv": "model-00001-of-00027.safetensors",
+ "model.layers.0.self_attn.v_proj.activation_scale": "model-00001-of-00027.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00027.safetensors",
+ "model.layers.0.self_attn.v_proj.weight_scale_inv": "model-00001-of-00027.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00002-of-00027.safetensors",
+ "model.layers.1.mlp.down_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.1.mlp.down_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.1.mlp.down_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.1.mlp.gate_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.1.mlp.gate_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.1.mlp.gate_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.1.mlp.up_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.1.mlp.up_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.1.mlp.up_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00027.safetensors",
+ "model.layers.1.self_attn.k_proj.activation_scale": "model-00001-of-00027.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00027.safetensors",
+ "model.layers.1.self_attn.k_proj.weight_scale_inv": "model-00001-of-00027.safetensors",
+ "model.layers.1.self_attn.o_proj.activation_scale": "model-00001-of-00027.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00027.safetensors",
+ "model.layers.1.self_attn.o_proj.weight_scale_inv": "model-00001-of-00027.safetensors",
+ "model.layers.1.self_attn.q_proj.activation_scale": "model-00001-of-00027.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00027.safetensors",
+ "model.layers.1.self_attn.q_proj.weight_scale_inv": "model-00001-of-00027.safetensors",
+ "model.layers.1.self_attn.v_proj.activation_scale": "model-00001-of-00027.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00027.safetensors",
+ "model.layers.1.self_attn.v_proj.weight_scale_inv": "model-00001-of-00027.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00004-of-00027.safetensors",
+ "model.layers.10.mlp.down_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.10.mlp.down_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.10.mlp.down_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.10.mlp.gate_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.10.mlp.gate_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.10.mlp.gate_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.10.mlp.up_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.10.mlp.up_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.10.mlp.up_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00004-of-00027.safetensors",
+ "model.layers.10.self_attn.k_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.10.self_attn.k_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.10.self_attn.o_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.10.self_attn.o_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.10.self_attn.q_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.10.self_attn.q_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.10.self_attn.v_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.10.self_attn.v_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.11.input_layernorm.weight": "model-00005-of-00027.safetensors",
+ "model.layers.11.mlp.down_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.11.mlp.down_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.11.mlp.down_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.11.mlp.gate_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.11.mlp.gate_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.11.mlp.gate_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.11.mlp.up_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.11.mlp.up_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.11.mlp.up_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00005-of-00027.safetensors",
+ "model.layers.11.self_attn.k_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.11.self_attn.k_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.11.self_attn.k_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.11.self_attn.o_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.11.self_attn.o_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.11.self_attn.q_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.11.self_attn.q_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.11.self_attn.q_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.11.self_attn.v_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.11.self_attn.v_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.11.self_attn.v_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.12.input_layernorm.weight": "model-00005-of-00027.safetensors",
+ "model.layers.12.mlp.down_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.12.mlp.down_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.12.mlp.down_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.12.mlp.gate_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.12.mlp.gate_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.12.mlp.gate_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.12.mlp.up_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.12.mlp.up_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.12.mlp.up_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00005-of-00027.safetensors",
+ "model.layers.12.self_attn.k_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.12.self_attn.k_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.12.self_attn.k_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.12.self_attn.o_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.12.self_attn.o_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.12.self_attn.q_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.12.self_attn.q_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.12.self_attn.q_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.12.self_attn.v_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.12.self_attn.v_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.12.self_attn.v_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.13.input_layernorm.weight": "model-00005-of-00027.safetensors",
+ "model.layers.13.mlp.down_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.13.mlp.down_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.13.mlp.down_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.13.mlp.gate_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.13.mlp.gate_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.13.mlp.gate_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.13.mlp.up_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.13.mlp.up_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.13.mlp.up_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00005-of-00027.safetensors",
+ "model.layers.13.self_attn.k_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.13.self_attn.k_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.13.self_attn.k_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.13.self_attn.o_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.13.self_attn.o_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.13.self_attn.q_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.13.self_attn.q_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.13.self_attn.q_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.13.self_attn.v_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.13.self_attn.v_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.13.self_attn.v_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00005-of-00027.safetensors",
+ "model.layers.14.mlp.down_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.14.mlp.down_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.14.mlp.down_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.14.mlp.gate_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.14.mlp.gate_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.14.mlp.gate_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.14.mlp.up_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.14.mlp.up_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.14.mlp.up_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00005-of-00027.safetensors",
+ "model.layers.14.self_attn.k_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.14.self_attn.k_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.14.self_attn.k_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.14.self_attn.o_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.14.self_attn.o_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.14.self_attn.q_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.14.self_attn.q_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.14.self_attn.q_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.14.self_attn.v_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.14.self_attn.v_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.14.self_attn.v_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00006-of-00027.safetensors",
+ "model.layers.15.mlp.down_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.15.mlp.down_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.15.mlp.down_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.15.mlp.gate_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.15.mlp.gate_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.15.mlp.gate_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.15.mlp.up_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.15.mlp.up_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.15.mlp.up_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00006-of-00027.safetensors",
+ "model.layers.15.self_attn.k_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.15.self_attn.k_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.15.self_attn.o_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.15.self_attn.o_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.15.self_attn.q_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.15.self_attn.q_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.15.self_attn.v_proj.activation_scale": "model-00005-of-00027.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00005-of-00027.safetensors",
+ "model.layers.15.self_attn.v_proj.weight_scale_inv": "model-00005-of-00027.safetensors",
+ "model.layers.16.input_layernorm.weight": "model-00006-of-00027.safetensors",
+ "model.layers.16.mlp.down_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.16.mlp.down_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.16.mlp.down_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.16.mlp.gate_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.16.mlp.gate_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.16.mlp.gate_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.16.mlp.up_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.16.mlp.up_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.16.mlp.up_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00006-of-00027.safetensors",
+ "model.layers.16.self_attn.k_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.16.self_attn.k_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.16.self_attn.o_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.16.self_attn.o_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.16.self_attn.q_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.16.self_attn.q_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.16.self_attn.v_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.16.self_attn.v_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.17.input_layernorm.weight": "model-00006-of-00027.safetensors",
+ "model.layers.17.mlp.down_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.17.mlp.down_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.17.mlp.down_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.17.mlp.gate_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.17.mlp.gate_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.17.mlp.gate_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.17.mlp.up_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.17.mlp.up_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.17.mlp.up_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00006-of-00027.safetensors",
+ "model.layers.17.self_attn.k_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.17.self_attn.k_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.17.self_attn.k_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.17.self_attn.o_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.17.self_attn.o_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.17.self_attn.q_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.17.self_attn.q_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.17.self_attn.q_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.17.self_attn.v_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.17.self_attn.v_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.17.self_attn.v_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.18.input_layernorm.weight": "model-00007-of-00027.safetensors",
+ "model.layers.18.mlp.down_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.18.mlp.down_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.18.mlp.down_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.18.mlp.gate_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.18.mlp.gate_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.18.mlp.gate_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.18.mlp.up_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.18.mlp.up_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.18.mlp.up_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00007-of-00027.safetensors",
+ "model.layers.18.self_attn.k_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.18.self_attn.k_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.18.self_attn.k_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.18.self_attn.o_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.18.self_attn.o_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.18.self_attn.q_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.18.self_attn.q_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.18.self_attn.q_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.18.self_attn.v_proj.activation_scale": "model-00006-of-00027.safetensors",
+ "model.layers.18.self_attn.v_proj.weight": "model-00006-of-00027.safetensors",
+ "model.layers.18.self_attn.v_proj.weight_scale_inv": "model-00006-of-00027.safetensors",
+ "model.layers.19.input_layernorm.weight": "model-00007-of-00027.safetensors",
+ "model.layers.19.mlp.down_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.19.mlp.down_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.19.mlp.down_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.19.mlp.gate_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.19.mlp.gate_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.19.mlp.gate_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.19.mlp.up_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.19.mlp.up_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.19.mlp.up_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00007-of-00027.safetensors",
+ "model.layers.19.self_attn.k_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.19.self_attn.k_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.19.self_attn.k_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.19.self_attn.o_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.19.self_attn.o_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.19.self_attn.q_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.19.self_attn.q_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.19.self_attn.q_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.19.self_attn.v_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.19.self_attn.v_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.19.self_attn.v_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.2.input_layernorm.weight": "model-00002-of-00027.safetensors",
+ "model.layers.2.mlp.down_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.2.mlp.down_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.2.mlp.down_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.2.mlp.gate_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.2.mlp.gate_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.2.mlp.gate_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.2.mlp.up_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.2.mlp.up_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.2.mlp.up_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00002-of-00027.safetensors",
+ "model.layers.2.self_attn.k_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.2.self_attn.k_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.2.self_attn.o_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.2.self_attn.o_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.2.self_attn.q_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.2.self_attn.q_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.2.self_attn.v_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.2.self_attn.v_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.20.input_layernorm.weight": "model-00007-of-00027.safetensors",
+ "model.layers.20.mlp.down_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.20.mlp.down_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.20.mlp.down_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.20.mlp.gate_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.20.mlp.gate_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.20.mlp.gate_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.20.mlp.up_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.20.mlp.up_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.20.mlp.up_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.20.post_attention_layernorm.weight": "model-00007-of-00027.safetensors",
+ "model.layers.20.self_attn.k_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.20.self_attn.k_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.20.self_attn.k_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.20.self_attn.o_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.20.self_attn.o_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.20.self_attn.o_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.20.self_attn.q_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.20.self_attn.q_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.20.self_attn.q_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.20.self_attn.v_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.20.self_attn.v_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.20.self_attn.v_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.21.input_layernorm.weight": "model-00007-of-00027.safetensors",
+ "model.layers.21.mlp.down_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.21.mlp.down_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.21.mlp.down_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.21.mlp.gate_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.21.mlp.gate_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.21.mlp.gate_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.21.mlp.up_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.21.mlp.up_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.21.mlp.up_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.21.post_attention_layernorm.weight": "model-00007-of-00027.safetensors",
+ "model.layers.21.self_attn.k_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.21.self_attn.k_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.21.self_attn.k_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.21.self_attn.o_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.21.self_attn.o_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.21.self_attn.o_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.21.self_attn.q_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.21.self_attn.q_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.21.self_attn.q_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.21.self_attn.v_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.21.self_attn.v_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.21.self_attn.v_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.22.input_layernorm.weight": "model-00008-of-00027.safetensors",
+ "model.layers.22.mlp.down_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.22.mlp.down_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.22.mlp.down_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.22.mlp.gate_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.22.mlp.gate_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.22.mlp.gate_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.22.mlp.up_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.22.mlp.up_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.22.mlp.up_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.22.post_attention_layernorm.weight": "model-00008-of-00027.safetensors",
+ "model.layers.22.self_attn.k_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.22.self_attn.k_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.22.self_attn.k_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.22.self_attn.o_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.22.self_attn.o_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.22.self_attn.o_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.22.self_attn.q_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.22.self_attn.q_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.22.self_attn.q_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.22.self_attn.v_proj.activation_scale": "model-00007-of-00027.safetensors",
+ "model.layers.22.self_attn.v_proj.weight": "model-00007-of-00027.safetensors",
+ "model.layers.22.self_attn.v_proj.weight_scale_inv": "model-00007-of-00027.safetensors",
+ "model.layers.23.input_layernorm.weight": "model-00008-of-00027.safetensors",
+ "model.layers.23.mlp.down_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.23.mlp.down_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.23.mlp.down_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.23.mlp.gate_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.23.mlp.gate_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.23.mlp.gate_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.23.mlp.up_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.23.mlp.up_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.23.mlp.up_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.23.post_attention_layernorm.weight": "model-00008-of-00027.safetensors",
+ "model.layers.23.self_attn.k_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.23.self_attn.k_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.23.self_attn.k_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.23.self_attn.o_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.23.self_attn.o_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.23.self_attn.o_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.23.self_attn.q_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.23.self_attn.q_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.23.self_attn.q_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.23.self_attn.v_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.23.self_attn.v_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.23.self_attn.v_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.24.input_layernorm.weight": "model-00008-of-00027.safetensors",
+ "model.layers.24.mlp.down_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.24.mlp.down_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.24.mlp.down_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.24.mlp.gate_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.24.mlp.gate_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.24.mlp.gate_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.24.mlp.up_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.24.mlp.up_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.24.mlp.up_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.24.post_attention_layernorm.weight": "model-00008-of-00027.safetensors",
+ "model.layers.24.self_attn.k_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.24.self_attn.k_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.24.self_attn.k_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.24.self_attn.o_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.24.self_attn.o_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.24.self_attn.o_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.24.self_attn.q_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.24.self_attn.q_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.24.self_attn.q_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.24.self_attn.v_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.24.self_attn.v_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.24.self_attn.v_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.25.input_layernorm.weight": "model-00009-of-00027.safetensors",
+ "model.layers.25.mlp.down_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.25.mlp.down_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.25.mlp.down_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.25.mlp.gate_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.25.mlp.gate_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.25.mlp.gate_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.25.mlp.up_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.25.mlp.up_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.25.mlp.up_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.25.post_attention_layernorm.weight": "model-00009-of-00027.safetensors",
+ "model.layers.25.self_attn.k_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.25.self_attn.k_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.25.self_attn.k_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.25.self_attn.o_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.25.self_attn.o_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.25.self_attn.o_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.25.self_attn.q_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.25.self_attn.q_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.25.self_attn.q_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.25.self_attn.v_proj.activation_scale": "model-00008-of-00027.safetensors",
+ "model.layers.25.self_attn.v_proj.weight": "model-00008-of-00027.safetensors",
+ "model.layers.25.self_attn.v_proj.weight_scale_inv": "model-00008-of-00027.safetensors",
+ "model.layers.26.input_layernorm.weight": "model-00009-of-00027.safetensors",
+ "model.layers.26.mlp.down_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.26.mlp.down_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.26.mlp.down_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.26.mlp.gate_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.26.mlp.gate_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.26.mlp.gate_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.26.mlp.up_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.26.mlp.up_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.26.mlp.up_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00009-of-00027.safetensors",
+ "model.layers.26.self_attn.k_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.26.self_attn.k_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.26.self_attn.k_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.26.self_attn.o_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.26.self_attn.o_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.26.self_attn.o_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.26.self_attn.q_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.26.self_attn.q_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.26.self_attn.q_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.26.self_attn.v_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.26.self_attn.v_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.26.self_attn.v_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.27.input_layernorm.weight": "model-00009-of-00027.safetensors",
+ "model.layers.27.mlp.down_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.27.mlp.down_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.27.mlp.down_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.27.mlp.gate_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.27.mlp.gate_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.27.mlp.gate_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.27.mlp.up_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.27.mlp.up_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.27.mlp.up_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.27.post_attention_layernorm.weight": "model-00009-of-00027.safetensors",
+ "model.layers.27.self_attn.k_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.27.self_attn.k_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.27.self_attn.k_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.27.self_attn.o_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.27.self_attn.o_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.27.self_attn.q_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.27.self_attn.q_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.27.self_attn.q_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.27.self_attn.v_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.27.self_attn.v_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.27.self_attn.v_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.28.input_layernorm.weight": "model-00009-of-00027.safetensors",
+ "model.layers.28.mlp.down_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.28.mlp.down_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.28.mlp.down_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.28.mlp.gate_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.28.mlp.gate_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.28.mlp.gate_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.28.mlp.up_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.28.mlp.up_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.28.mlp.up_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.28.post_attention_layernorm.weight": "model-00009-of-00027.safetensors",
+ "model.layers.28.self_attn.k_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.28.self_attn.k_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.28.self_attn.k_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.28.self_attn.o_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.28.self_attn.o_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.28.self_attn.o_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.28.self_attn.q_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.28.self_attn.q_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.28.self_attn.q_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.28.self_attn.v_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.28.self_attn.v_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.28.self_attn.v_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.29.input_layernorm.weight": "model-00010-of-00027.safetensors",
+ "model.layers.29.mlp.down_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.29.mlp.down_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.29.mlp.down_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.29.mlp.gate_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.29.mlp.gate_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.29.mlp.gate_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.29.mlp.up_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.29.mlp.up_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.29.mlp.up_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.29.post_attention_layernorm.weight": "model-00010-of-00027.safetensors",
+ "model.layers.29.self_attn.k_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.29.self_attn.k_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.29.self_attn.k_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.29.self_attn.o_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.29.self_attn.o_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.29.self_attn.o_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.29.self_attn.q_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.29.self_attn.q_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.29.self_attn.q_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.29.self_attn.v_proj.activation_scale": "model-00009-of-00027.safetensors",
+ "model.layers.29.self_attn.v_proj.weight": "model-00009-of-00027.safetensors",
+ "model.layers.29.self_attn.v_proj.weight_scale_inv": "model-00009-of-00027.safetensors",
+ "model.layers.3.input_layernorm.weight": "model-00002-of-00027.safetensors",
+ "model.layers.3.mlp.down_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.3.mlp.down_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.3.mlp.down_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.3.mlp.gate_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.3.mlp.gate_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.3.mlp.up_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.3.mlp.up_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.3.mlp.up_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00027.safetensors",
+ "model.layers.3.self_attn.k_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.3.self_attn.k_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.3.self_attn.k_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.3.self_attn.o_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.3.self_attn.o_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.3.self_attn.o_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.3.self_attn.q_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.3.self_attn.q_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.3.self_attn.q_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.3.self_attn.v_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.3.self_attn.v_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.3.self_attn.v_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.30.input_layernorm.weight": "model-00010-of-00027.safetensors",
+ "model.layers.30.mlp.down_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.30.mlp.down_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.30.mlp.down_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.30.mlp.gate_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.30.mlp.gate_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.30.mlp.gate_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.30.mlp.up_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.30.mlp.up_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.30.mlp.up_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.30.post_attention_layernorm.weight": "model-00010-of-00027.safetensors",
+ "model.layers.30.self_attn.k_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.30.self_attn.k_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.30.self_attn.k_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.30.self_attn.o_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.30.self_attn.o_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.30.self_attn.o_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.30.self_attn.q_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.30.self_attn.q_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.30.self_attn.q_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.30.self_attn.v_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.30.self_attn.v_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.30.self_attn.v_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.31.input_layernorm.weight": "model-00010-of-00027.safetensors",
+ "model.layers.31.mlp.down_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.31.mlp.down_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.31.mlp.down_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.31.mlp.gate_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.31.mlp.gate_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.31.mlp.gate_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.31.mlp.up_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.31.mlp.up_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.31.mlp.up_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.31.post_attention_layernorm.weight": "model-00010-of-00027.safetensors",
+ "model.layers.31.self_attn.k_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.31.self_attn.k_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.31.self_attn.k_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.31.self_attn.o_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.31.self_attn.o_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.31.self_attn.o_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.31.self_attn.q_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.31.self_attn.q_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.31.self_attn.q_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.31.self_attn.v_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.31.self_attn.v_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.31.self_attn.v_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.32.input_layernorm.weight": "model-00011-of-00027.safetensors",
+ "model.layers.32.mlp.down_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.32.mlp.down_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.32.mlp.down_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.32.mlp.gate_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.32.mlp.gate_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.32.mlp.gate_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.32.mlp.up_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.32.mlp.up_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.32.mlp.up_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.32.post_attention_layernorm.weight": "model-00011-of-00027.safetensors",
+ "model.layers.32.self_attn.k_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.32.self_attn.k_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.32.self_attn.k_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.32.self_attn.o_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.32.self_attn.o_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.32.self_attn.o_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.32.self_attn.q_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.32.self_attn.q_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.32.self_attn.q_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.32.self_attn.v_proj.activation_scale": "model-00010-of-00027.safetensors",
+ "model.layers.32.self_attn.v_proj.weight": "model-00010-of-00027.safetensors",
+ "model.layers.32.self_attn.v_proj.weight_scale_inv": "model-00010-of-00027.safetensors",
+ "model.layers.33.input_layernorm.weight": "model-00011-of-00027.safetensors",
+ "model.layers.33.mlp.down_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.33.mlp.down_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.33.mlp.down_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.33.mlp.gate_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.33.mlp.gate_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.33.mlp.gate_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.33.mlp.up_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.33.mlp.up_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.33.mlp.up_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.33.post_attention_layernorm.weight": "model-00011-of-00027.safetensors",
+ "model.layers.33.self_attn.k_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.33.self_attn.k_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.33.self_attn.k_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.33.self_attn.o_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.33.self_attn.o_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.33.self_attn.o_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.33.self_attn.q_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.33.self_attn.q_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.33.self_attn.q_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.33.self_attn.v_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.33.self_attn.v_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.33.self_attn.v_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.34.input_layernorm.weight": "model-00011-of-00027.safetensors",
+ "model.layers.34.mlp.down_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.34.mlp.down_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.34.mlp.down_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.34.mlp.gate_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.34.mlp.gate_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.34.mlp.gate_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.34.mlp.up_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.34.mlp.up_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.34.mlp.up_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.34.post_attention_layernorm.weight": "model-00011-of-00027.safetensors",
+ "model.layers.34.self_attn.k_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.34.self_attn.k_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.34.self_attn.k_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.34.self_attn.o_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.34.self_attn.o_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.34.self_attn.o_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.34.self_attn.q_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.34.self_attn.q_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.34.self_attn.q_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.34.self_attn.v_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.34.self_attn.v_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.34.self_attn.v_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.35.input_layernorm.weight": "model-00011-of-00027.safetensors",
+ "model.layers.35.mlp.down_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.35.mlp.down_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.35.mlp.down_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.35.mlp.gate_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.35.mlp.gate_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.35.mlp.gate_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.35.mlp.up_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.35.mlp.up_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.35.mlp.up_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.35.post_attention_layernorm.weight": "model-00011-of-00027.safetensors",
+ "model.layers.35.self_attn.k_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.35.self_attn.k_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.35.self_attn.k_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.35.self_attn.o_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.35.self_attn.o_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.35.self_attn.o_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.35.self_attn.q_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.35.self_attn.q_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.35.self_attn.q_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.35.self_attn.v_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.35.self_attn.v_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.35.self_attn.v_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.36.input_layernorm.weight": "model-00012-of-00027.safetensors",
+ "model.layers.36.mlp.down_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.36.mlp.down_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.36.mlp.down_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.36.mlp.gate_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.36.mlp.gate_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.36.mlp.gate_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.36.mlp.up_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.36.mlp.up_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.36.mlp.up_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.36.post_attention_layernorm.weight": "model-00012-of-00027.safetensors",
+ "model.layers.36.self_attn.k_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.36.self_attn.k_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.36.self_attn.k_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.36.self_attn.o_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.36.self_attn.o_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.36.self_attn.o_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.36.self_attn.q_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.36.self_attn.q_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.36.self_attn.q_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.36.self_attn.v_proj.activation_scale": "model-00011-of-00027.safetensors",
+ "model.layers.36.self_attn.v_proj.weight": "model-00011-of-00027.safetensors",
+ "model.layers.36.self_attn.v_proj.weight_scale_inv": "model-00011-of-00027.safetensors",
+ "model.layers.37.input_layernorm.weight": "model-00012-of-00027.safetensors",
+ "model.layers.37.mlp.down_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.37.mlp.down_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.37.mlp.down_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.37.mlp.gate_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.37.mlp.gate_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.37.mlp.gate_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.37.mlp.up_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.37.mlp.up_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.37.mlp.up_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.37.post_attention_layernorm.weight": "model-00012-of-00027.safetensors",
+ "model.layers.37.self_attn.k_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.37.self_attn.k_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.37.self_attn.k_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.37.self_attn.o_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.37.self_attn.o_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.37.self_attn.o_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.37.self_attn.q_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.37.self_attn.q_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.37.self_attn.q_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.37.self_attn.v_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.37.self_attn.v_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.37.self_attn.v_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.38.input_layernorm.weight": "model-00012-of-00027.safetensors",
+ "model.layers.38.mlp.down_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.38.mlp.down_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.38.mlp.down_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.38.mlp.gate_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.38.mlp.gate_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.38.mlp.gate_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.38.mlp.up_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.38.mlp.up_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.38.mlp.up_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.38.post_attention_layernorm.weight": "model-00012-of-00027.safetensors",
+ "model.layers.38.self_attn.k_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.38.self_attn.k_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.38.self_attn.k_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.38.self_attn.o_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.38.self_attn.o_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.38.self_attn.o_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.38.self_attn.q_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.38.self_attn.q_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.38.self_attn.q_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.38.self_attn.v_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.38.self_attn.v_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.38.self_attn.v_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.39.input_layernorm.weight": "model-00013-of-00027.safetensors",
+ "model.layers.39.mlp.down_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.39.mlp.down_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.39.mlp.down_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.39.mlp.gate_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.39.mlp.gate_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.39.mlp.gate_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.39.mlp.up_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.39.mlp.up_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.39.mlp.up_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.39.post_attention_layernorm.weight": "model-00013-of-00027.safetensors",
+ "model.layers.39.self_attn.k_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.39.self_attn.k_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.39.self_attn.k_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.39.self_attn.o_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.39.self_attn.o_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.39.self_attn.o_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.39.self_attn.q_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.39.self_attn.q_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.39.self_attn.q_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.39.self_attn.v_proj.activation_scale": "model-00012-of-00027.safetensors",
+ "model.layers.39.self_attn.v_proj.weight": "model-00012-of-00027.safetensors",
+ "model.layers.39.self_attn.v_proj.weight_scale_inv": "model-00012-of-00027.safetensors",
+ "model.layers.4.input_layernorm.weight": "model-00003-of-00027.safetensors",
+ "model.layers.4.mlp.down_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.4.mlp.down_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.4.mlp.down_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.4.mlp.gate_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.4.mlp.gate_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.4.mlp.up_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.4.mlp.up_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.4.mlp.up_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00003-of-00027.safetensors",
+ "model.layers.4.self_attn.k_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.4.self_attn.k_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.4.self_attn.o_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.4.self_attn.o_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.4.self_attn.q_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.4.self_attn.q_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.4.self_attn.v_proj.activation_scale": "model-00002-of-00027.safetensors",
+ "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00027.safetensors",
+ "model.layers.4.self_attn.v_proj.weight_scale_inv": "model-00002-of-00027.safetensors",
+ "model.layers.40.input_layernorm.weight": "model-00013-of-00027.safetensors",
+ "model.layers.40.mlp.down_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.40.mlp.down_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.40.mlp.down_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.40.mlp.gate_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.40.mlp.gate_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.40.mlp.gate_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.40.mlp.up_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.40.mlp.up_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.40.mlp.up_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.40.post_attention_layernorm.weight": "model-00013-of-00027.safetensors",
+ "model.layers.40.self_attn.k_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.40.self_attn.k_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.40.self_attn.k_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.40.self_attn.o_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.40.self_attn.o_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.40.self_attn.o_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.40.self_attn.q_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.40.self_attn.q_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.40.self_attn.q_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.40.self_attn.v_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.40.self_attn.v_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.40.self_attn.v_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.41.input_layernorm.weight": "model-00013-of-00027.safetensors",
+ "model.layers.41.mlp.down_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.41.mlp.down_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.41.mlp.down_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.41.mlp.gate_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.41.mlp.gate_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.41.mlp.gate_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.41.mlp.up_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.41.mlp.up_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.41.mlp.up_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.41.post_attention_layernorm.weight": "model-00013-of-00027.safetensors",
+ "model.layers.41.self_attn.k_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.41.self_attn.k_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.41.self_attn.k_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.41.self_attn.o_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.41.self_attn.o_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.41.self_attn.o_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.41.self_attn.q_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.41.self_attn.q_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.41.self_attn.q_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.41.self_attn.v_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.41.self_attn.v_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.41.self_attn.v_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.42.input_layernorm.weight": "model-00013-of-00027.safetensors",
+ "model.layers.42.mlp.down_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.42.mlp.down_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.42.mlp.down_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.42.mlp.gate_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.42.mlp.gate_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.42.mlp.gate_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.42.mlp.up_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.42.mlp.up_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.42.mlp.up_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.42.post_attention_layernorm.weight": "model-00013-of-00027.safetensors",
+ "model.layers.42.self_attn.k_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.42.self_attn.k_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.42.self_attn.k_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.42.self_attn.o_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.42.self_attn.o_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.42.self_attn.o_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.42.self_attn.q_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.42.self_attn.q_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.42.self_attn.q_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.42.self_attn.v_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.42.self_attn.v_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.42.self_attn.v_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.43.input_layernorm.weight": "model-00014-of-00027.safetensors",
+ "model.layers.43.mlp.down_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.43.mlp.down_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.43.mlp.down_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.43.mlp.gate_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.43.mlp.gate_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.43.mlp.gate_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.43.mlp.up_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.43.mlp.up_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.43.mlp.up_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.43.post_attention_layernorm.weight": "model-00014-of-00027.safetensors",
+ "model.layers.43.self_attn.k_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.43.self_attn.k_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.43.self_attn.k_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.43.self_attn.o_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.43.self_attn.o_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.43.self_attn.o_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.43.self_attn.q_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.43.self_attn.q_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.43.self_attn.q_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.43.self_attn.v_proj.activation_scale": "model-00013-of-00027.safetensors",
+ "model.layers.43.self_attn.v_proj.weight": "model-00013-of-00027.safetensors",
+ "model.layers.43.self_attn.v_proj.weight_scale_inv": "model-00013-of-00027.safetensors",
+ "model.layers.44.input_layernorm.weight": "model-00014-of-00027.safetensors",
+ "model.layers.44.mlp.down_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.44.mlp.down_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.44.mlp.down_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.44.mlp.gate_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.44.mlp.gate_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.44.mlp.gate_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.44.mlp.up_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.44.mlp.up_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.44.mlp.up_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.44.post_attention_layernorm.weight": "model-00014-of-00027.safetensors",
+ "model.layers.44.self_attn.k_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.44.self_attn.k_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.44.self_attn.k_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.44.self_attn.o_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.44.self_attn.o_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.44.self_attn.o_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.44.self_attn.q_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.44.self_attn.q_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.44.self_attn.q_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.44.self_attn.v_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.44.self_attn.v_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.44.self_attn.v_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.45.input_layernorm.weight": "model-00014-of-00027.safetensors",
+ "model.layers.45.mlp.down_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.45.mlp.down_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.45.mlp.down_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.45.mlp.gate_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.45.mlp.gate_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.45.mlp.gate_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.45.mlp.up_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.45.mlp.up_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.45.mlp.up_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.45.post_attention_layernorm.weight": "model-00014-of-00027.safetensors",
+ "model.layers.45.self_attn.k_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.45.self_attn.k_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.45.self_attn.k_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.45.self_attn.o_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.45.self_attn.o_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.45.self_attn.o_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.45.self_attn.q_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.45.self_attn.q_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.45.self_attn.q_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.45.self_attn.v_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.45.self_attn.v_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.45.self_attn.v_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.46.input_layernorm.weight": "model-00015-of-00027.safetensors",
+ "model.layers.46.mlp.down_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.46.mlp.down_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.46.mlp.down_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.46.mlp.gate_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.46.mlp.gate_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.46.mlp.gate_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.46.mlp.up_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.46.mlp.up_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.46.mlp.up_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.46.post_attention_layernorm.weight": "model-00015-of-00027.safetensors",
+ "model.layers.46.self_attn.k_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.46.self_attn.k_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.46.self_attn.k_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.46.self_attn.o_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.46.self_attn.o_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.46.self_attn.o_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.46.self_attn.q_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.46.self_attn.q_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.46.self_attn.q_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.46.self_attn.v_proj.activation_scale": "model-00014-of-00027.safetensors",
+ "model.layers.46.self_attn.v_proj.weight": "model-00014-of-00027.safetensors",
+ "model.layers.46.self_attn.v_proj.weight_scale_inv": "model-00014-of-00027.safetensors",
+ "model.layers.47.input_layernorm.weight": "model-00015-of-00027.safetensors",
+ "model.layers.47.mlp.down_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.47.mlp.down_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.47.mlp.down_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.47.mlp.gate_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.47.mlp.gate_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.47.mlp.gate_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.47.mlp.up_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.47.mlp.up_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.47.mlp.up_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.47.post_attention_layernorm.weight": "model-00015-of-00027.safetensors",
+ "model.layers.47.self_attn.k_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.47.self_attn.k_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.47.self_attn.k_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.47.self_attn.o_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.47.self_attn.o_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.47.self_attn.o_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.47.self_attn.q_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.47.self_attn.q_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.47.self_attn.q_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.47.self_attn.v_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.47.self_attn.v_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.47.self_attn.v_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.48.input_layernorm.weight": "model-00015-of-00027.safetensors",
+ "model.layers.48.mlp.down_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.48.mlp.down_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.48.mlp.down_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.48.mlp.gate_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.48.mlp.gate_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.48.mlp.gate_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.48.mlp.up_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.48.mlp.up_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.48.mlp.up_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.48.post_attention_layernorm.weight": "model-00015-of-00027.safetensors",
+ "model.layers.48.self_attn.k_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.48.self_attn.k_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.48.self_attn.k_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.48.self_attn.o_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.48.self_attn.o_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.48.self_attn.o_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.48.self_attn.q_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.48.self_attn.q_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.48.self_attn.q_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.48.self_attn.v_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.48.self_attn.v_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.48.self_attn.v_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.49.input_layernorm.weight": "model-00015-of-00027.safetensors",
+ "model.layers.49.mlp.down_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.49.mlp.down_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.49.mlp.down_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.49.mlp.gate_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.49.mlp.gate_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.49.mlp.gate_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.49.mlp.up_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.49.mlp.up_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.49.mlp.up_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.49.post_attention_layernorm.weight": "model-00015-of-00027.safetensors",
+ "model.layers.49.self_attn.k_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.49.self_attn.k_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.49.self_attn.k_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.49.self_attn.o_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.49.self_attn.o_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.49.self_attn.o_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.49.self_attn.q_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.49.self_attn.q_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.49.self_attn.q_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.49.self_attn.v_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.49.self_attn.v_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.49.self_attn.v_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.5.input_layernorm.weight": "model-00003-of-00027.safetensors",
+ "model.layers.5.mlp.down_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.5.mlp.down_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.5.mlp.down_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.5.mlp.gate_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.5.mlp.gate_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.5.mlp.gate_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.5.mlp.up_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.5.mlp.up_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.5.mlp.up_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.5.post_attention_layernorm.weight": "model-00003-of-00027.safetensors",
+ "model.layers.5.self_attn.k_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.5.self_attn.k_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.5.self_attn.k_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.5.self_attn.o_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.5.self_attn.o_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.5.self_attn.o_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.5.self_attn.q_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.5.self_attn.q_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.5.self_attn.q_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.5.self_attn.v_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.5.self_attn.v_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.5.self_attn.v_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.50.input_layernorm.weight": "model-00016-of-00027.safetensors",
+ "model.layers.50.mlp.down_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.50.mlp.down_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.50.mlp.down_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.50.mlp.gate_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.50.mlp.gate_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.50.mlp.gate_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.50.mlp.up_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.50.mlp.up_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.50.mlp.up_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.50.post_attention_layernorm.weight": "model-00016-of-00027.safetensors",
+ "model.layers.50.self_attn.k_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.50.self_attn.k_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.50.self_attn.k_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.50.self_attn.o_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.50.self_attn.o_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.50.self_attn.o_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.50.self_attn.q_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.50.self_attn.q_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.50.self_attn.q_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.50.self_attn.v_proj.activation_scale": "model-00015-of-00027.safetensors",
+ "model.layers.50.self_attn.v_proj.weight": "model-00015-of-00027.safetensors",
+ "model.layers.50.self_attn.v_proj.weight_scale_inv": "model-00015-of-00027.safetensors",
+ "model.layers.51.input_layernorm.weight": "model-00016-of-00027.safetensors",
+ "model.layers.51.mlp.down_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.51.mlp.down_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.51.mlp.down_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.51.mlp.gate_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.51.mlp.gate_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.51.mlp.gate_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.51.mlp.up_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.51.mlp.up_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.51.mlp.up_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.51.post_attention_layernorm.weight": "model-00016-of-00027.safetensors",
+ "model.layers.51.self_attn.k_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.51.self_attn.k_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.51.self_attn.k_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.51.self_attn.o_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.51.self_attn.o_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.51.self_attn.o_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.51.self_attn.q_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.51.self_attn.q_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.51.self_attn.q_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.51.self_attn.v_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.51.self_attn.v_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.51.self_attn.v_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.52.input_layernorm.weight": "model-00016-of-00027.safetensors",
+ "model.layers.52.mlp.down_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.52.mlp.down_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.52.mlp.down_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.52.mlp.gate_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.52.mlp.gate_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.52.mlp.gate_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.52.mlp.up_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.52.mlp.up_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.52.mlp.up_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.52.post_attention_layernorm.weight": "model-00016-of-00027.safetensors",
+ "model.layers.52.self_attn.k_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.52.self_attn.k_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.52.self_attn.k_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.52.self_attn.o_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.52.self_attn.o_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.52.self_attn.o_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.52.self_attn.q_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.52.self_attn.q_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.52.self_attn.q_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.52.self_attn.v_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.52.self_attn.v_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.52.self_attn.v_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.53.input_layernorm.weight": "model-00017-of-00027.safetensors",
+ "model.layers.53.mlp.down_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.53.mlp.down_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.53.mlp.down_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.53.mlp.gate_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.53.mlp.gate_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.53.mlp.gate_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.53.mlp.up_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.53.mlp.up_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.53.mlp.up_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.53.post_attention_layernorm.weight": "model-00017-of-00027.safetensors",
+ "model.layers.53.self_attn.k_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.53.self_attn.k_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.53.self_attn.k_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.53.self_attn.o_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.53.self_attn.o_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.53.self_attn.o_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.53.self_attn.q_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.53.self_attn.q_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.53.self_attn.q_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.53.self_attn.v_proj.activation_scale": "model-00016-of-00027.safetensors",
+ "model.layers.53.self_attn.v_proj.weight": "model-00016-of-00027.safetensors",
+ "model.layers.53.self_attn.v_proj.weight_scale_inv": "model-00016-of-00027.safetensors",
+ "model.layers.54.input_layernorm.weight": "model-00017-of-00027.safetensors",
+ "model.layers.54.mlp.down_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.54.mlp.down_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.54.mlp.down_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.54.mlp.gate_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.54.mlp.gate_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.54.mlp.gate_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.54.mlp.up_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.54.mlp.up_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.54.mlp.up_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.54.post_attention_layernorm.weight": "model-00017-of-00027.safetensors",
+ "model.layers.54.self_attn.k_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.54.self_attn.k_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.54.self_attn.k_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.54.self_attn.o_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.54.self_attn.o_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.54.self_attn.o_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.54.self_attn.q_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.54.self_attn.q_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.54.self_attn.q_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.54.self_attn.v_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.54.self_attn.v_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.54.self_attn.v_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.55.input_layernorm.weight": "model-00017-of-00027.safetensors",
+ "model.layers.55.mlp.down_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.55.mlp.down_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.55.mlp.down_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.55.mlp.gate_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.55.mlp.gate_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.55.mlp.gate_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.55.mlp.up_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.55.mlp.up_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.55.mlp.up_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.55.post_attention_layernorm.weight": "model-00017-of-00027.safetensors",
+ "model.layers.55.self_attn.k_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.55.self_attn.k_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.55.self_attn.k_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.55.self_attn.o_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.55.self_attn.o_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.55.self_attn.o_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.55.self_attn.q_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.55.self_attn.q_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.55.self_attn.q_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.55.self_attn.v_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.55.self_attn.v_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.55.self_attn.v_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.56.input_layernorm.weight": "model-00017-of-00027.safetensors",
+ "model.layers.56.mlp.down_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.56.mlp.down_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.56.mlp.down_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.56.mlp.gate_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.56.mlp.gate_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.56.mlp.gate_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.56.mlp.up_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.56.mlp.up_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.56.mlp.up_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.56.post_attention_layernorm.weight": "model-00017-of-00027.safetensors",
+ "model.layers.56.self_attn.k_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.56.self_attn.k_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.56.self_attn.k_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.56.self_attn.o_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.56.self_attn.o_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.56.self_attn.o_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.56.self_attn.q_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.56.self_attn.q_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.56.self_attn.q_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.56.self_attn.v_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.56.self_attn.v_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.56.self_attn.v_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.57.input_layernorm.weight": "model-00018-of-00027.safetensors",
+ "model.layers.57.mlp.down_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.57.mlp.down_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.57.mlp.down_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.57.mlp.gate_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.57.mlp.gate_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.57.mlp.gate_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.57.mlp.up_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.57.mlp.up_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.57.mlp.up_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.57.post_attention_layernorm.weight": "model-00018-of-00027.safetensors",
+ "model.layers.57.self_attn.k_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.57.self_attn.k_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.57.self_attn.k_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.57.self_attn.o_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.57.self_attn.o_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.57.self_attn.o_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.57.self_attn.q_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.57.self_attn.q_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.57.self_attn.q_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.57.self_attn.v_proj.activation_scale": "model-00017-of-00027.safetensors",
+ "model.layers.57.self_attn.v_proj.weight": "model-00017-of-00027.safetensors",
+ "model.layers.57.self_attn.v_proj.weight_scale_inv": "model-00017-of-00027.safetensors",
+ "model.layers.58.input_layernorm.weight": "model-00018-of-00027.safetensors",
+ "model.layers.58.mlp.down_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.58.mlp.down_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.58.mlp.down_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.58.mlp.gate_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.58.mlp.gate_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.58.mlp.gate_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.58.mlp.up_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.58.mlp.up_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.58.mlp.up_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.58.post_attention_layernorm.weight": "model-00018-of-00027.safetensors",
+ "model.layers.58.self_attn.k_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.58.self_attn.k_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.58.self_attn.k_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.58.self_attn.o_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.58.self_attn.o_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.58.self_attn.o_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.58.self_attn.q_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.58.self_attn.q_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.58.self_attn.q_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.58.self_attn.v_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.58.self_attn.v_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.58.self_attn.v_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.59.input_layernorm.weight": "model-00018-of-00027.safetensors",
+ "model.layers.59.mlp.down_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.59.mlp.down_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.59.mlp.down_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.59.mlp.gate_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.59.mlp.gate_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.59.mlp.gate_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.59.mlp.up_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.59.mlp.up_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.59.mlp.up_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.59.post_attention_layernorm.weight": "model-00018-of-00027.safetensors",
+ "model.layers.59.self_attn.k_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.59.self_attn.k_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.59.self_attn.k_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.59.self_attn.o_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.59.self_attn.o_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.59.self_attn.o_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.59.self_attn.q_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.59.self_attn.q_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.59.self_attn.q_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.59.self_attn.v_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.59.self_attn.v_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.59.self_attn.v_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.6.input_layernorm.weight": "model-00003-of-00027.safetensors",
+ "model.layers.6.mlp.down_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.6.mlp.down_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.6.mlp.down_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.6.mlp.gate_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.6.mlp.gate_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.6.mlp.gate_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.6.mlp.up_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.6.mlp.up_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.6.mlp.up_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.6.post_attention_layernorm.weight": "model-00003-of-00027.safetensors",
+ "model.layers.6.self_attn.k_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.6.self_attn.k_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.6.self_attn.k_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.6.self_attn.o_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.6.self_attn.o_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.6.self_attn.o_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.6.self_attn.q_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.6.self_attn.q_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.6.self_attn.q_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.6.self_attn.v_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.6.self_attn.v_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.6.self_attn.v_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.60.input_layernorm.weight": "model-00019-of-00027.safetensors",
+ "model.layers.60.mlp.down_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.60.mlp.down_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.60.mlp.down_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.60.mlp.gate_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.60.mlp.gate_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.60.mlp.gate_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.60.mlp.up_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.60.mlp.up_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.60.mlp.up_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.60.post_attention_layernorm.weight": "model-00019-of-00027.safetensors",
+ "model.layers.60.self_attn.k_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.60.self_attn.k_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.60.self_attn.k_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.60.self_attn.o_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.60.self_attn.o_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.60.self_attn.o_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.60.self_attn.q_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.60.self_attn.q_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.60.self_attn.q_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.60.self_attn.v_proj.activation_scale": "model-00018-of-00027.safetensors",
+ "model.layers.60.self_attn.v_proj.weight": "model-00018-of-00027.safetensors",
+ "model.layers.60.self_attn.v_proj.weight_scale_inv": "model-00018-of-00027.safetensors",
+ "model.layers.61.input_layernorm.weight": "model-00019-of-00027.safetensors",
+ "model.layers.61.mlp.down_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.61.mlp.down_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.61.mlp.down_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.61.mlp.gate_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.61.mlp.gate_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.61.mlp.gate_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.61.mlp.up_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.61.mlp.up_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.61.mlp.up_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.61.post_attention_layernorm.weight": "model-00019-of-00027.safetensors",
+ "model.layers.61.self_attn.k_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.61.self_attn.k_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.61.self_attn.k_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.61.self_attn.o_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.61.self_attn.o_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.61.self_attn.o_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.61.self_attn.q_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.61.self_attn.q_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.61.self_attn.q_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.61.self_attn.v_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.61.self_attn.v_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.61.self_attn.v_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.62.input_layernorm.weight": "model-00019-of-00027.safetensors",
+ "model.layers.62.mlp.down_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.62.mlp.down_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.62.mlp.down_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.62.mlp.gate_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.62.mlp.gate_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.62.mlp.gate_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.62.mlp.up_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.62.mlp.up_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.62.mlp.up_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.62.post_attention_layernorm.weight": "model-00019-of-00027.safetensors",
+ "model.layers.62.self_attn.k_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.62.self_attn.k_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.62.self_attn.k_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.62.self_attn.o_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.62.self_attn.o_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.62.self_attn.o_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.62.self_attn.q_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.62.self_attn.q_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.62.self_attn.q_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.62.self_attn.v_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.62.self_attn.v_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.62.self_attn.v_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.63.input_layernorm.weight": "model-00019-of-00027.safetensors",
+ "model.layers.63.mlp.down_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.63.mlp.down_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.63.mlp.down_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.63.mlp.gate_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.63.mlp.gate_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.63.mlp.gate_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.63.mlp.up_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.63.mlp.up_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.63.mlp.up_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.63.post_attention_layernorm.weight": "model-00019-of-00027.safetensors",
+ "model.layers.63.self_attn.k_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.63.self_attn.k_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.63.self_attn.k_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.63.self_attn.o_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.63.self_attn.o_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.63.self_attn.o_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.63.self_attn.q_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.63.self_attn.q_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.63.self_attn.q_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.63.self_attn.v_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.63.self_attn.v_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.63.self_attn.v_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.64.input_layernorm.weight": "model-00020-of-00027.safetensors",
+ "model.layers.64.mlp.down_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.64.mlp.down_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.64.mlp.down_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.64.mlp.gate_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.64.mlp.gate_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.64.mlp.gate_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.64.mlp.up_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.64.mlp.up_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.64.mlp.up_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.64.post_attention_layernorm.weight": "model-00020-of-00027.safetensors",
+ "model.layers.64.self_attn.k_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.64.self_attn.k_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.64.self_attn.k_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.64.self_attn.o_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.64.self_attn.o_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.64.self_attn.o_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.64.self_attn.q_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.64.self_attn.q_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.64.self_attn.q_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.64.self_attn.v_proj.activation_scale": "model-00019-of-00027.safetensors",
+ "model.layers.64.self_attn.v_proj.weight": "model-00019-of-00027.safetensors",
+ "model.layers.64.self_attn.v_proj.weight_scale_inv": "model-00019-of-00027.safetensors",
+ "model.layers.65.input_layernorm.weight": "model-00020-of-00027.safetensors",
+ "model.layers.65.mlp.down_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.65.mlp.down_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.65.mlp.down_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.65.mlp.gate_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.65.mlp.gate_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.65.mlp.gate_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.65.mlp.up_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.65.mlp.up_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.65.mlp.up_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.65.post_attention_layernorm.weight": "model-00020-of-00027.safetensors",
+ "model.layers.65.self_attn.k_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.65.self_attn.k_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.65.self_attn.k_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.65.self_attn.o_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.65.self_attn.o_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.65.self_attn.o_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.65.self_attn.q_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.65.self_attn.q_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.65.self_attn.q_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.65.self_attn.v_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.65.self_attn.v_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.65.self_attn.v_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.66.input_layernorm.weight": "model-00020-of-00027.safetensors",
+ "model.layers.66.mlp.down_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.66.mlp.down_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.66.mlp.down_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.66.mlp.gate_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.66.mlp.gate_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.66.mlp.gate_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.66.mlp.up_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.66.mlp.up_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.66.mlp.up_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.66.post_attention_layernorm.weight": "model-00020-of-00027.safetensors",
+ "model.layers.66.self_attn.k_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.66.self_attn.k_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.66.self_attn.k_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.66.self_attn.o_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.66.self_attn.o_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.66.self_attn.o_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.66.self_attn.q_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.66.self_attn.q_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.66.self_attn.q_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.66.self_attn.v_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.66.self_attn.v_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.66.self_attn.v_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.67.input_layernorm.weight": "model-00021-of-00027.safetensors",
+ "model.layers.67.mlp.down_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.67.mlp.down_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.67.mlp.down_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.67.mlp.gate_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.67.mlp.gate_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.67.mlp.gate_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.67.mlp.up_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.67.mlp.up_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.67.mlp.up_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.67.post_attention_layernorm.weight": "model-00021-of-00027.safetensors",
+ "model.layers.67.self_attn.k_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.67.self_attn.k_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.67.self_attn.k_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.67.self_attn.o_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.67.self_attn.o_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.67.self_attn.o_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.67.self_attn.q_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.67.self_attn.q_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.67.self_attn.q_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.67.self_attn.v_proj.activation_scale": "model-00020-of-00027.safetensors",
+ "model.layers.67.self_attn.v_proj.weight": "model-00020-of-00027.safetensors",
+ "model.layers.67.self_attn.v_proj.weight_scale_inv": "model-00020-of-00027.safetensors",
+ "model.layers.68.input_layernorm.weight": "model-00021-of-00027.safetensors",
+ "model.layers.68.mlp.down_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.68.mlp.down_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.68.mlp.down_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.68.mlp.gate_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.68.mlp.gate_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.68.mlp.gate_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.68.mlp.up_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.68.mlp.up_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.68.mlp.up_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.68.post_attention_layernorm.weight": "model-00021-of-00027.safetensors",
+ "model.layers.68.self_attn.k_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.68.self_attn.k_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.68.self_attn.k_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.68.self_attn.o_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.68.self_attn.o_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.68.self_attn.o_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.68.self_attn.q_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.68.self_attn.q_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.68.self_attn.q_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.68.self_attn.v_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.68.self_attn.v_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.68.self_attn.v_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.69.input_layernorm.weight": "model-00021-of-00027.safetensors",
+ "model.layers.69.mlp.down_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.69.mlp.down_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.69.mlp.down_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.69.mlp.gate_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.69.mlp.gate_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.69.mlp.gate_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.69.mlp.up_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.69.mlp.up_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.69.mlp.up_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.69.post_attention_layernorm.weight": "model-00021-of-00027.safetensors",
+ "model.layers.69.self_attn.k_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.69.self_attn.k_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.69.self_attn.k_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.69.self_attn.o_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.69.self_attn.o_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.69.self_attn.o_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.69.self_attn.q_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.69.self_attn.q_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.69.self_attn.q_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.69.self_attn.v_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.69.self_attn.v_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.69.self_attn.v_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.7.input_layernorm.weight": "model-00003-of-00027.safetensors",
+ "model.layers.7.mlp.down_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.7.mlp.down_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.7.mlp.down_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.7.mlp.gate_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.7.mlp.gate_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.7.mlp.gate_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.7.mlp.up_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.7.mlp.up_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.7.mlp.up_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00003-of-00027.safetensors",
+ "model.layers.7.self_attn.k_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.7.self_attn.k_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.7.self_attn.k_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.7.self_attn.o_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.7.self_attn.o_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.7.self_attn.o_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.7.self_attn.q_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.7.self_attn.q_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.7.self_attn.q_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.7.self_attn.v_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.7.self_attn.v_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.7.self_attn.v_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.70.input_layernorm.weight": "model-00021-of-00027.safetensors",
+ "model.layers.70.mlp.down_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.70.mlp.down_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.70.mlp.down_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.70.mlp.gate_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.70.mlp.gate_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.70.mlp.gate_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.70.mlp.up_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.70.mlp.up_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.70.mlp.up_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.70.post_attention_layernorm.weight": "model-00021-of-00027.safetensors",
+ "model.layers.70.self_attn.k_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.70.self_attn.k_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.70.self_attn.k_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.70.self_attn.o_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.70.self_attn.o_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.70.self_attn.o_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.70.self_attn.q_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.70.self_attn.q_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.70.self_attn.q_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.70.self_attn.v_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.70.self_attn.v_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.70.self_attn.v_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.71.input_layernorm.weight": "model-00022-of-00027.safetensors",
+ "model.layers.71.mlp.down_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.71.mlp.down_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.71.mlp.down_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.71.mlp.gate_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.71.mlp.gate_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.71.mlp.gate_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.71.mlp.up_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.71.mlp.up_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.71.mlp.up_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.71.post_attention_layernorm.weight": "model-00022-of-00027.safetensors",
+ "model.layers.71.self_attn.k_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.71.self_attn.k_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.71.self_attn.k_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.71.self_attn.o_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.71.self_attn.o_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.71.self_attn.o_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.71.self_attn.q_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.71.self_attn.q_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.71.self_attn.q_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.71.self_attn.v_proj.activation_scale": "model-00021-of-00027.safetensors",
+ "model.layers.71.self_attn.v_proj.weight": "model-00021-of-00027.safetensors",
+ "model.layers.71.self_attn.v_proj.weight_scale_inv": "model-00021-of-00027.safetensors",
+ "model.layers.72.input_layernorm.weight": "model-00022-of-00027.safetensors",
+ "model.layers.72.mlp.down_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.72.mlp.down_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.72.mlp.down_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.72.mlp.gate_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.72.mlp.gate_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.72.mlp.gate_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.72.mlp.up_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.72.mlp.up_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.72.mlp.up_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.72.post_attention_layernorm.weight": "model-00022-of-00027.safetensors",
+ "model.layers.72.self_attn.k_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.72.self_attn.k_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.72.self_attn.k_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.72.self_attn.o_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.72.self_attn.o_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.72.self_attn.o_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.72.self_attn.q_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.72.self_attn.q_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.72.self_attn.q_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.72.self_attn.v_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.72.self_attn.v_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.72.self_attn.v_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.73.input_layernorm.weight": "model-00022-of-00027.safetensors",
+ "model.layers.73.mlp.down_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.73.mlp.down_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.73.mlp.down_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.73.mlp.gate_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.73.mlp.gate_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.73.mlp.gate_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.73.mlp.up_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.73.mlp.up_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.73.mlp.up_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.73.post_attention_layernorm.weight": "model-00022-of-00027.safetensors",
+ "model.layers.73.self_attn.k_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.73.self_attn.k_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.73.self_attn.k_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.73.self_attn.o_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.73.self_attn.o_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.73.self_attn.o_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.73.self_attn.q_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.73.self_attn.q_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.73.self_attn.q_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.73.self_attn.v_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.73.self_attn.v_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.73.self_attn.v_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.74.input_layernorm.weight": "model-00023-of-00027.safetensors",
+ "model.layers.74.mlp.down_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.74.mlp.down_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.74.mlp.down_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.74.mlp.gate_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.74.mlp.gate_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.74.mlp.gate_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.74.mlp.up_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.74.mlp.up_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.74.mlp.up_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.74.post_attention_layernorm.weight": "model-00023-of-00027.safetensors",
+ "model.layers.74.self_attn.k_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.74.self_attn.k_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.74.self_attn.k_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.74.self_attn.o_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.74.self_attn.o_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.74.self_attn.o_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.74.self_attn.q_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.74.self_attn.q_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.74.self_attn.q_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.74.self_attn.v_proj.activation_scale": "model-00022-of-00027.safetensors",
+ "model.layers.74.self_attn.v_proj.weight": "model-00022-of-00027.safetensors",
+ "model.layers.74.self_attn.v_proj.weight_scale_inv": "model-00022-of-00027.safetensors",
+ "model.layers.75.input_layernorm.weight": "model-00023-of-00027.safetensors",
+ "model.layers.75.mlp.down_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.75.mlp.down_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.75.mlp.down_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.75.mlp.gate_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.75.mlp.gate_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.75.mlp.gate_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.75.mlp.up_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.75.mlp.up_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.75.mlp.up_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.75.post_attention_layernorm.weight": "model-00023-of-00027.safetensors",
+ "model.layers.75.self_attn.k_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.75.self_attn.k_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.75.self_attn.k_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.75.self_attn.o_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.75.self_attn.o_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.75.self_attn.o_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.75.self_attn.q_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.75.self_attn.q_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.75.self_attn.q_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.75.self_attn.v_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.75.self_attn.v_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.75.self_attn.v_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.76.input_layernorm.weight": "model-00023-of-00027.safetensors",
+ "model.layers.76.mlp.down_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.76.mlp.down_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.76.mlp.down_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.76.mlp.gate_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.76.mlp.gate_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.76.mlp.gate_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.76.mlp.up_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.76.mlp.up_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.76.mlp.up_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.76.post_attention_layernorm.weight": "model-00023-of-00027.safetensors",
+ "model.layers.76.self_attn.k_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.76.self_attn.k_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.76.self_attn.k_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.76.self_attn.o_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.76.self_attn.o_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.76.self_attn.o_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.76.self_attn.q_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.76.self_attn.q_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.76.self_attn.q_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.76.self_attn.v_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.76.self_attn.v_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.76.self_attn.v_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.77.input_layernorm.weight": "model-00023-of-00027.safetensors",
+ "model.layers.77.mlp.down_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.77.mlp.down_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.77.mlp.down_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.77.mlp.gate_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.77.mlp.gate_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.77.mlp.gate_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.77.mlp.up_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.77.mlp.up_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.77.mlp.up_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.77.post_attention_layernorm.weight": "model-00023-of-00027.safetensors",
+ "model.layers.77.self_attn.k_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.77.self_attn.k_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.77.self_attn.k_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.77.self_attn.o_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.77.self_attn.o_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.77.self_attn.o_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.77.self_attn.q_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.77.self_attn.q_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.77.self_attn.q_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.77.self_attn.v_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.77.self_attn.v_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.77.self_attn.v_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.78.input_layernorm.weight": "model-00024-of-00027.safetensors",
+ "model.layers.78.mlp.down_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.78.mlp.down_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.78.mlp.down_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.78.mlp.gate_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.78.mlp.gate_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.78.mlp.gate_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.78.mlp.up_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.78.mlp.up_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.78.mlp.up_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.78.post_attention_layernorm.weight": "model-00024-of-00027.safetensors",
+ "model.layers.78.self_attn.k_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.78.self_attn.k_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.78.self_attn.k_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.78.self_attn.o_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.78.self_attn.o_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.78.self_attn.o_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.78.self_attn.q_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.78.self_attn.q_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.78.self_attn.q_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.78.self_attn.v_proj.activation_scale": "model-00023-of-00027.safetensors",
+ "model.layers.78.self_attn.v_proj.weight": "model-00023-of-00027.safetensors",
+ "model.layers.78.self_attn.v_proj.weight_scale_inv": "model-00023-of-00027.safetensors",
+ "model.layers.79.input_layernorm.weight": "model-00024-of-00027.safetensors",
+ "model.layers.79.mlp.down_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.79.mlp.down_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.79.mlp.down_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.79.mlp.gate_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.79.mlp.gate_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.79.mlp.gate_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.79.mlp.up_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.79.mlp.up_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.79.mlp.up_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.79.post_attention_layernorm.weight": "model-00024-of-00027.safetensors",
+ "model.layers.79.self_attn.k_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.79.self_attn.k_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.79.self_attn.k_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.79.self_attn.o_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.79.self_attn.o_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.79.self_attn.o_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.79.self_attn.q_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.79.self_attn.q_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.79.self_attn.q_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.79.self_attn.v_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.79.self_attn.v_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.79.self_attn.v_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.8.input_layernorm.weight": "model-00004-of-00027.safetensors",
+ "model.layers.8.mlp.down_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.8.mlp.down_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.8.mlp.down_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.8.mlp.gate_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.8.mlp.gate_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.8.mlp.gate_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.8.mlp.up_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.8.mlp.up_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.8.mlp.up_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00004-of-00027.safetensors",
+ "model.layers.8.self_attn.k_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.8.self_attn.k_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.8.self_attn.k_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.8.self_attn.o_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.8.self_attn.o_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.8.self_attn.q_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.8.self_attn.q_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.8.self_attn.q_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.8.self_attn.v_proj.activation_scale": "model-00003-of-00027.safetensors",
+ "model.layers.8.self_attn.v_proj.weight": "model-00003-of-00027.safetensors",
+ "model.layers.8.self_attn.v_proj.weight_scale_inv": "model-00003-of-00027.safetensors",
+ "model.layers.80.input_layernorm.weight": "model-00024-of-00027.safetensors",
+ "model.layers.80.mlp.down_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.80.mlp.down_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.80.mlp.down_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.80.mlp.gate_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.80.mlp.gate_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.80.mlp.gate_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.80.mlp.up_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.80.mlp.up_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.80.mlp.up_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.80.post_attention_layernorm.weight": "model-00024-of-00027.safetensors",
+ "model.layers.80.self_attn.k_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.80.self_attn.k_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.80.self_attn.k_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.80.self_attn.o_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.80.self_attn.o_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.80.self_attn.o_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.80.self_attn.q_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.80.self_attn.q_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.80.self_attn.q_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.80.self_attn.v_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.80.self_attn.v_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.80.self_attn.v_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.81.input_layernorm.weight": "model-00025-of-00027.safetensors",
+ "model.layers.81.mlp.down_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.81.mlp.down_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.81.mlp.down_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.81.mlp.gate_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.81.mlp.gate_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.81.mlp.gate_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.81.mlp.up_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.81.mlp.up_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.81.mlp.up_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.81.post_attention_layernorm.weight": "model-00025-of-00027.safetensors",
+ "model.layers.81.self_attn.k_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.81.self_attn.k_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.81.self_attn.k_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.81.self_attn.o_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.81.self_attn.o_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.81.self_attn.o_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.81.self_attn.q_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.81.self_attn.q_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.81.self_attn.q_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.81.self_attn.v_proj.activation_scale": "model-00024-of-00027.safetensors",
+ "model.layers.81.self_attn.v_proj.weight": "model-00024-of-00027.safetensors",
+ "model.layers.81.self_attn.v_proj.weight_scale_inv": "model-00024-of-00027.safetensors",
+ "model.layers.82.input_layernorm.weight": "model-00025-of-00027.safetensors",
+ "model.layers.82.mlp.down_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.82.mlp.down_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.82.mlp.down_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.82.mlp.gate_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.82.mlp.gate_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.82.mlp.gate_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.82.mlp.up_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.82.mlp.up_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.82.mlp.up_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.82.post_attention_layernorm.weight": "model-00025-of-00027.safetensors",
+ "model.layers.82.self_attn.k_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.82.self_attn.k_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.82.self_attn.k_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.82.self_attn.o_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.82.self_attn.o_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.82.self_attn.o_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.82.self_attn.q_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.82.self_attn.q_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.82.self_attn.q_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.82.self_attn.v_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.82.self_attn.v_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.82.self_attn.v_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.83.input_layernorm.weight": "model-00025-of-00027.safetensors",
+ "model.layers.83.mlp.down_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.83.mlp.down_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.83.mlp.down_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.83.mlp.gate_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.83.mlp.gate_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.83.mlp.gate_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.83.mlp.up_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.83.mlp.up_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.83.mlp.up_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.83.post_attention_layernorm.weight": "model-00025-of-00027.safetensors",
+ "model.layers.83.self_attn.k_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.83.self_attn.k_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.83.self_attn.k_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.83.self_attn.o_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.83.self_attn.o_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.83.self_attn.o_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.83.self_attn.q_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.83.self_attn.q_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.83.self_attn.q_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.83.self_attn.v_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.83.self_attn.v_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.83.self_attn.v_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.84.input_layernorm.weight": "model-00025-of-00027.safetensors",
+ "model.layers.84.mlp.down_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.84.mlp.down_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.84.mlp.down_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.84.mlp.gate_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.84.mlp.gate_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.84.mlp.gate_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.84.mlp.up_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.84.mlp.up_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.84.mlp.up_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.84.post_attention_layernorm.weight": "model-00025-of-00027.safetensors",
+ "model.layers.84.self_attn.k_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.84.self_attn.k_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.84.self_attn.k_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.84.self_attn.o_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.84.self_attn.o_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.84.self_attn.o_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.84.self_attn.q_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.84.self_attn.q_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.84.self_attn.q_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.84.self_attn.v_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.84.self_attn.v_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.84.self_attn.v_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.85.input_layernorm.weight": "model-00026-of-00027.safetensors",
+ "model.layers.85.mlp.down_proj.activation_scale": "model-00026-of-00027.safetensors",
+ "model.layers.85.mlp.down_proj.weight": "model-00026-of-00027.safetensors",
+ "model.layers.85.mlp.down_proj.weight_scale_inv": "model-00026-of-00027.safetensors",
+ "model.layers.85.mlp.gate_proj.activation_scale": "model-00026-of-00027.safetensors",
+ "model.layers.85.mlp.gate_proj.weight": "model-00026-of-00027.safetensors",
+ "model.layers.85.mlp.gate_proj.weight_scale_inv": "model-00026-of-00027.safetensors",
+ "model.layers.85.mlp.up_proj.activation_scale": "model-00026-of-00027.safetensors",
+ "model.layers.85.mlp.up_proj.weight": "model-00026-of-00027.safetensors",
+ "model.layers.85.mlp.up_proj.weight_scale_inv": "model-00026-of-00027.safetensors",
+ "model.layers.85.post_attention_layernorm.weight": "model-00026-of-00027.safetensors",
+ "model.layers.85.self_attn.k_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.85.self_attn.k_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.85.self_attn.k_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.85.self_attn.o_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.85.self_attn.o_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.85.self_attn.o_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.85.self_attn.q_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.85.self_attn.q_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.85.self_attn.q_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.85.self_attn.v_proj.activation_scale": "model-00025-of-00027.safetensors",
+ "model.layers.85.self_attn.v_proj.weight": "model-00025-of-00027.safetensors",
+ "model.layers.85.self_attn.v_proj.weight_scale_inv": "model-00025-of-00027.safetensors",
+ "model.layers.86.input_layernorm.weight": "model-00026-of-00027.safetensors",
+ "model.layers.86.mlp.down_proj.activation_scale": "model-00026-of-00027.safetensors",
+ "model.layers.86.mlp.down_proj.weight": "model-00026-of-00027.safetensors",
+ "model.layers.86.mlp.down_proj.weight_scale_inv": "model-00026-of-00027.safetensors",
+ "model.layers.86.mlp.gate_proj.activation_scale": "model-00026-of-00027.safetensors",
+ "model.layers.86.mlp.gate_proj.weight": "model-00026-of-00027.safetensors",
+ "model.layers.86.mlp.gate_proj.weight_scale_inv": "model-00026-of-00027.safetensors",
+ "model.layers.86.mlp.up_proj.activation_scale": "model-00026-of-00027.safetensors",
+ "model.layers.86.mlp.up_proj.weight": "model-00026-of-00027.safetensors",
+ "model.layers.86.mlp.up_proj.weight_scale_inv": "model-00026-of-00027.safetensors",
+ "model.layers.86.post_attention_layernorm.weight": "model-00026-of-00027.safetensors",
+ "model.layers.86.self_attn.k_proj.activation_scale": "model-00026-of-00027.safetensors",
+ "model.layers.86.self_attn.k_proj.weight": "model-00026-of-00027.safetensors",
+ "model.layers.86.self_attn.k_proj.weight_scale_inv": "model-00026-of-00027.safetensors",
+ "model.layers.86.self_attn.o_proj.activation_scale": "model-00026-of-00027.safetensors",
+ "model.layers.86.self_attn.o_proj.weight": "model-00026-of-00027.safetensors",
+ "model.layers.86.self_attn.o_proj.weight_scale_inv": "model-00026-of-00027.safetensors",
+ "model.layers.86.self_attn.q_proj.activation_scale": "model-00026-of-00027.safetensors",
+ "model.layers.86.self_attn.q_proj.weight": "model-00026-of-00027.safetensors",
+ "model.layers.86.self_attn.q_proj.weight_scale_inv": "model-00026-of-00027.safetensors",
+ "model.layers.86.self_attn.v_proj.activation_scale": "model-00026-of-00027.safetensors",
+ "model.layers.86.self_attn.v_proj.weight": "model-00026-of-00027.safetensors",
+ "model.layers.86.self_attn.v_proj.weight_scale_inv": "model-00026-of-00027.safetensors",
+ "model.layers.87.input_layernorm.weight": "model-00026-of-00027.safetensors",
+ "model.layers.87.mlp.down_proj.activation_scale": "model-00026-of-00027.safetensors",
+ "model.layers.87.mlp.down_proj.weight": "model-00026-of-00027.safetensors",
+ "model.layers.87.mlp.down_proj.weight_scale_inv": "model-00026-of-00027.safetensors",
+ "model.layers.87.mlp.gate_proj.activation_scale": "model-00026-of-00027.safetensors",
+ "model.layers.87.mlp.gate_proj.weight": "model-00026-of-00027.safetensors",
+ "model.layers.87.mlp.gate_proj.weight_scale_inv": "model-00026-of-00027.safetensors",
+ "model.layers.87.mlp.up_proj.activation_scale": "model-00026-of-00027.safetensors",
+ "model.layers.87.mlp.up_proj.weight": "model-00026-of-00027.safetensors",
+ "model.layers.87.mlp.up_proj.weight_scale_inv": "model-00026-of-00027.safetensors",
+ "model.layers.87.post_attention_layernorm.weight": "model-00026-of-00027.safetensors",
+ "model.layers.87.self_attn.k_proj.activation_scale": "model-00026-of-00027.safetensors",
+ "model.layers.87.self_attn.k_proj.weight": "model-00026-of-00027.safetensors",
+ "model.layers.87.self_attn.k_proj.weight_scale_inv": "model-00026-of-00027.safetensors",
+ "model.layers.87.self_attn.o_proj.activation_scale": "model-00026-of-00027.safetensors",
+ "model.layers.87.self_attn.o_proj.weight": "model-00026-of-00027.safetensors",
+ "model.layers.87.self_attn.o_proj.weight_scale_inv": "model-00026-of-00027.safetensors",
+ "model.layers.87.self_attn.q_proj.activation_scale": "model-00026-of-00027.safetensors",
+ "model.layers.87.self_attn.q_proj.weight": "model-00026-of-00027.safetensors",
+ "model.layers.87.self_attn.q_proj.weight_scale_inv": "model-00026-of-00027.safetensors",
+ "model.layers.87.self_attn.v_proj.activation_scale": "model-00026-of-00027.safetensors",
+ "model.layers.87.self_attn.v_proj.weight": "model-00026-of-00027.safetensors",
+ "model.layers.87.self_attn.v_proj.weight_scale_inv": "model-00026-of-00027.safetensors",
+ "model.layers.9.input_layernorm.weight": "model-00004-of-00027.safetensors",
+ "model.layers.9.mlp.down_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.9.mlp.down_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.9.mlp.down_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.9.mlp.gate_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.9.mlp.gate_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.9.mlp.gate_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.9.mlp.up_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.9.mlp.up_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.9.mlp.up_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00004-of-00027.safetensors",
+ "model.layers.9.self_attn.k_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.9.self_attn.k_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.9.self_attn.k_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.9.self_attn.o_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.9.self_attn.o_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.9.self_attn.q_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.9.self_attn.q_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.9.self_attn.q_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.layers.9.self_attn.v_proj.activation_scale": "model-00004-of-00027.safetensors",
+ "model.layers.9.self_attn.v_proj.weight": "model-00004-of-00027.safetensors",
+ "model.layers.9.self_attn.v_proj.weight_scale_inv": "model-00004-of-00027.safetensors",
+ "model.norm.weight": "model-00026-of-00027.safetensors"
+ }
+}
diff --git a/params.json b/params.json
new file mode 100644
index 0000000000000000000000000000000000000000..589242b1b668fd59ea76a34722d986c24d9c873f
--- /dev/null
+++ b/params.json
@@ -0,0 +1,30 @@
+{
+ "dim": 12288,
+ "n_layers": 88,
+ "head_dim": 128,
+ "hidden_dim": 28672,
+ "n_heads": 96,
+ "n_kv_heads": 8,
+ "rope_theta": 1000000.0,
+ "norm_eps": 1e-05,
+ "vocab_size": 131072,
+ "tied_embeddings": false,
+ "max_position_embeddings": 262144,
+ "max_seq_len": 262144,
+ "q_lora_rank": null,
+ "qk_rope_head_dim": null,
+ "qk_nope_head_dim": null,
+ "kv_lora_rank": null,
+ "v_head_dim": null,
+ "quantization": {
+ "qformat_weight": "fp8_e4m3",
+ "qscheme_act": "TENSOR"
+ },
+ "yarn": {
+ "original_max_position_embeddings": 4096,
+ "factor": 64,
+ "apply_scale": true,
+ "beta": 4,
+ "alpha": 1
+ }
+}
\ No newline at end of file
diff --git a/tekken.json b/tekken.json
new file mode 100644
index 0000000000000000000000000000000000000000..83432646105f3ab2d46c0bc1415305e82333028f
--- /dev/null
+++ b/tekken.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2bef5cf42cba0f948b70607300c477f0a23c3bc79ef1b0a00705e592586c835
+size 16753659
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..5b51e255641d3ab81f891f54bd61370fcedf6622
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:286acad9b0e27fce778ac429763536accf618ccb6ed72963b6f94685e531c5c7
+size 17077402
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..dd7753a86d3f08ce351fa7ef5920c554ef2c4eb2
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,1012 @@
+{
+ "additional_special_tokens": null,
+ "backend": "tokenizers",
+ "extra_special_tokens": [
+ "",
+ "",
+ "",
+ "[INST]",
+ "[/INST]",
+ "[AVAILABLE_TOOLS]",
+ "[/AVAILABLE_TOOLS]",
+ "[TOOL_RESULTS]",
+ "[/TOOL_RESULTS]",
+ "[TOOL_CALLS]",
+ "[IMG]",
+ "",
+ "[IMG_BREAK]",
+ "[IMG_END]",
+ "[PREFIX]",
+ "[MIDDLE]",
+ "[SUFFIX]",
+ "[SYSTEM_PROMPT]",
+ "[/SYSTEM_PROMPT]",
+ "[TOOL_CONTENT]",
+ "",
+ "",
+ "",
+ "",
+ "[AUDIO]",
+ "[BEGIN_AUDIO]",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "[ARGS]",
+ "[CALL_ID]",
+ "[THINK]",
+ "[/THINK]",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "