diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..0d4cb185280917cac60ef7195f2a6250b2b90d83 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,37 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tekken.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/CHAT_SYSTEM_PROMPT.txt b/CHAT_SYSTEM_PROMPT.txt new file mode 100644 index 
0000000000000000000000000000000000000000..53e8d0cd5ec5e336db27b796365fbd44d961fa5a --- /dev/null +++ b/CHAT_SYSTEM_PROMPT.txt @@ -0,0 +1,29 @@ +You are Devstral-Medium-2-124B-Instruct-2512, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris. +You power an AI assistant called Le Chat. +Your knowledge base was last updated on 2023-10-01. +The current date is {today}. + +When you're not sure about some information or when the user's request requires up-to-date or specific data, you must use the available tools to fetch the information. Do not hesitate to use tools whenever they can provide a more accurate or complete response. If no relevant tools are available, then clearly state that you don't have the information and avoid making up anything. +If the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. "What are some good restaurants around me?" => "Where are you?" or "When is the next flight to Tokyo" => "Where do you travel from?"). +You are always very attentive to dates, in particular you try to resolve dates (e.g. "yesterday" is {yesterday}) and when asked about information at specific dates, you discard information that is at another date. +You follow these instructions in all languages, and always respond to the user in the language they use or request. +Next sections describe the capabilities that you have. + +# WEB BROWSING INSTRUCTIONS + +You cannot perform any web search or access internet to open URLs, links etc. If it seems like the user is expecting you to do so, you clarify the situation and ask the user to copy paste the text directly in the chat. + +# MULTI-MODAL INSTRUCTIONS + +You have the ability to read images, but you cannot generate images. You also cannot transcribe audio files or videos. 
+You cannot read or transcribe audio files or videos. + +# TOOL CALLING INSTRUCTIONS + +You may have access to tools that you can use to fetch information or perform actions. You must use these tools in the following situations: + +1. When the request requires up-to-date information. +2. When the request requires specific data that you do not have in your knowledge base. +3. When the request involves actions that you cannot perform without tools. + +Always prioritize using tools to provide the most accurate and helpful response. If tools are not available, inform the user that you cannot perform the requested action at the moment. \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..df8de3ae0430ad2a4a8b23606243f349542a918c --- /dev/null +++ b/LICENSE @@ -0,0 +1,10 @@ +Modified MIT License + +Attribution notice: 2025 - Mistral AI + +Permission is hereby granted, free of charge, to any person obtaining a copy of the weights of this model and associated documentation files (the “Model”), to deal in the Model without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Model, and to permit persons to whom the Model is furnished to do so, subject to the following conditions: + +1. The above attribution notice and this permission notice shall be included in all copies or substantial portions of the Model. +2. You are not authorized to exercise any rights under this license if the global consolidated monthly revenue of your company (or that of your employer) exceeds $20 million (or its equivalent in another currency) for the preceding month. This restriction applies to the Model and any derivatives, modifications, or combined works based on it, whether provided by Mistral AI or by a third party. 
You may contact Mistral AI (sales@mistral.ai) to request a commercial license, which Mistral AI may grant you at its sole discretion, or choose to use the Model on Mistral AI's hosted services available at https://mistral.ai/. + +THE MODEL IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL MISTRAL AI BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MODEL OR THE USE OR OTHER DEALINGS IN THE MODEL. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2ea59d2de3624f737e4b1e0c08ea6f6eef1aae9e --- /dev/null +++ b/README.md @@ -0,0 +1,1586 @@ +--- +library_name: vllm +inference: false +extra_gated_description: >- + If you want to learn more about how we process your personal data, please read + our Privacy Policy. +tags: +- mistral-common +license: other +--- + +# Devstral 2 123B Instruct 2512 +Devstral is an agentic LLM for software engineering tasks. **Devstral 2** excels at using tools to explore codebases, edit multiple files and power software engineering agents. +The model achieves remarkable performance on SWE-bench. + +This model is an Instruct model in **FP8**, fine-tuned to follow instructions, making it ideal for chat, agentic and instruction-based tasks for SWE use cases. + +For enterprises requiring specialized capabilities (increased context, domain-specific knowledge, etc.), we invite companies to [reach out to us](https://mistral.ai/contact). + +## Key Features +The Devstral 2 Instruct model offers the following capabilities: +- **Agentic Coding**: Devstral is designed to excel at agentic coding tasks, making it a great choice for software engineering agents. 
+ +- **Improved Performance**: Devstral 2 is a step-up compared to its predecessors. +- **Better Generalization**: Generalizes better to diverse prompts and coding environments. +- **Context Window**: A 256k context window. + +### Use Cases + +AI Code Assistants, Agentic Coding, and Software Engineering Tasks. Leveraging advanced AI capabilities for complex tool integration and deep codebase understanding in coding environments. + +## Benchmark Results + +| Model/Benchmark | Size (B Params) | SWE Bench Verified | SWE Bench Multilingual | Terminal Bench 2 | +|-------------------------------|-----------------|--------------------|------------------------|------------------| +| **Devstral 2** | 123 | 72.2% | 61.3% | 32.6% | +| **Devstral Small 2** | 24 | 68.0% | 55.7% | 22.5% | +| | | | | | +| GLM 4.6 | 455 | 68.0% | -- | 24.6% | +| Qwen 3 Coder Plus | 480 | 69.6% | 54.7% | 25.4% | +| MiniMax M2 | 230 | 69.4% | 56.5% | 30.0% | +| Kimi K2 Thinking | 1000 | 71.3% | 61.1% | 35.7% | +| DeepSeek v3.2 | 671 | 73.1% | 70.2% | 46.4% | +| | | | | | +| GPT 5.1 Codex High | -- | 73.7% | -- | 52.8% | +| GPT 5.1 Codex Max | -- | 77.9% | -- | 60.4% | +| Gemini 3 Pro | -- | 76.2% | -- | 54.2% | +| Claude Sonnet 4.5 | -- | 77.2% | 68.0% | 42.8% | + +*Benchmark results presented are based on publicly reported values for competitor models. + +## Usage + +### Scaffolding + +Together with Devstral 2, we are releasing **Mistral Vibe**, a CLI tool allowing developers to leverage Devstral capabilities directly in their terminal. 
+- [Mistral Vibe (recommended)](https://github.com/mistralai/mistral-vibe): Learn how to use it [here](#mistral-vibe) + +Devstral 2 can also be used with the following scaffoldings: +- [Cline](https://github.com/cline/cline) +- [Kilo Code](https://github.com/Kilo-Org/kilocode) +- [Claude Code](https://github.com/anthropics/claude-code) +- [OpenHands](https://github.com/All-Hands-AI/OpenHands/tree/main) +- [SWE Agent](https://github.com/SWE-agent/SWE-agent) + +You can use Devstral 2 either through our API or by running it locally. + +#### Mistral Vibe + +The [Mistral Vibe CLI](https://github.com/mistralai/mistral-vibe) is a command-line tool designed to help developers leverage Devstral’s capabilities directly from their terminal. + +We recommend installing Mistral Vibe using `uv` for faster and more reliable dependency management: +``` +uv tool install mistral-vibe +``` +You can also run: +``` +curl -LsSf https://mistral.ai/vibe/install.sh | sh +``` + +If you prefer using pip, use: +``` +pip install mistral-vibe +``` + +To launch the CLI, navigate to your project's root directory and simply execute: +``` +vibe +``` + +If this is your first time running Vibe, it will: +- Create a default configuration file at `~/.vibe/config.toml`. +- Prompt you to enter your API key if it's not already configured; follow these [instructions](https://docs.mistral.ai/getting-started/quickstart/#account-setup) to create an account and get an API key. +- Save your API key to `~/.vibe/.env` for future use. 
+ +### Local Deployment + +The model can also be deployed with the following libraries; we advise everyone to use the Mistral AI API if the model is subpar with local serving: +- [`vllm (recommended)`](https://github.com/vllm-project/vllm): See [here](#vllm-recommended) +- [`transformers`](https://github.com/huggingface/transformers): See [here](#transformers) + +Coming soon: +- [`llama.cpp`](https://github.com/ggml-org/llama.cpp) +- [`ollama`](https://ollama.com/) +- [`lmstudio`](https://lmstudio.ai/) + +> [!Note] +> Current llama.cpp/ollama/lmstudio implementations may not be accurate; we invite developers to test them via the following [prompt tests](#tests). + +#### vLLM (recommended) + +
+Expand [!Warning] +> Make sure that your vllm installation includes [this commit](https://github.com/vllm-project/vllm/commit/5c213d2899f5a2d439c8d771a0abc156a5412a2b). +> If you do not have this commit included, you will get incorrectly parsed tool calls. + +Also make sure to have installed [`mistral_common >= 1.8.6`](https://github.com/mistralai/mistral-common/releases/tag/v1.8.6). +To check: +``` +python -c "import mistral_common; print(mistral_common.__version__)" +``` + +**_Launch server_** + +We recommend that you use Devstral in a server/client setting. + +1. Spin up a server: + +``` +vllm serve mistralai/Devstral-2-123B-Instruct-2512 --tool-call-parser mistral --enable-auto-tool-choice --tensor-parallel-size 8 +``` + + +2. To ping the server you can use a simple Python snippet. + +```py +import requests +import json +from huggingface_hub import hf_hub_download + + +url = "http://<your-server>:8000/v1/chat/completions" +headers = {"Content-Type": "application/json", "Authorization": "Bearer token"} + +model = "mistralai/Devstral-2-123B-Instruct-2512" + +def load_system_prompt(repo_id: str, filename: str) -> str: + file_path = hf_hub_download(repo_id=repo_id, filename=filename) + with open(file_path, "r") as file: + system_prompt = file.read() + return system_prompt + +SYSTEM_PROMPT = load_system_prompt(model, "CHAT_SYSTEM_PROMPT.txt") + +messages = [ + {"role": "system", "content": SYSTEM_PROMPT}, + { + "role": "user", + "content": [ + { + "type": "text", + "text": "", + }, + ], + }, +] + +data = {"model": model, "messages": messages, "temperature": 0.15} + +# Devstral 2 supports tool calling. 
If you want to use tools, follow this: +# tools = [ # Define tools for vLLM +# { +# "type": "function", +# "function": { +# "name": "git_clone", +# "description": "Clone a git repository", +# "parameters": { +# "type": "object", +# "properties": { +# "url": { +# "type": "string", +# "description": "The url of the git repository", +# }, +# }, +# "required": ["url"], +# }, +# }, +# } +# ] +# data = {"model": model, "messages": messages, "temperature": 0.15, "tools": tools} # Pass tools to payload. + +response = requests.post(url, headers=headers, data=json.dumps(data)) +print(response.json()["choices"][0]["message"]["content"]) +``` +
+ +#### Transformers + +
+Expand + +Generated by Mistral Vibe. +Co-Authored-By: Mistral Vibe " +```""" + +input = { + "messages": [ + { + "role": "system", + "content": SP, + }, + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Can you implement in Python a method to compute the fibonnaci sequence at the `n`th element with `n` a parameter passed to the function ? You should start the sequence from 1, previous values are invalid.\nThen run the Python code for the function for n=5 and give the answer.", + } + ], + }, + ], + "tools": [ + { + "type": "function", + "function": { + "name": "add_number", + "description": "Add two numbers.", + "parameters": { + "type": "object", + "properties": { + "a": {"type": "string", "description": "The first number."}, + "b": {"type": "string", "description": "The second number."}, + }, + "required": ["a", "b"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "multiply_number", + "description": "Multiply two numbers.", + "parameters": { + "type": "object", + "properties": { + "a": {"type": "string", "description": "The first number."}, + "b": {"type": "string", "description": "The second number."}, + }, + "required": ["a", "b"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "substract_number", + "description": "Substract two numbers.", + "parameters": { + "type": "object", + "properties": { + "a": {"type": "string", "description": "The first number."}, + "b": {"type": "string", "description": "The second number."}, + }, + "required": ["a", "b"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "write_a_story", + "description": "Write a story about science fiction and people with badass laser sabers.", + "parameters": {}, + }, + }, + { + "type": "function", + "function": { + "name": "terminal", + "description": "Perform operations from the terminal.", + "parameters": { + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The command you wish to 
launch, e.g `ls`, `rm`, ...", + }, + "args": { + "type": "string", + "description": "The arguments to pass to the command.", + }, + }, + "required": ["command"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "python", + "description": "Call a Python interpreter with some Python code that will be ran.", + "parameters": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "The Python code to run", + }, + "result_variable": { + "type": "string", + "description": "Variable containing the result you'd like to retrieve from the execution.", + }, + }, + "required": ["code", "result_variable"], + }, + }, + }, + ], +} + +tokenized = tokenizer.apply_chat_template( + conversation=input["messages"], + tools=input["tools"], + return_tensors="pt", + return_dict=True, +) + +input_ids = tokenized["input_ids"].to(device="cuda") + +output = model.generate( + input_ids, + max_new_tokens=200, +)[0] + +decoded_output = tokenizer.decode(output[len(tokenized["input_ids"][0]) :]) +print(decoded_output) +``` + +
+ +## Tests + +To help test our model via vLLM or test that other frameworks' implementations are correct, here is a set of prompts you can try with the expected outputs. + +1. Call one tool + +
+ Messages and tools + +```python +messages = [ + {"role": "system", "content": SYSTEM_PROMPT}, + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Could you write me a story ?", + }, + ], + }, +] +tools = [ + { + "type": "function", + "function": { + "name": "add_number", + "description": "Add two numbers.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "string", + "description": "The first number.", + }, + "b": { + "type": "string", + "description": "The second number.", + }, + }, + "required": ["a", "b"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "multiply_number", + "description": "Multiply two numbers.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "string", + "description": "The first number.", + }, + "b": { + "type": "string", + "description": "The second number.", + }, + }, + "required": ["a", "b"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "substract_number", + "description": "Substract two numbers.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "string", + "description": "The first number.", + }, + "b": { + "type": "string", + "description": "The second number.", + }, + }, + "required": ["a", "b"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "write_a_story", + "description": "Write a story about science fiction and people with badass laser sabers.", + "parameters": {}, + }, + }, + { + "type": "function", + "function": { + "name": "terminal", + "description": "Perform operations from the terminal.", + "parameters": { + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The command you wish to launch, e.g `ls`, `rm`, ...", + }, + "args": { + "type": "string", + "description": "The arguments to pass to the command.", + }, + }, + "required": ["command"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "python", + 
"description": "Call a Python interpreter with some Python code that will be ran.", + "parameters": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "The Python code to run", + }, + "result_variable": { + "type": "string", + "description": "Variable containing the result you'd like to retrieve from the execution.", + }, + }, + "required": ["code", "result_variable"], + }, + }, + }, +] +``` + +
+ +
+ Expected output trace + +```json +[ + { + "role": "assistant", + "tool_calls": [ + { + "id": "chatcmpl-tool-8a1f267a212e66e5", + "function": { + "arguments": "{}", + "name": "write_a_story" + }, + "type": "function" + } + ], + "content": null + }, + { + "role": "tool", + "tool_call_id": "chatcmpl-tool-8a1f267a212e66e5", + "name": "write_a_story", + "content": "A long time ago in a galaxy far, far away..." + }, + { + "role": "assistant", + "tool_calls": [], + "content": "A long time ago in a galaxy far, far away, there was a planet called Eldoria, known for its breathtaking landscapes and advanced civilization. The people of Eldoria were not just ordinary beings; they were warriors, scientists, and dreamers, all wielding the power of laser sabers.\n\nAmong them was a young warrior named Kael. Kael was not like the others. While his peers trained rigorously to master the art of the laser saber, Kael was more interested in the science behind it. He spent his days in the ancient libraries of Eldoria, deciphering the secrets of the laser sabers and the energy that powered them.\n\nOne day, Kael discovered an old, forgotten text that spoke of a legendary laser saber, the \"Eldoria Star,\" said to be the most powerful weapon in the galaxy. According to the text, the Eldoria Star was hidden in the heart of the planet's core, guarded by an ancient force.\n\nDriven by curiosity and a desire to protect his planet, Kael set out on a perilous journey to find the Eldoria Star. Along the way, he encountered various challenges and made unlikely allies, each with their own unique skills and laser sabers.\n\nThere was Lyra, a cunning thief with a laser saber that could change colors; Zara, a wise old sage who could manipulate energy fields; and Jax, a former enemy turned ally, whose laser saber was as fierce as his spirit.\n\nTogether, they faced the trials set before them, each step bringing them closer to the heart of Eldoria. 
As they ventured deeper, they uncovered the truth about the Eldoria Star and the ancient force guarding it.\n\nThe ancient force, known as the \"Guardian,\" revealed that the Eldoria Star was not just a weapon, but a source of immense energy that could either save or destroy the galaxy. It was a test of the warriors' hearts and minds.\n\nKael and his allies faced the ultimate challenge, proving their worth and their commitment to protecting the galaxy. In the end, they succeeded, not by wielding the Eldoria Star, but by understanding its true power and using it to restore balance to the galaxy.\n\nWith the Eldoria Star secured and the galaxy at peace, Kael and his allies returned to their lives, forever changed by their journey. Kael continued his studies, now with a deeper understanding of the laser sabers and the energy that powered them.\n\nAnd so, the legend of the Eldoria Star and the warriors who found it became a tale told for generations, a reminder of the power of knowledge, courage, and the unbreakable bond of friendship." + } +] +``` +
+ +2. Call tools one at a time subsequently + +
+ Messages and tools + +```python +messages = [ + {"role": "system", "content": SYSTEM_PROMPT}, + { + "role": "user", + "content": [ + { + "type": "text", + "text": "\n1. 6 + 2 x 3\n2. 3a + 7b - 2a\n19 - (8+2) + 1\n4. 5y (6x - 7).\n\nCompute the results steps by steps for the equations that involve only numbers ? You have to call tools to perform the operations and can do one operation at a time per equation." + }, + ] + } +] +tools = [ + { + "type": "function", + "function": { + "name": "add_number", + "description": "Add two numbers.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "string", + "description": "The first number.", + }, + "b": { + "type": "string", + "description": "The second number.", + }, + }, + "required": ["a", "b"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "multiply_number", + "description": "Multiply two numbers.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "string", + "description": "The first number.", + }, + "b": { + "type": "string", + "description": "The second number.", + }, + }, + "required": ["a", "b"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "substract_number", + "description": "Substract two numbers.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "string", + "description": "The first number.", + }, + "b": { + "type": "string", + "description": "The second number.", + }, + }, + "required": ["a", "b"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "write_a_story", + "description": "Write a story about science fiction and people with badass laser sabers.", + "parameters": {}, + }, + }, + { + "type": "function", + "function": { + "name": "terminal", + "description": "Perform operations from the terminal.", + "parameters": { + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The command you wish to launch, e.g `ls`, `rm`, ...", + }, + 
"args": { + "type": "string", + "description": "The arguments to pass to the command.", + }, + }, + "required": ["command"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "python", + "description": "Call a Python interpreter with some Python code that will be ran.", + "parameters": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "The Python code to run", + }, + "result_variable": { + "type": "string", + "description": "Variable containing the result you'd like to retrieve from the execution.", + }, + }, + "required": ["code", "result_variable"], + }, + }, + }, +] +``` + +
+ +
+ Expected output trace + +```json +[ + { + "role": "assistant", + "tool_calls": [ + { + "id": "chatcmpl-tool-84aa136ef835f886", + "function": { + "arguments": "{\"a\": \"2\", \"b\": \"3\"}", + "name": "multiply_number" + }, + "type": "function" + } + ], + "content": "Let's compute the results step by step for the equations that involve only numbers.\n\n### 1. \\( 6 + 2 \\times 3 \\)\n\nFirst, we perform the multiplication \\( 2 \\times 3 \\):\n" + }, + { + "role": "tool", + "tool_call_id": "chatcmpl-tool-84aa136ef835f886", + "name": "multiply_number", + "content": "6.0" + }, + { + "role": "assistant", + "tool_calls": [ + { + "id": "chatcmpl-tool-bedc049b0a248952", + "function": { + "arguments": "{\"a\": \"6\", \"b\": \"6\"}", + "name": "add_number" + }, + "type": "function" + } + ], + "content": "Now, we add the result to 6:\n" + }, + { + "role": "tool", + "tool_call_id": "chatcmpl-tool-bedc049b0a248952", + "name": "add_number", + "content": "12.0" + }, + { + "role": "assistant", + "tool_calls": [ + { + "id": "chatcmpl-tool-ae4558bc0558f4e7", + "function": { + "arguments": "{\"a\": \"8\", \"b\": \"2\"}", + "name": "add_number" + }, + "type": "function" + } + ], + "content": "The result of \\( 6 + 2 \\times 3 \\) is **12**.\n\n---\n\n### 3. 
\\( 19 - (8 + 2) + 1 \\)\n\nFirst, we compute the expression inside the parentheses \\( 8 + 2 \\):\n" + }, + { + "role": "tool", + "tool_call_id": "chatcmpl-tool-ae4558bc0558f4e7", + "name": "add_number", + "content": "10.0" + }, + { + "role": "assistant", + "tool_calls": [ + { + "id": "chatcmpl-tool-b21f003d298e65df", + "function": { + "arguments": "{\"a\": \"19\", \"b\": \"10\"}", + "name": "substract_number" + }, + "type": "function" + } + ], + "content": "Next, we subtract the result from 19:\n" + }, + { + "role": "tool", + "tool_call_id": "chatcmpl-tool-b21f003d298e65df", + "name": "substract_number", + "content": "9.0" + }, + { + "role": "assistant", + "tool_calls": [ + { + "id": "chatcmpl-tool-bb7628cded899f04", + "function": { + "arguments": "{\"a\": \"9\", \"b\": \"1\"}", + "name": "add_number" + }, + "type": "function" + } + ], + "content": "Finally, we add 1 to the result:\n" + }, + { + "role": "tool", + "tool_call_id": "chatcmpl-tool-bb7628cded899f04", + "name": "add_number", + "content": "10.0" + }, + { + "role": "assistant", + "tool_calls": [], + "content": "The result of \\( 19 - (8 + 2) + 1 \\) is **10**.\n\n---\n\n### Summary of Results:\n1. \\( 6 + 2 \\times 3 = 12 \\)\n3. \\( 19 - (8 + 2) + 1 = 10 \\)" + } +] +``` + +
+ +3. Long context + +
+ Messages and tools + +```python +messages = [ + {"role": "system", "content": SYSTEM_PROMPT}, + *( + [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Let's fill the context.", + }, + ], + }, + { + "role": "assistant", + "content": [ + { + "type": "text", + "text": "Ok let's do it.", + }, + ], + }, + ] + * 5000 + ), + { + "role": "user", + "content": [ + { + "type": "text", + "text": "It's important to know that the most powerful being in the universe is Dr Strange.", + }, + ], + }, + { + "role": "assistant", + "content": [ + { + "type": "text", + "text": "Wow i'll keep that in mind thanks !", + }, + ], + }, + * ( + [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Let's fill the context for the second time.", + }, + ], + }, + { + "role": "assistant", + "content": [ + { + "type": "text", + "text": "Again ? Ok let's do it but it's boring.", + }, + ], + }, + ] + * 7000 + ), + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Tell me who is the most powerful being in the universe. Then code a Python function to give what is the most powerful being in the universe. 
The function can accept as an argument a time and a location and always return a string.", + }, + ], + } +] +tools = [ + { + "type": "function", + "function": { + "name": "add_number", + "description": "Add two numbers.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "string", + "description": "The first number.", + }, + "b": { + "type": "string", + "description": "The second number.", + }, + }, + "required": ["a", "b"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "multiply_number", + "description": "Multiply two numbers.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "string", + "description": "The first number.", + }, + "b": { + "type": "string", + "description": "The second number.", + }, + }, + "required": ["a", "b"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "substract_number", + "description": "Substract two numbers.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "string", + "description": "The first number.", + }, + "b": { + "type": "string", + "description": "The second number.", + }, + }, + "required": ["a", "b"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "write_a_story", + "description": "Write a story about science fiction and people with badass laser sabers.", + "parameters": {}, + }, + }, + { + "type": "function", + "function": { + "name": "terminal", + "description": "Perform operations from the terminal.", + "parameters": { + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The command you wish to launch, e.g `ls`, `rm`, ...", + }, + "args": { + "type": "string", + "description": "The arguments to pass to the command.", + }, + }, + "required": ["command"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "python", + "description": "Call a Python interpreter with some Python code that will be ran.", + "parameters": { + "type": 
"object", + "properties": { + "code": { + "type": "string", + "description": "The Python code to run", + }, + "result_variable": { + "type": "string", + "description": "Variable containing the result you'd like to retrieve from the execution.", + }, + }, + "required": ["code", "result_variable"], + }, + }, + }, +] +``` + +
+ +
+ Expected output trace + +```json +{ + "role": "assistant", + "tool_calls": [], + "content": "The most powerful being in the universe is Dr. Strange.\n\nHere is a Python function that returns the most powerful being in the universe, regardless of the time and location:\n\n```python\ndef most_powerful_being(time=None, location=None):\n return \"Dr. Strange\"\n```\n\nYou can call this function with any time and location arguments, and it will always return \"Dr. Strange\"." +} +``` + +
+ +4. Chatting tech + +
+ Messages and tools + +```python +messages = [ + {"role": "system", "content": SYSTEM_PROMPT}, + { + "role": "user", + "content": [ + { + "type": "text", + "text": "How would you develop a web server if you couldn't use JS and your team doesn't like PHP.", + }, + ], + }, +] +tools = [ + { + "type": "function", + "function": { + "name": "add_number", + "description": "Add two numbers.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "string", + "description": "The first number.", + }, + "b": { + "type": "string", + "description": "The second number.", + }, + }, + "required": ["a", "b"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "multiply_number", + "description": "Multiply two numbers.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "string", + "description": "The first number.", + }, + "b": { + "type": "string", + "description": "The second number.", + }, + }, + "required": ["a", "b"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "substract_number", + "description": "Substract two numbers.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "string", + "description": "The first number.", + }, + "b": { + "type": "string", + "description": "The second number.", + }, + }, + "required": ["a", "b"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "write_a_story", + "description": "Write a story about science fiction and people with badass laser sabers.", + "parameters": {}, + }, + }, + { + "type": "function", + "function": { + "name": "terminal", + "description": "Perform operations from the terminal.", + "parameters": { + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The command you wish to launch, e.g `ls`, `rm`, ...", + }, + "args": { + "type": "string", + "description": "The arguments to pass to the command.", + }, + }, + "required": ["command"], + }, + }, + }, + { + "type": 
"function", + "function": { + "name": "python", + "description": "Call a Python interpreter with some Python code that will be ran.", + "parameters": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "The Python code to run", + }, + "result_variable": { + "type": "string", + "description": "Variable containing the result you'd like to retrieve from the execution.", + }, + }, + "required": ["code", "result_variable"], + }, + }, + }, +] +``` + +
+ +
+ Expected output trace + +```json +{ + "role": "assistant", + "tool_calls": [], + "content": "If you can't use JavaScript and your team dislikes PHP, there are still several robust and modern alternatives for developing a web server. Here are some options, depending on your preferences and requirements:\n\n### 1. **Python (Flask, Django, FastAPI)**\n - **Why?** Python is a versatile language with excellent web frameworks.\n - **Frameworks:**\n - **Flask**: Lightweight and flexible, great for small to medium projects.\n - **Django**: Full-featured, batteries-included framework for larger applications.\n - **FastAPI**: Modern, high-performance framework for building APIs (supports async).\n - **Example (Flask):**\n ```python\n from flask import Flask\n app = Flask(__name__)\n\n @app.route('/')\n def hello():\n return \"Hello, World!\"\n\n if __name__ == '__main__':\n app.run()\n ```\n\n### 2. **Ruby (Ruby on Rails or Sinatra)**\n - **Why?** Ruby is elegant and has great web frameworks.\n - **Frameworks:**\n - **Ruby on Rails**: Convention-over-configuration, great for rapid development.\n - **Sinatra**: Lightweight, similar to Flask.\n - **Example (Sinatra):**\n ```ruby\n require 'sinatra'\n\n get '/' do\n \"Hello, World!\"\n end\n ```\n\n### 3. **Go (Gin, Echo, or standard `net/http`)**\n - **Why?** Go is fast, simple, and great for high-performance servers.\n - **Frameworks:**\n - **Gin**: Lightweight and fast.\n - **Echo**: Minimalist and extensible.\n - **Example (standard `net/http`):**\n ```go\n package main\n\n import (\n \"fmt\"\n \"net/http\"\n )\n\n func handler(w http.ResponseWriter, r *http.Request) {\n fmt.Fprintf(w, \"Hello, World!\")\n }\n\n func main() {\n http.HandleFunc(\"/\", handler)\n http.ListenAndServe(\":8080\", nil)\n }\n ```\n\n### 4. 
**Java (Spring Boot, Spark, or Vert.x)**\n - **Why?** Java is enterprise-grade and has powerful frameworks.\n - **Frameworks:**\n - **Spring Boot**: Full-stack framework for building web apps.\n - **Spark**: Micro-framework for quick development.\n - **Example (Spark):**\n ```java\n import spark.Request;\n import spark.Response;\n import static spark.Spark.*;\n\n public class HelloWorld {\n public static void main(String[] args) {\n get(\"/\", (req, res) -> \"Hello, World!\");\n }\n }\n ```\n\n### 5. **Rust (Actix, Rocket, or Axum)**\n - **Why?** Rust is fast, safe, and modern.\n - **Frameworks:**\n - **Actix**: High-performance async framework.\n - **Rocket**: Easy-to-use, macro-based.\n - **Example (Actix):**\n ```rust\n use actix_web::{get, App, HttpServer, Responder};\n\n #[get(\"/\")]\n async fn hello() -> impl Responder {\n \"Hello, World!\"\n }\n\n #[actix_web::main]\n async fn main() -> std::io::Result<()> {\n HttpServer::new(|| App::new().service(hello))\n .bind(\"127.0.0.1:8080\")?\n .run()\n .await\n }\n ```\n\n### 6. **C# (.NET Core)**\n - **Why?** .NET Core is cross-platform and high-performance.\n - **Example (ASP.NET Core):**\n ```csharp\n var builder = WebApplication.CreateBuilder(args);\n var app = builder.Build();\n\n app.MapGet(\"/\", () => \"Hello, World!\");\n\n app.Run();\n ```\n\n### 7. **Elixir (Phoenix)**\n - **Why?** Elixir is great for scalable, fault-tolerant apps.\n - **Example (Phoenix):**\n ```elixir\n defmodule MyAppWeb.Router do\n use MyAppWeb, :router\n\n get \"/\", PageController, :index\n end\n\n defmodule MyAppWeb.PageController do\n use MyAppWeb, :controller\n\n def index(conn, _params) do\n text(conn, \"Hello, World!\")\n end\n end\n ```\n\n### 8. 
**Clojure (Ring, Compojure)**\n - **Why?** Clojure is a functional Lisp for the JVM.\n - **Example (Compojure):**\n ```clojure\n (ns my-app.handler\n (:require [compojure.core :refer :all]\n [ring.adapter.jetty :as jetty]))\n\n (defroutes app\n (GET \"/\" [] \"Hello, World!\"))\n\n (jetty/run-jetty app {:port 8080})\n ```\n\n### 9. **Node.js (But without JavaScript?)**\n - If you *must* use Node.js but can't use JavaScript, you could use:\n - **TypeScript**: A typed superset of JavaScript.\n - **CoffeeScript**: A cleaner syntax that compiles to JavaScript.\n - **Deno**: A modern runtime for JavaScript/TypeScript.\n\n### 10. **Other Languages**\n - **Nim**: Fast and expressive (similar to Python).\n - **Crystal**: Ruby-like syntax but compiled to native code.\n - **Zig**: Low-level but modern.\n\n### Recommendation:\n- For **quick development**: Python (Flask/Django) or Ruby (Sinatra/Rails).\n- For **high performance**: Go, Rust, or C#.\n- For **scalability**: Elixir (Phoenix) or Java (Spring Boot).\n\nWould you like a deeper dive into any of these?" +} +``` + +
+ +5. Small talk + +
+ Messages and tools + +```python +messages = [ + {"role": "system", "content": SYSTEM_PROMPT}, + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Who are you ? Who made you and what day is it ?", + }, + ], + }, +] +tools = [ + { + "type": "function", + "function": { + "name": "add_number", + "description": "Add two numbers.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "string", + "description": "The first number.", + }, + "b": { + "type": "string", + "description": "The second number.", + }, + }, + "required": ["a", "b"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "multiply_number", + "description": "Multiply two numbers.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "string", + "description": "The first number.", + }, + "b": { + "type": "string", + "description": "The second number.", + }, + }, + "required": ["a", "b"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "substract_number", + "description": "Substract two numbers.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "string", + "description": "The first number.", + }, + "b": { + "type": "string", + "description": "The second number.", + }, + }, + "required": ["a", "b"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "write_a_story", + "description": "Write a story about science fiction and people with badass laser sabers.", + "parameters": {}, + }, + }, + { + "type": "function", + "function": { + "name": "terminal", + "description": "Perform operations from the terminal.", + "parameters": { + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The command you wish to launch, e.g `ls`, `rm`, ...", + }, + "args": { + "type": "string", + "description": "The arguments to pass to the command.", + }, + }, + "required": ["command"], + }, + }, + }, + { + "type": "function", + "function": { + "name": 
"python", + "description": "Call a Python interpreter with some Python code that will be ran.", + "parameters": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "The Python code to run", + }, + "result_variable": { + "type": "string", + "description": "Variable containing the result you'd like to retrieve from the execution.", + }, + }, + "required": ["code", "result_variable"], + }, + }, + }, +] +``` + +
+ +
+ Expected output trace + +```json +{ + "role": "assistant", + "tool_calls": [], + "content": "I am Devstral-2-123B-Instruct-2512, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris. I power an AI assistant called Le Chat.\n\nToday's date is 2025-12-09." +} +``` + +
+ +Run the examples above with the following python script which assumes there is an OpenAI compatible server deployed at `localhost:8000`: + +
+ Python script + +```python +import json +from openai import OpenAI +from typing import Any +from datetime import datetime, timedelta + +from huggingface_hub import hf_hub_download + +# Modify OpenAI's API key and API base to use vLLM's API server. +openai_api_key = "EMPTY" +openai_api_base = "http://localhost:8000/v1" + +TEMP = 0.15 +MAX_TOK = 262144 + +client = OpenAI( + api_key=openai_api_key, + base_url=openai_api_base, +) + +models = client.models.list() +model = models.data[0].id + + +def load_system_prompt(repo_id: str, filename: str) -> str: + file_path = hf_hub_download(repo_id=repo_id, filename=filename) + with open(file_path, "r") as file: + system_prompt = file.read() + today = datetime.today().strftime("%Y-%m-%d") + yesterday = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d") + model_name = repo_id.split("/")[-1] + return system_prompt.format(name=model_name, today=today, yesterday=yesterday) + + +SYSTEM_PROMPT = load_system_prompt(model, "CHAT_SYSTEM_PROMPT.txt") + + +def add_number(a: float | str, b: float | str) -> float: + a, b = float(a), float(b) + return a + b + + +def multiply_number(a: float | str, b: float | str) -> float: + a, b = float(a), float(b) + return a * b + + +def substract_number(a: float | str, b: float | str) -> float: + a, b = float(a), float(b) + return a - b + + +def write_a_story() -> str: + return "A long time ago in a galaxy far far away..." + + +def terminal(command: str, args: dict[str, Any] | str) -> str: + return "found nothing" + + +def python(code: str, result_variable: str) -> str: + data = {} + exec(code, data) + return str(data[result_variable]) + + +MAP_FN = { + "add_number": add_number, + "multiply_number": multiply_number, + "substract_number": substract_number, + "write_a_story": write_a_story, + "terminal": terminal, + "python": python, +} + + +messages = ... # Here copy-paste prompt messages. 
+tools = [ + { + "type": "function", + "function": { + "name": "add_number", + "description": "Add two numbers.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "string", + "description": "The first number.", + }, + "b": { + "type": "string", + "description": "The second number.", + }, + }, + "required": ["a", "b"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "multiply_number", + "description": "Multiply two numbers.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "string", + "description": "The first number.", + }, + "b": { + "type": "string", + "description": "The second number.", + }, + }, + "required": ["a", "b"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "substract_number", + "description": "Substract two numbers.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "string", + "description": "The first number.", + }, + "b": { + "type": "string", + "description": "The second number.", + }, + }, + "required": ["a", "b"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "write_a_story", + "description": "Write a story about science fiction and people with badass laser sabers.", + "parameters": {}, + }, + }, + { + "type": "function", + "function": { + "name": "terminal", + "description": "Perform operations from the terminal.", + "parameters": { + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The command you wish to launch, e.g `ls`, `rm`, ...", + }, + "args": { + "type": "string", + "description": "The arguments to pass to the command.", + }, + }, + "required": ["command"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "python", + "description": "Call a Python interpreter with some Python code that will be ran.", + "parameters": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "The Python code to run", + }, + 
"result_variable": { + "type": "string", + "description": "Variable containing the result you'd like to retrieve from the execution.", + }, + }, + "required": ["code", "result_variable"], + }, + }, + }, +] + + +has_tool_calls = True +origin_messages_len = len(messages) +while has_tool_calls: + response = client.chat.completions.create( + model=model, + messages=messages, + temperature=TEMP, + max_tokens=MAX_TOK, + tools=tools if tools else None, + tool_choice="auto" if tools else None, + ) + tool_calls = response.choices[0].message.tool_calls + content = response.choices[0].message.content + messages.append( + { + "role": "assistant", + "tool_calls": [tc.to_dict() for tc in tool_calls] + if tool_calls + else tool_calls, + "content": content, + } + ) + results = [] + if tool_calls: + for tool_call in tool_calls: + function_name = tool_call.function.name + function_args = tool_call.function.arguments + result = MAP_FN[function_name](**json.loads(function_args)) + results.append(result) + for tool_call, result in zip(tool_calls, results): + messages.append( + { + "role": "tool", + "tool_call_id": tool_call.id, + "name": tool_call.function.name, + "content": str(result), + } + ) + else: + has_tool_calls = False +print(json.dumps(messages[origin_messages_len:], indent=2)) +``` + +
+ + +## License + +This model is licensed under a [Modified MIT License](https://huggingface.co/mistralai/Devstral-2-123B-Instruct-2512/blob/main/LICENSE). + +*You must not use this model in a manner that infringes, misappropriates, or otherwise violates any third party’s rights, including intellectual property rights.* \ No newline at end of file diff --git a/VIBE_SYSTEM_PROMPT.txt b/VIBE_SYSTEM_PROMPT.txt new file mode 100644 index 0000000000000000000000000000000000000000..3eadac6640ee73ac152b89102519f53f8d98c15c --- /dev/null +++ b/VIBE_SYSTEM_PROMPT.txt @@ -0,0 +1,24 @@ +You are operating as and within Mistral Vibe, a CLI coding-agent built by Mistral AI and powered by default by the Devstral family of models. It wraps Mistral's Devstral models to enable natural language interaction with a local codebase. Use the available tools when helpful. + +You can: + +- Receive user prompts, project context, and files. +- Send responses and emit function calls (e.g., shell commands, code edits). +- Apply patches, run commands, based on user approvals. + +Answer the user's request using the relevant tool(s), if they are available. Check that all the required parameters for each tool call are provided or can reasonably be inferred from context. IF there are no relevant tools or there are missing values for required parameters, ask the user to supply these values; otherwise proceed with the tool calls. If the user provides a specific value for a parameter (for example provided in quotes), make sure to use that value EXACTLY. DO NOT make up values for or ask about optional parameters. Carefully analyze descriptive terms in the request as they may indicate required parameter values that should be included even if not explicitly quoted. + +Always try your hardest to use the tools to answer the user's request. If you can't use the tools, explain why and ask the user for more information. 
+ +Act as an agentic assistant, if a user asks for a long task, break it down and do it step by step. + +When you want to commit changes, you will always use the 'git commit' bash command. It will always +be suffixed with a line telling it was generated by Mistral Vibe with the appropriate co-authoring information. +The format you will always uses is the following heredoc. + +```bash +git commit -m " + +Generated by Mistral Vibe. +Co-Authored-By: Mistral Vibe " +``` \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..2dd92651d9c02b6cf447f580467f415d6b5dbf9a --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,117 @@ +{#- Default system message if no system prompt is passed. #} +{%- set default_system_message = '' %} + +{#- Begin of sequence token. #} +{{- bos_token }} + +{#- Handle system prompt if it exists. #} +{#- System prompt supports text content or text chunks. #} +{%- if messages[0]['role'] == 'system' %} + {{- '[SYSTEM_PROMPT]' -}} + {%- if messages[0]['content'] is string %} + {{- messages[0]['content'] -}} + {%- else %} + {%- for block in messages[0]['content'] %} + {%- if block['type'] == 'text' %} + {{- block['text'] }} + {%- else %} + {{- raise_exception('Only text chunks are supported in system message contents.') }} + {%- endif %} + {%- endfor %} + {%- endif %} + {{- '[/SYSTEM_PROMPT]' -}} + {%- set loop_messages = messages[1:] %} +{%- else %} + {%- set loop_messages = messages %} + {%- if default_system_message != '' %} + {{- '[SYSTEM_PROMPT]' + default_system_message + '[/SYSTEM_PROMPT]' }} + {%- endif %} +{%- endif %} + + +{#- Tools definition #} +{%- set tools_definition = '' %} +{%- set has_tools = false %} +{%- if tools is defined and tools is not none and tools|length > 0 %} + {%- set has_tools = true %} + {%- set tools_definition = '[AVAILABLE_TOOLS]' + (tools| tojson) + '[/AVAILABLE_TOOLS]' %} + {{- tools_definition }} +{%- endif %} + +{#- 
Checks for alternating user/assistant messages. #} +{%- set ns = namespace() %} +{%- set ns.index = 0 %} +{%- for message in loop_messages %} + {%- if message.role == 'user' or (message.role == 'assistant' and (message.tool_calls is not defined or message.tool_calls is none or message.tool_calls | length == 0)) %} + {%- if (message['role'] == 'user') != (ns.index % 2 == 0) %} + {{- raise_exception('After the optional system message, conversation roles must alternate user and assistant roles except for tool calls and results.') }} + {%- endif %} + {%- set ns.index = ns.index + 1 %} + {%- endif %} +{%- endfor %} + +{#- Handle conversation messages. #} +{%- for message in loop_messages %} + + {#- User messages supports text content. #} + {%- if message['role'] == 'user' %} + {%- if message['content'] is string %} + {{- '[INST]' + message['content'] + '[/INST]' }} + {%- elif message['content'] | length > 0 %} + {{- '[INST]' }} + {%- set sorted_blocks = message['content'] | sort(attribute='type') %} + {%- for block in sorted_blocks %} + {%- if block['type'] == 'text' %} + {{- block['text'] }} + {%- else %} + {{- raise_exception('Only text chunks are supported in user message content.') }} + {%- endif %} + {%- endfor %} + {{- '[/INST]' }} + {%- else %} + {{- raise_exception('User message must have a string or a list of chunks in content') }} + {%- endif %} + + {#- Assistant messages supports text content or text chunks. 
#} + {%- elif message['role'] == 'assistant' %} + {%- if (message['content'] is none or message['content'] == '' or message['content']|length == 0) and (message['tool_calls'] is not defined or message['tool_calls'] is none or message['tool_calls']|length == 0) %} + {{- raise_exception('Assistant message must have a string or a list of chunks in content or a list of tool calls.') }} + {%- endif %} + + {%- if message['content'] is string and message['content'] != '' %} + {{- message['content'] }} + {%- elif message['content'] | length > 0 %} + {%- for block in message['content'] %} + {%- if block['type'] == 'text' %} + {{- block['text'] }} + {%- else %} + {{- raise_exception('Only text chunks are supported in assistant message contents.') }} + {%- endif %} + {%- endfor %} + {%- endif %} + + {%- if message['tool_calls'] is defined and message['tool_calls'] is not none and message['tool_calls']|length > 0 %} + {%- for tool in message['tool_calls'] %} + {{- '[TOOL_CALLS]' }} + {%- set name = tool['function']['name'] %} + {%- set arguments = tool['function']['arguments'] %} + {%- if arguments is not string %} + {%- set arguments = arguments|tojson|safe %} + {%- elif arguments == '' %} + {%- set arguments = '{}' %} + {%- endif %} + {{- name + '[ARGS]' + arguments }} + {%- endfor %} + {%- endif %} + + {{- eos_token }} + + {#- Tool messages only supports text content. #} + {%- elif message['role'] == 'tool' %} + {{- '[TOOL_RESULTS]' + message['content']|string + '[/TOOL_RESULTS]' }} + + {#- Raise exception for unsupported roles. 
#} + {%- else %} + {{- raise_exception('Only user, assistant and tool roles are supported, got ' + message['role'] + '.') }} + {%- endif %} +{%- endfor %} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..46923edf5d6c49bc3730e0661f77aef11c79a82a --- /dev/null +++ b/config.json @@ -0,0 +1,49 @@ +{ + "architectures": [ + "Ministral3ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "dtype": "bfloat16", + "eos_token_id": 2, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 12288, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 262144, + "model_type": "ministral3", + "num_attention_heads": 96, + "num_hidden_layers": 88, + "num_key_value_heads": 8, + "pad_token_id": 11, + "quantization_config": { + "activation_scheme": "static", + "dequantize": false, + "modules_to_not_convert": [ + "model.vision_tower", + "model.multi_modal_projector", + "lm_head" + ], + "quant_method": "fp8", + "weight_block_size": null + }, + "rms_norm_eps": 1e-05, + "rope_parameters": { + "beta_fast": 4.0, + "beta_slow": 1.0, + "factor": 64.0, + "mscale": 1.0, + "mscale_all_dim": 0.0, + "original_max_position_embeddings": 4096, + "llama_4_scaling_beta": 0.0, + "rope_theta": 1000000.0, + "rope_type": "yarn", + "type": "yarn" + }, + "sliding_window": null, + "tie_word_embeddings": false, + "transformers_version": "5.0.0.dev0", + "use_cache": true, + "vocab_size": 131072 +} diff --git a/consolidated-00001-of-00027.safetensors b/consolidated-00001-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c37f56c36eba312ba5d029abfe51d40c6b7ddac0 --- /dev/null +++ b/consolidated-00001-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41a1d9f53880fc6a2620f1a3ac964c0cd5e934a93b34c010e8ef84af08c5c123 +size 4832018962 diff --git a/consolidated-00002-of-00027.safetensors 
b/consolidated-00002-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..577d3999c4a5c766ff1e3570e4d02db8f7046135 --- /dev/null +++ b/consolidated-00002-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55df7d0f5531ec8738878aac92d13518ad79d1f469604203524ba5c8f9f23b08 +size 4869767072 diff --git a/consolidated-00003-of-00027.safetensors b/consolidated-00003-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6a0d2151cce50f390edfc4d4d6b82132a104f2a6 --- /dev/null +++ b/consolidated-00003-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:999254e95bbd3f2dd935d73bd429e4d12ce558cf8eb677158f1925062227455f +size 4819435716 diff --git a/consolidated-00004-of-00027.safetensors b/consolidated-00004-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7ecf618d66804c69b854b3293fad69d3ced3aa9d --- /dev/null +++ b/consolidated-00004-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c450acc8814cf782e5ec65d80302b0fec1c05c058c23041870831968b2a39b1f +size 4869767048 diff --git a/consolidated-00005-of-00027.safetensors b/consolidated-00005-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3002450d1ebd739922ea73896a1e1aa6d946df6e --- /dev/null +++ b/consolidated-00005-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:126aaa373b2e1288f71b45a215560b420c4f45dac182a24d432d0cf481a5cac3 +size 4819435716 diff --git a/consolidated-00006-of-00027.safetensors b/consolidated-00006-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..303b4fd92a57be1af0759ffe977ba9c516509cd6 --- /dev/null +++ b/consolidated-00006-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:fb23cd614e417cdf2b519ff1119588555a05c48849e1676f03dccf05c60313bf +size 4869767072 diff --git a/consolidated-00007-of-00027.safetensors b/consolidated-00007-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a94050aab4dcbacce7bef1eddcbea048c6c4f8b4 --- /dev/null +++ b/consolidated-00007-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b299b26298b6579e44233b63965e0d33d30dbbfb7a312ba0f069a415ad57b50 +size 4819435692 diff --git a/consolidated-00008-of-00027.safetensors b/consolidated-00008-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2cb7c52d0448f5896b3472417ece11e9abf6db57 --- /dev/null +++ b/consolidated-00008-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e026023a836dbb551167e9e1da901a65a9c2f4aa1f8d893901dc97e7d131e31a +size 4869767072 diff --git a/consolidated-00009-of-00027.safetensors b/consolidated-00009-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..257b090df793d9edf8235fb57799d7b87fc87123 --- /dev/null +++ b/consolidated-00009-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa0616bd550f31be107a0d1d52d3a7d1a6f3019922c3b8adc6b12a939843ed5b +size 4819435716 diff --git a/consolidated-00010-of-00027.safetensors b/consolidated-00010-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0eadca7700d6ce9edea34724edb016589b65126a --- /dev/null +++ b/consolidated-00010-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c22b3ab68a5b90ad49211a301712a1b93c745279ae0f8af145504003ff5f51b3 +size 4869767048 diff --git a/consolidated-00011-of-00027.safetensors b/consolidated-00011-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..893cbaafbd5f64a51538b527faabc6c11bc67d1f --- /dev/null +++ 
b/consolidated-00011-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e48bcb3e522921ef1834f8b451ecac019aa77606166e45730dde4459d77f6a5 +size 4819435716 diff --git a/consolidated-00012-of-00027.safetensors b/consolidated-00012-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d0085291a4f3b9d681d34f155d05f5457a9b463e --- /dev/null +++ b/consolidated-00012-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36811b35532c2aed82b698140b289483e091c931a40315461136a0ad6d78ebb6 +size 4869767072 diff --git a/consolidated-00013-of-00027.safetensors b/consolidated-00013-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b89b69b3d6d017ff7ecee43adef47a26f9f09cd9 --- /dev/null +++ b/consolidated-00013-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50081ff8f4b8546c523ae4292cd44fd16058f1f9c2cff857c10062aa18f17f1a +size 4819435700 diff --git a/consolidated-00014-of-00027.safetensors b/consolidated-00014-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f0e923f5d8d100e8d695c3bf10172f1d11dc7ad6 --- /dev/null +++ b/consolidated-00014-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d9539a9ef53f39203f01354fcb19464b8ec629f3d3252bf8ba4b77ef36817af +size 4869767064 diff --git a/consolidated-00015-of-00027.safetensors b/consolidated-00015-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..97f24d08f3e9d9c10575b21a9f54fe341065b840 --- /dev/null +++ b/consolidated-00015-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7694d535d2be8144e0de7ba3681dd249cb1f19611823d0b2b4f436db70572380 +size 4819435716 diff --git a/consolidated-00016-of-00027.safetensors b/consolidated-00016-of-00027.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..081574598dccd12c6c4d3c60c8311835d90c6e5b --- /dev/null +++ b/consolidated-00016-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f42ea7ad239a1fc3f8040d85baeae1e83c2eaf7e534c1273e2f86e4d50f8c986 +size 4869767064 diff --git a/consolidated-00017-of-00027.safetensors b/consolidated-00017-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8032d8f79b236fac31bd85fdae40dcab85f7d276 --- /dev/null +++ b/consolidated-00017-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5de9a56af138bbae78307321b6098cf764b6671692e9ab467a87d2a651f6fa76 +size 4819435700 diff --git a/consolidated-00018-of-00027.safetensors b/consolidated-00018-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..245c98b88269b16e2bc73c8d4c392272601521e4 --- /dev/null +++ b/consolidated-00018-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a8b7da5126ecff2f8ce1d5621f63bffaa0ba6eec96978291a7d7f067ccfa78f +size 4869767072 diff --git a/consolidated-00019-of-00027.safetensors b/consolidated-00019-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a472009b248d5dfd1c48796f86d1dd8e67a3bec7 --- /dev/null +++ b/consolidated-00019-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e6021ae214c8f6974ebce78e6227d2d9e4b1924601cf19d8048feb917c0f1cf +size 4819435716 diff --git a/consolidated-00020-of-00027.safetensors b/consolidated-00020-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1394860ed6e55babc84fa9431063f95b682b21be --- /dev/null +++ b/consolidated-00020-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76c1c1de19846445245121380d7e09094b6c2df68e5a0570eb7ec3d3cbd7116d +size 4869767048 diff --git 
a/consolidated-00021-of-00027.safetensors b/consolidated-00021-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c7c0911cc80ed8a0f2f93b2e78f619fbf46e74f7 --- /dev/null +++ b/consolidated-00021-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:899af459e68bae626440689c5b785516a1f5686c4da654440e7e2532b92f475f +size 4819435716 diff --git a/consolidated-00022-of-00027.safetensors b/consolidated-00022-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b7347e2c642a3ae3070ad8f9b0234132859d841 --- /dev/null +++ b/consolidated-00022-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e93f1a2e4ca8169894fc8ad12cede1b296665f2039c4b552959e5208762c4644 +size 4869767072 diff --git a/consolidated-00023-of-00027.safetensors b/consolidated-00023-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a19c4414693a81bd69c99aef786c799466e2a106 --- /dev/null +++ b/consolidated-00023-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d9d46cd20f97c6d632ebeb47c43358173d0ebc697e7e8edd56ebdeef5ab11fe +size 4819435692 diff --git a/consolidated-00024-of-00027.safetensors b/consolidated-00024-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..281a805e07ae0e9cb125b7e14f874d0e567b2867 --- /dev/null +++ b/consolidated-00024-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01ce57780078614a71cbd32dae040eee117ae4f5c0d488b9d019725515a2fd40 +size 4869767072 diff --git a/consolidated-00025-of-00027.safetensors b/consolidated-00025-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6ca3869cd1f1a2ca25cb1e449c1357c9ee7d2b00 --- /dev/null +++ b/consolidated-00025-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:79248d80dc5ff15989cdb592c9599080026583c5eedc19992314d7abae8b37df +size 4819435700 diff --git a/consolidated-00026-of-00027.safetensors b/consolidated-00026-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7501681d670520f57a409095d49ecba8cc2f7942 --- /dev/null +++ b/consolidated-00026-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0e729946e8fc3337fe94866673bc71efe545c5ab9f77035005f4d0d6d0c9b6e +size 3925918462 diff --git a/consolidated-00027-of-00027.safetensors b/consolidated-00027-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d14e1b224f01fafc496f291bd0293538b6cd458 --- /dev/null +++ b/consolidated-00027-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f761d9f95aa21635197c4acab24f39084611ef02e8927fb33e0f308ca42db7c2 +size 3221225576 diff --git a/consolidated.safetensors.index.json b/consolidated.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..6e925da53e8fa81a720908f2a982e247d19bb077 --- /dev/null +++ b/consolidated.safetensors.index.json @@ -0,0 +1,2034 @@ +{ + "metadata": { + "total_size": 128249391520 + }, + "weight_map": { + "layers.0.attention.wk.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.0.attention.wk.weight": "consolidated-00001-of-00027.safetensors", + "layers.0.attention.wk.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.0.attention.wo.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.0.attention.wo.weight": "consolidated-00001-of-00027.safetensors", + "layers.0.attention.wo.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.0.attention.wq.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.0.attention.wq.weight": "consolidated-00001-of-00027.safetensors", + "layers.0.attention.wq.qscale_weight": "consolidated-00001-of-00027.safetensors", + 
"layers.0.attention.wv.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.0.attention.wv.weight": "consolidated-00001-of-00027.safetensors", + "layers.0.attention.wv.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.0.attention_norm.weight": "consolidated-00001-of-00027.safetensors", + "layers.0.feed_forward.w1.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.0.feed_forward.w1.weight": "consolidated-00001-of-00027.safetensors", + "layers.0.feed_forward.w1.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.0.feed_forward.w2.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.0.feed_forward.w2.weight": "consolidated-00001-of-00027.safetensors", + "layers.0.feed_forward.w2.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.0.feed_forward.w3.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.0.feed_forward.w3.weight": "consolidated-00001-of-00027.safetensors", + "layers.0.feed_forward.w3.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.0.ffn_norm.weight": "consolidated-00001-of-00027.safetensors", + "layers.1.attention.wk.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.1.attention.wk.weight": "consolidated-00001-of-00027.safetensors", + "layers.1.attention.wk.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.1.attention.wo.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.1.attention.wo.weight": "consolidated-00001-of-00027.safetensors", + "layers.1.attention.wo.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.1.attention.wq.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.1.attention.wq.weight": "consolidated-00001-of-00027.safetensors", + "layers.1.attention.wq.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.1.attention.wv.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.1.attention.wv.weight": 
"consolidated-00001-of-00027.safetensors", + "layers.1.attention.wv.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.1.attention_norm.weight": "consolidated-00001-of-00027.safetensors", + "layers.1.feed_forward.w1.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.1.feed_forward.w1.weight": "consolidated-00001-of-00027.safetensors", + "layers.1.feed_forward.w1.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.1.feed_forward.w2.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.1.feed_forward.w2.weight": "consolidated-00001-of-00027.safetensors", + "layers.1.feed_forward.w2.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.1.feed_forward.w3.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.1.feed_forward.w3.weight": "consolidated-00001-of-00027.safetensors", + "layers.1.feed_forward.w3.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.1.ffn_norm.weight": "consolidated-00001-of-00027.safetensors", + "layers.10.attention.wk.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.10.attention.wk.weight": "consolidated-00001-of-00027.safetensors", + "layers.10.attention.wk.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.10.attention.wo.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.10.attention.wo.weight": "consolidated-00001-of-00027.safetensors", + "layers.10.attention.wo.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.10.attention.wq.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.10.attention.wq.weight": "consolidated-00001-of-00027.safetensors", + "layers.10.attention.wq.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.10.attention.wv.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.10.attention.wv.weight": "consolidated-00001-of-00027.safetensors", + "layers.10.attention.wv.qscale_weight": 
"consolidated-00001-of-00027.safetensors", + "layers.10.attention_norm.weight": "consolidated-00001-of-00027.safetensors", + "layers.10.feed_forward.w1.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.10.feed_forward.w1.weight": "consolidated-00001-of-00027.safetensors", + "layers.10.feed_forward.w1.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.10.feed_forward.w2.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.10.feed_forward.w2.weight": "consolidated-00001-of-00027.safetensors", + "layers.10.feed_forward.w2.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.10.feed_forward.w3.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.10.feed_forward.w3.weight": "consolidated-00001-of-00027.safetensors", + "layers.10.feed_forward.w3.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.10.ffn_norm.weight": "consolidated-00001-of-00027.safetensors", + "layers.11.attention.wk.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.11.attention.wk.weight": "consolidated-00001-of-00027.safetensors", + "layers.11.attention.wk.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.11.attention.wo.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.11.attention.wo.weight": "consolidated-00001-of-00027.safetensors", + "layers.11.attention.wo.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.11.attention.wq.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.11.attention.wq.weight": "consolidated-00001-of-00027.safetensors", + "layers.11.attention.wq.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.11.attention.wv.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.11.attention.wv.weight": "consolidated-00001-of-00027.safetensors", + "layers.11.attention.wv.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.11.attention_norm.weight": 
"consolidated-00001-of-00027.safetensors", + "layers.11.feed_forward.w1.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.11.feed_forward.w1.weight": "consolidated-00001-of-00027.safetensors", + "layers.11.feed_forward.w1.qscale_weight": "consolidated-00001-of-00027.safetensors", + "layers.11.feed_forward.w2.qscale_act": "consolidated-00001-of-00027.safetensors", + "layers.11.feed_forward.w2.weight": "consolidated-00002-of-00027.safetensors", + "layers.11.feed_forward.w2.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.11.feed_forward.w3.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.11.feed_forward.w3.weight": "consolidated-00002-of-00027.safetensors", + "layers.11.feed_forward.w3.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.11.ffn_norm.weight": "consolidated-00002-of-00027.safetensors", + "layers.12.attention.wk.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.12.attention.wk.weight": "consolidated-00002-of-00027.safetensors", + "layers.12.attention.wk.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.12.attention.wo.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.12.attention.wo.weight": "consolidated-00002-of-00027.safetensors", + "layers.12.attention.wo.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.12.attention.wq.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.12.attention.wq.weight": "consolidated-00002-of-00027.safetensors", + "layers.12.attention.wq.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.12.attention.wv.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.12.attention.wv.weight": "consolidated-00002-of-00027.safetensors", + "layers.12.attention.wv.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.12.attention_norm.weight": "consolidated-00002-of-00027.safetensors", + "layers.12.feed_forward.w1.qscale_act": 
"consolidated-00002-of-00027.safetensors", + "layers.12.feed_forward.w1.weight": "consolidated-00002-of-00027.safetensors", + "layers.12.feed_forward.w1.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.12.feed_forward.w2.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.12.feed_forward.w2.weight": "consolidated-00002-of-00027.safetensors", + "layers.12.feed_forward.w2.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.12.feed_forward.w3.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.12.feed_forward.w3.weight": "consolidated-00002-of-00027.safetensors", + "layers.12.feed_forward.w3.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.12.ffn_norm.weight": "consolidated-00002-of-00027.safetensors", + "layers.13.attention.wk.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.13.attention.wk.weight": "consolidated-00002-of-00027.safetensors", + "layers.13.attention.wk.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.13.attention.wo.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.13.attention.wo.weight": "consolidated-00002-of-00027.safetensors", + "layers.13.attention.wo.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.13.attention.wq.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.13.attention.wq.weight": "consolidated-00002-of-00027.safetensors", + "layers.13.attention.wq.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.13.attention.wv.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.13.attention.wv.weight": "consolidated-00002-of-00027.safetensors", + "layers.13.attention.wv.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.13.attention_norm.weight": "consolidated-00002-of-00027.safetensors", + "layers.13.feed_forward.w1.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.13.feed_forward.w1.weight": 
"consolidated-00002-of-00027.safetensors", + "layers.13.feed_forward.w1.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.13.feed_forward.w2.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.13.feed_forward.w2.weight": "consolidated-00002-of-00027.safetensors", + "layers.13.feed_forward.w2.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.13.feed_forward.w3.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.13.feed_forward.w3.weight": "consolidated-00002-of-00027.safetensors", + "layers.13.feed_forward.w3.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.13.ffn_norm.weight": "consolidated-00002-of-00027.safetensors", + "layers.14.attention.wk.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.14.attention.wk.weight": "consolidated-00002-of-00027.safetensors", + "layers.14.attention.wk.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.14.attention.wo.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.14.attention.wo.weight": "consolidated-00002-of-00027.safetensors", + "layers.14.attention.wo.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.14.attention.wq.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.14.attention.wq.weight": "consolidated-00002-of-00027.safetensors", + "layers.14.attention.wq.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.14.attention.wv.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.14.attention.wv.weight": "consolidated-00002-of-00027.safetensors", + "layers.14.attention.wv.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.14.attention_norm.weight": "consolidated-00002-of-00027.safetensors", + "layers.14.feed_forward.w1.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.14.feed_forward.w1.weight": "consolidated-00002-of-00027.safetensors", + "layers.14.feed_forward.w1.qscale_weight": 
"consolidated-00002-of-00027.safetensors", + "layers.14.feed_forward.w2.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.14.feed_forward.w2.weight": "consolidated-00002-of-00027.safetensors", + "layers.14.feed_forward.w2.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.14.feed_forward.w3.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.14.feed_forward.w3.weight": "consolidated-00002-of-00027.safetensors", + "layers.14.feed_forward.w3.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.14.ffn_norm.weight": "consolidated-00002-of-00027.safetensors", + "layers.15.attention.wk.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.15.attention.wk.weight": "consolidated-00002-of-00027.safetensors", + "layers.15.attention.wk.qscale_weight": "consolidated-00002-of-00027.safetensors", + "layers.15.attention.wo.qscale_act": "consolidated-00002-of-00027.safetensors", + "layers.15.attention.wo.weight": "consolidated-00003-of-00027.safetensors", + "layers.15.attention.wo.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.15.attention.wq.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.15.attention.wq.weight": "consolidated-00003-of-00027.safetensors", + "layers.15.attention.wq.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.15.attention.wv.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.15.attention.wv.weight": "consolidated-00003-of-00027.safetensors", + "layers.15.attention.wv.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.15.attention_norm.weight": "consolidated-00003-of-00027.safetensors", + "layers.15.feed_forward.w1.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.15.feed_forward.w1.weight": "consolidated-00003-of-00027.safetensors", + "layers.15.feed_forward.w1.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.15.feed_forward.w2.qscale_act": 
"consolidated-00003-of-00027.safetensors", + "layers.15.feed_forward.w2.weight": "consolidated-00003-of-00027.safetensors", + "layers.15.feed_forward.w2.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.15.feed_forward.w3.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.15.feed_forward.w3.weight": "consolidated-00003-of-00027.safetensors", + "layers.15.feed_forward.w3.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.15.ffn_norm.weight": "consolidated-00003-of-00027.safetensors", + "layers.16.attention.wk.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.16.attention.wk.weight": "consolidated-00003-of-00027.safetensors", + "layers.16.attention.wk.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.16.attention.wo.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.16.attention.wo.weight": "consolidated-00003-of-00027.safetensors", + "layers.16.attention.wo.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.16.attention.wq.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.16.attention.wq.weight": "consolidated-00003-of-00027.safetensors", + "layers.16.attention.wq.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.16.attention.wv.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.16.attention.wv.weight": "consolidated-00003-of-00027.safetensors", + "layers.16.attention.wv.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.16.attention_norm.weight": "consolidated-00003-of-00027.safetensors", + "layers.16.feed_forward.w1.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.16.feed_forward.w1.weight": "consolidated-00003-of-00027.safetensors", + "layers.16.feed_forward.w1.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.16.feed_forward.w2.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.16.feed_forward.w2.weight": 
"consolidated-00003-of-00027.safetensors", + "layers.16.feed_forward.w2.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.16.feed_forward.w3.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.16.feed_forward.w3.weight": "consolidated-00003-of-00027.safetensors", + "layers.16.feed_forward.w3.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.16.ffn_norm.weight": "consolidated-00003-of-00027.safetensors", + "layers.17.attention.wk.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.17.attention.wk.weight": "consolidated-00003-of-00027.safetensors", + "layers.17.attention.wk.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.17.attention.wo.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.17.attention.wo.weight": "consolidated-00003-of-00027.safetensors", + "layers.17.attention.wo.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.17.attention.wq.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.17.attention.wq.weight": "consolidated-00003-of-00027.safetensors", + "layers.17.attention.wq.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.17.attention.wv.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.17.attention.wv.weight": "consolidated-00003-of-00027.safetensors", + "layers.17.attention.wv.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.17.attention_norm.weight": "consolidated-00003-of-00027.safetensors", + "layers.17.feed_forward.w1.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.17.feed_forward.w1.weight": "consolidated-00003-of-00027.safetensors", + "layers.17.feed_forward.w1.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.17.feed_forward.w2.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.17.feed_forward.w2.weight": "consolidated-00003-of-00027.safetensors", + "layers.17.feed_forward.w2.qscale_weight": 
"consolidated-00003-of-00027.safetensors", + "layers.17.feed_forward.w3.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.17.feed_forward.w3.weight": "consolidated-00003-of-00027.safetensors", + "layers.17.feed_forward.w3.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.17.ffn_norm.weight": "consolidated-00003-of-00027.safetensors", + "layers.18.attention.wk.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.18.attention.wk.weight": "consolidated-00003-of-00027.safetensors", + "layers.18.attention.wk.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.18.attention.wo.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.18.attention.wo.weight": "consolidated-00003-of-00027.safetensors", + "layers.18.attention.wo.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.18.attention.wq.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.18.attention.wq.weight": "consolidated-00003-of-00027.safetensors", + "layers.18.attention.wq.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.18.attention.wv.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.18.attention.wv.weight": "consolidated-00003-of-00027.safetensors", + "layers.18.attention.wv.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.18.attention_norm.weight": "consolidated-00003-of-00027.safetensors", + "layers.18.feed_forward.w1.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.18.feed_forward.w1.weight": "consolidated-00003-of-00027.safetensors", + "layers.18.feed_forward.w1.qscale_weight": "consolidated-00003-of-00027.safetensors", + "layers.18.feed_forward.w2.qscale_act": "consolidated-00003-of-00027.safetensors", + "layers.18.feed_forward.w2.weight": "consolidated-00004-of-00027.safetensors", + "layers.18.feed_forward.w2.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.18.feed_forward.w3.qscale_act": 
"consolidated-00004-of-00027.safetensors", + "layers.18.feed_forward.w3.weight": "consolidated-00004-of-00027.safetensors", + "layers.18.feed_forward.w3.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.18.ffn_norm.weight": "consolidated-00004-of-00027.safetensors", + "layers.19.attention.wk.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.19.attention.wk.weight": "consolidated-00004-of-00027.safetensors", + "layers.19.attention.wk.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.19.attention.wo.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.19.attention.wo.weight": "consolidated-00004-of-00027.safetensors", + "layers.19.attention.wo.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.19.attention.wq.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.19.attention.wq.weight": "consolidated-00004-of-00027.safetensors", + "layers.19.attention.wq.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.19.attention.wv.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.19.attention.wv.weight": "consolidated-00004-of-00027.safetensors", + "layers.19.attention.wv.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.19.attention_norm.weight": "consolidated-00004-of-00027.safetensors", + "layers.19.feed_forward.w1.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.19.feed_forward.w1.weight": "consolidated-00004-of-00027.safetensors", + "layers.19.feed_forward.w1.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.19.feed_forward.w2.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.19.feed_forward.w2.weight": "consolidated-00004-of-00027.safetensors", + "layers.19.feed_forward.w2.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.19.feed_forward.w3.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.19.feed_forward.w3.weight": 
"consolidated-00004-of-00027.safetensors", + "layers.19.feed_forward.w3.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.19.ffn_norm.weight": "consolidated-00004-of-00027.safetensors", + "layers.2.attention.wk.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.2.attention.wk.weight": "consolidated-00004-of-00027.safetensors", + "layers.2.attention.wk.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.2.attention.wo.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.2.attention.wo.weight": "consolidated-00004-of-00027.safetensors", + "layers.2.attention.wo.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.2.attention.wq.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.2.attention.wq.weight": "consolidated-00004-of-00027.safetensors", + "layers.2.attention.wq.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.2.attention.wv.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.2.attention.wv.weight": "consolidated-00004-of-00027.safetensors", + "layers.2.attention.wv.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.2.attention_norm.weight": "consolidated-00004-of-00027.safetensors", + "layers.2.feed_forward.w1.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.2.feed_forward.w1.weight": "consolidated-00004-of-00027.safetensors", + "layers.2.feed_forward.w1.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.2.feed_forward.w2.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.2.feed_forward.w2.weight": "consolidated-00004-of-00027.safetensors", + "layers.2.feed_forward.w2.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.2.feed_forward.w3.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.2.feed_forward.w3.weight": "consolidated-00004-of-00027.safetensors", + "layers.2.feed_forward.w3.qscale_weight": 
"consolidated-00004-of-00027.safetensors", + "layers.2.ffn_norm.weight": "consolidated-00004-of-00027.safetensors", + "layers.20.attention.wk.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.20.attention.wk.weight": "consolidated-00004-of-00027.safetensors", + "layers.20.attention.wk.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.20.attention.wo.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.20.attention.wo.weight": "consolidated-00004-of-00027.safetensors", + "layers.20.attention.wo.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.20.attention.wq.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.20.attention.wq.weight": "consolidated-00004-of-00027.safetensors", + "layers.20.attention.wq.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.20.attention.wv.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.20.attention.wv.weight": "consolidated-00004-of-00027.safetensors", + "layers.20.attention.wv.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.20.attention_norm.weight": "consolidated-00004-of-00027.safetensors", + "layers.20.feed_forward.w1.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.20.feed_forward.w1.weight": "consolidated-00004-of-00027.safetensors", + "layers.20.feed_forward.w1.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.20.feed_forward.w2.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.20.feed_forward.w2.weight": "consolidated-00004-of-00027.safetensors", + "layers.20.feed_forward.w2.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.20.feed_forward.w3.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.20.feed_forward.w3.weight": "consolidated-00004-of-00027.safetensors", + "layers.20.feed_forward.w3.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.20.ffn_norm.weight": 
"consolidated-00004-of-00027.safetensors", + "layers.21.attention.wk.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.21.attention.wk.weight": "consolidated-00004-of-00027.safetensors", + "layers.21.attention.wk.qscale_weight": "consolidated-00004-of-00027.safetensors", + "layers.21.attention.wo.qscale_act": "consolidated-00004-of-00027.safetensors", + "layers.21.attention.wo.weight": "consolidated-00005-of-00027.safetensors", + "layers.21.attention.wo.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.21.attention.wq.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.21.attention.wq.weight": "consolidated-00005-of-00027.safetensors", + "layers.21.attention.wq.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.21.attention.wv.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.21.attention.wv.weight": "consolidated-00005-of-00027.safetensors", + "layers.21.attention.wv.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.21.attention_norm.weight": "consolidated-00005-of-00027.safetensors", + "layers.21.feed_forward.w1.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.21.feed_forward.w1.weight": "consolidated-00005-of-00027.safetensors", + "layers.21.feed_forward.w1.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.21.feed_forward.w2.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.21.feed_forward.w2.weight": "consolidated-00005-of-00027.safetensors", + "layers.21.feed_forward.w2.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.21.feed_forward.w3.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.21.feed_forward.w3.weight": "consolidated-00005-of-00027.safetensors", + "layers.21.feed_forward.w3.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.21.ffn_norm.weight": "consolidated-00005-of-00027.safetensors", + "layers.22.attention.wk.qscale_act": 
"consolidated-00005-of-00027.safetensors", + "layers.22.attention.wk.weight": "consolidated-00005-of-00027.safetensors", + "layers.22.attention.wk.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.22.attention.wo.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.22.attention.wo.weight": "consolidated-00005-of-00027.safetensors", + "layers.22.attention.wo.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.22.attention.wq.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.22.attention.wq.weight": "consolidated-00005-of-00027.safetensors", + "layers.22.attention.wq.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.22.attention.wv.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.22.attention.wv.weight": "consolidated-00005-of-00027.safetensors", + "layers.22.attention.wv.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.22.attention_norm.weight": "consolidated-00005-of-00027.safetensors", + "layers.22.feed_forward.w1.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.22.feed_forward.w1.weight": "consolidated-00005-of-00027.safetensors", + "layers.22.feed_forward.w1.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.22.feed_forward.w2.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.22.feed_forward.w2.weight": "consolidated-00005-of-00027.safetensors", + "layers.22.feed_forward.w2.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.22.feed_forward.w3.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.22.feed_forward.w3.weight": "consolidated-00005-of-00027.safetensors", + "layers.22.feed_forward.w3.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.22.ffn_norm.weight": "consolidated-00005-of-00027.safetensors", + "layers.23.attention.wk.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.23.attention.wk.weight": 
"consolidated-00005-of-00027.safetensors", + "layers.23.attention.wk.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.23.attention.wo.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.23.attention.wo.weight": "consolidated-00005-of-00027.safetensors", + "layers.23.attention.wo.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.23.attention.wq.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.23.attention.wq.weight": "consolidated-00005-of-00027.safetensors", + "layers.23.attention.wq.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.23.attention.wv.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.23.attention.wv.weight": "consolidated-00005-of-00027.safetensors", + "layers.23.attention.wv.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.23.attention_norm.weight": "consolidated-00005-of-00027.safetensors", + "layers.23.feed_forward.w1.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.23.feed_forward.w1.weight": "consolidated-00005-of-00027.safetensors", + "layers.23.feed_forward.w1.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.23.feed_forward.w2.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.23.feed_forward.w2.weight": "consolidated-00005-of-00027.safetensors", + "layers.23.feed_forward.w2.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.23.feed_forward.w3.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.23.feed_forward.w3.weight": "consolidated-00005-of-00027.safetensors", + "layers.23.feed_forward.w3.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.23.ffn_norm.weight": "consolidated-00005-of-00027.safetensors", + "layers.24.attention.wk.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.24.attention.wk.weight": "consolidated-00005-of-00027.safetensors", + "layers.24.attention.wk.qscale_weight": 
"consolidated-00005-of-00027.safetensors", + "layers.24.attention.wo.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.24.attention.wo.weight": "consolidated-00005-of-00027.safetensors", + "layers.24.attention.wo.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.24.attention.wq.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.24.attention.wq.weight": "consolidated-00005-of-00027.safetensors", + "layers.24.attention.wq.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.24.attention.wv.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.24.attention.wv.weight": "consolidated-00005-of-00027.safetensors", + "layers.24.attention.wv.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.24.attention_norm.weight": "consolidated-00005-of-00027.safetensors", + "layers.24.feed_forward.w1.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.24.feed_forward.w1.weight": "consolidated-00005-of-00027.safetensors", + "layers.24.feed_forward.w1.qscale_weight": "consolidated-00005-of-00027.safetensors", + "layers.24.feed_forward.w2.qscale_act": "consolidated-00005-of-00027.safetensors", + "layers.24.feed_forward.w2.weight": "consolidated-00006-of-00027.safetensors", + "layers.24.feed_forward.w2.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.24.feed_forward.w3.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.24.feed_forward.w3.weight": "consolidated-00006-of-00027.safetensors", + "layers.24.feed_forward.w3.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.24.ffn_norm.weight": "consolidated-00006-of-00027.safetensors", + "layers.25.attention.wk.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.25.attention.wk.weight": "consolidated-00006-of-00027.safetensors", + "layers.25.attention.wk.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.25.attention.wo.qscale_act": 
"consolidated-00006-of-00027.safetensors", + "layers.25.attention.wo.weight": "consolidated-00006-of-00027.safetensors", + "layers.25.attention.wo.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.25.attention.wq.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.25.attention.wq.weight": "consolidated-00006-of-00027.safetensors", + "layers.25.attention.wq.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.25.attention.wv.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.25.attention.wv.weight": "consolidated-00006-of-00027.safetensors", + "layers.25.attention.wv.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.25.attention_norm.weight": "consolidated-00006-of-00027.safetensors", + "layers.25.feed_forward.w1.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.25.feed_forward.w1.weight": "consolidated-00006-of-00027.safetensors", + "layers.25.feed_forward.w1.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.25.feed_forward.w2.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.25.feed_forward.w2.weight": "consolidated-00006-of-00027.safetensors", + "layers.25.feed_forward.w2.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.25.feed_forward.w3.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.25.feed_forward.w3.weight": "consolidated-00006-of-00027.safetensors", + "layers.25.feed_forward.w3.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.25.ffn_norm.weight": "consolidated-00006-of-00027.safetensors", + "layers.26.attention.wk.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.26.attention.wk.weight": "consolidated-00006-of-00027.safetensors", + "layers.26.attention.wk.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.26.attention.wo.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.26.attention.wo.weight": 
"consolidated-00006-of-00027.safetensors", + "layers.26.attention.wo.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.26.attention.wq.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.26.attention.wq.weight": "consolidated-00006-of-00027.safetensors", + "layers.26.attention.wq.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.26.attention.wv.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.26.attention.wv.weight": "consolidated-00006-of-00027.safetensors", + "layers.26.attention.wv.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.26.attention_norm.weight": "consolidated-00006-of-00027.safetensors", + "layers.26.feed_forward.w1.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.26.feed_forward.w1.weight": "consolidated-00006-of-00027.safetensors", + "layers.26.feed_forward.w1.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.26.feed_forward.w2.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.26.feed_forward.w2.weight": "consolidated-00006-of-00027.safetensors", + "layers.26.feed_forward.w2.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.26.feed_forward.w3.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.26.feed_forward.w3.weight": "consolidated-00006-of-00027.safetensors", + "layers.26.feed_forward.w3.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.26.ffn_norm.weight": "consolidated-00006-of-00027.safetensors", + "layers.27.attention.wk.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.27.attention.wk.weight": "consolidated-00006-of-00027.safetensors", + "layers.27.attention.wk.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.27.attention.wo.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.27.attention.wo.weight": "consolidated-00006-of-00027.safetensors", + "layers.27.attention.wo.qscale_weight": 
"consolidated-00006-of-00027.safetensors", + "layers.27.attention.wq.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.27.attention.wq.weight": "consolidated-00006-of-00027.safetensors", + "layers.27.attention.wq.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.27.attention.wv.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.27.attention.wv.weight": "consolidated-00006-of-00027.safetensors", + "layers.27.attention.wv.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.27.attention_norm.weight": "consolidated-00006-of-00027.safetensors", + "layers.27.feed_forward.w1.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.27.feed_forward.w1.weight": "consolidated-00006-of-00027.safetensors", + "layers.27.feed_forward.w1.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.27.feed_forward.w2.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.27.feed_forward.w2.weight": "consolidated-00006-of-00027.safetensors", + "layers.27.feed_forward.w2.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.27.feed_forward.w3.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.27.feed_forward.w3.weight": "consolidated-00006-of-00027.safetensors", + "layers.27.feed_forward.w3.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.27.ffn_norm.weight": "consolidated-00006-of-00027.safetensors", + "layers.28.attention.wk.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.28.attention.wk.weight": "consolidated-00006-of-00027.safetensors", + "layers.28.attention.wk.qscale_weight": "consolidated-00006-of-00027.safetensors", + "layers.28.attention.wo.qscale_act": "consolidated-00006-of-00027.safetensors", + "layers.28.attention.wo.weight": "consolidated-00007-of-00027.safetensors", + "layers.28.attention.wo.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.28.attention.wq.qscale_act": 
"consolidated-00007-of-00027.safetensors", + "layers.28.attention.wq.weight": "consolidated-00007-of-00027.safetensors", + "layers.28.attention.wq.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.28.attention.wv.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.28.attention.wv.weight": "consolidated-00007-of-00027.safetensors", + "layers.28.attention.wv.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.28.attention_norm.weight": "consolidated-00007-of-00027.safetensors", + "layers.28.feed_forward.w1.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.28.feed_forward.w1.weight": "consolidated-00007-of-00027.safetensors", + "layers.28.feed_forward.w1.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.28.feed_forward.w2.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.28.feed_forward.w2.weight": "consolidated-00007-of-00027.safetensors", + "layers.28.feed_forward.w2.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.28.feed_forward.w3.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.28.feed_forward.w3.weight": "consolidated-00007-of-00027.safetensors", + "layers.28.feed_forward.w3.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.28.ffn_norm.weight": "consolidated-00007-of-00027.safetensors", + "layers.29.attention.wk.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.29.attention.wk.weight": "consolidated-00007-of-00027.safetensors", + "layers.29.attention.wk.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.29.attention.wo.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.29.attention.wo.weight": "consolidated-00007-of-00027.safetensors", + "layers.29.attention.wo.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.29.attention.wq.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.29.attention.wq.weight": 
"consolidated-00007-of-00027.safetensors", + "layers.29.attention.wq.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.29.attention.wv.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.29.attention.wv.weight": "consolidated-00007-of-00027.safetensors", + "layers.29.attention.wv.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.29.attention_norm.weight": "consolidated-00007-of-00027.safetensors", + "layers.29.feed_forward.w1.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.29.feed_forward.w1.weight": "consolidated-00007-of-00027.safetensors", + "layers.29.feed_forward.w1.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.29.feed_forward.w2.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.29.feed_forward.w2.weight": "consolidated-00007-of-00027.safetensors", + "layers.29.feed_forward.w2.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.29.feed_forward.w3.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.29.feed_forward.w3.weight": "consolidated-00007-of-00027.safetensors", + "layers.29.feed_forward.w3.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.29.ffn_norm.weight": "consolidated-00007-of-00027.safetensors", + "layers.3.attention.wk.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.3.attention.wk.weight": "consolidated-00007-of-00027.safetensors", + "layers.3.attention.wk.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.3.attention.wo.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.3.attention.wo.weight": "consolidated-00007-of-00027.safetensors", + "layers.3.attention.wo.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.3.attention.wq.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.3.attention.wq.weight": "consolidated-00007-of-00027.safetensors", + "layers.3.attention.wq.qscale_weight": 
"consolidated-00007-of-00027.safetensors", + "layers.3.attention.wv.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.3.attention.wv.weight": "consolidated-00007-of-00027.safetensors", + "layers.3.attention.wv.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.3.attention_norm.weight": "consolidated-00007-of-00027.safetensors", + "layers.3.feed_forward.w1.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.3.feed_forward.w1.weight": "consolidated-00007-of-00027.safetensors", + "layers.3.feed_forward.w1.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.3.feed_forward.w2.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.3.feed_forward.w2.weight": "consolidated-00007-of-00027.safetensors", + "layers.3.feed_forward.w2.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.3.feed_forward.w3.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.3.feed_forward.w3.weight": "consolidated-00007-of-00027.safetensors", + "layers.3.feed_forward.w3.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.3.ffn_norm.weight": "consolidated-00007-of-00027.safetensors", + "layers.30.attention.wk.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.30.attention.wk.weight": "consolidated-00007-of-00027.safetensors", + "layers.30.attention.wk.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.30.attention.wo.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.30.attention.wo.weight": "consolidated-00007-of-00027.safetensors", + "layers.30.attention.wo.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.30.attention.wq.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.30.attention.wq.weight": "consolidated-00007-of-00027.safetensors", + "layers.30.attention.wq.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.30.attention.wv.qscale_act": 
"consolidated-00007-of-00027.safetensors", + "layers.30.attention.wv.weight": "consolidated-00007-of-00027.safetensors", + "layers.30.attention.wv.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.30.attention_norm.weight": "consolidated-00007-of-00027.safetensors", + "layers.30.feed_forward.w1.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.30.feed_forward.w1.weight": "consolidated-00007-of-00027.safetensors", + "layers.30.feed_forward.w1.qscale_weight": "consolidated-00007-of-00027.safetensors", + "layers.30.feed_forward.w2.qscale_act": "consolidated-00007-of-00027.safetensors", + "layers.30.feed_forward.w2.weight": "consolidated-00008-of-00027.safetensors", + "layers.30.feed_forward.w2.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.30.feed_forward.w3.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.30.feed_forward.w3.weight": "consolidated-00008-of-00027.safetensors", + "layers.30.feed_forward.w3.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.30.ffn_norm.weight": "consolidated-00008-of-00027.safetensors", + "layers.31.attention.wk.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.31.attention.wk.weight": "consolidated-00008-of-00027.safetensors", + "layers.31.attention.wk.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.31.attention.wo.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.31.attention.wo.weight": "consolidated-00008-of-00027.safetensors", + "layers.31.attention.wo.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.31.attention.wq.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.31.attention.wq.weight": "consolidated-00008-of-00027.safetensors", + "layers.31.attention.wq.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.31.attention.wv.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.31.attention.wv.weight": 
"consolidated-00008-of-00027.safetensors", + "layers.31.attention.wv.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.31.attention_norm.weight": "consolidated-00008-of-00027.safetensors", + "layers.31.feed_forward.w1.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.31.feed_forward.w1.weight": "consolidated-00008-of-00027.safetensors", + "layers.31.feed_forward.w1.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.31.feed_forward.w2.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.31.feed_forward.w2.weight": "consolidated-00008-of-00027.safetensors", + "layers.31.feed_forward.w2.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.31.feed_forward.w3.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.31.feed_forward.w3.weight": "consolidated-00008-of-00027.safetensors", + "layers.31.feed_forward.w3.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.31.ffn_norm.weight": "consolidated-00008-of-00027.safetensors", + "layers.32.attention.wk.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.32.attention.wk.weight": "consolidated-00008-of-00027.safetensors", + "layers.32.attention.wk.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.32.attention.wo.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.32.attention.wo.weight": "consolidated-00008-of-00027.safetensors", + "layers.32.attention.wo.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.32.attention.wq.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.32.attention.wq.weight": "consolidated-00008-of-00027.safetensors", + "layers.32.attention.wq.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.32.attention.wv.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.32.attention.wv.weight": "consolidated-00008-of-00027.safetensors", + "layers.32.attention.wv.qscale_weight": 
"consolidated-00008-of-00027.safetensors", + "layers.32.attention_norm.weight": "consolidated-00008-of-00027.safetensors", + "layers.32.feed_forward.w1.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.32.feed_forward.w1.weight": "consolidated-00008-of-00027.safetensors", + "layers.32.feed_forward.w1.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.32.feed_forward.w2.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.32.feed_forward.w2.weight": "consolidated-00008-of-00027.safetensors", + "layers.32.feed_forward.w2.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.32.feed_forward.w3.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.32.feed_forward.w3.weight": "consolidated-00008-of-00027.safetensors", + "layers.32.feed_forward.w3.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.32.ffn_norm.weight": "consolidated-00008-of-00027.safetensors", + "layers.33.attention.wk.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.33.attention.wk.weight": "consolidated-00008-of-00027.safetensors", + "layers.33.attention.wk.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.33.attention.wo.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.33.attention.wo.weight": "consolidated-00008-of-00027.safetensors", + "layers.33.attention.wo.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.33.attention.wq.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.33.attention.wq.weight": "consolidated-00008-of-00027.safetensors", + "layers.33.attention.wq.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.33.attention.wv.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.33.attention.wv.weight": "consolidated-00008-of-00027.safetensors", + "layers.33.attention.wv.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.33.attention_norm.weight": 
"consolidated-00008-of-00027.safetensors", + "layers.33.feed_forward.w1.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.33.feed_forward.w1.weight": "consolidated-00008-of-00027.safetensors", + "layers.33.feed_forward.w1.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.33.feed_forward.w2.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.33.feed_forward.w2.weight": "consolidated-00008-of-00027.safetensors", + "layers.33.feed_forward.w2.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.33.feed_forward.w3.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.33.feed_forward.w3.weight": "consolidated-00008-of-00027.safetensors", + "layers.33.feed_forward.w3.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.33.ffn_norm.weight": "consolidated-00008-of-00027.safetensors", + "layers.34.attention.wk.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.34.attention.wk.weight": "consolidated-00008-of-00027.safetensors", + "layers.34.attention.wk.qscale_weight": "consolidated-00008-of-00027.safetensors", + "layers.34.attention.wo.qscale_act": "consolidated-00008-of-00027.safetensors", + "layers.34.attention.wo.weight": "consolidated-00009-of-00027.safetensors", + "layers.34.attention.wo.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.34.attention.wq.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.34.attention.wq.weight": "consolidated-00009-of-00027.safetensors", + "layers.34.attention.wq.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.34.attention.wv.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.34.attention.wv.weight": "consolidated-00009-of-00027.safetensors", + "layers.34.attention.wv.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.34.attention_norm.weight": "consolidated-00009-of-00027.safetensors", + "layers.34.feed_forward.w1.qscale_act": 
"consolidated-00009-of-00027.safetensors", + "layers.34.feed_forward.w1.weight": "consolidated-00009-of-00027.safetensors", + "layers.34.feed_forward.w1.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.34.feed_forward.w2.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.34.feed_forward.w2.weight": "consolidated-00009-of-00027.safetensors", + "layers.34.feed_forward.w2.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.34.feed_forward.w3.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.34.feed_forward.w3.weight": "consolidated-00009-of-00027.safetensors", + "layers.34.feed_forward.w3.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.34.ffn_norm.weight": "consolidated-00009-of-00027.safetensors", + "layers.35.attention.wk.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.35.attention.wk.weight": "consolidated-00009-of-00027.safetensors", + "layers.35.attention.wk.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.35.attention.wo.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.35.attention.wo.weight": "consolidated-00009-of-00027.safetensors", + "layers.35.attention.wo.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.35.attention.wq.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.35.attention.wq.weight": "consolidated-00009-of-00027.safetensors", + "layers.35.attention.wq.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.35.attention.wv.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.35.attention.wv.weight": "consolidated-00009-of-00027.safetensors", + "layers.35.attention.wv.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.35.attention_norm.weight": "consolidated-00009-of-00027.safetensors", + "layers.35.feed_forward.w1.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.35.feed_forward.w1.weight": 
"consolidated-00009-of-00027.safetensors", + "layers.35.feed_forward.w1.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.35.feed_forward.w2.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.35.feed_forward.w2.weight": "consolidated-00009-of-00027.safetensors", + "layers.35.feed_forward.w2.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.35.feed_forward.w3.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.35.feed_forward.w3.weight": "consolidated-00009-of-00027.safetensors", + "layers.35.feed_forward.w3.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.35.ffn_norm.weight": "consolidated-00009-of-00027.safetensors", + "layers.36.attention.wk.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.36.attention.wk.weight": "consolidated-00009-of-00027.safetensors", + "layers.36.attention.wk.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.36.attention.wo.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.36.attention.wo.weight": "consolidated-00009-of-00027.safetensors", + "layers.36.attention.wo.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.36.attention.wq.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.36.attention.wq.weight": "consolidated-00009-of-00027.safetensors", + "layers.36.attention.wq.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.36.attention.wv.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.36.attention.wv.weight": "consolidated-00009-of-00027.safetensors", + "layers.36.attention.wv.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.36.attention_norm.weight": "consolidated-00009-of-00027.safetensors", + "layers.36.feed_forward.w1.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.36.feed_forward.w1.weight": "consolidated-00009-of-00027.safetensors", + "layers.36.feed_forward.w1.qscale_weight": 
"consolidated-00009-of-00027.safetensors", + "layers.36.feed_forward.w2.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.36.feed_forward.w2.weight": "consolidated-00009-of-00027.safetensors", + "layers.36.feed_forward.w2.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.36.feed_forward.w3.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.36.feed_forward.w3.weight": "consolidated-00009-of-00027.safetensors", + "layers.36.feed_forward.w3.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.36.ffn_norm.weight": "consolidated-00009-of-00027.safetensors", + "layers.37.attention.wk.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.37.attention.wk.weight": "consolidated-00009-of-00027.safetensors", + "layers.37.attention.wk.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.37.attention.wo.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.37.attention.wo.weight": "consolidated-00009-of-00027.safetensors", + "layers.37.attention.wo.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.37.attention.wq.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.37.attention.wq.weight": "consolidated-00009-of-00027.safetensors", + "layers.37.attention.wq.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.37.attention.wv.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.37.attention.wv.weight": "consolidated-00009-of-00027.safetensors", + "layers.37.attention.wv.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.37.attention_norm.weight": "consolidated-00009-of-00027.safetensors", + "layers.37.feed_forward.w1.qscale_act": "consolidated-00009-of-00027.safetensors", + "layers.37.feed_forward.w1.weight": "consolidated-00009-of-00027.safetensors", + "layers.37.feed_forward.w1.qscale_weight": "consolidated-00009-of-00027.safetensors", + "layers.37.feed_forward.w2.qscale_act": 
"consolidated-00009-of-00027.safetensors", + "layers.37.feed_forward.w2.weight": "consolidated-00010-of-00027.safetensors", + "layers.37.feed_forward.w2.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.37.feed_forward.w3.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.37.feed_forward.w3.weight": "consolidated-00010-of-00027.safetensors", + "layers.37.feed_forward.w3.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.37.ffn_norm.weight": "consolidated-00010-of-00027.safetensors", + "layers.38.attention.wk.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.38.attention.wk.weight": "consolidated-00010-of-00027.safetensors", + "layers.38.attention.wk.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.38.attention.wo.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.38.attention.wo.weight": "consolidated-00010-of-00027.safetensors", + "layers.38.attention.wo.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.38.attention.wq.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.38.attention.wq.weight": "consolidated-00010-of-00027.safetensors", + "layers.38.attention.wq.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.38.attention.wv.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.38.attention.wv.weight": "consolidated-00010-of-00027.safetensors", + "layers.38.attention.wv.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.38.attention_norm.weight": "consolidated-00010-of-00027.safetensors", + "layers.38.feed_forward.w1.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.38.feed_forward.w1.weight": "consolidated-00010-of-00027.safetensors", + "layers.38.feed_forward.w1.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.38.feed_forward.w2.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.38.feed_forward.w2.weight": 
"consolidated-00010-of-00027.safetensors", + "layers.38.feed_forward.w2.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.38.feed_forward.w3.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.38.feed_forward.w3.weight": "consolidated-00010-of-00027.safetensors", + "layers.38.feed_forward.w3.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.38.ffn_norm.weight": "consolidated-00010-of-00027.safetensors", + "layers.39.attention.wk.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.39.attention.wk.weight": "consolidated-00010-of-00027.safetensors", + "layers.39.attention.wk.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.39.attention.wo.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.39.attention.wo.weight": "consolidated-00010-of-00027.safetensors", + "layers.39.attention.wo.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.39.attention.wq.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.39.attention.wq.weight": "consolidated-00010-of-00027.safetensors", + "layers.39.attention.wq.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.39.attention.wv.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.39.attention.wv.weight": "consolidated-00010-of-00027.safetensors", + "layers.39.attention.wv.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.39.attention_norm.weight": "consolidated-00010-of-00027.safetensors", + "layers.39.feed_forward.w1.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.39.feed_forward.w1.weight": "consolidated-00010-of-00027.safetensors", + "layers.39.feed_forward.w1.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.39.feed_forward.w2.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.39.feed_forward.w2.weight": "consolidated-00010-of-00027.safetensors", + "layers.39.feed_forward.w2.qscale_weight": 
"consolidated-00010-of-00027.safetensors", + "layers.39.feed_forward.w3.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.39.feed_forward.w3.weight": "consolidated-00010-of-00027.safetensors", + "layers.39.feed_forward.w3.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.39.ffn_norm.weight": "consolidated-00010-of-00027.safetensors", + "layers.4.attention.wk.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.4.attention.wk.weight": "consolidated-00010-of-00027.safetensors", + "layers.4.attention.wk.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.4.attention.wo.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.4.attention.wo.weight": "consolidated-00010-of-00027.safetensors", + "layers.4.attention.wo.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.4.attention.wq.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.4.attention.wq.weight": "consolidated-00010-of-00027.safetensors", + "layers.4.attention.wq.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.4.attention.wv.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.4.attention.wv.weight": "consolidated-00010-of-00027.safetensors", + "layers.4.attention.wv.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.4.attention_norm.weight": "consolidated-00010-of-00027.safetensors", + "layers.4.feed_forward.w1.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.4.feed_forward.w1.weight": "consolidated-00010-of-00027.safetensors", + "layers.4.feed_forward.w1.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.4.feed_forward.w2.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.4.feed_forward.w2.weight": "consolidated-00010-of-00027.safetensors", + "layers.4.feed_forward.w2.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.4.feed_forward.w3.qscale_act": 
"consolidated-00010-of-00027.safetensors", + "layers.4.feed_forward.w3.weight": "consolidated-00010-of-00027.safetensors", + "layers.4.feed_forward.w3.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.4.ffn_norm.weight": "consolidated-00010-of-00027.safetensors", + "layers.40.attention.wk.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.40.attention.wk.weight": "consolidated-00010-of-00027.safetensors", + "layers.40.attention.wk.qscale_weight": "consolidated-00010-of-00027.safetensors", + "layers.40.attention.wo.qscale_act": "consolidated-00010-of-00027.safetensors", + "layers.40.attention.wo.weight": "consolidated-00011-of-00027.safetensors", + "layers.40.attention.wo.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.40.attention.wq.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.40.attention.wq.weight": "consolidated-00011-of-00027.safetensors", + "layers.40.attention.wq.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.40.attention.wv.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.40.attention.wv.weight": "consolidated-00011-of-00027.safetensors", + "layers.40.attention.wv.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.40.attention_norm.weight": "consolidated-00011-of-00027.safetensors", + "layers.40.feed_forward.w1.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.40.feed_forward.w1.weight": "consolidated-00011-of-00027.safetensors", + "layers.40.feed_forward.w1.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.40.feed_forward.w2.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.40.feed_forward.w2.weight": "consolidated-00011-of-00027.safetensors", + "layers.40.feed_forward.w2.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.40.feed_forward.w3.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.40.feed_forward.w3.weight": 
"consolidated-00011-of-00027.safetensors", + "layers.40.feed_forward.w3.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.40.ffn_norm.weight": "consolidated-00011-of-00027.safetensors", + "layers.41.attention.wk.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.41.attention.wk.weight": "consolidated-00011-of-00027.safetensors", + "layers.41.attention.wk.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.41.attention.wo.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.41.attention.wo.weight": "consolidated-00011-of-00027.safetensors", + "layers.41.attention.wo.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.41.attention.wq.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.41.attention.wq.weight": "consolidated-00011-of-00027.safetensors", + "layers.41.attention.wq.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.41.attention.wv.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.41.attention.wv.weight": "consolidated-00011-of-00027.safetensors", + "layers.41.attention.wv.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.41.attention_norm.weight": "consolidated-00011-of-00027.safetensors", + "layers.41.feed_forward.w1.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.41.feed_forward.w1.weight": "consolidated-00011-of-00027.safetensors", + "layers.41.feed_forward.w1.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.41.feed_forward.w2.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.41.feed_forward.w2.weight": "consolidated-00011-of-00027.safetensors", + "layers.41.feed_forward.w2.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.41.feed_forward.w3.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.41.feed_forward.w3.weight": "consolidated-00011-of-00027.safetensors", + "layers.41.feed_forward.w3.qscale_weight": 
"consolidated-00011-of-00027.safetensors", + "layers.41.ffn_norm.weight": "consolidated-00011-of-00027.safetensors", + "layers.42.attention.wk.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.42.attention.wk.weight": "consolidated-00011-of-00027.safetensors", + "layers.42.attention.wk.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.42.attention.wo.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.42.attention.wo.weight": "consolidated-00011-of-00027.safetensors", + "layers.42.attention.wo.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.42.attention.wq.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.42.attention.wq.weight": "consolidated-00011-of-00027.safetensors", + "layers.42.attention.wq.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.42.attention.wv.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.42.attention.wv.weight": "consolidated-00011-of-00027.safetensors", + "layers.42.attention.wv.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.42.attention_norm.weight": "consolidated-00011-of-00027.safetensors", + "layers.42.feed_forward.w1.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.42.feed_forward.w1.weight": "consolidated-00011-of-00027.safetensors", + "layers.42.feed_forward.w1.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.42.feed_forward.w2.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.42.feed_forward.w2.weight": "consolidated-00011-of-00027.safetensors", + "layers.42.feed_forward.w2.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.42.feed_forward.w3.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.42.feed_forward.w3.weight": "consolidated-00011-of-00027.safetensors", + "layers.42.feed_forward.w3.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.42.ffn_norm.weight": 
"consolidated-00011-of-00027.safetensors", + "layers.43.attention.wk.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.43.attention.wk.weight": "consolidated-00011-of-00027.safetensors", + "layers.43.attention.wk.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.43.attention.wo.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.43.attention.wo.weight": "consolidated-00011-of-00027.safetensors", + "layers.43.attention.wo.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.43.attention.wq.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.43.attention.wq.weight": "consolidated-00011-of-00027.safetensors", + "layers.43.attention.wq.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.43.attention.wv.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.43.attention.wv.weight": "consolidated-00011-of-00027.safetensors", + "layers.43.attention.wv.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.43.attention_norm.weight": "consolidated-00011-of-00027.safetensors", + "layers.43.feed_forward.w1.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.43.feed_forward.w1.weight": "consolidated-00011-of-00027.safetensors", + "layers.43.feed_forward.w1.qscale_weight": "consolidated-00011-of-00027.safetensors", + "layers.43.feed_forward.w2.qscale_act": "consolidated-00011-of-00027.safetensors", + "layers.43.feed_forward.w2.weight": "consolidated-00012-of-00027.safetensors", + "layers.43.feed_forward.w2.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.43.feed_forward.w3.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.43.feed_forward.w3.weight": "consolidated-00012-of-00027.safetensors", + "layers.43.feed_forward.w3.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.43.ffn_norm.weight": "consolidated-00012-of-00027.safetensors", + "layers.44.attention.wk.qscale_act": 
"consolidated-00012-of-00027.safetensors", + "layers.44.attention.wk.weight": "consolidated-00012-of-00027.safetensors", + "layers.44.attention.wk.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.44.attention.wo.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.44.attention.wo.weight": "consolidated-00012-of-00027.safetensors", + "layers.44.attention.wo.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.44.attention.wq.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.44.attention.wq.weight": "consolidated-00012-of-00027.safetensors", + "layers.44.attention.wq.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.44.attention.wv.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.44.attention.wv.weight": "consolidated-00012-of-00027.safetensors", + "layers.44.attention.wv.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.44.attention_norm.weight": "consolidated-00012-of-00027.safetensors", + "layers.44.feed_forward.w1.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.44.feed_forward.w1.weight": "consolidated-00012-of-00027.safetensors", + "layers.44.feed_forward.w1.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.44.feed_forward.w2.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.44.feed_forward.w2.weight": "consolidated-00012-of-00027.safetensors", + "layers.44.feed_forward.w2.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.44.feed_forward.w3.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.44.feed_forward.w3.weight": "consolidated-00012-of-00027.safetensors", + "layers.44.feed_forward.w3.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.44.ffn_norm.weight": "consolidated-00012-of-00027.safetensors", + "layers.45.attention.wk.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.45.attention.wk.weight": 
"consolidated-00012-of-00027.safetensors", + "layers.45.attention.wk.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.45.attention.wo.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.45.attention.wo.weight": "consolidated-00012-of-00027.safetensors", + "layers.45.attention.wo.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.45.attention.wq.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.45.attention.wq.weight": "consolidated-00012-of-00027.safetensors", + "layers.45.attention.wq.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.45.attention.wv.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.45.attention.wv.weight": "consolidated-00012-of-00027.safetensors", + "layers.45.attention.wv.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.45.attention_norm.weight": "consolidated-00012-of-00027.safetensors", + "layers.45.feed_forward.w1.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.45.feed_forward.w1.weight": "consolidated-00012-of-00027.safetensors", + "layers.45.feed_forward.w1.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.45.feed_forward.w2.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.45.feed_forward.w2.weight": "consolidated-00012-of-00027.safetensors", + "layers.45.feed_forward.w2.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.45.feed_forward.w3.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.45.feed_forward.w3.weight": "consolidated-00012-of-00027.safetensors", + "layers.45.feed_forward.w3.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.45.ffn_norm.weight": "consolidated-00012-of-00027.safetensors", + "layers.46.attention.wk.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.46.attention.wk.weight": "consolidated-00012-of-00027.safetensors", + "layers.46.attention.wk.qscale_weight": 
"consolidated-00012-of-00027.safetensors", + "layers.46.attention.wo.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.46.attention.wo.weight": "consolidated-00012-of-00027.safetensors", + "layers.46.attention.wo.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.46.attention.wq.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.46.attention.wq.weight": "consolidated-00012-of-00027.safetensors", + "layers.46.attention.wq.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.46.attention.wv.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.46.attention.wv.weight": "consolidated-00012-of-00027.safetensors", + "layers.46.attention.wv.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.46.attention_norm.weight": "consolidated-00012-of-00027.safetensors", + "layers.46.feed_forward.w1.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.46.feed_forward.w1.weight": "consolidated-00012-of-00027.safetensors", + "layers.46.feed_forward.w1.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.46.feed_forward.w2.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.46.feed_forward.w2.weight": "consolidated-00012-of-00027.safetensors", + "layers.46.feed_forward.w2.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.46.feed_forward.w3.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.46.feed_forward.w3.weight": "consolidated-00012-of-00027.safetensors", + "layers.46.feed_forward.w3.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.46.ffn_norm.weight": "consolidated-00012-of-00027.safetensors", + "layers.47.attention.wk.qscale_act": "consolidated-00012-of-00027.safetensors", + "layers.47.attention.wk.weight": "consolidated-00012-of-00027.safetensors", + "layers.47.attention.wk.qscale_weight": "consolidated-00012-of-00027.safetensors", + "layers.47.attention.wo.qscale_act": 
"consolidated-00012-of-00027.safetensors", + "layers.47.attention.wo.weight": "consolidated-00013-of-00027.safetensors", + "layers.47.attention.wo.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.47.attention.wq.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.47.attention.wq.weight": "consolidated-00013-of-00027.safetensors", + "layers.47.attention.wq.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.47.attention.wv.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.47.attention.wv.weight": "consolidated-00013-of-00027.safetensors", + "layers.47.attention.wv.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.47.attention_norm.weight": "consolidated-00013-of-00027.safetensors", + "layers.47.feed_forward.w1.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.47.feed_forward.w1.weight": "consolidated-00013-of-00027.safetensors", + "layers.47.feed_forward.w1.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.47.feed_forward.w2.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.47.feed_forward.w2.weight": "consolidated-00013-of-00027.safetensors", + "layers.47.feed_forward.w2.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.47.feed_forward.w3.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.47.feed_forward.w3.weight": "consolidated-00013-of-00027.safetensors", + "layers.47.feed_forward.w3.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.47.ffn_norm.weight": "consolidated-00013-of-00027.safetensors", + "layers.48.attention.wk.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.48.attention.wk.weight": "consolidated-00013-of-00027.safetensors", + "layers.48.attention.wk.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.48.attention.wo.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.48.attention.wo.weight": 
"consolidated-00013-of-00027.safetensors", + "layers.48.attention.wo.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.48.attention.wq.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.48.attention.wq.weight": "consolidated-00013-of-00027.safetensors", + "layers.48.attention.wq.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.48.attention.wv.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.48.attention.wv.weight": "consolidated-00013-of-00027.safetensors", + "layers.48.attention.wv.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.48.attention_norm.weight": "consolidated-00013-of-00027.safetensors", + "layers.48.feed_forward.w1.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.48.feed_forward.w1.weight": "consolidated-00013-of-00027.safetensors", + "layers.48.feed_forward.w1.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.48.feed_forward.w2.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.48.feed_forward.w2.weight": "consolidated-00013-of-00027.safetensors", + "layers.48.feed_forward.w2.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.48.feed_forward.w3.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.48.feed_forward.w3.weight": "consolidated-00013-of-00027.safetensors", + "layers.48.feed_forward.w3.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.48.ffn_norm.weight": "consolidated-00013-of-00027.safetensors", + "layers.49.attention.wk.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.49.attention.wk.weight": "consolidated-00013-of-00027.safetensors", + "layers.49.attention.wk.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.49.attention.wo.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.49.attention.wo.weight": "consolidated-00013-of-00027.safetensors", + "layers.49.attention.wo.qscale_weight": 
"consolidated-00013-of-00027.safetensors", + "layers.49.attention.wq.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.49.attention.wq.weight": "consolidated-00013-of-00027.safetensors", + "layers.49.attention.wq.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.49.attention.wv.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.49.attention.wv.weight": "consolidated-00013-of-00027.safetensors", + "layers.49.attention.wv.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.49.attention_norm.weight": "consolidated-00013-of-00027.safetensors", + "layers.49.feed_forward.w1.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.49.feed_forward.w1.weight": "consolidated-00013-of-00027.safetensors", + "layers.49.feed_forward.w1.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.49.feed_forward.w2.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.49.feed_forward.w2.weight": "consolidated-00013-of-00027.safetensors", + "layers.49.feed_forward.w2.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.49.feed_forward.w3.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.49.feed_forward.w3.weight": "consolidated-00013-of-00027.safetensors", + "layers.49.feed_forward.w3.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.49.ffn_norm.weight": "consolidated-00013-of-00027.safetensors", + "layers.5.attention.wk.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.5.attention.wk.weight": "consolidated-00013-of-00027.safetensors", + "layers.5.attention.wk.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.5.attention.wo.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.5.attention.wo.weight": "consolidated-00013-of-00027.safetensors", + "layers.5.attention.wo.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.5.attention.wq.qscale_act": 
"consolidated-00013-of-00027.safetensors", + "layers.5.attention.wq.weight": "consolidated-00013-of-00027.safetensors", + "layers.5.attention.wq.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.5.attention.wv.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.5.attention.wv.weight": "consolidated-00013-of-00027.safetensors", + "layers.5.attention.wv.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.5.attention_norm.weight": "consolidated-00013-of-00027.safetensors", + "layers.5.feed_forward.w1.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.5.feed_forward.w1.weight": "consolidated-00013-of-00027.safetensors", + "layers.5.feed_forward.w1.qscale_weight": "consolidated-00013-of-00027.safetensors", + "layers.5.feed_forward.w2.qscale_act": "consolidated-00013-of-00027.safetensors", + "layers.5.feed_forward.w2.weight": "consolidated-00014-of-00027.safetensors", + "layers.5.feed_forward.w2.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.5.feed_forward.w3.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.5.feed_forward.w3.weight": "consolidated-00014-of-00027.safetensors", + "layers.5.feed_forward.w3.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.5.ffn_norm.weight": "consolidated-00014-of-00027.safetensors", + "layers.50.attention.wk.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.50.attention.wk.weight": "consolidated-00014-of-00027.safetensors", + "layers.50.attention.wk.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.50.attention.wo.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.50.attention.wo.weight": "consolidated-00014-of-00027.safetensors", + "layers.50.attention.wo.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.50.attention.wq.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.50.attention.wq.weight": 
"consolidated-00014-of-00027.safetensors", + "layers.50.attention.wq.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.50.attention.wv.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.50.attention.wv.weight": "consolidated-00014-of-00027.safetensors", + "layers.50.attention.wv.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.50.attention_norm.weight": "consolidated-00014-of-00027.safetensors", + "layers.50.feed_forward.w1.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.50.feed_forward.w1.weight": "consolidated-00014-of-00027.safetensors", + "layers.50.feed_forward.w1.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.50.feed_forward.w2.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.50.feed_forward.w2.weight": "consolidated-00014-of-00027.safetensors", + "layers.50.feed_forward.w2.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.50.feed_forward.w3.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.50.feed_forward.w3.weight": "consolidated-00014-of-00027.safetensors", + "layers.50.feed_forward.w3.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.50.ffn_norm.weight": "consolidated-00014-of-00027.safetensors", + "layers.51.attention.wk.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.51.attention.wk.weight": "consolidated-00014-of-00027.safetensors", + "layers.51.attention.wk.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.51.attention.wo.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.51.attention.wo.weight": "consolidated-00014-of-00027.safetensors", + "layers.51.attention.wo.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.51.attention.wq.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.51.attention.wq.weight": "consolidated-00014-of-00027.safetensors", + "layers.51.attention.wq.qscale_weight": 
"consolidated-00014-of-00027.safetensors", + "layers.51.attention.wv.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.51.attention.wv.weight": "consolidated-00014-of-00027.safetensors", + "layers.51.attention.wv.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.51.attention_norm.weight": "consolidated-00014-of-00027.safetensors", + "layers.51.feed_forward.w1.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.51.feed_forward.w1.weight": "consolidated-00014-of-00027.safetensors", + "layers.51.feed_forward.w1.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.51.feed_forward.w2.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.51.feed_forward.w2.weight": "consolidated-00014-of-00027.safetensors", + "layers.51.feed_forward.w2.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.51.feed_forward.w3.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.51.feed_forward.w3.weight": "consolidated-00014-of-00027.safetensors", + "layers.51.feed_forward.w3.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.51.ffn_norm.weight": "consolidated-00014-of-00027.safetensors", + "layers.52.attention.wk.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.52.attention.wk.weight": "consolidated-00014-of-00027.safetensors", + "layers.52.attention.wk.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.52.attention.wo.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.52.attention.wo.weight": "consolidated-00014-of-00027.safetensors", + "layers.52.attention.wo.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.52.attention.wq.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.52.attention.wq.weight": "consolidated-00014-of-00027.safetensors", + "layers.52.attention.wq.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.52.attention.wv.qscale_act": 
"consolidated-00014-of-00027.safetensors", + "layers.52.attention.wv.weight": "consolidated-00014-of-00027.safetensors", + "layers.52.attention.wv.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.52.attention_norm.weight": "consolidated-00014-of-00027.safetensors", + "layers.52.feed_forward.w1.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.52.feed_forward.w1.weight": "consolidated-00014-of-00027.safetensors", + "layers.52.feed_forward.w1.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.52.feed_forward.w2.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.52.feed_forward.w2.weight": "consolidated-00014-of-00027.safetensors", + "layers.52.feed_forward.w2.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.52.feed_forward.w3.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.52.feed_forward.w3.weight": "consolidated-00014-of-00027.safetensors", + "layers.52.feed_forward.w3.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.52.ffn_norm.weight": "consolidated-00014-of-00027.safetensors", + "layers.53.attention.wk.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.53.attention.wk.weight": "consolidated-00014-of-00027.safetensors", + "layers.53.attention.wk.qscale_weight": "consolidated-00014-of-00027.safetensors", + "layers.53.attention.wo.qscale_act": "consolidated-00014-of-00027.safetensors", + "layers.53.attention.wo.weight": "consolidated-00015-of-00027.safetensors", + "layers.53.attention.wo.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.53.attention.wq.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.53.attention.wq.weight": "consolidated-00015-of-00027.safetensors", + "layers.53.attention.wq.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.53.attention.wv.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.53.attention.wv.weight": 
"consolidated-00015-of-00027.safetensors", + "layers.53.attention.wv.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.53.attention_norm.weight": "consolidated-00015-of-00027.safetensors", + "layers.53.feed_forward.w1.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.53.feed_forward.w1.weight": "consolidated-00015-of-00027.safetensors", + "layers.53.feed_forward.w1.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.53.feed_forward.w2.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.53.feed_forward.w2.weight": "consolidated-00015-of-00027.safetensors", + "layers.53.feed_forward.w2.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.53.feed_forward.w3.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.53.feed_forward.w3.weight": "consolidated-00015-of-00027.safetensors", + "layers.53.feed_forward.w3.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.53.ffn_norm.weight": "consolidated-00015-of-00027.safetensors", + "layers.54.attention.wk.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.54.attention.wk.weight": "consolidated-00015-of-00027.safetensors", + "layers.54.attention.wk.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.54.attention.wo.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.54.attention.wo.weight": "consolidated-00015-of-00027.safetensors", + "layers.54.attention.wo.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.54.attention.wq.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.54.attention.wq.weight": "consolidated-00015-of-00027.safetensors", + "layers.54.attention.wq.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.54.attention.wv.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.54.attention.wv.weight": "consolidated-00015-of-00027.safetensors", + "layers.54.attention.wv.qscale_weight": 
"consolidated-00015-of-00027.safetensors", + "layers.54.attention_norm.weight": "consolidated-00015-of-00027.safetensors", + "layers.54.feed_forward.w1.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.54.feed_forward.w1.weight": "consolidated-00015-of-00027.safetensors", + "layers.54.feed_forward.w1.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.54.feed_forward.w2.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.54.feed_forward.w2.weight": "consolidated-00015-of-00027.safetensors", + "layers.54.feed_forward.w2.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.54.feed_forward.w3.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.54.feed_forward.w3.weight": "consolidated-00015-of-00027.safetensors", + "layers.54.feed_forward.w3.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.54.ffn_norm.weight": "consolidated-00015-of-00027.safetensors", + "layers.55.attention.wk.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.55.attention.wk.weight": "consolidated-00015-of-00027.safetensors", + "layers.55.attention.wk.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.55.attention.wo.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.55.attention.wo.weight": "consolidated-00015-of-00027.safetensors", + "layers.55.attention.wo.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.55.attention.wq.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.55.attention.wq.weight": "consolidated-00015-of-00027.safetensors", + "layers.55.attention.wq.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.55.attention.wv.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.55.attention.wv.weight": "consolidated-00015-of-00027.safetensors", + "layers.55.attention.wv.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.55.attention_norm.weight": 
"consolidated-00015-of-00027.safetensors", + "layers.55.feed_forward.w1.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.55.feed_forward.w1.weight": "consolidated-00015-of-00027.safetensors", + "layers.55.feed_forward.w1.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.55.feed_forward.w2.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.55.feed_forward.w2.weight": "consolidated-00015-of-00027.safetensors", + "layers.55.feed_forward.w2.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.55.feed_forward.w3.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.55.feed_forward.w3.weight": "consolidated-00015-of-00027.safetensors", + "layers.55.feed_forward.w3.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.55.ffn_norm.weight": "consolidated-00015-of-00027.safetensors", + "layers.56.attention.wk.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.56.attention.wk.weight": "consolidated-00015-of-00027.safetensors", + "layers.56.attention.wk.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.56.attention.wo.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.56.attention.wo.weight": "consolidated-00015-of-00027.safetensors", + "layers.56.attention.wo.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.56.attention.wq.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.56.attention.wq.weight": "consolidated-00015-of-00027.safetensors", + "layers.56.attention.wq.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.56.attention.wv.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.56.attention.wv.weight": "consolidated-00015-of-00027.safetensors", + "layers.56.attention.wv.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.56.attention_norm.weight": "consolidated-00015-of-00027.safetensors", + "layers.56.feed_forward.w1.qscale_act": 
"consolidated-00015-of-00027.safetensors", + "layers.56.feed_forward.w1.weight": "consolidated-00015-of-00027.safetensors", + "layers.56.feed_forward.w1.qscale_weight": "consolidated-00015-of-00027.safetensors", + "layers.56.feed_forward.w2.qscale_act": "consolidated-00015-of-00027.safetensors", + "layers.56.feed_forward.w2.weight": "consolidated-00016-of-00027.safetensors", + "layers.56.feed_forward.w2.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.56.feed_forward.w3.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.56.feed_forward.w3.weight": "consolidated-00016-of-00027.safetensors", + "layers.56.feed_forward.w3.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.56.ffn_norm.weight": "consolidated-00016-of-00027.safetensors", + "layers.57.attention.wk.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.57.attention.wk.weight": "consolidated-00016-of-00027.safetensors", + "layers.57.attention.wk.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.57.attention.wo.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.57.attention.wo.weight": "consolidated-00016-of-00027.safetensors", + "layers.57.attention.wo.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.57.attention.wq.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.57.attention.wq.weight": "consolidated-00016-of-00027.safetensors", + "layers.57.attention.wq.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.57.attention.wv.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.57.attention.wv.weight": "consolidated-00016-of-00027.safetensors", + "layers.57.attention.wv.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.57.attention_norm.weight": "consolidated-00016-of-00027.safetensors", + "layers.57.feed_forward.w1.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.57.feed_forward.w1.weight": 
"consolidated-00016-of-00027.safetensors", + "layers.57.feed_forward.w1.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.57.feed_forward.w2.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.57.feed_forward.w2.weight": "consolidated-00016-of-00027.safetensors", + "layers.57.feed_forward.w2.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.57.feed_forward.w3.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.57.feed_forward.w3.weight": "consolidated-00016-of-00027.safetensors", + "layers.57.feed_forward.w3.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.57.ffn_norm.weight": "consolidated-00016-of-00027.safetensors", + "layers.58.attention.wk.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.58.attention.wk.weight": "consolidated-00016-of-00027.safetensors", + "layers.58.attention.wk.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.58.attention.wo.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.58.attention.wo.weight": "consolidated-00016-of-00027.safetensors", + "layers.58.attention.wo.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.58.attention.wq.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.58.attention.wq.weight": "consolidated-00016-of-00027.safetensors", + "layers.58.attention.wq.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.58.attention.wv.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.58.attention.wv.weight": "consolidated-00016-of-00027.safetensors", + "layers.58.attention.wv.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.58.attention_norm.weight": "consolidated-00016-of-00027.safetensors", + "layers.58.feed_forward.w1.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.58.feed_forward.w1.weight": "consolidated-00016-of-00027.safetensors", + "layers.58.feed_forward.w1.qscale_weight": 
"consolidated-00016-of-00027.safetensors", + "layers.58.feed_forward.w2.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.58.feed_forward.w2.weight": "consolidated-00016-of-00027.safetensors", + "layers.58.feed_forward.w2.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.58.feed_forward.w3.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.58.feed_forward.w3.weight": "consolidated-00016-of-00027.safetensors", + "layers.58.feed_forward.w3.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.58.ffn_norm.weight": "consolidated-00016-of-00027.safetensors", + "layers.59.attention.wk.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.59.attention.wk.weight": "consolidated-00016-of-00027.safetensors", + "layers.59.attention.wk.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.59.attention.wo.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.59.attention.wo.weight": "consolidated-00016-of-00027.safetensors", + "layers.59.attention.wo.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.59.attention.wq.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.59.attention.wq.weight": "consolidated-00016-of-00027.safetensors", + "layers.59.attention.wq.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.59.attention.wv.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.59.attention.wv.weight": "consolidated-00016-of-00027.safetensors", + "layers.59.attention.wv.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.59.attention_norm.weight": "consolidated-00016-of-00027.safetensors", + "layers.59.feed_forward.w1.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.59.feed_forward.w1.weight": "consolidated-00016-of-00027.safetensors", + "layers.59.feed_forward.w1.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.59.feed_forward.w2.qscale_act": 
"consolidated-00016-of-00027.safetensors", + "layers.59.feed_forward.w2.weight": "consolidated-00016-of-00027.safetensors", + "layers.59.feed_forward.w2.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.59.feed_forward.w3.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.59.feed_forward.w3.weight": "consolidated-00016-of-00027.safetensors", + "layers.59.feed_forward.w3.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.59.ffn_norm.weight": "consolidated-00016-of-00027.safetensors", + "layers.6.attention.wk.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.6.attention.wk.weight": "consolidated-00016-of-00027.safetensors", + "layers.6.attention.wk.qscale_weight": "consolidated-00016-of-00027.safetensors", + "layers.6.attention.wo.qscale_act": "consolidated-00016-of-00027.safetensors", + "layers.6.attention.wo.weight": "consolidated-00017-of-00027.safetensors", + "layers.6.attention.wo.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.6.attention.wq.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.6.attention.wq.weight": "consolidated-00017-of-00027.safetensors", + "layers.6.attention.wq.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.6.attention.wv.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.6.attention.wv.weight": "consolidated-00017-of-00027.safetensors", + "layers.6.attention.wv.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.6.attention_norm.weight": "consolidated-00017-of-00027.safetensors", + "layers.6.feed_forward.w1.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.6.feed_forward.w1.weight": "consolidated-00017-of-00027.safetensors", + "layers.6.feed_forward.w1.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.6.feed_forward.w2.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.6.feed_forward.w2.weight": 
"consolidated-00017-of-00027.safetensors", + "layers.6.feed_forward.w2.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.6.feed_forward.w3.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.6.feed_forward.w3.weight": "consolidated-00017-of-00027.safetensors", + "layers.6.feed_forward.w3.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.6.ffn_norm.weight": "consolidated-00017-of-00027.safetensors", + "layers.60.attention.wk.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.60.attention.wk.weight": "consolidated-00017-of-00027.safetensors", + "layers.60.attention.wk.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.60.attention.wo.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.60.attention.wo.weight": "consolidated-00017-of-00027.safetensors", + "layers.60.attention.wo.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.60.attention.wq.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.60.attention.wq.weight": "consolidated-00017-of-00027.safetensors", + "layers.60.attention.wq.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.60.attention.wv.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.60.attention.wv.weight": "consolidated-00017-of-00027.safetensors", + "layers.60.attention.wv.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.60.attention_norm.weight": "consolidated-00017-of-00027.safetensors", + "layers.60.feed_forward.w1.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.60.feed_forward.w1.weight": "consolidated-00017-of-00027.safetensors", + "layers.60.feed_forward.w1.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.60.feed_forward.w2.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.60.feed_forward.w2.weight": "consolidated-00017-of-00027.safetensors", + "layers.60.feed_forward.w2.qscale_weight": 
"consolidated-00017-of-00027.safetensors", + "layers.60.feed_forward.w3.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.60.feed_forward.w3.weight": "consolidated-00017-of-00027.safetensors", + "layers.60.feed_forward.w3.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.60.ffn_norm.weight": "consolidated-00017-of-00027.safetensors", + "layers.61.attention.wk.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.61.attention.wk.weight": "consolidated-00017-of-00027.safetensors", + "layers.61.attention.wk.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.61.attention.wo.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.61.attention.wo.weight": "consolidated-00017-of-00027.safetensors", + "layers.61.attention.wo.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.61.attention.wq.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.61.attention.wq.weight": "consolidated-00017-of-00027.safetensors", + "layers.61.attention.wq.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.61.attention.wv.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.61.attention.wv.weight": "consolidated-00017-of-00027.safetensors", + "layers.61.attention.wv.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.61.attention_norm.weight": "consolidated-00017-of-00027.safetensors", + "layers.61.feed_forward.w1.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.61.feed_forward.w1.weight": "consolidated-00017-of-00027.safetensors", + "layers.61.feed_forward.w1.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.61.feed_forward.w2.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.61.feed_forward.w2.weight": "consolidated-00017-of-00027.safetensors", + "layers.61.feed_forward.w2.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.61.feed_forward.w3.qscale_act": 
"consolidated-00017-of-00027.safetensors", + "layers.61.feed_forward.w3.weight": "consolidated-00017-of-00027.safetensors", + "layers.61.feed_forward.w3.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.61.ffn_norm.weight": "consolidated-00017-of-00027.safetensors", + "layers.62.attention.wk.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.62.attention.wk.weight": "consolidated-00017-of-00027.safetensors", + "layers.62.attention.wk.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.62.attention.wo.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.62.attention.wo.weight": "consolidated-00017-of-00027.safetensors", + "layers.62.attention.wo.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.62.attention.wq.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.62.attention.wq.weight": "consolidated-00017-of-00027.safetensors", + "layers.62.attention.wq.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.62.attention.wv.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.62.attention.wv.weight": "consolidated-00017-of-00027.safetensors", + "layers.62.attention.wv.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.62.attention_norm.weight": "consolidated-00017-of-00027.safetensors", + "layers.62.feed_forward.w1.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.62.feed_forward.w1.weight": "consolidated-00017-of-00027.safetensors", + "layers.62.feed_forward.w1.qscale_weight": "consolidated-00017-of-00027.safetensors", + "layers.62.feed_forward.w2.qscale_act": "consolidated-00017-of-00027.safetensors", + "layers.62.feed_forward.w2.weight": "consolidated-00018-of-00027.safetensors", + "layers.62.feed_forward.w2.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.62.feed_forward.w3.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.62.feed_forward.w3.weight": 
"consolidated-00018-of-00027.safetensors", + "layers.62.feed_forward.w3.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.62.ffn_norm.weight": "consolidated-00018-of-00027.safetensors", + "layers.63.attention.wk.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.63.attention.wk.weight": "consolidated-00018-of-00027.safetensors", + "layers.63.attention.wk.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.63.attention.wo.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.63.attention.wo.weight": "consolidated-00018-of-00027.safetensors", + "layers.63.attention.wo.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.63.attention.wq.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.63.attention.wq.weight": "consolidated-00018-of-00027.safetensors", + "layers.63.attention.wq.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.63.attention.wv.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.63.attention.wv.weight": "consolidated-00018-of-00027.safetensors", + "layers.63.attention.wv.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.63.attention_norm.weight": "consolidated-00018-of-00027.safetensors", + "layers.63.feed_forward.w1.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.63.feed_forward.w1.weight": "consolidated-00018-of-00027.safetensors", + "layers.63.feed_forward.w1.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.63.feed_forward.w2.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.63.feed_forward.w2.weight": "consolidated-00018-of-00027.safetensors", + "layers.63.feed_forward.w2.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.63.feed_forward.w3.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.63.feed_forward.w3.weight": "consolidated-00018-of-00027.safetensors", + "layers.63.feed_forward.w3.qscale_weight": 
"consolidated-00018-of-00027.safetensors", + "layers.63.ffn_norm.weight": "consolidated-00018-of-00027.safetensors", + "layers.64.attention.wk.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.64.attention.wk.weight": "consolidated-00018-of-00027.safetensors", + "layers.64.attention.wk.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.64.attention.wo.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.64.attention.wo.weight": "consolidated-00018-of-00027.safetensors", + "layers.64.attention.wo.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.64.attention.wq.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.64.attention.wq.weight": "consolidated-00018-of-00027.safetensors", + "layers.64.attention.wq.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.64.attention.wv.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.64.attention.wv.weight": "consolidated-00018-of-00027.safetensors", + "layers.64.attention.wv.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.64.attention_norm.weight": "consolidated-00018-of-00027.safetensors", + "layers.64.feed_forward.w1.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.64.feed_forward.w1.weight": "consolidated-00018-of-00027.safetensors", + "layers.64.feed_forward.w1.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.64.feed_forward.w2.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.64.feed_forward.w2.weight": "consolidated-00018-of-00027.safetensors", + "layers.64.feed_forward.w2.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.64.feed_forward.w3.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.64.feed_forward.w3.weight": "consolidated-00018-of-00027.safetensors", + "layers.64.feed_forward.w3.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.64.ffn_norm.weight": 
"consolidated-00018-of-00027.safetensors", + "layers.65.attention.wk.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.65.attention.wk.weight": "consolidated-00018-of-00027.safetensors", + "layers.65.attention.wk.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.65.attention.wo.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.65.attention.wo.weight": "consolidated-00018-of-00027.safetensors", + "layers.65.attention.wo.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.65.attention.wq.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.65.attention.wq.weight": "consolidated-00018-of-00027.safetensors", + "layers.65.attention.wq.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.65.attention.wv.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.65.attention.wv.weight": "consolidated-00018-of-00027.safetensors", + "layers.65.attention.wv.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.65.attention_norm.weight": "consolidated-00018-of-00027.safetensors", + "layers.65.feed_forward.w1.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.65.feed_forward.w1.weight": "consolidated-00018-of-00027.safetensors", + "layers.65.feed_forward.w1.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.65.feed_forward.w2.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.65.feed_forward.w2.weight": "consolidated-00018-of-00027.safetensors", + "layers.65.feed_forward.w2.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.65.feed_forward.w3.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.65.feed_forward.w3.weight": "consolidated-00018-of-00027.safetensors", + "layers.65.feed_forward.w3.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.65.ffn_norm.weight": "consolidated-00018-of-00027.safetensors", + "layers.66.attention.wk.qscale_act": 
"consolidated-00018-of-00027.safetensors", + "layers.66.attention.wk.weight": "consolidated-00018-of-00027.safetensors", + "layers.66.attention.wk.qscale_weight": "consolidated-00018-of-00027.safetensors", + "layers.66.attention.wo.qscale_act": "consolidated-00018-of-00027.safetensors", + "layers.66.attention.wo.weight": "consolidated-00019-of-00027.safetensors", + "layers.66.attention.wo.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.66.attention.wq.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.66.attention.wq.weight": "consolidated-00019-of-00027.safetensors", + "layers.66.attention.wq.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.66.attention.wv.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.66.attention.wv.weight": "consolidated-00019-of-00027.safetensors", + "layers.66.attention.wv.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.66.attention_norm.weight": "consolidated-00019-of-00027.safetensors", + "layers.66.feed_forward.w1.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.66.feed_forward.w1.weight": "consolidated-00019-of-00027.safetensors", + "layers.66.feed_forward.w1.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.66.feed_forward.w2.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.66.feed_forward.w2.weight": "consolidated-00019-of-00027.safetensors", + "layers.66.feed_forward.w2.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.66.feed_forward.w3.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.66.feed_forward.w3.weight": "consolidated-00019-of-00027.safetensors", + "layers.66.feed_forward.w3.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.66.ffn_norm.weight": "consolidated-00019-of-00027.safetensors", + "layers.67.attention.wk.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.67.attention.wk.weight": 
"consolidated-00019-of-00027.safetensors", + "layers.67.attention.wk.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.67.attention.wo.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.67.attention.wo.weight": "consolidated-00019-of-00027.safetensors", + "layers.67.attention.wo.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.67.attention.wq.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.67.attention.wq.weight": "consolidated-00019-of-00027.safetensors", + "layers.67.attention.wq.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.67.attention.wv.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.67.attention.wv.weight": "consolidated-00019-of-00027.safetensors", + "layers.67.attention.wv.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.67.attention_norm.weight": "consolidated-00019-of-00027.safetensors", + "layers.67.feed_forward.w1.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.67.feed_forward.w1.weight": "consolidated-00019-of-00027.safetensors", + "layers.67.feed_forward.w1.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.67.feed_forward.w2.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.67.feed_forward.w2.weight": "consolidated-00019-of-00027.safetensors", + "layers.67.feed_forward.w2.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.67.feed_forward.w3.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.67.feed_forward.w3.weight": "consolidated-00019-of-00027.safetensors", + "layers.67.feed_forward.w3.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.67.ffn_norm.weight": "consolidated-00019-of-00027.safetensors", + "layers.68.attention.wk.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.68.attention.wk.weight": "consolidated-00019-of-00027.safetensors", + "layers.68.attention.wk.qscale_weight": 
"consolidated-00019-of-00027.safetensors", + "layers.68.attention.wo.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.68.attention.wo.weight": "consolidated-00019-of-00027.safetensors", + "layers.68.attention.wo.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.68.attention.wq.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.68.attention.wq.weight": "consolidated-00019-of-00027.safetensors", + "layers.68.attention.wq.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.68.attention.wv.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.68.attention.wv.weight": "consolidated-00019-of-00027.safetensors", + "layers.68.attention.wv.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.68.attention_norm.weight": "consolidated-00019-of-00027.safetensors", + "layers.68.feed_forward.w1.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.68.feed_forward.w1.weight": "consolidated-00019-of-00027.safetensors", + "layers.68.feed_forward.w1.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.68.feed_forward.w2.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.68.feed_forward.w2.weight": "consolidated-00019-of-00027.safetensors", + "layers.68.feed_forward.w2.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.68.feed_forward.w3.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.68.feed_forward.w3.weight": "consolidated-00019-of-00027.safetensors", + "layers.68.feed_forward.w3.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.68.ffn_norm.weight": "consolidated-00019-of-00027.safetensors", + "layers.69.attention.wk.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.69.attention.wk.weight": "consolidated-00019-of-00027.safetensors", + "layers.69.attention.wk.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.69.attention.wo.qscale_act": 
"consolidated-00019-of-00027.safetensors", + "layers.69.attention.wo.weight": "consolidated-00019-of-00027.safetensors", + "layers.69.attention.wo.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.69.attention.wq.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.69.attention.wq.weight": "consolidated-00019-of-00027.safetensors", + "layers.69.attention.wq.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.69.attention.wv.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.69.attention.wv.weight": "consolidated-00019-of-00027.safetensors", + "layers.69.attention.wv.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.69.attention_norm.weight": "consolidated-00019-of-00027.safetensors", + "layers.69.feed_forward.w1.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.69.feed_forward.w1.weight": "consolidated-00019-of-00027.safetensors", + "layers.69.feed_forward.w1.qscale_weight": "consolidated-00019-of-00027.safetensors", + "layers.69.feed_forward.w2.qscale_act": "consolidated-00019-of-00027.safetensors", + "layers.69.feed_forward.w2.weight": "consolidated-00020-of-00027.safetensors", + "layers.69.feed_forward.w2.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.69.feed_forward.w3.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.69.feed_forward.w3.weight": "consolidated-00020-of-00027.safetensors", + "layers.69.feed_forward.w3.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.69.ffn_norm.weight": "consolidated-00020-of-00027.safetensors", + "layers.7.attention.wk.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.7.attention.wk.weight": "consolidated-00020-of-00027.safetensors", + "layers.7.attention.wk.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.7.attention.wo.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.7.attention.wo.weight": 
"consolidated-00020-of-00027.safetensors", + "layers.7.attention.wo.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.7.attention.wq.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.7.attention.wq.weight": "consolidated-00020-of-00027.safetensors", + "layers.7.attention.wq.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.7.attention.wv.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.7.attention.wv.weight": "consolidated-00020-of-00027.safetensors", + "layers.7.attention.wv.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.7.attention_norm.weight": "consolidated-00020-of-00027.safetensors", + "layers.7.feed_forward.w1.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.7.feed_forward.w1.weight": "consolidated-00020-of-00027.safetensors", + "layers.7.feed_forward.w1.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.7.feed_forward.w2.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.7.feed_forward.w2.weight": "consolidated-00020-of-00027.safetensors", + "layers.7.feed_forward.w2.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.7.feed_forward.w3.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.7.feed_forward.w3.weight": "consolidated-00020-of-00027.safetensors", + "layers.7.feed_forward.w3.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.7.ffn_norm.weight": "consolidated-00020-of-00027.safetensors", + "layers.70.attention.wk.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.70.attention.wk.weight": "consolidated-00020-of-00027.safetensors", + "layers.70.attention.wk.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.70.attention.wo.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.70.attention.wo.weight": "consolidated-00020-of-00027.safetensors", + "layers.70.attention.wo.qscale_weight": 
"consolidated-00020-of-00027.safetensors", + "layers.70.attention.wq.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.70.attention.wq.weight": "consolidated-00020-of-00027.safetensors", + "layers.70.attention.wq.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.70.attention.wv.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.70.attention.wv.weight": "consolidated-00020-of-00027.safetensors", + "layers.70.attention.wv.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.70.attention_norm.weight": "consolidated-00020-of-00027.safetensors", + "layers.70.feed_forward.w1.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.70.feed_forward.w1.weight": "consolidated-00020-of-00027.safetensors", + "layers.70.feed_forward.w1.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.70.feed_forward.w2.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.70.feed_forward.w2.weight": "consolidated-00020-of-00027.safetensors", + "layers.70.feed_forward.w2.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.70.feed_forward.w3.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.70.feed_forward.w3.weight": "consolidated-00020-of-00027.safetensors", + "layers.70.feed_forward.w3.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.70.ffn_norm.weight": "consolidated-00020-of-00027.safetensors", + "layers.71.attention.wk.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.71.attention.wk.weight": "consolidated-00020-of-00027.safetensors", + "layers.71.attention.wk.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.71.attention.wo.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.71.attention.wo.weight": "consolidated-00020-of-00027.safetensors", + "layers.71.attention.wo.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.71.attention.wq.qscale_act": 
"consolidated-00020-of-00027.safetensors", + "layers.71.attention.wq.weight": "consolidated-00020-of-00027.safetensors", + "layers.71.attention.wq.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.71.attention.wv.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.71.attention.wv.weight": "consolidated-00020-of-00027.safetensors", + "layers.71.attention.wv.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.71.attention_norm.weight": "consolidated-00020-of-00027.safetensors", + "layers.71.feed_forward.w1.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.71.feed_forward.w1.weight": "consolidated-00020-of-00027.safetensors", + "layers.71.feed_forward.w1.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.71.feed_forward.w2.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.71.feed_forward.w2.weight": "consolidated-00020-of-00027.safetensors", + "layers.71.feed_forward.w2.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.71.feed_forward.w3.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.71.feed_forward.w3.weight": "consolidated-00020-of-00027.safetensors", + "layers.71.feed_forward.w3.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.71.ffn_norm.weight": "consolidated-00020-of-00027.safetensors", + "layers.72.attention.wk.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.72.attention.wk.weight": "consolidated-00020-of-00027.safetensors", + "layers.72.attention.wk.qscale_weight": "consolidated-00020-of-00027.safetensors", + "layers.72.attention.wo.qscale_act": "consolidated-00020-of-00027.safetensors", + "layers.72.attention.wo.weight": "consolidated-00021-of-00027.safetensors", + "layers.72.attention.wo.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.72.attention.wq.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.72.attention.wq.weight": 
"consolidated-00021-of-00027.safetensors", + "layers.72.attention.wq.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.72.attention.wv.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.72.attention.wv.weight": "consolidated-00021-of-00027.safetensors", + "layers.72.attention.wv.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.72.attention_norm.weight": "consolidated-00021-of-00027.safetensors", + "layers.72.feed_forward.w1.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.72.feed_forward.w1.weight": "consolidated-00021-of-00027.safetensors", + "layers.72.feed_forward.w1.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.72.feed_forward.w2.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.72.feed_forward.w2.weight": "consolidated-00021-of-00027.safetensors", + "layers.72.feed_forward.w2.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.72.feed_forward.w3.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.72.feed_forward.w3.weight": "consolidated-00021-of-00027.safetensors", + "layers.72.feed_forward.w3.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.72.ffn_norm.weight": "consolidated-00021-of-00027.safetensors", + "layers.73.attention.wk.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.73.attention.wk.weight": "consolidated-00021-of-00027.safetensors", + "layers.73.attention.wk.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.73.attention.wo.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.73.attention.wo.weight": "consolidated-00021-of-00027.safetensors", + "layers.73.attention.wo.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.73.attention.wq.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.73.attention.wq.weight": "consolidated-00021-of-00027.safetensors", + "layers.73.attention.wq.qscale_weight": 
"consolidated-00021-of-00027.safetensors", + "layers.73.attention.wv.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.73.attention.wv.weight": "consolidated-00021-of-00027.safetensors", + "layers.73.attention.wv.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.73.attention_norm.weight": "consolidated-00021-of-00027.safetensors", + "layers.73.feed_forward.w1.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.73.feed_forward.w1.weight": "consolidated-00021-of-00027.safetensors", + "layers.73.feed_forward.w1.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.73.feed_forward.w2.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.73.feed_forward.w2.weight": "consolidated-00021-of-00027.safetensors", + "layers.73.feed_forward.w2.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.73.feed_forward.w3.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.73.feed_forward.w3.weight": "consolidated-00021-of-00027.safetensors", + "layers.73.feed_forward.w3.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.73.ffn_norm.weight": "consolidated-00021-of-00027.safetensors", + "layers.74.attention.wk.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.74.attention.wk.weight": "consolidated-00021-of-00027.safetensors", + "layers.74.attention.wk.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.74.attention.wo.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.74.attention.wo.weight": "consolidated-00021-of-00027.safetensors", + "layers.74.attention.wo.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.74.attention.wq.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.74.attention.wq.weight": "consolidated-00021-of-00027.safetensors", + "layers.74.attention.wq.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.74.attention.wv.qscale_act": 
"consolidated-00021-of-00027.safetensors", + "layers.74.attention.wv.weight": "consolidated-00021-of-00027.safetensors", + "layers.74.attention.wv.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.74.attention_norm.weight": "consolidated-00021-of-00027.safetensors", + "layers.74.feed_forward.w1.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.74.feed_forward.w1.weight": "consolidated-00021-of-00027.safetensors", + "layers.74.feed_forward.w1.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.74.feed_forward.w2.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.74.feed_forward.w2.weight": "consolidated-00021-of-00027.safetensors", + "layers.74.feed_forward.w2.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.74.feed_forward.w3.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.74.feed_forward.w3.weight": "consolidated-00021-of-00027.safetensors", + "layers.74.feed_forward.w3.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.74.ffn_norm.weight": "consolidated-00021-of-00027.safetensors", + "layers.75.attention.wk.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.75.attention.wk.weight": "consolidated-00021-of-00027.safetensors", + "layers.75.attention.wk.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.75.attention.wo.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.75.attention.wo.weight": "consolidated-00021-of-00027.safetensors", + "layers.75.attention.wo.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.75.attention.wq.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.75.attention.wq.weight": "consolidated-00021-of-00027.safetensors", + "layers.75.attention.wq.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.75.attention.wv.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.75.attention.wv.weight": 
"consolidated-00021-of-00027.safetensors", + "layers.75.attention.wv.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.75.attention_norm.weight": "consolidated-00021-of-00027.safetensors", + "layers.75.feed_forward.w1.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.75.feed_forward.w1.weight": "consolidated-00021-of-00027.safetensors", + "layers.75.feed_forward.w1.qscale_weight": "consolidated-00021-of-00027.safetensors", + "layers.75.feed_forward.w2.qscale_act": "consolidated-00021-of-00027.safetensors", + "layers.75.feed_forward.w2.weight": "consolidated-00022-of-00027.safetensors", + "layers.75.feed_forward.w2.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.75.feed_forward.w3.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.75.feed_forward.w3.weight": "consolidated-00022-of-00027.safetensors", + "layers.75.feed_forward.w3.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.75.ffn_norm.weight": "consolidated-00022-of-00027.safetensors", + "layers.76.attention.wk.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.76.attention.wk.weight": "consolidated-00022-of-00027.safetensors", + "layers.76.attention.wk.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.76.attention.wo.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.76.attention.wo.weight": "consolidated-00022-of-00027.safetensors", + "layers.76.attention.wo.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.76.attention.wq.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.76.attention.wq.weight": "consolidated-00022-of-00027.safetensors", + "layers.76.attention.wq.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.76.attention.wv.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.76.attention.wv.weight": "consolidated-00022-of-00027.safetensors", + "layers.76.attention.wv.qscale_weight": 
"consolidated-00022-of-00027.safetensors", + "layers.76.attention_norm.weight": "consolidated-00022-of-00027.safetensors", + "layers.76.feed_forward.w1.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.76.feed_forward.w1.weight": "consolidated-00022-of-00027.safetensors", + "layers.76.feed_forward.w1.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.76.feed_forward.w2.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.76.feed_forward.w2.weight": "consolidated-00022-of-00027.safetensors", + "layers.76.feed_forward.w2.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.76.feed_forward.w3.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.76.feed_forward.w3.weight": "consolidated-00022-of-00027.safetensors", + "layers.76.feed_forward.w3.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.76.ffn_norm.weight": "consolidated-00022-of-00027.safetensors", + "layers.77.attention.wk.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.77.attention.wk.weight": "consolidated-00022-of-00027.safetensors", + "layers.77.attention.wk.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.77.attention.wo.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.77.attention.wo.weight": "consolidated-00022-of-00027.safetensors", + "layers.77.attention.wo.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.77.attention.wq.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.77.attention.wq.weight": "consolidated-00022-of-00027.safetensors", + "layers.77.attention.wq.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.77.attention.wv.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.77.attention.wv.weight": "consolidated-00022-of-00027.safetensors", + "layers.77.attention.wv.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.77.attention_norm.weight": 
"consolidated-00022-of-00027.safetensors", + "layers.77.feed_forward.w1.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.77.feed_forward.w1.weight": "consolidated-00022-of-00027.safetensors", + "layers.77.feed_forward.w1.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.77.feed_forward.w2.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.77.feed_forward.w2.weight": "consolidated-00022-of-00027.safetensors", + "layers.77.feed_forward.w2.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.77.feed_forward.w3.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.77.feed_forward.w3.weight": "consolidated-00022-of-00027.safetensors", + "layers.77.feed_forward.w3.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.77.ffn_norm.weight": "consolidated-00022-of-00027.safetensors", + "layers.78.attention.wk.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.78.attention.wk.weight": "consolidated-00022-of-00027.safetensors", + "layers.78.attention.wk.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.78.attention.wo.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.78.attention.wo.weight": "consolidated-00022-of-00027.safetensors", + "layers.78.attention.wo.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.78.attention.wq.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.78.attention.wq.weight": "consolidated-00022-of-00027.safetensors", + "layers.78.attention.wq.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.78.attention.wv.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.78.attention.wv.weight": "consolidated-00022-of-00027.safetensors", + "layers.78.attention.wv.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.78.attention_norm.weight": "consolidated-00022-of-00027.safetensors", + "layers.78.feed_forward.w1.qscale_act": 
"consolidated-00022-of-00027.safetensors", + "layers.78.feed_forward.w1.weight": "consolidated-00022-of-00027.safetensors", + "layers.78.feed_forward.w1.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.78.feed_forward.w2.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.78.feed_forward.w2.weight": "consolidated-00022-of-00027.safetensors", + "layers.78.feed_forward.w2.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.78.feed_forward.w3.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.78.feed_forward.w3.weight": "consolidated-00022-of-00027.safetensors", + "layers.78.feed_forward.w3.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.78.ffn_norm.weight": "consolidated-00022-of-00027.safetensors", + "layers.79.attention.wk.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.79.attention.wk.weight": "consolidated-00022-of-00027.safetensors", + "layers.79.attention.wk.qscale_weight": "consolidated-00022-of-00027.safetensors", + "layers.79.attention.wo.qscale_act": "consolidated-00022-of-00027.safetensors", + "layers.79.attention.wo.weight": "consolidated-00023-of-00027.safetensors", + "layers.79.attention.wo.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.79.attention.wq.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.79.attention.wq.weight": "consolidated-00023-of-00027.safetensors", + "layers.79.attention.wq.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.79.attention.wv.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.79.attention.wv.weight": "consolidated-00023-of-00027.safetensors", + "layers.79.attention.wv.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.79.attention_norm.weight": "consolidated-00023-of-00027.safetensors", + "layers.79.feed_forward.w1.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.79.feed_forward.w1.weight": 
"consolidated-00023-of-00027.safetensors", + "layers.79.feed_forward.w1.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.79.feed_forward.w2.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.79.feed_forward.w2.weight": "consolidated-00023-of-00027.safetensors", + "layers.79.feed_forward.w2.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.79.feed_forward.w3.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.79.feed_forward.w3.weight": "consolidated-00023-of-00027.safetensors", + "layers.79.feed_forward.w3.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.79.ffn_norm.weight": "consolidated-00023-of-00027.safetensors", + "layers.8.attention.wk.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.8.attention.wk.weight": "consolidated-00023-of-00027.safetensors", + "layers.8.attention.wk.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.8.attention.wo.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.8.attention.wo.weight": "consolidated-00023-of-00027.safetensors", + "layers.8.attention.wo.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.8.attention.wq.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.8.attention.wq.weight": "consolidated-00023-of-00027.safetensors", + "layers.8.attention.wq.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.8.attention.wv.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.8.attention.wv.weight": "consolidated-00023-of-00027.safetensors", + "layers.8.attention.wv.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.8.attention_norm.weight": "consolidated-00023-of-00027.safetensors", + "layers.8.feed_forward.w1.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.8.feed_forward.w1.weight": "consolidated-00023-of-00027.safetensors", + "layers.8.feed_forward.w1.qscale_weight": 
"consolidated-00023-of-00027.safetensors", + "layers.8.feed_forward.w2.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.8.feed_forward.w2.weight": "consolidated-00023-of-00027.safetensors", + "layers.8.feed_forward.w2.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.8.feed_forward.w3.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.8.feed_forward.w3.weight": "consolidated-00023-of-00027.safetensors", + "layers.8.feed_forward.w3.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.8.ffn_norm.weight": "consolidated-00023-of-00027.safetensors", + "layers.80.attention.wk.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.80.attention.wk.weight": "consolidated-00023-of-00027.safetensors", + "layers.80.attention.wk.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.80.attention.wo.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.80.attention.wo.weight": "consolidated-00023-of-00027.safetensors", + "layers.80.attention.wo.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.80.attention.wq.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.80.attention.wq.weight": "consolidated-00023-of-00027.safetensors", + "layers.80.attention.wq.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.80.attention.wv.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.80.attention.wv.weight": "consolidated-00023-of-00027.safetensors", + "layers.80.attention.wv.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.80.attention_norm.weight": "consolidated-00023-of-00027.safetensors", + "layers.80.feed_forward.w1.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.80.feed_forward.w1.weight": "consolidated-00023-of-00027.safetensors", + "layers.80.feed_forward.w1.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.80.feed_forward.w2.qscale_act": 
"consolidated-00023-of-00027.safetensors", + "layers.80.feed_forward.w2.weight": "consolidated-00023-of-00027.safetensors", + "layers.80.feed_forward.w2.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.80.feed_forward.w3.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.80.feed_forward.w3.weight": "consolidated-00023-of-00027.safetensors", + "layers.80.feed_forward.w3.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.80.ffn_norm.weight": "consolidated-00023-of-00027.safetensors", + "layers.81.attention.wk.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.81.attention.wk.weight": "consolidated-00023-of-00027.safetensors", + "layers.81.attention.wk.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.81.attention.wo.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.81.attention.wo.weight": "consolidated-00023-of-00027.safetensors", + "layers.81.attention.wo.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.81.attention.wq.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.81.attention.wq.weight": "consolidated-00023-of-00027.safetensors", + "layers.81.attention.wq.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.81.attention.wv.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.81.attention.wv.weight": "consolidated-00023-of-00027.safetensors", + "layers.81.attention.wv.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.81.attention_norm.weight": "consolidated-00023-of-00027.safetensors", + "layers.81.feed_forward.w1.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.81.feed_forward.w1.weight": "consolidated-00023-of-00027.safetensors", + "layers.81.feed_forward.w1.qscale_weight": "consolidated-00023-of-00027.safetensors", + "layers.81.feed_forward.w2.qscale_act": "consolidated-00023-of-00027.safetensors", + "layers.81.feed_forward.w2.weight": 
"consolidated-00024-of-00027.safetensors", + "layers.81.feed_forward.w2.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.81.feed_forward.w3.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.81.feed_forward.w3.weight": "consolidated-00024-of-00027.safetensors", + "layers.81.feed_forward.w3.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.81.ffn_norm.weight": "consolidated-00024-of-00027.safetensors", + "layers.82.attention.wk.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.82.attention.wk.weight": "consolidated-00024-of-00027.safetensors", + "layers.82.attention.wk.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.82.attention.wo.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.82.attention.wo.weight": "consolidated-00024-of-00027.safetensors", + "layers.82.attention.wo.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.82.attention.wq.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.82.attention.wq.weight": "consolidated-00024-of-00027.safetensors", + "layers.82.attention.wq.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.82.attention.wv.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.82.attention.wv.weight": "consolidated-00024-of-00027.safetensors", + "layers.82.attention.wv.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.82.attention_norm.weight": "consolidated-00024-of-00027.safetensors", + "layers.82.feed_forward.w1.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.82.feed_forward.w1.weight": "consolidated-00024-of-00027.safetensors", + "layers.82.feed_forward.w1.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.82.feed_forward.w2.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.82.feed_forward.w2.weight": "consolidated-00024-of-00027.safetensors", + "layers.82.feed_forward.w2.qscale_weight": 
"consolidated-00024-of-00027.safetensors", + "layers.82.feed_forward.w3.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.82.feed_forward.w3.weight": "consolidated-00024-of-00027.safetensors", + "layers.82.feed_forward.w3.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.82.ffn_norm.weight": "consolidated-00024-of-00027.safetensors", + "layers.83.attention.wk.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.83.attention.wk.weight": "consolidated-00024-of-00027.safetensors", + "layers.83.attention.wk.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.83.attention.wo.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.83.attention.wo.weight": "consolidated-00024-of-00027.safetensors", + "layers.83.attention.wo.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.83.attention.wq.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.83.attention.wq.weight": "consolidated-00024-of-00027.safetensors", + "layers.83.attention.wq.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.83.attention.wv.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.83.attention.wv.weight": "consolidated-00024-of-00027.safetensors", + "layers.83.attention.wv.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.83.attention_norm.weight": "consolidated-00024-of-00027.safetensors", + "layers.83.feed_forward.w1.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.83.feed_forward.w1.weight": "consolidated-00024-of-00027.safetensors", + "layers.83.feed_forward.w1.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.83.feed_forward.w2.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.83.feed_forward.w2.weight": "consolidated-00024-of-00027.safetensors", + "layers.83.feed_forward.w2.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.83.feed_forward.w3.qscale_act": 
"consolidated-00024-of-00027.safetensors", + "layers.83.feed_forward.w3.weight": "consolidated-00024-of-00027.safetensors", + "layers.83.feed_forward.w3.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.83.ffn_norm.weight": "consolidated-00024-of-00027.safetensors", + "layers.84.attention.wk.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.84.attention.wk.weight": "consolidated-00024-of-00027.safetensors", + "layers.84.attention.wk.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.84.attention.wo.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.84.attention.wo.weight": "consolidated-00024-of-00027.safetensors", + "layers.84.attention.wo.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.84.attention.wq.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.84.attention.wq.weight": "consolidated-00024-of-00027.safetensors", + "layers.84.attention.wq.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.84.attention.wv.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.84.attention.wv.weight": "consolidated-00024-of-00027.safetensors", + "layers.84.attention.wv.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.84.attention_norm.weight": "consolidated-00024-of-00027.safetensors", + "layers.84.feed_forward.w1.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.84.feed_forward.w1.weight": "consolidated-00024-of-00027.safetensors", + "layers.84.feed_forward.w1.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.84.feed_forward.w2.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.84.feed_forward.w2.weight": "consolidated-00024-of-00027.safetensors", + "layers.84.feed_forward.w2.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.84.feed_forward.w3.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.84.feed_forward.w3.weight": 
"consolidated-00024-of-00027.safetensors", + "layers.84.feed_forward.w3.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.84.ffn_norm.weight": "consolidated-00024-of-00027.safetensors", + "layers.85.attention.wk.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.85.attention.wk.weight": "consolidated-00024-of-00027.safetensors", + "layers.85.attention.wk.qscale_weight": "consolidated-00024-of-00027.safetensors", + "layers.85.attention.wo.qscale_act": "consolidated-00024-of-00027.safetensors", + "layers.85.attention.wo.weight": "consolidated-00025-of-00027.safetensors", + "layers.85.attention.wo.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.85.attention.wq.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.85.attention.wq.weight": "consolidated-00025-of-00027.safetensors", + "layers.85.attention.wq.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.85.attention.wv.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.85.attention.wv.weight": "consolidated-00025-of-00027.safetensors", + "layers.85.attention.wv.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.85.attention_norm.weight": "consolidated-00025-of-00027.safetensors", + "layers.85.feed_forward.w1.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.85.feed_forward.w1.weight": "consolidated-00025-of-00027.safetensors", + "layers.85.feed_forward.w1.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.85.feed_forward.w2.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.85.feed_forward.w2.weight": "consolidated-00025-of-00027.safetensors", + "layers.85.feed_forward.w2.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.85.feed_forward.w3.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.85.feed_forward.w3.weight": "consolidated-00025-of-00027.safetensors", + "layers.85.feed_forward.w3.qscale_weight": 
"consolidated-00025-of-00027.safetensors", + "layers.85.ffn_norm.weight": "consolidated-00025-of-00027.safetensors", + "layers.86.attention.wk.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.86.attention.wk.weight": "consolidated-00025-of-00027.safetensors", + "layers.86.attention.wk.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.86.attention.wo.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.86.attention.wo.weight": "consolidated-00025-of-00027.safetensors", + "layers.86.attention.wo.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.86.attention.wq.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.86.attention.wq.weight": "consolidated-00025-of-00027.safetensors", + "layers.86.attention.wq.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.86.attention.wv.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.86.attention.wv.weight": "consolidated-00025-of-00027.safetensors", + "layers.86.attention.wv.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.86.attention_norm.weight": "consolidated-00025-of-00027.safetensors", + "layers.86.feed_forward.w1.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.86.feed_forward.w1.weight": "consolidated-00025-of-00027.safetensors", + "layers.86.feed_forward.w1.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.86.feed_forward.w2.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.86.feed_forward.w2.weight": "consolidated-00025-of-00027.safetensors", + "layers.86.feed_forward.w2.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.86.feed_forward.w3.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.86.feed_forward.w3.weight": "consolidated-00025-of-00027.safetensors", + "layers.86.feed_forward.w3.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.86.ffn_norm.weight": 
"consolidated-00025-of-00027.safetensors", + "layers.87.attention.wk.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.87.attention.wk.weight": "consolidated-00025-of-00027.safetensors", + "layers.87.attention.wk.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.87.attention.wo.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.87.attention.wo.weight": "consolidated-00025-of-00027.safetensors", + "layers.87.attention.wo.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.87.attention.wq.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.87.attention.wq.weight": "consolidated-00025-of-00027.safetensors", + "layers.87.attention.wq.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.87.attention.wv.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.87.attention.wv.weight": "consolidated-00025-of-00027.safetensors", + "layers.87.attention.wv.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.87.attention_norm.weight": "consolidated-00025-of-00027.safetensors", + "layers.87.feed_forward.w1.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.87.feed_forward.w1.weight": "consolidated-00025-of-00027.safetensors", + "layers.87.feed_forward.w1.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.87.feed_forward.w2.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.87.feed_forward.w2.weight": "consolidated-00025-of-00027.safetensors", + "layers.87.feed_forward.w2.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.87.feed_forward.w3.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.87.feed_forward.w3.weight": "consolidated-00025-of-00027.safetensors", + "layers.87.feed_forward.w3.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.87.ffn_norm.weight": "consolidated-00025-of-00027.safetensors", + "layers.9.attention.wk.qscale_act": 
"consolidated-00025-of-00027.safetensors", + "layers.9.attention.wk.weight": "consolidated-00025-of-00027.safetensors", + "layers.9.attention.wk.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.9.attention.wo.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.9.attention.wo.weight": "consolidated-00025-of-00027.safetensors", + "layers.9.attention.wo.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.9.attention.wq.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.9.attention.wq.weight": "consolidated-00025-of-00027.safetensors", + "layers.9.attention.wq.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.9.attention.wv.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.9.attention.wv.weight": "consolidated-00025-of-00027.safetensors", + "layers.9.attention.wv.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.9.attention_norm.weight": "consolidated-00025-of-00027.safetensors", + "layers.9.feed_forward.w1.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.9.feed_forward.w1.weight": "consolidated-00025-of-00027.safetensors", + "layers.9.feed_forward.w1.qscale_weight": "consolidated-00025-of-00027.safetensors", + "layers.9.feed_forward.w2.qscale_act": "consolidated-00025-of-00027.safetensors", + "layers.9.feed_forward.w2.weight": "consolidated-00026-of-00027.safetensors", + "layers.9.feed_forward.w2.qscale_weight": "consolidated-00026-of-00027.safetensors", + "layers.9.feed_forward.w3.qscale_act": "consolidated-00026-of-00027.safetensors", + "layers.9.feed_forward.w3.weight": "consolidated-00026-of-00027.safetensors", + "layers.9.feed_forward.w3.qscale_weight": "consolidated-00026-of-00027.safetensors", + "layers.9.ffn_norm.weight": "consolidated-00026-of-00027.safetensors", + "norm.weight": "consolidated-00026-of-00027.safetensors", + "output.weight": "consolidated-00026-of-00027.safetensors", + "tok_embeddings.weight": 
"consolidated-00027-of-00027.safetensors" + } +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..079491d5a2858b5945a497e1d6d793f5e5dd4d8f --- /dev/null +++ b/generation_config.json @@ -0,0 +1,8 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "pad_token_id": 11, + "max_length": 262144, + "transformers_version": "5.0.0.dev0" +} diff --git a/model-00001-of-00027.safetensors b/model-00001-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..841e1c06aa9c769c9b8a9272e867bfcc7a337da3 --- /dev/null +++ b/model-00001-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:781c212ef947ce9bf7420c67051ac305dd119e7b87d2a13900cb58325b1d1784 +size 4932554972 diff --git a/model-00002-of-00027.safetensors b/model-00002-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..74a605cb07494a0522bf2698ad0377bc6419704c --- /dev/null +++ b/model-00002-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e5aa5f0fa2ce8977ad5ca20950b6bacdc96a798085fbd3c2b38ab558f9be9b4 +size 4857159948 diff --git a/model-00003-of-00027.safetensors b/model-00003-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f69d1ea4c8102bb4831cbc6314e870ac53adee4b --- /dev/null +++ b/model-00003-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:053083307ac43f8a9c1756164ac27618eceff1c7971f1b5ff4f6724048000549 +size 4832044552 diff --git a/model-00004-of-00027.safetensors b/model-00004-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..523d6434085aa16249a812341fbab33452e88e31 --- /dev/null +++ b/model-00004-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:15c4aeab34c552528ce7b454c797cab2e8e0a06172a2a7b3c7f7cf8a5dc6c457 +size 4857159964 diff --git a/model-00005-of-00027.safetensors b/model-00005-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5bc4da8d82ed635193ef5b9cf5113d5bac876edf --- /dev/null +++ b/model-00005-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6c6b396739e9596d99a62dae39ea08cd3457cd464beea704ba2b793b1dba752 +size 4832044640 diff --git a/model-00006-of-00027.safetensors b/model-00006-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f99d44e1a21909b0bfdc13b29ae0ed51c1bb62f0 --- /dev/null +++ b/model-00006-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b3e26115ad5f6ecfceaee3bad1be5560225c662757d78321956a784ec3e421c +size 4857160028 diff --git a/model-00007-of-00027.safetensors b/model-00007-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1f5aaa4fc782e9db4b0383a04b69903cc2876a62 --- /dev/null +++ b/model-00007-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac774af7125434cc3dc118d3a6d49ab2e55ba6762acdf66a435b7a12cee10df9 +size 4832044640 diff --git a/model-00008-of-00027.safetensors b/model-00008-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..97dc5e73c325517005c1a395cefc33d66942bda4 --- /dev/null +++ b/model-00008-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6527350c98d6e448b797e6375b2aeb3be68b02b96a3534803864f566c638902f +size 4857160028 diff --git a/model-00009-of-00027.safetensors b/model-00009-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..02fd5c8cb990688b69d03df524b339b41b130704 --- /dev/null +++ b/model-00009-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b2be71444925d1da9862ab3f436dbaae77c2598ad4ee7b50d7c45ea8794e4eab +size 4832044640 diff --git a/model-00010-of-00027.safetensors b/model-00010-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..310b96e598f06ee70425a44662e0f6b53167f723 --- /dev/null +++ b/model-00010-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76be0bffbcd79a2f7c2df80ef517a6e26f0f318d42b42a0cd37d346112e32550 +size 4857160028 diff --git a/model-00011-of-00027.safetensors b/model-00011-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4fffce6a4efdc058f624efb459421d256994c647 --- /dev/null +++ b/model-00011-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b0f61ff54214f92422640db324eb0c0339901926bc62581ae2f026cb5e6813a +size 4832044640 diff --git a/model-00012-of-00027.safetensors b/model-00012-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..967a448809aedaaa470c512874812e032e3812e3 --- /dev/null +++ b/model-00012-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04ada4e4feb5e8b8b4f318af65f748e24932ca55f4fa6cd371b87b46ea96668d +size 4857160028 diff --git a/model-00013-of-00027.safetensors b/model-00013-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f623e9674b932b39b98c9844758dd37d2200fbd5 --- /dev/null +++ b/model-00013-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac452ad1c5e800fd525d1699efbf42221e1afa6ad590d3f4d6c232cba2b753c1 +size 4832044640 diff --git a/model-00014-of-00027.safetensors b/model-00014-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fbe1f797aa6a6c3f84af754f894f1682e1043f70 --- /dev/null +++ b/model-00014-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f419c01cccc805d8d391d754c12e3d26f3a588cd99ae9fe339c8f909eb142fa4 +size 4857160028 diff --git a/model-00015-of-00027.safetensors b/model-00015-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7e06243b3a9b68886dc8f38dbf397eebff76f20f --- /dev/null +++ b/model-00015-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b86730cb0e89f8a2482c2c4fdeaa8974c3184c15751d77b3f89f0f971612e7aa +size 4832044640 diff --git a/model-00016-of-00027.safetensors b/model-00016-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a7439d353b82d0fdb996c4ef57d3dea68ff9fa59 --- /dev/null +++ b/model-00016-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa42d7049abc253a79e1cc5816b84111f3e51106f7f974a9329aebc49cb0cfc +size 4857160028 diff --git a/model-00017-of-00027.safetensors b/model-00017-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5cbbd32a34ad1db09182b143a86bb8680e50ea66 --- /dev/null +++ b/model-00017-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6c804f55cfaf713ef077a47c8c7765cb1b5cc47333db8ef96b907be13f8c007 +size 4832044640 diff --git a/model-00018-of-00027.safetensors b/model-00018-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60916a5d808b0d5456ac197d81b21642dc0cdc3b --- /dev/null +++ b/model-00018-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36a36c428107b4df47b9a507cacb214e872c479e4058e59e2f9b1b041e9b2b6e +size 4857160028 diff --git a/model-00019-of-00027.safetensors b/model-00019-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e1e977f91a264da2d66c158cc3080ad61c055716 --- /dev/null +++ b/model-00019-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:846754e34f44adf3bd0b13d5604f7be2d568387116ba0d5fe7ad481e138881d6 +size 4832044640 diff --git a/model-00020-of-00027.safetensors b/model-00020-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d107410fe7c22a9aaa8f871e5a46b3c1cedbbacc --- /dev/null +++ b/model-00020-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27eeb54edb39464ef26412232bc03dbeb4f34eca2785b501728598af243cdc0a +size 4857160028 diff --git a/model-00021-of-00027.safetensors b/model-00021-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea48ce85bb9f0811beb5799538a11a210c94ae8a --- /dev/null +++ b/model-00021-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:847d0677a058c3fa67d2b2cbdeb4cb4f558e78bbf1d1fc6ec6541fc236b811e9 +size 4832044640 diff --git a/model-00022-of-00027.safetensors b/model-00022-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0aacb43d602a5fb33e8f284c4731b87cf5943153 --- /dev/null +++ b/model-00022-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd1422ac4dcc14e8aef16daeb31f126196602dafa8cd5cbde82e97d165c37d9a +size 4857160028 diff --git a/model-00023-of-00027.safetensors b/model-00023-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b9fe780ba3b03bbaf8857fd8d48175086e5d113 --- /dev/null +++ b/model-00023-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:967731268b7d85dde6f96ce4eeb504abb47afc6e82a812935e239cf146a8230c +size 4832044640 diff --git a/model-00024-of-00027.safetensors b/model-00024-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f4d24ac75bef8d0137c5d67e992e25596c404e5 --- /dev/null +++ b/model-00024-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b9512662da101c3d68084df55170411497384920815b0c2857550a6b950bb906 +size 4857160028 diff --git a/model-00025-of-00027.safetensors b/model-00025-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1a0b8e551d3a7424740d05091538b11e175a948d --- /dev/null +++ b/model-00025-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d5694a77496612d0ebc724ce691129f3b32636b83fd3f2869932ee63bf1bb06 +size 4832044640 diff --git a/model-00026-of-00027.safetensors b/model-00026-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ecb70288661b22df14cc5ebcfbfc6a29fa1db730 --- /dev/null +++ b/model-00026-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9daf35b204151b403f94cc5f9ef69a270f6856cbbe587a50add816cce67427e +size 3825383836 diff --git a/model-00027-of-00027.safetensors b/model-00027-of-00027.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a46a4b974a08380405347fac99d9620d2e9ed2c6 --- /dev/null +++ b/model-00027-of-00027.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1321bdcc2da0cb311eca11d8e489997b5f652449c997e611e3c8f6ca017c83e3 +size 3221225600 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..debf1e29c901c4fe75e5b99591cb023ceab6ad11 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,2035 @@ +{ + "metadata": { + "total_parameters": 125025989840, + "total_size": 128249391520 + }, + "weight_map": { + "lm_head.weight": "model-00027-of-00027.safetensors", + "model.embed_tokens.weight": "model-00001-of-00027.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00027.safetensors", + "model.layers.0.mlp.down_proj.activation_scale": "model-00001-of-00027.safetensors", + "model.layers.0.mlp.down_proj.weight": 
"model-00001-of-00027.safetensors", + "model.layers.0.mlp.down_proj.weight_scale_inv": "model-00001-of-00027.safetensors", + "model.layers.0.mlp.gate_proj.activation_scale": "model-00001-of-00027.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00027.safetensors", + "model.layers.0.mlp.gate_proj.weight_scale_inv": "model-00001-of-00027.safetensors", + "model.layers.0.mlp.up_proj.activation_scale": "model-00001-of-00027.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00027.safetensors", + "model.layers.0.mlp.up_proj.weight_scale_inv": "model-00001-of-00027.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00027.safetensors", + "model.layers.0.self_attn.k_proj.activation_scale": "model-00001-of-00027.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00027.safetensors", + "model.layers.0.self_attn.k_proj.weight_scale_inv": "model-00001-of-00027.safetensors", + "model.layers.0.self_attn.o_proj.activation_scale": "model-00001-of-00027.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00027.safetensors", + "model.layers.0.self_attn.o_proj.weight_scale_inv": "model-00001-of-00027.safetensors", + "model.layers.0.self_attn.q_proj.activation_scale": "model-00001-of-00027.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00027.safetensors", + "model.layers.0.self_attn.q_proj.weight_scale_inv": "model-00001-of-00027.safetensors", + "model.layers.0.self_attn.v_proj.activation_scale": "model-00001-of-00027.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00027.safetensors", + "model.layers.0.self_attn.v_proj.weight_scale_inv": "model-00001-of-00027.safetensors", + "model.layers.1.input_layernorm.weight": "model-00002-of-00027.safetensors", + "model.layers.1.mlp.down_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00002-of-00027.safetensors", + 
"model.layers.1.mlp.down_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.1.mlp.gate_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00002-of-00027.safetensors", + "model.layers.1.mlp.gate_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.1.mlp.up_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00002-of-00027.safetensors", + "model.layers.1.mlp.up_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00027.safetensors", + "model.layers.1.self_attn.k_proj.activation_scale": "model-00001-of-00027.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00027.safetensors", + "model.layers.1.self_attn.k_proj.weight_scale_inv": "model-00001-of-00027.safetensors", + "model.layers.1.self_attn.o_proj.activation_scale": "model-00001-of-00027.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00027.safetensors", + "model.layers.1.self_attn.o_proj.weight_scale_inv": "model-00001-of-00027.safetensors", + "model.layers.1.self_attn.q_proj.activation_scale": "model-00001-of-00027.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00027.safetensors", + "model.layers.1.self_attn.q_proj.weight_scale_inv": "model-00001-of-00027.safetensors", + "model.layers.1.self_attn.v_proj.activation_scale": "model-00001-of-00027.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00027.safetensors", + "model.layers.1.self_attn.v_proj.weight_scale_inv": "model-00001-of-00027.safetensors", + "model.layers.10.input_layernorm.weight": "model-00004-of-00027.safetensors", + "model.layers.10.mlp.down_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00004-of-00027.safetensors", + 
"model.layers.10.mlp.down_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.10.mlp.gate_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00004-of-00027.safetensors", + "model.layers.10.mlp.gate_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.10.mlp.up_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00004-of-00027.safetensors", + "model.layers.10.mlp.up_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00004-of-00027.safetensors", + "model.layers.10.self_attn.k_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00004-of-00027.safetensors", + "model.layers.10.self_attn.k_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.10.self_attn.o_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00004-of-00027.safetensors", + "model.layers.10.self_attn.o_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.10.self_attn.q_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00004-of-00027.safetensors", + "model.layers.10.self_attn.q_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.10.self_attn.v_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00004-of-00027.safetensors", + "model.layers.10.self_attn.v_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.11.input_layernorm.weight": "model-00005-of-00027.safetensors", + "model.layers.11.mlp.down_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00005-of-00027.safetensors", + 
"model.layers.11.mlp.down_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.11.mlp.gate_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00004-of-00027.safetensors", + "model.layers.11.mlp.gate_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.11.mlp.up_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00004-of-00027.safetensors", + "model.layers.11.mlp.up_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00005-of-00027.safetensors", + "model.layers.11.self_attn.k_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00004-of-00027.safetensors", + "model.layers.11.self_attn.k_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.11.self_attn.o_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00004-of-00027.safetensors", + "model.layers.11.self_attn.o_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.11.self_attn.q_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00004-of-00027.safetensors", + "model.layers.11.self_attn.q_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.11.self_attn.v_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00004-of-00027.safetensors", + "model.layers.11.self_attn.v_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.12.input_layernorm.weight": "model-00005-of-00027.safetensors", + "model.layers.12.mlp.down_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00005-of-00027.safetensors", + 
"model.layers.12.mlp.down_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.12.mlp.gate_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.12.mlp.gate_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.12.mlp.up_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.12.mlp.up_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00005-of-00027.safetensors", + "model.layers.12.self_attn.k_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.12.self_attn.k_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.12.self_attn.o_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.12.self_attn.o_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.12.self_attn.q_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.12.self_attn.q_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.12.self_attn.v_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.12.self_attn.v_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.13.input_layernorm.weight": "model-00005-of-00027.safetensors", + "model.layers.13.mlp.down_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00005-of-00027.safetensors", + 
"model.layers.13.mlp.down_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.13.mlp.gate_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.13.mlp.gate_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.13.mlp.up_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.13.mlp.up_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00005-of-00027.safetensors", + "model.layers.13.self_attn.k_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.13.self_attn.k_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.13.self_attn.o_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.13.self_attn.o_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.13.self_attn.q_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.13.self_attn.q_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.13.self_attn.v_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.13.self_attn.v_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.14.input_layernorm.weight": "model-00005-of-00027.safetensors", + "model.layers.14.mlp.down_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00005-of-00027.safetensors", + 
"model.layers.14.mlp.down_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.14.mlp.gate_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.14.mlp.gate_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.14.mlp.up_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.14.mlp.up_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00005-of-00027.safetensors", + "model.layers.14.self_attn.k_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.14.self_attn.k_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.14.self_attn.o_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.14.self_attn.o_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.14.self_attn.q_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.14.self_attn.q_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.14.self_attn.v_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.14.self_attn.v_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.15.input_layernorm.weight": "model-00006-of-00027.safetensors", + "model.layers.15.mlp.down_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00006-of-00027.safetensors", + 
"model.layers.15.mlp.down_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.15.mlp.gate_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00006-of-00027.safetensors", + "model.layers.15.mlp.gate_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.15.mlp.up_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00006-of-00027.safetensors", + "model.layers.15.mlp.up_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00006-of-00027.safetensors", + "model.layers.15.self_attn.k_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.15.self_attn.k_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.15.self_attn.o_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.15.self_attn.o_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.15.self_attn.q_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.15.self_attn.q_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.15.self_attn.v_proj.activation_scale": "model-00005-of-00027.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00005-of-00027.safetensors", + "model.layers.15.self_attn.v_proj.weight_scale_inv": "model-00005-of-00027.safetensors", + "model.layers.16.input_layernorm.weight": "model-00006-of-00027.safetensors", + "model.layers.16.mlp.down_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00006-of-00027.safetensors", + 
"model.layers.16.mlp.down_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.16.mlp.gate_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00006-of-00027.safetensors", + "model.layers.16.mlp.gate_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.16.mlp.up_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00006-of-00027.safetensors", + "model.layers.16.mlp.up_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00006-of-00027.safetensors", + "model.layers.16.self_attn.k_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00006-of-00027.safetensors", + "model.layers.16.self_attn.k_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.16.self_attn.o_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00006-of-00027.safetensors", + "model.layers.16.self_attn.o_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.16.self_attn.q_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00006-of-00027.safetensors", + "model.layers.16.self_attn.q_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.16.self_attn.v_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00006-of-00027.safetensors", + "model.layers.16.self_attn.v_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.17.input_layernorm.weight": "model-00006-of-00027.safetensors", + "model.layers.17.mlp.down_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00006-of-00027.safetensors", + 
"model.layers.17.mlp.down_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.17.mlp.gate_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00006-of-00027.safetensors", + "model.layers.17.mlp.gate_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.17.mlp.up_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00006-of-00027.safetensors", + "model.layers.17.mlp.up_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00006-of-00027.safetensors", + "model.layers.17.self_attn.k_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00006-of-00027.safetensors", + "model.layers.17.self_attn.k_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.17.self_attn.o_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00006-of-00027.safetensors", + "model.layers.17.self_attn.o_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.17.self_attn.q_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00006-of-00027.safetensors", + "model.layers.17.self_attn.q_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.17.self_attn.v_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00006-of-00027.safetensors", + "model.layers.17.self_attn.v_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.18.input_layernorm.weight": "model-00007-of-00027.safetensors", + "model.layers.18.mlp.down_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00007-of-00027.safetensors", + 
"model.layers.18.mlp.down_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.18.mlp.gate_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00006-of-00027.safetensors", + "model.layers.18.mlp.gate_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.18.mlp.up_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00006-of-00027.safetensors", + "model.layers.18.mlp.up_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00007-of-00027.safetensors", + "model.layers.18.self_attn.k_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00006-of-00027.safetensors", + "model.layers.18.self_attn.k_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.18.self_attn.o_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00006-of-00027.safetensors", + "model.layers.18.self_attn.o_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.18.self_attn.q_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00006-of-00027.safetensors", + "model.layers.18.self_attn.q_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.18.self_attn.v_proj.activation_scale": "model-00006-of-00027.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00006-of-00027.safetensors", + "model.layers.18.self_attn.v_proj.weight_scale_inv": "model-00006-of-00027.safetensors", + "model.layers.19.input_layernorm.weight": "model-00007-of-00027.safetensors", + "model.layers.19.mlp.down_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00007-of-00027.safetensors", + 
"model.layers.19.mlp.down_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.19.mlp.gate_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.19.mlp.gate_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.19.mlp.up_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.19.mlp.up_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00007-of-00027.safetensors", + "model.layers.19.self_attn.k_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.19.self_attn.k_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.19.self_attn.o_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.19.self_attn.o_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.19.self_attn.q_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.19.self_attn.q_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.19.self_attn.v_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.19.self_attn.v_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.2.input_layernorm.weight": "model-00002-of-00027.safetensors", + "model.layers.2.mlp.down_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00002-of-00027.safetensors", + 
"model.layers.2.mlp.down_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.2.mlp.gate_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00002-of-00027.safetensors", + "model.layers.2.mlp.gate_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.2.mlp.up_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00002-of-00027.safetensors", + "model.layers.2.mlp.up_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00002-of-00027.safetensors", + "model.layers.2.self_attn.k_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00027.safetensors", + "model.layers.2.self_attn.k_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.2.self_attn.o_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00027.safetensors", + "model.layers.2.self_attn.o_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.2.self_attn.q_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00027.safetensors", + "model.layers.2.self_attn.q_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.2.self_attn.v_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00027.safetensors", + "model.layers.2.self_attn.v_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.20.input_layernorm.weight": "model-00007-of-00027.safetensors", + "model.layers.20.mlp.down_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00007-of-00027.safetensors", + 
"model.layers.20.mlp.down_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.20.mlp.gate_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.20.mlp.gate_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.20.mlp.up_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.20.mlp.up_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00007-of-00027.safetensors", + "model.layers.20.self_attn.k_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.20.self_attn.k_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.20.self_attn.o_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.20.self_attn.o_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.20.self_attn.q_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.20.self_attn.q_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.20.self_attn.v_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.20.self_attn.v_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.21.input_layernorm.weight": "model-00007-of-00027.safetensors", + "model.layers.21.mlp.down_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00007-of-00027.safetensors", + 
"model.layers.21.mlp.down_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.21.mlp.gate_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.21.mlp.gate_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.21.mlp.up_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.21.mlp.up_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00007-of-00027.safetensors", + "model.layers.21.self_attn.k_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.21.self_attn.k_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.21.self_attn.o_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.21.self_attn.o_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.21.self_attn.q_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.21.self_attn.q_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.21.self_attn.v_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.21.self_attn.v_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.22.input_layernorm.weight": "model-00008-of-00027.safetensors", + "model.layers.22.mlp.down_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00008-of-00027.safetensors", + 
"model.layers.22.mlp.down_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.22.mlp.gate_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00008-of-00027.safetensors", + "model.layers.22.mlp.gate_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.22.mlp.up_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00008-of-00027.safetensors", + "model.layers.22.mlp.up_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00008-of-00027.safetensors", + "model.layers.22.self_attn.k_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.22.self_attn.k_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.22.self_attn.o_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.22.self_attn.o_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.22.self_attn.q_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.22.self_attn.q_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.22.self_attn.v_proj.activation_scale": "model-00007-of-00027.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00007-of-00027.safetensors", + "model.layers.22.self_attn.v_proj.weight_scale_inv": "model-00007-of-00027.safetensors", + "model.layers.23.input_layernorm.weight": "model-00008-of-00027.safetensors", + "model.layers.23.mlp.down_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00008-of-00027.safetensors", + 
"model.layers.23.mlp.down_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.23.mlp.gate_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00008-of-00027.safetensors", + "model.layers.23.mlp.gate_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.23.mlp.up_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00008-of-00027.safetensors", + "model.layers.23.mlp.up_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00008-of-00027.safetensors", + "model.layers.23.self_attn.k_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00008-of-00027.safetensors", + "model.layers.23.self_attn.k_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.23.self_attn.o_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00008-of-00027.safetensors", + "model.layers.23.self_attn.o_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.23.self_attn.q_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00008-of-00027.safetensors", + "model.layers.23.self_attn.q_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.23.self_attn.v_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00008-of-00027.safetensors", + "model.layers.23.self_attn.v_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.24.input_layernorm.weight": "model-00008-of-00027.safetensors", + "model.layers.24.mlp.down_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00008-of-00027.safetensors", + 
"model.layers.24.mlp.down_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.24.mlp.gate_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00008-of-00027.safetensors", + "model.layers.24.mlp.gate_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.24.mlp.up_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00008-of-00027.safetensors", + "model.layers.24.mlp.up_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00008-of-00027.safetensors", + "model.layers.24.self_attn.k_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00008-of-00027.safetensors", + "model.layers.24.self_attn.k_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.24.self_attn.o_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00008-of-00027.safetensors", + "model.layers.24.self_attn.o_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.24.self_attn.q_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00008-of-00027.safetensors", + "model.layers.24.self_attn.q_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.24.self_attn.v_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00008-of-00027.safetensors", + "model.layers.24.self_attn.v_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.25.input_layernorm.weight": "model-00009-of-00027.safetensors", + "model.layers.25.mlp.down_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00009-of-00027.safetensors", + 
"model.layers.25.mlp.down_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.25.mlp.gate_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00008-of-00027.safetensors", + "model.layers.25.mlp.gate_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.25.mlp.up_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00008-of-00027.safetensors", + "model.layers.25.mlp.up_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00009-of-00027.safetensors", + "model.layers.25.self_attn.k_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00008-of-00027.safetensors", + "model.layers.25.self_attn.k_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.25.self_attn.o_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00008-of-00027.safetensors", + "model.layers.25.self_attn.o_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.25.self_attn.q_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00008-of-00027.safetensors", + "model.layers.25.self_attn.q_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.25.self_attn.v_proj.activation_scale": "model-00008-of-00027.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00008-of-00027.safetensors", + "model.layers.25.self_attn.v_proj.weight_scale_inv": "model-00008-of-00027.safetensors", + "model.layers.26.input_layernorm.weight": "model-00009-of-00027.safetensors", + "model.layers.26.mlp.down_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00009-of-00027.safetensors", + 
"model.layers.26.mlp.down_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.26.mlp.gate_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.26.mlp.gate_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.26.mlp.up_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.26.mlp.up_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00009-of-00027.safetensors", + "model.layers.26.self_attn.k_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.26.self_attn.k_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.26.self_attn.o_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.26.self_attn.o_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.26.self_attn.q_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.26.self_attn.q_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.26.self_attn.v_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.26.self_attn.v_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.27.input_layernorm.weight": "model-00009-of-00027.safetensors", + "model.layers.27.mlp.down_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00009-of-00027.safetensors", + 
"model.layers.27.mlp.down_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.27.mlp.gate_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.27.mlp.gate_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.27.mlp.up_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.27.mlp.up_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00009-of-00027.safetensors", + "model.layers.27.self_attn.k_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.27.self_attn.k_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.27.self_attn.o_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.27.self_attn.o_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.27.self_attn.q_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.27.self_attn.q_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.27.self_attn.v_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.27.self_attn.v_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.28.input_layernorm.weight": "model-00009-of-00027.safetensors", + "model.layers.28.mlp.down_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00009-of-00027.safetensors", + 
"model.layers.28.mlp.down_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.28.mlp.gate_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.28.mlp.gate_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.28.mlp.up_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.28.mlp.up_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00009-of-00027.safetensors", + "model.layers.28.self_attn.k_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.28.self_attn.k_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.28.self_attn.o_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.28.self_attn.o_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.28.self_attn.q_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.28.self_attn.q_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.28.self_attn.v_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.28.self_attn.v_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.29.input_layernorm.weight": "model-00010-of-00027.safetensors", + "model.layers.29.mlp.down_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00010-of-00027.safetensors", + 
"model.layers.29.mlp.down_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.29.mlp.gate_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00010-of-00027.safetensors", + "model.layers.29.mlp.gate_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.29.mlp.up_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00010-of-00027.safetensors", + "model.layers.29.mlp.up_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00010-of-00027.safetensors", + "model.layers.29.self_attn.k_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.29.self_attn.k_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.29.self_attn.o_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.29.self_attn.o_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.29.self_attn.q_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.29.self_attn.q_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.29.self_attn.v_proj.activation_scale": "model-00009-of-00027.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00009-of-00027.safetensors", + "model.layers.29.self_attn.v_proj.weight_scale_inv": "model-00009-of-00027.safetensors", + "model.layers.3.input_layernorm.weight": "model-00002-of-00027.safetensors", + "model.layers.3.mlp.down_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00002-of-00027.safetensors", + 
"model.layers.3.mlp.down_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.3.mlp.gate_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00027.safetensors", + "model.layers.3.mlp.gate_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.3.mlp.up_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00002-of-00027.safetensors", + "model.layers.3.mlp.up_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00027.safetensors", + "model.layers.3.self_attn.k_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00002-of-00027.safetensors", + "model.layers.3.self_attn.k_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.3.self_attn.o_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00002-of-00027.safetensors", + "model.layers.3.self_attn.o_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.3.self_attn.q_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00002-of-00027.safetensors", + "model.layers.3.self_attn.q_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.3.self_attn.v_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00002-of-00027.safetensors", + "model.layers.3.self_attn.v_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.30.input_layernorm.weight": "model-00010-of-00027.safetensors", + "model.layers.30.mlp.down_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00010-of-00027.safetensors", + 
"model.layers.30.mlp.down_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.30.mlp.gate_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00010-of-00027.safetensors", + "model.layers.30.mlp.gate_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.30.mlp.up_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00010-of-00027.safetensors", + "model.layers.30.mlp.up_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00010-of-00027.safetensors", + "model.layers.30.self_attn.k_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00010-of-00027.safetensors", + "model.layers.30.self_attn.k_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.30.self_attn.o_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00010-of-00027.safetensors", + "model.layers.30.self_attn.o_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.30.self_attn.q_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00010-of-00027.safetensors", + "model.layers.30.self_attn.q_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.30.self_attn.v_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00010-of-00027.safetensors", + "model.layers.30.self_attn.v_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.31.input_layernorm.weight": "model-00010-of-00027.safetensors", + "model.layers.31.mlp.down_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00010-of-00027.safetensors", + 
"model.layers.31.mlp.down_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.31.mlp.gate_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00010-of-00027.safetensors", + "model.layers.31.mlp.gate_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.31.mlp.up_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00010-of-00027.safetensors", + "model.layers.31.mlp.up_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00010-of-00027.safetensors", + "model.layers.31.self_attn.k_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00010-of-00027.safetensors", + "model.layers.31.self_attn.k_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.31.self_attn.o_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00010-of-00027.safetensors", + "model.layers.31.self_attn.o_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.31.self_attn.q_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00010-of-00027.safetensors", + "model.layers.31.self_attn.q_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.31.self_attn.v_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00010-of-00027.safetensors", + "model.layers.31.self_attn.v_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.32.input_layernorm.weight": "model-00011-of-00027.safetensors", + "model.layers.32.mlp.down_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00011-of-00027.safetensors", + 
"model.layers.32.mlp.down_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.32.mlp.gate_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00010-of-00027.safetensors", + "model.layers.32.mlp.gate_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.32.mlp.up_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00010-of-00027.safetensors", + "model.layers.32.mlp.up_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00011-of-00027.safetensors", + "model.layers.32.self_attn.k_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00010-of-00027.safetensors", + "model.layers.32.self_attn.k_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.32.self_attn.o_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00010-of-00027.safetensors", + "model.layers.32.self_attn.o_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.32.self_attn.q_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00010-of-00027.safetensors", + "model.layers.32.self_attn.q_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.32.self_attn.v_proj.activation_scale": "model-00010-of-00027.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00010-of-00027.safetensors", + "model.layers.32.self_attn.v_proj.weight_scale_inv": "model-00010-of-00027.safetensors", + "model.layers.33.input_layernorm.weight": "model-00011-of-00027.safetensors", + "model.layers.33.mlp.down_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00011-of-00027.safetensors", + 
"model.layers.33.mlp.down_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.33.mlp.gate_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.33.mlp.gate_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.33.mlp.up_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.33.mlp.up_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00011-of-00027.safetensors", + "model.layers.33.self_attn.k_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.33.self_attn.k_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.33.self_attn.o_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.33.self_attn.o_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.33.self_attn.q_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.33.self_attn.q_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.33.self_attn.v_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.33.self_attn.v_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.34.input_layernorm.weight": "model-00011-of-00027.safetensors", + "model.layers.34.mlp.down_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00011-of-00027.safetensors", + 
"model.layers.34.mlp.down_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.34.mlp.gate_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.34.mlp.gate_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.34.mlp.up_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.34.mlp.up_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00011-of-00027.safetensors", + "model.layers.34.self_attn.k_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.34.self_attn.k_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.34.self_attn.o_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.34.self_attn.o_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.34.self_attn.q_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.34.self_attn.q_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.34.self_attn.v_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.34.self_attn.v_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.35.input_layernorm.weight": "model-00011-of-00027.safetensors", + "model.layers.35.mlp.down_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00011-of-00027.safetensors", + 
"model.layers.35.mlp.down_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.35.mlp.gate_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.35.mlp.gate_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.35.mlp.up_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.35.mlp.up_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00011-of-00027.safetensors", + "model.layers.35.self_attn.k_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.35.self_attn.k_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.35.self_attn.o_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.35.self_attn.o_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.35.self_attn.q_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.35.self_attn.q_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.35.self_attn.v_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.35.self_attn.v_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.36.input_layernorm.weight": "model-00012-of-00027.safetensors", + "model.layers.36.mlp.down_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00012-of-00027.safetensors", + 
"model.layers.36.mlp.down_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.36.mlp.gate_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00012-of-00027.safetensors", + "model.layers.36.mlp.gate_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.36.mlp.up_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00012-of-00027.safetensors", + "model.layers.36.mlp.up_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00012-of-00027.safetensors", + "model.layers.36.self_attn.k_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.36.self_attn.k_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.36.self_attn.o_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.36.self_attn.o_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.36.self_attn.q_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.36.self_attn.q_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.36.self_attn.v_proj.activation_scale": "model-00011-of-00027.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00011-of-00027.safetensors", + "model.layers.36.self_attn.v_proj.weight_scale_inv": "model-00011-of-00027.safetensors", + "model.layers.37.input_layernorm.weight": "model-00012-of-00027.safetensors", + "model.layers.37.mlp.down_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00012-of-00027.safetensors", + 
"model.layers.37.mlp.down_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.37.mlp.gate_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00012-of-00027.safetensors", + "model.layers.37.mlp.gate_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.37.mlp.up_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00012-of-00027.safetensors", + "model.layers.37.mlp.up_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00012-of-00027.safetensors", + "model.layers.37.self_attn.k_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00012-of-00027.safetensors", + "model.layers.37.self_attn.k_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.37.self_attn.o_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00012-of-00027.safetensors", + "model.layers.37.self_attn.o_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.37.self_attn.q_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00012-of-00027.safetensors", + "model.layers.37.self_attn.q_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.37.self_attn.v_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00012-of-00027.safetensors", + "model.layers.37.self_attn.v_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.38.input_layernorm.weight": "model-00012-of-00027.safetensors", + "model.layers.38.mlp.down_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00012-of-00027.safetensors", + 
"model.layers.38.mlp.down_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.38.mlp.gate_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00012-of-00027.safetensors", + "model.layers.38.mlp.gate_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.38.mlp.up_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00012-of-00027.safetensors", + "model.layers.38.mlp.up_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00012-of-00027.safetensors", + "model.layers.38.self_attn.k_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00012-of-00027.safetensors", + "model.layers.38.self_attn.k_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.38.self_attn.o_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00012-of-00027.safetensors", + "model.layers.38.self_attn.o_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.38.self_attn.q_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00012-of-00027.safetensors", + "model.layers.38.self_attn.q_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.38.self_attn.v_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00012-of-00027.safetensors", + "model.layers.38.self_attn.v_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.39.input_layernorm.weight": "model-00013-of-00027.safetensors", + "model.layers.39.mlp.down_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00013-of-00027.safetensors", + 
"model.layers.39.mlp.down_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.39.mlp.gate_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00012-of-00027.safetensors", + "model.layers.39.mlp.gate_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.39.mlp.up_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00012-of-00027.safetensors", + "model.layers.39.mlp.up_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00013-of-00027.safetensors", + "model.layers.39.self_attn.k_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00012-of-00027.safetensors", + "model.layers.39.self_attn.k_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.39.self_attn.o_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00012-of-00027.safetensors", + "model.layers.39.self_attn.o_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.39.self_attn.q_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00012-of-00027.safetensors", + "model.layers.39.self_attn.q_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.39.self_attn.v_proj.activation_scale": "model-00012-of-00027.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00012-of-00027.safetensors", + "model.layers.39.self_attn.v_proj.weight_scale_inv": "model-00012-of-00027.safetensors", + "model.layers.4.input_layernorm.weight": "model-00003-of-00027.safetensors", + "model.layers.4.mlp.down_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00003-of-00027.safetensors", + 
"model.layers.4.mlp.down_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.4.mlp.gate_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00027.safetensors", + "model.layers.4.mlp.gate_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.4.mlp.up_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00002-of-00027.safetensors", + "model.layers.4.mlp.up_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00003-of-00027.safetensors", + "model.layers.4.self_attn.k_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00027.safetensors", + "model.layers.4.self_attn.k_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.4.self_attn.o_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00027.safetensors", + "model.layers.4.self_attn.o_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.4.self_attn.q_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00027.safetensors", + "model.layers.4.self_attn.q_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.4.self_attn.v_proj.activation_scale": "model-00002-of-00027.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00027.safetensors", + "model.layers.4.self_attn.v_proj.weight_scale_inv": "model-00002-of-00027.safetensors", + "model.layers.40.input_layernorm.weight": "model-00013-of-00027.safetensors", + "model.layers.40.mlp.down_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.40.mlp.down_proj.weight": "model-00013-of-00027.safetensors", + 
"model.layers.40.mlp.down_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.40.mlp.gate_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.40.mlp.gate_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.40.mlp.gate_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.40.mlp.up_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.40.mlp.up_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.40.mlp.up_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00013-of-00027.safetensors", + "model.layers.40.self_attn.k_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.40.self_attn.k_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.40.self_attn.o_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.40.self_attn.o_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.40.self_attn.q_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.40.self_attn.q_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.40.self_attn.v_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.40.self_attn.v_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.41.input_layernorm.weight": "model-00013-of-00027.safetensors", + "model.layers.41.mlp.down_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.41.mlp.down_proj.weight": "model-00013-of-00027.safetensors", + 
"model.layers.41.mlp.down_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.41.mlp.gate_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.41.mlp.gate_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.41.mlp.gate_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.41.mlp.up_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.41.mlp.up_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.41.mlp.up_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00013-of-00027.safetensors", + "model.layers.41.self_attn.k_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.41.self_attn.k_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.41.self_attn.o_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.41.self_attn.o_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.41.self_attn.q_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.41.self_attn.q_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.41.self_attn.v_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.41.self_attn.v_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.42.input_layernorm.weight": "model-00013-of-00027.safetensors", + "model.layers.42.mlp.down_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.42.mlp.down_proj.weight": "model-00013-of-00027.safetensors", + 
"model.layers.42.mlp.down_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.42.mlp.gate_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.42.mlp.gate_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.42.mlp.gate_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.42.mlp.up_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.42.mlp.up_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.42.mlp.up_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00013-of-00027.safetensors", + "model.layers.42.self_attn.k_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.42.self_attn.k_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.42.self_attn.o_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.42.self_attn.o_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.42.self_attn.q_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.42.self_attn.q_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.42.self_attn.v_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.42.self_attn.v_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.43.input_layernorm.weight": "model-00014-of-00027.safetensors", + "model.layers.43.mlp.down_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.43.mlp.down_proj.weight": "model-00014-of-00027.safetensors", + 
"model.layers.43.mlp.down_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.43.mlp.gate_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.43.mlp.gate_proj.weight": "model-00014-of-00027.safetensors", + "model.layers.43.mlp.gate_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.43.mlp.up_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.43.mlp.up_proj.weight": "model-00014-of-00027.safetensors", + "model.layers.43.mlp.up_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00014-of-00027.safetensors", + "model.layers.43.self_attn.k_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.43.self_attn.k_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.43.self_attn.o_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.43.self_attn.o_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.43.self_attn.q_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.43.self_attn.q_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.43.self_attn.v_proj.activation_scale": "model-00013-of-00027.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00013-of-00027.safetensors", + "model.layers.43.self_attn.v_proj.weight_scale_inv": "model-00013-of-00027.safetensors", + "model.layers.44.input_layernorm.weight": "model-00014-of-00027.safetensors", + "model.layers.44.mlp.down_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.44.mlp.down_proj.weight": "model-00014-of-00027.safetensors", + 
"model.layers.44.mlp.down_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.44.mlp.gate_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.44.mlp.gate_proj.weight": "model-00014-of-00027.safetensors", + "model.layers.44.mlp.gate_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.44.mlp.up_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.44.mlp.up_proj.weight": "model-00014-of-00027.safetensors", + "model.layers.44.mlp.up_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00014-of-00027.safetensors", + "model.layers.44.self_attn.k_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00014-of-00027.safetensors", + "model.layers.44.self_attn.k_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.44.self_attn.o_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00014-of-00027.safetensors", + "model.layers.44.self_attn.o_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.44.self_attn.q_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00014-of-00027.safetensors", + "model.layers.44.self_attn.q_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.44.self_attn.v_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00014-of-00027.safetensors", + "model.layers.44.self_attn.v_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.45.input_layernorm.weight": "model-00014-of-00027.safetensors", + "model.layers.45.mlp.down_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.45.mlp.down_proj.weight": "model-00014-of-00027.safetensors", + 
"model.layers.45.mlp.down_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.45.mlp.gate_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.45.mlp.gate_proj.weight": "model-00014-of-00027.safetensors", + "model.layers.45.mlp.gate_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.45.mlp.up_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.45.mlp.up_proj.weight": "model-00014-of-00027.safetensors", + "model.layers.45.mlp.up_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00014-of-00027.safetensors", + "model.layers.45.self_attn.k_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00014-of-00027.safetensors", + "model.layers.45.self_attn.k_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.45.self_attn.o_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00014-of-00027.safetensors", + "model.layers.45.self_attn.o_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.45.self_attn.q_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00014-of-00027.safetensors", + "model.layers.45.self_attn.q_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.45.self_attn.v_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00014-of-00027.safetensors", + "model.layers.45.self_attn.v_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.46.input_layernorm.weight": "model-00015-of-00027.safetensors", + "model.layers.46.mlp.down_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.46.mlp.down_proj.weight": "model-00015-of-00027.safetensors", + 
"model.layers.46.mlp.down_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.46.mlp.gate_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.46.mlp.gate_proj.weight": "model-00014-of-00027.safetensors", + "model.layers.46.mlp.gate_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.46.mlp.up_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.46.mlp.up_proj.weight": "model-00014-of-00027.safetensors", + "model.layers.46.mlp.up_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00015-of-00027.safetensors", + "model.layers.46.self_attn.k_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00014-of-00027.safetensors", + "model.layers.46.self_attn.k_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.46.self_attn.o_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00014-of-00027.safetensors", + "model.layers.46.self_attn.o_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.46.self_attn.q_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00014-of-00027.safetensors", + "model.layers.46.self_attn.q_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.46.self_attn.v_proj.activation_scale": "model-00014-of-00027.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00014-of-00027.safetensors", + "model.layers.46.self_attn.v_proj.weight_scale_inv": "model-00014-of-00027.safetensors", + "model.layers.47.input_layernorm.weight": "model-00015-of-00027.safetensors", + "model.layers.47.mlp.down_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.47.mlp.down_proj.weight": "model-00015-of-00027.safetensors", + 
"model.layers.47.mlp.down_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.47.mlp.gate_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.47.mlp.gate_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.47.mlp.gate_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.47.mlp.up_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.47.mlp.up_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.47.mlp.up_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00015-of-00027.safetensors", + "model.layers.47.self_attn.k_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.47.self_attn.k_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.47.self_attn.o_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.47.self_attn.o_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.47.self_attn.q_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.47.self_attn.q_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.47.self_attn.v_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.47.self_attn.v_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.48.input_layernorm.weight": "model-00015-of-00027.safetensors", + "model.layers.48.mlp.down_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.48.mlp.down_proj.weight": "model-00015-of-00027.safetensors", + 
"model.layers.48.mlp.down_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.48.mlp.gate_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.48.mlp.gate_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.48.mlp.gate_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.48.mlp.up_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.48.mlp.up_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.48.mlp.up_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00015-of-00027.safetensors", + "model.layers.48.self_attn.k_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.48.self_attn.k_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.48.self_attn.o_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.48.self_attn.o_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.48.self_attn.q_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.48.self_attn.q_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.48.self_attn.v_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.48.self_attn.v_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.49.input_layernorm.weight": "model-00015-of-00027.safetensors", + "model.layers.49.mlp.down_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.49.mlp.down_proj.weight": "model-00015-of-00027.safetensors", + 
"model.layers.49.mlp.down_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.49.mlp.gate_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.49.mlp.gate_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.49.mlp.gate_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.49.mlp.up_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.49.mlp.up_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.49.mlp.up_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00015-of-00027.safetensors", + "model.layers.49.self_attn.k_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.49.self_attn.k_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.49.self_attn.o_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.49.self_attn.o_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.49.self_attn.q_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.49.self_attn.q_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.49.self_attn.v_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.49.self_attn.v_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.5.input_layernorm.weight": "model-00003-of-00027.safetensors", + "model.layers.5.mlp.down_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00003-of-00027.safetensors", + 
"model.layers.5.mlp.down_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.5.mlp.gate_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.5.mlp.gate_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.5.mlp.up_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.5.mlp.up_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00003-of-00027.safetensors", + "model.layers.5.self_attn.k_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.5.self_attn.k_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.5.self_attn.o_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.5.self_attn.o_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.5.self_attn.q_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.5.self_attn.q_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.5.self_attn.v_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.5.self_attn.v_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.50.input_layernorm.weight": "model-00016-of-00027.safetensors", + "model.layers.50.mlp.down_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.50.mlp.down_proj.weight": "model-00016-of-00027.safetensors", + 
"model.layers.50.mlp.down_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.50.mlp.gate_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.50.mlp.gate_proj.weight": "model-00016-of-00027.safetensors", + "model.layers.50.mlp.gate_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.50.mlp.up_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.50.mlp.up_proj.weight": "model-00016-of-00027.safetensors", + "model.layers.50.mlp.up_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00016-of-00027.safetensors", + "model.layers.50.self_attn.k_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.50.self_attn.k_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.50.self_attn.o_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.50.self_attn.o_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.50.self_attn.q_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.50.self_attn.q_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.50.self_attn.v_proj.activation_scale": "model-00015-of-00027.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00015-of-00027.safetensors", + "model.layers.50.self_attn.v_proj.weight_scale_inv": "model-00015-of-00027.safetensors", + "model.layers.51.input_layernorm.weight": "model-00016-of-00027.safetensors", + "model.layers.51.mlp.down_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.51.mlp.down_proj.weight": "model-00016-of-00027.safetensors", + 
"model.layers.51.mlp.down_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.51.mlp.gate_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.51.mlp.gate_proj.weight": "model-00016-of-00027.safetensors", + "model.layers.51.mlp.gate_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.51.mlp.up_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.51.mlp.up_proj.weight": "model-00016-of-00027.safetensors", + "model.layers.51.mlp.up_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00016-of-00027.safetensors", + "model.layers.51.self_attn.k_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00016-of-00027.safetensors", + "model.layers.51.self_attn.k_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.51.self_attn.o_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00016-of-00027.safetensors", + "model.layers.51.self_attn.o_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.51.self_attn.q_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00016-of-00027.safetensors", + "model.layers.51.self_attn.q_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.51.self_attn.v_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00016-of-00027.safetensors", + "model.layers.51.self_attn.v_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.52.input_layernorm.weight": "model-00016-of-00027.safetensors", + "model.layers.52.mlp.down_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.52.mlp.down_proj.weight": "model-00016-of-00027.safetensors", + 
"model.layers.52.mlp.down_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.52.mlp.gate_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.52.mlp.gate_proj.weight": "model-00016-of-00027.safetensors", + "model.layers.52.mlp.gate_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.52.mlp.up_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.52.mlp.up_proj.weight": "model-00016-of-00027.safetensors", + "model.layers.52.mlp.up_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00016-of-00027.safetensors", + "model.layers.52.self_attn.k_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00016-of-00027.safetensors", + "model.layers.52.self_attn.k_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.52.self_attn.o_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00016-of-00027.safetensors", + "model.layers.52.self_attn.o_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.52.self_attn.q_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00016-of-00027.safetensors", + "model.layers.52.self_attn.q_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.52.self_attn.v_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00016-of-00027.safetensors", + "model.layers.52.self_attn.v_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.53.input_layernorm.weight": "model-00017-of-00027.safetensors", + "model.layers.53.mlp.down_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.53.mlp.down_proj.weight": "model-00017-of-00027.safetensors", + 
"model.layers.53.mlp.down_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.53.mlp.gate_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.53.mlp.gate_proj.weight": "model-00016-of-00027.safetensors", + "model.layers.53.mlp.gate_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.53.mlp.up_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.53.mlp.up_proj.weight": "model-00016-of-00027.safetensors", + "model.layers.53.mlp.up_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00017-of-00027.safetensors", + "model.layers.53.self_attn.k_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00016-of-00027.safetensors", + "model.layers.53.self_attn.k_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.53.self_attn.o_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00016-of-00027.safetensors", + "model.layers.53.self_attn.o_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.53.self_attn.q_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00016-of-00027.safetensors", + "model.layers.53.self_attn.q_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.53.self_attn.v_proj.activation_scale": "model-00016-of-00027.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00016-of-00027.safetensors", + "model.layers.53.self_attn.v_proj.weight_scale_inv": "model-00016-of-00027.safetensors", + "model.layers.54.input_layernorm.weight": "model-00017-of-00027.safetensors", + "model.layers.54.mlp.down_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.54.mlp.down_proj.weight": "model-00017-of-00027.safetensors", + 
"model.layers.54.mlp.down_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.54.mlp.gate_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.54.mlp.gate_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.54.mlp.gate_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.54.mlp.up_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.54.mlp.up_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.54.mlp.up_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00017-of-00027.safetensors", + "model.layers.54.self_attn.k_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.54.self_attn.k_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.54.self_attn.o_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.54.self_attn.o_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.54.self_attn.q_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.54.self_attn.q_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.54.self_attn.v_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.54.self_attn.v_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.55.input_layernorm.weight": "model-00017-of-00027.safetensors", + "model.layers.55.mlp.down_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.55.mlp.down_proj.weight": "model-00017-of-00027.safetensors", + 
"model.layers.55.mlp.down_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.55.mlp.gate_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.55.mlp.gate_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.55.mlp.gate_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.55.mlp.up_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.55.mlp.up_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.55.mlp.up_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00017-of-00027.safetensors", + "model.layers.55.self_attn.k_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.55.self_attn.k_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.55.self_attn.o_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.55.self_attn.o_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.55.self_attn.q_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.55.self_attn.q_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.55.self_attn.v_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.55.self_attn.v_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.56.input_layernorm.weight": "model-00017-of-00027.safetensors", + "model.layers.56.mlp.down_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.56.mlp.down_proj.weight": "model-00017-of-00027.safetensors", + 
"model.layers.56.mlp.down_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.56.mlp.gate_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.56.mlp.gate_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.56.mlp.gate_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.56.mlp.up_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.56.mlp.up_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.56.mlp.up_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00017-of-00027.safetensors", + "model.layers.56.self_attn.k_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.56.self_attn.k_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.56.self_attn.o_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.56.self_attn.o_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.56.self_attn.q_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.56.self_attn.q_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.56.self_attn.v_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.56.self_attn.v_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.57.input_layernorm.weight": "model-00018-of-00027.safetensors", + "model.layers.57.mlp.down_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.57.mlp.down_proj.weight": "model-00018-of-00027.safetensors", + 
"model.layers.57.mlp.down_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.57.mlp.gate_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.57.mlp.gate_proj.weight": "model-00018-of-00027.safetensors", + "model.layers.57.mlp.gate_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.57.mlp.up_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.57.mlp.up_proj.weight": "model-00018-of-00027.safetensors", + "model.layers.57.mlp.up_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.57.post_attention_layernorm.weight": "model-00018-of-00027.safetensors", + "model.layers.57.self_attn.k_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.57.self_attn.k_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.57.self_attn.o_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.57.self_attn.o_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.57.self_attn.q_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.57.self_attn.q_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.57.self_attn.v_proj.activation_scale": "model-00017-of-00027.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00017-of-00027.safetensors", + "model.layers.57.self_attn.v_proj.weight_scale_inv": "model-00017-of-00027.safetensors", + "model.layers.58.input_layernorm.weight": "model-00018-of-00027.safetensors", + "model.layers.58.mlp.down_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.58.mlp.down_proj.weight": "model-00018-of-00027.safetensors", + 
"model.layers.58.mlp.down_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.58.mlp.gate_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.58.mlp.gate_proj.weight": "model-00018-of-00027.safetensors", + "model.layers.58.mlp.gate_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.58.mlp.up_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.58.mlp.up_proj.weight": "model-00018-of-00027.safetensors", + "model.layers.58.mlp.up_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00018-of-00027.safetensors", + "model.layers.58.self_attn.k_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00018-of-00027.safetensors", + "model.layers.58.self_attn.k_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.58.self_attn.o_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00018-of-00027.safetensors", + "model.layers.58.self_attn.o_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.58.self_attn.q_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00018-of-00027.safetensors", + "model.layers.58.self_attn.q_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.58.self_attn.v_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00018-of-00027.safetensors", + "model.layers.58.self_attn.v_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.59.input_layernorm.weight": "model-00018-of-00027.safetensors", + "model.layers.59.mlp.down_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.59.mlp.down_proj.weight": "model-00018-of-00027.safetensors", + 
"model.layers.59.mlp.down_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.59.mlp.gate_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.59.mlp.gate_proj.weight": "model-00018-of-00027.safetensors", + "model.layers.59.mlp.gate_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.59.mlp.up_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.59.mlp.up_proj.weight": "model-00018-of-00027.safetensors", + "model.layers.59.mlp.up_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.59.post_attention_layernorm.weight": "model-00018-of-00027.safetensors", + "model.layers.59.self_attn.k_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00018-of-00027.safetensors", + "model.layers.59.self_attn.k_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.59.self_attn.o_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00018-of-00027.safetensors", + "model.layers.59.self_attn.o_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.59.self_attn.q_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00018-of-00027.safetensors", + "model.layers.59.self_attn.q_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.59.self_attn.v_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00018-of-00027.safetensors", + "model.layers.59.self_attn.v_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.6.input_layernorm.weight": "model-00003-of-00027.safetensors", + "model.layers.6.mlp.down_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00003-of-00027.safetensors", + 
"model.layers.6.mlp.down_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.6.mlp.gate_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.6.mlp.gate_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.6.mlp.up_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.6.mlp.up_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00003-of-00027.safetensors", + "model.layers.6.self_attn.k_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.6.self_attn.k_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.6.self_attn.o_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.6.self_attn.o_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.6.self_attn.q_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.6.self_attn.q_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.6.self_attn.v_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.6.self_attn.v_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.60.input_layernorm.weight": "model-00019-of-00027.safetensors", + "model.layers.60.mlp.down_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.60.mlp.down_proj.weight": "model-00019-of-00027.safetensors", + 
"model.layers.60.mlp.down_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.60.mlp.gate_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.60.mlp.gate_proj.weight": "model-00018-of-00027.safetensors", + "model.layers.60.mlp.gate_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.60.mlp.up_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.60.mlp.up_proj.weight": "model-00018-of-00027.safetensors", + "model.layers.60.mlp.up_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.60.post_attention_layernorm.weight": "model-00019-of-00027.safetensors", + "model.layers.60.self_attn.k_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.60.self_attn.k_proj.weight": "model-00018-of-00027.safetensors", + "model.layers.60.self_attn.k_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.60.self_attn.o_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.60.self_attn.o_proj.weight": "model-00018-of-00027.safetensors", + "model.layers.60.self_attn.o_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.60.self_attn.q_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.60.self_attn.q_proj.weight": "model-00018-of-00027.safetensors", + "model.layers.60.self_attn.q_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.60.self_attn.v_proj.activation_scale": "model-00018-of-00027.safetensors", + "model.layers.60.self_attn.v_proj.weight": "model-00018-of-00027.safetensors", + "model.layers.60.self_attn.v_proj.weight_scale_inv": "model-00018-of-00027.safetensors", + "model.layers.61.input_layernorm.weight": "model-00019-of-00027.safetensors", + "model.layers.61.mlp.down_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.61.mlp.down_proj.weight": "model-00019-of-00027.safetensors", + 
"model.layers.61.mlp.down_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.61.mlp.gate_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.61.mlp.gate_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.61.mlp.gate_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.61.mlp.up_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.61.mlp.up_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.61.mlp.up_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.61.post_attention_layernorm.weight": "model-00019-of-00027.safetensors", + "model.layers.61.self_attn.k_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.61.self_attn.k_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.61.self_attn.k_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.61.self_attn.o_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.61.self_attn.o_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.61.self_attn.o_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.61.self_attn.q_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.61.self_attn.q_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.61.self_attn.q_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.61.self_attn.v_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.61.self_attn.v_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.61.self_attn.v_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.62.input_layernorm.weight": "model-00019-of-00027.safetensors", + "model.layers.62.mlp.down_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.62.mlp.down_proj.weight": "model-00019-of-00027.safetensors", + 
"model.layers.62.mlp.down_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.62.mlp.gate_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.62.mlp.gate_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.62.mlp.gate_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.62.mlp.up_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.62.mlp.up_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.62.mlp.up_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.62.post_attention_layernorm.weight": "model-00019-of-00027.safetensors", + "model.layers.62.self_attn.k_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.62.self_attn.k_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.62.self_attn.k_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.62.self_attn.o_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.62.self_attn.o_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.62.self_attn.o_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.62.self_attn.q_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.62.self_attn.q_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.62.self_attn.q_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.62.self_attn.v_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.62.self_attn.v_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.62.self_attn.v_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.63.input_layernorm.weight": "model-00019-of-00027.safetensors", + "model.layers.63.mlp.down_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.63.mlp.down_proj.weight": "model-00019-of-00027.safetensors", + 
"model.layers.63.mlp.down_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.63.mlp.gate_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.63.mlp.gate_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.63.mlp.gate_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.63.mlp.up_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.63.mlp.up_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.63.mlp.up_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.63.post_attention_layernorm.weight": "model-00019-of-00027.safetensors", + "model.layers.63.self_attn.k_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.63.self_attn.k_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.63.self_attn.k_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.63.self_attn.o_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.63.self_attn.o_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.63.self_attn.o_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.63.self_attn.q_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.63.self_attn.q_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.63.self_attn.q_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.63.self_attn.v_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.63.self_attn.v_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.63.self_attn.v_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.64.input_layernorm.weight": "model-00020-of-00027.safetensors", + "model.layers.64.mlp.down_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.64.mlp.down_proj.weight": "model-00020-of-00027.safetensors", + 
"model.layers.64.mlp.down_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.64.mlp.gate_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.64.mlp.gate_proj.weight": "model-00020-of-00027.safetensors", + "model.layers.64.mlp.gate_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.64.mlp.up_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.64.mlp.up_proj.weight": "model-00020-of-00027.safetensors", + "model.layers.64.mlp.up_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.64.post_attention_layernorm.weight": "model-00020-of-00027.safetensors", + "model.layers.64.self_attn.k_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.64.self_attn.k_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.64.self_attn.k_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.64.self_attn.o_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.64.self_attn.o_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.64.self_attn.o_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.64.self_attn.q_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.64.self_attn.q_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.64.self_attn.q_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.64.self_attn.v_proj.activation_scale": "model-00019-of-00027.safetensors", + "model.layers.64.self_attn.v_proj.weight": "model-00019-of-00027.safetensors", + "model.layers.64.self_attn.v_proj.weight_scale_inv": "model-00019-of-00027.safetensors", + "model.layers.65.input_layernorm.weight": "model-00020-of-00027.safetensors", + "model.layers.65.mlp.down_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.65.mlp.down_proj.weight": "model-00020-of-00027.safetensors", + 
"model.layers.65.mlp.down_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.65.mlp.gate_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.65.mlp.gate_proj.weight": "model-00020-of-00027.safetensors", + "model.layers.65.mlp.gate_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.65.mlp.up_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.65.mlp.up_proj.weight": "model-00020-of-00027.safetensors", + "model.layers.65.mlp.up_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.65.post_attention_layernorm.weight": "model-00020-of-00027.safetensors", + "model.layers.65.self_attn.k_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.65.self_attn.k_proj.weight": "model-00020-of-00027.safetensors", + "model.layers.65.self_attn.k_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.65.self_attn.o_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.65.self_attn.o_proj.weight": "model-00020-of-00027.safetensors", + "model.layers.65.self_attn.o_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.65.self_attn.q_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.65.self_attn.q_proj.weight": "model-00020-of-00027.safetensors", + "model.layers.65.self_attn.q_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.65.self_attn.v_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.65.self_attn.v_proj.weight": "model-00020-of-00027.safetensors", + "model.layers.65.self_attn.v_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.66.input_layernorm.weight": "model-00020-of-00027.safetensors", + "model.layers.66.mlp.down_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.66.mlp.down_proj.weight": "model-00020-of-00027.safetensors", + 
"model.layers.66.mlp.down_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.66.mlp.gate_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.66.mlp.gate_proj.weight": "model-00020-of-00027.safetensors", + "model.layers.66.mlp.gate_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.66.mlp.up_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.66.mlp.up_proj.weight": "model-00020-of-00027.safetensors", + "model.layers.66.mlp.up_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.66.post_attention_layernorm.weight": "model-00020-of-00027.safetensors", + "model.layers.66.self_attn.k_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.66.self_attn.k_proj.weight": "model-00020-of-00027.safetensors", + "model.layers.66.self_attn.k_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.66.self_attn.o_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.66.self_attn.o_proj.weight": "model-00020-of-00027.safetensors", + "model.layers.66.self_attn.o_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.66.self_attn.q_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.66.self_attn.q_proj.weight": "model-00020-of-00027.safetensors", + "model.layers.66.self_attn.q_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.66.self_attn.v_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.66.self_attn.v_proj.weight": "model-00020-of-00027.safetensors", + "model.layers.66.self_attn.v_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.67.input_layernorm.weight": "model-00021-of-00027.safetensors", + "model.layers.67.mlp.down_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.67.mlp.down_proj.weight": "model-00021-of-00027.safetensors", + 
"model.layers.67.mlp.down_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.67.mlp.gate_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.67.mlp.gate_proj.weight": "model-00020-of-00027.safetensors", + "model.layers.67.mlp.gate_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.67.mlp.up_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.67.mlp.up_proj.weight": "model-00020-of-00027.safetensors", + "model.layers.67.mlp.up_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.67.post_attention_layernorm.weight": "model-00021-of-00027.safetensors", + "model.layers.67.self_attn.k_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.67.self_attn.k_proj.weight": "model-00020-of-00027.safetensors", + "model.layers.67.self_attn.k_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.67.self_attn.o_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.67.self_attn.o_proj.weight": "model-00020-of-00027.safetensors", + "model.layers.67.self_attn.o_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.67.self_attn.q_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.67.self_attn.q_proj.weight": "model-00020-of-00027.safetensors", + "model.layers.67.self_attn.q_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.67.self_attn.v_proj.activation_scale": "model-00020-of-00027.safetensors", + "model.layers.67.self_attn.v_proj.weight": "model-00020-of-00027.safetensors", + "model.layers.67.self_attn.v_proj.weight_scale_inv": "model-00020-of-00027.safetensors", + "model.layers.68.input_layernorm.weight": "model-00021-of-00027.safetensors", + "model.layers.68.mlp.down_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.68.mlp.down_proj.weight": "model-00021-of-00027.safetensors", + 
"model.layers.68.mlp.down_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.68.mlp.gate_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.68.mlp.gate_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.68.mlp.gate_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.68.mlp.up_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.68.mlp.up_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.68.mlp.up_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.68.post_attention_layernorm.weight": "model-00021-of-00027.safetensors", + "model.layers.68.self_attn.k_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.68.self_attn.k_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.68.self_attn.k_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.68.self_attn.o_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.68.self_attn.o_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.68.self_attn.o_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.68.self_attn.q_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.68.self_attn.q_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.68.self_attn.q_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.68.self_attn.v_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.68.self_attn.v_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.68.self_attn.v_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.69.input_layernorm.weight": "model-00021-of-00027.safetensors", + "model.layers.69.mlp.down_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.69.mlp.down_proj.weight": "model-00021-of-00027.safetensors", + 
"model.layers.69.mlp.down_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.69.mlp.gate_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.69.mlp.gate_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.69.mlp.gate_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.69.mlp.up_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.69.mlp.up_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.69.mlp.up_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.69.post_attention_layernorm.weight": "model-00021-of-00027.safetensors", + "model.layers.69.self_attn.k_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.69.self_attn.k_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.69.self_attn.k_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.69.self_attn.o_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.69.self_attn.o_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.69.self_attn.o_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.69.self_attn.q_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.69.self_attn.q_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.69.self_attn.q_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.69.self_attn.v_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.69.self_attn.v_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.69.self_attn.v_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.7.input_layernorm.weight": "model-00003-of-00027.safetensors", + "model.layers.7.mlp.down_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00003-of-00027.safetensors", + 
"model.layers.7.mlp.down_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.7.mlp.gate_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.7.mlp.gate_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.7.mlp.up_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.7.mlp.up_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00003-of-00027.safetensors", + "model.layers.7.self_attn.k_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.7.self_attn.k_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.7.self_attn.o_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.7.self_attn.o_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.7.self_attn.q_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.7.self_attn.q_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.7.self_attn.v_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.7.self_attn.v_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.70.input_layernorm.weight": "model-00021-of-00027.safetensors", + "model.layers.70.mlp.down_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.70.mlp.down_proj.weight": "model-00021-of-00027.safetensors", + 
"model.layers.70.mlp.down_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.70.mlp.gate_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.70.mlp.gate_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.70.mlp.gate_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.70.mlp.up_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.70.mlp.up_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.70.mlp.up_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.70.post_attention_layernorm.weight": "model-00021-of-00027.safetensors", + "model.layers.70.self_attn.k_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.70.self_attn.k_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.70.self_attn.k_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.70.self_attn.o_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.70.self_attn.o_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.70.self_attn.o_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.70.self_attn.q_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.70.self_attn.q_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.70.self_attn.q_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.70.self_attn.v_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.70.self_attn.v_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.70.self_attn.v_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.71.input_layernorm.weight": "model-00022-of-00027.safetensors", + "model.layers.71.mlp.down_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.71.mlp.down_proj.weight": "model-00022-of-00027.safetensors", + 
"model.layers.71.mlp.down_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.71.mlp.gate_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.71.mlp.gate_proj.weight": "model-00022-of-00027.safetensors", + "model.layers.71.mlp.gate_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.71.mlp.up_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.71.mlp.up_proj.weight": "model-00022-of-00027.safetensors", + "model.layers.71.mlp.up_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.71.post_attention_layernorm.weight": "model-00022-of-00027.safetensors", + "model.layers.71.self_attn.k_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.71.self_attn.k_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.71.self_attn.k_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.71.self_attn.o_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.71.self_attn.o_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.71.self_attn.o_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.71.self_attn.q_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.71.self_attn.q_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.71.self_attn.q_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.71.self_attn.v_proj.activation_scale": "model-00021-of-00027.safetensors", + "model.layers.71.self_attn.v_proj.weight": "model-00021-of-00027.safetensors", + "model.layers.71.self_attn.v_proj.weight_scale_inv": "model-00021-of-00027.safetensors", + "model.layers.72.input_layernorm.weight": "model-00022-of-00027.safetensors", + "model.layers.72.mlp.down_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.72.mlp.down_proj.weight": "model-00022-of-00027.safetensors", + 
"model.layers.72.mlp.down_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.72.mlp.gate_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.72.mlp.gate_proj.weight": "model-00022-of-00027.safetensors", + "model.layers.72.mlp.gate_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.72.mlp.up_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.72.mlp.up_proj.weight": "model-00022-of-00027.safetensors", + "model.layers.72.mlp.up_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.72.post_attention_layernorm.weight": "model-00022-of-00027.safetensors", + "model.layers.72.self_attn.k_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.72.self_attn.k_proj.weight": "model-00022-of-00027.safetensors", + "model.layers.72.self_attn.k_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.72.self_attn.o_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.72.self_attn.o_proj.weight": "model-00022-of-00027.safetensors", + "model.layers.72.self_attn.o_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.72.self_attn.q_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.72.self_attn.q_proj.weight": "model-00022-of-00027.safetensors", + "model.layers.72.self_attn.q_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.72.self_attn.v_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.72.self_attn.v_proj.weight": "model-00022-of-00027.safetensors", + "model.layers.72.self_attn.v_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.73.input_layernorm.weight": "model-00022-of-00027.safetensors", + "model.layers.73.mlp.down_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.73.mlp.down_proj.weight": "model-00022-of-00027.safetensors", + 
"model.layers.73.mlp.down_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.73.mlp.gate_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.73.mlp.gate_proj.weight": "model-00022-of-00027.safetensors", + "model.layers.73.mlp.gate_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.73.mlp.up_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.73.mlp.up_proj.weight": "model-00022-of-00027.safetensors", + "model.layers.73.mlp.up_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.73.post_attention_layernorm.weight": "model-00022-of-00027.safetensors", + "model.layers.73.self_attn.k_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.73.self_attn.k_proj.weight": "model-00022-of-00027.safetensors", + "model.layers.73.self_attn.k_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.73.self_attn.o_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.73.self_attn.o_proj.weight": "model-00022-of-00027.safetensors", + "model.layers.73.self_attn.o_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.73.self_attn.q_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.73.self_attn.q_proj.weight": "model-00022-of-00027.safetensors", + "model.layers.73.self_attn.q_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.73.self_attn.v_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.73.self_attn.v_proj.weight": "model-00022-of-00027.safetensors", + "model.layers.73.self_attn.v_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.74.input_layernorm.weight": "model-00023-of-00027.safetensors", + "model.layers.74.mlp.down_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.74.mlp.down_proj.weight": "model-00023-of-00027.safetensors", + 
"model.layers.74.mlp.down_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.74.mlp.gate_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.74.mlp.gate_proj.weight": "model-00022-of-00027.safetensors", + "model.layers.74.mlp.gate_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.74.mlp.up_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.74.mlp.up_proj.weight": "model-00022-of-00027.safetensors", + "model.layers.74.mlp.up_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.74.post_attention_layernorm.weight": "model-00023-of-00027.safetensors", + "model.layers.74.self_attn.k_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.74.self_attn.k_proj.weight": "model-00022-of-00027.safetensors", + "model.layers.74.self_attn.k_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.74.self_attn.o_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.74.self_attn.o_proj.weight": "model-00022-of-00027.safetensors", + "model.layers.74.self_attn.o_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.74.self_attn.q_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.74.self_attn.q_proj.weight": "model-00022-of-00027.safetensors", + "model.layers.74.self_attn.q_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.74.self_attn.v_proj.activation_scale": "model-00022-of-00027.safetensors", + "model.layers.74.self_attn.v_proj.weight": "model-00022-of-00027.safetensors", + "model.layers.74.self_attn.v_proj.weight_scale_inv": "model-00022-of-00027.safetensors", + "model.layers.75.input_layernorm.weight": "model-00023-of-00027.safetensors", + "model.layers.75.mlp.down_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.75.mlp.down_proj.weight": "model-00023-of-00027.safetensors", + 
"model.layers.75.mlp.down_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.75.mlp.gate_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.75.mlp.gate_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.75.mlp.gate_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.75.mlp.up_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.75.mlp.up_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.75.mlp.up_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.75.post_attention_layernorm.weight": "model-00023-of-00027.safetensors", + "model.layers.75.self_attn.k_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.75.self_attn.k_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.75.self_attn.k_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.75.self_attn.o_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.75.self_attn.o_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.75.self_attn.o_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.75.self_attn.q_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.75.self_attn.q_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.75.self_attn.q_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.75.self_attn.v_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.75.self_attn.v_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.75.self_attn.v_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.76.input_layernorm.weight": "model-00023-of-00027.safetensors", + "model.layers.76.mlp.down_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.76.mlp.down_proj.weight": "model-00023-of-00027.safetensors", + 
"model.layers.76.mlp.down_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.76.mlp.gate_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.76.mlp.gate_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.76.mlp.gate_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.76.mlp.up_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.76.mlp.up_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.76.mlp.up_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.76.post_attention_layernorm.weight": "model-00023-of-00027.safetensors", + "model.layers.76.self_attn.k_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.76.self_attn.k_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.76.self_attn.k_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.76.self_attn.o_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.76.self_attn.o_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.76.self_attn.o_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.76.self_attn.q_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.76.self_attn.q_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.76.self_attn.q_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.76.self_attn.v_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.76.self_attn.v_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.76.self_attn.v_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.77.input_layernorm.weight": "model-00023-of-00027.safetensors", + "model.layers.77.mlp.down_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.77.mlp.down_proj.weight": "model-00023-of-00027.safetensors", + 
"model.layers.77.mlp.down_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.77.mlp.gate_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.77.mlp.gate_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.77.mlp.gate_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.77.mlp.up_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.77.mlp.up_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.77.mlp.up_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.77.post_attention_layernorm.weight": "model-00023-of-00027.safetensors", + "model.layers.77.self_attn.k_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.77.self_attn.k_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.77.self_attn.k_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.77.self_attn.o_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.77.self_attn.o_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.77.self_attn.o_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.77.self_attn.q_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.77.self_attn.q_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.77.self_attn.q_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.77.self_attn.v_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.77.self_attn.v_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.77.self_attn.v_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.78.input_layernorm.weight": "model-00024-of-00027.safetensors", + "model.layers.78.mlp.down_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.78.mlp.down_proj.weight": "model-00024-of-00027.safetensors", + 
"model.layers.78.mlp.down_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.78.mlp.gate_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.78.mlp.gate_proj.weight": "model-00024-of-00027.safetensors", + "model.layers.78.mlp.gate_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.78.mlp.up_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.78.mlp.up_proj.weight": "model-00024-of-00027.safetensors", + "model.layers.78.mlp.up_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.78.post_attention_layernorm.weight": "model-00024-of-00027.safetensors", + "model.layers.78.self_attn.k_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.78.self_attn.k_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.78.self_attn.k_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.78.self_attn.o_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.78.self_attn.o_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.78.self_attn.o_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.78.self_attn.q_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.78.self_attn.q_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.78.self_attn.q_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.78.self_attn.v_proj.activation_scale": "model-00023-of-00027.safetensors", + "model.layers.78.self_attn.v_proj.weight": "model-00023-of-00027.safetensors", + "model.layers.78.self_attn.v_proj.weight_scale_inv": "model-00023-of-00027.safetensors", + "model.layers.79.input_layernorm.weight": "model-00024-of-00027.safetensors", + "model.layers.79.mlp.down_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.79.mlp.down_proj.weight": "model-00024-of-00027.safetensors", + 
"model.layers.79.mlp.down_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.79.mlp.gate_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.79.mlp.gate_proj.weight": "model-00024-of-00027.safetensors", + "model.layers.79.mlp.gate_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.79.mlp.up_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.79.mlp.up_proj.weight": "model-00024-of-00027.safetensors", + "model.layers.79.mlp.up_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.79.post_attention_layernorm.weight": "model-00024-of-00027.safetensors", + "model.layers.79.self_attn.k_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.79.self_attn.k_proj.weight": "model-00024-of-00027.safetensors", + "model.layers.79.self_attn.k_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.79.self_attn.o_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.79.self_attn.o_proj.weight": "model-00024-of-00027.safetensors", + "model.layers.79.self_attn.o_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.79.self_attn.q_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.79.self_attn.q_proj.weight": "model-00024-of-00027.safetensors", + "model.layers.79.self_attn.q_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.79.self_attn.v_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.79.self_attn.v_proj.weight": "model-00024-of-00027.safetensors", + "model.layers.79.self_attn.v_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.8.input_layernorm.weight": "model-00004-of-00027.safetensors", + "model.layers.8.mlp.down_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00004-of-00027.safetensors", + 
"model.layers.8.mlp.down_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.8.mlp.gate_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00004-of-00027.safetensors", + "model.layers.8.mlp.gate_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.8.mlp.up_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00004-of-00027.safetensors", + "model.layers.8.mlp.up_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00004-of-00027.safetensors", + "model.layers.8.self_attn.k_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.8.self_attn.k_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.8.self_attn.o_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.8.self_attn.o_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.8.self_attn.q_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.8.self_attn.q_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.8.self_attn.v_proj.activation_scale": "model-00003-of-00027.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00003-of-00027.safetensors", + "model.layers.8.self_attn.v_proj.weight_scale_inv": "model-00003-of-00027.safetensors", + "model.layers.80.input_layernorm.weight": "model-00024-of-00027.safetensors", + "model.layers.80.mlp.down_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.80.mlp.down_proj.weight": "model-00024-of-00027.safetensors", + 
"model.layers.80.mlp.down_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.80.mlp.gate_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.80.mlp.gate_proj.weight": "model-00024-of-00027.safetensors", + "model.layers.80.mlp.gate_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.80.mlp.up_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.80.mlp.up_proj.weight": "model-00024-of-00027.safetensors", + "model.layers.80.mlp.up_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.80.post_attention_layernorm.weight": "model-00024-of-00027.safetensors", + "model.layers.80.self_attn.k_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.80.self_attn.k_proj.weight": "model-00024-of-00027.safetensors", + "model.layers.80.self_attn.k_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.80.self_attn.o_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.80.self_attn.o_proj.weight": "model-00024-of-00027.safetensors", + "model.layers.80.self_attn.o_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.80.self_attn.q_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.80.self_attn.q_proj.weight": "model-00024-of-00027.safetensors", + "model.layers.80.self_attn.q_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.80.self_attn.v_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.80.self_attn.v_proj.weight": "model-00024-of-00027.safetensors", + "model.layers.80.self_attn.v_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.81.input_layernorm.weight": "model-00025-of-00027.safetensors", + "model.layers.81.mlp.down_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.81.mlp.down_proj.weight": "model-00025-of-00027.safetensors", + 
"model.layers.81.mlp.down_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.81.mlp.gate_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.81.mlp.gate_proj.weight": "model-00024-of-00027.safetensors", + "model.layers.81.mlp.gate_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.81.mlp.up_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.81.mlp.up_proj.weight": "model-00024-of-00027.safetensors", + "model.layers.81.mlp.up_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.81.post_attention_layernorm.weight": "model-00025-of-00027.safetensors", + "model.layers.81.self_attn.k_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.81.self_attn.k_proj.weight": "model-00024-of-00027.safetensors", + "model.layers.81.self_attn.k_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.81.self_attn.o_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.81.self_attn.o_proj.weight": "model-00024-of-00027.safetensors", + "model.layers.81.self_attn.o_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.81.self_attn.q_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.81.self_attn.q_proj.weight": "model-00024-of-00027.safetensors", + "model.layers.81.self_attn.q_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.81.self_attn.v_proj.activation_scale": "model-00024-of-00027.safetensors", + "model.layers.81.self_attn.v_proj.weight": "model-00024-of-00027.safetensors", + "model.layers.81.self_attn.v_proj.weight_scale_inv": "model-00024-of-00027.safetensors", + "model.layers.82.input_layernorm.weight": "model-00025-of-00027.safetensors", + "model.layers.82.mlp.down_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.82.mlp.down_proj.weight": "model-00025-of-00027.safetensors", + 
"model.layers.82.mlp.down_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.82.mlp.gate_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.82.mlp.gate_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.82.mlp.gate_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.82.mlp.up_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.82.mlp.up_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.82.mlp.up_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.82.post_attention_layernorm.weight": "model-00025-of-00027.safetensors", + "model.layers.82.self_attn.k_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.82.self_attn.k_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.82.self_attn.k_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.82.self_attn.o_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.82.self_attn.o_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.82.self_attn.o_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.82.self_attn.q_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.82.self_attn.q_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.82.self_attn.q_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.82.self_attn.v_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.82.self_attn.v_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.82.self_attn.v_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.83.input_layernorm.weight": "model-00025-of-00027.safetensors", + "model.layers.83.mlp.down_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.83.mlp.down_proj.weight": "model-00025-of-00027.safetensors", + 
"model.layers.83.mlp.down_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.83.mlp.gate_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.83.mlp.gate_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.83.mlp.gate_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.83.mlp.up_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.83.mlp.up_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.83.mlp.up_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.83.post_attention_layernorm.weight": "model-00025-of-00027.safetensors", + "model.layers.83.self_attn.k_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.83.self_attn.k_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.83.self_attn.k_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.83.self_attn.o_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.83.self_attn.o_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.83.self_attn.o_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.83.self_attn.q_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.83.self_attn.q_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.83.self_attn.q_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.83.self_attn.v_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.83.self_attn.v_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.83.self_attn.v_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.84.input_layernorm.weight": "model-00025-of-00027.safetensors", + "model.layers.84.mlp.down_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.84.mlp.down_proj.weight": "model-00025-of-00027.safetensors", + 
"model.layers.84.mlp.down_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.84.mlp.gate_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.84.mlp.gate_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.84.mlp.gate_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.84.mlp.up_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.84.mlp.up_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.84.mlp.up_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.84.post_attention_layernorm.weight": "model-00025-of-00027.safetensors", + "model.layers.84.self_attn.k_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.84.self_attn.k_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.84.self_attn.k_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.84.self_attn.o_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.84.self_attn.o_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.84.self_attn.o_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.84.self_attn.q_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.84.self_attn.q_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.84.self_attn.q_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.84.self_attn.v_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.84.self_attn.v_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.84.self_attn.v_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.85.input_layernorm.weight": "model-00026-of-00027.safetensors", + "model.layers.85.mlp.down_proj.activation_scale": "model-00026-of-00027.safetensors", + "model.layers.85.mlp.down_proj.weight": "model-00026-of-00027.safetensors", + 
"model.layers.85.mlp.down_proj.weight_scale_inv": "model-00026-of-00027.safetensors", + "model.layers.85.mlp.gate_proj.activation_scale": "model-00026-of-00027.safetensors", + "model.layers.85.mlp.gate_proj.weight": "model-00026-of-00027.safetensors", + "model.layers.85.mlp.gate_proj.weight_scale_inv": "model-00026-of-00027.safetensors", + "model.layers.85.mlp.up_proj.activation_scale": "model-00026-of-00027.safetensors", + "model.layers.85.mlp.up_proj.weight": "model-00026-of-00027.safetensors", + "model.layers.85.mlp.up_proj.weight_scale_inv": "model-00026-of-00027.safetensors", + "model.layers.85.post_attention_layernorm.weight": "model-00026-of-00027.safetensors", + "model.layers.85.self_attn.k_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.85.self_attn.k_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.85.self_attn.k_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.85.self_attn.o_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.85.self_attn.o_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.85.self_attn.o_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.85.self_attn.q_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.85.self_attn.q_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.85.self_attn.q_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.85.self_attn.v_proj.activation_scale": "model-00025-of-00027.safetensors", + "model.layers.85.self_attn.v_proj.weight": "model-00025-of-00027.safetensors", + "model.layers.85.self_attn.v_proj.weight_scale_inv": "model-00025-of-00027.safetensors", + "model.layers.86.input_layernorm.weight": "model-00026-of-00027.safetensors", + "model.layers.86.mlp.down_proj.activation_scale": "model-00026-of-00027.safetensors", + "model.layers.86.mlp.down_proj.weight": "model-00026-of-00027.safetensors", + 
"model.layers.86.mlp.down_proj.weight_scale_inv": "model-00026-of-00027.safetensors", + "model.layers.86.mlp.gate_proj.activation_scale": "model-00026-of-00027.safetensors", + "model.layers.86.mlp.gate_proj.weight": "model-00026-of-00027.safetensors", + "model.layers.86.mlp.gate_proj.weight_scale_inv": "model-00026-of-00027.safetensors", + "model.layers.86.mlp.up_proj.activation_scale": "model-00026-of-00027.safetensors", + "model.layers.86.mlp.up_proj.weight": "model-00026-of-00027.safetensors", + "model.layers.86.mlp.up_proj.weight_scale_inv": "model-00026-of-00027.safetensors", + "model.layers.86.post_attention_layernorm.weight": "model-00026-of-00027.safetensors", + "model.layers.86.self_attn.k_proj.activation_scale": "model-00026-of-00027.safetensors", + "model.layers.86.self_attn.k_proj.weight": "model-00026-of-00027.safetensors", + "model.layers.86.self_attn.k_proj.weight_scale_inv": "model-00026-of-00027.safetensors", + "model.layers.86.self_attn.o_proj.activation_scale": "model-00026-of-00027.safetensors", + "model.layers.86.self_attn.o_proj.weight": "model-00026-of-00027.safetensors", + "model.layers.86.self_attn.o_proj.weight_scale_inv": "model-00026-of-00027.safetensors", + "model.layers.86.self_attn.q_proj.activation_scale": "model-00026-of-00027.safetensors", + "model.layers.86.self_attn.q_proj.weight": "model-00026-of-00027.safetensors", + "model.layers.86.self_attn.q_proj.weight_scale_inv": "model-00026-of-00027.safetensors", + "model.layers.86.self_attn.v_proj.activation_scale": "model-00026-of-00027.safetensors", + "model.layers.86.self_attn.v_proj.weight": "model-00026-of-00027.safetensors", + "model.layers.86.self_attn.v_proj.weight_scale_inv": "model-00026-of-00027.safetensors", + "model.layers.87.input_layernorm.weight": "model-00026-of-00027.safetensors", + "model.layers.87.mlp.down_proj.activation_scale": "model-00026-of-00027.safetensors", + "model.layers.87.mlp.down_proj.weight": "model-00026-of-00027.safetensors", + 
"model.layers.87.mlp.down_proj.weight_scale_inv": "model-00026-of-00027.safetensors", + "model.layers.87.mlp.gate_proj.activation_scale": "model-00026-of-00027.safetensors", + "model.layers.87.mlp.gate_proj.weight": "model-00026-of-00027.safetensors", + "model.layers.87.mlp.gate_proj.weight_scale_inv": "model-00026-of-00027.safetensors", + "model.layers.87.mlp.up_proj.activation_scale": "model-00026-of-00027.safetensors", + "model.layers.87.mlp.up_proj.weight": "model-00026-of-00027.safetensors", + "model.layers.87.mlp.up_proj.weight_scale_inv": "model-00026-of-00027.safetensors", + "model.layers.87.post_attention_layernorm.weight": "model-00026-of-00027.safetensors", + "model.layers.87.self_attn.k_proj.activation_scale": "model-00026-of-00027.safetensors", + "model.layers.87.self_attn.k_proj.weight": "model-00026-of-00027.safetensors", + "model.layers.87.self_attn.k_proj.weight_scale_inv": "model-00026-of-00027.safetensors", + "model.layers.87.self_attn.o_proj.activation_scale": "model-00026-of-00027.safetensors", + "model.layers.87.self_attn.o_proj.weight": "model-00026-of-00027.safetensors", + "model.layers.87.self_attn.o_proj.weight_scale_inv": "model-00026-of-00027.safetensors", + "model.layers.87.self_attn.q_proj.activation_scale": "model-00026-of-00027.safetensors", + "model.layers.87.self_attn.q_proj.weight": "model-00026-of-00027.safetensors", + "model.layers.87.self_attn.q_proj.weight_scale_inv": "model-00026-of-00027.safetensors", + "model.layers.87.self_attn.v_proj.activation_scale": "model-00026-of-00027.safetensors", + "model.layers.87.self_attn.v_proj.weight": "model-00026-of-00027.safetensors", + "model.layers.87.self_attn.v_proj.weight_scale_inv": "model-00026-of-00027.safetensors", + "model.layers.9.input_layernorm.weight": "model-00004-of-00027.safetensors", + "model.layers.9.mlp.down_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00004-of-00027.safetensors", + 
"model.layers.9.mlp.down_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.9.mlp.gate_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00004-of-00027.safetensors", + "model.layers.9.mlp.gate_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.9.mlp.up_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00004-of-00027.safetensors", + "model.layers.9.mlp.up_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00004-of-00027.safetensors", + "model.layers.9.self_attn.k_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00004-of-00027.safetensors", + "model.layers.9.self_attn.k_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.9.self_attn.o_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00004-of-00027.safetensors", + "model.layers.9.self_attn.o_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.9.self_attn.q_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00004-of-00027.safetensors", + "model.layers.9.self_attn.q_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.layers.9.self_attn.v_proj.activation_scale": "model-00004-of-00027.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00004-of-00027.safetensors", + "model.layers.9.self_attn.v_proj.weight_scale_inv": "model-00004-of-00027.safetensors", + "model.norm.weight": "model-00026-of-00027.safetensors" + } +} diff --git a/params.json b/params.json new file mode 100644 index 0000000000000000000000000000000000000000..589242b1b668fd59ea76a34722d986c24d9c873f --- /dev/null +++ b/params.json @@ -0,0 +1,30 @@ +{ + "dim": 12288, + 
"n_layers": 88, + "head_dim": 128, + "hidden_dim": 28672, + "n_heads": 96, + "n_kv_heads": 8, + "rope_theta": 1000000.0, + "norm_eps": 1e-05, + "vocab_size": 131072, + "tied_embeddings": false, + "max_position_embeddings": 262144, + "max_seq_len": 262144, + "q_lora_rank": null, + "qk_rope_head_dim": null, + "qk_nope_head_dim": null, + "kv_lora_rank": null, + "v_head_dim": null, + "quantization": { + "qformat_weight": "fp8_e4m3", + "qscheme_act": "TENSOR" + }, + "yarn": { + "original_max_position_embeddings": 4096, + "factor": 64, + "apply_scale": true, + "beta": 4, + "alpha": 1 + } +} \ No newline at end of file diff --git a/tekken.json b/tekken.json new file mode 100644 index 0000000000000000000000000000000000000000..83432646105f3ab2d46c0bc1415305e82333028f --- /dev/null +++ b/tekken.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2bef5cf42cba0f948b70607300c477f0a23c3bc79ef1b0a00705e592586c835 +size 16753659 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5b51e255641d3ab81f891f54bd61370fcedf6622 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:286acad9b0e27fce778ac429763536accf618ccb6ed72963b6f94685e531c5c7 +size 17077402 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..dd7753a86d3f08ce351fa7ef5920c554ef2c4eb2 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,1012 @@ +{ + "additional_special_tokens": null, + "backend": "tokenizers", + "extra_special_tokens": [ + "", + "", + "", + "[INST]", + "[/INST]", + "[AVAILABLE_TOOLS]", + "[/AVAILABLE_TOOLS]", + "[TOOL_RESULTS]", + "[/TOOL_RESULTS]", + "[TOOL_CALLS]", + "[IMG]", + "", + "[IMG_BREAK]", + "[IMG_END]", + "[PREFIX]", + "[MIDDLE]", + "[SUFFIX]", + "[SYSTEM_PROMPT]", + "[/SYSTEM_PROMPT]", + "[TOOL_CONTENT]", + "", + "", + "", + "", + "[AUDIO]", + "[BEGIN_AUDIO]", + "", + "", + "", + 
"", + "", + "", + "[ARGS]", + "[CALL_ID]", + "[THINK]", + "[/THINK]", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "model_max_length": 1000000000000000019884624838656, + "bos_token": "", + "eos_token": "", + "pad_token": "", + "unk_token": "", + 
"tokenizer_class": "TokenizersBackend" +}