Mehdi Challakh
committed on
adapted the safetensor example since injecting the system prompt manually is no longer necessary
README.md CHANGED
````diff
@@ -23,31 +23,24 @@ A finetune of https://huggingface.co/stabilityai/stable-code-instruct-3b trained
 ### Safetensors (recommended)
 
 ```python
-from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
-
-
-model_name = "path/to/your/safetensors/model" #./stable-cypher-instruct-3b
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
+tokenizer = AutoTokenizer.from_pretrained("lakkeo/stable-cypher-instruct-3b", trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained("lakkeo/stable-cypher-instruct-3b", torch_dtype=torch.bfloat16, trust_remote_code=True)
 
-
-
-
-
-
+messages = [
+    {
+        "role": "user",
+        "content": "Show me the people who have Python and Cloud skills and have been in the company for at least 3 years."
+    }
+]
 
-
-full_prompt = f"{instruction}\n\nHuman: {question}\n\nAssistant:"
+prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
 
-
-inputs = tokenizer(full_prompt, return_tensors="pt")
+inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
 
-
-print("Generating response...")
-with torch.no_grad():
-    outputs = model.generate(
+tokens = model.generate(
     **inputs,
     max_new_tokens=128,
     do_sample=True,
@@ -56,13 +49,7 @@ with torch.no_grad():
     pad_token_id=tokenizer.eos_token_id,
 )
 
-
-answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
-answer = answer[len(full_prompt):].strip() # Remove the input prompt from the output
-
-print("\nQuestion:", question)
-print("\nGenerated Cypher statement:")
-print(answer)
+outputs = tokenizer.batch_decode(tokens[:, inputs.input_ids.shape[-1]:], skip_special_tokens=False)[0]
 ```
 
 ### GGUF
````
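Why manual prompt construction could be dropped: `apply_chat_template` renders the conversation with whatever system prompt and special tokens the repo's chat template defines, so nothing has to be stitched together by hand. A minimal sketch to inspect the rendered prompt, assuming only that the `lakkeo/stable-cypher-instruct-3b` tokenizer ships its chat template (the exact output depends on that template):

```python
from transformers import AutoTokenizer

# Load just the tokenizer to see what the chat template produces.
tokenizer = AutoTokenizer.from_pretrained("lakkeo/stable-cypher-instruct-3b", trust_remote_code=True)

messages = [
    {
        "role": "user",
        "content": "Show me the people who have Python and Cloud skills and have been in the company for at least 3 years."
    }
]

# The template, not the caller, decides how the system prompt and special tokens
# are placed around the user turn.
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
print(prompt)
```

In the updated example the decoded `outputs` string already contains only the generated Cypher statement, because the slice `tokens[:, inputs.input_ids.shape[-1]:]` drops the prompt tokens before decoding.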