RedHatAI
/

Llama-3.3-70B-Instruct-FP8-dynamic

Text Generation

text-generation-inference

compressed-tensors

Model card Files Files and versions

alexmarques committed on Apr 16

Commit

39d6d29

·

verified ·

1 Parent(s): 04e53e0

Update README.md

Files changed (1) hide show

README.md +6 -1

README.md CHANGED Viewed

@@ -63,7 +63,12 @@ sampling_params = SamplingParams(temperature=0.7, top_p=0.8, max_tokens=256)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
-prompt = "Give me a short introduction to large language model."
 llm = LLM(model=model_id, tensor_parallel_size=number_gpus)

 tokenizer = AutoTokenizer.from_pretrained(model_id)
+messages = [
+    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
+    {"role": "user", "content": "Who are you?"},
+]
+prompts = tokenizer.apply_chat_template(messages, tokenize=False)
 llm = LLM(model=model_id, tensor_parallel_size=number_gpus)