from typing import Optional

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

from swarms.models.base_llm import BaseLLM


class Llama3(BaseLLM):
    """
    Llama3 class represents a Llama model for natural language generation.

    Args:
        model_id (str): The ID of the Llama model to use.
        system_prompt (str): The system prompt to use for generating responses.
        temperature (float): The temperature value for controlling the randomness of the generated responses.
        top_p (float): The top-p value for controlling the diversity of the generated responses.
        max_tokens (int): The maximum number of tokens to generate in the response.
        **kwargs: Additional keyword arguments.

    Attributes:
        model_id (str): The ID of the Llama model being used.
        system_prompt (str): The system prompt for generating responses.
        temperature (float): The temperature value for generating responses.
        top_p (float): The top-p value for generating responses.
        max_tokens (int): The maximum number of tokens to generate in the response.
        tokenizer (AutoTokenizer): The tokenizer for the Llama model.
        model (AutoModelForCausalLM): The Llama model for generating responses.

    Methods:
        run(task, *args, **kwargs): Generates a response for the given task.
    """

    def __init__(
        self,
        model_id: str = "meta-llama/Meta-Llama-3-8B-Instruct",
        system_prompt: Optional[str] = None,
        temperature: float = 0.6,
        top_p: float = 0.9,
        max_tokens: int = 4000,
        **kwargs,
    ):
        # Forward any extra keyword arguments to the BaseLLM parent.
        super().__init__(**kwargs)
        self.model_id = model_id
        self.system_prompt = system_prompt
        self.temperature = temperature
        self.top_p = top_p
        self.max_tokens = max_tokens

        # Load the tokenizer and model once at construction time; device_map="auto"
        # lets accelerate place the weights on the available device(s).
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )

    def run(self, task: str, *args, **kwargs):
        """
        Generates a response for the given task.

        Args:
            task (str): The user's task or input.

        Returns:
            str: The generated response.
        """
        # Only include a system turn if a system prompt was actually set;
        # passing content=None would break the chat template.
        messages = []
        if self.system_prompt:
            messages.append({"role": "system", "content": self.system_prompt})
        messages.append({"role": "user", "content": task})

        # Format the conversation with the model's chat template and move the
        # token IDs to the same device as the model.
        input_ids = self.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
        ).to(self.model.device)

        # Llama 3 instruct models end a turn with <|eot_id|>, so treat it as a
        # terminator alongside the regular EOS token.
        terminators = [
            self.tokenizer.eos_token_id,
            self.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
        ]

        outputs = self.model.generate(
            input_ids,
            *args,
            max_new_tokens=self.max_tokens,
            eos_token_id=terminators,
            do_sample=True,
            temperature=self.temperature,
            top_p=self.top_p,
            **kwargs,
        )

        # Strip the prompt tokens and decode only the newly generated text.
        response = outputs[0][input_ids.shape[-1]:]
        return self.tokenizer.decode(response, skip_special_tokens=True)
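

# Example usage (a minimal sketch, not part of the class above): instantiate
# the wrapper and run a single task. This assumes enough GPU memory for the
# 8B model and that the gated meta-llama checkpoint is accessible, e.g. via
# an HF_TOKEN environment variable or a prior `huggingface-cli login`.
if __name__ == "__main__":
    llm = Llama3(
        system_prompt="You are a helpful assistant.",
        temperature=0.6,
        max_tokens=512,
    )
    print(llm.run("Explain the difference between a list and a tuple in Python."))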