from fastapi import FastAPI
import torch
import os
from llama_cpp import Llama
from transformers import AutoModelForCausalLM, AutoTokenizer
import requests

device = "cpu"
access_token = os.getenv("access_token")
privateurl = os.getenv("privateurl")

# Request template used when relaying model output to the private endpoint.
headers = {
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Cookie': '',
    'Sec-Ch-Ua': '"Opera";v="95", "Chromium";v="109", "Not;A=Brand";v="24"',
    'Sec-Ch-Ua-Mobile': '?0',
    'Sec-Ch-Ua-Platform': '"Windows"',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 OPR/95.0.0.0',
    'X-Requested-With': 'XMLHttpRequest'
}

payload = {
    'target': '',
    'content': '',
    'token': ''
}

# Tokenizers are only used to build chat-formatted prompts for the GGUF models.
tokenizer1 = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
tokenizer2 = AutoTokenizer.from_pretrained("google/gemma-2-2b-it", token=access_token)
tokenizer3 = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")

# Quantized GGUF models served through llama.cpp.
llm1 = Llama.from_pretrained(
    repo_id="Qwen/Qwen2-1.5B-Instruct-GGUF",
    filename="*q8_0.gguf",
    verbose=False
)

llm2 = Llama.from_pretrained(
    repo_id="NexaAIDev/gemma-2-2b-it-GGUF",
    filename="*q4_K_S.gguf",
    verbose=False
)

llm3 = Llama.from_pretrained(
    repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
    filename="*q4.gguf",
    verbose=False
)

app = FastAPI()


@app.get("/")
async def read_root():
    return {"Hello": "World!"}


def modelResp1(cookie, target, token, prompt):
    messages = [
        {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will respond politely and briefly."},
        {"role": "user", "content": "Who are you?"},
        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
        {"role": "user", "content": f"{prompt}"}
    ]
    text = tokenizer1.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    output = llm1(
        text,
        max_tokens=64,  # Generate up to 64 tokens
        echo=False,     # Whether to echo the prompt
    )
    response = output['choices'][0]['text']

    # Relay the generated reply to the private endpoint.
    headers['Cookie'] = f"{cookie}"
    payload['token'] = f"{token}"
    payload['target'] = f"{target}"
    payload['content'] = response

    requests.post(privateurl, headers=headers, data=payload)


def modelResp2(prompt):
    messages = [
        {"role": "user", "content": "Who are you?"},
        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
        {"role": "user", "content": f"{prompt}"}
    ]
    text = tokenizer2.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    output = llm2(
        text,
        max_tokens=64,  # Generate up to 64 tokens
        echo=False,     # Whether to echo the prompt
    )
    response = output['choices'][0]['text']

    return response


def modelResp3(prompt):
    messages = [
        {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will respond politely and briefly."},
        {"role": "user", "content": "Who are you?"},
        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
        {"role": "user", "content": f"{prompt}"}
    ]
    text = tokenizer3.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    output = llm3(  # use the Phi-3 model here (was llm2, a copy-paste bug)
        text,
        max_tokens=64,  # Generate up to 64 tokens
        echo=False,     # Whether to echo the prompt
    )
    response = output['choices'][0]['text']

    return response


@app.post("/modelapi1")
async def modelApi1(data: dict):
    target = data.get("target_id")
    cookie = data.get("Cookie")
    token = data.get("token")
    prompt = data.get("prompt")
    modelResp1(cookie, target, token, prompt)
    return {"Hello": "World!"}


@app.post("/modelapi2")
async def modelApi2(data: dict):
    prompt = data.get("prompt")
    # response = modelResp2(prompt)
    return {"Hello": "World!"}


@app.post("/modelapi3")
async def modelApi3(data: dict):
    prompt = data.get("prompt")
    response = modelResp3(prompt)
    return response
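

# A minimal sketch for local testing (an assumption, not part of the original
# deployment): start the API with uvicorn when this file is run directly.
# The filename "app.py", the port 7860, and the sample prompt below are
# illustrative only.
#
# Example client call once the server is up:
#   import requests
#   r = requests.post("http://localhost:7860/modelapi3", json={"prompt": "Hi Sia!"})
#   print(r.json())
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)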