import os

import gradio as gr
from transformers import pipeline, AutoTokenizer
import torch
import spaces

# Models that receive the user's text verbatim. Every other model gets the
# <|im_start|>/<|im_end|> chat wrapper built in `_build_prompt`.
_RAW_PROMPT_MODELS = frozenset({"LeroyDyer/Mixtral_AI_Cyber_1.0"})


@spaces.GPU
def load_model(model_name):
    """Build a text-generation pipeline for ``model_name``.

    Args:
        model_name: Model identifier handed to ``transformers.pipeline``.

    Returns:
        The pipeline object created with ``device_map="auto"``.
    """
    # NOTE(review): trust_remote_code=True lets the model repository supply
    # code that is executed locally. Only pass identifiers you trust.
    # NOTE(review): a new pipeline is built on every call; consider caching
    # if reload time becomes a problem.
    return pipeline(
        "text-generation",
        model=model_name,
        device_map="auto",
        trust_remote_code=True,
    )


def _build_prompt(model_name, user_input):
    """Return the prompt text to feed to ``model_name`` for ``user_input``."""
    if model_name in _RAW_PROMPT_MODELS:
        return user_input
    return f"<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"


@spaces.GPU()
def generate(
    model_name,
    user_input,
    temperature=0.7,
    top_p=0.95,
    top_k=50,
    max_new_tokens=512,
):
    """Generate text from ``user_input`` with the selected model.

    Args:
        model_name: Model identifier; see ``model_choices``.
        user_input: Text typed by the user.
        temperature: Sampling temperature. Values <= 0 select greedy
            decoding instead of sampling.
        top_p: Nucleus-sampling threshold (used only when sampling).
        top_k: Top-k cutoff (used only when sampling).
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The ``"generated_text"`` field of the first pipeline output.
    """
    pipe = load_model(model_name)
    prompt = _build_prompt(model_name, user_input)

    # UI sliders may deliver ints (e.g. 1 instead of 1.0) and allow 0.
    # Sampling needs a strictly positive float temperature, so coerce the
    # values and fall back to greedy decoding when temperature is not > 0.
    temperature = float(temperature)
    if temperature > 0:
        sampling_kwargs = {
            "do_sample": True,
            "temperature": temperature,
            "top_k": int(top_k),
            "top_p": float(top_p),
        }
    else:
        sampling_kwargs = {"do_sample": False}

    outputs = pipe(
        prompt,
        max_new_tokens=int(max_new_tokens),
        repetition_penalty=1.1,
        **sampling_kwargs,
    )
    return outputs[0]["generated_text"]


model_choices = [
    "LeroyDyer/Mixtral_Instruct",
    "LeroyDyer/Mixtral_Chat",
    "LeroyDyer/Mixtral_Chat_X",
    "LeroyDyer/Mixtral_Chat_X_128k",
    "LeroyDyer/Mixtral_AI_Instruct_X",
    "LeroyDyer/Mixtral_AI_Thinker",
    "LeroyDyer/Mixtral_AI_128k_BioMedical",
    # NOTE(review): this entry was missing the "LeroyDyer/" namespace that
    # every other entry carries; confirm the repository id.
    "LeroyDyer/Mixtral_BioMedical",
    "LeroyDyer/Mixtral_Samantha",
    "LeroyDyer/Mixtral_AI_Base_128k",
    "LeroyDyer/Mixtral_Base",
    "LeroyDyer/Mixtral_AI_1.0",
    "LeroyDyer/Mixtral_AI_Vision_128K",
    "LeroyDyer/Mixtral_AI_Base",
    "LeroyDyer/Mixtral_AI_Medic_Base",
    "LeroyDyer/Mixtral_AI_Vision_128K_X",
    "LeroyDyer/Mixtral_AI_Vision_V1_128",
    "LeroyDyer/Mixtral_AI_Cyber",
    "LeroyDyer/Mixtral_AI_Cyber_1.0",
    "LeroyDyer/Mixtral_AI_Cyber_2.0",
]

# What are the best options?
# Input widgets, in the same order as the positional parameters of `generate`:
# model, prompt, temperature, top-p, top-k, max new tokens.
prompt_controls = [
    gr.components.Dropdown(
        choices=model_choices,
        label="Model",
        value=model_choices[0],
        interactive=True,
    ),
    gr.components.Textbox(
        lines=2,
        label="Prompt",
        value="Write me a Python program that calculates the factorial of a given number.",
    ),
    gr.components.Slider(minimum=0, maximum=1, value=0.4, label="Temperature"),
    gr.components.Slider(minimum=0, maximum=1, value=0.95, label="Top p"),
    gr.components.Slider(minimum=0, maximum=100, step=1, value=50, label="Top k"),
    gr.components.Slider(minimum=1, maximum=2048, step=1, value=1024, label="Max tokens"),
]

# Single text box that displays whatever `generate` returns.
result_boxes = [gr.Textbox(lines=10, label="Output")]

g = gr.Interface(
    fn=generate,
    inputs=prompt_controls,
    outputs=result_boxes,
    title="SpydazWeb AI (LeroyDyer) Language Models",
    description="Each has thier own personality's ",
    concurrency_limit=1,
)

g.launch(max_threads=4)