# Python dependencies in pip requirements-file format: one requirement
# specifier per line (pip does not accept several space-separated
# requirements on a single line).
fastapi
uvicorn
llama-cpp-python
python-dotenv
tqdm
huggingface_hub
flashy>=0.0.1
hydra-core>=1.1
hydra_colorlog
av
gradio_client==0.2.6
xformers
transformers>=4.31.0
spaces
peft
# NOTE(review): torchvision and torch are pinned to exact versions; presumably
# a matched pair -- confirm compatibility before bumping either one.
torchvision==0.15.1
torch==2.0.0
accelerate
sentencepiece
pybind11>=2.12
# NOTE(review): numpy is capped below 2.0; presumably for compatibility with
# the pinned torch build -- confirm before lifting the cap.
numpy<2