abidlabs's picture
abidlabs HF staff
Update app.py
e9dcfc7
raw
history blame contribute delete
No virus
1.43 kB
import torch
import gradio as gr
import pytube as pt
from transformers import pipeline
# Checkpoint loaded by the speech-recognition pipeline below.
MODEL_NAME = "openai/whisper-large-v2"

# Use CUDA device 0 when torch reports CUDA as available, otherwise the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Speech-recognition pipeline; audio is processed in 30-second chunks.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)

# Task token ids are picked by position from the end of the tokenizer's
# special-id list.
# NOTE(review): this relies on the ordering of `all_special_ids` -- presumably
# stable for this checkpoint; confirm against the installed tokenizer version.
all_special_ids = pipe.tokenizer.all_special_ids
transcribe_token_id, translate_token_id = all_special_ids[-5], all_special_ids[-6]
def transcribe(microphone, state, task="transcribe"):
    """Transcribe one microphone recording and append it to the running text.

    Args:
        microphone: Path of the recorded audio file, or ``None`` when no
            recording was supplied (the audio input is optional).
        state: Transcript accumulated so far; ``None`` is treated as an
            empty transcript.
        task: ``"transcribe"`` selects the transcribe token; any other value
            selects the translate token.

    Returns:
        A ``(text, state)`` tuple holding the same updated transcript twice:
        one copy for display and one to carry forward as the new state.
    """
    if state is None:
        state = ""

    # No recording: keep the transcript as it is rather than handing ``None``
    # to the pipeline.
    if microphone is None:
        return state, state

    task_token_id = transcribe_token_id if task == "transcribe" else translate_token_id
    # Force decoder position 2 to the selected task token.
    pipe.model.config.forced_decoder_ids = [[2, task_token_id]]

    text = pipe(microphone)["text"]
    updated = state + "\n" + text
    return updated, updated
# Inputs: an optional microphone recording delivered as a file path, plus the
# transcript state, which starts out empty.
demo_inputs = [
    gr.Audio(source="microphone", type="filepath", optional=True),
    gr.State(value=""),
]

# Outputs: a 15-line textbox for the transcript and the updated state.
demo_outputs = [
    gr.Textbox(lines=15),
    gr.State(),
]

# Description text; the checkpoint name doubles as the link label and URL path.
demo_description = (
    "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
    f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
    " of arbitrary length."
)

# Live interface wired to `transcribe`, with flagging disabled.
mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=demo_inputs,
    outputs=demo_outputs,
    layout="horizontal",
    theme="huggingface",
    title="Whisper Large V2: Transcribe Audio",
    live=True,
    description=demo_description,
    allow_flagging="never",
)

# Start the app with request queueing enabled.
mf_transcribe.launch(enable_queue=True)