lambdaofgod committed on
Commit
426db28
1 Parent(s): 818e811

gradio app setup

Browse files
Files changed (2) hide show
  1. app.py +62 -0
  2. requirements.txt +1 -0
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+
3
+ import datasets
4
+ from findkit import indexes
5
+
6
+
7
+ import gradio as gr
8
+
9
+ logging.basicConfig(level="INFO")
10
+
11
+
12
def get_html_retrieval_results(retrieval_result, show_only_one_match_per_episode):
    """Render retrieval results as an HTML table.

    Args:
        retrieval_result: DataFrame of matches; it must have an ``episode``
            column when ``show_only_one_match_per_episode`` is truthy.
        show_only_one_match_per_episode: When truthy, keep only the first row
            for each distinct ``episode`` value.

    Returns:
        The HTML table markup (links rendered, index column omitted), or an
        empty string when there are no rows left to show.
    """
    rows = retrieval_result
    if show_only_one_match_per_episode:
        rows = rows.drop_duplicates(subset=["episode"])

    # Nothing to display: return an empty string rather than an empty table.
    if len(rows) == 0:
        return ""
    return rows.to_html(render_links=True, index=False)
20
+
21
+
22
def get_retrieval_results(findkit_index, query, n_retrieved_results):
    """Query the index and expose its score column as ``bm25_score``.

    Args:
        findkit_index: Object providing ``find_similar(query, n)``.
        query: Text to search for.
        n_retrieved_results: Number of results requested from the index.

    Returns:
        The frame returned by ``find_similar`` with its ``distance`` column
        renamed to ``bm25_score``. If there is no ``distance`` column the
        frame is returned with its columns unchanged.
    """
    retrieval_results_df = findkit_index.find_similar(query, n_retrieved_results)
    # A bare mapping passed to DataFrame.rename is applied to the row index,
    # which silently leaves the column untouched; target columns explicitly.
    return retrieval_results_df.rename(columns={"distance": "bm25_score"})
25
+
26
+
27
def setup_df():
    """Load the podcast dataset's ``train`` split as a pandas DataFrame.

    Returns:
        The split converted with ``to_pandas()``, with every row that
        contains a missing value dropped.
    """
    dataset_splits = datasets.load_dataset("lambdaofgod/lex_fridman_podcast")
    train_frame = dataset_splits["train"].to_pandas()
    return train_frame.dropna()
31
+
32
+
33
def setup_index():
    """Build an in-memory BM25 index from the podcast DataFrame.

    Returns:
        The object produced by ``indexes.InMemoryBM25Index.build``, called
        with the ``text`` column first and the whole frame second.
    """
    podcast_df = setup_df()
    text_column = podcast_df["text"]
    return indexes.InMemoryBM25Index.build(text_column, podcast_df)


# Built once at import time; the request handler reads this module-level name.
findkit_index = setup_index()
39
+
40
+
41
def show_retrieval_results(query, n_retrieved_results, show_only_one_match_per_episode):
    """Search the module-level index and return the matches as HTML.

    Args:
        query: Text to search for.
        n_retrieved_results: Number of results to request from the index.
        show_only_one_match_per_episode: Forwarded to the HTML renderer to
            decide whether rows are de-duplicated by episode.

    Returns:
        Whatever ``get_html_retrieval_results`` produces for the matches.
    """
    matches = get_retrieval_results(findkit_index, query, n_retrieved_results)
    return get_html_retrieval_results(matches, show_only_one_match_per_episode)
48
+
49
+
50
# Input widgets. They are passed to the interface below in the order the
# callback takes its arguments: query, result count, de-duplication flag.
show_only_one_match_per_episode = gr.Checkbox(
    value=False,
    label="show only one match per episode",
)
n_retrieved_results = gr.Number(
    value=10,
    precision=0,
    label="number of results",
)
query = gr.Textbox(value="artificial life", label="input query")

# The callback returns an HTML string, so the output component is "html".
demo = gr.Interface(
    show_retrieval_results,
    inputs=[query, n_retrieved_results, show_only_one_match_per_episode],
    outputs="html",
)

demo.launch()
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ git+https://github.com/lambdaofgod/findkit