<!-- livebook:{"app_settings":{"show_source":true,"slug":"Summarine"}} -->
# Summarine
```elixir
Mix.install(
  [
    {:kino_bumblebee, "~> 0.3.0"},
    {:exla, "~> 0.5.1"},
    {:req, "~> 0.3.11"}
  ],
  config: [nx: [default_backend: EXLA.Backend]]
)
```
## Intro
Transcribe an uploaded audio clip to text with Whisper (via Bumblebee), then summarize the transcript with a locally running Ollama model.
## Setup Ollama module
```elixir
defmodule Ollama do
  @moduledoc """
  Minimal client for a locally running Ollama server.
  """

  @api_endpoint "http://localhost:11434/api/generate"
  @model "llama2-uncensored"

  def generate(prompt) do
    payload = %{
      model: @model,
      prompt: prompt
    }

    {:ok, response} = Req.post(@api_endpoint, json: payload)

    process_response(response)
  end

  # Ollama streams newline-delimited JSON chunks; decode each chunk and
  # join their `"response"` fields into a single string.
  defp process_response(response) do
    response.body
    |> String.split("\n")
    |> Enum.map(&process_chunk/1)
    |> Enum.reject(&is_nil/1)
    |> Enum.map(&get_content/1)
    |> Enum.join("")
  end

  defp process_chunk("") do
    nil
  end

  defp process_chunk(json_string) do
    {:ok, data} = Jason.decode(json_string)
    data
  end

  defp get_content(%{"response" => response}) do
    response
  end

  defp get_content(_) do
    ""
  end
end
```
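
A quick smoke test for the module, assuming an Ollama server is already running on `localhost:11434` and the `llama2-uncensored` model has been pulled (`ollama pull llama2-uncensored`):

```elixir
# Assumes a local Ollama server with the `llama2-uncensored` model available.
Ollama.generate("Reply with a single word: hello")
```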
## App
```elixir
model_name = "openai/whisper-base"
{:ok, model_info} = Bumblebee.load_model({:hf, model_name})
{:ok, featurizer} = Bumblebee.load_featurizer({:hf, model_name})
{:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, model_name})
{:ok, generation_config} = Bumblebee.load_generation_config({:hf, model_name})
generation_config = Bumblebee.configure(generation_config, max_new_tokens: 100)
serving =
  Bumblebee.Audio.speech_to_text(model_info, featurizer, tokenizer, generation_config,
    compile: [batch_size: 4],
    defn_options: [compiler: EXLA]
  )
```
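
Before wiring up the UI, you can optionally check that the serving compiles and runs end to end. A minimal sketch, assuming the serving accepts a 1-D tensor of mono samples at `featurizer.sampling_rate`:

```elixir
# Optional sanity check: run the serving on one second of silence.
# The transcription of silence is not meaningful; this only verifies
# that the model compiles and the serving produces a result.
silence = Nx.broadcast(0.0, {featurizer.sampling_rate})
Nx.Serving.run(serving, silence)
```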
```elixir
audio_input = Kino.Input.audio("", sampling_rate: featurizer.sampling_rate)
form = Kino.Control.form([audio: audio_input], submit: "Summarize the audio")

audio_frame = Kino.Frame.new(placeholder: false)
summary_frame = Kino.Frame.new(placeholder: false)

Kino.listen(form, fn %{data: %{audio: audio}} ->
  if audio do
    Kino.Frame.render(audio_frame, Kino.Text.new("Running..."))

    # Convert the raw PCM samples to a mono f32 tensor.
    audio =
      audio.data
      |> Nx.from_binary(:f32)
      |> Nx.reshape({:auto, audio.num_channels})
      |> Nx.mean(axes: [1])

    %{results: [%{text: generated_text}]} = Nx.Serving.run(serving, audio)
    Kino.Frame.render(audio_frame, Kino.Markdown.new("**Audio Content**: #{generated_text}"))

    Kino.Frame.render(summary_frame, Kino.Markdown.new("Running..."))
    result = Ollama.generate("Please summarize the text: #{generated_text}")
    Kino.Frame.render(summary_frame, Kino.Markdown.new("**Summary**: #{result}"))
  end
end)

Kino.Layout.grid([form, audio_frame, summary_frame], boxed: true, gap: 16)
```