|
<!-- livebook:{"app_settings":{"show_source":true,"slug":"Summarine"}} --> |
|
|
|
|
|
|
|
```elixir |
|
# Packages this notebook installs.
deps = [
  {:kino_bumblebee, "~> 0.3.0"},
  {:exla, "~> 0.5.1"},
  {:req, "~> 0.3.11"}
]

# Configure Nx so that EXLA.Backend is its default backend.
Mix.install(deps, config: [nx: [default_backend: EXLA.Backend]])
|
``` |
|
|
|
|
|
|
|
This notebook transcribes recorded audio to text with Whisper, then summarizes the transcript with a local Ollama model.
|
|
|
|
|
|
|
```elixir |
|
defmodule Ollama do
  @moduledoc """
  Minimal client for the text-generation endpoint of an Ollama server
  listening on `localhost:11434`.
  """

  @api_endpoint "http://localhost:11434/api/generate"
  @model "llama2-uncensored"

  # Req's default receive timeout (15 seconds) is easily exceeded while a
  # local model loads and generates, so allow considerably more time.
  @receive_timeout :timer.minutes(5)

  @doc """
  Sends `prompt` to the configured model and returns the generated text.

  The reply is expected to be newline-delimited JSON, one object per line,
  each carrying a piece of the answer under the `"response"` key. The pieces
  are concatenated in order and returned as a single string.

  Raises if the request cannot be completed, if the server answers with a
  status other than 200, or if a line of the reply is not valid JSON.
  """
  @spec generate(String.t()) :: String.t()
  def generate(prompt) do
    payload = %{model: @model, prompt: prompt}

    # `post!/2` raises the underlying transport exception (for example when
    # the server is not running) instead of failing with a bare MatchError.
    @api_endpoint
    |> Req.post!(json: payload, receive_timeout: @receive_timeout)
    |> process_response()
  end

  # Successful reply: turn the body into the generated text.
  defp process_response(%{status: 200, body: body}), do: extract_text(body)

  # Any other status (for example an unknown model) is reported explicitly
  # rather than being turned into an empty result.
  defp process_response(%{status: status, body: body}) do
    raise "Ollama request failed with HTTP status #{status}: #{inspect(body)}"
  end

  # Body left as raw text: one JSON object per line.
  defp extract_text(body) when is_binary(body) do
    body
    |> String.split("\n", trim: true)
    |> Enum.map_join("", &chunk_text/1)
  end

  # Body already decoded into a map by Req (a plain JSON reply).
  defp extract_text(body) when is_map(body), do: get_content(body)

  # Decodes one line of the reply and pulls out its text.
  defp chunk_text(json_string) do
    json_string
    |> Jason.decode!()
    |> get_content()
  end

  # Objects without a "response" key contribute no text.
  defp get_content(%{"response" => response}), do: response
  defp get_content(_), do: ""
end
|
``` |
|
|
|
|
|
|
|
```elixir |
|
# Hugging Face repository that every Whisper component is loaded from.
model_name = "openai/whisper-base"
repo = {:hf, model_name}

{:ok, model_info} = Bumblebee.load_model(repo)
{:ok, featurizer} = Bumblebee.load_featurizer(repo)
{:ok, tokenizer} = Bumblebee.load_tokenizer(repo)

# Load the repository's generation config, then set max_new_tokens to 100.
{:ok, base_generation_config} = Bumblebee.load_generation_config(repo)
generation_config = Bumblebee.configure(base_generation_config, max_new_tokens: 100)

# Options for the speech-to-text serving: compile with batch size 4, using EXLA.
serving_opts = [
  compile: [batch_size: 4],
  defn_options: [compiler: EXLA]
]

serving =
  Bumblebee.Audio.speech_to_text(
    model_info,
    featurizer,
    tokenizer,
    generation_config,
    serving_opts
  )
|
``` |
|
|
|
```elixir |
|
# Audio input recorded at the featurizer's sampling rate, wrapped in a form
# with a submit button.
audio_input = Kino.Input.audio("", sampling_rate: featurizer.sampling_rate)
form = Kino.Control.form([audio: audio_input], submit: "Summary the audio")

# One frame for the transcription, one for the summary.
audio_frame = Kino.Frame.new(placeholder: false)
summary_frame = Kino.Frame.new(placeholder: false)

Kino.listen(form, fn %{data: %{audio: recording}} ->
  # Nothing to do when the form is submitted without a recording.
  if recording do
    Kino.Frame.render(audio_frame, Kino.Text.new("Running..."))

    # Read the raw bytes as f32 samples, arrange them as one column per
    # channel, then average across the channel axis to get a single channel.
    samples = Nx.from_binary(recording.data, :f32)
    per_channel = Nx.reshape(samples, {:auto, recording.num_channels})
    mono = Nx.mean(per_channel, axes: [1])

    %{results: [%{text: transcript}]} = Nx.Serving.run(serving, mono)
    Kino.Frame.render(audio_frame, Kino.Markdown.new("**Audio Content**: #{transcript}"))

    Kino.Frame.render(summary_frame, Kino.Markdown.new("Running"))

    summary = Ollama.generate("Please summary the text: #{transcript}")
    Kino.Frame.render(summary_frame, Kino.Markdown.new("**Summary**: #{summary}"))
  end
end)

# Cell output: the form and both frames in a boxed grid.
Kino.Layout.grid([form, audio_frame, summary_frame], boxed: true, gap: 16)
|
``` |
|
|