Spaces:
Build error
Build error
upgrade aws-sdk version
Browse files
- .dockerignore +6 -0
- Cargo.lock +31 -3
- Cargo.toml +3 -2
- Dockerfile +18 -40
- README.MD +1 -1
- docker-compose.yaml +16 -0
- src/config.rs +5 -0
- src/main.rs +9 -6
- src/whisper.rs +8 -9
.dockerignore
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.idea/
|
| 2 |
+
static/
|
| 3 |
+
target/
|
| 4 |
+
docker-compose.yaml
|
| 5 |
+
Dockerfile
|
| 6 |
+
README.MD
|
Cargo.lock
CHANGED
|
@@ -1100,6 +1100,15 @@ version = "0.4.20"
|
|
| 1100 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 1101 |
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
|
| 1102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1103 |
[[package]]
|
| 1104 |
name = "memchr"
|
| 1105 |
version = "2.6.4"
|
|
@@ -1495,8 +1504,17 @@ checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343"
|
|
| 1495 |
dependencies = [
|
| 1496 |
"aho-corasick",
|
| 1497 |
"memchr",
|
| 1498 |
-
"regex-automata",
|
| 1499 |
-
"regex-syntax",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1500 |
]
|
| 1501 |
|
| 1502 |
[[package]]
|
|
@@ -1507,9 +1525,15 @@ checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
|
|
| 1507 |
dependencies = [
|
| 1508 |
"aho-corasick",
|
| 1509 |
"memchr",
|
| 1510 |
-
"regex-syntax",
|
| 1511 |
]
|
| 1512 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1513 |
[[package]]
|
| 1514 |
name = "regex-syntax"
|
| 1515 |
version = "0.8.2"
|
|
@@ -2098,10 +2122,14 @@ version = "0.3.17"
|
|
| 2098 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2099 |
checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77"
|
| 2100 |
dependencies = [
|
|
|
|
| 2101 |
"nu-ansi-term",
|
|
|
|
|
|
|
| 2102 |
"sharded-slab",
|
| 2103 |
"smallvec",
|
| 2104 |
"thread_local",
|
|
|
|
| 2105 |
"tracing-core",
|
| 2106 |
"tracing-log",
|
| 2107 |
]
|
|
|
|
| 1100 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 1101 |
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
|
| 1102 |
|
| 1103 |
+
[[package]]
|
| 1104 |
+
name = "matchers"
|
| 1105 |
+
version = "0.1.0"
|
| 1106 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 1107 |
+
checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
|
| 1108 |
+
dependencies = [
|
| 1109 |
+
"regex-automata 0.1.10",
|
| 1110 |
+
]
|
| 1111 |
+
|
| 1112 |
[[package]]
|
| 1113 |
name = "memchr"
|
| 1114 |
version = "2.6.4"
|
|
|
|
| 1504 |
dependencies = [
|
| 1505 |
"aho-corasick",
|
| 1506 |
"memchr",
|
| 1507 |
+
"regex-automata 0.4.3",
|
| 1508 |
+
"regex-syntax 0.8.2",
|
| 1509 |
+
]
|
| 1510 |
+
|
| 1511 |
+
[[package]]
|
| 1512 |
+
name = "regex-automata"
|
| 1513 |
+
version = "0.1.10"
|
| 1514 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 1515 |
+
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
|
| 1516 |
+
dependencies = [
|
| 1517 |
+
"regex-syntax 0.6.29",
|
| 1518 |
]
|
| 1519 |
|
| 1520 |
[[package]]
|
|
|
|
| 1525 |
dependencies = [
|
| 1526 |
"aho-corasick",
|
| 1527 |
"memchr",
|
| 1528 |
+
"regex-syntax 0.8.2",
|
| 1529 |
]
|
| 1530 |
|
| 1531 |
+
[[package]]
|
| 1532 |
+
name = "regex-syntax"
|
| 1533 |
+
version = "0.6.29"
|
| 1534 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 1535 |
+
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
|
| 1536 |
+
|
| 1537 |
[[package]]
|
| 1538 |
name = "regex-syntax"
|
| 1539 |
version = "0.8.2"
|
|
|
|
| 2122 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2123 |
checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77"
|
| 2124 |
dependencies = [
|
| 2125 |
+
"matchers",
|
| 2126 |
"nu-ansi-term",
|
| 2127 |
+
"once_cell",
|
| 2128 |
+
"regex",
|
| 2129 |
"sharded-slab",
|
| 2130 |
"smallvec",
|
| 2131 |
"thread_local",
|
| 2132 |
+
"tracing",
|
| 2133 |
"tracing-core",
|
| 2134 |
"tracing-log",
|
| 2135 |
]
|
Cargo.toml
CHANGED
|
@@ -19,7 +19,7 @@ serde_yaml = "0.9"
|
|
| 19 |
tokio = { version = "1.33", features = ["macros", "rt-multi-thread", "sync"] }
|
| 20 |
tokio-stream = "0.1"
|
| 21 |
tracing = "0.1"
|
| 22 |
-
tracing-subscriber = "0.3"
|
| 23 |
whisper-rs = "0.8"
|
| 24 |
whisper-rs-sys = "0.6"
|
| 25 |
|
|
@@ -34,9 +34,10 @@ features = ["coreml"]
|
|
| 34 |
[target.aarch64-apple-darwin.dependencies.whisper-rs-sys]
|
| 35 |
version = "0.6"
|
| 36 |
|
| 37 |
-
|
| 38 |
[target.x86_64-unknown-linux-gnu.dependencies.whisper-rs]
|
|
|
|
| 39 |
features = ["cuda"]
|
| 40 |
|
| 41 |
[target.aarch64-unknown-linux-gnu.dependencies.whisper-rs]
|
|
|
|
| 42 |
features = ["cuda"]
|
|
|
|
| 19 |
tokio = { version = "1.33", features = ["macros", "rt-multi-thread", "sync"] }
|
| 20 |
tokio-stream = "0.1"
|
| 21 |
tracing = "0.1"
|
| 22 |
+
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
| 23 |
whisper-rs = "0.8"
|
| 24 |
whisper-rs-sys = "0.6"
|
| 25 |
|
|
|
|
| 34 |
[target.aarch64-apple-darwin.dependencies.whisper-rs-sys]
|
| 35 |
version = "0.6"
|
| 36 |
|
|
|
|
| 37 |
[target.x86_64-unknown-linux-gnu.dependencies.whisper-rs]
|
| 38 |
+
version = "0.8"
|
| 39 |
features = ["cuda"]
|
| 40 |
|
| 41 |
[target.aarch64-unknown-linux-gnu.dependencies.whisper-rs]
|
| 42 |
+
version = "0.8"
|
| 43 |
features = ["cuda"]
|
Dockerfile
CHANGED
|
@@ -1,46 +1,24 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
# Install python3.11 and build dependencies
|
| 5 |
-
RUN apt-get update
|
| 6 |
-
RUN apt-get install -y software-properties-common
|
| 7 |
-
#RUN add-apt-repository ppa:deadsnakes/ppa
|
| 8 |
-
|
| 9 |
-
RUN apt-get update
|
| 10 |
-
RUN apt-get install -y libssl-dev cmake python3-dev curl pkg-config clang
|
| 11 |
-
|
| 12 |
-
# install rust toolchain
|
| 13 |
-
RUN curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain stable -y
|
| 14 |
ENV PATH=/root/.cargo/bin:$PATH
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
|
| 17 |
-
COPY
|
| 18 |
-
|
| 19 |
-
# Download dependencies
|
| 20 |
-
RUN mkdir -p src/bin && echo "fn main() {println!(\"if you see this, the build broke\")}" > src/bin/bigbot.rs
|
| 21 |
-
RUN --mount=type=cache,target=/usr/local/cargo/registry \
|
| 22 |
-
--mount=type=cache,target=/usr/local/cargo/git \
|
| 23 |
-
cargo build --release
|
| 24 |
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
-
# Build the project with release profile
|
| 28 |
-
RUN --mount=type=cache,target=/usr/local/cargo/registry \
|
| 29 |
-
--mount=type=cache,target=/usr/local/cargo/git \
|
| 30 |
-
cargo build --release
|
| 31 |
-
|
| 32 |
-
# Runtime stage
|
| 33 |
FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04 as runtime
|
| 34 |
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
COPY --from=builder /target/release/polyhedron /usr/local/bin/polyhedron
|
| 40 |
-
COPY ./models/ggml-large-encoder.mlmodelc ./models/ggml-large-encoder.mlmodelc
|
| 41 |
-
COPY ./models/ggml-large.bin ./models/ggml-large.bin
|
| 42 |
-
COPY ./config/dev.yaml ./config/dev.yaml
|
| 43 |
-
COPY ./static ./static
|
| 44 |
-
|
| 45 |
-
# Run the binary
|
| 46 |
-
CMD ["polyhedron"]
|
|
|
|
| 1 |
+
FROM nvidia/cuda:12.2.2-devel-ubuntu22.04 as chef
|
| 2 |
+
RUN apt-get update && apt-get install -y curl
|
| 3 |
+
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --default-toolchain stable -y
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
ENV PATH=/root/.cargo/bin:$PATH
|
| 5 |
+
RUN cargo install cargo-chef
|
| 6 |
+
WORKDIR /app
|
| 7 |
|
| 8 |
+
FROM chef as planner
|
| 9 |
+
COPY . .
|
| 10 |
+
RUN cargo chef prepare --recipe-path recipe.json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
+
FROM chef as builder
|
| 13 |
+
RUN apt-get update && apt-get install -y cmake g++ libclang-dev libssl-dev pkg-config python3-dev
|
| 14 |
+
COPY --from=planner /app/recipe.json recipe.json
|
| 15 |
+
RUN cargo chef cook --release --recipe-path recipe.json
|
| 16 |
+
COPY . .
|
| 17 |
+
RUN cargo build --release
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04 as runtime
|
| 20 |
|
| 21 |
+
RUN apt-get update && apt-get install -y ca-certificates && rm -rf /var/lib/apt/lists/*
|
| 22 |
+
WORKDIR /app
|
| 23 |
+
COPY --from=builder /app/target/release/polyhedron .
|
| 24 |
+
ENTRYPOINT ["./polyhedron"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.MD
CHANGED
|
@@ -22,7 +22,7 @@ Configuration like AWS credentials and models are specified in config.yaml.
|
|
| 22 |
To run Polyhedron locally:
|
| 23 |
|
| 24 |
1. Configure your AWS account via https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html
|
| 25 |
-
2. Clone the repository, Run `
|
| 26 |
3. Open http://localhost:8080 in the browser
|
| 27 |
## Architecture
|
| 28 |
|
|
|
|
| 22 |
To run Polyhedron locally:
|
| 23 |
|
| 24 |
1. Configure your AWS account via https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html
|
| 25 |
+
2. Clone the repository and run `docker compose up`
|
| 26 |
3. Open http://localhost:8080 in the browser
|
| 27 |
## Architecture
|
| 28 |
|
docker-compose.yaml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version: "3"
|
| 2 |
+
services:
|
| 3 |
+
polyhedron:
|
| 4 |
+
container_name: polyhedron
|
| 5 |
+
build: ./
|
| 6 |
+
# image: vitongue/polyhedron:latest
|
| 7 |
+
environment:
|
| 8 |
+
- AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION}
|
| 9 |
+
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
|
| 10 |
+
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
|
| 11 |
+
- RUST_LOG=polyhedron=debug
|
| 12 |
+
volumes:
|
| 13 |
+
- ./config:/app/config
|
| 14 |
+
- ./static:/app/static
|
| 15 |
+
ports:
|
| 16 |
+
- "8080:8080"
|
src/config.rs
CHANGED
|
@@ -4,6 +4,7 @@ use config::{Config, Environment, File};
|
|
| 4 |
use once_cell::sync::Lazy;
|
| 5 |
use serde::Deserialize;
|
| 6 |
use whisper_rs::FullParams;
|
|
|
|
| 7 |
|
| 8 |
pub(crate) static SETTINGS: Lazy<Settings> =
|
| 9 |
Lazy::new(|| Settings::new().expect("Failed to initialize settings"));
|
|
@@ -85,6 +86,10 @@ impl Settings {
|
|
| 85 |
.map_err(anyhow::Error::from)?;
|
| 86 |
|
| 87 |
config.try_deserialize::<Self>().map_err(Into::into)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
}
|
| 89 |
}
|
| 90 |
|
|
|
|
| 4 |
use once_cell::sync::Lazy;
|
| 5 |
use serde::Deserialize;
|
| 6 |
use whisper_rs::FullParams;
|
| 7 |
+
use tracing::debug;
|
| 8 |
|
| 9 |
pub(crate) static SETTINGS: Lazy<Settings> =
|
| 10 |
Lazy::new(|| Settings::new().expect("Failed to initialize settings"));
|
|
|
|
| 86 |
.map_err(anyhow::Error::from)?;
|
| 87 |
|
| 88 |
config.try_deserialize::<Self>().map_err(Into::into)
|
| 89 |
+
.map(|settings| {
|
| 90 |
+
debug!("Settings: {settings:?}");
|
| 91 |
+
settings
|
| 92 |
+
})
|
| 93 |
}
|
| 94 |
}
|
| 95 |
|
src/main.rs
CHANGED
|
@@ -19,6 +19,8 @@ use poem::{
|
|
| 19 |
};
|
| 20 |
use serde::{Deserialize, Serialize};
|
| 21 |
use tokio::select;
|
|
|
|
|
|
|
| 22 |
|
| 23 |
use crate::{config::*, lesson::*, whisper::*};
|
| 24 |
|
|
@@ -34,11 +36,12 @@ struct Context {
|
|
| 34 |
|
| 35 |
#[tokio::main]
|
| 36 |
async fn main() -> Result<(), std::io::Error> {
|
| 37 |
-
tracing_subscriber::
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
|
| 40 |
-
tracing::debug!("Transcribe client version: {}", PKG_VERSION);
|
| 41 |
-
}
|
| 42 |
|
| 43 |
let shared_config = aws_config::load_from_env().await;
|
| 44 |
let ctx = Context {
|
|
@@ -95,8 +98,8 @@ async fn stream_speaker(
|
|
| 95 |
ws.on_upgrade(|mut socket| async move {
|
| 96 |
let _origin_tx = lesson.voice_channel();
|
| 97 |
let mut transcribe_rx = lesson.transcript_channel();
|
| 98 |
-
let whisper =
|
| 99 |
-
|
| 100 |
let mut whisper_transcribe_rx = whisper.subscribe();
|
| 101 |
loop {
|
| 102 |
select! {
|
|
|
|
| 19 |
};
|
| 20 |
use serde::{Deserialize, Serialize};
|
| 21 |
use tokio::select;
|
| 22 |
+
use tracing::debug;
|
| 23 |
+
use tracing_subscriber::{fmt, prelude::*, EnvFilter};
|
| 24 |
|
| 25 |
use crate::{config::*, lesson::*, whisper::*};
|
| 26 |
|
|
|
|
| 36 |
|
| 37 |
#[tokio::main]
|
| 38 |
async fn main() -> Result<(), std::io::Error> {
|
| 39 |
+
tracing_subscriber::registry()
|
| 40 |
+
.with(fmt::layer())
|
| 41 |
+
.with(EnvFilter::from_default_env())
|
| 42 |
+
.init();
|
| 43 |
|
| 44 |
+
debug!("Transcribe client version: {}", PKG_VERSION);
|
|
|
|
|
|
|
| 45 |
|
| 46 |
let shared_config = aws_config::load_from_env().await;
|
| 47 |
let ctx = Context {
|
|
|
|
| 98 |
ws.on_upgrade(|mut socket| async move {
|
| 99 |
let _origin_tx = lesson.voice_channel();
|
| 100 |
let mut transcribe_rx = lesson.transcript_channel();
|
| 101 |
+
let whisper = WhisperHandler::new(SETTINGS.whisper.clone(), prompt)
|
| 102 |
+
.expect("failed to create whisper");
|
| 103 |
let mut whisper_transcribe_rx = whisper.subscribe();
|
| 104 |
loop {
|
| 105 |
select! {
|
src/whisper.rs
CHANGED
|
@@ -8,6 +8,7 @@ use std::{
|
|
| 8 |
|
| 9 |
use once_cell::sync::Lazy;
|
| 10 |
use tokio::sync::{broadcast, mpsc, oneshot};
|
|
|
|
| 11 |
use whisper_rs::{convert_integer_to_float_audio, WhisperContext, WhisperState, WhisperToken};
|
| 12 |
use whisper_rs_sys::WHISPER_SAMPLE_RATE;
|
| 13 |
|
|
@@ -124,15 +125,13 @@ impl WhisperHandler {
|
|
| 124 |
}
|
| 125 |
};
|
| 126 |
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
);
|
| 135 |
-
}
|
| 136 |
}
|
| 137 |
|
| 138 |
if let Err(e) = shared_transcription_tx.send(segments) {
|
|
|
|
| 8 |
|
| 9 |
use once_cell::sync::Lazy;
|
| 10 |
use tokio::sync::{broadcast, mpsc, oneshot};
|
| 11 |
+
use tracing::trace;
|
| 12 |
use whisper_rs::{convert_integer_to_float_audio, WhisperContext, WhisperState, WhisperToken};
|
| 13 |
use whisper_rs_sys::WHISPER_SAMPLE_RATE;
|
| 14 |
|
|
|
|
| 125 |
}
|
| 126 |
};
|
| 127 |
|
| 128 |
+
for segment in segments.iter() {
|
| 129 |
+
trace!(
|
| 130 |
+
"[{}-{}]s SEGMENT: {}",
|
| 131 |
+
segment.start_timestamp as f32 / 1000.0,
|
| 132 |
+
segment.end_timestamp as f32 / 1000.0,
|
| 133 |
+
segment.text
|
| 134 |
+
);
|
|
|
|
|
|
|
| 135 |
}
|
| 136 |
|
| 137 |
if let Err(e) = shared_transcription_tx.send(segments) {
|