Spaces:

Hijiki-HF
/

blog_creation

Running

App Files Files Community

Hijiki committed on May 11

Commit

28c88f6

unverified ·

2 Parent(s): c2806e9 b22544c

Merge pull request #7 from hijiki-my-dev/develop

Browse files

Files changed (4) hide show

.gitignore +3 -0
README.md +20 -1
requirements.txt +3 -66
src/app.py +22 -18

.gitignore CHANGED Viewed

@@ -1,5 +1,8 @@
 models/
 src/data/
 tmp/
 # Byte-compiled / optimized / DLL files

+fig_memo/
 models/
+practice/
 src/data/
+src/model/
 tmp/
 # Byte-compiled / optimized / DLL files

README.md CHANGED Viewed

@@ -46,4 +46,23 @@ LLMやBERTなどの自然言語処理技術を使ったプロジェクトの練
     ├── collect # データセットを作成する
     ├── data
     └── app.py
-```

     ├── collect # データセットを作成する
     ├── data
     └── app.py
+```
+## 実行方法
+- ローカル
+## メモ
+### モデルについて
+- （2025/5/10）LLMをCPUで使用するのはかなり厳しい。gguf形式のものを適切に使用すれば可能かもしれないが、まずはt5などを使用する？
+- LLMについて比較を行った結果
+    - SakanaAI/TinySwallow-1.5B-Instruct（1.5Bということを考慮に入れるとgemma3以上？）
+        - gguf形式ならCPUでも推論可能なはず。ただし、Colabでは6分かかる…
+    - google/gemma-3-4b-it（圧倒的。1bは英語のみ対応）
+    - Rakuten/RakutenAI-2.0-mini-instruct（かなり良い）
+    - rinna/gemma-2-baku-2b-it（そこそこ。実行方法が悪い？）
+    - google/gemma-2-2b-jpn-it（同）
+    - meta-llama/Llama-3.2-3B-Instruct（日本語対応してない）
+    - microsoft/Phi-4-mini-instruct
+    - lightblue/DeepSeek-R1-Distill-Qwen-1.5B-Multilingual

requirements.txt CHANGED Viewed

@@ -1,66 +1,3 @@
-altair==5.5.0
-appnope==0.1.4
-asttokens==3.0.0
-attrs==25.1.0
-blinker==1.9.0
-cachetools==5.5.1
-certifi==2025.1.31
-charset-normalizer==3.4.1
-click==8.1.8
-comm==0.2.2
-debugpy==1.8.9
-decorator==5.1.1
-diskcache==5.6.3
-executing==2.1.0
-gitdb==4.0.12
-GitPython==3.1.44
-idna==3.10
-ipykernel==6.29.5
-ipython==8.30.0
-jedi==0.19.2
-Jinja2==3.1.4
-jsonschema==4.23.0
-jsonschema-specifications==2024.10.1
-jupyter_client==8.6.3
-jupyter_core==5.7.2
-llama_cpp_python==0.3.2
-markdown-it-py==3.0.0
-MarkupSafe==3.0.2
-matplotlib-inline==0.1.7
-mdurl==0.1.2
-narwhals==1.27.1
-nest-asyncio==1.6.0
-numpy==2.1.3
-packaging==24.2
-pandas==2.2.3
-parso==0.8.4
-pexpect==4.9.0
-pillow==11.1.0
-platformdirs==4.3.6
-prompt_toolkit==3.0.48
-protobuf==5.29.3
-psutil==6.1.0
-ptyprocess==0.7.0
-pure_eval==0.2.3
-pyarrow==19.0.0
-pydeck==0.9.1
-Pygments==2.18.0
-python-dateutil==2.9.0.post0
-pytz==2024.2
-pyzmq==26.2.0
-referencing==0.36.2
-requests==2.32.3
-rich==13.9.4
-rpds-py==0.22.3
-six==1.17.0
-smmap==5.0.2
-stack-data==0.6.3
-streamlit==1.42.1
-tenacity==9.0.0
-toml==0.10.2
-tornado==6.4.2
-traitlets==5.14.3
-typing_extensions==4.12.2
-tzdata==2024.2
-urllib3==2.3.0
-wcwidth==0.2.13

+huggingface-hub
+llama-cpp-python
+streamlit

src/app.py CHANGED Viewed

@@ -1,41 +1,45 @@
 import streamlit as st
 # ページ設定
 st.set_page_config(
-    page_title="小説感想生成アプリ（デモ）",
     page_icon="📚",
     layout="centered",
 )
 # アプリのタイトル
-st.title("小説感想生成アプリ（デモ版）")
-st.subheader("あなたの入力をそのまま返します")
 # 入力フォーム
 with st.form("input_form"):
-    novel_title = st.text_input("小説のタイトル", placeholder="例：人間失格")
-    summary = st.text_area("あらすじや感想メモ", height=200, placeholder="例：主人公の葉蔵は自分を「人間失格」だと考えている...")
     submit_button = st.form_submit_button("生成")
 # 送信ボタンが押されたら結果を表示
 if submit_button:
-    st.markdown("## 入力内容")
-    st.write(f"**タイトル:** {novel_title}")
-    st.write("**あらすじや感想メモ:**")
-    st.write(summary)
-    st.markdown("---")
     st.markdown("## 生成された感想記事（デモ）")
     st.info(f"""
-    【{novel_title}】についての感想
-    {summary}
-    ※このデモ版では入力内容をそのまま返しています。
-    実際のアプリではここにLLMによって生成された内容が表示されます。
     """)
 # フッター
 st.markdown("---")
-st.caption("Powered by Streamlit & Hugging Face")

+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
 import streamlit as st
+MAX_OUTPUT_TOKENS = 512
+@st.cache_resource
+def _load_llm():
+    """Fetch the GGUF weights and build the Llama instance once per process.
+
+    Streamlit re-runs the whole script on every interaction, so without
+    caching the model file would be resolved and loaded again on each form
+    submission. The cached value is shared by all sessions, so a lock is
+    returned with it to keep generation calls on the single instance
+    from overlapping.
+
+    Returns:
+        A ``(llm, lock)`` tuple: the loaded ``Llama`` object and the
+        ``threading.Lock`` that guards it.
+    """
+    import threading
+
+    repo_id = "SakanaAI/TinySwallow-1.5B-Instruct-GGUF"
+    filename = "tinyswallow-1.5b-instruct-q5_k_m.gguf"
+    model_path = hf_hub_download(repo_id=repo_id, filename=filename)
+    llm = Llama(model_path=model_path, n_ctx=4096, n_gpu_layers=-1)
+    return llm, threading.Lock()
+
+
+def summarize_article(input_text):
+    """Summarize ``input_text`` in Japanese with the TinySwallow GGUF model.
+
+    Args:
+        input_text: Article text entered by the user.
+
+    Returns:
+        The generated summary as a plain string with surrounding whitespace
+        removed, or an empty string when ``input_text`` is empty or blank.
+    """
+    # Nothing to summarize: skip the (slow) model call entirely.
+    if not input_text or not input_text.strip():
+        return ""
+    llm, lock = _load_llm()
+    prompt = f"以下のテキストを日本語で約400字程度に要約してください。特に固有名詞や専門用語は正確に含めてください。テキスト: {input_text} 要約: "
+    # NOTE(review): the prompt plus MAX_OUTPUT_TOKENS presumably has to fit
+    # within n_ctx=4096; very long articles may need truncation - confirm.
+    with lock:
+        response = llm(prompt, max_tokens=MAX_OUTPUT_TOKENS)
+    return response["choices"][0]["text"].strip()
 # Page configuration: tab title, icon and a centered layout
 st.set_page_config(
+    page_title="記事要約（デモ）",
     page_icon="📚",
     layout="centered",
 )
 # App title and subtitle
+st.title("記事要約（デモ）")
+st.subheader("入力を元に要約を生成します")
 # Input form: the article text area and the submit button
 with st.form("input_form"):
+    input_text = st.text_area("記事内容", height=200, placeholder="例：主人公の葉蔵は自分を「人間失格」だと考えている...")
     submit_button = st.form_submit_button("生成")
 # Show the result once the submit button has been pressed
 if submit_button:
+    # summarize_article returns the generated text as a plain str, so it is
+    # interpolated directly below; subscripting it with a string key would
+    # raise TypeError.
+    summary = summarize_article(input_text)
     st.markdown("## 生成された感想記事（デモ）")
     st.info(f"""
+    {summary}
     """)
 # Footer: a horizontal rule followed by the credits caption
 st.markdown("---")
+st.caption("Powered by Streamlit & Hugging Face")