Fix: info block is now shown only for PDF summaries; token limit lowered to 1000
Files changed:
- summarizer.py: +1 -1
- ui.py: +20 -11
summarizer.py CHANGED
@@ -68,7 +68,7 @@ def summarize_text(text, mode, model_name="anthropic/claude-3-haiku", lang_mode=
         "messages": [
             {"role": "user", "content": build_prompt(text, mode, lang_mode, is_table)}
         ],
-        "max_tokens":
+        "max_tokens": 1000
     }
 
     try:
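For context, a minimal sketch of the kind of request body this hunk edits, assuming summarize_text posts an OpenAI-style chat payload to OpenRouter. The model name "anthropic/claude-3-haiku" comes from the diff; the endpoint URL, payload variable, and response handling below are assumptions for illustration, not code from the repo.

import requests

def summarize_text_sketch(text, api_key, model_name="anthropic/claude-3-haiku"):
    # Hypothetical reconstruction of the request around the changed line.
    # "max_tokens" caps the length of the model's reply; this commit sets it to 1000.
    payload = {
        "model": model_name,
        "messages": [
            {"role": "user", "content": text}
        ],
        "max_tokens": 1000,
    }
    resp = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",  # assumed endpoint
        headers={"Authorization": f"Bearer {api_key}"},
        json=payload,
        timeout=60,
    )
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]

A lower max_tokens shortens each per-chunk summary and reduces cost, and it pairs with the 1000-token chunking change in ui.py below.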
ui.py CHANGED
@@ -7,33 +7,41 @@ from utils import chunk_text_by_tokens
 
 def process_input(pdf, image, manual_text, mode, model_name, start_page, end_page, lang_mode, is_table):
     if is_table and model_name != "anthropic/claude-3-haiku":
-        return "Tablo içeriği için yalnızca Claude önerilir.","",None
+        return "Tablo içeriği için yalnızca Claude önerilir.", "", None
 
+    info_block = ""
+
     if pdf is not None:
         text_chunks = extract_text_chunks_from_pdf(pdf, start=int(start_page), end=int(end_page))
         if any("[ERROR]" in chunk for chunk in text_chunks):
             return text_chunks[0], "", None
+
+        all_text = "\n\n".join(text_chunks)
+        chunk_count = len(chunk_text_by_tokens(all_text, max_tokens=1000))
+        info_block = f"""
+        Sayfa Aralığı: {start_page}–{end_page}
+        Model: {model_name}
+        Chunk Sayısı: {chunk_count}
+        """.strip()
+
     elif image is not None:
         text = extract_text_from_image(image)
         if "[ERROR]" in text:
             return text, "", None
         text_chunks = [text]
+        all_text = text
+
     elif manual_text.strip() != "":
         text_chunks = [manual_text]
+        all_text = manual_text
+
     else:
         return "Lütfen bir giriş türü seçin.", "", None
 
-    all_text = "\n\n".join(text_chunks)
-    chunk_count = len(chunk_text_by_tokens(all_text, max_tokens=1300))
-
-    info_block = f"""
-    Sayfa Aralığı: {start_page}–{end_page}
-    Model: {model_name}
-    Chunk Sayısı: {chunk_count}
-    """.strip()
-
     full_summary = summarize_long_text(all_text, mode, model_name, lang_mode, is_table)
-
+
+    if info_block:
+        full_summary = f"{info_block}\n\n{full_summary}"
 
     temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode='w', encoding='utf-8')
     temp_file.write(full_summary)
@@ -42,6 +50,7 @@ def process_input(pdf, image, manual_text, mode, model_name, start_page, end_pag
     return all_text, full_summary, temp_file.name
 
 
+
 with gr.Blocks() as demo:
     gr.Markdown("## VizSum")
 
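To make the behavioral change concrete, here is a small self-contained sketch of the new control flow: the info block is built only in the PDF branch and prepended only when it is non-empty, so image and manual-text summaries come back without the header. chunk_text_by_tokens is stubbed with a whitespace splitter purely for illustration; the real helper lives in utils.py and its tokenization will differ.

def chunk_text_by_tokens_stub(text, max_tokens=1000):
    # Illustrative stand-in for utils.chunk_text_by_tokens:
    # group whitespace-separated words into max_tokens-sized chunks.
    words = text.split()
    return [" ".join(words[i:i + max_tokens])
            for i in range(0, len(words), max_tokens)] or [""]

def build_info_block(all_text, start_page, end_page, model_name, is_pdf):
    # Mirrors the new ui.py logic: only PDF input produces an info block.
    if not is_pdf:
        return ""
    chunk_count = len(chunk_text_by_tokens_stub(all_text, max_tokens=1000))
    return "\n".join([
        f"Sayfa Aralığı: {start_page}–{end_page}",
        f"Model: {model_name}",
        f"Chunk Sayısı: {chunk_count}",
    ])

summary = "Özet metni..."

# PDF input: the header is prepended (2500 words -> 3 chunks of up to 1000).
info = build_info_block("kelime " * 2500, 1, 5, "anthropic/claude-3-haiku", is_pdf=True)
print(f"{info}\n\n{summary}" if info else summary)

# Image or manual-text input: info stays empty, so the summary is printed unchanged.
info = build_info_block("kısa bir metin", 1, 1, "anthropic/claude-3-haiku", is_pdf=False)
print(f"{info}\n\n{summary}" if info else summary)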