Spaces:

mohamedrady
/

clockwork-temptation

Runtime error

App Files Community

mohamedrady committed on Jul 18, 2024

Commit

260a06d

verified ·

1 Parent(s): 49fcff6

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -38

app.py CHANGED Viewed

@@ -9,21 +9,19 @@ from transformers import pipeline, AutoModel, AutoTokenizer
 import PyPDF2
 import gradio as gr
 import openai
-from haystack.nodes import FARMReader
-from paddlenlp import Taskflow
 # تحميل وتفعيل الأدوات المطلوبة
 nltk.download('punkt')
 # التحقق من توفر GPU واستخدامه
-device = 0 if torch.cuda.is_available() else -1
 # تحميل نماذج التحليل اللغوي
 analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", device=device)
-# تحميل نموذج التعرف على الكيانات في camel_tools
-ner = NERecognizer.pretrained()
 # تحميل نماذج BERT، GPT2، ELECTRA، و AraBERT
 arabic_bert_tokenizer = AutoTokenizer.from_pretrained("asafaya/bert-base-arabic")
 arabic_bert_model = AutoModel.from_pretrained("asafaya/bert-base-arabic")
@@ -37,22 +35,14 @@ arabic_electra_model = AutoModel.from_pretrained("aubmindlab/araelectra-base-dis
 arabert_tokenizer = AutoTokenizer.from_pretrained("aubmindlab/bert-base-arabertv02")
 arabert_model = AutoModel.from_pretrained("aubmindlab/bert-base-arabertv02")
-# إعداد OpenAI API
-openai.api_key = os.getenv("sk-proj-62TDbO5KABSdkZaFPPD4T3BlbkFJkhqOYpHhL6OucTzNdWSU")
-# إعداد farm-haystack
-reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2")
-# إعداد paddlenlp
-ner_task = Taskflow("ner")
 # دالة لتحليل النص باستخدام camel_tools
 def camel_ner_analysis(text):
     tokens = simple_word_tokenize(text)
     entities = ner.predict(tokens)
     entity_dict = {"PERSON": [], "LOC": [], "ORG": [], "DATE": []}
     for token, tag in zip(tokens, entities):
-        if tag in entity_dict:
             entity_dict[tag].append((token, tag))
     return entity_dict
@@ -71,7 +61,7 @@ def nltk_extract_quotes(text):
     quotes = []
     sentences = nltk.tokenize.sent_tokenize(text, language='arabic')
     for sentence in sentences:
-        if '"' in sentence or '«' in sentence or '»' in sentence:
             quotes.append(sentence)
     return quotes
@@ -82,10 +72,10 @@ def count_tokens(text):
 # دالة لاستخراج النص من ملفات PDF
 def extract_pdf_text(file_path):
-    with open(file_path, "rb") as pdf_file:
         pdf_reader = PyPDF2.PdfReader(pdf_file)
         text = ""
-        for page_num in range(len(pdf_reader.pages)):
             page = pdf_reader.pages[page_num]
             text += page.extract_text()
     return text
@@ -93,7 +83,7 @@ def extract_pdf_text(file_path):
 # دالة لاستخراج المشاهد من النص
 def extract_scenes(text):
     scenes = re.split(r'داخلي|خارجي', text)
-    scenes = [scene.strip() for scene in scenes if scene.strip()]
     return scenes
 # دالة لاستخراج تفاصيل المشهد (المكان والوقت)
@@ -102,9 +92,9 @@ def extract_scene_details(scene):
     location_match = re.search(r'(داخلي|خارجي)', scene)
     time_match = re.search(r'(ليلاً|نهاراً|شروق|غروب)', scene)
-    if location_match:
         details['location'] = location_match.group()
-    if time_match:
         details['time'] = time_match.group()
     return details
@@ -135,11 +125,11 @@ def analyze_and_complete(file_paths):
     results = []
     output_directory = os.getenv("SPACE_DIR", "/app/output")
-    for file_path in file_paths:
-        if file_path.endswith(".pdf"):
             text = extract_pdf_text(file_path)
         else:
-            with open(file_path, "r", encoding="utf-8") as file:
                 text = file.read()
         filename_prefix = os.path.splitext(os.path.basename(file_path))[0]
@@ -155,47 +145,47 @@ def analyze_and_complete(file_paths):
         character_frequency = extract_character_frequency(camel_entities)
         dialogues = extract_dialogues(text)
-        scene_details = [extract_scene_details(scene) for scene in scenes]
         # حفظ النتائج إلى ملفات
-        with open(os.path.join(output_directory, f"{filename_prefix}_entities.txt"), "w", encoding="utf-8") as file:
             file.write(str(camel_entities))
-        with open(os.path.join(output_directory, f"{filename_prefix}_sentiments.txt"), "w", encoding="utf-8") as file:
             file.write(str(sentiments))
-        with open(os.path.join(output_directory, f"{filename_prefix}_sentences.txt"), "w", encoding="utf-8") as file:
             file.write("\n".join(sentences))
-        with open(os.path.join(output_directory, f"{filename_prefix}_quotes.txt"), "w", encoding="utf-8") as file:
             file.write("\n".join(quotes))
-        with open(os.path.join(output_directory, f"{filename_prefix}_token_count.txt"), "w", encoding="utf-8") as file:
             file.write(str(token_count))
-        with open(os.path.join(output_directory, f"{filename_prefix}_scenes.txt"), "w", encoding="utf-8") as file:
             file.write("\n".join(scenes))
-        with open(os.path.join(output_directory, f"{filename_prefix}_scene_details.txt"), "w", encoding="utf-8") as file:
             file.write(str(scene_details))
-        with open(os.path.join(output_directory, f"{filename_prefix}_ages.txt"), "w", encoding="utf-8") as file:
             file.write(str(ages))
-        with open(os.path.join(output_directory, f"{filename_prefix}_character_descriptions.txt"), "w", encoding="utf-8") as file:
             file.write(str(character_descriptions))
-        with open(os.path.join(output_directory, f"{filename_prefix}_character_frequency.txt"), "w", encoding="utf-8") as file:
             file.write(str(character_frequency))
-        with open(os.path.join(output_directory, f"{filename_prefix}_dialogues.txt"), "w", encoding="utf-8") as file:
             file.write(str(dialogues))
         results.append((str(camel_entities), str(sentiments), "\n".join(sentences), "\n".join(quotes), str(token_count), "\n".join(scenes), str(scene_details), str(ages), str(character_descriptions), str(character_frequency), str(dialogues)))
     return results
-# تعريف واجهة Gradio
 interface = gr.Interface(
     fn=analyze_and_complete,
     inputs=gr.File(file_count="multiple", type="filepath"),

 import PyPDF2
 import gradio as gr
 import openai
+# تعيين التوكن الخاص بـ OpenAI
+openai.api_key = "sk-proj-62TDbO5KABSdkZaFPPD4T3BlbkFJkhqOYpHhL6OucTzNdWSU"
 # تحميل وتفعيل الأدوات المطلوبة
 nltk.download('punkt')
 # التحقق من توفر GPU واستخدامه
+device = 0 إذا torch.cuda.is_available() else -1
 # تحميل نماذج التحليل اللغوي
 analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", device=device)
 # تحميل نماذج BERT، GPT2، ELECTRA، و AraBERT
 arabic_bert_tokenizer = AutoTokenizer.from_pretrained("asafaya/bert-base-arabic")
 arabic_bert_model = AutoModel.from_pretrained("asafaya/bert-base-arabic")
 arabert_tokenizer = AutoTokenizer.from_pretrained("aubmindlab/bert-base-arabertv02")
 arabert_model = AutoModel.from_pretrained("aubmindlab/bert-base-arabertv02")
 # دالة لتحليل النص باستخدام camel_tools
 def camel_ner_analysis(text):
+    ner = NERecognizer.pretrained()
     tokens = simple_word_tokenize(text)
     entities = ner.predict(tokens)
     entity_dict = {"PERSON": [], "LOC": [], "ORG": [], "DATE": []}
     for token, tag in zip(tokens, entities):
+        إذا tag in entity_dict:
             entity_dict[tag].append((token, tag))
     return entity_dict
     quotes = []
     sentences = nltk.tokenize.sent_tokenize(text, language='arabic')
     for sentence in sentences:
+        إذا '"' in sentence أو '«' in sentence أو '»' in sentence:
             quotes.append(sentence)
     return quotes
 # دالة لاستخراج النص من ملفات PDF
 def extract_pdf_text(file_path):
+    مع open(file_path, "rb") كما pdf_file:
         pdf_reader = PyPDF2.PdfReader(pdf_file)
         text = ""
+        لكل page_num in range(len(pdf_reader.pages)):
             page = pdf_reader.pages[page_num]
             text += page.extract_text()
     return text
 # دالة لاستخراج المشاهد من النص
 def extract_scenes(text):
     scenes = re.split(r'داخلي|خارجي', text)
+    scenes = [scene.strip() for scene in scenes إذا scene.strip()]
     return scenes
 # دالة لاستخراج تفاصيل المشهد (المكان والوقت)
     location_match = re.search(r'(داخلي|خارجي)', scene)
     time_match = re.search(r'(ليلاً|نهاراً|شروق|غروب)', scene)
+    إذا location_match:
         details['location'] = location_match.group()
+    إذا time_match:
         details['time'] = time_match.group()
     return details
     results = []
     output_directory = os.getenv("SPACE_DIR", "/app/output")
+    لكل file_path in file_paths:
+        إذا file_path.endswith(".pdf"):
             text = extract_pdf_text(file_path)
         else:
+            مع open(file_path, "r", encoding="utf-8") كما file:
                 text = file.read()
         filename_prefix = os.path.splitext(os.path.basename(file_path))[0]
         character_frequency = extract_character_frequency(camel_entities)
         dialogues = extract_dialogues(text)
+        scene_details = [extract_scene_details(scene) لكل scene in scenes]
         # حفظ النتائج إلى ملفات
+        مع open(os.path.join(output_directory, f"{filename_prefix}_entities.txt"), "w", encoding="utf-8") كما file:
             file.write(str(camel_entities))
+        مع open(os.path.join(output_directory, f"{filename_prefix}_sentiments.txt"), "w", encoding="utf-8") كما file:
             file.write(str(sentiments))
+        مع open(os.path.join(output_directory, f"{filename_prefix}_sentences.txt"), "w", encoding="utf-8") كما file:
             file.write("\n".join(sentences))
+        مع open(os.path.join(output_directory, f"{filename_prefix}_quotes.txt"), "w", encoding="utf-8") كما file:
             file.write("\n".join(quotes))
+        مع open(os.path.join(output_directory, f"{filename_prefix}_token_count.txt"), "w", encoding="utf-8") كما file:
             file.write(str(token_count))
+        مع open(os.path.join(output_directory, f"{filename_prefix}_scenes.txt"), "w", encoding="utf-8") كما file:
             file.write("\n".join(scenes))
+        مع open(os.path.join(output_directory, f"{filename_prefix}_scene_details.txt"), "w", encoding="utf-8") كما file:
             file.write(str(scene_details))
+        مع open(os.path.join(output_directory, f"{filename_prefix}_ages.txt"), "w", encoding="utf-8") كما file:
             file.write(str(ages))
+        مع open(os.path.join(output_directory, f"{filename_prefix}_character_descriptions.txt"), "w", encoding="utf-8") كما file:
             file.write(str(character_descriptions))
+        مع open(os.path.join(output_directory, f"{filename_prefix}_character_frequency.txt"), "w", encoding="utf-8") كما file:
             file.write(str(character_frequency))
+        مع open(os.path.join(output_directory, f"{filename_prefix}_dialogues.txt"), "w", encoding="utf-8") كما file:
             file.write(str(dialogues))
         results.append((str(camel_entities), str(sentiments), "\n".join(sentences), "\n".join(quotes), str(token_count), "\n".join(scenes), str(scene_details), str(ages), str(character_descriptions), str(character_frequency), str(dialogues)))
     return results
+## تعريف واجهة Gradio
 interface = gr.Interface(
     fn=analyze_and_complete,
     inputs=gr.File(file_count="multiple", type="filepath"),