Spaces:

MHamdan
/

ContentAnalyzer

Sleeping

App Files Files Community

MHamdan committed on Feb 15

Commit

5215be1

1 Parent(s): dbf46ef

Initial content analyzer setup

Browse files

Files changed (4) hide show

README.md +20 -9
app.py +204 -0
deploy_to_hf.py +113 -0
requirements.txt +8 -0

README.md CHANGED Viewed

@@ -1,14 +1,25 @@
----
-title: ContentAnalyzer
-emoji: 📉
-colorFrom: red
-colorTo: purple
 sdk: gradio
-sdk_version: 5.16.0
 app_file: app.py
 pinned: false
-license: apache-2.0
-short_description: general content analyzer
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: Content Analyzer
+emoji: 📑
+colorFrom: blue
+colorTo: indigo
 sdk: gradio
+sdk_version: 4.0.0
 app_file: app.py
 pinned: false
 ---
+# Content Analyzer
+An advanced content analysis tool that can process:
+- Text input
+- Web URLs
+- Document files (.txt, .pdf, .docx)
+## Features
+- Text summarization
+- Sentiment analysis
+- Topic detection

app.py ADDED Viewed

	@@ -0,0 +1,204 @@

+# app.py
+import gradio as gr
+import requests
+from bs4 import BeautifulSoup
+from transformers import pipeline
+import PyPDF2
+import docx
+import os
+from typing import List, Tuple, Optional
+from smolagents import CodeAgent, HfApiModel, Tool
+class _ContentError(Exception):
+    """Internal signal that a source could not be turned into text; str(exc) is the user-facing message."""
+class ContentAnalyzer:
+    """Summarize, score sentiment and detect topics for text taken from a string, a URL or a file."""
+    def __init__(self):
+        # Initialize models
+        self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+        self.sentiment_analyzer = pipeline("sentiment-analysis")
+        self.zero_shot = pipeline("zero-shot-classification")
+    def _read_file_text(self, file_obj) -> str:
+        """Return the text of a .txt, .pdf or .docx source, raising _ContentError on failure.
+        file_obj may be an object with a ``name`` attribute (optionally with ``read()``) or a plain path string.
+        """
+        # Accept both "object with .name" and "bare path string" inputs.
+        # NOTE(review): newer Gradio versions appear to hand callbacks a path string rather than an open file - confirm.
+        source = getattr(file_obj, "name", file_obj)
+        file_ext = os.path.splitext(str(source))[1].lower()
+        if file_ext not in ('.txt', '.pdf', '.docx'):
+            raise _ContentError(f"Unsupported file type: {file_ext}")
+        readable = hasattr(file_obj, "read")
+        try:
+            if file_ext == '.txt':
+                if readable:
+                    data = file_obj.read()
+                else:
+                    with open(str(source), "rb") as handle:
+                        data = handle.read()
+                # A text-mode handle already yields str; only raw bytes need decoding.
+                return data.decode('utf-8') if isinstance(data, (bytes, bytearray)) else data
+            if file_ext == '.pdf':
+                pdf_reader = PyPDF2.PdfReader(file_obj if readable else str(source))
+                # extract_text() may yield None for pages without a text layer; treat those as empty.
+                return "".join((page.extract_text() or "") + "\n" for page in pdf_reader.pages)
+            doc = docx.Document(file_obj if readable else str(source))
+            return "\n".join(paragraph.text for paragraph in doc.paragraphs)
+        except Exception as e:
+            raise _ContentError(f"Error reading file: {str(e)}") from e
+    def _fetch_web_text(self, url: str) -> str:
+        """Download url and return its visible text, raising _ContentError on any failure."""
+        try:
+            response = requests.get(url, timeout=10)
+            response.raise_for_status()
+            soup = BeautifulSoup(response.text, 'html.parser')
+            # Remove scripts and styles so only human-readable text remains
+            for tag in soup(["script", "style"]):
+                tag.decompose()
+            text = soup.get_text(separator='\n')
+        except Exception as e:
+            raise _ContentError(f"Error fetching URL: {str(e)}") from e
+        # Strip each line and drop the ones that end up empty.
+        lines = (line.strip() for line in text.splitlines())
+        return "\n".join(line for line in lines if line)
+    def read_file(self, file_obj) -> str:
+        """Read content from different file types.
+        Returns "" when file_obj is None, the extracted text on success, or a message
+        starting with "Unsupported file type:" / "Error reading file:" on failure.
+        """
+        if file_obj is None:
+            return ""
+        try:
+            return self._read_file_text(file_obj)
+        except _ContentError as e:
+            return str(e)
+    def fetch_web_content(self, url: str) -> str:
+        """Fetch content from URL; on failure return a message starting with "Error fetching URL:"."""
+        try:
+            return self._fetch_web_text(url)
+        except _ContentError as e:
+            return str(e)
+    def analyze_content(self,
+                       text: Optional[str] = None,
+                       url: Optional[str] = None,
+                       file: Optional[object] = None,
+                       analysis_types: Optional[List[str]] = None) -> dict:
+        """Analyze content from text, URL, or file.
+        The source is chosen in the order url, file, text. analysis_types may contain
+        "summarize", "sentiment" and "topics"; it defaults to ["summarize"].
+        Returns a dict with "original_text" plus one key per requested analysis, or
+        {"error": message} when no content is available or a step fails.
+        """
+        if analysis_types is None:
+            analysis_types = ["summarize"]
+        # Loading failures are signalled by exception rather than by inspecting the text,
+        # so real content that happens to begin with "Error" is still analyzed.
+        try:
+            if url:
+                content = self._fetch_web_text(url)
+            elif file:
+                content = self._read_file_text(file)
+            else:
+                content = text or ""
+        except _ContentError as e:
+            return {"error": str(e)}
+        except Exception as e:
+            return {"error": f"Analysis error: {str(e)}"}
+        if not content.strip():
+            return {"error": "No content provided"}
+        try:
+            results = {
+                # Only a 1000-character preview is returned to the caller.
+                "original_text": (content[:1000] + "...") if len(content) > 1000 else content
+            }
+            # Perform requested analyses; inputs are cut by character count before reaching the models
+            if "summarize" in analysis_types:
+                summary = self.summarizer(content[:1024], max_length=130, min_length=30)
+                results["summary"] = summary[0]['summary_text']
+            if "sentiment" in analysis_types:
+                sentiment = self.sentiment_analyzer(content[:512])
+                results["sentiment"] = {
+                    "label": sentiment[0]['label'],
+                    "score": round(sentiment[0]['score'], 3)
+                }
+            if "topics" in analysis_types:
+                topics = self.zero_shot(
+                    content[:512],
+                    candidate_labels=["technology", "science", "business",
+                                    "politics", "entertainment", "education",
+                                    "health", "sports"]
+                )
+                # Keep only labels scoring above 0.1
+                results["topics"] = [
+                    {"label": label, "score": round(score, 3)}
+                    for label, score in zip(topics['labels'], topics['scores'])
+                    if score > 0.1
+                ]
+            return results
+        except Exception as e:
+            return {"error": f"Analysis error: {str(e)}"}
+def create_interface():
+    """Build the Gradio Blocks UI for the analyzer and return it without launching."""
+    analyzer = ContentAnalyzer()
+    def process_analysis(text, url, file, types):
+        """Run the analyzer and return Markdown for the four output tabs, in display order."""
+        outcome = analyzer.analyze_content(text, url, file, types)
+        # An error message is shown in the first tab; the other tabs are cleared.
+        if "error" in outcome:
+            return outcome["error"], "", "", ""
+        sentiment_md = ""
+        if "sentiment" in outcome:
+            verdict = outcome["sentiment"]
+            sentiment_md = f"**Sentiment:** {verdict['label']} (Confidence: {verdict['score']})"
+        topics_md = ""
+        if "topics" in outcome:
+            bullets = [f"- {entry['label']}: {entry['score']}" for entry in outcome["topics"]]
+            topics_md = "**Detected Topics:**\n" + "\n".join(bullets)
+        return (
+            outcome.get("original_text", ""),
+            outcome.get("summary", ""),
+            sentiment_md,
+            topics_md,
+        )
+    with gr.Blocks(title="Content Analyzer") as demo:
+        gr.Markdown("# 📑 Content Analyzer")
+        gr.Markdown("Analyze text content from various sources using AI.")
+        # One input tab per content source
+        with gr.Tabs():
+            with gr.Tab("Text Input"):
+                text_input = gr.Textbox(label="Enter Text", placeholder="Paste your text here...", lines=5)
+            with gr.Tab("Web URL"):
+                url_input = gr.Textbox(label="Enter URL", placeholder="https://example.com")
+            with gr.Tab("File Upload"):
+                file_input = gr.File(label="Upload File", file_types=[".txt", ".pdf", ".docx"])
+        # Which analyses to run; only summarization is ticked initially
+        analysis_types = gr.CheckboxGroup(
+            choices=["summarize", "sentiment", "topics"],
+            value=["summarize"],
+            label="Analysis Types",
+        )
+        analyze_btn = gr.Button("Analyze", variant="primary")
+        # One output tab per result section
+        with gr.Tabs():
+            with gr.Tab("Original Text"):
+                original_text = gr.Markdown()
+            with gr.Tab("Summary"):
+                summary_output = gr.Markdown()
+            with gr.Tab("Sentiment"):
+                sentiment_output = gr.Markdown()
+            with gr.Tab("Topics"):
+                topics_output = gr.Markdown()
+        # Wire the button: inputs and outputs are listed in the callback's parameter/return order
+        source_widgets = [text_input, url_input, file_input, analysis_types]
+        result_widgets = [original_text, summary_output, sentiment_output, topics_output]
+        analyze_btn.click(fn=process_analysis, inputs=source_widgets, outputs=result_widgets)
+    return demo
+# Launch the app when this file is executed as a script (skipped on import)
+if __name__ == "__main__":
+    demo = create_interface()
+    demo.launch()

deploy_to_hf.py ADDED Viewed

	@@ -0,0 +1,113 @@

+# deploy_to_hf.py
+import os
+import requests
+# Hugging Face access token, read from the HF_REPO_API environment variable
+# (None when the variable is not set)
+HF_TOKEN = os.environ.get("HF_REPO_API")
+# Headers shared by every request in this script: bearer authentication plus a JSON content type
+headers = {
+    "Authorization": f"Bearer {HF_TOKEN}",
+    "Content-Type": "application/json"
+}
+# Source text that commit_files_to_space() uploads as app.py.
+# NOTE(review): as written this is a truncated placeholder - read_file is a stub and the
+# rest of the application is elided - so it should be replaced with the full app.py
+# source before deploying; confirm this is not what is meant to be published.
+app_content = """
+import gradio as gr
+import requests
+from bs4 import BeautifulSoup
+from transformers import pipeline
+import PyPDF2
+import docx
+import os
+from typing import List, Tuple, Optional
+class ContentAnalyzer:
+    def __init__(self):
+        # Initialize models
+        self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+        self.sentiment_analyzer = pipeline("sentiment-analysis")
+        self.zero_shot = pipeline("zero-shot-classification")
+    def read_file(self, file_obj) -> str:
+        # ... [rest of your ContentAnalyzer class code]
+        pass
+# ... [rest of your app.py code]
+"""
+def commit_files_to_space():
+    """Send app.py, requirements.txt and README.md to the Space in a single commit request.
+    Builds one "create" operation per file, POSTs them to the Space commit URL using the
+    module-level headers, and prints the outcome. Returns None; failures are reported on
+    stdout rather than raised.
+    NOTE(review): the endpoint path and payload shape are taken as-is from this script -
+    verify them against the Hub API documentation.
+    """
+    # Without a token the Authorization header would read "Bearer None"; stop before sending it.
+    if not HF_TOKEN:
+        print("Error committing files: the HF_REPO_API environment variable is not set")
+        return
+    # Prepare files content
+    files = {
+        'app.py': app_content,
+        'requirements.txt': """gradio>=4.0.0
+requests>=2.31.0
+beautifulsoup4>=4.12.2
+transformers>=4.35.0
+torch>=2.0.1
+PyPDF2>=3.0.0
+python-docx>=0.8.11
+smolagents>=0.2.0""",
+        'README.md': """---
+title: Content Analyzer
+emoji: 📑
+colorFrom: blue
+colorTo: indigo
+sdk: gradio
+sdk_version: 4.0.0
+app_file: app.py
+pinned: false
+---
+# Content Analyzer
+An advanced content analysis tool that can process:
+- Text input
+- Web URLs
+- Document files (.txt, .pdf, .docx)
+## Features
+- Text summarization
+- Sentiment analysis
+- Topic detection
+"""
+    }
+    # All files travel in one request, one "create" operation each
+    commit_url = "https://huggingface.co/api/spaces/MHamdan/ContentAnalyzer/commit"
+    operations = [
+        {"operation": "create", "path": filename, "content": content}
+        for filename, content in files.items()
+    ]
+    commit_data = {
+        "operations": operations,
+        "commit_message": "Initial content analyzer setup"
+    }
+    # A timeout keeps the script from hanging forever on a stalled connection.
+    try:
+        response = requests.post(
+            commit_url,
+            headers=headers,
+            json=commit_data,
+            timeout=30
+        )
+    except requests.RequestException as exc:
+        print("Error committing files:", exc)
+        return
+    # Any 2xx status counts as success, not only 200.
+    if response.ok:
+        print("Files committed successfully!")
+        print("You can view your space at: https://huggingface.co/spaces/MHamdan/ContentAnalyzer")
+    else:
+        print("Error committing files:", response.text)
+        print("Status code:", response.status_code)
+if __name__ == "__main__":
+    # Verify authentication first so a bad token is reported before any commit is attempted
+    try:
+        auth_response = requests.get(
+            "https://huggingface.co/api/whoami-v2",
+            headers=headers,
+            timeout=30
+        )
+    except requests.RequestException as exc:
+        # Network-level failure (DNS, refused connection, timeout): no HTTP response to show
+        print("Authentication failed. Please check your token.")
+        print("Response:", exc)
+        raise SystemExit(1)
+    if auth_response.status_code == 200:
+        print("Authentication successful!")
+        commit_files_to_space()
+    else:
+        print("Authentication failed. Please check your token.")
+        print("Status code:", auth_response.status_code)
+        print("Response:", auth_response.text)
+        # Exit non-zero so calling shells and CI jobs can detect the failure
+        raise SystemExit(1)

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+gradio>=4.0.0
+requests>=2.31.0
+beautifulsoup4>=4.12.2
+transformers>=4.35.0
+torch>=2.0.1
+PyPDF2>=3.0.0
+python-docx>=0.8.11
+smolagents>=0.2.0