Spaces:

Naz786
/

Thesis-writer-bot

Sleeping

App Files Community

Naz786 committed on Jul 18

Commit

a4fecd2

verified ·

1 Parent(s): 68b1f81

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -385

app.py CHANGED Viewed

@@ -1,399 +1,67 @@
 import streamlit as st
 import os
 import requests
-import hashlib
-from typing import List, Dict, Any, Optional, Mapping
 from datetime import datetime
-import json
-import re
-from urllib.parse import quote
 import time
 import random
 import markdown
-from crewai import Agent, Task, Crew, Process
-from crewai.tools import BaseTool
 from groq import Groq
-import nltk
-from textstat import flesch_reading_ease, flesch_kincaid_grade
-from bs4 import BeautifulSoup
-import concurrent.futures
-from duckduckgo_search import DDGS
-# Download NLTK data
-try:
-    nltk.download('punkt', quiet=True)
-    nltk.download('stopwords', quiet=True)
-    nltk.download('wordnet', quiet=True)
-except:
-    pass
-# Custom Tools for Academic Research and Writing
-class AcademicResearchTool(BaseTool):
-    name: str = "academic_research"
-    description: str = "Conduct comprehensive academic research for thesis/synopsis"
-    def _run(self, topic: str, research_areas: str) -> str:
-        try:
-            time.sleep(1)
-            search_queries = [
-                f"{topic} research studies",
-                f"{topic} academic papers",
-                f"{topic} recent developments",
-                f"{topic} methodology",
-                f"{topic} literature review"
-            ]
-            all_research = []
-            with DDGS() as ddgs:
-                for query in search_queries:
-                    try:
-                        results = list(ddgs.text(query, max_results=6))
-                        for result in results:
-                            all_research.append({
-                                'query': query,
-                                'title': result.get('title', ''),
-                                'content': result.get('body', ''),
-                                'url': result.get('href', ''),
-                                'relevance_score': self._calculate_relevance(result.get('body', ''), topic)
-                            })
-                        time.sleep(0.5)
-                    except Exception:
-                        continue
-            unique_research = self._remove_duplicates(all_research)
-            unique_research.sort(key=lambda x: x['relevance_score'], reverse=True)
-            return json.dumps(unique_research[:15])
-        except Exception as e:
-            return f"Research failed: {str(e)}"
-    def _calculate_relevance(self, content: str, topic: str) -> float:
-        topic_words = set(topic.lower().split())
-        content_words = set(content.lower().split())
-        if not topic_words or not content_words:
-            return 0.0
-        intersection = topic_words.intersection(content_words)
-        return len(intersection) / len(topic_words)
-    def _remove_duplicates(self, research_list: List[Dict]) -> List[Dict]:
-        seen_urls = set()
-        unique_research = []
-        for item in research_list:
-            if item['url'] not in seen_urls:
-                seen_urls.add(item['url'])
-                unique_research.append(item)
-        return unique_research
-class CitationGeneratorTool(BaseTool):
-    name: str = "citation_generator"
-    description: str = "Generate proper academic citations and references"
-    def _run(self, research_data: str) -> str:
-        try:
-            research_items = json.loads(research_data)
-            citations = []
-            for i, item in enumerate(research_items[:10]):
-                title = item.get('title', 'Unknown Title')
-                url = item.get('url', '')
-                domain = url.split('/')[2] if len(url.split('/')) > 2 else 'Unknown'
-                citation = {
-                    'id': f"source_{i+1}",
-                    'title': title,
-                    'url': url,
-                    'domain': domain,
-                    'apa_citation': f"{domain}. ({datetime.now().year}). {title}. Retrieved from {url}",
-                    'in_text': f"({domain}, {datetime.now().year})"
-                }
-                citations.append(citation)
-            return json.dumps(citations)
-        except Exception as e:
-            return f"Citation generation failed: {str(e)}"
-class AcademicWritingTool(BaseTool):
-    name: str = "academic_writing"
-    description: str = "Analyze and improve academic writing style"
-    def _run(self, text: str, academic_level: str) -> str:
-        try:
-            flesch_score = flesch_reading_ease(text)
-            fk_grade = flesch_kincaid_grade(text)
-            sentences = text.split('.')
-            sentence_lengths = [len(s.split()) for s in sentences if s.strip()]
-            avg_sentence_length = sum(sentence_lengths) / max(len(sentence_lengths), 1)
-            academic_patterns = [
-                "furthermore", "moreover", "additionally", "consequently",
-                "therefore", "thus", "hence", "accordingly", "subsequently"
-            ]
-            pattern_usage = sum(1 for pattern in academic_patterns if pattern in text.lower())
-            level_guidelines = {
-                'undergraduate': {
-                    'target_flesch': (60, 80),
-                    'target_grade': (12, 14),
-                    'sentence_length': (15, 25)
-                },
-                'masters': {
-                    'target_flesch': (50, 70),
-                    'target_grade': (14, 16),
-                    'sentence_length': (18, 30)
-                },
-                'phd': {
-                    'target_flesch': (40, 60),
-                    'target_grade': (16, 18),
-                    'sentence_length': (20, 35)
-                }
-            }
-            guidelines = level_guidelines.get(academic_level.lower(), level_guidelines['masters'])
-            analysis = {
-                'flesch_score': flesch_score,
-                'fk_grade': fk_grade,
-                'avg_sentence_length': avg_sentence_length,
-                'academic_patterns_used': pattern_usage,
-                'target_guidelines': guidelines,
-                'suggestions': []
-            }
-            if flesch_score > guidelines['target_flesch'][1]:
-                analysis['suggestions'].append("Consider more complex sentence structures for academic tone")
-            if avg_sentence_length < guidelines['sentence_length'][0]:
-                analysis['suggestions'].append("Use longer, more detailed sentences")
-            if pattern_usage < 3:
-                analysis['suggestions'].append("Include more academic transition phrases")
-            return json.dumps(analysis)
-        except Exception as e:
-            return f"Academic analysis failed: {str(e)}"
-class HumanizationTool(BaseTool):
-    name: str = "humanization_tool"
-    description: str = "Make academic writing sound more human and less AI-like"
-    def _run(self, text: str) -> str:
-        try:
-            ai_patterns = [
-                "It is important to note that",
-                "This demonstrates that",
-                "This indicates that",
-                "As previously mentioned",
-                "It should be mentioned that",
-                "This suggests that",
-                "This implies that",
-                "It can be concluded that"
-            ]
-            human_alternatives = [
-                "Notably,",
-                "This shows",
-                "This reveals",
-                "As noted earlier",
-                "It's worth noting",
-                "This suggests",
-                "This implies",
-                "Therefore,"
-            ]
-            humanized_text = text
-            for ai_pattern, human_alt in zip(ai_patterns, human_alternatives):
-                humanized_text = humanized_text.replace(ai_pattern, human_alt)
-            variations = [
-                "Interestingly,",
-                "Surprisingly,",
-                "Remarkably,",
-                "Significantly,",
-                "Importantly,"
-            ]
-            sentences = humanized_text.split('.')
-            for i in range(1, len(sentences), 3):
-                if i < len(sentences) and sentences[i].strip():
-                    variation = random.choice(variations)
-                    sentences[i] = f" {variation} {sentences[i].lstrip()}"
-            humanized_text = '.'.join(sentences)
-            personal_insights = [
-                "Based on the available evidence,",
-                "From the research findings,",
-                "Considering the data,",
-                "In light of these results,"
-            ]
-            if len(sentences) > 5:
-                insight = random.choice(personal_insights)
-                sentences[2] = f" {insight} {sentences[2].lstrip()}"
-            return '.'.join(sentences)
-        except Exception as e:
-            return f"Humanization failed: {str(e)}"
-def rate_limit_handler(max_retries=3, base_delay=2):
-    def decorator(func):
-        def wrapper(*args, **kwargs):
-            for attempt in range(max_retries):
-                try:
-                    return func(*args, **kwargs)
-                except Exception as e:
-                    if "rate_limit" in str(e).lower() and attempt < max_retries - 1:
-                        delay = base_delay * (2 ** attempt) + random.uniform(0, 1)
-                        st.warning(f"Rate limit hit. Retrying in {delay:.1f} seconds... (Attempt {attempt + 1}/{max_retries})")
-                        time.sleep(delay)
-                    else:
-                        raise e
-            return None
-        return wrapper
-    return decorator
-# Custom LLM class for CrewAI with built-in API
-import litellm
-from langchain.llms.base import LLM
-class BuiltInLLM(LLM):
-    model_name: str = "groq/llama-3.3-70b-versatile"
-    api_key: str = "API_KEY"  # <-- Replace with your actual API key
-    def __init__(self):
-        super().__init__()
-        os.environ["GROQ_API_KEY"] = self.api_key
-        litellm.set_verbose = False
-    @property
-    def _llm_type(self) -> str:
-        return "groq"
-    @rate_limit_handler(max_retries=3, base_delay=2)
-    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str:
-        try:
-            if len(prompt.split()) > 1500:
-                words = prompt.split()
-                prompt = ' '.join(words[:1500]) + "..."
-            response = litellm.completion(
-                model=self.model_name,
-                messages=[
-                    {"role": "system", "content": "You are an expert academic writer who creates sophisticated, well-researched thesis documents that sound completely human-written. You avoid AI patterns and create authentic academic content with proper citations and natural flow."},
-                    {"role": "user", "content": prompt}
-                ],
-                max_tokens=2500,
-                temperature=0.6,
-                top_p=0.9,
-                api_key=self.api_key
-            )
-            time.sleep(2)
-            return response.choices[0].message.content
-        except Exception as e:
-            st.error(f"Error in processing: {str(e)}")
-            return f"Error: {str(e)}"
-    @property
-    def _identifying_params(self) -> Mapping[str, Any]:
-        return {"model_name": self.model_name}
-def create_thesis_agents(llm):
-    research_agent = Agent(
-        role="Academic Research Specialist",
-        goal="Conduct comprehensive academic research and gather credible sources",
-        backstory="You are a PhD-level researcher with expertise in finding and analyzing academic sources. You understand how to identify credible information and synthesize research findings.",
-        tools=[AcademicResearchTool()],
-        verbose=True,
-        allow_delegation=False,
-        llm=llm
-    )
-    thesis_writer = Agent(
-        role="Academic Thesis Writer",
-        goal="Write sophisticated thesis documents that sound completely human-written",
-        backstory="You are an experienced academic writer who specializes in creating thesis documents. You know how to write in a way that sounds natural and scholarly, avoiding AI patterns while maintaining academic rigor.",
-        tools=[AcademicWritingTool(), CitationGeneratorTool()],
-        verbose=True,
-        allow_delegation=False,
-        llm=llm
-    )
-    humanization_agent = Agent(
-        role="Academic Writing Humanizer",
-        goal="Make academic writing sound completely human and undetectable",
-        backstory="You are an expert editor who specializes in making academic content sound natural and human-written. You know how to eliminate AI patterns and create authentic scholarly writing.",
-        tools=[HumanizationTool()],
-        verbose=True,
-        allow_delegation=False,
-        llm=llm
-    )
-    return research_agent, thesis_writer, humanization_agent
-def create_thesis_tasks(topic, document_type, academic_level, research_areas, word_count, agents):
-    research_agent, thesis_writer, humanization_agent = agents
-    research_task = Task(
-        description=f"""
-        Conduct comprehensive academic research for a {document_type} on "{topic}".
-        Research Areas: {research_areas}
-        Academic Level: {academic_level}
-        Target Length: {word_count} words
-        Requirements:
-        - Find 10-15 credible academic sources
-        - Gather recent research and developments
-        - Identify key theories and methodologies
-        - Note different perspectives and debates
-        - Focus on peer-reviewed and scholarly sources
-        - Include both theoretical and practical aspects
-        Provide a detailed research summary with key findings, methodologies, and source analysis.
-        """,
-        agent=research_agent,
-        expected_output="Comprehensive research summary with credible sources and key insights"
-    )
-    thesis_task = Task(
-        description=f"""
-        Write a complete {document_type} on "{topic}" that sounds completely human-written.
-        Academic Level: {academic_level}
-        Target Length: {word_count} words
-        Research Areas: {research_areas}
-        Requirements:
-        - Use the comprehensive research provided
-        - Write in proper academic style for {academic_level} level
-        - Include proper citations and references
-        - Create logical structure with introduction, body, and conclusion
-        - Use varied sentence structures and academic vocabulary
-        - Include critical analysis and original insights
-        - Maintain scholarly tone while sounding natural
-        - Avoid AI-like patterns and formal robotic language
-        - Include methodology, findings, and implications
-        - Make it engaging and intellectually rigorous
-        Structure:
-        1. Introduction and background
-        2. Literature review
-        3. Methodology
-        4. Analysis and findings
-        5. Discussion and implications
-        6. Conclusion and recommendations
-        Important: Write as if you're a human academic expert sharing original research and insights.
-        """,
-        agent=thesis_writer,
-        expected_output="Complete academic thesis document with proper structure and citations",
-        dependencies=[research_task]
-    )
-    humanization_task = Task(
-        description=f"""
-        Polish and humanize the thesis document to make it completely undetectable as AI-written.
-        Requirements:
-        - Remove any remaining AI patterns
-        - Improve natural academic flow
-        - Add authentic human writing touches
-        - Ensure varied sentence structures
-        - Make transitions feel natural and scholarly
-        - Add subtle personal insights and critical thinking
-        - Maintain academic rigor while sounding human
-        - Improve readability without losing sophistication
-        - Ensure proper citation integration
-        - Make it sound like expert human academic writing
-        Focus on making it indistinguishable from high-quality human academic writing.
-        """,
-        agent=humanization_agent,
-        expected_output="Final polished human-sounding academic thesis document",
-        dependencies=[thesis_task]
-    )
-    return [research_task, thesis_task, humanization_task]
-def run_thesis_writer(topic, document_type, academic_level, research_areas, word_count):
     try:
-        llm = BuiltInLLM()
-        agents = create_thesis_agents(llm)
-        tasks = create_thesis_tasks(topic, document_type, academic_level, research_areas, word_count, agents)
-        crew = Crew(
-            agents=list(agents),
-            tasks=tasks,
-            process=Process.sequential,
-            verbose=True
         )
-        with st.spinner("Creating comprehensive thesis document with AI agents..."):
-            result = crew.kickoff()
-        return result
     except Exception as e:
-        st.error(f"Error in thesis writing: {str(e)}")
-        return None
 def main():
     st.set_page_config(
@@ -465,12 +133,19 @@ def main():
             placeholder="Specific methodology, theoretical framework, case studies, etc...",
             height=100
         )
         if st.button("🚀 Generate Thesis Document", type="primary", use_container_width=True):
             if not topic.strip():
                 st.error("Please enter a thesis topic!")
             else:
                 research_areas_text = research_areas if research_areas.strip() else "general academic research"
-                result = run_thesis_writer(topic, document_type, academic_level, research_areas_text, word_count)
                 if result:
                     st.session_state.generated_thesis = result
                     st.session_state.thesis_info = {
@@ -554,4 +229,4 @@ def main():
             st.info("👈 Enter a thesis topic and click 'Generate Thesis Document' to create your academic content")
 if __name__ == "__main__":
-    main()

 import streamlit as st
 import os
 import requests
 from datetime import datetime
 import time
 import random
 import markdown
 from groq import Groq
+# --- Streamlit UI and Groq API Integration ---
+def build_thesis_prompt(topic, document_type, academic_level, research_areas, word_count, additional_requirements):
+    prompt = f"""
+You are an expert academic writer. Write a complete {document_type} on the topic: "{topic}".
+Academic Level: {academic_level}
+Target Length: {word_count} words
+Research Areas: {research_areas}
+"""
+    if additional_requirements and additional_requirements.strip():
+        prompt += f"\nAdditional Requirements: {additional_requirements.strip()}\n"
+    prompt += """
+Requirements:
+- Use credible academic sources and reference them in-text (APA style, e.g., (Author, Year)).
+- Write in proper academic style for the specified level.
+- Create logical structure with introduction, body, and conclusion.
+- Use varied sentence structures and academic vocabulary.
+- Include critical analysis and original insights.
+- Maintain scholarly tone while sounding natural and human.
+- Avoid AI-like patterns and robotic language.
+- Include methodology, findings, and implications if relevant.
+- Make it engaging and intellectually rigorous.
+Structure:
+1. Introduction and background
+2. Literature review
+3. Methodology
+4. Analysis and findings
+5. Discussion and implications
+6. Conclusion and recommendations
+Important: Write as if you're a human academic expert sharing original research and insights. Make it indistinguishable from human writing.\n\nBegin the document below:\n\n"""
+    return prompt
+def call_groq_llama(prompt, api_key, model_name="llama3-70b-8192"):  # Use the correct Groq model name
+    client = Groq(api_key=api_key)
     try:
+        response = client.chat.completions.create(
+            model=model_name,
+            messages=[
+                {"role": "system", "content": "You are an expert academic writer who creates sophisticated, well-researched thesis documents that sound completely human-written. You avoid AI patterns and create authentic academic content with proper citations and natural flow."},
+                {"role": "user", "content": prompt}
+            ],
+            max_tokens=2500,
+            temperature=0.6,
+            top_p=0.9
         )
+        return response.choices[0].message.content
     except Exception as e:
+        st.error(f"Error from Groq API: {str(e)}")
+        return f"Error: {str(e)}"
+def run_thesis_writer(topic, document_type, academic_level, research_areas, word_count, additional_requirements, api_key):
+    prompt = build_thesis_prompt(topic, document_type, academic_level, research_areas, word_count, additional_requirements)
+    with st.spinner("Generating your thesis document with Groq Llama-3..."):
+        result = call_groq_llama(prompt, api_key)
+    return result
 def main():
     st.set_page_config(
             placeholder="Specific methodology, theoretical framework, case studies, etc...",
             height=100
         )
+        api_key = st.text_input(
+            "Enter your Groq API Key",
+            type="password",
+            help="Your API key is used only to generate your document and is never stored."
+        )
         if st.button("🚀 Generate Thesis Document", type="primary", use_container_width=True):
             if not topic.strip():
                 st.error("Please enter a thesis topic!")
+            elif not api_key.strip():
+                st.error("Please enter your Groq API key!")
             else:
                 research_areas_text = research_areas if research_areas.strip() else "general academic research"
+                result = run_thesis_writer(topic, document_type, academic_level, research_areas_text, word_count, additional_requirements, api_key)
                 if result:
                     st.session_state.generated_thesis = result
                     st.session_state.thesis_info = {
             st.info("👈 Enter a thesis topic and click 'Generate Thesis Document' to create your academic content")
 if __name__ == "__main__":
+    main()