import streamlit as st
import requests
from bs4 import BeautifulSoup
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification

# ---------------- Model Setup ----------------
@st.cache_resource
def load_sentiment_model():
    """Load the fine-tuned stock-sentiment model once and cache it across Streamlit reruns."""
    model_id = "LinkLinkWu/Stock_Analysis_Test_Ahamed"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSequenceClassification.from_pretrained(model_id)
    return pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

@st.cache_resource
def load_ner_model():
    """Load a general-purpose BERT NER model once and cache it across Streamlit reruns."""
    tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
    model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
    # grouped_entities=True merges sub-word tokens into whole entity spans (e.g. "Apple Inc").
    return pipeline("ner", model=model, tokenizer=tokenizer, grouped_entities=True)

# Instantiate the pipelines at import time so the helpers below can reuse them.
sentiment_pipeline = load_sentiment_model()
ner_pipeline = load_ner_model()

# ---------------- Helper Functions ----------------
def fetch_news(ticker):
    """Scrape up to 50 recent headlines for a ticker from Finviz's quote page."""
    try:
        url = f"https://finviz.com/quote.ashx?t={ticker}"
        headers = {
            'User-Agent': 'Mozilla/5.0',
            'Accept': 'text/html',
            'Accept-Language': 'en-US,en;q=0.5',
            'Referer': 'https://finviz.com/',
            'Connection': 'keep-alive',
        }
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code != 200:
            st.error(f"Failed to fetch page for {ticker}: Status code {response.status_code}")
            return []

        soup = BeautifulSoup(response.text, 'html.parser')
        # Finviz includes the ticker in the page <title>; its absence usually means an
        # unknown symbol or a blocked/redirected request.
        page_title = soup.title.text if soup.title else ""
        if ticker.upper() not in page_title.upper():
            st.error(f"Page for {ticker} not found or access denied.")
            return []

        news_table = soup.find(id='news-table')
        if news_table is None:
            st.error(f"News table not found for {ticker}. The website structure might have changed.")
            return []

        news = []
        for row in news_table.find_all('tr')[:50]:
            a_tag = row.find('a')
            if a_tag:
                news.append({'title': a_tag.get_text(), 'link': a_tag['href']})
        return news
    except Exception as e:
        st.error(f"Failed to fetch news for {ticker}: {e}")
        return []

def analyze_sentiment(text):
    """Classify a headline as Positive or Negative using the cached sentiment pipeline."""
    try:
        result = sentiment_pipeline(text)[0]
        # Compare case-insensitively; any label other than the model's positive class
        # is reported as Negative.
        return "Positive" if result['label'].upper() == 'POSITIVE' else "Negative"
    except Exception as e:
        st.error(f"Sentiment analysis failed: {e}")
        return "Unknown"

def extract_org_entities(text):
    """Return up to five unique organization names (upper-cased) mentioned in the text."""
    try:
        entities = ner_pipeline(text)
        org_entities = []
        for ent in entities:
            if ent["entity_group"] == "ORG":
                # Strip any leftover WordPiece markers before de-duplicating.
                clean_word = ent["word"].replace("##", "").strip()
                if clean_word and clean_word.upper() not in org_entities:
                    org_entities.append(clean_word.upper())
                if len(org_entities) >= 5:
                    break
        return org_entities
    except Exception as e:
        st.error(f"NER entity extraction failed: {e}")
        return []
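
# ---------------- Example Usage (illustrative sketch) ----------------
# Minimal, hypothetical wiring of the helpers above into a Streamlit page. The real UI
# for this app is not defined in this file, so the widget labels and layout below are
# assumptions, not the app's actual interface. The function is never called here.
def _example_usage():
    ticker = st.text_input("Ticker symbol", value="AAPL").strip().upper()
    if st.button("Analyze") and ticker:
        for item in fetch_news(ticker)[:10]:
            sentiment = analyze_sentiment(item['title'])
            orgs = extract_org_entities(item['title'])
            st.write(f"[{sentiment}] {item['title']}")
            if orgs:
                st.caption("Organizations mentioned: " + ", ".join(orgs))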