# NOTE(review): the lines below are artifacts of scraping the hosting page
# (Hugging Face Space status, file size, commit hashes, and a line-number
# gutter), not program content. Kept as comments so the file parses.
# Spaces: Sleeping / Sleeping
# File size: 3,159 Bytes
# 64ffc8f 7832e21 64ffc8f 15e8ca2 dd3df57 64ffc8f 15e8ca2 dd3df57 64ffc8f 7832e21 64ffc8f 3ae83b6 64ffc8f dd3df57 64ffc8f dd3df57 64ffc8f 7832e21 64ffc8f 628c80f 64ffc8f 628c80f 64ffc8f 7832e21 64ffc8f 628c80f 8d9b985 628c80f 8d9b985 64ffc8f 628c80f 8d9b985 64ffc8f 8d9b985 64ffc8f 8d9b985 7832e21 64ffc8f dd3df57
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification
from bs4 import BeautifulSoup
import requests
# ----------- Eager Initialization of Pipelines -----------
# Both pipelines are built at import time (not lazily), so the first call to
# analyze_sentiment / extract_org_entities pays no model-loading cost.
# NOTE(review): this downloads model weights at import — confirm that is
# acceptable for the deployment environment (e.g. a Space cold start).

# Sentiment pipeline: binary sentiment classifier fine-tuned for this course
# project (labels assumed POSITIVE/NEGATIVE — see analyze_sentiment).
model_id = "LinkLinkWu/ISOM5240HKUSTBASE"
sentiment_tokenizer = AutoTokenizer.from_pretrained(model_id)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(model_id)
sentiment_pipeline = pipeline(
"sentiment-analysis",
model=sentiment_model,
tokenizer=sentiment_tokenizer
)
# NER pipeline: BERT-based named-entity recognizer; grouped_entities=True
# merges word-piece tokens into whole entity spans with an "entity_group" key,
# which extract_org_entities relies on.
ner_tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
ner_model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
ner_pipeline = pipeline(
"ner",
model=ner_model,
tokenizer=ner_tokenizer,
grouped_entities=True
)
# ----------- Core Functions -----------
def fetch_news(ticker):
    """Scrape recent news headlines for *ticker* from finviz.com.

    Parameters
    ----------
    ticker : str
        Stock ticker symbol, e.g. "AAPL" (any case).

    Returns
    -------
    list[dict]
        Up to 30 dicts with 'title' and 'link' keys, newest first as listed
        on the page. Returns [] on any failure: network error, non-200
        response, a page whose title does not match the ticker (unknown
        symbol), or a missing news table.
    """
    try:
        url = f"https://finviz.com/quote.ashx?t={ticker}"
        # Browser-like headers: finviz rejects requests without a User-Agent.
        headers = {
            'User-Agent': 'Mozilla/5.0',
            'Accept': 'text/html',
            'Accept-Language': 'en-US,en;q=0.5',
            'Referer': 'https://finviz.com/',
            'Connection': 'keep-alive',
        }
        # Fix: the original call had no timeout and could hang indefinitely
        # on a stalled connection.
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code != 200:
            return []
        soup = BeautifulSoup(response.text, 'html.parser')
        # Sanity check that we landed on the right quote page. Fix: compare
        # case-insensitively — the original exact substring test rejected
        # lower-case ticker arguments even when the page was correct.
        title = soup.title.text if soup.title else ""
        if ticker.upper() not in title.upper():
            return []
        news_table = soup.find(id='news-table')
        if news_table is None:
            return []
        news = []
        # find_all is the modern bs4 spelling of the deprecated findAll alias.
        for row in news_table.find_all('tr')[:30]:  # cap at 30 headlines
            a_tag = row.find('a')
            if a_tag:
                news.append({'title': a_tag.get_text(), 'link': a_tag['href']})
        return news
    except Exception:
        # Deliberate best-effort contract: callers always get a list.
        return []
def analyze_sentiment(text, pipe=None):
    """Classify *text* as "Positive" or "Negative".

    Supports two call styles:
      - ``analyze_sentiment(text)`` — uses the module-level ``sentiment_pipeline``
      - ``analyze_sentiment(text, some_pipeline)`` — uses the supplied pipeline

    Returns "Positive" when the top label is 'POSITIVE', "Negative" for any
    other label, and "Unknown" if the pipeline raises.
    """
    try:
        label = (pipe or sentiment_pipeline)(text)[0]['label']
    except Exception:
        return "Unknown"
    return "Positive" if label == 'POSITIVE' else "Negative"
def extract_org_entities(text, pipe=None):
    """Extract up to five unique ORG entity names from *text*.

    Supports two call styles:
      - ``extract_org_entities(text)`` — uses the module-level ``ner_pipeline``
      - ``extract_org_entities(text, some_pipeline)`` — uses the given pipeline

    Each name is upper-cased with word-piece markers ("##") stripped;
    duplicates are skipped and the list is capped at five entries.
    Returns [] if the pipeline raises.
    """
    try:
        names = []
        for entity in (pipe or ner_pipeline)(text):
            # Only organization entities are of interest here.
            if entity["entity_group"] != "ORG":
                continue
            name = entity["word"].replace("##", "").strip().upper()
            if name in names:
                continue
            names.append(name)
            if len(names) >= 5:
                break
        return names
    except Exception:
        return []
# ----------- Helper Functions for Imports -----------
def get_sentiment_pipeline():
    """Return the module-level sentiment-analysis pipeline (built eagerly at import)."""
    return sentiment_pipeline
def get_ner_pipeline():
    """Return the module-level NER pipeline (built eagerly at import, grouped entities)."""
    return ner_pipeline