import re
import sys
import subprocess
from datetime import datetime

import dateparser
import spacy
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

app = FastAPI()
# Load classification and summarization models
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
summarizer_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
summarizer_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
# Load spaCy's English model for name/entity detection; spacy.load raises
# OSError when the model is not installed, so download it on first use.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"], check=True)
    nlp = spacy.load("en_core_web_sm")
# Candidate labels for zero-shot classification
labels = [
    "task", "event", "reminder", "meeting", "relationship", "note", "journal",
    "memory", "status_update", "sick_notice", "out_of_office", "travel_plan",
    "celebration", "emotion", "other",
]
class TextInput(BaseModel):
    text: str
def extract_dates(text):
    """Find time expressions (English and transliterated Hindi) and parse them."""
    time_expressions = re.findall(
        r'\b(kal|aaj|parso|raat|subah|shaam|dopahar|[0-9]{1,2} baje|next week|tomorrow|today|yesterday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday|[\d]{1,2}/[\d]{1,2}/[\d]{2,4})\b',
        text, flags=re.IGNORECASE)
    # Parse each expression once; dateparser.parse returns None for
    # strings it cannot resolve, so those are filtered out here.
    parsed = [str(dt) for t in time_expressions if (dt := dateparser.parse(t))]
    return list(set(parsed)), list(set(time_expressions))
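# Illustrative example (the parsed value depends on when the server runs,
# since dateparser resolves relative expressions against the current time):
#   extract_dates("Call mom tomorrow")
#   -> (["<tomorrow resolved to a full datetime string>"], ["tomorrow"])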
def detect_tense(parsed_dates):
    """Classify each parsed date as past, present, or future relative to now."""
    now = datetime.now()
    tenses = set()
    for d in parsed_dates:
        dt = dateparser.parse(d)
        if not dt:
            continue
        if dt < now:
            tenses.add("past")
        elif dt > now:
            tenses.add("future")
        else:
            tenses.add("present")
    return list(tenses) if tenses else ["unknown"]
def generate_summary(text):
    """Summarize the input with FLAN-T5, truncating inputs past the model limit."""
    inputs = summarizer_tokenizer("summarize: " + text, return_tensors="pt",
                                  truncation=True, max_length=512)
    output_ids = summarizer_model.generate(inputs.input_ids, max_length=60,
                                           num_beams=4, early_stopping=True)
    return summarizer_tokenizer.decode(output_ids[0], skip_special_tokens=True)
def extract_people(text):
    """Return the unique PERSON entities spaCy finds in the text."""
    doc = nlp(text)
    return list(set(ent.text for ent in doc.ents if ent.label_ == "PERSON"))
def estimate_mood(text):
    """Keyword-based mood guess; falls back to "neutral" when nothing matches."""
    text_lower = text.lower()
    mood_map = {
        "happy": ["happy", "excited", "joy", "grateful"],
        "sad": ["sad", "upset", "crying", "lonely"],
        "angry": ["angry", "annoyed", "frustrated", "irritated"],
        "nervous": ["nervous", "anxious", "scared"],
        "unwell": ["sick", "unwell", "not feeling well", "fever", "cold", "headache"],
    }
    for mood, keywords in mood_map.items():
        if any(kw in text_lower for kw in keywords):
            return mood
    return "neutral"
def generate_tags(label, text):
    """Combine the classification label, forced keywords, and long words as tags."""
    text_lower = text.lower()
    base_tags = [label]
    keywords = re.findall(r'\b[a-zA-Z]{4,}\b', text_lower)
    force_tags = []
    if any(w in text_lower for w in ["sick", "unwell", "not feeling well", "fever"]):
        force_tags += ["sick", "leave"]
    if "work" in text_lower:
        force_tags.append("work")
    return list(set(base_tags + force_tags + keywords))
@app.post("/analyze")
async def analyze(input: TextInput):
text = input.text
classification = classifier(text, labels)
best_label = classification['labels'][0]
scores = dict(zip(classification['labels'], classification['scores']))
parsed_dates, time_mentions = extract_dates(text)
tenses = detect_tense(parsed_dates)
summary = generate_summary(text)
people = extract_people(text)
mood = estimate_mood(text)
tags = generate_tags(best_label, text)
return {
"type": best_label,
"confidence_scores": scores,
"time_mentions": time_mentions,
"parsed_dates": parsed_dates,
"tense": tenses,
"summary": summary,
"people": people,
"mood": mood,
"tags": tags
}
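
# To run the service locally (assumed setup; these package names are the
# usual ones for this stack, not pinned by this file):
#   pip install fastapi uvicorn transformers torch dateparser spacy pydantic
#   uvicorn app:app --host 0.0.0.0 --port 8000
#
# Example request (assumes the module is saved as app.py):
#   curl -X POST http://localhost:8000/analyze \
#        -H "Content-Type: application/json" \
#        -d '{"text": "Feeling sick, will take leave tomorrow and skip the meeting with Sarah"}'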