# Source: Hugging Face Space "eaglelandsonce/TensorFlowClass" (status: Sleeping).
# File size: 3,586 bytes.

import streamlit as st
from transformers import AutoTokenizer, AutoModel
import torch
import networkx as nx
import matplotlib.pyplot as plt
from collections import Counter
import graphrag
import inspect

st.title("GraphRAG Module Exploration and Text Analysis")

# Diagnostic section: list every name exposed by the graphrag module and, for
# classes and plain functions, show the call signature and docstring.
st.header("GraphRAG Module Contents")
graphrag_contents = dir(graphrag)
st.write("Available attributes and methods in graphrag module:")
for item in graphrag_contents:
    st.write(f"- {item}")
    attr = getattr(graphrag, item)
    if inspect.isclass(attr) or inspect.isfunction(attr):
        # inspect.signature raises ValueError when no signature can be
        # provided and TypeError for unsupported objects; a purely diagnostic
        # listing must not crash the page because of one such attribute.
        try:
            signature_text = str(inspect.signature(attr))
        except (ValueError, TypeError):
            signature_text = "(signature unavailable)"
        st.write(f"  Signature: {signature_text}")
        st.write(f"  Docstring: {attr.__doc__}")

# Attempt to find a suitable model class: the first attribute whose name
# contains "model" AND that can actually be called. Submodules or constants
# that merely have "model" in their name are skipped, because load_model()
# later invokes model_class(...) and would fail on a non-callable.
model_class = None
for item in graphrag_contents:
    if 'model' not in item.lower():
        continue
    candidate = getattr(graphrag, item)
    if not callable(candidate):
        continue
    model_class = candidate
    st.write(f"Found potential model class: {item}")
    break

# Without a usable model class the rest of the app cannot run; report and halt.
if model_class is None:
    st.error("Could not find a suitable model class in graphrag module.")
    st.stop()

@st.cache_resource
def load_model():
    """Load the BERT tokenizer/encoder and wrap the encoder in the graphrag model.

    Decorated with ``st.cache_resource`` so the loaded objects can be reused
    rather than rebuilt.

    Returns:
        A ``(tokenizer, graph_rag_model)`` tuple, where ``graph_rag_model`` is
        whatever the module-level ``model_class`` produces when called with
        the BERT encoder and ``num_labels=2``.
    """
    checkpoint = "bert-base-uncased"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    encoder = AutoModel.from_pretrained(checkpoint)

    # NOTE: placeholder construction. The arguments below are a guess and must
    # be adjusted to the constructor of the model class that was actually
    # discovered. num_labels=2 targets binary sentiment classification.
    return tokenizer, model_class(encoder, num_labels=2)

def text_to_graph(text):
    """Turn whitespace-separated text into a simple chain graph.

    Each word becomes one node (numbered by position) and every pair of
    consecutive words is linked. Edges are emitted in both directions: first
    all forward edges (i -> i+1), then all backward edges (i+1 -> i).

    Args:
        text: The input string; it is split on whitespace.

    Returns:
        A dict with:
            "edge_index": two parallel lists ``[sources, targets]``.
            "num_nodes": the number of words.
            "node_feat": one single-element feature list per word, holding
                the Unicode code point of the word's first character.
            "edge_attr": one ``[1]`` per directed edge.
        Empty or whitespace-only text yields an empty graph
        (``edge_index == [[], []]``, ``num_nodes == 0``).
    """
    words = text.split()
    num_nodes = len(words)

    # The graph is always a path 0-1-2-...-(n-1), so the forward edge list can
    # be written down directly instead of building a graph object and asking
    # it for its edges repeatedly. Both ranges are empty when there are fewer
    # than two words.
    sources = list(range(num_nodes - 1))
    targets = list(range(1, num_nodes))

    return {
        "edge_index": [sources + targets, targets + sources],
        "num_nodes": num_nodes,
        # str.split() never yields empty strings, so word[0] is always valid.
        "node_feat": [[ord(word[0])] for word in words],
        # Every directed edge carries the same constant attribute.
        "edge_attr": [[1] for _ in range(2 * len(sources))],
    }

def analyze_text(text, tokenizer, model):
    """Classify the sentiment of ``text`` using token and graph inputs.

    Args:
        text: The string to analyse.
        tokenizer: Callable invoked as
            ``tokenizer(text, return_tensors="pt", truncation=True,
            max_length=512)``; its result is indexed with ``"input_ids"`` and
            ``"attention_mask"``.
        model: Callable that receives the combined inputs as keyword
            arguments and returns either logits or an object with a
            ``logits`` attribute.

    Returns:
        ``(sentiment, confidence, graph)`` where ``sentiment`` is "Positive"
        or "Negative", ``confidence`` is the softmax probability of the
        chosen class as a Python float, and ``graph`` is the dict returned by
        ``text_to_graph``.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    graph = text_to_graph(text)

    # NOTE: placeholder input layout; adapt the keys to whatever the real
    # model's forward() expects.
    # NOTE(review): graph nodes come from whitespace words while input_ids
    # come from the tokenizer (and are truncated), so the two presumably do
    # not line up one-to-one. Confirm against the model's contract.
    model_kwargs = dict(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        edge_index=torch.tensor(graph["edge_index"], dtype=torch.long),
        node_feat=torch.tensor(graph["node_feat"], dtype=torch.float),
        edge_attr=torch.tensor(graph["edge_attr"], dtype=torch.float),
        num_nodes=graph["num_nodes"],
    )

    # Inference only: gradient tracking is disabled.
    with torch.no_grad():
        outputs = model(**model_kwargs)

    # Accept either a raw logits value or an output object exposing .logits.
    if hasattr(outputs, 'logits'):
        logits = outputs.logits
    else:
        logits = outputs

    # Softmax over dim 1; only the first row is inspected, with index 1
    # treated as the positive class and index 0 as the negative class.
    first_row = torch.softmax(logits, dim=1)[0]
    if first_row[1] > first_row[0]:
        return "Positive", first_row[1].item(), graph
    return "Negative", first_row[0].item(), graph

# Rest of the Streamlit app (text input, analysis button, etc.) remains the same...