import streamlit as st
import pandas as pd
from transformers import AutoTokenizer, AutoModel
import torch
import graphrag
import inspect
# Diagnostic Section
st.title("Graphrag Module Investigation")
st.write("Contents of graphrag module:")
module_contents = dir(graphrag)
st.write(module_contents)

st.write("Detailed information about graphrag module contents:")
for item in module_contents:
    attr = getattr(graphrag, item)
    st.write(f"Name: {item}")
    st.write(f"Type: {type(attr)}")
    if inspect.isclass(attr):
        st.write("Class Methods and Attributes:")
        for name, value in inspect.getmembers(attr):
            if not name.startswith('_'):  # Exclude private methods/attributes
                st.write(f"  - {name}: {type(value)}")
    if callable(attr):
        st.write("Signature:")
        try:
            st.write(inspect.signature(attr))
        except (ValueError, TypeError):
            # Some builtins and extension callables expose no signature
            st.write("(signature unavailable)")
        st.write("Docstring:")
        st.write(inspect.getdoc(attr))
    st.write("---")
# Main Application Section
st.title("Graphrag Text Analysis")

@st.cache_resource  # Cache the models so they load only once per session
def load_model():
    bert_model_name = "bert-base-uncased"
    tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
    bert_model = AutoModel.from_pretrained(bert_model_name)

    # Initialize Graphrag model
    # Note: this part may need to be adjusted based on the actual structure of graphrag
    model = None
    for item in dir(graphrag):
        if 'model' in item.lower() or 'rag' in item.lower():
            model_class = getattr(graphrag, item)
            if inspect.isclass(model_class):
                try:
                    # Attempt to initialize the model; the parameters may need
                    # adjusting to match the actual class signature
                    model = model_class(bert_model)
                    st.success(f"Successfully initialized {item}")
                    break
                except Exception as e:
                    st.write(f"Tried initializing {item}, but got error: {str(e)}")

    if model is None:
        st.error("Could not initialize any Graphrag model. Please check the module structure.")
    return tokenizer, model
def process_text(text, tokenizer, model):
    if model is None:
        return "Model not initialized"
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)

    # Process outputs based on your specific task.
    # This is a placeholder; adjust according to your model's output.
    if hasattr(outputs, 'logits'):
        logits = outputs.logits
    elif isinstance(outputs, torch.Tensor):
        logits = outputs
    else:
        return f"Unexpected output format: {type(outputs)}"

    probabilities = torch.softmax(logits, dim=-1)
    return probabilities.tolist()[0]
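
# Note: a bare AutoModel (plain BERT) returns no `logits`, only
# `last_hidden_state`, so process_text will report an unexpected output format
# whenever the graphrag model could not be initialized. Below is a minimal
# sketch of a fallback that mean-pools BERT token embeddings into a single
# sentence vector (the name `embed_text` is an illustrative assumption, not
# part of graphrag):
def embed_text(text, tokenizer, bert_model):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        hidden = bert_model(**inputs).last_hidden_state  # shape: (1, seq_len, 768)
    mask = inputs["attention_mask"].unsqueeze(-1)  # zero out padding positions
    # Average only over real (non-padding) tokens
    return ((hidden * mask).sum(dim=1) / mask.sum(dim=1)).squeeze(0)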
tokenizer, model = load_model()

# Text input for single prediction
text_input = st.text_area("Enter text for analysis:")
if st.button("Analyze Text"):
    if text_input:
        result = process_text(text_input, tokenizer, model)
        st.write(f"Analysis Result: {result}")
    else:
        st.write("Please enter some text to analyze.")
# Note about sample data
st.markdown("Note: To use a CSV file, you would typically upload it and process each row; a sketch of that workflow follows below. For simplicity, the main flow uses direct text input.")
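
# A minimal sketch of the CSV workflow mentioned in the note above.
# Assumption: the uploaded file has a "text" column; adjust the column name
# to match your data.
uploaded_file = st.file_uploader("Or upload a CSV file", type="csv")
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
    if "text" in df.columns:
        # Analyze each row with the same pipeline as the single-text input
        df["analysis"] = df["text"].astype(str).apply(
            lambda t: process_text(t, tokenizer, model)
        )
        st.dataframe(df)
    else:
        st.error('The uploaded CSV needs a "text" column.')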