Spaces:

eaglelandsonce
/

TensorFlowClass

Sleeping

App Files Community

eaglelandsonce committed on Jul 9, 2024

Commit

2ead64f

verified ·

1 Parent(s): 5794470

Update pages/21_GraphRag.py

Browse files

Files changed (1) hide show

pages/21_GraphRag.py +71 -59

pages/21_GraphRag.py CHANGED Viewed

@@ -1,70 +1,82 @@
 import streamlit as st
-import sys
-import subprocess
-import importlib
-# Page heading for this Streamlit page.
-st.title("GraphRAG Module Explorer")
-# Function to install a package
-def install_package(package):
-    """Install `package` by running `pip install` with the current interpreter.
-
-    Uses subprocess.check_call, so a non-zero pip exit status raises
-    subprocess.CalledProcessError instead of being ignored.
-    """
-    # sys.executable -m pip targets the same interpreter that runs this app.
-    subprocess.check_call([sys.executable, "-m", "pip", "install", package])
-# Check and install required packages
-# Each name is imported by module name; only an ImportError triggers an install.
-required_packages = ['graphrag', 'sentence_transformers']
-for package in required_packages:
-    try:
-        importlib.import_module(package)
-    except ImportError:
-        # Report progress on the page around the pip call.
-        st.write(f"Installing {package}...")
-        install_package(package)
-        st.write(f"{package} installed successfully.")
-# Now try to import graphrag
-# The whole exploration runs inside one try block, so any failure (import or
-# introspection) is reported on the page by the except clause at the bottom.
-try:
-    import graphrag
-    import inspect
-    # Display all attributes and functions in the graphrag module
-    st.header("GraphRAG Module Contents")
-    graphrag_contents = dir(graphrag)
-    for item in graphrag_contents:
-        attr = getattr(graphrag, item)
-        st.subheader(f"{item}")
-        st.write(f"Type: {type(attr)}")
-        # Classes: list each function member with its signature and docstring.
-        if inspect.isclass(attr):
-            st.write("Class Methods:")
-            for name, method in inspect.getmembers(attr, predicate=inspect.isfunction):
-                st.write(f"- {name}")
-                st.write(f"  Signature: {inspect.signature(method)}")
-                st.write(f"  Docstring: {method.__doc__}")
-        # Plain functions: show signature and docstring.
-        elif inspect.isfunction(attr):
-            st.write("Function:")
-            st.write(f"Signature: {inspect.signature(attr)}")
-            st.write(f"Docstring: {attr.__doc__}")
-        # Simple scalar attributes: show the value itself.
-        elif isinstance(attr, (int, float, str, bool)):
-            st.write(f"Value: {attr}")
-        # Separator between attributes; other attribute kinds show only their type.
-        st.write("---")
-    # Display the module's docstring if available
-    if graphrag.__doc__:
-        st.header("GraphRAG Module Documentation")
-        st.write(graphrag.__doc__)
-    st.header("Next Steps")
-    st.write("""
-    Based on the information above, we need to determine:
-    1. How to create a graph representation of text using graphrag.
-    2. How to process this graph representation for analysis.
-    3. Whether graphrag provides any built-in analysis tools or if we need to integrate it with other libraries.
-    Please review the module contents and let me know which components seem most relevant for our text analysis task.
-    """)
-except Exception as e:
-    # Surface the error on the page instead of letting the script crash.
-    st.error(f"An error occurred while exploring the graphrag module: {str(e)}")
-    st.write("Please check the installation of graphrag and its dependencies, and try running the app again.")

 import streamlit as st
+import graphrag
+import networkx as nx
+import matplotlib.pyplot as plt
+from sentence_transformers import SentenceTransformer
+import torch
+import nltk
+from nltk.tokenize import sent_tokenize, word_tokenize
+# Fetch the 'punkt' NLTK resource at import time; quiet=True suppresses the
+# download log. Presumably this is the data sent_tokenize needs below.
+# NOTE(review): newer NLTK releases may look for 'punkt_tab' instead — confirm
+# against the NLTK version this Space installs.
+nltk.download('punkt', quiet=True)
+@st.cache_resource
+def load_models():
+    """Return the SentenceTransformer used to embed sentences.
+
+    The function is wrapped in st.cache_resource, so the model object is
+    created once and reused rather than rebuilt on each script rerun.
+    """
+    return SentenceTransformer('all-MiniLM-L6-v2')
+def text_to_graph(text, sentence_model, similarity_threshold=0.5):
+    """Build a sentence-similarity graph from free text.
+
+    Args:
+        text: Raw text; it is split into sentences with sent_tokenize.
+        sentence_model: Object whose encode(sentence) returns the embedding
+            for one sentence (the app passes a SentenceTransformer).
+        similarity_threshold: Two sentences are linked only when their cosine
+            similarity is strictly greater than this value. Defaults to 0.5,
+            the value that was previously hard-coded.
+
+    Returns:
+        A (G, sentences) tuple. G is an undirected networkx Graph with one
+        node per sentence, keyed by sentence index and carrying `text` and
+        `embedding` attributes; each edge stores the similarity as `weight`.
+        sentences is the list of sentence strings in document order.
+    """
+    # Tokenize text into sentences
+    sentences = sent_tokenize(text)
+    # Create graph
+    G = nx.Graph()
+    # Add nodes (sentences) to the graph. Keep one tensor per embedding so the
+    # pairwise loop below does not rebuild tensors for every comparison.
+    embedding_tensors = []
+    for i, sentence in enumerate(sentences):
+        embedding = sentence_model.encode(sentence)
+        G.add_node(i, text=sentence, embedding=embedding)
+        embedding_tensors.append(torch.tensor(embedding))
+    # Add edges between sentences based on cosine similarity (each unordered
+    # pair is visited once).
+    for i, first in enumerate(embedding_tensors):
+        for j in range(i + 1, len(embedding_tensors)):
+            similarity = torch.cosine_similarity(first, embedding_tensors[j], dim=0).item()
+            if similarity > similarity_threshold:
+                G.add_edge(i, j, weight=similarity)
+    return G, sentences
+def analyze_text(text, sentence_model, top_n=3):
+    """Turn text into a sentence graph and compute summary statistics.
+
+    Args:
+        text: Raw text to analyze.
+        sentence_model: Embedding model forwarded to text_to_graph.
+        top_n: Maximum number of top-ranked sentences to return. Defaults to
+            3, the value that was previously hard-coded.
+
+    Returns:
+        A tuple (G, sentences, num_nodes, num_edges, avg_degree,
+        important_sentences). important_sentences holds the node indices
+        with the highest PageRank scores, best first. For text that yields
+        no sentences, avg_degree is 0.0 and important_sentences is empty.
+    """
+    G, sentences = text_to_graph(text, sentence_model)
+    # Basic graph analysis
+    num_nodes = G.number_of_nodes()
+    num_edges = G.number_of_edges()
+    # Text with no sentences (e.g. whitespace only) gives an empty graph;
+    # guard the division so it does not raise ZeroDivisionError.
+    if num_nodes:
+        avg_degree = sum(dict(G.degree()).values()) / num_nodes
+    else:
+        avg_degree = 0.0
+    # Identify important sentences using PageRank (nothing to rank when empty)
+    pagerank = nx.pagerank(G) if num_nodes else {}
+    important_sentences = sorted(pagerank, key=pagerank.get, reverse=True)[:top_n]
+    return G, sentences, num_nodes, num_edges, avg_degree, important_sentences
+# Streamlit page: collect text, run the graph analysis, and render the results.
+st.title("GraphRAG-based Text Analysis")
+sentence_model = load_models()
+text_input = st.text_area("Enter text for analysis:", height=200)
+if st.button("Analyze Text"):
+    # Whitespace-only input contains no sentences, so treat it as empty too.
+    if text_input.strip():
+        G, sentences, num_nodes, num_edges, avg_degree, important_sentences = analyze_text(text_input, sentence_model)
+        st.write(f"Number of sentences: {num_nodes}")
+        st.write(f"Number of connections: {num_edges}")
+        st.write(f"Average connections per sentence: {avg_degree:.2f}")
+        st.subheader("Most important sentences:")
+        for i in important_sentences:
+            st.write(f"- {sentences[i]}")
+        # Visualize graph on an explicit Figure: st.pyplot is documented to take
+        # a Figure, and this avoids relying on pyplot's global current figure.
+        fig, ax = plt.subplots(figsize=(10, 6))
+        pos = nx.spring_layout(G)
+        nx.draw(G, pos, ax=ax, with_labels=False, node_size=30, node_color='lightblue', edge_color='gray')
+        ax.set_title("Text as Graph")
+        st.pyplot(fig)
+        # Close the figure so repeated clicks do not accumulate open figures.
+        plt.close(fig)
+    else:
+        st.write("Please enter some text to analyze.")