Prompt_Squirrel

Build error

App Files Files Community

FoodDesert committed on Apr 8, 2024

Commit

83610fc

verified ·

1 Parent(s): 5b174ea

Upload 3 files

Browse files

Files changed (3) hide show

README.md +1 -1
app.py +90 -29
tfidfreducedfiles.joblib +3 -0

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: Tagset Completer
 emoji: 🐿️
 colorFrom: gray
 colorTo: gray

 ---
+title: Prompt Squirrel
 emoji: 🐿️
 colorFrom: gray
 colorTo: gray

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import gradio as gr
 from sklearn.metrics.pairwise import cosine_similarity
 from scipy.sparse import csr_matrix
 import numpy as np
 from joblib import load
 import h5py
 from io import BytesIO
@@ -19,6 +20,7 @@ import io
 import os
 import glob
 import itertools
@@ -32,7 +34,7 @@ Since Stable Diffusion's initial release in 2022, users have developed a myriad
 Some models react best when prompted with verbose scene descriptions akin to DALL-E, while others fine-tuned on images scraped from popular image boards understand those boards' tag sets.
 This tool serves as a linguistic bridge to the e621 image board tag lexicon, on which many popular models such as Fluffyrock, Fluffusion, and Pony Diffusion v6 were trained.
-When you enter a txt2img prompt and press the "submit" button, the Tagset Completer parses your prompt and checks that all your tags are valid e621 tags.
 If it finds any that are not, it recommends some valid e621 tags you can use to replace them in the "Unknown Tags" section.
 Additionally, in the "Top Artists" text box, it lists the artists who would most likely draw an image having the set of tags you provided.
 This is useful to align your prompt with the expected input to an e621-trained model.
@@ -114,18 +116,12 @@ See SamplePrompts.csv for the list of prompts used and their descriptions.
 nsfw_threshold = 0.95  # Assuming the threshold value is defined here
-#grammar=r"""
-#!start: (prompt | /[][():]/+)*
-#prompt: (emphasized | plain | commas | WHITESPACE)*
-#!emphasized: "(" prompt ")"
-#        | "(" prompt ":" [WHITESPACE] NUMBER [WHITESPACE] ")"
-#!comma: ","
-#commas: double_comma | comma
-#double_comma: comma WHITESPACE* comma
-#WHITESPACE: /\s+/
-#plain: /([^,\\\[\]():|]|\\.)+/
-#%import common.SIGNED_NUMBER -> NUMBER
-#"""
 grammar=r"""
 !start: (prompt | /[][():]/+)*
@@ -353,11 +349,11 @@ def geometric_mean_given_words(target_word, context_words, co_occurrence_matrix,
     return geometric_mean
-def create_html_tables_for_tags(tag, result, tag2count, tag2idwiki):
     # Wrap the tag part in a <span> with styles for bold and larger font
-    html_str = f"<div style='display: inline-block; margin: 20px; vertical-align: top;'><table><thead><tr><th colspan='3' style='text-align: center; padding-bottom: 10px;'><span style='font-weight: bold; font-size: 20px;'>{tag}</span></th></tr></thead><tbody><tr style='border-bottom: 1px solid #000;'><th>Corrected Tag</th><th>Similarity</th><th>Count</th></tr>"
     # Loop through the results and add table rows for each
-    for word, sim in result:
         word_with_underscores = word.replace(' ', '_')
         count = tag2count.get(word_with_underscores, 0)  # Get the count if available, otherwise default to 0
         tag_id, wiki_entry = tag2idwiki.get(word_with_underscores, (None, ''))
@@ -379,7 +375,7 @@ def create_html_tables_for_tags(tag, result, tag2count, tag2idwiki):
 def create_top_artists_table(top_artists):
     # Add a heading above the table
-    html_str = "<div style='display: inline-block; margin: 20px; text-align: center;'>"
     html_str += "<h1>Top Artists</h1>"  # Heading for the table
     # Start the table with increased font size and no borders between rows
     html_str += "<table style='font-size: 20px; border-collapse: collapse;'>"
@@ -396,16 +392,70 @@ def create_top_artists_table(top_artists):
     return html_str
 def create_html_placeholder(title="", content="", placeholder_height=400, placeholder_width="100%"):
     # Include a title in the same style as the top artists table heading
-    html_placeholder = f"<div style='text-align: center;'><h1>{title}</h1></div>"
     # Conditionally add content if present
     if content:
         html_placeholder += f"<div style='text-align: center; margin-bottom: 20px;'><p>{content}</p></div>"
     # Add the placeholder div with specified height and width
     html_placeholder += f"<div style='height: {placeholder_height}px; width: {placeholder_width}; margin: 20px auto; background: transparent;'></div>"
     return html_placeholder
 def find_similar_tags(test_tags, similarity_weight, allow_nsfw_tags):
     #Initialize stuff
@@ -425,7 +475,7 @@ def find_similar_tags(test_tags, similarity_weight, allow_nsfw_tags):
     transformed_tags = [tag.replace(' ', '_') for tag in modified_tags]
     # Find similar tags and prepare data for tables
-    html_content = "<div style='display: inline-block; margin: 20px; text-align: center;'>"
     html_content += "<h1>Unknown Tags</h1>"  # Heading for the table
     tags_added = False
     bad_entities = []
@@ -561,14 +611,21 @@ def find_similar_artists(original_tags_string, top_n, similarity_weight, allow_n
         ###unseen_tags = list(set(OrderedDict.fromkeys(new_image_tags)) - set(vectorizer.vocabulary_.keys()))   #We may want this line again later.  These are the tags that were not used to calculate the artists list.
         unseen_tags_data, bad_entities = find_similar_tags(tag_data, similarity_weight, allow_nsfw_tags)
         bad_entities.extend(augment_bad_entities_with_regex(new_tags_string))
         bad_entities.sort(key=lambda x: x['start'])
         bad_tags_illustrated_string = {"text":new_tags_string, "entities":bad_entities}
-        #modified_tags = [tag_info['modified_tag'] for tag_info in tag_data]
-        #X_new_image = vectorizer.transform([','.join(modified_tags + removed_tags)])
-        #artist_matrix_tags = [tag_info['artist_matrix_tag'] for tag_info in tag_data]
         artist_matrix_tags = [tag_info['artist_matrix_tag'] for tag_info in tag_data if tag_info['node_type'] == "tag"]
         X_new_image = vectorizer.transform([','.join(artist_matrix_tags + removed_tags)])
         similarities = cosine_similarity(X_new_image, X_artist)[0]
@@ -586,12 +643,12 @@ def find_similar_artists(original_tags_string, top_n, similarity_weight, allow_n
                 image_galleries.append(baseline)  # Add baseline as its own gallery item
                 image_galleries.append(artists)  # Extend the list with artist tuples
-        return (unseen_tags_data, bad_tags_illustrated_string, top_artists_str, dynamic_prompts_formatted_artists, *image_galleries)
     except ParseError as e:
-        return [], "Parse Error: Check for mismatched parentheses or something", "", None, None
-with gr.Blocks() as app:
     with gr.Group():
         with gr.Row():
             with gr.Column(scale=3):
@@ -609,7 +666,11 @@ with gr.Blocks() as app:
                 with gr.Row():
                     similarity_weight = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label="Similarity weight")
                     allow_nsfw = gr.Checkbox(label="Allow NSFW Tags", value=False)
-                unseen_tags = gr.HTML(label="Unknown Tags", value=create_html_placeholder(title="Unknown Tags"))
         with gr.Column(scale=1):
             with gr.Group():
                 num_artists = gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Number of artists")
@@ -626,7 +687,7 @@ with gr.Blocks() as app:
     submit_button.click(
         find_similar_artists,
         inputs=[image_tags, num_artists, similarity_weight, allow_nsfw],
-        outputs=[unseen_tags, bad_tags_illustrated_string, top_artists, dynamic_prompts] + galleries
     )
     gr.Markdown(faq_content)

 from sklearn.metrics.pairwise import cosine_similarity
 from scipy.sparse import csr_matrix
 import numpy as np
+import joblib
 from joblib import load
 import h5py
 from io import BytesIO
 import os
 import glob
 import itertools
+from itertools import islice
 Some models react best when prompted with verbose scene descriptions akin to DALL-E, while others fine-tuned on images scraped from popular image boards understand those boards' tag sets.
 This tool serves as a linguistic bridge to the e621 image board tag lexicon, on which many popular models such as Fluffyrock, Fluffusion, and Pony Diffusion v6 were trained.
+When you enter a txt2img prompt and press the "submit" button, Prompt Squirrel parses your prompt and checks that all your tags are valid e621 tags.
 If it finds any that are not, it recommends some valid e621 tags you can use to replace them in the "Unknown Tags" section.
 Additionally, in the "Top Artists" text box, it lists the artists who would most likely draw an image having the set of tags you provided.
 This is useful to align your prompt with the expected input to an e621-trained model.
 nsfw_threshold = 0.95  # Assuming the threshold value is defined here
+css = """
+.scrollable-content {
+    max-height: 500px;
+    overflow-y: auto;
+}
+"""
 grammar=r"""
 !start: (prompt | /[][():]/+)*
     return geometric_mean
+def create_html_tables_for_tags(subtable_heading, word_similarity_tuples, tag2count, tag2idwiki):
     # Wrap the tag part in a <span> with styles for bold and larger font
+    html_str = f"<div style='display: inline-block; margin: 20px; vertical-align: top;'><table><thead><tr><th colspan='3' style='text-align: center; padding-bottom: 10px;'><span style='font-weight: bold; font-size: 20px;'>{subtable_heading}</span></th></tr></thead><tbody><tr style='border-bottom: 1px solid #000;'><th>Corrected Tag</th><th>Similarity</th><th>Count</th></tr>"
     # Loop through the results and add table rows for each
+    for word, sim in word_similarity_tuples:
         word_with_underscores = word.replace(' ', '_')
         count = tag2count.get(word_with_underscores, 0)  # Get the count if available, otherwise default to 0
         tag_id, wiki_entry = tag2idwiki.get(word_with_underscores, (None, ''))
 def create_top_artists_table(top_artists):
     # Add a heading above the table
+    html_str = "<div class=\"scrollable-content\" style='display: inline-block; margin: 20px; text-align: center;'>"
     html_str += "<h1>Top Artists</h1>"  # Heading for the table
     # Start the table with increased font size and no borders between rows
     html_str += "<table style='font-size: 20px; border-collapse: collapse;'>"
     return html_str
+def construct_pseudo_vector(pseudo_doc_terms, idf_loaded, tag_to_row_loaded):
+    """Build a dense 1 x N weight vector for a pseudo document of tags.
+
+    N is ``len(tag_to_row_loaded)``.  For every term of ``pseudo_doc_terms``
+    that is a key of ``tag_to_row_loaded``, the position given by that mapping
+    is set to ``idf_loaded.get(term, 0)``.  Terms missing from the mapping are
+    skipped, and a repeated term just rewrites the same value, so repetition
+    adds no extra weight.
+
+    Returns the vector reshaped to ``(1, N)``.
+    """
+    # Start from all zeros, one slot per entry of tag_to_row_loaded
+    pseudo_vector = np.zeros(len(tag_to_row_loaded))
+    # Fill in the vector for terms in the pseudo document
+    for term in pseudo_doc_terms:
+        if term in tag_to_row_loaded:
+            index = tag_to_row_loaded[term]
+            pseudo_vector[index] = idf_loaded.get(term, 0)
+    # Return the vector as a 2D array for compatibility with SVD transform
+    return pseudo_vector.reshape(1, -1)
+def get_top_indices(reduced_pseudo_vector, reduced_matrix):
+    """Return every row index of ``reduced_matrix``, most similar first.
+
+    Similarity is the cosine similarity between ``reduced_pseudo_vector`` and
+    each row of ``reduced_matrix``.  No truncation happens here: the full
+    ordering is returned and callers take whatever prefix they need.
+    """
+    # Compute cosine similarities
+    similarities = cosine_similarity(reduced_pseudo_vector, reduced_matrix).flatten()
+    # Get sorted tag indices based on similarities, in descending order
+    sorted_indices = np.argsort(-similarities)
+    # Return all indices (not just a top-N slice), best match first
+    return sorted_indices
+def get_tfidf_reduced_similar_tags(pseudo_doc_terms, allow_nsfw_tags):
+    # Check and load components if not already loaded
+    if not hasattr(get_tfidf_reduced_similar_tags, "components"):
+        get_tfidf_reduced_similar_tags.components = joblib.load('tfidfreducedfiles.joblib')
+    # Access components
+    components = get_tfidf_reduced_similar_tags.components
+    idf_loaded = components['idf']
+    tag_to_row_loaded = components['tag_to_row']
+    reduced_matrix_loaded = components['reduced_matrix']
+    svd_loaded = components['svd_model']
+    # Remaining part of the function
+    pseudo_vector = construct_pseudo_vector(pseudo_doc_terms, idf_loaded, tag_to_row_loaded)
+    reduced_pseudo_vector = svd_loaded.transform(pseudo_vector)
+    # Compute cosine similarities
+    similarities = cosine_similarity(reduced_pseudo_vector, reduced_matrix_loaded).flatten()
+    # Get top N indices based on similarities
+    top_indices_reduced = get_top_indices(reduced_pseudo_vector, reduced_matrix_loaded)
+    # Create the initial tag_similarity_dict
+    tag_similarity_dict = {list(tag_to_row_loaded.keys())[i]: similarities[i] for i in top_indices_reduced}
+    if not allow_nsfw_tags:
+        tag_similarity_dict = {tag: similarity for tag, similarity in tag_similarity_dict.items() if tag.replace(' ', '_') not in nsfw_tags}
+    sorted_tag_similarity_dict = OrderedDict(sorted(tag_similarity_dict.items(), key=lambda x: x[1], reverse=True))
+    return sorted_tag_similarity_dict
 def create_html_placeholder(title="", content="", placeholder_height=400, placeholder_width="100%"):
+    """Return an HTML snippet holding a centered heading and an empty spacer.
+
+    The snippet is a ``scrollable-content`` div with ``title`` in an ``<h1>``,
+    then (only when ``content`` is truthy) a centered paragraph with
+    ``content``, then a transparent div of ``placeholder_height`` pixels and
+    ``placeholder_width`` width.  ``title`` and ``content`` are interpolated
+    as-is, without HTML escaping.
+    """
     # Include a title in the same style as the top artists table heading
+    html_placeholder = f"<div class=\"scrollable-content\" style='text-align: center;'><h1>{title}</h1></div>"
     # Conditionally add content if present
     if content:
         html_placeholder += f"<div style='text-align: center; margin-bottom: 20px;'><p>{content}</p></div>"
     # Add the placeholder div with specified height and width
     html_placeholder += f"<div style='height: {placeholder_height}px; width: {placeholder_width}; margin: 20px auto; background: transparent;'></div>"
     return html_placeholder
 def find_similar_tags(test_tags, similarity_weight, allow_nsfw_tags):
     #Initialize stuff
     transformed_tags = [tag.replace(' ', '_') for tag in modified_tags]
     # Find similar tags and prepare data for tables
+    html_content = "<div class=\"scrollable-content\" style='display: inline-block; margin: 20px; text-align: center;'>"
     html_content += "<h1>Unknown Tags</h1>"  # Heading for the table
     tags_added = False
     bad_entities = []
         ###unseen_tags = list(set(OrderedDict.fromkeys(new_image_tags)) - set(vectorizer.vocabulary_.keys()))   #We may want this line again later.  These are the tags that were not used to calculate the artists list.
         unseen_tags_data, bad_entities = find_similar_tags(tag_data, similarity_weight, allow_nsfw_tags)
+        #Bad tags stuff
         bad_entities.extend(augment_bad_entities_with_regex(new_tags_string))
         bad_entities.sort(key=lambda x: x['start'])
         bad_tags_illustrated_string = {"text":new_tags_string, "entities":bad_entities}
+        #Suggested tags stuff
+        suggested_tags_html_content = "<div class=\"scrollable-content\" style='display: inline-block; margin: 20px; text-align: center;'>"
+        suggested_tags_html_content += "<h1>Suggested Tags</h1>"  # Heading for the table
+        suggested_tags = get_tfidf_reduced_similar_tags([item["artist_matrix_tag"] for item in tag_data], allow_nsfw_tags)
+        topnsuggestions = list(islice(suggested_tags.items(), 100))
+        suggested_tags_html_content += create_html_tables_for_tags("Suggested Tag", topnsuggestions, find_similar_tags.tag2count, find_similar_tags.tag2idwiki)
+        #Artist stuff
         artist_matrix_tags = [tag_info['artist_matrix_tag'] for tag_info in tag_data if tag_info['node_type'] == "tag"]
         X_new_image = vectorizer.transform([','.join(artist_matrix_tags + removed_tags)])
         similarities = cosine_similarity(X_new_image, X_artist)[0]
                 image_galleries.append(baseline)  # Add baseline as its own gallery item
                 image_galleries.append(artists)  # Extend the list with artist tuples
+        return (unseen_tags_data, bad_tags_illustrated_string, suggested_tags_html_content, top_artists_str, dynamic_prompts_formatted_artists, *image_galleries)
     except ParseError as e:
+        return [], "Parse Error: Check for mismatched parentheses or something", "", "", None, None
+with gr.Blocks(css=css) as app:
     with gr.Group():
         with gr.Row():
             with gr.Column(scale=3):
                 with gr.Row():
                     similarity_weight = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label="Similarity weight")
                     allow_nsfw = gr.Checkbox(label="Allow NSFW Tags", value=False)
+                with gr.Row():
+                    with gr.Column(scale=2):
+                        unseen_tags = gr.HTML(label="Unknown Tags", value=create_html_placeholder(title="Unknown Tags"))
+                    with gr.Column(scale=1):
+                        suggested_tags = gr.HTML(label="Suggested Tags", value=create_html_placeholder(title="Suggested Tags"))
         with gr.Column(scale=1):
             with gr.Group():
                 num_artists = gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Number of artists")
     submit_button.click(
         find_similar_artists,
         inputs=[image_tags, num_artists, similarity_weight, allow_nsfw],
+        outputs=[unseen_tags, bad_tags_illustrated_string, suggested_tags, top_artists, dynamic_prompts] + galleries
     )
     gr.Markdown(faq_content)

tfidfreducedfiles.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a325f75a94c8a6c47034fba0e96a89039a3550463f916690b74c16d139f32504
+size 68245886