blazingbunny committed on
Commit
81f86dc
·
verified ·
1 Parent(s): 3efbb40

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -36
app.py CHANGED
@@ -1,10 +1,5 @@
1
  import streamlit as st
2
- from advertools import sitemap_to_df, word_frequency
3
- import pandas as pd
4
- import matplotlib.pyplot as plt
5
-
6
- # List of common words to filter
7
- common_words = set(["author", "category", "product", "authors", "categories", "products", "blog", "blogs"])
8
 
9
  # Sidebar inputs for sitemaps
10
  sitemap_urls = [
@@ -24,36 +19,10 @@ for sitemap_url in sitemap_urls:
24
  except Exception as e:
25
  st.write(f"An error occurred for {sitemap_url}:", str(e))
26
 
27
- # Analyze the combined URLs
28
  if all_urls:
29
- try:
30
- # Perform analysis on combined URLs
31
- # Extracting relevant words from the last folder, replacing hyphens with spaces
32
- slugs = [url.split("/")[-2].replace("-", " ") for url in all_urls]
33
-
34
- # Filtering out common words
35
- slugs_filtered = [' '.join([word for word in slug.split() if word.lower() not in common_words]) for slug in slugs]
36
-
37
- # Word frequency for single words, excluding common words
38
- word_freq = word_frequency(slugs_filtered, phrase_len=1)
39
- st.subheader("Most-frequently used words in article titles (excluding common words)")
40
- st.dataframe(word_freq.head(10))
41
-
42
- # Word frequency for two-word phrases, excluding common words
43
- word_freq_phrases = word_frequency(slugs_filtered, phrase_len=2)
44
- st.subheader("Most-frequently used two-word phrases in article titles (excluding common words)")
45
- st.dataframe(word_freq_phrases.head(10))
46
-
47
- # Plotting trends
48
- # Since we don't have dates associated with each URL, we won't be able to plot trends based on dates.
49
- # If you have a way to associate dates with each URL, we can modify this part accordingly.
50
-
51
- # Total number of URLs
52
- st.subheader("Total Number of URLs")
53
- total_urls = len(all_urls)
54
- st.write(f"The total number of URLs in all sitemaps is {total_urls}.")
55
-
56
- except Exception as e:
57
- st.write("An error occurred during analysis:", str(e))
58
  else:
59
  st.write("Please enter at least one valid sitemap URL.")
 
1
  import streamlit as st
2
+ from advertools import sitemap_to_df
 
 
 
 
 
3
 
4
  # Sidebar inputs for sitemaps
5
  sitemap_urls = [
 
19
  except Exception as e:
20
  st.write(f"An error occurred for {sitemap_url}:", str(e))
21
 
22
+ # Display all URLs
23
  if all_urls:
24
+ st.subheader("All URLs from all sitemaps:")
25
+ for url in all_urls:
26
+ st.write(url)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  else:
28
  st.write("Please enter at least one valid sitemap URL.")