Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -24,9 +24,9 @@ def get_metadata(url):
|
|
24 |
modified_time_tag = soup.find('meta', {'property': 'article:modified_time'})
|
25 |
modified_time = modified_time_tag.get('content') if modified_time_tag else 'Not available'
|
26 |
|
27 |
-
return author, published_time, modified_time
|
28 |
except Exception as e:
|
29 |
-
return f"Error: {str(e)}", 'Not available', 'Not available'
|
30 |
|
31 |
# Streamlit app
|
32 |
st.title("URL Metadata Scraper")
|
@@ -41,10 +41,19 @@ if uploaded_file is not None:
|
|
41 |
# Limit to 10,000 URLs
|
42 |
df = df.head(10000)
|
43 |
|
|
|
|
|
|
|
44 |
# Display results for each URL
|
45 |
for url in df['URL']:
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
# Display a message indicating the end of the process
|
50 |
st.success("Scraping completed successfully!")
|
|
|
24 |
modified_time_tag = soup.find('meta', {'property': 'article:modified_time'})
|
25 |
modified_time = modified_time_tag.get('content') if modified_time_tag else 'Not available'
|
26 |
|
27 |
+
return url, author, published_time, modified_time
|
28 |
except Exception as e:
|
29 |
+
return url, f"Error: {str(e)}", 'Not available', 'Not available'
|
30 |
|
31 |
# Streamlit app
|
32 |
st.title("URL Metadata Scraper")
|
|
|
41 |
# Limit to 10,000 URLs
|
42 |
df = df.head(10000)
|
43 |
|
44 |
+
# Create a list to store the results
|
45 |
+
results = []
|
46 |
+
|
47 |
# Display results for each URL
|
48 |
for url in df['URL']:
|
49 |
+
result = get_metadata(url)
|
50 |
+
results.append(result)
|
51 |
+
|
52 |
+
# Convert the results to a DataFrame
|
53 |
+
results_df = pd.DataFrame(results, columns=['URL', 'Author', 'Published Time', 'Modified Time'])
|
54 |
+
|
55 |
+
# Display the results as a table
|
56 |
+
st.table(results_df)
|
57 |
|
58 |
# Display a message indicating the end of the process
|
59 |
st.success("Scraping completed successfully!")
|