Spaces:
Sleeping
Sleeping
Update document_scrapped.py
Browse files- document_scrapped.py +4 -4
document_scrapped.py
CHANGED
|
@@ -104,7 +104,7 @@ def excel(link : str) -> str:
|
|
| 104 |
sample_df = df
|
| 105 |
json_data = sample_df.to_json(orient='records')
|
| 106 |
js = json.loads(json_data)
|
| 107 |
-
rs =
|
| 108 |
return rs
|
| 109 |
else:
|
| 110 |
print("Failed to download file")
|
|
@@ -134,7 +134,7 @@ def csv(link : str) -> str:
|
|
| 134 |
|
| 135 |
json_data = sample_df.to_json(orient='records')
|
| 136 |
js = json.loads(json_data)
|
| 137 |
-
rs =
|
| 138 |
return rs
|
| 139 |
|
| 140 |
except Exception as e:
|
|
@@ -156,7 +156,7 @@ def docx(url : str) -> str:
|
|
| 156 |
full_text.append(para.text)
|
| 157 |
|
| 158 |
f = "\n".join(full_text)
|
| 159 |
-
n = select_words_until_char_limit(f,
|
| 160 |
return n
|
| 161 |
except Exception as e:
|
| 162 |
print(f"An error occurred: {e}")
|
|
@@ -182,7 +182,7 @@ def pptx(url : str) -> str:
|
|
| 182 |
full_text.append(shape.text)
|
| 183 |
|
| 184 |
g = "\n".join(full_text)
|
| 185 |
-
c = select_words_until_char_limit(g,
|
| 186 |
return c
|
| 187 |
except Exception as e:
|
| 188 |
print(f"An error occurred: {e}")
|
|
|
|
| 104 |
sample_df = df
|
| 105 |
json_data = sample_df.to_json(orient='records')
|
| 106 |
js = json.loads(json_data)
|
| 107 |
+
rs = select_words_until_char_limit(f"{js}", 32000)
|
| 108 |
return rs
|
| 109 |
else:
|
| 110 |
print("Failed to download file")
|
|
|
|
| 134 |
|
| 135 |
json_data = sample_df.to_json(orient='records')
|
| 136 |
js = json.loads(json_data)
|
| 137 |
+
rs = select_words_until_char_limit(f"{js}", 32000)
|
| 138 |
return rs
|
| 139 |
|
| 140 |
except Exception as e:
|
|
|
|
| 156 |
full_text.append(para.text)
|
| 157 |
|
| 158 |
f = "\n".join(full_text)
|
| 159 |
+
n = select_words_until_char_limit(f, 32000)
|
| 160 |
return n
|
| 161 |
except Exception as e:
|
| 162 |
print(f"An error occurred: {e}")
|
|
|
|
| 182 |
full_text.append(shape.text)
|
| 183 |
|
| 184 |
g = "\n".join(full_text)
|
| 185 |
+
c = select_words_until_char_limit(g, 32000)
|
| 186 |
return c
|
| 187 |
except Exception as e:
|
| 188 |
print(f"An error occurred: {e}")
|