Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -19,30 +19,32 @@ def extract_order_data(pdf_file):
|
|
19 |
for page in reader.pages:
|
20 |
text = page.extract_text()
|
21 |
if text:
|
22 |
-
# Splitting the text page by page and then by line
|
23 |
lines = text.strip().split('\n')
|
24 |
-
|
25 |
-
# Find the start of the table, assuming it begins with a "Order ID" heading
|
26 |
start_index = next((i for i, line in enumerate(lines) if "Order ID" in line), None)
|
27 |
|
28 |
if start_index is not None:
|
29 |
-
# Table headers are on the same line
|
30 |
headers = [header.strip() for header in lines[start_index].split(",")]
|
31 |
-
|
32 |
-
#
|
33 |
for line in lines[start_index + 1:]:
|
34 |
-
values = [
|
35 |
-
# Ensure that the number of values matches the number of headers
|
36 |
if len(headers) == len(values):
|
37 |
order_data.append(dict(zip(headers, values)))
|
|
|
|
|
|
|
|
|
|
|
38 |
return order_data
|
39 |
|
|
|
|
|
40 |
# Function to fetch order status using GROQ API
|
41 |
def fetch_order_status_from_groq(order_id, groq_api_key):
|
42 |
"""Fetches order status and customer details from GROQ API."""
|
43 |
|
44 |
transport = RequestsHTTPTransport(
|
45 |
-
url="https://api.groq.cloud/v1/graphql", # Replace with your GROQ endpoint
|
46 |
headers={"Authorization": f"Bearer {groq_api_key}"},
|
47 |
verify=True,
|
48 |
retries=3,
|
@@ -92,7 +94,7 @@ def main():
|
|
92 |
else:
|
93 |
st.error("Could not retrieve order status.")
|
94 |
else:
|
95 |
-
st.error("Failed to extract order data from PDF.")
|
96 |
|
97 |
if __name__ == "__main__":
|
98 |
main()
|
|
|
19 |
for page in reader.pages:
|
20 |
text = page.extract_text()
|
21 |
if text:
|
|
|
22 |
lines = text.strip().split('\n')
|
|
|
|
|
23 |
start_index = next((i for i, line in enumerate(lines) if "Order ID" in line), None)
|
24 |
|
25 |
if start_index is not None:
|
|
|
26 |
headers = [header.strip() for header in lines[start_index].split(",")]
|
27 |
+
# Clean headers from extra spaces
|
28 |
+
headers = [h.replace(" ", "") for h in headers] # Remove spaces in header names
|
29 |
for line in lines[start_index + 1:]:
|
30 |
+
values = [v.strip() for v in line.split(",")]
|
|
|
31 |
if len(headers) == len(values):
|
32 |
order_data.append(dict(zip(headers, values)))
|
33 |
+
elif len(values) > len(headers):
|
34 |
+
# Handle cases where there are more values than headers (e.g., extra commas)
|
35 |
+
order_data.append(dict(zip(headers, values[:len(headers)])))
|
36 |
+
else:
|
37 |
+
print(f"Skipping line due to header/value mismatch: {line}") # print the problematic line.
|
38 |
return order_data
|
39 |
|
40 |
+
|
41 |
+
|
42 |
# Function to fetch order status using GROQ API
|
43 |
def fetch_order_status_from_groq(order_id, groq_api_key):
|
44 |
"""Fetches order status and customer details from GROQ API."""
|
45 |
|
46 |
transport = RequestsHTTPTransport(
|
47 |
+
url="https://api.groq.cloud/v1/graphql", # Replace with your GROQ endpoint
|
48 |
headers={"Authorization": f"Bearer {groq_api_key}"},
|
49 |
verify=True,
|
50 |
retries=3,
|
|
|
94 |
else:
|
95 |
st.error("Could not retrieve order status.")
|
96 |
else:
|
97 |
+
st.error("Failed to extract order data from PDF. Please check the PDF format and try again.")
|
98 |
|
99 |
if __name__ == "__main__":
|
100 |
main()
|