masadonline committed on
Commit
6c38165
·
verified ·
1 Parent(s): 6ca1b30

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -10
app.py CHANGED
@@ -19,30 +19,32 @@ def extract_order_data(pdf_file):
19
  for page in reader.pages:
20
  text = page.extract_text()
21
  if text:
22
- # Splitting the text page by page and then by line
23
  lines = text.strip().split('\n')
24
-
25
- # Find the start of the table, assuming it begins with a "Order ID" heading
26
  start_index = next((i for i, line in enumerate(lines) if "Order ID" in line), None)
27
 
28
  if start_index is not None:
29
- # Table headers are on the same line
30
  headers = [header.strip() for header in lines[start_index].split(",")]
31
-
32
- # Data starts from the next line
33
  for line in lines[start_index + 1:]:
34
- values = [value.strip() for value in line.split(",")]
35
- # Ensure that the number of values matches the number of headers
36
  if len(headers) == len(values):
37
  order_data.append(dict(zip(headers, values)))
 
 
 
 
 
38
  return order_data
39
 
 
 
40
  # Function to fetch order status using GROQ API
41
  def fetch_order_status_from_groq(order_id, groq_api_key):
42
  """Fetches order status and customer details from GROQ API."""
43
 
44
  transport = RequestsHTTPTransport(
45
- url="https://api.groq.cloud/v1/graphql", # Replace with your GROQ endpoint
46
  headers={"Authorization": f"Bearer {groq_api_key}"},
47
  verify=True,
48
  retries=3,
@@ -92,7 +94,7 @@ def main():
92
  else:
93
  st.error("Could not retrieve order status.")
94
  else:
95
- st.error("Failed to extract order data from PDF.")
96
 
97
  if __name__ == "__main__":
98
  main()
 
19
  for page in reader.pages:
20
  text = page.extract_text()
21
  if text:
 
22
  lines = text.strip().split('\n')
 
 
23
  start_index = next((i for i, line in enumerate(lines) if "Order ID" in line), None)
24
 
25
  if start_index is not None:
 
26
  headers = [header.strip() for header in lines[start_index].split(",")]
27
+ # Clean headers from extra spaces
28
+ headers = [h.replace(" ", "") for h in headers] # Remove spaces in header names
29
  for line in lines[start_index + 1:]:
30
+ values = [v.strip() for v in line.split(",")]
 
31
  if len(headers) == len(values):
32
  order_data.append(dict(zip(headers, values)))
33
+ elif len(values) > len(headers):
34
+ # Handle cases where there are more values than headers (e.g., extra commas)
35
+ order_data.append(dict(zip(headers, values[:len(headers)])))
36
+ else:
37
+ print(f"Skipping line due to header/value mismatch: {line}") # print the problematic line.
38
  return order_data
39
 
40
+
41
+
42
  # Function to fetch order status using GROQ API
43
  def fetch_order_status_from_groq(order_id, groq_api_key):
44
  """Fetches order status and customer details from GROQ API."""
45
 
46
  transport = RequestsHTTPTransport(
47
+ url="[https://api.groq.cloud/v1/graphql](https://api.groq.cloud/v1/graphql)", # Replace with your GROQ endpoint
48
  headers={"Authorization": f"Bearer {groq_api_key}"},
49
  verify=True,
50
  retries=3,
 
94
  else:
95
  st.error("Could not retrieve order status.")
96
  else:
97
+ st.error("Failed to extract order data from PDF. Please check the PDF format and try again.")
98
 
99
  if __name__ == "__main__":
100
  main()