import streamlit as st

import re

def extract_sentence_parts(text):
    """Return capitalised fragments of *text* that exceed 10 characters.

    A fragment begins at a word boundary with an ASCII uppercase letter,
    continues greedily over word characters and whitespace, and ends on a
    digit or a period. Fragments of 10 characters or fewer are discarded.
    """
    fragment_re = re.compile(r'\b[A-Z][\w\s]*[\d\.]')

    kept = []
    for found in fragment_re.finditer(text):
        fragment = found.group(0)
        # Only fragments strictly longer than 10 characters are reported.
        if len(fragment) > 10:
            kept.append(fragment)
    return kept


def find_strings_with_ending_conditions(text):
    """Return every substring of *text* that matches one of three endings.

    Each candidate starts with an ASCII uppercase letter followed by any run
    of word characters, whitespace, or the punctuation , ; : ( ) -. It must
    then end in one of the following (tried in this order), each immediately
    followed by whitespace or the end of the text:

    1. a period, exclamation mark or question mark;
    2. exactly five literal spaces;
    3. a colon.

    The full matched text is returned for each hit, in order of appearance.
    """
    ending_re = re.compile(
        r'([A-Z][\w\s,;:()-]*[.!?](?=\s|$))|([A-Z][\w\s,;:()-]* {5}(?=\s|$))|([A-Z][\w\s,;:()-]*:(?=\s|$))'
    )

    found = []
    for hit in ending_re.finditer(text):
        # group(0) is the whole match regardless of which alternative fired.
        found.append(hit.group(0))
    return found

def split_at_contiguous_spaces(text):
    """Split *text* wherever two or more whitespace characters occur in a row.

    Any whitespace counts (spaces, tabs, newlines), not only the space
    character. Single whitespace characters are left inside the pieces. As
    with any regex split, the result may contain empty strings, e.g. when the
    text starts or ends with a separator or is empty.
    """
    separator = re.compile(r'\s{2,}')
    return separator.split(text)

# Sample input - LOINC panels for exercise.
# A single-line listing in which each numeric identifier is followed by a
# description padded with a run of spaces; the text is truncated ("...") at
# the end. It is used as the default content of the input text area.
test_string = "LOINC,Detailed_description,exercise,questions of ,216                Exercise activity & pain severity panel 10591                           Exercise stress test study 18280                 D-Lactate^1st specimen post exercise 18281                 D-Lactate^2nd specimen post exercise 18282                 D-Lactate^3rd specimen post exercise 18283                 D-Lactate^4th specimen post exercise 18284                 D-Lactate^5th specimen post exercise 18285                 D-Lactate^6th specimen post exercise 18286                               D-lactate^pre exercise 24690                                   Time^post exercise 34256                                Breaths^post exercise 34257                             Heart rate^post exercise 34262                    Gas delivery source^post exercise 34264                        Gas flow.oxygen^post exercise 34266                      Oxygen saturation^post exercise 34417                              Heart beat^pre exercise 34418                             Heart rate^post exercise 34420                                 Oxygen^post exercise 34421                       Oxygen saturation^pre exercise 34422         Oxyhemoglobin/Hemoglobin.total^post exercise 37279..."


# --- Streamlit page: show the split records and the regex matches ---------
st.title("String Ending Conditions Matcher")
input_text = st.text_area("Enter your text:", value=test_string)

# A regex split always yields at least one element ([''] for empty input) and
# produces empty strings when the text starts or ends with a whitespace run.
# Drop blank pieces so that no empty bullets are rendered and the
# "No records found." branch below is actually reachable.
records = [
    record
    for record in split_at_contiguous_spaces(input_text)
    if record.strip()
]

if records:
    st.write("Split records:")
    for record in records:
        st.write(f"- {record}")
else:
    st.write("No records found.")


# Independently of the split above, list every substring of the raw input
# that satisfies one of the ending conditions.
results = find_strings_with_ending_conditions(input_text)
if results:
    st.write("Matched strings:")
    for result in results:
        st.write(f"- {result}")
else:
    st.write("No strings with the specified ending conditions were found.")
    

#result = extract_sentence_parts(test_string)
#st.write(result)