José Eliel Camargo Molina committed on
Commit
bf7477e
·
1 Parent(s): ca2bf21

latex fixed

Browse files
Files changed (1) hide show
  1. app.py +305 -265
app.py CHANGED
@@ -1,429 +1,469 @@
1
- import streamlit as st
2
- from transformers import PreTrainedTokenizerFast, BartForConditionalGeneration
3
- import torch
4
- from pathlib import Path
5
- import urllib.request
6
-
7
- # To latex stuff
8
- ####################################
9
 
10
- import itertools
11
  import re
 
 
 
12
 
 
13
  rep_tex_dict = {
14
- "SU3":{"-3":r"\bar{\textbf{3}}","3":r"\textbf{3}"},
15
- "SU2":{"-2":r"\textbf{2}","2":r"\textbf{2}","-3":r"\textbf{3}","3":r"\textbf{3}"},
16
  }
17
 
18
- def fieldobj_to_tex(obj,lor_index,pos):
19
  su3 = None
20
  su2 = None
21
- u1 = None
22
  hel = None
23
  sp = None
24
 
25
- #print(obj)
26
  obj_mod = obj.copy()
27
  for tok in obj:
28
- if "SU3" in tok:
29
- su3 = tok.split("=")[-1]
30
- obj_mod.remove(tok)
31
- if "SU2" in tok:
32
- su2 = tok.split("=")[-1]
33
- obj_mod.remove(tok)
34
- if "U1" in tok:
35
- u1 = tok.split("=")[-1]
36
- obj_mod.remove(tok)
37
- if "HELICITY" in tok:
38
- hel = tok.split("=")[-1]
39
- if hel == "1" : hel = "+1"
40
- if "SPIN" in tok: sp = tok.split("=")[-1]
41
- #print(obj)
 
42
  assert sp is not None
43
 
44
- outtex= ""
45
- if sp == "0" : outtex += "\phi"
46
- if sp == "1" : outtex += "A"+pos+lor_index
47
- if sp == "1/2" : outtex += "\psi"
 
 
 
48
 
49
  outtex += r"_{("
50
- if su3 is not None:
51
- outtex += rep_tex_dict["SU3"][su3]+" ,"
 
52
  else:
53
- outtex += r"\textbf{1}"+" ,"
54
- if su2 is not None:
55
- outtex += rep_tex_dict["SU2"][su2]+" ,"
 
56
  else:
57
- outtex += r"\textbf{1}"+" ,"
58
- if u1 is not None:
59
- outtex += u1+" ,"
 
60
  else:
61
- outtex += r"\textbf{0}"+" ,"
62
- if hel is not None: outtex += "h:"+ hel + " ,"
63
- if outtex[-1] == ",": outtex = outtex[:-1]+")}"
 
 
 
 
64
  return outtex
65
 
66
- def derobj_to_tex(obj,lor_index,pos):
67
- if pos == "^":
68
- outtex = "D^{"+lor_index+"}_{("
69
  elif pos == "_":
70
- outtex = "D_{"+lor_index+"("
71
  else:
72
- raise ValueError("pos must be ^ or _")
73
- if "SU3" not in obj and "SU2" not in obj and "U1" not in obj:
74
- if pos == "^":
75
- return "\partial^{"+lor_index+"}"
76
- elif pos == "_":
77
- return "\partial_{"+lor_index+"}"
78
-
79
- if "SU3" in obj: outtex += "SU3,"
80
- if "SU2" in obj: outtex += "SU2,"
81
- if "U1" in obj: outtex += "U1,"
82
- if outtex[-1] == ",": outtex = outtex[:-1]+")}"
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  return outtex
85
 
86
- def gamobj_to_tex(obj,lor_index,pos):
87
- outtex = "\sigma"+pos+lor_index
88
- return outtex
89
-
90
- def obj_to_tex(obj,lor_index="\mu",pos="^"):
91
- if isinstance(obj,tuple): obj = list(obj)
92
- if isinstance(obj,str): obj = [i for i in obj.split(" ") if i != ""]
93
- # remove any space char in the first element of the list
94
- if obj[0] == "+" :
95
- return "\quad\quad+"
96
- if obj[0] == "-" :
97
- return "\quad\quad-"
98
- if obj[0] == "i" :
 
 
 
99
  return "i"
100
- if obj[0] == "FIELD" :
101
- return fieldobj_to_tex(obj,lor_index,pos)
 
 
 
102
  if obj[0] == "DERIVATIVE":
103
- return derobj_to_tex(obj,lor_index,pos)
 
104
  if obj[0] == "SIGMA":
105
- return gamobj_to_tex(obj,lor_index,pos)
 
 
 
 
 
 
 
 
 
 
 
 
106
  if obj[0] == "COMMUTATOR_A":
107
- return "[ "+derobj_to_tex(obj,lor_index,pos)
108
  if obj[0] == "COMMUTATOR_B":
109
- return ", "+derobj_to_tex(obj,lor_index,pos)+' ]'
110
-
111
- def split_with_delimiter_preserved(string, delimiters,ignore_dots=False):
112
- if "." in string and ignore_dots == False:
113
- #print(string)
114
- raise ValueError("Unexpected ending to the generated Lagrangian")
115
- pattern = '(' + '|'.join(map(re.escape, delimiters)) + ')'
116
- pattern = re.split(pattern, string)
117
- pattern = [" + " if i == "+ " else i for i in pattern ]
118
- pattern = [i for i in pattern if i != ""]
119
- return pattern
120
-
121
- def split_with_delimiter_preserved(string, delimiters,ignore_dots=False):
122
- if "." in string and ignore_dots == False:
123
- #print(string)
124
- raise ValueError("Unexpected ending to the generated Lagrangian")
125
  pattern = '(' + '|'.join(map(re.escape, delimiters)) + ')'
126
- pattern = re.split(pattern, string)
127
- pattern = [" + " if i == "+ " else i for i in pattern ]
128
- pattern = [i for i in pattern if i != ""]
129
- return pattern
 
 
130
 
131
  def clean_split(inlist, delimiters):
 
 
 
 
132
  i = 0
133
  merged_list = []
134
  while i < len(inlist):
135
  if inlist[i] in delimiters:
136
  if i < len(inlist) - 1:
137
  merged_list.append(inlist[i] + inlist[i+1])
138
- i += 1 # Skip the next element as it has been merged
139
  else:
140
- merged_list.append(inlist[i]) # If it's the last element, append it without merging
141
  else:
142
  merged_list.append(inlist[i])
143
  i += 1
144
  return merged_list
145
 
146
-
147
  def get_obj_dict(inlist):
148
  outdict = {}
149
  for iitem in inlist:
150
- idict = {"ID":None,"LATEX":None}
151
- id = [i for i in iitem.split() if "ID" in i]
152
- if len(id) == 1:
153
- idict["ID"] = id[0]
154
- if "FIELD" in iitem:
155
- idict["LATEX"] = obj_to_tex(iitem,"\\mu","^")
156
- if iitem == "+" or iitem == "-" or iitem == "i":
157
- idict["LATEX"] = obj_to_tex(iitem )
158
  outdict[iitem] = idict
159
  return outdict
160
 
161
  def get_con_dict(inlist):
 
 
 
 
 
162
  outdict = {}
163
  for iitem in inlist:
164
- iitem = iitem.split()
165
- iitem = [i for i in iitem if i != ""]
166
- sym = [i for i in iitem if ("SU" in i or "LORENTZ" in i)]
167
  assert len(sym) == 1, "More than one symmetry in contraction"
168
- ids = [i for i in iitem if ("SU" not in i and "LZ" not in i)]
169
- if sym[0] not in outdict.keys():
170
  outdict[sym[0]] = [ids]
171
  else:
172
  outdict[sym[0]].append(ids)
173
  return outdict
174
 
175
- def term_to_tex(term,verbose=False):
176
- # Clean term
177
- term = term.replace(".","").replace(" = ", "=").replace(" =- ", "=-").replace(" / ", "/").replace("COMMUTATOR_A DERIVATIVE", "COMMUTATOR_ADERIVATIVE").replace("COMMUTATOR_B DERIVATIVE", "COMMUTATOR_BDERIVATIVE")
178
- term = split_with_delimiter_preserved(term,[" FIELD "," DERIVATIVE "," SIGMA "," COMMUTATOR_A "," COMMUTATOR_B "," CONTRACTIONS "])
179
- term = clean_split(term, [" FIELD "," DERIVATIVE "," SIGMA "," COMMUTATOR_ADERIVATIVE "," COMMUTATOR_BDERIVATIVE "," CONTRACTIONS "])
180
-
181
- if verbose: print(term)
182
-
183
- if term == [" + "] or term == [" - "] or term == [" i "]:
184
- return term[0]
185
-
186
- # Get Dictionary of objects
187
- objdict = get_obj_dict([i for i in term if " CONTRACTIONS " not in i])
188
-
 
 
 
 
 
 
189
  if verbose:
190
- for i,j in objdict.items():
191
- print(i,"\t\t",j)
192
 
 
 
 
 
 
 
 
 
 
193
 
194
- # Do contractions
195
- contractions = [i for i in term if " CONTRACTIONS " in i]
196
- assert len(contractions) < 2, "More than one contraction in term"
197
- if (len(contractions) == 1) and contractions != [" CONTRACTIONS "]:
 
 
 
 
 
 
 
 
 
 
198
 
199
- contractions = contractions[0]
200
- contractions = split_with_delimiter_preserved(contractions,[" LORENTZ "," SU2 "," SU3 "])
201
- contractions = clean_split(contractions, [" LORENTZ "," SU2 "," SU3 "])
202
- contractions = [i for i in contractions if i != " CONTRACTIONS"]
203
- condict = get_con_dict(contractions)
204
- if verbose: print(condict)
205
- if "LZ" in condict.keys():
206
  firstlz = True
207
  cma = True
208
- for con in condict["LZ"]:
209
- for kobj , iobj in objdict.items():
210
- if iobj["ID"] is None : continue
 
211
  if iobj["ID"] in con:
212
- if cma: lsymb = "\\mu"
213
- else: lsymb = "\\nu"
214
-
 
215
  if firstlz:
216
- iobj["LATEX"] = obj_to_tex(kobj,lsymb,"^")
217
  firstlz = False
218
  else:
219
- iobj["LATEX"] = obj_to_tex(kobj,lsymb,"_")
220
  cma = False
221
  firstlz = True
222
 
223
- outstr = " ".join([objdict[i]["LATEX"] for i in term if " CONTRACTIONS " not in i])
224
-
225
  return outstr
226
- def display_in_latex(instring,verbose=False):
227
- #latex_string = r"$\overgroup{\Large{" + instring + "}}$"
228
- latex_string = r"$\Large{" + instring + "}$"
229
- if verbose: print(latex_string)
230
- display(Latex(latex_string))
231
- return instring
232
-
233
 
234
- def str_tex(instr,num=0):
235
-
236
- #print("INPUT:",iinstr)
237
- #print("TERM:")
238
- #outstr = ""
239
- #instr = split_with_delimiter_preserved(iinstr,[" + ","+ "," - "])
240
-
241
  if num != 0:
242
  instr = instr[:num]
243
-
244
- inlist = [term.replace(".","") for term in instr]
245
  outstr = ""
246
  coup = 0
247
  mass = 0
248
- outstr = "\\begin{aligned}"
249
- for i, iterm in enumerate(inlist):
250
- if i ==0:
251
- outstr += " \mathcal{L}= \quad \\\\ & "
252
- else:
 
253
  nqf = iterm.count("FIELD SPIN = 0")
254
- nD = iterm.count(" DERIVATIVE ")
255
  if nqf != 0 and nqf != 2 and nD == 0:
256
  coup += 1
257
- outstr += " \lambda_{"+str(coup)+"} \,"
258
  if nqf == 2 and nD == 0:
259
  mass += 1
260
- outstr += " m^2_{"+str(mass)+"} \,"
261
- outstr += term_to_tex(iterm,False) + " \quad "
262
- if i%4 == 0: outstr += " \\\\ \\\\ & "
 
263
  return outstr
264
 
265
  def master_str_tex(iinstr):
266
- instr = split_with_delimiter_preserved(iinstr,[" + ","+ "," - "])
 
 
 
 
 
267
  try:
268
  outstr = str_tex(instr)
269
  except Exception as e:
270
- outstr = str_tex(instr,-1)
271
- outstr += " \cdots"
 
272
  print(e)
273
- outstr += "\\end{aligned}"
274
- return outstr#########
275
-
276
-
277
 
 
 
278
  device = 'cpu'
279
  model_name = "JoseEliel/BART-Lagrangian"
280
 
281
  @st.cache_resource
282
  def load_model():
283
-
284
  model = BartForConditionalGeneration.from_pretrained(model_name).to(device)
285
-
286
  return model
287
 
288
- model = load_model()
289
-
290
  @st.cache_resource
291
  def load_tokenizer():
292
  return PreTrainedTokenizerFast.from_pretrained(model_name)
293
 
 
294
  hf_tokenizer = load_tokenizer()
295
 
 
 
296
  def process_input(input_text):
 
297
  input_text = input_text.replace("[SOS]", "").replace("[EOS]", "").replace("FIELD", "SPLITFIELD")
298
- fields = input_text.split('SPLIT')[1:]
299
- fields = [x.strip().split(' ') for x in fields]
300
  fields = sorted(fields)
301
- fields = "[SOS] " + " ".join([" ".join(x) for x in fields]) + " [EOS]"
302
  return fields
303
 
304
  def process_output(output_text):
305
- return output_text.replace("[SOS]", "").replace("[EOS]", "").replace(".","")
 
306
 
307
- def process_output_pretty_print(output_text):
308
- pretty_output = output_text.replace(" / ", "/")
309
- pretty_output = pretty_output.replace("=- ", "= -")
310
- pretty_output = pretty_output.replace("+", "\n+")
311
- return pretty_output
312
 
313
  def generate_lagrangian(input_text):
 
 
 
314
  input_text = process_input(input_text)
315
  inputs = hf_tokenizer([input_text], return_tensors='pt').to(device)
316
- with st.spinner(text="Generating Lagrangian..."):
317
- lagrangian_ids = model.generate(inputs['input_ids'], max_length=512)
318
  lagrangian = hf_tokenizer.decode(lagrangian_ids[0].tolist(), skip_special_tokens=False)
319
  lagrangian = process_output(lagrangian)
320
  return lagrangian
321
 
322
  def generate_field(sp, su2, su3, u1):
323
- # Initialize components list
324
-
325
- if sp == "0":
326
- components = [f"FIELD SPIN={sp}"]
327
- else:
 
328
  components = [f"FIELD SPIN={sp} HEL=1/2"]
329
-
330
- # Conditionally add each component
331
  if su2 != "$1$":
332
  components.append(f"SU2={su2}")
333
- if su3 == "$\\bar{3}$":
334
  components.append("SU3=-3")
335
- if su3 != "$1$" and su3 != "$\\bar{3}$":
336
- components.append(f"SU3={su3}")
337
  if u1 != "0":
338
  components.append(f"U1={u1}")
339
-
340
- # Join components into final string
341
- return " ".join(components).replace("$","")
342
 
 
 
343
  def main():
344
- # Streamlit UI (Adjusted without 'className')
345
  st.title("$\\mathscr{L}$agrangian Generator")
346
  st.markdown(" ### For a set of chosen fields, this model generates the corresponding Lagrangian which encodes all interactions and dynamics of the fields.")
347
 
348
  st.markdown(" #### This is a demo of our [BART](https://arxiv.org/abs/1910.13461)-based model with ca 360M parameters")
349
 
350
- st.markdown(" ##### :violet[Due to computational resources, we limit the number of fields to 3 and the maximum length of the generated Lagrangian to 512 tokens.]")
351
  st.markdown(" ##### Choose up to three different fields:")
352
 
 
 
353
  su2_options = ["$1$", "$2$", "$3$"]
354
  su3_options = ["$1$", "$3$", "$\\bar{3}$"]
355
- u1_options = ["-1","-2/3", "-1/2", "-1/3", "0","1/3" ,"1/2", "2/3", "1"]
356
  spin_options = ["0", "1/2"]
357
-
358
- # Initialize or update session state variables
359
- if 'count' not in st.session_state:
360
- st.session_state.count = 0 # Keeps track of button presses
361
- if 'field_strings' not in st.session_state:
362
- st.session_state.field_strings = [] # Stores the generated field strings
363
 
364
  with st.form("field_selection"):
365
  spin_selection = st.radio("Select spin value:", spin_options)
366
- su2_selection = st.radio("Select $\\mathrm{SU}(2)$ value:", su2_options)
367
- su3_selection = st.radio("Select $\\mathrm{SU}(3)$ value:", su3_options)
368
- u1_selection = st.radio("Select $\\mathrm{U}(1)$ value:", u1_options)
369
  submitted = st.form_submit_button("Add field")
370
  if submitted:
371
  if st.session_state.count < 3:
372
- field_string = generate_field(spin_selection, su2_selection, su3_selection, u1_selection)
373
- st.session_state.field_strings.append(field_string) # Save generated field string
374
- st.session_state.count += 1 # Increment button press count
375
- elif st.session_state.count >= 3:
376
- st.write("You have reached the maximum number of fields we allow in this demo.")
 
377
  clear_fields = st.button("Clear fields")
378
  if clear_fields:
379
  st.session_state.field_strings = []
380
  st.session_state.count = 0
381
- # Button to generate field text, allows up to 2 button presses
382
 
383
- st.write(f"Input Fields:")
 
384
  for i, fs in enumerate(st.session_state.field_strings, 1):
385
  texfield = obj_to_tex(fs)
386
- fieldname = f"Field {i}:"
387
- st.latex("\\text{" + fieldname + "} \quad" + texfield)
388
 
 
389
  if st.button("Generate Lagrangian"):
390
  input_fields = " ".join(st.session_state.field_strings)
391
- if input_fields == "":
392
- st.write("Please add fields before generating the Lagrangian.")
393
  return
394
- else:
395
- print("\n")
396
- # append input fields into csv file, create if not exist
397
- #with open('usesdata.csv', 'a') as f:
398
- # f.write(input_fields + "\n")
399
- # replace = with space
400
- input_fields = input_fields.replace("=", " ")
401
- # append and prepend input fields with SOS and EOS tokens
402
- input_fields = "[SOS] " + input_fields + " [EOS]"
403
- print(input_fields)
404
- generated_lagrangian = generate_lagrangian(input_fields)
405
- print(generated_lagrangian)
406
- print("\n")
407
- # Save generated lagrangian into same csv file, create if not exist
408
- #with open('usesdata.csv', 'a') as f:
409
- # f.write(generated_lagrangian + "\n")
410
-
411
- # add = to SU2 X, SU3 X, U1 X, SPIN X only when X is a number and not when its followd by anything not a number
412
- #generated_lagrangian = re.sub(r"(SU2)(\s)(\d)", r"\1=\3", generated_lagrangian)
413
- #latex_output = master_str_tex(generated_lagrangian[1:])
414
- #print(latex_output)
415
- #print("\n\n")
416
- # save latex output in file
417
- #with open('usesdata.csv', 'a') as f:
418
- # f.write(latex_output + "\n")
419
- #st.text_area("Generated Lagrangian", pretty_output, height=300)
420
- st.markdown("### Generated Lagrangian")
421
- st.text_area(generated_lagrangian, height=300)
422
-
423
-
424
- # write my contact info
425
  st.markdown("### Contact")
426
- st.markdown("If you have any questions or suggestions, please feel free to Email us. [Eliel](mailto:[email protected]) or [Yong Sheng](mailto:[email protected]).")
427
 
428
  if __name__ == "__main__":
429
  main()
 
 
 
 
 
 
 
 
 
1
 
 
2
  import re
3
+ import streamlit as st
4
+ import torch
5
+ from transformers import PreTrainedTokenizerFast, BartForConditionalGeneration
6
 
7
# LaTeX labels for the SU(3) / SU(2) irreducible representations.
rep_tex_dict = {
    "SU3": {
        "-3": r"\bar{\textbf{3}}",
        "3": r"\textbf{3}",
    },
    "SU2": {
        "-2": r"\textbf{2}",
        "2": r"\textbf{2}",
        "-3": r"\textbf{3}",
        "3": r"\textbf{3}",
    },
}
12
 
13
def fieldobj_to_tex(obj, lor_index, pos):
    """
    Render a FIELD token list (e.g. ["FIELD", "SPIN=0", "SU2=2"]) as LaTeX.

    The field symbol is chosen by spin (phi / A / psi) and carries a
    subscript listing its SU(3), SU(2) and U(1) representations and,
    if present, its helicity.

    obj: list of "KEY=VALUE" tokens; lor_index/pos: Lorentz index symbol
    and its position ("^" or "_"), used only for spin-1 fields.
    """
    su3 = su2 = u1 = hel = sp = None

    # (The previous version built an unused `obj_mod` copy and removed
    # matched tokens from it; that was dead code and has been dropped.)
    for tok in obj:
        if "SU3" in tok:
            su3 = tok.split("=")[-1]
        if "SU2" in tok:
            su2 = tok.split("=")[-1]
        if "U1" in tok:
            u1 = tok.split("=")[-1]
        if "HELICITY" in tok:
            # NOTE(review): generate_field emits "HEL=..." tokens; this only
            # matches the longer "HELICITY" spelling -- confirm which form
            # the model output actually uses.
            hel = tok.split("=")[-1]
            if hel == "1":
                hel = "+1"
        if "SPIN" in tok:
            sp = tok.split("=")[-1]
    assert sp is not None

    # Field symbol by spin.
    outtex = ""
    if sp == "0":
        outtex += r"\phi"
    if sp == "1":
        outtex += "A" + pos + lor_index
    if sp == "1/2":
        outtex += r"\psi"

    # Representation subscript: (SU3, SU2, U1[, helicity]).
    outtex += r"_{("
    if su3 is not None:
        outtex += rep_tex_dict["SU3"].get(su3, r"\textbf{1}") + " ,"
    else:
        outtex += r"\textbf{1},"
    if su2 is not None:
        outtex += rep_tex_dict["SU2"].get(su2, r"\textbf{1}") + " ,"
    else:
        outtex += r"\textbf{1},"
    if u1 is not None:
        outtex += u1 + " ,"
    else:
        outtex += r"\textbf{0},"
    if hel is not None:
        outtex += "h:" + hel + " ,"
    # Drop the trailing comma and close the subscript.
    if outtex[-1] == ",":
        outtex = outtex[:-1] + ")}"
    return outtex
70
 
71
def derobj_to_tex(obj, lor_index, pos):
    """
    Render a DERIVATIVE token list as LaTeX.

    Returns a plain partial derivative when no gauge-group token is
    present, otherwise a covariant derivative D whose group content is
    listed in the sub/superscript.  pos must be "^" or "_".
    """
    gauged = any(g in obj for g in ("SU3", "SU2", "U1"))

    if pos == "^":
        if not gauged:
            return f"\\partial^{lor_index}"
        prefix = f"D^{{{lor_index}}}_{{("
    elif pos == "_":
        if not gauged:
            return f"\\partial_{lor_index}"
        prefix = f"D_{{{lor_index}}}^{{("
    else:
        raise ValueError("pos must be ^ or _")

    # List the gauge groups the covariant derivative acts under.
    groups = [g for g in ("SU3", "SU2", "U1") if g in obj]
    return prefix + ",".join(groups) + ")}"
95
 
96
def gamobj_to_tex(obj, lor_index, pos):
    """Render a SIGMA token as a sigma matrix carrying the Lorentz index."""
    return "".join((r"\sigma", pos, lor_index))
98
+
99
def obj_to_tex(obj, lor_index=r"\mu", pos="^"):
    """
    Dispatch a tokenised Lagrangian object to its LaTeX renderer.

    obj may be a tuple, a whitespace-separated string, or a token list;
    the first token decides the object kind.  Unrecognised kinds render
    as an empty string.

    FIX: the default was the non-raw literal "\mu" -- an invalid escape
    sequence (SyntaxWarning on Python 3.12+); the raw string has the
    identical runtime value.
    """
    # Normalise tuple/string input to a list of tokens.
    if isinstance(obj, tuple):
        obj = list(obj)
    if isinstance(obj, str):
        obj = [i for i in obj.split(" ") if i != ""]

    # Bare sign / imaginary-unit tokens.
    if obj[0] == "+":
        return r"\quad\quad+"
    if obj[0] == "-":
        return r"\quad\quad-"
    if obj[0] == "i":
        return "i"

    if obj[0] == "FIELD":
        return fieldobj_to_tex(obj, lor_index, pos)
    if obj[0] == "DERIVATIVE":
        return derobj_to_tex(obj, lor_index, pos)
    if obj[0] == "SIGMA":
        return gamobj_to_tex(obj, lor_index, pos)

    # Commutator tokens fused with DERIVATIVE by term_to_tex's pre-pass.
    if obj[0] == "COMMUTATOR_ADERIVATIVE":
        new_obj = obj[:]
        new_obj[0] = "DERIVATIVE"
        return "[ " + derobj_to_tex(new_obj, lor_index, pos)
    if obj[0] == "COMMUTATOR_BDERIVATIVE":
        new_obj = obj[:]
        new_obj[0] = "DERIVATIVE"
        return ", " + derobj_to_tex(new_obj, lor_index, pos) + " ]"

    # Stand-alone commutator tokens.
    if obj[0] == "COMMUTATOR_A":
        return "[ " + derobj_to_tex(obj, lor_index, pos)
    if obj[0] == "COMMUTATOR_B":
        return ", " + derobj_to_tex(obj, lor_index, pos) + " ]"

    # Unrecognised token kind: render nothing.
    return ""
143
+
144
def split_with_delimiter_preserved(string, delimiters, ignore_dots=False):
    """
    Split `string` on any of `delimiters`, keeping each delimiter as its
    own list entry.  A bare "+ " token is normalised to " + " and empty
    fragments are dropped.

    Raises ValueError when a "." is present (a truncated model output),
    unless ignore_dots is set.
    """
    if "." in string and not ignore_dots:
        raise ValueError("Unexpected ending to the generated Lagrangian")
    pattern = "(" + "|".join(map(re.escape, delimiters)) + ")"
    pieces = re.split(pattern, string)
    return [" + " if piece == "+ " else piece for piece in pieces if piece != ""]
158
 
159
def clean_split(inlist, delimiters):
    """
    Merge each delimiter token with the element that follows it, so that
    e.g. " FIELD " + "SPIN=0 ..." becomes one " FIELD SPIN=0 ..." entry.
    A delimiter in the last position is kept as-is.
    """
    merged = []
    idx = 0
    n = len(inlist)
    while idx < n:
        cur = inlist[idx]
        if cur in delimiters and idx + 1 < n:
            # Fuse the delimiter with its payload and skip the payload.
            merged.append(cur + inlist[idx + 1])
            idx += 2
        else:
            merged.append(cur)
            idx += 1
    return merged
177
 
 
178
def get_obj_dict(inlist):
    """
    Map each object token string to a small record holding its "ID..."
    tag (None when absent) and its default LaTeX rendering.
    """
    outdict = {}
    for entry in inlist:
        tags = [tok for tok in entry.split() if tok.startswith("ID")]
        outdict[entry] = {
            "ID": tags[0] if tags else None,
            "LATEX": obj_to_tex(entry, "\\mu", "^"),
        }
    return outdict
191
 
192
def get_con_dict(inlist):
    """
    For a list of 'contractions' token strings (e.g. "LORENTZ ID0 ID1"),
    produce a dictionary mapping each symmetry label (LORENTZ, SU2, SU3)
    to the lists of object IDs contracted under it.
    """
    outdict = {}
    for iitem in inlist:
        tokens = [t for t in iitem.split() if t != ""]
        sym = [t for t in tokens if ("SU" in t or "LORENTZ" in t)]
        assert len(sym) == 1, "More than one symmetry in contraction"
        # FIX: the old filter only excluded "SU"/"LZ" substrings, so the
        # "LORENTZ" label itself (which contains neither) leaked into the
        # ID list; exclude the symmetry tokens explicitly.
        ids = [t for t in tokens
               if "SU" not in t and "LZ" not in t and "LORENTZ" not in t]
        outdict.setdefault(sym[0], []).append(ids)
    return outdict
210
 
211
def term_to_tex(term, verbose=False):
    """
    Convert one Lagrangian term (a raw token string) into LaTeX.

    FIX: verbose defaulted to True -- a debug leftover (the previous
    revision and the only caller, str_tex, use False) that printed the
    parse state on every render.

    Raises ValueError if the term contains more than one CONTRACTIONS
    section.
    """
    # Normalise spacing and fuse commutator markers with their derivative.
    term = term.replace(".", "").replace(" = ", "=").replace(" =- ", "=-")
    term = term.replace(" / ", "/")
    term = term.replace("COMMUTATOR_A DERIVATIVE", "COMMUTATOR_ADERIVATIVE")
    term = term.replace("COMMUTATOR_B DERIVATIVE", "COMMUTATOR_BDERIVATIVE")

    delims = [" FIELD ", " DERIVATIVE ", " SIGMA ",
              " COMMUTATOR_ADERIVATIVE ", " COMMUTATOR_BDERIVATIVE ",
              " CONTRACTIONS "]
    term = split_with_delimiter_preserved(term, delims)
    term = clean_split(term, delims)

    if verbose:
        print(term)

    # A lone sign / imaginary-unit term passes straight through.
    if term in [[" + "], [" - "], [" i "]]:
        return term[0]

    # Records (ID + default LaTeX) for everything outside CONTRACTIONS.
    objdict = get_obj_dict([t for t in term if " CONTRACTIONS " not in t])
    if verbose:
        for k, v in objdict.items():
            print(k, "\t\t", v)

    contractions = [t for t in term if " CONTRACTIONS " in t]
    if len(contractions) > 1:
        raise ValueError("More than one contraction in term")

    if len(contractions) == 1 and contractions != [" CONTRACTIONS "]:
        c_str = contractions[0]
        c_str = split_with_delimiter_preserved(c_str, [" LORENTZ ", " SU2 ", " SU3 "])
        c_str = clean_split(c_str, [" LORENTZ ", " SU2 ", " SU3 "])
        c_str = [i for i in c_str if i != " CONTRACTIONS"]
        condict = get_con_dict(c_str)
        if verbose:
            print(condict)

        # Assign alternating upper/lower Lorentz indices (mu for the first
        # contracted pair, nu afterwards) to the objects named in each
        # LORENTZ contraction.
        if "LORENTZ" in condict:
            firstlz = True
            cma = True
            for con in condict["LORENTZ"]:
                for kobj, iobj in objdict.items():
                    if iobj["ID"] is None:
                        continue
                    if iobj["ID"] in con:
                        lsymb = r"\mu" if cma else r"\nu"
                        if firstlz:
                            iobj["LATEX"] = obj_to_tex(kobj, lsymb, "^")
                            firstlz = False
                        else:
                            iobj["LATEX"] = obj_to_tex(kobj, lsymb, "_")
                            cma = False
                            firstlz = True

    outstr = " ".join([objdict[t]["LATEX"] for t in term if " CONTRACTIONS " not in t])
    return outstr
 
 
 
 
 
 
 
283
 
284
def str_tex(instr, num=0):
    """
    Render a list of Lagrangian term strings as the body of a LaTeX
    `aligned` block (opening tag included, closing tag added by the
    caller).

    num: if non-zero, truncate the term list first (num=-1 drops the
    last, possibly incomplete, term).
    """
    if num != 0:
        instr = instr[:num]

    terms = [t.replace(".", "") for t in instr]
    coup = 0
    mass = 0
    out = r"\begin{aligned}"
    for i, iterm in enumerate(terms):
        if i == 0:
            out += r" \mathcal{L}= \quad \\ & "
        else:
            nqf = iterm.count("FIELD SPIN = 0")
            nD = iterm.count(" DERIVATIVE ")
            # Scalar self-interaction (neither kinetic nor mass-like):
            # prefix a coupling constant.
            if nqf not in (0, 2) and nD == 0:
                coup += 1
                out += rf" \lambda_{{{coup}}} \,"
            # Two scalars and no derivative: prefix a mass parameter.
            if nqf == 2 and nD == 0:
                mass += 1
                out += rf" m^2_{{{mass}}} \,"
        out += term_to_tex(iterm, False) + r" \quad "
        # Line break every four terms to keep the display readable.
        if i % 4 == 0:
            out += r" \\ \\ & "
    return out
313
 
314
def master_str_tex(iinstr):
    """
    Split the raw model output into terms and render the full LaTeX
    Lagrangian.  If rendering fails (e.g. the generation was truncated),
    retry without the last term and append an ellipsis.
    """
    terms = split_with_delimiter_preserved(iinstr, [" + ", "+ ", " - "])
    try:
        body = str_tex(terms)
    except Exception as err:
        body = str_tex(terms, -1)
        body += " \\cdots"
        print(err)
    return body + r"\end{aligned}"
 
 
330
 
331
# ---------------------------------------------------------------------------------
# Model loading
device = 'cpu'  # demo host has no GPU; everything runs on CPU
model_name = "JoseEliel/BART-Lagrangian"

@st.cache_resource
def load_model():
    # Download/load the seq2seq model once; st.cache_resource keeps it
    # alive across Streamlit reruns.
    model = BartForConditionalGeneration.from_pretrained(model_name).to(device)
    return model

@st.cache_resource
def load_tokenizer():
    # Tokenizer matching the model checkpoint, cached like the model.
    return PreTrainedTokenizerFast.from_pretrained(model_name)

model = load_model()
hf_tokenizer = load_tokenizer()
347
 
348
# ---------------------------------------------------------------------------------
# Text processing wrappers
def process_input(input_text):
    """
    Canonicalise the field list: strip [SOS]/[EOS], sort the FIELD
    groups (so equivalent field sets map to one input), and re-wrap
    with the special tokens.
    """
    marked = (input_text
              .replace("[SOS]", "")
              .replace("[EOS]", "")
              .replace("FIELD", "SPLITFIELD"))
    # Each chunk after "SPLIT" is one "FIELD ..." group.
    groups = [chunk.strip().split(" ") for chunk in marked.split("SPLIT")[1:]]
    groups.sort()
    return "[SOS] " + " ".join(" ".join(g) for g in groups) + " [EOS]"
358
 
359
def process_output(output_text):
    """Strip the special tokens and stray periods from the model output."""
    for junk in ("[SOS]", "[EOS]", "."):
        output_text = output_text.replace(junk, "")
    return output_text
362
 
363
def reformat_expression(s):
    # Insert " = " between a charge/spin label (SU2, SU3, U1, SPIN, HEL) and
    # the number that follows it, stripping any spaces inside the number:
    # e.g. "SU2 - 1" -> "SU2 = -1".
    # NOTE(review): \d+ only captures the integer part, so "SPIN 1/2"
    # becomes "SPIN = 1/2" (the "/2" tail is left untouched) -- confirm
    # this is the intended handling of fractional values.
    return re.sub(r"(SU[23]|U1|SPIN|HEL)\s+([+-]?\s*\d+)",
                  lambda m: f"{m.group(1)} = {m.group(2).replace(' ', '')}",
                  s)
368
 
369
def generate_lagrangian(input_text):
    """
    Calls the model to produce a Lagrangian for the user-given fields.

    input_text: "[SOS] ... [EOS]"-wrapped field tokens; it is first
    canonicalised (sorted) by process_input.
    Returns the decoded Lagrangian string with special tokens and
    periods stripped.
    """
    input_text = process_input(input_text)
    inputs = hf_tokenizer([input_text], return_tensors='pt').to(device)
    with st.spinner("Generating Lagrangian..."):
        # max_length caps the number of generated tokens; longer
        # Lagrangians are truncated (master_str_tex tolerates that).
        lagrangian_ids = model.generate(inputs['input_ids'], max_length=2048)
    lagrangian = hf_tokenizer.decode(lagrangian_ids[0].tolist(), skip_special_tokens=False)
    lagrangian = process_output(lagrangian)
    return lagrangian
380
 
381
def generate_field(sp, su2, su3, u1):
    """
    Builds a single field string with the chosen spin and gauge charges.

    sp: "0" or "1/2"; su2/su3: UI options like "$1$", "$2$", "$\\bar{3}$";
    u1: a plain fraction string like "-2/3".
    Returns e.g. "FIELD SPIN=1/2 HEL=1/2 SU2=2 U1=1/3".
    """
    components = [f"FIELD SPIN={sp}"]
    # Spin-1/2 fields carry an explicit helicity token.
    if sp == "1/2":
        components = [f"FIELD SPIN={sp} HEL=1/2"]

    if su2 != "$1$":
        components.append(f"SU2={su2}")
    # FIX: the comparison literal was "$\\bar{{3}}$" (doubled braces in a
    # plain string), which can never equal the UI option "$\\bar{3}$",
    # so the anti-triplet choice emitted "SU3=\bar{3}" instead of "SU3=-3".
    if su3 == "$\\bar{3}$":
        components.append("SU3=-3")
    elif su3 != "$1$":
        components.append(f"SU3={su3.replace('$', '')}")
    if u1 != "0":
        components.append(f"U1={u1}")
    # Strip the "$...$" math markers from the UI option strings.
    return " ".join(components).replace("$", "")
 
 
399
 
400
# ---------------------------------------------------------------------------------
# Streamlit GUI
def main():
    """
    Streamlit entry point: let the user assemble up to three fields,
    then generate and LaTeX-render the corresponding Lagrangian.
    """
    st.title("$\\mathscr{L}$agrangian Generator")
    st.markdown(" ### For a set of chosen fields, this model generates the corresponding Lagrangian which encodes all interactions and dynamics of the fields.")

    st.markdown(" #### This is a demo of our [BART](https://arxiv.org/abs/1910.13461)-based model with ca 360M parameters")

    st.markdown(" ##### :violet[Due to computational resources, we limit the number of fields to 3.]")
    # FIX: the prompt "Choose up to three different fields:" was rendered
    # twice (once as a heading, once as plain markdown); keep the heading only.
    st.markdown(" ##### Choose up to three different fields:")

    su2_options = ["$1$", "$2$", "$3$"]
    su3_options = ["$1$", "$3$", "$\\bar{3}$"]
    u1_options = ["-1", "-2/3", "-1/2", "-1/3", "0", "1/3", "1/2", "2/3", "1"]
    spin_options = ["0", "1/2"]

    # Per-session state: how many fields were added, and their token strings.
    if "count" not in st.session_state:
        st.session_state.count = 0
    if "field_strings" not in st.session_state:
        st.session_state.field_strings = []

    with st.form("field_selection"):
        spin_selection = st.radio("Select spin value:", spin_options)
        su2_selection = st.radio("Select SU(2) value:", su2_options)
        su3_selection = st.radio("Select SU(3) value:", su3_options)
        u1_selection = st.radio("Select U(1) value:", u1_options)
        submitted = st.form_submit_button("Add field")
        if submitted:
            if st.session_state.count < 3:
                fs = generate_field(spin_selection, su2_selection, su3_selection, u1_selection)
                st.session_state.field_strings.append(fs)
                st.session_state.count += 1
            else:
                st.write("Maximum of 3 fields for this demo.")

    clear_fields = st.button("Clear fields")
    if clear_fields:
        st.session_state.field_strings = []
        st.session_state.count = 0

    # Display the fields added so far, rendered as LaTeX.
    st.write("Input Fields:")
    for i, fs in enumerate(st.session_state.field_strings, 1):
        texfield = obj_to_tex(fs)
        st.latex(r"\text{Field " + str(i) + r":} \quad " + texfield)

    if st.button("Generate Lagrangian"):
        input_fields = " ".join(st.session_state.field_strings)
        if input_fields.strip() == "":
            st.write("Please add at least one field before generating the Lagrangian.")
            return

        # The model expects space-separated key/value tokens wrapped in
        # [SOS]/[EOS]; "=" is only used in the UI-side field strings.
        input_fields = input_fields.replace("=", " ")
        input_fields = "[SOS] " + input_fields + " [EOS]"
        generated_lagrangian = generate_lagrangian(input_fields)
        generated_lagrangian = reformat_expression(generated_lagrangian)
        print(generated_lagrangian)  # server-side log of the raw output

        # Render; drop the leading character before parsing (the output
        # starts with a spurious token separator).
        latex_output = master_str_tex(generated_lagrangian[1:])
        st.latex(latex_output)

    st.markdown("### Contact")
    st.markdown("For questions/suggestions, email us: [Eliel](mailto:[email protected]) or [Yong Sheng](mailto:[email protected]).")

if __name__ == "__main__":
    main()