Spaces:

CintraAI
/

code-chunker

Running

App Files Files Community

CintraAI committed on Apr 10, 2024

Commit

4644b40

1 Parent(s): 6ca6c86

updated requirements

Browse files

Files changed (4) hide show

.idea/code-chunker.iml +1 -0
Chunker.py +15 -15
app.py +19 -1
requirements.txt +45 -0

.idea/code-chunker.iml CHANGED Viewed

@@ -10,5 +10,6 @@
   </component>
   <component name="PackageRequirementsSettings">
     <option name="removeUnused" value="true" />
   </component>
 </module>

   </component>
   <component name="PackageRequirementsSettings">
     <option name="removeUnused" value="true" />
+    <option name="modifyBaseFiles" value="true" />
   </component>
 </module>

Chunker.py CHANGED Viewed

@@ -1,7 +1,6 @@
 from abc import ABC, abstractmethod
 from CodeParser import CodeParser
-from Utils import count_tokens
 class Chunker(ABC):
@@ -20,19 +19,20 @@ class Chunker(ABC):
     def print_chunks(chunks):
         for chunk_number, chunk_code in chunks.items():
             print(f"Chunk {chunk_number}:")
-            print("="*40)
             print(chunk_code)
-            print("="*40)
     @staticmethod
     def consolidate_chunks_into_file(chunks):
         return "\n".join(chunks.values())
     @staticmethod
     def count_lines(consolidated_chunks):
         lines = consolidated_chunks.split("\n")
         return len(lines)
 class CodeChunker(Chunker):
     def __init__(self, file_extension, encoding_name="gpt-4"):
         super().__init__(encoding_name)
@@ -60,15 +60,16 @@ class CodeChunker(Chunker):
             if highest_comment_line:  # If a highest comment line exists, add it
                 adjusted_breakpoints.append(highest_comment_line)
             else:
-                adjusted_breakpoints.append(bp)  # If no comments were found before the breakpoint, add the original breakpoint
         breakpoints = sorted(set(adjusted_breakpoints))  # Ensure breakpoints are unique and sorted
         while i < len(lines):
             line = lines[i]
             new_token_count = count_tokens(line, self.encoding_name)
             if token_count + new_token_count > token_limit:
                 # Set the stop line to the last breakpoint before the current line
                 if i in breakpoints:
                     stop_line = i
@@ -79,20 +80,20 @@ class CodeChunker(Chunker):
                 if stop_line == start_line and i not in breakpoints:
                     token_count += new_token_count
                     i += 1
                 # If the stop line is the same as the start line and the current line is a breakpoint, it means we can create a chunk with just the current line
                 elif stop_line == start_line and i == stop_line:
                     token_count += new_token_count
                     i += 1
                 # If the stop line is the same as the start line and the current line is a breakpoint, it means we can create a chunk with just the current line
                 elif stop_line == start_line and i in breakpoints:
                     current_chunk = "\n".join(lines[start_line:stop_line])
                     if current_chunk.strip():  # If the current chunk is not just whitespace
                         chunks[chunk_number] = current_chunk  # Using chunk_number as key
                         chunk_number += 1
                     token_count = 0
                     start_line = i
                     i += 1
@@ -103,7 +104,7 @@ class CodeChunker(Chunker):
                     if current_chunk.strip():
                         chunks[chunk_number] = current_chunk  # Using chunk_number as key
                         chunk_number += 1
                     i = stop_line
                     token_count = 0
                     start_line = stop_line
@@ -116,9 +117,8 @@ class CodeChunker(Chunker):
         current_chunk_code = "\n".join(lines[start_line:])
         if current_chunk_code.strip():  # Checks if the chunk is not just whitespace
             chunks[chunk_number] = current_chunk_code  # Using chunk_number as key
         return chunks
     def get_chunk(self, chunked_codebase, chunk_number):
         """Return the entry stored under ``chunk_number`` in ``chunked_codebase``.

         This is a plain subscript lookup, so any error raised by the
         container for a missing key propagates to the caller.
         """
         selected_chunk = chunked_codebase[chunk_number]
         return selected_chunk

 from abc import ABC, abstractmethod
 from CodeParser import CodeParser
+from utils import count_tokens
 class Chunker(ABC):
     def print_chunks(chunks):
         for chunk_number, chunk_code in chunks.items():
             print(f"Chunk {chunk_number}:")
+            print("=" * 40)
             print(chunk_code)
+            print("=" * 40)
     @staticmethod
     def consolidate_chunks_into_file(chunks):
         return "\n".join(chunks.values())
     @staticmethod
     def count_lines(consolidated_chunks):
         lines = consolidated_chunks.split("\n")
         return len(lines)
 class CodeChunker(Chunker):
     def __init__(self, file_extension, encoding_name="gpt-4"):
         super().__init__(encoding_name)
             if highest_comment_line:  # If a highest comment line exists, add it
                 adjusted_breakpoints.append(highest_comment_line)
             else:
+                adjusted_breakpoints.append(
+                    bp)  # If no comments were found before the breakpoint, add the original breakpoint
         breakpoints = sorted(set(adjusted_breakpoints))  # Ensure breakpoints are unique and sorted
         while i < len(lines):
             line = lines[i]
             new_token_count = count_tokens(line, self.encoding_name)
             if token_count + new_token_count > token_limit:
                 # Set the stop line to the last breakpoint before the current line
                 if i in breakpoints:
                     stop_line = i
                 if stop_line == start_line and i not in breakpoints:
                     token_count += new_token_count
                     i += 1
                 # If the stop line is the same as the start line and the current line is a breakpoint, it means we can create a chunk with just the current line
                 elif stop_line == start_line and i == stop_line:
                     token_count += new_token_count
                     i += 1
                 # If the stop line is the same as the start line and the current line is a breakpoint, it means we can create a chunk with just the current line
                 elif stop_line == start_line and i in breakpoints:
                     current_chunk = "\n".join(lines[start_line:stop_line])
                     if current_chunk.strip():  # If the current chunk is not just whitespace
                         chunks[chunk_number] = current_chunk  # Using chunk_number as key
                         chunk_number += 1
                     token_count = 0
                     start_line = i
                     i += 1
                     if current_chunk.strip():
                         chunks[chunk_number] = current_chunk  # Using chunk_number as key
                         chunk_number += 1
                     i = stop_line
                     token_count = 0
                     start_line = stop_line
         current_chunk_code = "\n".join(lines[start_line:])
         if current_chunk_code.strip():  # Checks if the chunk is not just whitespace
             chunks[chunk_number] = current_chunk_code  # Using chunk_number as key
         return chunks
     def get_chunk(self, chunked_codebase, chunk_number):
         """Return the entry stored under ``chunk_number`` in ``chunked_codebase``.

         This is a plain subscript lookup, so any error raised by the
         container for a missing key propagates to the caller.
         """
         selected_chunk = chunked_codebase[chunk_number]
         return selected_chunk

app.py CHANGED Viewed

@@ -1,10 +1,28 @@
 import streamlit as st
 from utils import load_json, count_tokens
 import json
 # Set up the Streamlit page configuration
 st.set_page_config(page_title="Cintra Code Chunker", layout="wide")
 def main():
     # Streamlit widgets for file selection
     st.title("Cintra Code Chunker")
@@ -38,4 +56,4 @@ def main():
 if __name__ == "__main__":
-    main()

 import streamlit as st
 from utils import load_json, count_tokens
 import json
+import os
 # Set up the Streamlit page configuration
 st.set_page_config(page_title="Cintra Code Chunker", layout="wide")
+# Slider to select a value
+x = st.slider("Select a value")
+st.write(x, "squared is", x * x)
+code_files_directory = "example_code_files"
+code_files = os.listdir(code_files_directory)
+# Dropdown menu for the user to select a code file
+selected_file = st.selectbox("Select a code file", code_files)
+file_path = os.path.join(code_files_directory, selected_file)
+with open(file_path, "r") as file:
+    code_content = file.read()
+    st.code(code_content, language="python")
 def main():
     # Streamlit widgets for file selection
     st.title("Cintra Code Chunker")
 if __name__ == "__main__":
+    main()

requirements.txt CHANGED Viewed

	@@ -0,0 +1,45 @@

+altair==5.3.0
+attrs==23.2.0
+blinker==1.7.0
+cachetools==5.3.3
+certifi==2024.2.2
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+gitdb==4.0.11
+GitPython==3.1.43
+idna==3.6
+Jinja2==3.1.3
+jsonschema==4.21.1
+jsonschema-specifications==2023.12.1
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+mdurl==0.1.2
+numpy==1.26.4
+packaging==24.0
+pandas==2.2.1
+pillow==10.3.0
+protobuf==4.25.3
+pyarrow==15.0.2
+pydeck==0.8.1b0
+Pygments==2.17.2
+python-dateutil==2.9.0.post0
+pytz==2024.1
+referencing==0.34.0
+requests==2.31.0
+rich==13.7.1
+rpds-py==0.18.0
+six==1.16.0
+smmap==5.0.1
+streamlit==1.33.0
+tenacity==8.2.3
+regex==2023.12.25
+tiktoken==0.6.0
+tree-sitter==0.21.3
+toml==0.10.2
+toolz==0.12.1
+tornado==6.4
+typing_extensions==4.11.0
+tzdata==2024.1
+urllib3==2.2.1
+watchdog==4.0.0