Spaces:
Runtime error
Runtime error
# standard | |
import configparser | |
import os | |
import time | |
import re | |
# 3rd party | |
from langchain.llms import OpenAI | |
from langchain.chat_models import ChatOpenAI | |
from langchain import LLMChain | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain import PromptTemplate | |
# --- Configuration ---------------------------------------------------------
# Settings are loaded from a config.ini looked up relative to the current
# working directory.
config = configparser.ConfigParser()
config.read('config.ini')

# An OPENAI_API_KEY that is already set (and non-empty) in the environment
# wins; otherwise the key stored under [REQUIRED] in config.ini is exported.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = config['REQUIRED']['openai-api-key']

# --- LangChain objects -----------------------------------------------------
# Completion model, created with temperature 0.
llm = OpenAI(temperature=0)

# Prompt with a single input variable: {transcript} is replaced by the text
# that should be summarized.
prompt = PromptTemplate(
    input_variables=['transcript'],
    template="Write a concise summary of the following: {transcript}",
)

# Chain that feeds the prompt above to the model; verbose output disabled.
chain = LLMChain(llm=llm, prompt=prompt, verbose=False)
def load_transcript(input_file, header_lines=5):
    """Split a transcript file into its header lines and its body text.

    Google Meet transcripts start with a short header that should not be
    summarized, so the header is returned separately from the body.

    Args:
        input_file: An open text-mode file-like object. It is read in full
            with ``readlines()``.
        header_lines: Number of leading lines treated as the header.
            Defaults to 5.

    Returns:
        A ``(head, transcript)`` tuple. ``head`` is the list of header lines
        with their line endings kept; ``transcript`` is every remaining line
        joined into a single string. A file shorter than ``header_lines``
        yields an empty ``transcript``.

    Raises:
        ValueError: If ``header_lines`` is negative.
    """
    if header_lines < 0:
        # A negative count would slice from the end of the file and silently
        # put body text into the header.
        raise ValueError("header_lines must be non-negative")

    lines = input_file.readlines()
    return lines[:header_lines], "".join(lines[header_lines:])
def _parse_transcript_header(head):
    """Pull the title, date and attendee line out of the transcript header.

    Missing pieces are returned as empty strings instead of raising, so a
    transcript with a short or unusual header still produces notes.
    """
    title_line = head[0] if head else ""
    # Splitting on either parenthesis turns "<title> (<date>) ..." into
    # [title, date, rest]; without parentheses only the title is available.
    parts = re.split(r"[()]", title_line)
    title = parts[0].rstrip("\r\n")
    date = parts[1] if len(parts) > 1 else ""

    attendees = head[2] if len(head) > 2 else ""
    if attendees and not attendees.endswith("\n"):
        # The next heading is appended directly after this line, so it must
        # end with a newline for that heading to start on its own line.
        attendees += "\n"
    return title, date, attendees


def create_meeting_notes(transcript_file):
    """Summarize a meeting transcript into Markdown meeting notes.

    The transcript body is split on ``HH:MM:00`` timestamp markers, each
    resulting section is subdivided into chunks of at most 2000 characters,
    and every chunk is summarized with the module-level ``chain``. Progress
    is printed to stdout while the summaries are produced.

    Args:
        transcript_file: An open file-like object holding the transcript; it
            is handed to ``load_transcript``.

    Returns:
        A Markdown string containing a title and date taken from the first
        header line, an attendees section taken from the third header line,
        and one ``### HH:MM:SS`` section of bullet-point summaries per
        timestamped part of the meeting, followed by "End of Meeting".
    """
    head, transcript = load_transcript(transcript_file)

    # Split the transcript on the 5-minute timestamps.
    five_min_chunks = re.split(r"[0-9]{2}:[0-9]{2}:0{2}", transcript)

    # Sections can be long, so they are subdivided into smaller chunks
    # before being sent to the model.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)

    # (timestamp, [summary, ...]) pairs in meeting order.
    timestamped_summaries = []
    print(f"Summarizing {len(five_min_chunks)*5} minute meeting")
    start_time = time.time()

    for i, five_minutes_chunk in enumerate(five_min_chunks):
        # Sections are labelled by position: the i-th section starts at i * 5 min.
        timestamp = time.strftime('%H:%M:%S', time.gmtime(60 * 5 * i))
        sub_chunks = text_splitter.split_text(five_minutes_chunk)

        summaries = []
        for j, chunk in enumerate(sub_chunks, 1):
            summaries.append(chain.run(chunk))
            print(f"{timestamp}: Chunk {j}/{len(sub_chunks)}")
        timestamped_summaries.append((timestamp, summaries))

        minutes, seconds = divmod(time.time() - start_time, 60)
        print(f"Summarized first {5 * (i+1)} minutes of meeting, {minutes:.0f} minutes {seconds:.2f} seconds elapsed")

    title, date, attendees = _parse_transcript_header(head)

    # Collect the pieces and join once instead of growing a string with +=.
    notes = [
        f"# {title}\n",
        f"{date}\n",
        "## Attendees\n",
        f"{attendees}## Meeting Notes\n",
    ]
    for timestamp, summaries in timestamped_summaries:
        notes.append(f"### {timestamp}\n")
        notes.extend(f"- {summary.strip()}\n" for summary in summaries)
    notes.append("\nEnd of Meeting")
    return "".join(notes)