File size: 6,327 Bytes
fa23262
 
273c30f
 
fa23262
273c30f
 
 
5bc2b7b
65a4f6a
 
273c30f
5bc2b7b
 
 
 
65a4f6a
 
 
 
273c30f
 
73dae5e
273c30f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import gradio as gr

def update(name):
    """Build the greeting shown in the output box for *name* (Gradio callback)."""
    greeting = "Welcome to Gradio, {}!".format(name)
    return greeting

# Demo UI: a topic box plus two identical profile columns — one per dialogue
# participant (P01 = user, P02 = chatbot).  The original code reused the names
# addr/age/sex for both columns, so the first column's widgets were silently
# overwritten and unreachable; each column now gets its own variables.
demo = gr.Blocks()

with demo:
    gr.Markdown("각 질문에 대답 후 Enter 해주세요.\n\n")
    with gr.Row():
        topic = gr.Textbox(label="Topic", placeholder="대화 주제를 정해주세요 (e.g. 여가 생활, 일과 직업, 개인 및 관계, etc...)")
    with gr.Row():
        # Profile of speaker P01 (the user).
        # NOTE(review): the 지역 placeholder lists topic examples, not regions —
        # looks copy-pasted from the topic box; confirm intended text.
        with gr.Column():
            p01_addr = gr.Textbox(label="지역", placeholder="e.g. 여가 생활, 일과 직업, 개인 및 관계, etc...")
            p01_age = gr.Textbox(label="나이", placeholder="e.g. 20대 미만, 40대, 70대 이상, etc...")
            p01_sex = gr.Textbox(label="성별", placeholder="e.g. 남성, 여성, etc...")
        # Profile of speaker P02 (the chatbot).
        with gr.Column():
            p02_addr = gr.Textbox(label="지역", placeholder="e.g. 여가 생활, 일과 직업, 개인 및 관계, etc...")
            p02_age = gr.Textbox(label="나이", placeholder="e.g. 20대 미만, 40대, 70대 이상, etc...")
            p02_sex = gr.Textbox(label="성별", placeholder="e.g. 남성, 여성, etc...")
        out = gr.Textbox()
    btn = gr.Button("Run")
    # The original left this commented out and referenced an undefined `inp`,
    # leaving the Run button dead; wire it to the existing callback.
    btn.click(fn=update, inputs=topic, outputs=out)

demo.launch()


def main(model_name):
    warnings.filterwarnings("ignore")

    tokenizer = AutoTokenizer.from_pretrained('kakaobrain/kogpt', revision='KoGPT6B-ryan1.5b')
    special_tokens_dict = {'additional_special_tokens': ['<sep>', '<eos>', '<sos>', '#@์ด๋ฆ„#', '#@๊ณ„์ •#', '#@์‹ ์›#', '#@์ „๋ฒˆ#', '#@๊ธˆ์œต#', '#@๋ฒˆํ˜ธ#', '#@์ฃผ์†Œ#', '#@์†Œ์†#', '#@๊ธฐํƒ€#']}
    num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)

    model = AutoModelForCausalLM.from_pretrained(model_name)
    model.resize_token_embeddings(len(tokenizer))
    model = model.cuda()

    info = ""
    
    while True:
        if info == "":
            print(
                f"์ง€๊ธˆ๋ถ€ํ„ฐ ๋Œ€ํ™” ์ •๋ณด๋ฅผ ์ž…๋ ฅ ๋ฐ›๊ฒ ์Šต๋‹ˆ๋‹ค.\n"
                f"๊ฐ ์งˆ๋ฌธ์— ๋Œ€๋‹ต ํ›„ Enter ํ•ด์ฃผ์„ธ์š”.\n"
                f"์•„๋ฌด ์ž…๋ ฅ ์—†์ด Enter ํ•  ๊ฒฝ์šฐ, ๋ฏธ๋ฆฌ ์ง€์ •๋œ ๊ฐ’ ์ค‘ ๋žœ๋ค์œผ๋กœ ์ •ํ•˜๊ฒŒ ๋ฉ๋‹ˆ๋‹ค.\n"
            )

            time.sleep(1)

            yon = "no"
        else:
            yon = input(
                f"์ด์ „ ๋Œ€ํ™” ์ •๋ณด๋ฅผ ๊ทธ๋Œ€๋กœ ์œ ์ง€ํ• ๊นŒ์š”? (yes : ์œ ์ง€, no : ์ƒˆ๋กœ ์ž‘์„ฑ) :"
            )
        
        if yon == "no":
            info = "์ผ์ƒ ๋Œ€ํ™” "

            topic = input("๋Œ€ํ™” ์ฃผ์ œ๋ฅผ ์ •ํ•ด์ฃผ์„ธ์š” (e.g. ์—ฌ๊ฐ€ ์ƒํ™œ, ์ผ๊ณผ ์ง์—…, ๊ฐœ์ธ ๋ฐ ๊ด€๊ณ„, etc...) :")
            if topic == "":
                topic = random.choice(['์—ฌ๊ฐ€ ์ƒํ™œ', '์‹œ์‚ฌ/๊ต์œก', '๋ฏธ์šฉ๊ณผ ๊ฑด๊ฐ•', '์‹์Œ๋ฃŒ', '์ƒ๊ฑฐ๋ž˜(์‡ผํ•‘)', '์ผ๊ณผ ์ง์—…', '์ฃผ๊ฑฐ์™€ ์ƒํ™œ', '๊ฐœ์ธ ๋ฐ ๊ด€๊ณ„', 'ํ–‰์‚ฌ'])
                print(topic)
            info += topic + "<sep>"

            def ask_info(who, ment):
                print(ment)
                text = who + ":"
                addr = input("์–ด๋”” ์‚ฌ์„ธ์š”? (e.g. ์„œ์šธํŠน๋ณ„์‹œ, ์ œ์ฃผ๋„, etc...) :").strip()
                if addr == "":
                    addr = random.choice(['์„œ์šธํŠน๋ณ„์‹œ', '๊ฒฝ๊ธฐ๋„', '๋ถ€์‚ฐ๊ด‘์—ญ์‹œ', '๋Œ€์ „๊ด‘์—ญ์‹œ', '๊ด‘์ฃผ๊ด‘์—ญ์‹œ', '์šธ์‚ฐ๊ด‘์—ญ์‹œ', '๊ฒฝ์ƒ๋‚จ๋„', '์ธ์ฒœ๊ด‘์—ญ์‹œ', '์ถฉ์ฒญ๋ถ๋„', '์ œ์ฃผ๋„', '๊ฐ•์›๋„', '์ถฉ์ฒญ๋‚จ๋„', '์ „๋ผ๋ถ๋„', '๋Œ€๊ตฌ๊ด‘์—ญ์‹œ', '์ „๋ผ๋‚จ๋„', '๊ฒฝ์ƒ๋ถ๋„', '์„ธ์ข…ํŠน๋ณ„์ž์น˜์‹œ', '๊ธฐํƒ€'])
                    print(addr)
                text += addr + " "

                age = input("๋‚˜์ด๊ฐ€? (e.g. 20๋Œ€, 70๋Œ€ ์ด์ƒ, etc...) :").strip()
                if age == "":
                    age = random.choice(['20๋Œ€', '30๋Œ€', '50๋Œ€', '20๋Œ€ ๋ฏธ๋งŒ', '60๋Œ€', '40๋Œ€', '70๋Œ€ ์ด์ƒ'])
                    print(age)
                text += age + " "

                sex = input("์„ฑ๋ณ„์ด? (e.g. ๋‚จ์„ฑ, ์—ฌ์„ฑ, etc... (?)) :").strip()
                if sex == "":
                    sex = random.choice(['๋‚จ์„ฑ', '์—ฌ์„ฑ'])
                    print(sex)
                text += sex + "<sep>"
                return text

            info += ask_info(who="P01", ment=f"\n๋‹น์‹ ์— ๋Œ€ํ•ด ์•Œ๋ ค์ฃผ์„ธ์š”.\n")
            info += ask_info(who="P02", ment=f"\n์ฑ—๋ด‡์— ๋Œ€ํ•ด ์•Œ๋ ค์ฃผ์„ธ์š”.\n")

        pp = info.replace('<sep>', '\n')
        print(
            f"\n----------------\n"
            f"<์ž…๋ ฅ ์ •๋ณด ํ™•์ธ> (P01 : ๋‹น์‹ , P02 : ์ฑ—๋ด‡)\n"
            f"{pp}"
            f"----------------\n"
            f"๋Œ€ํ™”๋ฅผ ์ข…๋ฃŒํ•˜๊ณ  ์‹ถ์œผ๋ฉด ์–ธ์ œ๋“ ์ง€ 'end' ๋ผ๊ณ  ๋งํ•ด์ฃผ์„ธ์š”~\n"
        )
        talk = []
        switch = True
        switch2 = True
        while True:
            inp = "P01<sos>"
            myinp = input("๋‹น์‹  : ")
            if myinp == "end":
                print("๋Œ€ํ™” ์ข…๋ฃŒ!")
                break
            inp += myinp + "<eos>"
            talk.append(inp)
            talk.append("P02<sos>")

            while True:
                now_inp = info + "".join(talk)
                inpu = tokenizer(now_inp, max_length=1024, truncation='longest_first', return_tensors='pt')
                seq_len = inpu.input_ids.size(1)
                if seq_len > 512 * 0.8 and switch:
                    print(
                        f"<์ฃผ์˜> ํ˜„์žฌ ๋Œ€ํ™” ๊ธธ์ด๊ฐ€ ๊ณง ์ตœ๋Œ€ ๊ธธ์ด์— ๋„๋‹ฌํ•ฉ๋‹ˆ๋‹ค. ({seq_len} / 512)"
                    )
                    switch = False

                if seq_len >= 512 and switch2:
                    print("<์ฃผ์˜> ๋Œ€ํ™” ๊ธธ์ด๊ฐ€ ๋„ˆ๋ฌด ๊ธธ์–ด์กŒ๊ธฐ ๋•Œ๋ฌธ์—, ์ดํ›„ ๋Œ€ํ™”๋Š” ๋งจ ์•ž์˜ ๋ฐœํ™”๋ฅผ ์กฐ๊ธˆ์”ฉ ์ง€์šฐ๋ฉด์„œ ์ง„ํ–‰๋ฉ๋‹ˆ๋‹ค.")
                    talk = talk[1:]
                    switch2 = False
                else:
                    break
            
            out = model.generate(
                inputs=inpu.input_ids.cuda(), 
                attention_mask=inpu.attention_mask.cuda(),
                max_length=512, 
                do_sample=True,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.encode('<eos>')[0]
                )
            output = tokenizer.batch_decode(out)
            print("์ฑ—๋ด‡ : " + output[0][len(now_inp):-5])
            talk[-1] += output[0][len(now_inp):]

        again = input(f"๋‹ค๋ฅธ ๋Œ€ํ™”๋ฅผ ์‹œ์ž‘ํ• ๊นŒ์š”? (yes : ์ƒˆ๋กœ์šด ์‹œ์ž‘, no : ์ข…๋ฃŒ) :")
        if again == "no":
            break