| 'use client' |
| import type { FC } from 'react' |
| import React, { useCallback, useEffect, useState } from 'react' |
| import { useTranslation } from 'react-i18next' |
| import UrlInput from '../base/url-input' |
| import OptionsWrap from '../base/options-wrap' |
| import CrawledResult from '../base/crawled-result' |
| import Crawling from '../base/crawling' |
| import ErrorMessage from '../base/error-message' |
| import Header from './header' |
| import Options from './options' |
| import cn from '@/utils/classnames' |
| import { useModalContext } from '@/context/modal-context' |
| import Toast from '@/app/components/base/toast' |
| import { checkJinaReaderTaskStatus, createJinaReaderTask } from '@/service/datasets' |
| import { sleep } from '@/utils' |
| import type { CrawlOptions, CrawlResultItem } from '@/models/datasets' |
|
|
// Translation-key namespace for the generic validation messages used below
// (`fieldRequired`, `urlError`).
const ERROR_I18N_PREFIX = 'common.errorMsg'

// Translation-key namespace for the website step of dataset creation
// (`limit`, `unknownError`, `exceptionErrorTitle`).
const I18N_PREFIX = 'datasetCreation.stepOne.website'
|
|
/** Props accepted by the `JinaReader` component. */
type Props = {
  /** Forwarded to the result list; called with the item to preview. */
  onPreview: (payload: CrawlResultItem) => void
  /** Items currently checked in the result list (controlled by the parent). */
  checkedCrawlResult: CrawlResultItem[]
  /** Called with the new checked items, both on user selection and when crawl results arrive. */
  onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
  /** Called with the job id returned when a crawl task is created. */
  onJobIdChange: (jobId: string) => void
  /** Crawl options shown in the options panel and sent when creating the task. */
  crawlOptions: CrawlOptions
  /** Called when the user edits the crawl options. */
  onCrawlOptionsChange: (payload: CrawlOptions) => void
}
|
|
/**
 * Lifecycle phases of a crawl as tracked by the component:
 * `init` before any run, `running` while a task is in flight,
 * `finished` once the run has settled (successfully or not).
 */
const Step = {
  init: 'init',
  running: 'running',
  finished: 'finished',
} as const

// Companion type so `Step` can still be used in type positions (e.g. `useState<Step>`).
type Step = typeof Step[keyof typeof Step]
|
|
/** Crawl progress/result snapshot held in component state. */
type CrawlResult = {
  current: number
  total: number
  data: CrawlResultItem[]
  time_consuming: number | string
}

/** Terminal outcome of polling a crawl job. */
type CrawlPollOutcome =
  | { isError: false; data: CrawlResult }
  | { isError: true; errorMessage?: string; data: { data: CrawlResultItem[] } }

/**
 * Website crawler panel: takes a URL and crawl options, creates a crawl task,
 * polls it until it settles, and renders progress, the result list, or an error.
 */
const JinaReader: FC<Props> = ({
  onPreview,
  checkedCrawlResult,
  onCheckedCrawlResultChange,
  onJobIdChange,
  crawlOptions,
  onCrawlOptionsChange,
}) => {
  const { t } = useTranslation()
  const [step, setStep] = useState<Step>(Step.init)

  // A fresh timestamp is pushed to OptionsWrap whenever a run has started or finished.
  const [controlFoldOptions, setControlFoldOptions] = useState<number>(0)
  useEffect(() => {
    if (step !== Step.init)
      setControlFoldOptions(Date.now())
  }, [step])

  const { setShowAccountSettingModal } = useModalContext()
  const handleSetting = useCallback(() => {
    setShowAccountSettingModal({
      payload: 'data-source',
    })
  }, [setShowAccountSettingModal])

  /**
   * Validates the URL and the `limit` crawl option.
   * Returns the first failing rule's translated message (empty string when valid).
   */
  const checkValid = useCallback((url: string) => {
    let errorMsg = ''
    if (!url) {
      errorMsg = t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
        field: 'url',
      })
    }

    if (!errorMsg && !(url.startsWith('http://') || url.startsWith('https://')))
      errorMsg = t(`${ERROR_I18N_PREFIX}.urlError`)

    if (!errorMsg && (crawlOptions.limit === null || crawlOptions.limit === undefined || crawlOptions.limit === '')) {
      errorMsg = t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
        field: t(`${I18N_PREFIX}.limit`),
      })
    }

    return {
      isValid: !errorMsg,
      errorMsg,
    }
  }, [crawlOptions, t])

  const isInit = step === Step.init
  const isCrawlFinished = step === Step.finished
  const isRunning = step === Step.running
  const [crawlResult, setCrawlResult] = useState<CrawlResult | undefined>(undefined)
  const [crawlErrorMessage, setCrawlErrorMessage] = useState('')
  const showError = isCrawlFinished && !!crawlErrorMessage

  /**
   * Polls the task status every 2.5s until it is `completed` or `failed`
   * (a response without a status is treated as failed). While the task is
   * still in progress, partial results are pushed to state and to the parent.
   * Never rejects: failures are reported through `isError` / `errorMessage`.
   */
  const waitForCrawlFinished = useCallback(async (jobId: string): Promise<CrawlPollOutcome> => {
    // The reported total is capped at the user-configured limit.
    const capTotal = (total: number) => Math.min(total, parseFloat(crawlOptions.limit as string))

    try {
      const res = await checkJinaReaderTaskStatus(jobId) as any
      if (res.status === 'completed') {
        return {
          isError: false,
          data: {
            ...res,
            total: capTotal(res.total),
          },
        }
      }
      if (res.status === 'failed' || !res.status) {
        return {
          isError: true,
          errorMessage: res.message,
          data: {
            data: [],
          },
        }
      }

      // Still in progress: surface what has been crawled so far, then poll again.
      setCrawlResult({
        ...res,
        total: capTotal(res.total),
      })
      onCheckedCrawlResultChange(res.data || [])
      await sleep(2500)
      return await waitForCrawlFinished(jobId)
    }
    catch (e: any) {
      // Rejections are expected to be Response-like (exposing `json()`); anything
      // else (e.g. a network error) falls back to its own `message` so the catch
      // block itself cannot throw.
      let errorMessage: string | undefined
      if (typeof e?.json === 'function') {
        try {
          errorMessage = (await e.json())?.message
        }
        catch {
          // Body could not be parsed; leave the message empty so the caller
          // substitutes its generic error text.
        }
      }
      else {
        errorMessage = e?.message
      }
      return {
        isError: true,
        errorMessage,
        data: {
          data: [],
        },
      }
    }
  // `onCheckedCrawlResultChange` is listed so polling never calls a stale parent callback.
  }, [crawlOptions.limit, onCheckedCrawlResultChange])

  /**
   * Validates the input, creates the crawl task and drives it to completion.
   * A response carrying `data` is treated as an immediate single-page result;
   * a response carrying `job_id` is polled via `waitForCrawlFinished`.
   */
  const handleRun = useCallback(async (url: string) => {
    const { isValid, errorMsg } = checkValid(url)
    if (!isValid) {
      Toast.notify({
        message: errorMsg,
        type: 'error',
      })
      return
    }
    setStep(Step.running)
    try {
      const startTime = Date.now()
      const res = await createJinaReaderTask({
        url,
        options: crawlOptions,
      }) as any

      if (res.data) {
        const data = {
          current: 1,
          total: 1,
          data: [{
            title: res.data.title,
            markdown: res.data.content,
            description: res.data.description,
            source_url: res.data.url,
          }],
          // Elapsed wall-clock time in seconds, measured on the client.
          time_consuming: (Date.now() - startTime) / 1000,
        }
        setCrawlResult(data)
        onCheckedCrawlResultChange(data.data || [])
        setCrawlErrorMessage('')
      }
      else if (res.job_id) {
        const jobId = res.job_id
        onJobIdChange(jobId)
        const outcome = await waitForCrawlFinished(jobId)
        if (outcome.isError) {
          setCrawlErrorMessage(outcome.errorMessage || t(`${I18N_PREFIX}.unknownError`)!)
        }
        else {
          setCrawlResult(outcome.data)
          onCheckedCrawlResultChange(outcome.data.data || [])
          setCrawlErrorMessage('')
        }
      }
    }
    catch (e) {
      setCrawlErrorMessage(t(`${I18N_PREFIX}.unknownError`)!)
      console.error(e)
    }
    finally {
      setStep(Step.finished)
    }
  }, [checkValid, crawlOptions, onCheckedCrawlResultChange, onJobIdChange, t, waitForCrawlFinished])

  return (
    <div>
      <Header onSetting={handleSetting} />
      <div className={cn('mt-2 p-4 pb-0 rounded-xl border border-gray-200')}>
        <UrlInput onRun={handleRun} isRunning={isRunning} />
        <OptionsWrap
          className={cn('mt-4')}
          controlFoldOptions={controlFoldOptions}
        >
          <Options className='mt-2' payload={crawlOptions} onChange={onCrawlOptionsChange} />
        </OptionsWrap>

        {!isInit && (
          <div className='mt-3 relative left-[-16px] w-[calc(100%_+_32px)] rounded-b-xl'>
            {isRunning
              && <Crawling
                className='mt-2'
                crawledNum={crawlResult?.current || 0}
                totalNum={crawlResult?.total || parseFloat(crawlOptions.limit as string) || 0}
              />}
            {showError && (
              <ErrorMessage className='rounded-b-xl' title={t(`${I18N_PREFIX}.exceptionErrorTitle`)} errorMsg={crawlErrorMessage} />
            )}
            {isCrawlFinished && !showError
              && <CrawledResult
                className='mb-2'
                list={crawlResult?.data || []}
                checkedList={checkedCrawlResult}
                onSelectedChange={onCheckedCrawlResultChange}
                onPreview={onPreview}
                usedTime={parseFloat(crawlResult?.time_consuming as string) || 0}
              />
            }
          </div>
        )}
      </div>
    </div>
  )
}
export default React.memo(JinaReader)
|
|