'use client'
import { useTranslation } from 'react-i18next'
import { formatFileSize, formatNumber, formatTime } from '@/utils/format'
import type { DocType } from '@/models/datasets'
import useTimestamp from '@/hooks/use-timestamp'
/** Editor control kinds a metadata sub-field can request. */
export type inputType =
  | 'input'
  | 'select'
  | 'textarea'

/**
 * Keys of the metadata map: every `DocType` plus the two extra sections
 * `originInfo` and `technicalParameters`.
 */
export type metadataType =
  | DocType
  | 'originInfo'
  | 'technicalParameters'
/** Configuration of a single sub-field inside one metadata section. */
type SubFieldConfig = {
  /** Label text for the sub-field. */
  label: string
  /** Editor control kind; optional. */
  inputType?: inputType
  /** Optional identifier; no entry in this file sets it — purpose to be confirmed against consumers. */
  field?: string
  /**
   * Optional display formatter. Receives the raw value and, optionally, a
   * total (only the `hit_count` entry in this file uses the second argument).
   */
  render?: (value: any, total?: number) => React.ReactNode | string
}

/** One metadata section: its heading, icon, editability and sub-fields. */
type MetadataEntry = {
  /** Heading text of the section. */
  text: string
  /** Explicitly `false` on sections this file marks as not editable; omitted otherwise. */
  allowEdit?: boolean
  icon?: React.ReactNode
  iconName?: string
  /** Sub-field configurations keyed by field name. */
  subFieldsMap: Record<string, SubFieldConfig>
}

/** Metadata section configuration for every `metadataType` key. */
type MetadataMap = Record<metadataType, MetadataEntry>
/** Shared i18n key prefix for every metadata field label. */
const fieldPrefix = 'datasetDocuments.metadata.field'

/**
 * Builds the metadata configuration map for all document types plus the
 * `originInfo` and `technicalParameters` sections.
 *
 * Every label and section title is resolved through `t` from
 * `useTranslation()`; date fields are formatted with `formatTime` from
 * `useTimestamp()`. Because it calls those hooks, this function is itself a
 * hook and should be called the way hooks are.
 *
 * Sub-field keys such as `data_source_info.upload_file.size` look like dotted
 * paths into the document detail object — NOTE(review): confirm against the
 * consumer of this map.
 *
 * @returns A freshly built `MetadataMap` on every call.
 */
export const useMetadataMap = (): MetadataMap => {
  const { t } = useTranslation()
  const { formatTime: formatTimestamp } = useTimestamp()

  return {
    book: {
      text: t('datasetDocuments.metadata.type.book'),
      iconName: 'bookOpen',
      subFieldsMap: {
        title: { label: t(`${fieldPrefix}.book.title`) },
        language: {
          label: t(`${fieldPrefix}.book.language`),
          inputType: 'select',
        },
        author: { label: t(`${fieldPrefix}.book.author`) },
        publisher: { label: t(`${fieldPrefix}.book.publisher`) },
        publication_date: { label: t(`${fieldPrefix}.book.publicationDate`) },
        isbn: { label: t(`${fieldPrefix}.book.ISBN`) },
        category: {
          label: t(`${fieldPrefix}.book.category`),
          inputType: 'select',
        },
      },
    },
    web_page: {
      text: t('datasetDocuments.metadata.type.webPage'),
      iconName: 'globe',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.webPage.title`) },
        'url': { label: t(`${fieldPrefix}.webPage.url`) },
        'language': {
          label: t(`${fieldPrefix}.webPage.language`),
          inputType: 'select',
        },
        'author/publisher': { label: t(`${fieldPrefix}.webPage.authorPublisher`) },
        'publish_date': { label: t(`${fieldPrefix}.webPage.publishDate`) },
        'topics/keywords': { label: t(`${fieldPrefix}.webPage.topicsKeywords`) },
        'description': { label: t(`${fieldPrefix}.webPage.description`) },
      },
    },
    paper: {
      text: t('datasetDocuments.metadata.type.paper'),
      iconName: 'graduationHat',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.paper.title`) },
        'language': {
          label: t(`${fieldPrefix}.paper.language`),
          inputType: 'select',
        },
        'author': { label: t(`${fieldPrefix}.paper.author`) },
        'publish_date': { label: t(`${fieldPrefix}.paper.publishDate`) },
        'journal/conference_name': {
          label: t(`${fieldPrefix}.paper.journalConferenceName`),
        },
        'volume/issue/page_numbers': { label: t(`${fieldPrefix}.paper.volumeIssuePage`) },
        'doi': { label: t(`${fieldPrefix}.paper.DOI`) },
        'topics/keywords': { label: t(`${fieldPrefix}.paper.topicsKeywords`) },
        'abstract': {
          label: t(`${fieldPrefix}.paper.abstract`),
          inputType: 'textarea',
        },
      },
    },
    social_media_post: {
      text: t('datasetDocuments.metadata.type.socialMediaPost'),
      iconName: 'atSign',
      subFieldsMap: {
        'platform': { label: t(`${fieldPrefix}.socialMediaPost.platform`) },
        'author/username': {
          label: t(`${fieldPrefix}.socialMediaPost.authorUsername`),
        },
        'publish_date': { label: t(`${fieldPrefix}.socialMediaPost.publishDate`) },
        'post_url': { label: t(`${fieldPrefix}.socialMediaPost.postURL`) },
        'topics/tags': { label: t(`${fieldPrefix}.socialMediaPost.topicsTags`) },
      },
    },
    personal_document: {
      text: t('datasetDocuments.metadata.type.personalDocument'),
      iconName: 'file',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.personalDocument.title`) },
        'author': { label: t(`${fieldPrefix}.personalDocument.author`) },
        'creation_date': {
          label: t(`${fieldPrefix}.personalDocument.creationDate`),
        },
        'last_modified_date': {
          label: t(`${fieldPrefix}.personalDocument.lastModifiedDate`),
        },
        'document_type': {
          label: t(`${fieldPrefix}.personalDocument.documentType`),
          inputType: 'select',
        },
        'tags/category': {
          label: t(`${fieldPrefix}.personalDocument.tagsCategory`),
        },
      },
    },
    business_document: {
      text: t('datasetDocuments.metadata.type.businessDocument'),
      iconName: 'briefcase',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.businessDocument.title`) },
        'author': { label: t(`${fieldPrefix}.businessDocument.author`) },
        'creation_date': {
          label: t(`${fieldPrefix}.businessDocument.creationDate`),
        },
        'last_modified_date': {
          label: t(`${fieldPrefix}.businessDocument.lastModifiedDate`),
        },
        'document_type': {
          label: t(`${fieldPrefix}.businessDocument.documentType`),
          inputType: 'select',
        },
        'department/team': {
          label: t(`${fieldPrefix}.businessDocument.departmentTeam`),
        },
      },
    },
    im_chat_log: {
      text: t('datasetDocuments.metadata.type.IMChat'),
      iconName: 'messageTextCircle',
      subFieldsMap: {
        'chat_platform': { label: t(`${fieldPrefix}.IMChat.chatPlatform`) },
        'chat_participants/group_name': {
          label: t(`${fieldPrefix}.IMChat.chatPartiesGroupName`),
        },
        'start_date': { label: t(`${fieldPrefix}.IMChat.startDate`) },
        'end_date': { label: t(`${fieldPrefix}.IMChat.endDate`) },
        'participants': { label: t(`${fieldPrefix}.IMChat.participants`) },
        'topicsKeywords': {
          label: t(`${fieldPrefix}.IMChat.topicsKeywords`),
          inputType: 'textarea',
        },
        'fileType': { label: t(`${fieldPrefix}.IMChat.fileType`) },
      },
    },
    wikipedia_entry: {
      text: t('datasetDocuments.metadata.type.wikipediaEntry'),
      allowEdit: false,
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.wikipediaEntry.title`) },
        'language': {
          label: t(`${fieldPrefix}.wikipediaEntry.language`),
          inputType: 'select',
        },
        'web_page_url': { label: t(`${fieldPrefix}.wikipediaEntry.webpageURL`) },
        'editor/contributor': {
          label: t(`${fieldPrefix}.wikipediaEntry.editorContributor`),
        },
        'last_edit_date': {
          label: t(`${fieldPrefix}.wikipediaEntry.lastEditDate`),
        },
        'summary/introduction': {
          label: t(`${fieldPrefix}.wikipediaEntry.summaryIntroduction`),
          inputType: 'textarea',
        },
      },
    },
    synced_from_notion: {
      text: t('datasetDocuments.metadata.type.notion'),
      allowEdit: false,
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.notion.title`) },
        'language': { label: t(`${fieldPrefix}.notion.lang`), inputType: 'select' },
        'author/creator': { label: t(`${fieldPrefix}.notion.author`) },
        'creation_date': { label: t(`${fieldPrefix}.notion.createdTime`) },
        'last_modified_date': {
          label: t(`${fieldPrefix}.notion.lastModifiedTime`),
        },
        'notion_page_link': { label: t(`${fieldPrefix}.notion.url`) },
        'category/tags': { label: t(`${fieldPrefix}.notion.tag`) },
        'description': { label: t(`${fieldPrefix}.notion.desc`) },
      },
    },
    synced_from_github: {
      text: t('datasetDocuments.metadata.type.github'),
      allowEdit: false,
      subFieldsMap: {
        'repository_name': { label: t(`${fieldPrefix}.github.repoName`) },
        'repository_description': { label: t(`${fieldPrefix}.github.repoDesc`) },
        'repository_owner/organization': { label: t(`${fieldPrefix}.github.repoOwner`) },
        'code_filename': { label: t(`${fieldPrefix}.github.fileName`) },
        'code_file_path': { label: t(`${fieldPrefix}.github.filePath`) },
        'programming_language': { label: t(`${fieldPrefix}.github.programmingLang`) },
        'github_link': { label: t(`${fieldPrefix}.github.url`) },
        'open_source_license': { label: t(`${fieldPrefix}.github.license`) },
        'commit_date': { label: t(`${fieldPrefix}.github.lastCommitTime`) },
        'commit_author': {
          label: t(`${fieldPrefix}.github.lastCommitAuthor`),
        },
      },
    },
    originInfo: {
      // This section deliberately has an empty heading.
      text: '',
      allowEdit: false,
      subFieldsMap: {
        'name': { label: t(`${fieldPrefix}.originInfo.originalFilename`) },
        'data_source_info.upload_file.size': {
          label: t(`${fieldPrefix}.originInfo.originalFileSize`),
          render: value => formatFileSize(value),
        },
        'created_at': {
          label: t(`${fieldPrefix}.originInfo.uploadDate`),
          render: value => formatTimestamp(value, t('datasetDocuments.metadata.dateTimeFormat') as string),
        },
        'completed_at': {
          label: t(`${fieldPrefix}.originInfo.lastUpdateDate`),
          render: value => formatTimestamp(value, t('datasetDocuments.metadata.dateTimeFormat') as string),
        },
        'data_source_type': {
          label: t(`${fieldPrefix}.originInfo.source`),
          // The raw value is used as the last segment of the translation key.
          render: value => t(`datasetDocuments.metadata.source.${value}`),
        },
      },
    },
    technicalParameters: {
      text: t('datasetDocuments.metadata.type.technicalParameters'),
      allowEdit: false,
      subFieldsMap: {
        'dataset_process_rule.mode': {
          label: t(`${fieldPrefix}.technicalParameters.segmentSpecification`),
          // Any mode other than 'automatic' is shown as "custom".
          render: value => value === 'automatic'
            ? (t('datasetDocuments.embedding.automatic') as string)
            : (t('datasetDocuments.embedding.custom') as string),
        },
        'dataset_process_rule.rules.segmentation.max_tokens': {
          label: t(`${fieldPrefix}.technicalParameters.segmentLength`),
          render: value => formatNumber(value),
        },
        'average_segment_length': {
          label: t(`${fieldPrefix}.technicalParameters.avgParagraphLength`),
          render: value => `${formatNumber(value)} characters`,
        },
        'segment_count': {
          label: t(`${fieldPrefix}.technicalParameters.paragraphs`),
          render: value => `${formatNumber(value)} paragraphs`,
        },
        'hit_count': {
          label: t(`${fieldPrefix}.technicalParameters.hitCount`),
          render: (value, total) => {
            const hits = value || 0
            // `total` is optional: fall back to 0 so the text never shows
            // "undefined", and skip the division when there is nothing to divide by.
            const totalCount = total || 0
            const percent = totalCount ? ((hits / totalCount) * 100).toFixed(2) : 0
            return `${percent}% (${hits}/${totalCount})`
          },
        },
        'indexing_latency': {
          label: t(`${fieldPrefix}.technicalParameters.embeddingTime`),
          render: value => formatTime(value),
        },
        'tokens': {
          label: t(`${fieldPrefix}.technicalParameters.embeddedSpend`),
          render: value => `${formatNumber(value)} tokens`,
        },
      },
    },
  }
}
/** i18n key prefix (including the trailing dot) for language display names. */
const langPrefix = 'datasetDocuments.metadata.languageMap.'

/**
 * Returns the translated display name for each supported language, keyed by
 * its two-letter code.
 *
 * Calls `useTranslation()`, so it is a hook and should be called the way
 * hooks are.
 *
 * @returns An object whose keys are the language codes listed below and whose
 * values are the results of `t(langPrefix + code)`.
 */
export const useLanguages = () => {
  const { t } = useTranslation()

  return {
    zh: t(`${langPrefix}zh`),
    en: t(`${langPrefix}en`),
    es: t(`${langPrefix}es`),
    fr: t(`${langPrefix}fr`),
    de: t(`${langPrefix}de`),
    ja: t(`${langPrefix}ja`),
    ko: t(`${langPrefix}ko`),
    ru: t(`${langPrefix}ru`),
    ar: t(`${langPrefix}ar`),
    pt: t(`${langPrefix}pt`),
    it: t(`${langPrefix}it`),
    nl: t(`${langPrefix}nl`),
    pl: t(`${langPrefix}pl`),
    sv: t(`${langPrefix}sv`),
    tr: t(`${langPrefix}tr`),
    he: t(`${langPrefix}he`),
    hi: t(`${langPrefix}hi`),
    da: t(`${langPrefix}da`),
    fi: t(`${langPrefix}fi`),
    no: t(`${langPrefix}no`),
    hu: t(`${langPrefix}hu`),
    el: t(`${langPrefix}el`),
    cs: t(`${langPrefix}cs`),
    th: t(`${langPrefix}th`),
    id: t(`${langPrefix}id`),
    ro: t(`${langPrefix}ro`),
  }
}
/** i18n key prefix (including the trailing dot) for book category names. */
const bookCategoryPrefix = 'datasetDocuments.metadata.categoryMap.book.'

/**
 * Returns the translated display name for each book category.
 *
 * Calls `useTranslation()`, so it is a hook and should be called the way
 * hooks are.
 *
 * @returns An object keyed by category id whose values are the results of
 * `t(bookCategoryPrefix + id)`.
 */
export const useBookCategories = () => {
  const { t } = useTranslation()

  return {
    fiction: t(`${bookCategoryPrefix}fiction`),
    biography: t(`${bookCategoryPrefix}biography`),
    history: t(`${bookCategoryPrefix}history`),
    science: t(`${bookCategoryPrefix}science`),
    technology: t(`${bookCategoryPrefix}technology`),
    education: t(`${bookCategoryPrefix}education`),
    philosophy: t(`${bookCategoryPrefix}philosophy`),
    religion: t(`${bookCategoryPrefix}religion`),
    socialSciences: t(`${bookCategoryPrefix}socialSciences`),
    art: t(`${bookCategoryPrefix}art`),
    travel: t(`${bookCategoryPrefix}travel`),
    health: t(`${bookCategoryPrefix}health`),
    selfHelp: t(`${bookCategoryPrefix}selfHelp`),
    businessEconomics: t(`${bookCategoryPrefix}businessEconomics`),
    cooking: t(`${bookCategoryPrefix}cooking`),
    childrenYoungAdults: t(`${bookCategoryPrefix}childrenYoungAdults`),
    comicsGraphicNovels: t(`${bookCategoryPrefix}comicsGraphicNovels`),
    poetry: t(`${bookCategoryPrefix}poetry`),
    drama: t(`${bookCategoryPrefix}drama`),
    other: t(`${bookCategoryPrefix}other`),
  }
}
/** i18n key prefix (including the trailing dot) for personal document category names. */
const personalDocCategoryPrefix
  = 'datasetDocuments.metadata.categoryMap.personalDoc.'

/**
 * Returns the translated display name for each personal document category.
 *
 * Calls `useTranslation()`, so it is a hook and should be called the way
 * hooks are.
 *
 * @returns An object keyed by category id whose values are the results of
 * `t(personalDocCategoryPrefix + id)`.
 */
export const usePersonalDocCategories = () => {
  const { t } = useTranslation()

  return {
    notes: t(`${personalDocCategoryPrefix}notes`),
    blogDraft: t(`${personalDocCategoryPrefix}blogDraft`),
    diary: t(`${personalDocCategoryPrefix}diary`),
    researchReport: t(`${personalDocCategoryPrefix}researchReport`),
    bookExcerpt: t(`${personalDocCategoryPrefix}bookExcerpt`),
    schedule: t(`${personalDocCategoryPrefix}schedule`),
    list: t(`${personalDocCategoryPrefix}list`),
    projectOverview: t(`${personalDocCategoryPrefix}projectOverview`),
    photoCollection: t(`${personalDocCategoryPrefix}photoCollection`),
    creativeWriting: t(`${personalDocCategoryPrefix}creativeWriting`),
    codeSnippet: t(`${personalDocCategoryPrefix}codeSnippet`),
    designDraft: t(`${personalDocCategoryPrefix}designDraft`),
    personalResume: t(`${personalDocCategoryPrefix}personalResume`),
    other: t(`${personalDocCategoryPrefix}other`),
  }
}
/** i18n key prefix (including the trailing dot) for business document category names. */
const businessDocCategoryPrefix
  = 'datasetDocuments.metadata.categoryMap.businessDoc.'

/**
 * Returns the translated display name for each business document category.
 *
 * Calls `useTranslation()`, so it is a hook and should be called the way
 * hooks are.
 *
 * @returns An object keyed by category id whose values are the results of
 * `t(businessDocCategoryPrefix + id)`.
 */
export const useBusinessDocCategories = () => {
  const { t } = useTranslation()

  return {
    meetingMinutes: t(`${businessDocCategoryPrefix}meetingMinutes`),
    researchReport: t(`${businessDocCategoryPrefix}researchReport`),
    proposal: t(`${businessDocCategoryPrefix}proposal`),
    employeeHandbook: t(`${businessDocCategoryPrefix}employeeHandbook`),
    trainingMaterials: t(`${businessDocCategoryPrefix}trainingMaterials`),
    requirementsDocument: t(`${businessDocCategoryPrefix}requirementsDocument`),
    designDocument: t(`${businessDocCategoryPrefix}designDocument`),
    productSpecification: t(`${businessDocCategoryPrefix}productSpecification`),
    financialReport: t(`${businessDocCategoryPrefix}financialReport`),
    marketAnalysis: t(`${businessDocCategoryPrefix}marketAnalysis`),
    projectPlan: t(`${businessDocCategoryPrefix}projectPlan`),
    teamStructure: t(`${businessDocCategoryPrefix}teamStructure`),
    policiesProcedures: t(`${businessDocCategoryPrefix}policiesProcedures`),
    contractsAgreements: t(`${businessDocCategoryPrefix}contractsAgreements`),
    emailCorrespondence: t(`${businessDocCategoryPrefix}emailCorrespondence`),
    other: t(`${businessDocCategoryPrefix}other`),
  }
}