import { Readability } from '@mozilla/readability';
import * as linkedom from 'linkedom';
import { beforeEach, describe, expect, it, vi } from 'vitest';
import { parseArticle } from '../src/lib/parsers';

// Replace both third-party dependencies with bare mocks so that every test
// decides exactly what the parseArticle wrapper observes.
vi.mock('@mozilla/readability', () => {
  return {
    Readability: vi.fn(),
  };
});

vi.mock('linkedom', () => {
  return {
    parseHTML: vi.fn(),
  };
});

/** Minimal shape of a `Readability#parse()` result stubbed by these tests. */
type StubbedArticle = {
  title: string;
  textContent: string;
  publishedTime?: string;
};

/**
 * Makes the mocked `Readability` constructor yield an instance whose
 * `parse()` returns `parsed`.
 *
 * A `function` expression is used instead of an arrow function because arrow
 * functions are not constructible; this keeps the mock usable with
 * `new Readability(...)` on Vitest versions that construct the implementation.
 *
 * @param parsed - Value the stubbed `parse()` should return (`null` simulates
 *   Readability finding no article).
 */
function mockReadabilityParse(parsed: StubbedArticle | null): void {
  vi.mocked(Readability).mockImplementation(function () {
    // Only `parse` is stubbed, hence the double assertion to the full type.
    return { parse: () => parsed } as unknown as Readability;
  });
}

describe('parseArticle', () => {
  // Note: Testing Readability itself is hard. Focus on the wrapper.

  beforeEach(() => {
    vi.resetAllMocks();

    // Default mock for linkedom: hand back a placeholder document. The value
    // is deliberately not a real DOM, so it is asserted to parseHTML's return
    // type to keep the call type-correct.
    vi.mocked(linkedom.parseHTML).mockReturnValue({
      document: 'mock-document',
    } as unknown as ReturnType<typeof linkedom.parseHTML>);
  });

  it('should return an error Result if Readability constructor or parse() throws an exception', () => {
    // Setup: make constructing Readability throw
    vi.mocked(Readability).mockImplementation(function () {
      throw new Error('Readability error');
    });

    // Execute
    const result = parseArticle({ html: 'Test' });

    // Verify
    expect(result.isErr()).toBe(true);
    if (result.isErr()) {
      expect(result.error.type).toBe('READABILITY_ERROR');
    }
  });

  it('should return an error Result if Readability returns null', () => {
    // Setup: Readability.parse() returns null
    mockReadabilityParse(null);

    // Execute
    const result = parseArticle({ html: 'Test' });

    // Verify
    expect(result.isErr()).toBe(true);
    if (result.isErr()) {
      expect(result.error.type).toBe('NO_ARTICLE_FOUND');
    }
  });

  it('should return an error Result if Readability result is missing title', () => {
    // Setup: Readability.parse() returns an article with an empty title
    mockReadabilityParse({
      title: '', // empty title
      textContent: 'Some content',
    });

    // Execute
    const result = parseArticle({ html: 'Test' });

    // Verify
    expect(result.isErr()).toBe(true);
    if (result.isErr()) {
      expect(result.error.type).toBe('NO_ARTICLE_FOUND');
    }
  });

  it('should return an error Result if Readability result is missing textContent', () => {
    // Setup: Readability.parse() returns an article with empty textContent
    mockReadabilityParse({
      title: 'Article Title',
      textContent: '', // empty textContent
    });

    // Execute
    const result = parseArticle({ html: 'Test' });

    // Verify
    expect(result.isErr()).toBe(true);
    if (result.isErr()) {
      expect(result.error.type).toBe('NO_ARTICLE_FOUND');
    }
  });

  it('should return the extracted title, cleaned textContent, and publishedTime when successful', () => {
    // Setup: Readability.parse() returns a valid article
    mockReadabilityParse({
      title: 'Article Title',
      textContent: 'Article content here',
      publishedTime: '2025-03-18T18:04:44-04:00',
    });

    // Execute
    const result = parseArticle({ html: 'Test' });

    // Verify
    expect(result.isOk()).toBe(true);
    if (result.isOk()) {
      expect(result.value).toEqual({
        title: 'Article Title',
        text: 'Article content here',
        publishedTime: '2025-03-18T18:04:44-04:00',
      });
    }
  });

  it('should clean and normalize whitespace in the extracted textContent', () => {
    // Setup: Readability.parse() returns messy text content
    const messyText = ' Multiple spaces \n\n\n and \t\t tabs \n and extra newlines ';
    mockReadabilityParse({
      title: 'Article Title',
      textContent: messyText,
    });

    // Execute
    const result = parseArticle({ html: 'Test' });

    // Verify
    expect(result.isOk()).toBe(true);
    if (result.isOk()) {
      // The text should be cleaned according to the cleanString function logic
      expect(result.value.text).toBe('Multiple spaces\nand tabs\nand extra newlines');
    }
  });
});