Spaces:
Running
Running
File size: 4,601 Bytes
1b44660 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
import { Readability } from '@mozilla/readability';
import * as linkedom from 'linkedom';
import { beforeEach, describe, expect, it, vi } from 'vitest';
import { parseArticle } from '../src/lib/parsers';
// Replace both third-party modules with bare mock functions; each test
// configures the behaviour it needs through vi.mocked(...).
vi.mock('@mozilla/readability', () => ({
  Readability: vi.fn(),
}));

vi.mock('linkedom', () => ({
  parseHTML: vi.fn(),
}));
/**
 * Unit tests for the `parseArticle` wrapper.
 *
 * `Readability` and `linkedom.parseHTML` are replaced by mock functions, so
 * these tests check how `parseArticle` turns Readability outcomes into a
 * Result, not Readability's own extraction.
 */
describe('parseArticle', () => {
  // Note: Testing Readability itself is hard. Focus on the wrapper.

  /** HTML passed to every call; linkedom.parseHTML is mocked, so the markup is never really parsed. */
  const SAMPLE_HTML = '<html><body>Test</body></html>';

  /**
   * Makes each `new Readability(...)` produce an object whose `parse()`
   * returns the given value.
   *
   * @param article - value that the mocked `parse()` should return
   */
  function mockReadabilityParse(article: unknown): void {
    vi.mocked(Readability).mockImplementation(() => {
      // Only `parse` is stubbed, hence the double assertion.
      return { parse: () => article } as unknown as Readability;
    });
  }

  beforeEach(() => {
    // Drop implementations and call history left over from the previous test.
    vi.resetAllMocks();

    // Default mocks for linkedom.
    // A bare `as unknown` is not assignable to mockReturnValue's parameter,
    // so assert to parseHTML's real return type instead.
    vi.mocked(linkedom.parseHTML).mockReturnValue({
      document: 'mock-document',
    } as unknown as ReturnType<typeof linkedom.parseHTML>);
  });

  // NOTE(review): only the constructor-throws path is exercised below; a
  // throwing parse() is named in the title but not covered by this test.
  it('should return an error Result if Readability constructor or parse() throws an exception', () => {
    // Setup: Make Readability throw an error
    vi.mocked(Readability).mockImplementation(() => {
      throw new Error('Readability error');
    });

    // Execute
    const result = parseArticle({ html: SAMPLE_HTML });

    // Verify (the isErr() guard narrows the type so `.error` is accessible)
    expect(result.isErr()).toBe(true);
    if (result.isErr()) {
      expect(result.error.type).toBe('READABILITY_ERROR');
    }
  });

  it('should return an error Result if Readability returns null', () => {
    // Setup: Make Readability.parse() return null
    mockReadabilityParse(null);

    // Execute
    const result = parseArticle({ html: SAMPLE_HTML });

    // Verify
    expect(result.isErr()).toBe(true);
    if (result.isErr()) {
      expect(result.error.type).toBe('NO_ARTICLE_FOUND');
    }
  });

  it('should return an error Result if Readability result is missing title', () => {
    // Setup: Make Readability.parse() return an object with an empty title
    mockReadabilityParse({
      title: '', // empty title
      textContent: 'Some content',
    });

    // Execute
    const result = parseArticle({ html: SAMPLE_HTML });

    // Verify
    expect(result.isErr()).toBe(true);
    if (result.isErr()) {
      expect(result.error.type).toBe('NO_ARTICLE_FOUND');
    }
  });

  it('should return an error Result if Readability result is missing textContent', () => {
    // Setup: Make Readability.parse() return an object with empty textContent
    mockReadabilityParse({
      title: 'Article Title',
      textContent: '', // empty textContent
    });

    // Execute
    const result = parseArticle({ html: SAMPLE_HTML });

    // Verify
    expect(result.isErr()).toBe(true);
    if (result.isErr()) {
      expect(result.error.type).toBe('NO_ARTICLE_FOUND');
    }
  });

  it('should return the extracted title, cleaned textContent, and publishedTime when successful', () => {
    // Setup: Make Readability.parse() return a valid article
    mockReadabilityParse({
      title: 'Article Title',
      textContent: 'Article content here',
      publishedTime: '2025-03-18T18:04:44-04:00',
    });

    // Execute
    const result = parseArticle({ html: SAMPLE_HTML });

    // Verify: `textContent` is exposed as `text` on the returned value
    expect(result.isOk()).toBe(true);
    if (result.isOk()) {
      expect(result.value).toEqual({
        title: 'Article Title',
        text: 'Article content here',
        publishedTime: '2025-03-18T18:04:44-04:00',
      });
    }
  });

  it('should clean and normalize whitespace in the extracted textContent', () => {
    // Setup: Make Readability.parse() return messy text content
    const messyText = ' Multiple spaces \n\n\n and \t\t tabs \n and extra newlines ';
    mockReadabilityParse({
      title: 'Article Title',
      textContent: messyText,
    });

    // Execute
    const result = parseArticle({ html: SAMPLE_HTML });

    // Verify
    expect(result.isOk()).toBe(true);
    if (result.isOk()) {
      // The text should be cleaned according to the cleanString function logic
      expect(result.value.text).toBe('Multiple spaces\nand tabs\nand extra newlines');
    }
  });
});
|