File size: 4,601 Bytes
1b44660
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import { Readability } from '@mozilla/readability';
import * as linkedom from 'linkedom';
import { beforeEach, describe, expect, it, vi } from 'vitest';
import { parseArticle } from '../src/lib/parsers';

// Swap both third-party modules for bare spies: each factory below supplies a
// module whose only export is a `vi.fn()`, which the tests then configure.
vi.mock('@mozilla/readability', () => ({ Readability: vi.fn() }));

vi.mock('linkedom', () => ({ parseHTML: vi.fn() }));

describe('parseArticle', () => {
  // Note: Testing Readability itself is hard. Focus on the wrapper.

  /** Placeholder HTML handed to `parseArticle` in every test case. */
  const SAMPLE_HTML = '<html><body>Test</body></html>';

  /**
   * Makes the mocked `Readability` produce a stub instance whose `parse`
   * method is the supplied function.
   *
   * @param parse - Stand-in for `Readability.prototype.parse`; may return an
   *   article-like object, return `null`, or throw.
   */
  const stubReadabilityParse = (parse: () => unknown): void => {
    vi.mocked(Readability).mockImplementation(() => {
      // Only `parse` is provided, hence the double cast to the real class type.
      return { parse } as unknown as Readability;
    });
  };

  beforeEach(() => {
    // Drop implementations and call history left over from the previous test.
    vi.resetAllMocks();

    // Default mock for linkedom: a stub object exposing only `document`.
    // The double cast is needed because `mockReturnValue` expects the real
    // return type of `parseHTML`; a bare `as unknown` does not type-check.
    vi.mocked(linkedom.parseHTML).mockReturnValue({
      document: 'mock-document',
    } as unknown as ReturnType<typeof linkedom.parseHTML>);
  });

  it('should return an error Result if the Readability constructor throws an exception', () => {
    // Setup: make constructing Readability throw
    vi.mocked(Readability).mockImplementation(() => {
      throw new Error('Readability error');
    });

    // Execute
    const result = parseArticle({ html: SAMPLE_HTML });

    // Verify
    expect(result.isErr()).toBe(true);
    if (result.isErr()) {
      expect(result.error.type).toBe('READABILITY_ERROR');
    }
  });

  it('should return an error Result if Readability.parse() throws an exception', () => {
    // Setup: construction succeeds, but parse() throws
    stubReadabilityParse(() => {
      throw new Error('Readability parse error');
    });

    // Execute
    const result = parseArticle({ html: SAMPLE_HTML });

    // Verify
    expect(result.isErr()).toBe(true);
    if (result.isErr()) {
      expect(result.error.type).toBe('READABILITY_ERROR');
    }
  });

  it('should return an error Result if Readability returns null', () => {
    // Setup: make Readability.parse() return null
    stubReadabilityParse(() => null);

    // Execute
    const result = parseArticle({ html: SAMPLE_HTML });

    // Verify
    expect(result.isErr()).toBe(true);
    if (result.isErr()) {
      expect(result.error.type).toBe('NO_ARTICLE_FOUND');
    }
  });

  it('should return an error Result if Readability result is missing title', () => {
    // Setup: make Readability.parse() return an object with an empty title
    stubReadabilityParse(() => ({
      title: '', // empty title
      textContent: 'Some content',
    }));

    // Execute
    const result = parseArticle({ html: SAMPLE_HTML });

    // Verify
    expect(result.isErr()).toBe(true);
    if (result.isErr()) {
      expect(result.error.type).toBe('NO_ARTICLE_FOUND');
    }
  });

  it('should return an error Result if Readability result is missing textContent', () => {
    // Setup: make Readability.parse() return an object with empty textContent
    stubReadabilityParse(() => ({
      title: 'Article Title',
      textContent: '', // empty textContent
    }));

    // Execute
    const result = parseArticle({ html: SAMPLE_HTML });

    // Verify
    expect(result.isErr()).toBe(true);
    if (result.isErr()) {
      expect(result.error.type).toBe('NO_ARTICLE_FOUND');
    }
  });

  it('should return the extracted title, cleaned textContent, and publishedTime when successful', () => {
    // Setup: make Readability.parse() return a valid article
    stubReadabilityParse(() => ({
      title: 'Article Title',
      textContent: 'Article content here',
      publishedTime: '2025-03-18T18:04:44-04:00',
    }));

    // Execute
    const result = parseArticle({ html: SAMPLE_HTML });

    // Verify: `textContent` is surfaced under the `text` key
    expect(result.isOk()).toBe(true);
    if (result.isOk()) {
      expect(result.value).toEqual({
        title: 'Article Title',
        text: 'Article content here',
        publishedTime: '2025-03-18T18:04:44-04:00',
      });
    }
  });

  it('should clean and normalize whitespace in the extracted textContent', () => {
    // Setup: make Readability.parse() return messy text content
    const messyText = '  Multiple    spaces  \n\n\n  and \t\t tabs \n   and extra newlines  ';
    stubReadabilityParse(() => ({
      title: 'Article Title',
      textContent: messyText,
    }));

    // Execute
    const result = parseArticle({ html: SAMPLE_HTML });

    // Verify
    expect(result.isOk()).toBe(true);
    if (result.isOk()) {
      // The text should be cleaned according to the cleanString function logic
      expect(result.value.text).toBe('Multiple spaces\nand tabs\nand extra newlines');
    }
  });
});