File size: 7,343 Bytes
a1c0952
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
#!/usr/bin/env node

import fs from 'node:fs';
import path from 'node:path';

import cliProgress from 'cli-progress';

import { ckanPackageSearch, findResultByTypeAndArea, getAndParseCSVDataForId, getAndStreamCSVDataForId } from '../lib/ckan.js';
import { machiAzaName, SingleChiban, SingleMachiAza } from '../data.js';
import { projectABRData } from '../lib/proj.js';
import { MachiAzaData } from '../lib/ckan_data/machi_aza.js';
import { ChibanData, ChibanPosData } from '../lib/ckan_data/chiban.js';
import { mergeDataLeftJoin } from '../lib/ckan_data/index.js';

/**
 * Granularity, in bytes, of the header region at the start of each output
 * file: the header is padded to a whole multiple of this size.
 */
const HEADER_CHUNK_SIZE = 50_000;

/** All chiban (parcel number) entries of one municipality, grouped by machi-aza. */
type ChibanApi = Array<{
  machiAza: SingleMachiAza;
  chibans: SingleChiban[];
}>;

/** One header entry locating a machi-aza section inside the output file. */
interface HeaderRow {
  /** Display name of the machi-aza the section belongs to. */
  name: string;
  /** Byte offset of the section, counted from the start of the file. */
  offset: number;
  /** Byte length of the section. */
  length: number;
}

/**
 * Serializes the chiban data of one municipality into a single text buffer.
 *
 * Layout of the result:
 *   1. A header with one `name,offset,length` line per machi-aza, terminated
 *      by an `=END=` line and padded with spaces (0x20) up to
 *      `HEADER_CHUNK_SIZE * headerIterations` bytes.
 *   2. One CSV section per machi-aza, in the same order as `apiData`. Each
 *      section starts with a `地番,<name>` line and a column-name line.
 *
 * Offsets in the header are absolute byte positions within the returned
 * buffer, so they already include the padded header size.
 *
 * @param apiData machi-aza entries together with their chiban lists
 * @returns the number of header chunks used, the parsed header rows, and the
 *          complete file contents
 */
function serializeApiDataTxt(apiData: ChibanApi): { headerIterations: number, headerData: HeaderRow[], data: Buffer } {
  // Encode every machi-aza as its own UTF-8 CSV section.
  const sections = apiData.map(({ machiAza, chibans }) => {
    const name = machiAzaName(machiAza);
    const lines = [
      `地番,${name}`,
      'prc_num1,prc_num2,prc_num3,lng,lat',
      ...chibans.map((chiban) =>
        `${chiban.prc_num1},${chiban.prc_num2 || ''},${chiban.prc_num3 || ''},${chiban.point?.[0] || ''},${chiban.point?.[1] || ''}`
      ),
    ];
    return { name, buffer: Buffer.from(lines.join('\n') + '\n', 'utf8') };
  });
  const sectionBuffers = sections.map((section) => section.buffer);

  // The offsets depend on the header size, and the header size depends on the
  // (textual) offsets, so grow the header one chunk at a time until it fits.
  for (let iterations = 1; ; iterations++) {
    const headerMaxSize = HEADER_CHUNK_SIZE * iterations;

    let nextOffset = headerMaxSize;
    const headerData: HeaderRow[] = sections.map(({ name, buffer }) => {
      const row = { name, offset: nextOffset, length: buffer.length };
      nextOffset += buffer.length;
      return row;
    });

    const headerText = headerData
      .map((row) => `${row.name},${row.offset},${row.length}\n`)
      .join('');
    const headerBuf = Buffer.from(headerText + '=END=\n', 'utf8');
    if (headerBuf.length > headerMaxSize) {
      // Does not fit: retry with one more chunk reserved for the header.
      continue;
    }

    // Pad the remainder of the header region with spaces.
    const padding = Buffer.alloc(headerMaxSize - headerBuf.length, 0x20);
    return {
      headerIterations: iterations,
      headerData,
      data: Buffer.concat([headerBuf, padding, ...sectionBuffers]),
    };
  }
}

/**
 * Writes the serialized chiban file for one municipality to
 * `<outDir>/ja/<outFilename>-地番.txt`, creating parent directories as needed,
 * and logs how many machi-aza entries were written.
 *
 * Does nothing when `apiData` is empty. (A JSON variant of the output used to
 * be sketched here but is not produced.)
 *
 * @param outDir root output directory
 * @param outFilename path below `ja/`, without the `-地番.txt` suffix
 * @param apiData machi-aza entries together with their chiban lists
 */
async function outputChibanData(outDir: string, outFilename: string, apiData: ChibanApi) {
  const entryCount = apiData.length;
  if (entryCount > 0) {
    const targetPath = path.join(outDir, 'ja', `${outFilename}-地番.txt`);
    const { data } = serializeApiDataTxt(apiData);

    await fs.promises.mkdir(path.dirname(targetPath), { recursive: true });
    await fs.promises.writeFile(targetPath, data);

    const paddedCount = String(entryCount).padEnd(4, ' ');
    console.log(`${outFilename}: ${paddedCount} 件の町字の地番を出力した`);
  }
}

/**
 * Entry point: generates one chiban (parcel number) text file per municipality.
 *
 * Steps:
 *   1. Download the nationwide machi-aza master and index it by
 *      `lg_code|machiaza_id`.
 *   2. Pick one representative machi-aza row per municipality (`lg_code`).
 *   3. For each municipality, search CKAN for its chiban master (and the
 *      optional position-reference extension), stream and left-join them,
 *      group the rows by machi-aza, and write the result via
 *      `outputChibanData`.
 *
 * Municipalities without a chiban master are reported on stderr and skipped.
 *
 * @param argv process-style argument vector; `argv[2]` optionally overrides
 *             the output directory (default: `<this dir>/../../out/api`)
 */
async function main(argv: string[]): Promise<void> {
  const outDir = argv[2] || path.join(import.meta.dirname, '..', '..', 'out', 'api');
  fs.mkdirSync(outDir, { recursive: true });

  console.log('事前準備: 町字データを取得中...');
  const machiAzaData = await getAndParseCSVDataForId<MachiAzaData>('ba-o1-000000_g2-000003'); // municipalities & machi-aza
  const machiAzaDataByCode = new Map(machiAzaData.map((ma) => [
    `${ma.lg_code}|${ma.machiaza_id}`,
    ma
  ]));

  // Keep exactly one representative row per lg_code. A Set is used instead of
  // `findIndex(...) > 0`, which failed to detect a match at index 0 (so rows
  // sharing the first row's lg_code were never de-duplicated) and rescanned
  // the whole list for every row.
  const seenLgCodes = new Set<MachiAzaData['lg_code']>();
  const machiAzas: MachiAzaData[] = [];
  for (const ma of machiAzaData) {
    if (seenLgCodes.has(ma.lg_code)) {
      continue;
    }
    seenLgCodes.add(ma.lg_code);
    machiAzas.push(ma);
  }
  console.log('事前準備: 町字データを取得しました');

  const progress = new cliProgress.SingleBar({
    format: ' {bar} {percentage}% | ETA: {eta_formatted} | {value}/{total}',
    barCompleteChar: '\u2588',
    barIncompleteChar: '\u2591',
    etaBuffer: 30,
    fps: 2,
    // No-TTY output is required for CI/CD environments
    noTTYOutput: true,
  });
  progress.start(machiAzas.length, 0);
  try {
    // `machiAzas` holds unique lg_codes, so every iteration is a new municipality.
    for (const ma of machiAzas) {
      let area = `${ma.pref} ${ma.county}${ma.city}`;
      if (ma.ward !== '') {
        area += ` ${ma.ward}`;
      }
      const searchQuery = `${area} 地番マスター`;
      const results = await ckanPackageSearch(searchQuery);
      const chibanDataRef = findResultByTypeAndArea(results, '地番マスター(市区町村)', area);
      const chibanPosDataRef = findResultByTypeAndArea(results, '地番マスター位置参照拡張(市区町村)', area);
      if (!chibanDataRef) {
        console.error(`Insufficient data found for ${searchQuery} (地番マスター)`);
        progress.increment();
        continue;
      }

      const mainStream = getAndStreamCSVDataForId<ChibanData>(chibanDataRef.name);
      const posStream = chibanPosDataRef ?
        getAndStreamCSVDataForId<ChibanPosData>(chibanPosDataRef.name)
        :
        // The position-reference extension dataset does not always exist;
        // join against an empty stream in that case.
        (async function*() {})();

      const rawData = mergeDataLeftJoin(mainStream, posStream, ['lg_code', 'machiaza_id', 'prc_id'], true);

      // Group consecutive rows that belong to the same machi-aza.
      let currentMachiAza: MachiAzaData | undefined = undefined;
      const apiData: ChibanApi = [];
      let currentChibanList: SingleChiban[] = [];
      for await (const raw of rawData) {
        const rowMachiAza = machiAzaDataByCode.get(`${raw.lg_code}|${raw.machiaza_id}`);
        if (!rowMachiAza) {
          // Row refers to a machi-aza that is not in the master; ignore it.
          continue;
        }
        if (currentMachiAza && (currentMachiAza.machiaza_id !== rowMachiAza.machiaza_id || currentMachiAza.lg_code !== rowMachiAza.lg_code)) {
          // Machi-aza changed: flush the group collected so far.
          apiData.push({
            machiAza: currentMachiAza,
            chibans: currentChibanList,
          });
          currentChibanList = [];
          currentMachiAza = rowMachiAza;
        }
        if (!currentMachiAza) {
          currentMachiAza = rowMachiAza;
        }

        currentChibanList.push({
          prc_num1: raw.prc_num1,
          prc_num2: raw.prc_num2 !== '' ? raw.prc_num2 : undefined,
          prc_num3: raw.prc_num3 !== '' ? raw.prc_num3 : undefined,
          // Only rows matched by the position dataset carry `rep_srid`.
          point: 'rep_srid' in raw ? projectABRData(raw) : undefined,
        });
      }
      // Flush the final group.
      if (currentMachiAza && currentChibanList.length > 0) {
        apiData.push({
          machiAza: currentMachiAza,
          chibans: currentChibanList,
        });
      }
      await outputChibanData(outDir, path.join(
        ma.pref,
        `${ma.county}${ma.city}${ma.ward}`,
      ), apiData);
      progress.increment();
    }
  } finally {
    progress.stop();
  }
}

export default main;