matsuap's picture
Upload 65 files
a1c0952 verified
import fs from 'node:fs';
import path from 'node:path';
import { ckanPackageSearch, combineCSVParserIterators, CSVParserIterator, findResultByTypeAndArea, getAndParseCSVDataForId, getAndStreamCSVDataForId } from '../lib/ckan.js';
import { mergeRsdtdspRsdtData, RsdtdspRsdtData, RsdtdspRsdtPosData } from '../lib/ckan_data/rsdtdsp_rsdt.js';
import { machiAzaName, RsdtApi, SingleRsdt } from '../data.js';
import { projectABRData } from '../lib/proj.js';
import { MachiAzaData } from '../lib/ckan_data/machi_aza.js';
import { rawToMachiAza } from './02_machi_aza.js';
import { loadSettings } from '../lib/settings.js';
/**
 * Size, in bytes, of one chunk of the header area of the TXT output.
 *
 * The header area is always a whole multiple of this size: the serialized
 * header is padded with spaces up to the chunk boundary, and another chunk is
 * added whenever the header does not fit.
 */
const HEADER_CHUNK_SIZE = 50_000;
// const HEADER_PBF_CHUNK_SIZE = 8_192;
/**
 * Builds the relative output path (without extension or suffix) for the
 * municipality a machi-aza record belongs to.
 *
 * The path has two segments: the record's `pref` value, followed by its
 * `county`, `city` and `ward` values concatenated with no separator.
 *
 * @param ma - Machi-aza record to derive the path from.
 * @returns The relative path, joined with the platform path separator.
 */
function getOutPath(ma: MachiAzaData): string {
  const { pref, county, city, ward } = ma;
  const municipalitySegment = `${county}${city}${ward}`;
  return path.join(pref, municipalitySegment);
}
/** One entry of the TXT header index, locating a single machi-aza section. */
type HeaderRow = {
  /** Section name, as produced by `machiAzaName()`. */
  name: string;
  /**
   * Byte position at which the section starts, counted from the beginning of
   * the serialized buffer (i.e. the header area is included in the offset).
   */
  offset: number;
  /** Size of the section in bytes. */
  length: number;
}
/**
 * Serializes residential-address (住居表示) API data into the indexed TXT format.
 *
 * Layout of the returned `data` buffer:
 *  1. A header area whose size is a multiple of `HEADER_CHUNK_SIZE` bytes. It
 *     contains one `name,offset,length` line per section, followed by an
 *     `=END=` line, and is padded with spaces (0x20) up to its full size.
 *  2. All sections, concatenated in the order of `apiData`. A section is a
 *     `住居表示,<name>` line, a CSV column line, and one CSV line per rsdt entry.
 *
 * Offsets stored in the header are absolute byte positions inside `data`, so
 * they depend on the header size. The header is therefore rebuilt with one
 * more chunk each time its serialized form does not fit.
 *
 * @param apiData - Machi-aza entries together with their rsdt rows.
 * @returns `headerIterations` (number of header chunks used), `headerData`
 *   (the index rows written to the header) and `data` (header + sections).
 */
function serializeApiDataTxt(apiData: RsdtApi): { headerIterations: number, headerData: HeaderRow[], data: Buffer } {
  // Render every section once; the header only needs each name and byte length.
  const sections = apiData.map(({ machiAza, rsdts }) => {
    const name = machiAzaName(machiAza);
    const lines = [
      `住居表示,${name}`,
      'blk_num,rsdt_num,rsdt_num2,lng,lat',
    ];
    for (const { blk_num, rsdt_num, rsdt_num2, point } of rsdts) {
      // Missing (falsy) optional values are written as empty CSV fields.
      lines.push(`${blk_num || ''},${rsdt_num},${rsdt_num2 || ''},${point?.[0] || ''},${point?.[1] || ''}`);
    }
    // Every line, including the last one, is newline-terminated.
    return { name, body: Buffer.from(lines.join('\n') + '\n', 'utf8') };
  });
  const bodies = sections.map((section) => section.body);

  for (let iterations = 1; ; iterations += 1) {
    const headerMaxSize = HEADER_CHUNK_SIZE * iterations;

    // The first section starts right after the (padded) header area.
    let nextOffset = headerMaxSize;
    const headerData: HeaderRow[] = sections.map(({ name, body }) => {
      const row: HeaderRow = { name, offset: nextOffset, length: body.length };
      nextOffset += body.length;
      return row;
    });

    const headerText = headerData
      .map(({ name, offset, length }) => `${name},${offset},${length}\n`)
      .join('');
    const headerBuf = Buffer.from(`${headerText}=END=\n`, 'utf8');

    if (headerBuf.length > headerMaxSize) {
      // Header overflowed the reserved area: retry with one more chunk.
      continue;
    }

    const padding = Buffer.alloc(headerMaxSize - headerBuf.length, 0x20);
    return {
      headerIterations: iterations,
      headerData,
      data: Buffer.concat([headerBuf, padding, ...bodies]),
    };
  }
}
// function _stringIfNotInteger(value: string | undefined) {
// if (!value) { return undefined; }
// return /^\d+$/.test(value) ? undefined : value;
// }
// function serializeApiDataPbf(apiData: RsdtApi): Buffer {
// let outSections: Buffer[] = [];
// for ( const { machiAza, rsdts } of apiData ) {
// const section: AddrData.Section = {
// kind: AddrData.Kind.RSDT,
// name: machiAzaName(machiAza),
// rsdtRows: [],
// chibanRows: [],
// }
// for (const rsdt of rsdts) {
// section.rsdtRows.push({
// blkNum: rsdt.blk_num ? parseInt(rsdt.blk_num, 10) : undefined,
// rsdtNum: parseInt(rsdt.rsdt_num, 10),
// rsdtNum2: rsdt.rsdt_num2 ? parseInt(rsdt.rsdt_num2, 10) : undefined,
// point: rsdt.point ? { lng: rsdt.point[0], lat: rsdt.point[1] } : undefined,
// blkNumStr: _stringIfNotInteger(rsdt.blk_num),
// rsdtNumStr: _stringIfNotInteger(rsdt.rsdt_num),
// rsdtNum2Str: _stringIfNotInteger(rsdt.rsdt_num2),
// });
// }
// const sectionBuf = Buffer.from(AddrData.Section.encode(section).finish());
// outSections.push(sectionBuf);
// }
// const createHeader = (iterations = 1) => {
// const header: AddrData.Header = {
// kind: AddrData.Kind.RSDT,
// rows: [],
// };
// const headerMaxSize = HEADER_PBF_CHUNK_SIZE * iterations;
// let lastBytePos = headerMaxSize;
// for (const [index, section] of outSections.entries()) {
// const ma = apiData[index].machiAza;
// header.rows.push({
// name: machiAzaName(ma),
// offset: lastBytePos,
// length: section.length,
// });
// lastBytePos += section.length;
// }
// const headerBuf = Buffer.from(AddrData.Header.encode(header).finish());
// if (headerBuf.length > headerMaxSize) {
// return createHeader(iterations + 1);
// } else {
// const padding = Buffer.alloc(headerMaxSize - headerBuf.length);
// padding.fill(0x00);
// return Buffer.concat([headerBuf, padding]);
// }
// };
// const header = createHeader();
// return Buffer.concat([header, ...outSections]);
// }
/**
 * Writes the TXT serialization of `apiData` to
 * `<outDir>/ja/<outFilename>-住居表示.txt`, creating parent directories as
 * needed, and logs how many machi-aza entries were written.
 *
 * @param outDir - Root output directory.
 * @param outFilename - Relative path of the output file, without suffix/extension.
 * @param apiData - Machi-aza entries (with their rsdt rows) to serialize.
 */
async function outputRsdtData(outDir: string, outFilename: string, apiData: RsdtApi): Promise<void> {
  // JSON output is currently disabled:
  // const machiAzaJSON = path.join(outDir, 'ja', outFilename + '.json');
  // fs.mkdirSync(path.dirname(machiAzaJSON), { recursive: true });
  // fs.writeFileSync(outFileJSON, JSON.stringify(apiData));

  const label = `${outFilename}-住居表示`;
  const txtPath = path.join(outDir, 'ja', `${label}.txt`);

  // Serialize first, so nothing is created on disk if serialization throws.
  const { data } = serializeApiDataTxt(apiData);
  await fs.promises.mkdir(path.dirname(txtPath), { recursive: true });
  await fs.promises.writeFile(txtPath, data);

  // PBF output is currently disabled:
  // const outFilePbf = path.join(outDir, 'ja', outFilename + '.pbf');
  // fs.writeFileSync(outFilePbf, serializeApiDataPbf(apiData));

  const count = String(apiData.length).padEnd(4, ' ');
  console.log(`${label}: ${count} 件の町字を出力した`);
}
/**
 * Entry point: streams the residential-address (rsdt) master merged with its
 * position data, groups the rows by machi-aza, and writes one TXT file per
 * output path (see `getOutPath`).
 *
 * Grouping is purely sequential: a machi-aza group ends when a row for a
 * different (lg_code, machiaza_id) pair arrives, and a file is written when
 * the output path of the incoming row differs from the previous one.
 * NOTE(review): this presumably relies on the merged stream being ordered by
 * municipality and machi-aza; a path that reappears later would be written
 * again with only the later rows — confirm the upstream ordering.
 *
 * @param argv - Process arguments; `argv[2]`, when non-empty, is the output
 *   directory. Defaults to `<this dir>/../../out/api`.
 */
async function main(argv: string[]) {
  const outDir = argv[2] || path.join(import.meta.dirname, '..', '..', 'out', 'api');
  fs.mkdirSync(outDir, { recursive: true });

  // Municipalities & machi-aza, indexed by "<lg_code>|<machiaza_id>".
  const machiAzaData = await getAndParseCSVDataForId<MachiAzaData>('ba-o1-000000_g2-000003');
  const machiAzaDataByCode = new Map<string, MachiAzaData>();
  for (const entry of machiAzaData) {
    machiAzaDataByCode.set(`${entry.lg_code}|${entry.machiaza_id}`, entry);
  }

  // Kagoshima Prefecture only (kept for reference):
  // const mainStream = getAndStreamCSVDataForId<RsdtdspRsdtData>('ba-o1-460001_g2-000005');
  // const posStream = getAndStreamCSVDataForId<RsdtdspRsdtPosData>('ba-o1-460001_g2-000008');

  const { lgCodes } = await loadSettings();
  let mainStream: CSVParserIterator<RsdtdspRsdtData>;
  let posStream: CSVParserIterator<RsdtdspRsdtPosData>;

  if (lgCodes.length > 0) {
    // machiAzaData is already filtered, so collect the unique prefectures it
    // contains and only load the streams for those prefectures.
    const prefs = new Set(machiAzaData.map((entry) => entry.pref));

    // Searches CKAN for the per-prefecture dataset named `label`; throws when absent.
    const requirePrefDataset = async (pref: string, label: string) => {
      const results = await ckanPackageSearch(`${pref} ${label} データセット`);
      const dataset = findResultByTypeAndArea(results, `${label}（都道府県）`, pref);
      if (!dataset) {
        throw new Error(`「${pref}」の${label} データセットが見つかりませんでした`);
      }
      return dataset;
    };

    const mainStreams: CSVParserIterator<RsdtdspRsdtData>[] = [];
    const posStreams: CSVParserIterator<RsdtdspRsdtPosData>[] = [];
    for (const pref of prefs) {
      const mainDataset = await requirePrefDataset(pref, '住居表示-住居マスター');
      mainStreams.push(getAndStreamCSVDataForId<RsdtdspRsdtData>(mainDataset.id));

      const posDataset = await requirePrefDataset(pref, '住居表示-住居マスター位置参照拡張');
      posStreams.push(getAndStreamCSVDataForId<RsdtdspRsdtPosData>(posDataset.id));
    }
    mainStream = combineCSVParserIterators(...mainStreams);
    posStream = combineCSVParserIterators(...posStreams);
  } else {
    // No filter configured: use the fixed dataset IDs
    // (presumably the nationwide datasets — confirm).
    mainStream = getAndStreamCSVDataForId<RsdtdspRsdtData>('ba000003');
    posStream = getAndStreamCSVDataForId<RsdtdspRsdtPosData>('ba000006');
  }

  let apiData: RsdtApi = [];
  let lastOutPath: string | undefined;
  let currentMachiAza: MachiAzaData | undefined;
  let currentRsdtList: SingleRsdt[] = [];

  // Appends the rows collected for the current machi-aza (if any) to apiData.
  const commitCurrentGroup = () => {
    if (currentMachiAza && currentRsdtList.length > 0) {
      apiData.push({
        machiAza: rawToMachiAza(currentMachiAza),
        rsdts: currentRsdtList,
      });
    }
  };

  for await (const raw of mergeRsdtdspRsdtData(mainStream, posStream)) {
    const ma = machiAzaDataByCode.get(`${raw.lg_code}|${raw.machiaza_id}`);
    if (!ma) {
      // Row belongs to a machi-aza that is not in the master data: skip it.
      continue;
    }

    const sameGroup =
      currentMachiAza !== undefined &&
      currentMachiAza.machiaza_id === ma.machiaza_id &&
      currentMachiAza.lg_code === ma.lg_code;
    if (!sameGroup) {
      // Close the previous group before a possible file flush below, so that
      // it ends up in the file of the output path it belongs to.
      commitCurrentGroup();
      currentMachiAza = ma;
      currentRsdtList = [];
    }

    const thisOutPath = getOutPath(ma);
    if (lastOutPath !== undefined && lastOutPath !== thisOutPath) {
      await outputRsdtData(outDir, lastOutPath, apiData);
      apiData = [];
    }
    lastOutPath = thisOutPath;

    currentRsdtList.push({
      blk_num: raw.blk_num !== '' ? raw.blk_num : undefined,
      rsdt_num: raw.rsdt_num,
      rsdt_num2: raw.rsdt_num2 !== '' ? raw.rsdt_num2 : undefined,
      // Only rows that carry position data ('rep_srid') get a projected point.
      point: 'rep_srid' in raw ? projectABRData(raw) : undefined,
    });
  }

  // Flush whatever is left after the stream ends.
  commitCurrentGroup();
  if (lastOutPath) {
    await outputRsdtData(outDir, lastOutPath, apiData);
  }
}
export default main;