-- 今後の可読性のためにテーブルを作成しておく CREATE TEMPORARY TABLE IF NOT EXISTS MEDIS515 AS -- TODO: ここは適宜パスを置き換えてください SELECT * FROM read_csv('~/Downloads/byomei515/main/nmain515.txt', encoding='shift_jis'); CREATE TEMPORARY TABLE IF NOT EXISTS CONCEPT AS -- TODO: ここは適宜パスを置き換えてください SELECT * FROM '~/Downloads/vocabulary_download_v5_{5dc9fc80-875e-403e-8e35-83059485f0e7}_1759465520653/CONCEPT.csv'; CREATE TEMPORARY TABLE IF NOT EXISTS CONCEPT_RELATIONSHIP AS -- TODO: ここは適宜パスを置き換えてください SELECT * FROM '~/Downloads/vocabulary_download_v5_{5dc9fc80-875e-403e-8e35-83059485f0e7}_1759465520653/CONCEPT_RELATIONSHIP.csv'; -- ---------------------------------------- -- ピリオド有無の統一のみでMEDISとCONCEPTを結合 -- ---------------------------------------- SELECT (SELECT COUNT(*) FROM MEDIS515) - COUNT(DISTINCT column01) AS 変換不能数, (SELECT COUNT(DISTINCT column02) FROM MEDIS515) AS 病名ユニーク表記数, (SELECT COUNT(DISTINCT column06) FROM MEDIS515) AS MEDIS_ICD10ユニーク数, FROM MEDIS515 INNER JOIN CONCEPT source ON REPLACE(source.concept_code, '.', '') = MEDIS515.column06 INNER JOIN CONCEPT_RELATIONSHIP rel ON rel.concept_id_1 = source.concept_id INNER JOIN CONCEPT target ON target.concept_id = rel.concept_id_2 WHERE source.vocabulary_id = 'ICD10' AND target.standard_concept = 'S' AND rel.relationship_id IN ('Maps to', 'Maps to value'); -- ----------------------------------------------------------------- -- 前述のマッピング&マッチ失敗部分をベースコードを用いてフォールバックしたSQL -- ----------------------------------------------------------------- WITH CMPL_NUM_NORM AS ( SELECT '補助数字' AS kind, column01, column02, column06, REPLACE(column06, '-', '')[0:-2] AS icd10_norm, FROM MEDIS515 WHERE (column06 ~ '.*-\d' OR column06 ~ '[A-Z]\d{4,}') AND column06 NOT IN (SELECT REPLACE(concept_code, '.', '') FROM CONCEPT WHERE vocabulary_id = 'ICD10') ), CMPL_ALPHA_NORM AS ( SELECT '補助アルファベット' AS kind, column01, column02, column06, column06[0:-2] AS icd10_norm, FROM MEDIS515 WHERE MEDIS515.column06 ~ '.*[a-z]' ), MISSING_NORM AS ( SELECT 'ICD10欠損' AS kind, column01, column02, column06, column06 AS icd10_norm, FROM MEDIS515 WHERE MEDIS515.column06 IS NULL ), MERGED AS ( SELECT * FROM CMPL_NUM_NORM UNION ALL SELECT * FROM CMPL_ALPHA_NORM UNION ALL SELECT * FROM MISSING_NORM ), MERGED_ALL AS ( SELECT * FROM MERGED UNION ALL SELECT NULL AS kind, column01, column02, column06, column06 AS icd10_norm, FROM MEDIS515 WHERE column01 NOT IN (SELECT column01 FROM MERGED) ) SELECT MERGED_ALL.kind, MERGED_ALL.column02, MERGED_ALL.column06, MERGED_ALL.icd10_norm, source.concept_id AS ICD_cid, source.concept_name AS ICD_name, target.concept_id AS SNOMED_cid, target.concept_name AS SNOMED_name, FROM MERGED_ALL LEFT JOIN CONCEPT source ON REPLACE(source.concept_code, '.', '') = MERGED_ALL.icd10_norm AND source.vocabulary_id = 'ICD10' LEFT JOIN CONCEPT_RELATIONSHIP rel ON rel.concept_id_1 = source.concept_id AND rel.relationship_id IN ('Maps to', 'Maps to value') AND rel.concept_id_1 != rel.concept_id_2 LEFT JOIN CONCEPT target ON target.concept_id = rel.concept_id_2 AND target.standard_concept = 'S' ORDER BY kind DESC, column01;