camera-database/scripts/import_goergens.py

324 lines
12 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Import Goergens camera database - FIXED VERSION
Verifies IDs exist before using to prevent cache poisoning
"""
import openpyxl
import psycopg2
import psycopg2.extras
import re
from collections import defaultdict
import sys
DB_CONFIG = {
'port': 5433,
'database': 'camera_db',
'user': 'postgres'
}
# Caches
manufacturer_cache = {}
model_cache = {}
variant_cache = {}
lens_cache = {}
shutter_cache = {}
missing_variant_counter = defaultdict(lambda: 'a')
def verify_id_exists(cur, table, id_val):
"""Verify an ID actually exists in the database"""
if not id_val:
return False
cur.execute(f"SELECT 1 FROM {table} WHERE id = %s", (id_val,))
return cur.fetchone() is not None
def parse_goergens_id(goergens_id):
notes = []
if not goergens_id or not isinstance(goergens_id, str):
return None, None, None, None, "MISSING_ID"
parts = goergens_id.strip().split()
if len(parts) == 3:
manufacturer_code = parts[0]
model_number = parts[2]
key = f"{manufacturer_code}_{model_number}"
variant_letter = missing_variant_counter[key]
combo_number = "01"
missing_variant_counter[key] = chr(ord(missing_variant_counter[key]) + 1)
notes.append(f"AUTO_VARIANT: '{goergens_id}' → variant '{variant_letter}01'")
return manufacturer_code, model_number, variant_letter, combo_number, "; ".join(notes)
if len(parts) < 4:
return None, None, None, None, f"MALFORMED: {goergens_id}"
manufacturer_code = parts[0]
model_number = parts[2]
variant_combo = parts[3]
if variant_combo.isdigit():
variant_letter = 'a'
combo_number = variant_combo
notes.append(f"AUTO_VARIANT: Added 'a' prefix to '{variant_combo}'")
return manufacturer_code, model_number, variant_letter, combo_number, "; ".join(notes)
match_3letter = re.match(r'^([a-z]{3})(\d+)$', variant_combo)
if match_3letter:
variant_full = match_3letter.group(1)
combo_digit = match_3letter.group(2)
variant_letter = variant_full[:2]
combo_number = variant_full[2] + combo_digit
notes.append(f"3LETTER_FIX: '{variant_combo}' → v='{variant_letter}', c='{combo_number}'")
return manufacturer_code, model_number, variant_letter, combo_number, "; ".join(notes)
match = re.match(r'^([a-z]{1,2})(\d+)$', variant_combo)
if not match:
return None, None, None, None, f"PARSE_FAIL: '{variant_combo}'"
return manufacturer_code, model_number, match.group(1), match.group(2), None
def parse_body_type(typ_str):
if not typ_str or not isinstance(typ_str, str):
return None, None
typ_str = typ_str.strip()
parts = typ_str.split('/')
body_type = parts[0].strip() if len(parts) > 0 and parts[0] else None
viewfinder_type = parts[1].strip() if len(parts) > 1 and parts[1] else None
return body_type, viewfinder_type
def parse_year(year_str):
if not year_str:
return None, None
year_str = str(year_str).strip().replace('c', '').replace('ca', '').replace('Ca', '')
if '-' in year_str:
parts = year_str.split('-')
try:
return int(parts[0]), int(parts[1])
except:
return None, None
try:
year = int(year_str)
return year, year
except:
return None, None
def get_or_create_manufacturer(cur, name, code):
if code in manufacturer_cache:
return manufacturer_cache[code]
cur.execute("SELECT id FROM manufacturers WHERE goergens_code = %s", (code,))
row = cur.fetchone()
if row:
manufacturer_cache[code] = row[0]
return row[0]
cur.execute("INSERT INTO manufacturers (goergens_code, name) VALUES (%s, %s) RETURNING id", (code, name))
mfg_id = cur.fetchone()[0]
manufacturer_cache[code] = mfg_id
return mfg_id
def get_or_create_model(cur, mfg_id, model_num, body, vf, fmt, y1, y2):
key = (mfg_id, model_num)
if key in model_cache:
return model_cache[key]
cur.execute("SELECT id FROM camera_models WHERE manufacturer_id = %s AND goergens_model_number = %s", (mfg_id, model_num))
row = cur.fetchone()
if row:
model_cache[key] = row[0]
return row[0]
cur.execute(
"INSERT INTO camera_models (manufacturer_id, goergens_model_number, body_type, viewfinder_type, format_code, year_first, year_last) VALUES (%s, %s, %s, %s, %s, %s, %s) RETURNING id",
(mfg_id, model_num, body, vf, fmt, y1, y2)
)
model_id = cur.fetchone()[0]
model_cache[key] = model_id
return model_id
def get_or_create_variant(cur, model_id, variant_letter):
key = (model_id, variant_letter)
if key in variant_cache:
return variant_cache[key]
cur.execute("SELECT id FROM housing_variants WHERE camera_model_id = %s AND goergens_variant_letter = %s", (model_id, variant_letter))
row = cur.fetchone()
if row:
variant_cache[key] = row[0]
return row[0]
cur.execute("INSERT INTO housing_variants (camera_model_id, goergens_variant_letter) VALUES (%s, %s) RETURNING id", (model_id, variant_letter))
variant_id = cur.fetchone()[0]
variant_cache[key] = variant_id
return variant_id
def get_or_create_lens(cur, name):
if not name:
return None
name = str(name).strip() if name else None
if not name or name == 'None':
return None
if name in lens_cache:
return lens_cache[name]
cur.execute("SELECT id FROM lenses WHERE name = %s", (name,))
row = cur.fetchone()
if row:
lens_cache[name] = row[0]
return row[0]
cur.execute("INSERT INTO lenses (name) VALUES (%s) RETURNING id", (name,))
lens_id = cur.fetchone()[0]
lens_cache[name] = lens_id
return lens_id
def get_or_create_shutter(cur, name):
if not name:
return None
name = str(name).strip() if name else None
if not name or name == 'None':
return None
if name in shutter_cache:
return shutter_cache[name]
cur.execute("SELECT id FROM shutters WHERE name = %s", (name,))
row = cur.fetchone()
if row:
shutter_cache[name] = row[0]
return row[0]
cur.execute("INSERT INTO shutters (name) VALUES (%s) RETURNING id", (name,))
shutter_id = cur.fetchone()[0]
shutter_cache[name] = shutter_id
return shutter_id
def import_goergens_data(excel_path, conn):
cur = conn.cursor()
print("Loading Excel...")
wb = openpyxl.load_workbook(excel_path, read_only=True)
ws = wb[wb.sheetnames[0]]
total_rows = ws.max_row - 1
print(f"Found {total_rows} cameras\n")
imported, skipped, duplicates = 0, 0, 0
errors = []
for i, row in enumerate(ws.iter_rows(min_row=2, values_only=True), 1):
if i % 1000 == 0:
print(f"Progress: {i}/{total_rows} ({i*100//total_rows}%)")
try:
row_id, mfg_name, goergens_id, camera_name = row[0], row[1], row[2], row[3]
lens_name, shutter_name, year_str, typ_str = row[4], row[5], row[6], row[7]
format_code, notes = row[8], row[9] if len(row) > 9 else None
mfg_name = str(mfg_name).strip() if mfg_name else None
goergens_id = str(goergens_id).strip() if goergens_id else None
camera_name = str(camera_name).strip() if camera_name else None
format_code = str(format_code).strip() if format_code else None
if format_code == 'None':
format_code = None
if camera_name == 'None':
camera_name = None
if not mfg_name or not goergens_id:
skipped += 1
continue
mfg_code, model_num, variant, combo, parse_notes = parse_goergens_id(goergens_id)
if not all([mfg_code, model_num, variant, combo]):
skipped += 1
errors.append((i+1, goergens_id, parse_notes or "PARSE_FAILED"))
continue
body_type, vf_type = parse_body_type(typ_str)
y1, y2 = parse_year(year_str)
# Create/get IDs with verification
mfg_id = get_or_create_manufacturer(cur, mfg_name, mfg_code)
if not verify_id_exists(cur, 'manufacturers', mfg_id):
raise Exception(f"Manufacturer ID {mfg_id} not in DB after creation")
model_id = get_or_create_model(cur, mfg_id, model_num, body_type, vf_type, format_code, y1, y2)
if not verify_id_exists(cur, 'camera_models', model_id):
raise Exception(f"Model ID {model_id} not in DB after creation")
variant_id = get_or_create_variant(cur, model_id, variant)
if not verify_id_exists(cur, 'housing_variants', variant_id):
raise Exception(f"Variant ID {variant_id} not in DB after creation")
lens_id = get_or_create_lens(cur, lens_name)
if lens_id and not verify_id_exists(cur, 'lenses', lens_id):
raise Exception(f"Lens ID {lens_id} not in DB after creation")
shutter_id = get_or_create_shutter(cur, shutter_name)
if shutter_id and not verify_id_exists(cur, 'shutters', shutter_id):
raise Exception(f"Shutter ID {shutter_id} not in DB after creation")
full_notes = []
if parse_notes:
full_notes.append(parse_notes)
if notes:
full_notes.append(str(notes))
specs = {'notes': " | ".join(full_notes)} if full_notes else None
# Simple insert
# Commit hierarchy BEFORE combo insert
conn.commit()
cur.execute(
"INSERT INTO lens_shutter_combos (housing_variant_id, goergens_combo_number, lens_id, shutter_id, specs) VALUES (%s, %s, %s, %s, %s)",
(variant_id, combo, lens_id, shutter_id, psycopg2.extras.Json(specs) if specs else None)
)
if camera_name:
cur.execute(
"INSERT INTO camera_model_i18n (camera_model_id, model_name) VALUES (%s, %s) ON CONFLICT (camera_model_id) DO NOTHING",
(model_id, camera_name)
)
conn.commit() # Commit combo
imported += 1
except psycopg2.IntegrityError as e:
if 'duplicate key value' in str(e):
duplicates += 1
conn.rollback()
else:
skipped += 1
errors.append((i+1, goergens_id if 'goergens_id' in locals() else 'N/A', str(e)))
conn.rollback()
except Exception as e:
skipped += 1
errors.append((i+1, goergens_id if 'goergens_id' in locals() else 'N/A', str(e)))
conn.rollback()
print(f"\n{'='*60}")
print(f"IMPORT COMPLETE")
print(f"{'='*60}")
print(f"Imported: {imported:,}")
print(f"Skipped: {skipped}")
print(f"Duplicates: {duplicates}")
print(f"\nManufacturers: {len(manufacturer_cache):,}")
print(f"Models: {len(model_cache):,}")
print(f"Variants: {len(variant_cache):,}")
print(f"Lenses: {len(lens_cache):,}")
print(f"Shutters: {len(shutter_cache):,}")
if errors and len(errors) <= 50:
print(f"\nErrors ({len(errors)}):")
for row_num, gid, err in errors:
print(f" Row {row_num}: {gid} - {err}")
elif errors:
print(f"\nErrors: {len(errors)} total (showing first 20)")
for row_num, gid, err in errors[:20]:
print(f" Row {row_num}: {gid} - {err}")
cur.close()
def main():
excel_path = '/data/camera-database/data/aaa01Kameras.xlsx'
print("Connecting to database...")
conn = psycopg2.connect(**DB_CONFIG)
try:
import_goergens_data(excel_path, conn)
except Exception as e:
print(f"\nFATAL ERROR: {e}")
import traceback
traceback.print_exc()
conn.rollback()
return 1
finally:
conn.close()
return 0
if __name__ == '__main__':
sys.exit(main())