#!/usr/bin/env python3 """ Import Goergens camera database - FIXED VERSION Verifies IDs exist before using to prevent cache poisoning """ import openpyxl import psycopg2 import psycopg2.extras import re from collections import defaultdict import sys DB_CONFIG = { 'port': 5433, 'database': 'camera_db', 'user': 'postgres' } # Caches manufacturer_cache = {} model_cache = {} variant_cache = {} lens_cache = {} shutter_cache = {} missing_variant_counter = defaultdict(lambda: 'a') def verify_id_exists(cur, table, id_val): """Verify an ID actually exists in the database""" if not id_val: return False cur.execute(f"SELECT 1 FROM {table} WHERE id = %s", (id_val,)) return cur.fetchone() is not None def parse_goergens_id(goergens_id): notes = [] if not goergens_id or not isinstance(goergens_id, str): return None, None, None, None, "MISSING_ID" parts = goergens_id.strip().split() if len(parts) == 3: manufacturer_code = parts[0] model_number = parts[2] key = f"{manufacturer_code}_{model_number}" variant_letter = missing_variant_counter[key] combo_number = "01" missing_variant_counter[key] = chr(ord(missing_variant_counter[key]) + 1) notes.append(f"AUTO_VARIANT: '{goergens_id}' → variant '{variant_letter}01'") return manufacturer_code, model_number, variant_letter, combo_number, "; ".join(notes) if len(parts) < 4: return None, None, None, None, f"MALFORMED: {goergens_id}" manufacturer_code = parts[0] model_number = parts[2] variant_combo = parts[3] if variant_combo.isdigit(): variant_letter = 'a' combo_number = variant_combo notes.append(f"AUTO_VARIANT: Added 'a' prefix to '{variant_combo}'") return manufacturer_code, model_number, variant_letter, combo_number, "; ".join(notes) match_3letter = re.match(r'^([a-z]{3})(\d+)$', variant_combo) if match_3letter: variant_full = match_3letter.group(1) combo_digit = match_3letter.group(2) variant_letter = variant_full[:2] combo_number = variant_full[2] + combo_digit notes.append(f"3LETTER_FIX: '{variant_combo}' → v='{variant_letter}', c='{combo_number}'") return manufacturer_code, model_number, variant_letter, combo_number, "; ".join(notes) match = re.match(r'^([a-z]{1,2})(\d+)$', variant_combo) if not match: return None, None, None, None, f"PARSE_FAIL: '{variant_combo}'" return manufacturer_code, model_number, match.group(1), match.group(2), None def parse_body_type(typ_str): if not typ_str or not isinstance(typ_str, str): return None, None typ_str = typ_str.strip() parts = typ_str.split('/') body_type = parts[0].strip() if len(parts) > 0 and parts[0] else None viewfinder_type = parts[1].strip() if len(parts) > 1 and parts[1] else None return body_type, viewfinder_type def parse_year(year_str): if not year_str: return None, None year_str = str(year_str).strip().replace('c', '').replace('ca', '').replace('Ca', '') if '-' in year_str: parts = year_str.split('-') try: return int(parts[0]), int(parts[1]) except: return None, None try: year = int(year_str) return year, year except: return None, None def get_or_create_manufacturer(cur, name, code): if code in manufacturer_cache: return manufacturer_cache[code] cur.execute("SELECT id FROM manufacturers WHERE goergens_code = %s", (code,)) row = cur.fetchone() if row: manufacturer_cache[code] = row[0] return row[0] cur.execute("INSERT INTO manufacturers (goergens_code, name) VALUES (%s, %s) RETURNING id", (code, name)) mfg_id = cur.fetchone()[0] manufacturer_cache[code] = mfg_id return mfg_id def get_or_create_model(cur, mfg_id, model_num, body, vf, fmt, y1, y2): key = (mfg_id, model_num) if key in model_cache: return model_cache[key] cur.execute("SELECT id FROM camera_models WHERE manufacturer_id = %s AND goergens_model_number = %s", (mfg_id, model_num)) row = cur.fetchone() if row: model_cache[key] = row[0] return row[0] cur.execute( "INSERT INTO camera_models (manufacturer_id, goergens_model_number, body_type, viewfinder_type, format_code, year_first, year_last) VALUES (%s, %s, %s, %s, %s, %s, %s) RETURNING id", (mfg_id, model_num, body, vf, fmt, y1, y2) ) model_id = cur.fetchone()[0] model_cache[key] = model_id return model_id def get_or_create_variant(cur, model_id, variant_letter): key = (model_id, variant_letter) if key in variant_cache: return variant_cache[key] cur.execute("SELECT id FROM housing_variants WHERE camera_model_id = %s AND goergens_variant_letter = %s", (model_id, variant_letter)) row = cur.fetchone() if row: variant_cache[key] = row[0] return row[0] cur.execute("INSERT INTO housing_variants (camera_model_id, goergens_variant_letter) VALUES (%s, %s) RETURNING id", (model_id, variant_letter)) variant_id = cur.fetchone()[0] variant_cache[key] = variant_id return variant_id def get_or_create_lens(cur, name): if not name: return None name = str(name).strip() if name else None if not name or name == 'None': return None if name in lens_cache: return lens_cache[name] cur.execute("SELECT id FROM lenses WHERE name = %s", (name,)) row = cur.fetchone() if row: lens_cache[name] = row[0] return row[0] cur.execute("INSERT INTO lenses (name) VALUES (%s) RETURNING id", (name,)) lens_id = cur.fetchone()[0] lens_cache[name] = lens_id return lens_id def get_or_create_shutter(cur, name): if not name: return None name = str(name).strip() if name else None if not name or name == 'None': return None if name in shutter_cache: return shutter_cache[name] cur.execute("SELECT id FROM shutters WHERE name = %s", (name,)) row = cur.fetchone() if row: shutter_cache[name] = row[0] return row[0] cur.execute("INSERT INTO shutters (name) VALUES (%s) RETURNING id", (name,)) shutter_id = cur.fetchone()[0] shutter_cache[name] = shutter_id return shutter_id def import_goergens_data(excel_path, conn): cur = conn.cursor() print("Loading Excel...") wb = openpyxl.load_workbook(excel_path, read_only=True) ws = wb[wb.sheetnames[0]] total_rows = ws.max_row - 1 print(f"Found {total_rows} cameras\n") imported, skipped, duplicates = 0, 0, 0 errors = [] for i, row in enumerate(ws.iter_rows(min_row=2, values_only=True), 1): if i % 1000 == 0: print(f"Progress: {i}/{total_rows} ({i*100//total_rows}%)") try: row_id, mfg_name, goergens_id, camera_name = row[0], row[1], row[2], row[3] lens_name, shutter_name, year_str, typ_str = row[4], row[5], row[6], row[7] format_code, notes = row[8], row[9] if len(row) > 9 else None mfg_name = str(mfg_name).strip() if mfg_name else None goergens_id = str(goergens_id).strip() if goergens_id else None camera_name = str(camera_name).strip() if camera_name else None format_code = str(format_code).strip() if format_code else None if format_code == 'None': format_code = None if camera_name == 'None': camera_name = None if not mfg_name or not goergens_id: skipped += 1 continue mfg_code, model_num, variant, combo, parse_notes = parse_goergens_id(goergens_id) if not all([mfg_code, model_num, variant, combo]): skipped += 1 errors.append((i+1, goergens_id, parse_notes or "PARSE_FAILED")) continue body_type, vf_type = parse_body_type(typ_str) y1, y2 = parse_year(year_str) # Create/get IDs with verification mfg_id = get_or_create_manufacturer(cur, mfg_name, mfg_code) if not verify_id_exists(cur, 'manufacturers', mfg_id): raise Exception(f"Manufacturer ID {mfg_id} not in DB after creation") model_id = get_or_create_model(cur, mfg_id, model_num, body_type, vf_type, format_code, y1, y2) if not verify_id_exists(cur, 'camera_models', model_id): raise Exception(f"Model ID {model_id} not in DB after creation") variant_id = get_or_create_variant(cur, model_id, variant) if not verify_id_exists(cur, 'housing_variants', variant_id): raise Exception(f"Variant ID {variant_id} not in DB after creation") lens_id = get_or_create_lens(cur, lens_name) if lens_id and not verify_id_exists(cur, 'lenses', lens_id): raise Exception(f"Lens ID {lens_id} not in DB after creation") shutter_id = get_or_create_shutter(cur, shutter_name) if shutter_id and not verify_id_exists(cur, 'shutters', shutter_id): raise Exception(f"Shutter ID {shutter_id} not in DB after creation") full_notes = [] if parse_notes: full_notes.append(parse_notes) if notes: full_notes.append(str(notes)) specs = {'notes': " | ".join(full_notes)} if full_notes else None # Simple insert # Commit hierarchy BEFORE combo insert conn.commit() cur.execute( "INSERT INTO lens_shutter_combos (housing_variant_id, goergens_combo_number, lens_id, shutter_id, specs) VALUES (%s, %s, %s, %s, %s)", (variant_id, combo, lens_id, shutter_id, psycopg2.extras.Json(specs) if specs else None) ) if camera_name: cur.execute( "INSERT INTO camera_model_i18n (camera_model_id, model_name) VALUES (%s, %s) ON CONFLICT (camera_model_id) DO NOTHING", (model_id, camera_name) ) conn.commit() # Commit combo imported += 1 except psycopg2.IntegrityError as e: if 'duplicate key value' in str(e): duplicates += 1 conn.rollback() else: skipped += 1 errors.append((i+1, goergens_id if 'goergens_id' in locals() else 'N/A', str(e))) conn.rollback() except Exception as e: skipped += 1 errors.append((i+1, goergens_id if 'goergens_id' in locals() else 'N/A', str(e))) conn.rollback() print(f"\n{'='*60}") print(f"IMPORT COMPLETE") print(f"{'='*60}") print(f"Imported: {imported:,}") print(f"Skipped: {skipped}") print(f"Duplicates: {duplicates}") print(f"\nManufacturers: {len(manufacturer_cache):,}") print(f"Models: {len(model_cache):,}") print(f"Variants: {len(variant_cache):,}") print(f"Lenses: {len(lens_cache):,}") print(f"Shutters: {len(shutter_cache):,}") if errors and len(errors) <= 50: print(f"\nErrors ({len(errors)}):") for row_num, gid, err in errors: print(f" Row {row_num}: {gid} - {err}") elif errors: print(f"\nErrors: {len(errors)} total (showing first 20)") for row_num, gid, err in errors[:20]: print(f" Row {row_num}: {gid} - {err}") cur.close() def main(): excel_path = '/data/camera-database/data/aaa01Kameras.xlsx' print("Connecting to database...") conn = psycopg2.connect(**DB_CONFIG) try: import_goergens_data(excel_path, conn) except Exception as e: print(f"\nFATAL ERROR: {e}") import traceback traceback.print_exc() conn.rollback() return 1 finally: conn.close() return 0 if __name__ == '__main__': sys.exit(main())