324 lines
12 KiB
Python
Executable File
324 lines
12 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Import Goergens camera database - FIXED VERSION
|
|
Verifies IDs exist before using to prevent cache poisoning
|
|
"""
|
|
|
|
import openpyxl
|
|
import psycopg2
|
|
import psycopg2.extras
|
|
import re
|
|
from collections import defaultdict
|
|
import sys
|
|
|
|
DB_CONFIG = {
|
|
'port': 5433,
|
|
'database': 'camera_db',
|
|
'user': 'postgres'
|
|
}
|
|
|
|
# Caches
|
|
manufacturer_cache = {}
|
|
model_cache = {}
|
|
variant_cache = {}
|
|
lens_cache = {}
|
|
shutter_cache = {}
|
|
missing_variant_counter = defaultdict(lambda: 'a')
|
|
|
|
def verify_id_exists(cur, table, id_val):
|
|
"""Verify an ID actually exists in the database"""
|
|
if not id_val:
|
|
return False
|
|
cur.execute(f"SELECT 1 FROM {table} WHERE id = %s", (id_val,))
|
|
return cur.fetchone() is not None
|
|
|
|
def parse_goergens_id(goergens_id):
|
|
notes = []
|
|
if not goergens_id or not isinstance(goergens_id, str):
|
|
return None, None, None, None, "MISSING_ID"
|
|
parts = goergens_id.strip().split()
|
|
if len(parts) == 3:
|
|
manufacturer_code = parts[0]
|
|
model_number = parts[2]
|
|
key = f"{manufacturer_code}_{model_number}"
|
|
variant_letter = missing_variant_counter[key]
|
|
combo_number = "01"
|
|
missing_variant_counter[key] = chr(ord(missing_variant_counter[key]) + 1)
|
|
notes.append(f"AUTO_VARIANT: '{goergens_id}' → variant '{variant_letter}01'")
|
|
return manufacturer_code, model_number, variant_letter, combo_number, "; ".join(notes)
|
|
if len(parts) < 4:
|
|
return None, None, None, None, f"MALFORMED: {goergens_id}"
|
|
manufacturer_code = parts[0]
|
|
model_number = parts[2]
|
|
variant_combo = parts[3]
|
|
if variant_combo.isdigit():
|
|
variant_letter = 'a'
|
|
combo_number = variant_combo
|
|
notes.append(f"AUTO_VARIANT: Added 'a' prefix to '{variant_combo}'")
|
|
return manufacturer_code, model_number, variant_letter, combo_number, "; ".join(notes)
|
|
match_3letter = re.match(r'^([a-z]{3})(\d+)$', variant_combo)
|
|
if match_3letter:
|
|
variant_full = match_3letter.group(1)
|
|
combo_digit = match_3letter.group(2)
|
|
variant_letter = variant_full[:2]
|
|
combo_number = variant_full[2] + combo_digit
|
|
notes.append(f"3LETTER_FIX: '{variant_combo}' → v='{variant_letter}', c='{combo_number}'")
|
|
return manufacturer_code, model_number, variant_letter, combo_number, "; ".join(notes)
|
|
match = re.match(r'^([a-z]{1,2})(\d+)$', variant_combo)
|
|
if not match:
|
|
return None, None, None, None, f"PARSE_FAIL: '{variant_combo}'"
|
|
return manufacturer_code, model_number, match.group(1), match.group(2), None
|
|
|
|
def parse_body_type(typ_str):
|
|
if not typ_str or not isinstance(typ_str, str):
|
|
return None, None
|
|
typ_str = typ_str.strip()
|
|
parts = typ_str.split('/')
|
|
body_type = parts[0].strip() if len(parts) > 0 and parts[0] else None
|
|
viewfinder_type = parts[1].strip() if len(parts) > 1 and parts[1] else None
|
|
return body_type, viewfinder_type
|
|
|
|
def parse_year(year_str):
|
|
if not year_str:
|
|
return None, None
|
|
year_str = str(year_str).strip().replace('c', '').replace('ca', '').replace('Ca', '')
|
|
if '-' in year_str:
|
|
parts = year_str.split('-')
|
|
try:
|
|
return int(parts[0]), int(parts[1])
|
|
except:
|
|
return None, None
|
|
try:
|
|
year = int(year_str)
|
|
return year, year
|
|
except:
|
|
return None, None
|
|
|
|
def get_or_create_manufacturer(cur, name, code):
|
|
if code in manufacturer_cache:
|
|
return manufacturer_cache[code]
|
|
cur.execute("SELECT id FROM manufacturers WHERE goergens_code = %s", (code,))
|
|
row = cur.fetchone()
|
|
if row:
|
|
manufacturer_cache[code] = row[0]
|
|
return row[0]
|
|
cur.execute("INSERT INTO manufacturers (goergens_code, name) VALUES (%s, %s) RETURNING id", (code, name))
|
|
mfg_id = cur.fetchone()[0]
|
|
manufacturer_cache[code] = mfg_id
|
|
return mfg_id
|
|
|
|
def get_or_create_model(cur, mfg_id, model_num, body, vf, fmt, y1, y2):
|
|
key = (mfg_id, model_num)
|
|
if key in model_cache:
|
|
return model_cache[key]
|
|
cur.execute("SELECT id FROM camera_models WHERE manufacturer_id = %s AND goergens_model_number = %s", (mfg_id, model_num))
|
|
row = cur.fetchone()
|
|
if row:
|
|
model_cache[key] = row[0]
|
|
return row[0]
|
|
cur.execute(
|
|
"INSERT INTO camera_models (manufacturer_id, goergens_model_number, body_type, viewfinder_type, format_code, year_first, year_last) VALUES (%s, %s, %s, %s, %s, %s, %s) RETURNING id",
|
|
(mfg_id, model_num, body, vf, fmt, y1, y2)
|
|
)
|
|
model_id = cur.fetchone()[0]
|
|
model_cache[key] = model_id
|
|
return model_id
|
|
|
|
def get_or_create_variant(cur, model_id, variant_letter):
|
|
key = (model_id, variant_letter)
|
|
if key in variant_cache:
|
|
return variant_cache[key]
|
|
cur.execute("SELECT id FROM housing_variants WHERE camera_model_id = %s AND goergens_variant_letter = %s", (model_id, variant_letter))
|
|
row = cur.fetchone()
|
|
if row:
|
|
variant_cache[key] = row[0]
|
|
return row[0]
|
|
cur.execute("INSERT INTO housing_variants (camera_model_id, goergens_variant_letter) VALUES (%s, %s) RETURNING id", (model_id, variant_letter))
|
|
variant_id = cur.fetchone()[0]
|
|
variant_cache[key] = variant_id
|
|
return variant_id
|
|
|
|
def get_or_create_lens(cur, name):
|
|
if not name:
|
|
return None
|
|
name = str(name).strip() if name else None
|
|
if not name or name == 'None':
|
|
return None
|
|
if name in lens_cache:
|
|
return lens_cache[name]
|
|
cur.execute("SELECT id FROM lenses WHERE name = %s", (name,))
|
|
row = cur.fetchone()
|
|
if row:
|
|
lens_cache[name] = row[0]
|
|
return row[0]
|
|
cur.execute("INSERT INTO lenses (name) VALUES (%s) RETURNING id", (name,))
|
|
lens_id = cur.fetchone()[0]
|
|
lens_cache[name] = lens_id
|
|
return lens_id
|
|
|
|
def get_or_create_shutter(cur, name):
|
|
if not name:
|
|
return None
|
|
name = str(name).strip() if name else None
|
|
if not name or name == 'None':
|
|
return None
|
|
if name in shutter_cache:
|
|
return shutter_cache[name]
|
|
cur.execute("SELECT id FROM shutters WHERE name = %s", (name,))
|
|
row = cur.fetchone()
|
|
if row:
|
|
shutter_cache[name] = row[0]
|
|
return row[0]
|
|
cur.execute("INSERT INTO shutters (name) VALUES (%s) RETURNING id", (name,))
|
|
shutter_id = cur.fetchone()[0]
|
|
shutter_cache[name] = shutter_id
|
|
return shutter_id
|
|
|
|
def import_goergens_data(excel_path, conn):
|
|
cur = conn.cursor()
|
|
print("Loading Excel...")
|
|
wb = openpyxl.load_workbook(excel_path, read_only=True)
|
|
ws = wb[wb.sheetnames[0]]
|
|
total_rows = ws.max_row - 1
|
|
print(f"Found {total_rows} cameras\n")
|
|
|
|
imported, skipped, duplicates = 0, 0, 0
|
|
errors = []
|
|
|
|
for i, row in enumerate(ws.iter_rows(min_row=2, values_only=True), 1):
|
|
if i % 1000 == 0:
|
|
print(f"Progress: {i}/{total_rows} ({i*100//total_rows}%)")
|
|
|
|
try:
|
|
row_id, mfg_name, goergens_id, camera_name = row[0], row[1], row[2], row[3]
|
|
lens_name, shutter_name, year_str, typ_str = row[4], row[5], row[6], row[7]
|
|
format_code, notes = row[8], row[9] if len(row) > 9 else None
|
|
|
|
mfg_name = str(mfg_name).strip() if mfg_name else None
|
|
goergens_id = str(goergens_id).strip() if goergens_id else None
|
|
camera_name = str(camera_name).strip() if camera_name else None
|
|
format_code = str(format_code).strip() if format_code else None
|
|
|
|
if format_code == 'None':
|
|
format_code = None
|
|
if camera_name == 'None':
|
|
camera_name = None
|
|
|
|
if not mfg_name or not goergens_id:
|
|
skipped += 1
|
|
continue
|
|
|
|
mfg_code, model_num, variant, combo, parse_notes = parse_goergens_id(goergens_id)
|
|
|
|
if not all([mfg_code, model_num, variant, combo]):
|
|
skipped += 1
|
|
errors.append((i+1, goergens_id, parse_notes or "PARSE_FAILED"))
|
|
continue
|
|
|
|
body_type, vf_type = parse_body_type(typ_str)
|
|
y1, y2 = parse_year(year_str)
|
|
|
|
# Create/get IDs with verification
|
|
mfg_id = get_or_create_manufacturer(cur, mfg_name, mfg_code)
|
|
if not verify_id_exists(cur, 'manufacturers', mfg_id):
|
|
raise Exception(f"Manufacturer ID {mfg_id} not in DB after creation")
|
|
|
|
model_id = get_or_create_model(cur, mfg_id, model_num, body_type, vf_type, format_code, y1, y2)
|
|
if not verify_id_exists(cur, 'camera_models', model_id):
|
|
raise Exception(f"Model ID {model_id} not in DB after creation")
|
|
|
|
variant_id = get_or_create_variant(cur, model_id, variant)
|
|
if not verify_id_exists(cur, 'housing_variants', variant_id):
|
|
raise Exception(f"Variant ID {variant_id} not in DB after creation")
|
|
|
|
lens_id = get_or_create_lens(cur, lens_name)
|
|
if lens_id and not verify_id_exists(cur, 'lenses', lens_id):
|
|
raise Exception(f"Lens ID {lens_id} not in DB after creation")
|
|
|
|
shutter_id = get_or_create_shutter(cur, shutter_name)
|
|
if shutter_id and not verify_id_exists(cur, 'shutters', shutter_id):
|
|
raise Exception(f"Shutter ID {shutter_id} not in DB after creation")
|
|
|
|
full_notes = []
|
|
if parse_notes:
|
|
full_notes.append(parse_notes)
|
|
if notes:
|
|
full_notes.append(str(notes))
|
|
specs = {'notes': " | ".join(full_notes)} if full_notes else None
|
|
|
|
# Simple insert
|
|
# Commit hierarchy BEFORE combo insert
|
|
conn.commit()
|
|
|
|
cur.execute(
|
|
"INSERT INTO lens_shutter_combos (housing_variant_id, goergens_combo_number, lens_id, shutter_id, specs) VALUES (%s, %s, %s, %s, %s)",
|
|
(variant_id, combo, lens_id, shutter_id, psycopg2.extras.Json(specs) if specs else None)
|
|
)
|
|
|
|
if camera_name:
|
|
cur.execute(
|
|
"INSERT INTO camera_model_i18n (camera_model_id, model_name) VALUES (%s, %s) ON CONFLICT (camera_model_id) DO NOTHING",
|
|
(model_id, camera_name)
|
|
)
|
|
|
|
conn.commit() # Commit combo
|
|
imported += 1
|
|
|
|
|
|
|
|
|
|
except psycopg2.IntegrityError as e:
|
|
if 'duplicate key value' in str(e):
|
|
duplicates += 1
|
|
conn.rollback()
|
|
else:
|
|
skipped += 1
|
|
errors.append((i+1, goergens_id if 'goergens_id' in locals() else 'N/A', str(e)))
|
|
conn.rollback()
|
|
except Exception as e:
|
|
skipped += 1
|
|
errors.append((i+1, goergens_id if 'goergens_id' in locals() else 'N/A', str(e)))
|
|
conn.rollback()
|
|
|
|
print(f"\n{'='*60}")
|
|
print(f"IMPORT COMPLETE")
|
|
print(f"{'='*60}")
|
|
print(f"Imported: {imported:,}")
|
|
print(f"Skipped: {skipped}")
|
|
print(f"Duplicates: {duplicates}")
|
|
print(f"\nManufacturers: {len(manufacturer_cache):,}")
|
|
print(f"Models: {len(model_cache):,}")
|
|
print(f"Variants: {len(variant_cache):,}")
|
|
print(f"Lenses: {len(lens_cache):,}")
|
|
print(f"Shutters: {len(shutter_cache):,}")
|
|
|
|
if errors and len(errors) <= 50:
|
|
print(f"\nErrors ({len(errors)}):")
|
|
for row_num, gid, err in errors:
|
|
print(f" Row {row_num}: {gid} - {err}")
|
|
elif errors:
|
|
print(f"\nErrors: {len(errors)} total (showing first 20)")
|
|
for row_num, gid, err in errors[:20]:
|
|
print(f" Row {row_num}: {gid} - {err}")
|
|
|
|
cur.close()
|
|
|
|
def main():
|
|
excel_path = '/data/camera-database/data/aaa01Kameras.xlsx'
|
|
print("Connecting to database...")
|
|
conn = psycopg2.connect(**DB_CONFIG)
|
|
try:
|
|
import_goergens_data(excel_path, conn)
|
|
except Exception as e:
|
|
print(f"\nFATAL ERROR: {e}")
|
|
import traceback
|
|
traceback.print_exc()
|
|
conn.rollback()
|
|
return 1
|
|
finally:
|
|
conn.close()
|
|
return 0
|
|
|
|
if __name__ == '__main__':
|
|
sys.exit(main())
|