"""Command-line tool to audit and clean up the Advoware <-> Google Calendar sync.

The script compares the active rows of the ``calendar_sync`` Postgres table
with what actually exists in Advoware or in the employee's ``AW-<kuerzel>``
Google calendar, and offers maintenance commands for listing, de-duplicating
and deleting Google calendars. Run it without arguments to print the usage.
"""
import asyncio
import logging
import sys
import os
from collections import defaultdict
from datetime import datetime, timedelta
import pytz

# Make the project root importable so that ``config`` and ``services`` resolve.
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))

from config import Config
from services.advoware import AdvowareAPI
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google.oauth2 import service_account
import asyncpg

# Setup logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
logger.addHandler(handler)

# Timezone and year
BERLIN_TZ = pytz.timezone('Europe/Berlin')
now = datetime.now(BERLIN_TZ)
current_year = now.year


async def connect_db():
    """Open a connection to the Postgres DB described by ``Config``.

    Returns:
        An ``asyncpg`` connection; the caller is responsible for closing it.

    Raises:
        Exception: whatever ``asyncpg.connect`` raised, after logging it.
    """
    try:
        conn = await asyncpg.connect(
            host=Config.POSTGRES_HOST or 'localhost',
            user=Config.POSTGRES_USER,
            password=Config.POSTGRES_PASSWORD,
            database=Config.POSTGRES_DB_NAME,
            timeout=10
        )
        return conn
    except Exception as e:
        logger.error(f"Failed to connect to DB: {e}")
        raise


async def get_google_service():
    """Build a Google Calendar v3 service from the configured service account.

    Returns:
        The service object returned by ``googleapiclient.discovery.build``.

    Raises:
        FileNotFoundError: if the service account file does not exist.
        Exception: any other error while loading credentials or building the
            service (logged, then re-raised).
    """
    try:
        service_account_path = Config.GOOGLE_CALENDAR_SERVICE_ACCOUNT_PATH
        if not os.path.exists(service_account_path):
            raise FileNotFoundError(f"Service account file not found: {service_account_path}")

        creds = service_account.Credentials.from_service_account_file(
            service_account_path, scopes=Config.GOOGLE_CALENDAR_SCOPES
        )
        service = build('calendar', 'v3', credentials=creds)
        return service
    except Exception as e:
        logger.error(f"Failed to initialize Google service: {e}")
        raise


def _list_calendars(service):
    """Return all calendarList entries of the service, following pagination."""
    calendars = []
    page_token = None
    while True:
        response = service.calendarList().list(
            pageToken=page_token,
            maxResults=250
        ).execute()
        calendars.extend(response.get('items', []))
        page_token = response.get('nextPageToken')
        if not page_token:
            return calendars


async def ensure_google_calendar(service, employee_kuerzel):
    """Look up the ``AW-<employee_kuerzel>`` Google calendar.

    Despite the name, this function never creates a calendar.

    Returns:
        The calendar ID of the first calendar whose summary matches, or
        ``None`` if no such calendar exists.

    Raises:
        HttpError / Exception: errors from the Google API (logged, re-raised).
    """
    calendar_name = f"AW-{employee_kuerzel}"
    try:
        for calendar in _list_calendars(service):
            if calendar.get('summary') == calendar_name:
                return calendar['id']
        return None  # Calendar doesn't exist
    except HttpError as e:
        logger.error(f"Google API error for calendar {employee_kuerzel}: {e}")
        raise
    except Exception as e:
        logger.error(f"Failed to check Google calendar for {employee_kuerzel}: {e}")
        raise


def _parse_advoware_datum(value):
    """Parse an Advoware ``datum`` value into a naive datetime, or ``None``.

    Date-only strings are treated as midnight. A trailing ``Z`` or an explicit
    UTC offset is dropped so the result can be compared with the naive range
    bounds; values that are not parseable strings yield ``None``.
    """
    if not isinstance(value, str) or not value:
        return None
    if 'T' in value:
        text = value.replace('Z', '')
    else:
        text = value + 'T00:00:00'
    try:
        parsed = datetime.fromisoformat(text)
    except ValueError:
        return None
    return parsed.replace(tzinfo=None)


async def fetch_advoware_appointments(advoware, employee_kuerzel):
    """Fetch the employee's Advoware appointments for the audited range.

    The requested range runs from 1 January of the previous year to
    31 December nine years ahead. Appointments whose ``datum`` lies outside
    that range are only reported via warnings; they are still returned.

    Returns:
        Dict mapping ``str(frNr)`` to the appointment dict, for every
        appointment with a truthy ``frNr``.

    Raises:
        Exception: any error from the Advoware API call (logged, re-raised).
    """
    try:
        # Use the same range as the sync script: previous year to 9 years ahead
        from_date = f"{current_year - 1}-01-01T00:00:00"
        to_date = f"{current_year + 9}-12-31T23:59:59"
        params = {
            'kuerzel': employee_kuerzel,
            'from': from_date,
            'to': to_date
        }
        result = await advoware.api_call('api/v1/advonet/Termine', method='GET', params=params)
        appointments = result if isinstance(result, list) else []

        # Check if Advoware respects the time limit. This is purely
        # diagnostic, so unparseable dates are skipped instead of failing
        # the whole fetch.
        from_dt = datetime.fromisoformat(from_date)
        to_dt = datetime.fromisoformat(to_date)
        out_of_range = []
        for app in appointments:
            app_dt = _parse_advoware_datum(app.get('datum'))
            if app_dt is None:
                continue
            if app_dt < from_dt or app_dt > to_dt:
                out_of_range.append(app)

        if out_of_range:
            logger.warning(f"Advoware returned {len(out_of_range)} appointments outside the requested range {from_date} to {to_date}")
            for app in out_of_range[:5]:  # Log first 5
                logger.warning(f"Out of range appointment: frNr {app.get('frNr')}, datum {app.get('datum')}")

        logger.info(f"Fetched {len(appointments)} Advoware appointments for {employee_kuerzel} (expected range: {from_date} to {to_date})")
        return {str(app['frNr']): app for app in appointments if app.get('frNr')}
    except Exception as e:
        logger.error(f"Failed to fetch Advoware appointments: {e}")
        raise


async def fetch_google_events(service, calendar_id):
    """Fetch all events of a Google calendar in the audited range.

    The range runs from 1 January two years back to 31 December ten years
    ahead; recurring events are expanded into single instances.

    Returns:
        Tuple ``(events, total)`` where ``events`` is the list of events whose
        status is not ``'cancelled'`` and ``total`` is the number of events
        fetched before that filter.

    Raises:
        HttpError / Exception: errors from the Google API (logged, re-raised).
    """
    try:
        # Use the same range as the sync script: 2 years back to 10 years forward
        time_min = f"{current_year - 2}-01-01T00:00:00Z"
        time_max = f"{current_year + 10}-12-31T23:59:59Z"

        all_events = []
        page_token = None
        while True:
            events_result = service.events().list(
                calendarId=calendar_id,
                timeMin=time_min,
                timeMax=time_max,
                singleEvents=True,
                orderBy='startTime',
                pageToken=page_token,
                maxResults=2500  # Max per page
            ).execute()
            all_events.extend(events_result.get('items', []))
            page_token = events_result.get('nextPageToken')
            if not page_token:
                break

        events = [evt for evt in all_events if evt.get('status') != 'cancelled']
        logger.info(f"Fetched {len(all_events)} total Google events ({len(events)} not cancelled) for calendar {calendar_id}")
        return events, len(all_events)  # Return filtered events and total count
    except HttpError as e:
        logger.error(f"Google API error fetching events: {e}")
        raise
    except Exception as e:
        logger.error(f"Failed to fetch Google events: {e}")
        raise


def _find_orphaned_google_events(google_map, db_google_index):
    """Return summaries of Google events that have no DB sync entry.

    An event whose ID contains ``_`` is not reported when the part before the
    first ``_`` is a synced event ID (instance of a synced recurring master).
    """
    orphaned = []
    for event_id, evt in google_map.items():
        if event_id in db_google_index:
            continue
        # Check if this is an instance of a recurring event whose master is synced
        if '_' in event_id and event_id.split('_')[0] in db_google_index:
            continue
        orphaned.append({
            'event_id': event_id,
            'summary': evt.get('summary', ''),
            'start': evt.get('start', {}),
            'end': evt.get('end', {})
        })
    return orphaned


async def audit_calendar_sync(employee_kuerzel, check_system, delete_orphaned_google=False):
    """Audit the active ``calendar_sync`` rows of one employee against an API.

    For every active DB row the function checks whether the referenced
    Advoware appointment (``check_system='advoware'``) or Google event
    (``check_system='google'``) still exists, and prints a report. In Google
    mode it also reports events that exist in Google but not in the DB and,
    if ``delete_orphaned_google`` is true, deletes them from the calendar.

    Args:
        employee_kuerzel: Employee short code; the calendar is ``AW-<code>``.
        check_system: ``'google'`` or ``'advoware'``.
        delete_orphaned_google: Delete orphaned Google events (Google mode only).

    Returns:
        ``None``. Returns early (after logging an error) if the employee's
        Google calendar does not exist.

    Raises:
        ValueError: if ``check_system`` is not one of the supported values.
    """
    if check_system not in ['google', 'advoware']:
        raise ValueError("check_system must be 'google' or 'advoware'")

    logger.info(f"Starting audit for {employee_kuerzel}, checking {check_system}, delete_orphaned_google={delete_orphaned_google}")

    # Initialize APIs
    advoware = AdvowareAPI({})
    service = await get_google_service()
    calendar_id = await ensure_google_calendar(service, employee_kuerzel)
    if not calendar_id:
        logger.error(f"Google calendar for {employee_kuerzel} does not exist")
        return

    # Fetch API data
    advoware_map = {}
    google_events = []
    total_google_events = 0
    if check_system == 'advoware':
        advoware_map = await fetch_advoware_appointments(advoware, employee_kuerzel)
    elif check_system == 'google':
        google_events, total_google_events = await fetch_google_events(service, calendar_id)
    # Always defined; empty in Advoware mode.
    google_map = {evt['id']: evt for evt in google_events}

    # Connect to DB
    conn = await connect_db()
    try:
        # Fetch DB entries
        rows = await conn.fetch(
            """
            SELECT sync_id, employee_kuerzel, advoware_frnr, google_event_id, source_system, sync_strategy, sync_status, last_sync
            FROM calendar_sync
            WHERE employee_kuerzel = $1 AND deleted = FALSE
            ORDER BY sync_id
            """,
            employee_kuerzel
        )
        logger.info(f"Found {len(rows)} active sync entries in DB for {employee_kuerzel}")

        # Build DB index of synced Google event IDs
        db_google_index = {
            row['google_event_id']: row for row in rows if row['google_event_id']
        }

        # Audit results
        total_entries = len(rows)
        existing_in_api = 0
        missing_in_api = 0
        missing_details = []

        for row in rows:
            advoware_frnr = row['advoware_frnr']
            google_event_id = row['google_event_id']

            # A row without an ID for the checked system counts as missing.
            exists_in_api = False
            if check_system == 'advoware' and advoware_frnr:
                exists_in_api = str(advoware_frnr) in advoware_map
            elif check_system == 'google' and google_event_id:
                exists_in_api = google_event_id in google_map

            if exists_in_api:
                existing_in_api += 1
            else:
                missing_in_api += 1
                missing_details.append({
                    'sync_id': row['sync_id'],
                    'advoware_frnr': advoware_frnr,
                    'google_event_id': google_event_id,
                    'source_system': row['source_system'],
                    'sync_strategy': row['sync_strategy'],
                    'sync_status': row['sync_status'],
                    'last_sync': row['last_sync']
                })

        # Check for orphaned Google events (events in Google not in DB)
        orphaned_google_events = []
        if check_system == 'google':
            orphaned_google_events = _find_orphaned_google_events(google_map, db_google_index)

        # Print summary
        print(f"\n=== Calendar Sync Audit for {employee_kuerzel} ===")
        print(f"Checking system: {check_system}")
        print(f"Total active DB entries: {total_entries}")
        if check_system == 'google':
            print(f"Total events in Google: {total_google_events}")
            print(f"Orphaned events in Google (not in DB): {len(orphaned_google_events)}")
        print(f"Existing in {check_system}: {existing_in_api}")
        print(f"Missing in {check_system}: {missing_in_api}")
        # Share of DB entries found in the checked system; guard against an
        # empty table to avoid dividing by zero.
        if total_entries:
            print(f"Existence rate: {existing_in_api / total_entries * 100:.1f}%")
        else:
            print("Existence rate: n/a (no active DB entries)")

        if missing_details:
            print(f"\n=== Details of missing entries in {check_system} ===")
            for detail in missing_details:
                print(f"Sync ID: {detail['sync_id']}")
                print(f" Advoware frNr: {detail['advoware_frnr']}")
                print(f" Google Event ID: {detail['google_event_id']}")
                print(f" Source System: {detail['source_system']}")
                print(f" Sync Strategy: {detail['sync_strategy']}")
                print(f" Sync Status: {detail['sync_status']}")
                print(f" Last Sync: {detail['last_sync']}")
                print(" ---")
        else:
            print(f"\nAll entries exist in {check_system}!")

        # Delete orphaned Google events if requested
        if delete_orphaned_google and check_system == 'google' and orphaned_google_events:
            print("\n=== Deleting orphaned Google events ===")
            for orphaned in orphaned_google_events:
                event_id = orphaned['event_id']
                try:
                    service.events().delete(calendarId=calendar_id, eventId=event_id).execute()
                    print(f"Deleted orphaned Google event: {event_id} - {orphaned['summary']}")
                except HttpError as e:
                    print(f"Failed to delete Google event {event_id}: {e}")
                except Exception as e:
                    print(f"Error deleting Google event {event_id}: {e}")
    finally:
        await conn.close()


async def delete_google_calendar(service, employee_kuerzel):
    """Delete the ``AW-<employee_kuerzel>`` Google calendar if it exists.

    Only the first calendar with a matching summary is considered; a primary
    calendar is never deleted.

    Returns:
        ``True`` if the calendar was deleted, ``False`` if it does not exist,
        is primary, or the deletion failed.

    Raises:
        HttpError / Exception: errors while listing calendars (logged,
            re-raised). Errors of the delete call itself are logged only.
    """
    calendar_name = f"AW-{employee_kuerzel}"
    try:
        for calendar in _list_calendars(service):
            if calendar.get('summary') != calendar_name:
                continue
            calendar_id = calendar['id']
            if calendar.get('primary', False):
                logger.warning(f"Cannot delete primary calendar: {calendar_name}")
                return False
            try:
                service.calendars().delete(calendarId=calendar_id).execute()
                logger.info(f"Deleted Google calendar: {calendar_name} (ID: {calendar_id})")
                return True
            except HttpError as e:
                logger.error(f"Failed to delete Google calendar {calendar_name}: {e}")
                return False
            except Exception as e:
                logger.error(f"Error deleting Google calendar {calendar_name}: {e}")
                return False

        logger.info(f"Google calendar {calendar_name} does not exist, nothing to delete")
        return False
    except HttpError as e:
        logger.error(f"Google API error checking calendar {employee_kuerzel}: {e}")
        raise
    except Exception as e:
        logger.error(f"Failed to check/delete Google calendar for {employee_kuerzel}: {e}")
        raise


async def list_all_calendars(service):
    """Print all Google calendars sorted by summary and return them.

    Returns:
        List of calendarList entries (dicts) in the order the API returned them.

    Raises:
        Exception: errors from the Google API (logged, re-raised).
    """
    try:
        all_calendars = _list_calendars(service)

        print(f"\n=== All Google Calendars ({len(all_calendars)}) ===")
        for cal in sorted(all_calendars, key=lambda x: x.get('summary', '')):
            summary = cal.get('summary', 'Unnamed')
            cal_id = cal['id']
            primary = cal.get('primary', False)
            access_role = cal.get('accessRole', 'unknown')
            print(f" {summary} (ID: {cal_id}, Primary: {primary}, Access: {access_role})")

        return all_calendars
    except Exception as e:
        logger.error(f"Failed to list calendars: {e}")
        raise


async def find_duplicates(service):
    """Find calendars that share the same summary and print a report.

    Returns:
        Dict mapping each duplicated summary to the list of calendars with
        that summary (empty dict if there are no duplicates).
    """
    all_calendars = await list_all_calendars(service)

    name_groups = defaultdict(list)
    for cal in all_calendars:
        name_groups[cal.get('summary', 'Unnamed')].append(cal)

    duplicates = {name: cals for name, cals in name_groups.items() if len(cals) > 1}

    if duplicates:
        print(f"\n=== Duplicate Calendars Found ({len(duplicates)} unique names with duplicates) ===")
        total_duplicates = sum(len(cals) - 1 for cals in duplicates.values())
        print(f"Total duplicate calendars: {total_duplicates}")
        for name, cals in duplicates.items():
            print(f"\nCalendar Name: '{name}' - {len(cals)} instances")
            for cal in cals:
                cal_id = cal['id']
                primary = cal.get('primary', False)
                access_role = cal.get('accessRole', 'unknown')
                print(f" ID: {cal_id}, Primary: {primary}, Access Role: {access_role}")
    else:
        print("\nNo duplicate calendars found!")

    return duplicates


async def delete_duplicates(service, duplicates):
    """Delete duplicate calendars, keeping the first one per name.

    Primary calendars are skipped, matching the other delete commands in this
    script. Failures of individual deletions are printed and do not stop the
    run.

    Args:
        duplicates: Mapping as returned by ``find_duplicates``.
    """
    if not duplicates:
        print("No duplicates to delete.")
        return

    print("\n=== Deleting Duplicate Calendars ===")
    total_deleted = 0

    for name, cals in duplicates.items():
        # Keep the first one, delete the rest
        keep_cal = cals[0]
        print(f"\nKeeping: '{name}' (ID: {keep_cal['id']})")

        for cal in cals[1:]:
            cal_id = cal['id']
            if cal.get('primary', False):
                print(f" Skipping primary calendar: {cal_id}")
                continue
            try:
                service.calendars().delete(calendarId=cal_id).execute()
                print(f" Deleted: {cal_id}")
                total_deleted += 1
            except HttpError as e:
                print(f" Failed to delete {cal_id}: {e}")
            except Exception as e:
                print(f" Error deleting {cal_id}: {e}")

    print(f"\nTotal calendars deleted: {total_deleted}")


async def get_all_employees_from_db():
    """Return the distinct employee codes that have active sync rows.

    Returns:
        List of ``employee_kuerzel`` values from non-deleted ``calendar_sync``
        rows, ordered alphabetically by the query.
    """
    conn = await connect_db()
    try:
        rows = await conn.fetch(
            """
            SELECT DISTINCT employee_kuerzel
            FROM calendar_sync
            WHERE deleted = FALSE
            ORDER BY employee_kuerzel
            """
        )
        employees = [row['employee_kuerzel'] for row in rows]
        logger.info(f"Found {len(employees)} distinct employees in DB")
        return employees
    finally:
        await conn.close()


async def find_orphaned_calendars(service):
    """Find ``AW-*`` calendars without a corresponding employee in the DB.

    Prints the findings and returns the list of orphaned calendarList entries.
    """
    all_calendars = await list_all_calendars(service)
    employees = await get_all_employees_from_db()

    # Create set of expected calendar names
    expected_names = {f"AW-{emp}" for emp in employees}

    orphaned = []
    for cal in all_calendars:
        summary = cal.get('summary', '')
        if summary.startswith('AW-') and summary not in expected_names:
            orphaned.append(cal)

    if orphaned:
        print(f"\n=== Orphaned AW-* Calendars ({len(orphaned)}) ===")
        for cal in sorted(orphaned, key=lambda x: x.get('summary', '')):
            summary = cal.get('summary', '')
            cal_id = cal['id']
            primary = cal.get('primary', False)
            access_role = cal.get('accessRole', 'unknown')
            print(f" {summary} (ID: {cal_id}, Primary: {primary}, Access: {access_role})")
    else:
        print("\nNo orphaned AW-* calendars found!")

    return orphaned


async def cleanup_orphaned_calendars(service, orphaned):
    """Delete the given orphaned ``AW-*`` calendars, skipping primary ones.

    Failures of individual deletions are printed and do not stop the run.

    Args:
        orphaned: List as returned by ``find_orphaned_calendars``.
    """
    if not orphaned:
        print("No orphaned calendars to delete.")
        return

    print("\n=== Deleting Orphaned AW-* Calendars ===")
    total_deleted = 0

    for cal in orphaned:
        summary = cal.get('summary', '')
        cal_id = cal['id']
        if cal.get('primary', False):
            print(f" Skipping primary calendar: {summary}")
            continue
        try:
            service.calendars().delete(calendarId=cal_id).execute()
            print(f" Deleted: {summary} (ID: {cal_id})")
            total_deleted += 1
        except HttpError as e:
            print(f" Failed to delete {summary} ({cal_id}): {e}")
        except Exception as e:
            print(f" Error deleting {summary} ({cal_id}): {e}")

    print(f"\nTotal orphaned calendars deleted: {total_deleted}")


async def main():
    """Parse ``sys.argv`` and dispatch to the requested command.

    Exits with status 1 on missing/unknown commands or when a command fails.
    """
    if len(sys.argv) < 2:
        print("Usage: python audit_calendar_sync.py COMMAND [options]")
        print("\nCommands:")
        print(" audit EMPLOYEE_KUERZEL {google|advoware} [--delete-orphaned-google]")
        print(" Audit sync entries for a specific employee")
        print(" delete-calendar EMPLOYEE_KUERZEL")
        print(" Delete the Google calendar for a specific employee")
        print(" list-all")
        print(" List all Google calendars")
        print(" find-duplicates")
        print(" Find duplicate calendars by name")
        print(" delete-duplicates")
        print(" Find and delete duplicate calendars (keeps one per name)")
        print(" find-orphaned")
        print(" Find AW-* calendars without corresponding employees in DB")
        print(" cleanup-orphaned")
        print(" Find and delete orphaned AW-* calendars")
        print("\nOptions:")
        print(" --delete-orphaned-google: Delete Google events that exist in Google but not in the DB (for audit command)")
        print("\nExamples:")
        print(" python audit_calendar_sync.py audit SB google --delete-orphaned-google")
        print(" python audit_calendar_sync.py delete-calendar SB")
        print(" python audit_calendar_sync.py list-all")
        print(" python audit_calendar_sync.py find-duplicates")
        print(" python audit_calendar_sync.py delete-duplicates")
        print(" python audit_calendar_sync.py find-orphaned")
        print(" python audit_calendar_sync.py cleanup-orphaned")
        sys.exit(1)

    command = sys.argv[1].lower()

    # sys.exit() raises SystemExit, which is not an Exception subclass, so the
    # explicit exits below are not swallowed by the handler at the bottom.
    try:
        service = await get_google_service()

        if command == 'audit':
            if len(sys.argv) < 4:
                print("Usage: python audit_calendar_sync.py audit EMPLOYEE_KUERZEL {google|advoware} [--delete-orphaned-google]")
                sys.exit(1)
            employee_kuerzel = sys.argv[2].upper()
            check_system = sys.argv[3].lower()
            delete_orphaned_google = '--delete-orphaned-google' in sys.argv
            await audit_calendar_sync(employee_kuerzel, check_system, delete_orphaned_google)

        elif command == 'delete-calendar':
            if len(sys.argv) < 3:
                print("Usage: python audit_calendar_sync.py delete-calendar EMPLOYEE_KUERZEL")
                sys.exit(1)
            employee_kuerzel = sys.argv[2].upper()
            deleted = await delete_google_calendar(service, employee_kuerzel)
            if deleted:
                print(f"Successfully deleted Google calendar for {employee_kuerzel}")
            else:
                print(f"No calendar deleted for {employee_kuerzel}")

        elif command == 'list-all':
            await list_all_calendars(service)

        elif command == 'find-duplicates':
            await find_duplicates(service)

        elif command == 'delete-duplicates':
            duplicates = await find_duplicates(service)
            if duplicates:
                await delete_duplicates(service, duplicates)
            else:
                print("No duplicates to delete.")

        elif command == 'find-orphaned':
            await find_orphaned_calendars(service)

        elif command == 'cleanup-orphaned':
            orphaned = await find_orphaned_calendars(service)
            if orphaned:
                await cleanup_orphaned_calendars(service, orphaned)
            else:
                print("No orphaned calendars to delete.")

        else:
            print(f"Unknown command: {command}")
            sys.exit(1)

    except Exception as e:
        logger.error(f"Command failed: {e}")
        sys.exit(1)


if __name__ == "__main__":
    asyncio.run(main())