# Source: motia/bitbylaw/steps/advoware_cal_sync/audit_calendar_sync.py
# (Python; listed as 355 lines, 15 KiB by the repository file viewer)

import asyncio
import logging
import sys
import os
from datetime import datetime, timedelta
import pytz
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
from config import Config
from services.advoware import AdvowareAPI
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google.oauth2 import service_account
import asyncpg
# Setup logging
# Module-level logger at INFO level. A bare StreamHandler() writes to
# sys.stderr, so log lines stay separate from the report, which the audit
# emits with print() on stdout.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
logger.addHandler(handler)
# Timezone and year
# `now` is captured once, at import time, in Europe/Berlin local time.
# `current_year` anchors the date windows built by
# fetch_advoware_appointments() and fetch_google_events().
BERLIN_TZ = pytz.timezone('Europe/Berlin')
now = datetime.now(BERLIN_TZ)
current_year = now.year
async def connect_db():
    """Open an asyncpg connection using the Postgres settings on ``Config``.

    The host falls back to ``'localhost'`` when ``Config.POSTGRES_HOST`` is
    falsy, and the connection attempt is given a timeout of 10.

    Returns:
        The connection object produced by ``asyncpg.connect``.

    Raises:
        Exception: Any failure is logged and then re-raised unchanged.
    """
    try:
        # Settings are read inside the try so that a missing Config attribute
        # is logged the same way as a failed connection.
        db_settings = {
            'host': Config.POSTGRES_HOST or 'localhost',
            'user': Config.POSTGRES_USER,
            'password': Config.POSTGRES_PASSWORD,
            'database': Config.POSTGRES_DB_NAME,
            'timeout': 10,
        }
        return await asyncpg.connect(**db_settings)
    except Exception as e:
        logger.error(f"Failed to connect to DB: {e}")
        raise
async def get_google_service():
    """Build a Google Calendar v3 client from the configured service account.

    Reads the key-file path from
    ``Config.GOOGLE_CALENDAR_SERVICE_ACCOUNT_PATH`` and the scopes from
    ``Config.GOOGLE_CALENDAR_SCOPES``.

    Returns:
        The service object returned by ``build('calendar', 'v3', ...)``.

    Raises:
        FileNotFoundError: The service account file does not exist.
        Exception: Any other failure; every error is logged and re-raised.
    """
    try:
        key_file = Config.GOOGLE_CALENDAR_SERVICE_ACCOUNT_PATH
        if os.path.exists(key_file):
            credentials = service_account.Credentials.from_service_account_file(
                key_file, scopes=Config.GOOGLE_CALENDAR_SCOPES
            )
            return build('calendar', 'v3', credentials=credentials)
        raise FileNotFoundError(f"Service account file not found: {key_file}")
    except Exception as e:
        logger.error(f"Failed to initialize Google service: {e}")
        raise
async def ensure_google_calendar(service, employee_kuerzel):
    """Look up the ID of the employee's ``AW-<kuerzel>`` Google calendar.

    Despite the name, this function never creates a calendar; it only
    searches the calendar list visible to ``service``.

    Args:
        service: Google Calendar API client exposing ``calendarList().list()``.
        employee_kuerzel: Employee short code; the calendar searched for is
            named ``AW-<employee_kuerzel>``.

    Returns:
        The ``id`` of the first calendar whose summary matches, or ``None``
        when no page of the calendar list contains such a calendar.

    Raises:
        HttpError: The Google API rejected a request (logged, re-raised).
        Exception: Any other failure (logged, re-raised).
    """
    calendar_name = f"AW-{employee_kuerzel}"
    try:
        page_token = None
        while True:
            # The calendar list is paginated: follow nextPageToken so that a
            # calendar beyond the first page is not reported as missing.
            calendar_list = service.calendarList().list(pageToken=page_token).execute()
            for calendar in calendar_list.get('items', []):
                if calendar.get('summary') == calendar_name:
                    return calendar['id']
            page_token = calendar_list.get('nextPageToken')
            if not page_token:
                return None  # Calendar doesn't exist
    except HttpError as e:
        logger.error(f"Google API error for calendar {employee_kuerzel}: {e}")
        raise
    except Exception as e:
        logger.error(f"Failed to check Google calendar for {employee_kuerzel}: {e}")
        raise
def _parse_advoware_datum(raw_value):
    """Parse an Advoware ``datum`` value into a naive datetime, or ``None``.

    Returns ``None`` for anything that is not a non-empty ISO-formatted
    string, so callers can skip the value instead of failing.
    """
    if not isinstance(raw_value, str) or not raw_value:
        return None
    if 'T' in raw_value:
        text = raw_value.replace('Z', '')
    else:
        # Date-only value: treat it as midnight of that day.
        text = raw_value + 'T00:00:00'
    try:
        parsed = datetime.fromisoformat(text)
    except ValueError:
        return None
    # Drop any UTC offset so the value compares against the naive range
    # bounds; this matches how a trailing 'Z' is simply stripped above.
    return parsed.replace(tzinfo=None)


async def fetch_advoware_appointments(advoware, employee_kuerzel):
    """Fetch the employee's Advoware appointments, indexed by ``frNr``.

    Requests ``api/v1/advonet/Termine`` for the window from 1 January of the
    previous year to 31 December nine years ahead (relative to the
    module-level ``current_year``). As a diagnostic, appointments whose
    ``datum`` lies outside that window are logged as warnings; they are still
    included in the result.

    Args:
        advoware: Client exposing an awaitable
            ``api_call(path, method=..., params=...)``.
        employee_kuerzel: Employee short code sent as the ``kuerzel`` param.

    Returns:
        Dict mapping ``str(frNr)`` to the appointment dict, for every
        appointment with a truthy ``frNr``. A non-list API result yields an
        empty dict.

    Raises:
        Exception: Any failure is logged and re-raised.
    """
    try:
        # Use the same range as the sync script: previous year to 9 years ahead
        from_date = f"{current_year - 1}-01-01T00:00:00"
        to_date = f"{current_year + 9}-12-31T23:59:59"
        params = {
            'kuerzel': employee_kuerzel,
            'from': from_date,
            'to': to_date
        }
        result = await advoware.api_call('api/v1/advonet/Termine', method='GET', params=params)
        appointments = result if isinstance(result, list) else []
        # Check if Advoware respects the time limit
        from_dt = datetime.fromisoformat(from_date)
        to_dt = datetime.fromisoformat(to_date)
        out_of_range = []
        unparseable = 0
        for app in appointments:
            if 'datum' not in app:
                continue
            app_dt = _parse_advoware_datum(app['datum'])
            if app_dt is None:
                # The range check is diagnostic only: a null or malformed
                # datum must not abort the whole fetch.
                unparseable += 1
                continue
            if app_dt < from_dt or app_dt > to_dt:
                out_of_range.append(app)
        if unparseable:
            logger.warning(f"Skipped range check for {unparseable} appointments with missing or unparseable datum")
        if out_of_range:
            logger.warning(f"Advoware returned {len(out_of_range)} appointments outside the requested range {from_date} to {to_date}")
            for app in out_of_range[:5]:  # Log first 5
                logger.warning(f"Out of range appointment: frNr {app.get('frNr')}, datum {app.get('datum')}")
        logger.info(f"Fetched {len(appointments)} Advoware appointments for {employee_kuerzel} (expected range: {from_date} to {to_date})")
        return {str(app['frNr']): app for app in appointments if app.get('frNr')}
    except Exception as e:
        logger.error(f"Failed to fetch Advoware appointments: {e}")
        raise
async def fetch_google_events(service, calendar_id):
    """Collect every event of a Google calendar inside the audit window.

    The window spans 1 January two years before the module-level
    ``current_year`` to 31 December ten years after it. Recurring events are
    expanded (``singleEvents=True``) and all result pages are followed.

    Args:
        service: Google Calendar API client exposing ``events().list()``.
        calendar_id: ID of the calendar to read.

    Returns:
        Tuple ``(events, total)``: the events whose ``status`` is not
        ``'cancelled'``, and the count of all events fetched.

    Raises:
        HttpError: The Google API rejected a request (logged, re-raised).
        Exception: Any other failure (logged, re-raised).
    """
    try:
        # Use the same range as the sync script: 2 years back to 10 years forward
        window_start = f"{current_year - 2}-01-01T00:00:00Z"
        window_end = f"{current_year + 10}-12-31T23:59:59Z"
        collected = []
        next_token = None
        has_more = True
        while has_more:
            response = service.events().list(
                calendarId=calendar_id,
                timeMin=window_start,
                timeMax=window_end,
                singleEvents=True,
                orderBy='startTime',
                pageToken=next_token,
                maxResults=2500  # Max per page
            ).execute()
            collected.extend(response.get('items', []))
            next_token = response.get('nextPageToken')
            has_more = bool(next_token)
        active = [item for item in collected if item.get('status') != 'cancelled']
        logger.info(f"Fetched {len(collected)} total Google events ({len(active)} not cancelled) for calendar {calendar_id}")
        # Filtered events first, then the unfiltered total.
        return active, len(collected)
    except HttpError as e:
        logger.error(f"Google API error fetching events: {e}")
        raise
    except Exception as e:
        logger.error(f"Failed to fetch Google events: {e}")
        raise
def _find_orphaned_google_events(google_map, db_google_index):
    """Return summaries of Google events that have no matching DB sync row."""
    orphaned = []
    for event_id, evt in google_map.items():
        if event_id in db_google_index:
            continue
        # An ID containing '_' is treated as "<master>_<instance>": an
        # instance of a recurring event. It is not orphaned when the part
        # before the first '_' is itself a synced event ID.
        if '_' in event_id and event_id.split('_')[0] in db_google_index:
            continue
        orphaned.append({
            'event_id': event_id,
            'summary': evt.get('summary', ''),
            'start': evt.get('start', {}),
            'end': evt.get('end', {})
        })
    return orphaned


async def audit_calendar_sync(employee_kuerzel, check_system, delete_orphaned_google=False):
    """Audit one employee's active ``calendar_sync`` rows against one system.

    Loads the non-deleted DB rows for the employee, checks each row's ID
    against the data fetched from the chosen system, and prints a report to
    stdout. A row that has no ID for the checked system counts as missing.
    In ``'google'`` mode, events present in Google but absent from the DB are
    also reported as orphaned and can optionally be deleted.

    Args:
        employee_kuerzel: Employee short code used for the DB filter and the
            calendar lookup.
        check_system: ``'google'`` or ``'advoware'``.
        delete_orphaned_google: When true and ``check_system`` is
            ``'google'``, delete each orphaned Google event. Failures on
            individual events are printed and do not stop the loop.

    Returns:
        None. Returns early, after logging an error, when the employee's
        Google calendar does not exist (in either mode).

    Raises:
        ValueError: ``check_system`` is not one of the two supported values.
    """
    if check_system not in ['google', 'advoware']:
        raise ValueError("check_system must be 'google' or 'advoware'")
    logger.info(f"Starting audit for {employee_kuerzel}, checking {check_system}, delete_orphaned_google={delete_orphaned_google}")
    # Initialize APIs
    advoware = AdvowareAPI({})
    service = await get_google_service()
    calendar_id = await ensure_google_calendar(service, employee_kuerzel)
    if not calendar_id:
        logger.error(f"Google calendar for {employee_kuerzel} does not exist")
        return
    # Fetch API data (only for the system being checked)
    advoware_map = {}
    google_events = []
    total_google_events = 0
    if check_system == 'advoware':
        advoware_map = await fetch_advoware_appointments(advoware, employee_kuerzel)
    elif check_system == 'google':
        google_events, total_google_events = await fetch_google_events(service, calendar_id)
    google_map = {evt['id']: evt for evt in google_events}
    # Connect to DB
    conn = await connect_db()
    try:
        # Fetch DB entries
        rows = await conn.fetch(
            """
            SELECT sync_id, employee_kuerzel, advoware_frnr, google_event_id, source_system, sync_strategy, sync_status, last_sync
            FROM calendar_sync
            WHERE employee_kuerzel = $1 AND deleted = FALSE
            ORDER BY sync_id
            """,
            employee_kuerzel
        )
        logger.info(f"Found {len(rows)} active sync entries in DB for {employee_kuerzel}")
        # Index DB rows by Google event ID for the orphan check
        db_google_index = {
            row['google_event_id']: row for row in rows if row['google_event_id']
        }
        # Audit results
        total_entries = len(rows)
        existing_in_api = 0
        missing_in_api = 0
        missing_details = []
        for row in rows:
            advoware_frnr = row['advoware_frnr']
            google_event_id = row['google_event_id']
            exists_in_api = False
            if check_system == 'advoware' and advoware_frnr:
                exists_in_api = str(advoware_frnr) in advoware_map
            elif check_system == 'google' and google_event_id:
                exists_in_api = google_event_id in google_map
            if exists_in_api:
                existing_in_api += 1
                continue
            missing_in_api += 1
            missing_details.append({
                'sync_id': row['sync_id'],
                'advoware_frnr': advoware_frnr,
                'google_event_id': google_event_id,
                'source_system': row['source_system'],
                'sync_strategy': row['sync_strategy'],
                'sync_status': row['sync_status'],
                'last_sync': row['last_sync']
            })
        # Check for orphaned Google events (events in Google not in DB)
        orphaned_google_events = []
        if check_system == 'google':
            orphaned_google_events = _find_orphaned_google_events(google_map, db_google_index)
        # Print summary
        print(f"\n=== Calendar Sync Audit for {employee_kuerzel} ===")
        print(f"Checking system: {check_system}")
        print(f"Total active DB entries: {total_entries}")
        if check_system == 'google':
            print(f"Total events in Google: {total_google_events}")
            print(f"Orphaned events in Google (not in DB): {len(orphaned_google_events)}")
        print(f"Existing in {check_system}: {existing_in_api}")
        print(f"Missing in {check_system}: {missing_in_api}")
        # This line used to print the literal text ".1f" (a format spec with
        # no f-string around it). Report the share of DB entries found in the
        # checked system, guarding against division by zero.
        if total_entries:
            print(f"Existing percentage: {existing_in_api / total_entries * 100:.1f}%")
        else:
            print("Existing percentage: n/a (no active DB entries)")
        if missing_details:
            print(f"\n=== Details of missing entries in {check_system} ===")
            for detail in missing_details:
                print(f"Sync ID: {detail['sync_id']}")
                print(f" Advoware frNr: {detail['advoware_frnr']}")
                print(f" Google Event ID: {detail['google_event_id']}")
                print(f" Source System: {detail['source_system']}")
                print(f" Sync Strategy: {detail['sync_strategy']}")
                print(f" Sync Status: {detail['sync_status']}")
                print(f" Last Sync: {detail['last_sync']}")
                print(" ---")
        else:
            print(f"\nAll entries exist in {check_system}!")
        # Delete orphaned Google events if requested
        if delete_orphaned_google and check_system == 'google' and orphaned_google_events:
            print("\n=== Deleting orphaned Google events ===")
            for orphaned in orphaned_google_events:
                event_id = orphaned['event_id']
                # Best effort: report a failed delete and carry on.
                try:
                    service.events().delete(calendarId=calendar_id, eventId=event_id).execute()
                    print(f"Deleted orphaned Google event: {event_id} - {orphaned['summary']}")
                except HttpError as e:
                    print(f"Failed to delete Google event {event_id}: {e}")
                except Exception as e:
                    print(f"Error deleting Google event {event_id}: {e}")
    finally:
        await conn.close()
async def delete_google_calendar(service, employee_kuerzel):
    """Delete the employee's ``AW-<kuerzel>`` Google calendar if it exists.

    Only the first calendar whose summary matches is considered. A calendar
    flagged ``primary`` is never deleted.

    Args:
        service: Google Calendar API client exposing ``calendarList().list()``
            and ``calendars().delete()``.
        employee_kuerzel: Employee short code; the target calendar is named
            ``AW-<employee_kuerzel>``.

    Returns:
        ``True`` when the calendar was deleted. ``False`` when it does not
        exist, is the primary calendar, or the delete call failed (the
        failure is logged, not raised).

    Raises:
        HttpError: Listing calendars failed at the API (logged, re-raised).
        Exception: Any other failure while listing (logged, re-raised).
    """
    calendar_name = f"AW-{employee_kuerzel}"
    try:
        page_token = None
        while True:
            # The calendar list is paginated: follow nextPageToken so that a
            # calendar beyond the first page can still be found and deleted.
            calendar_list = service.calendarList().list(pageToken=page_token).execute()
            for calendar in calendar_list.get('items', []):
                if calendar.get('summary') != calendar_name:
                    continue
                calendar_id = calendar['id']
                if calendar.get('primary', False):
                    logger.warning(f"Cannot delete primary calendar: {calendar_name}")
                    return False
                try:
                    service.calendars().delete(calendarId=calendar_id).execute()
                    logger.info(f"Deleted Google calendar: {calendar_name} (ID: {calendar_id})")
                    return True
                except HttpError as e:
                    logger.error(f"Failed to delete Google calendar {calendar_name}: {e}")
                    return False
                except Exception as e:
                    logger.error(f"Error deleting Google calendar {calendar_name}: {e}")
                    return False
            page_token = calendar_list.get('nextPageToken')
            if not page_token:
                break
        logger.info(f"Google calendar {calendar_name} does not exist, nothing to delete")
        return False
    except HttpError as e:
        logger.error(f"Google API error checking calendar {employee_kuerzel}: {e}")
        raise
    except Exception as e:
        logger.error(f"Failed to check/delete Google calendar for {employee_kuerzel}: {e}")
        raise
async def main():
    """Command-line entry point: run an audit or delete a calendar.

    Expects ``<employee_kuerzel> <google|advoware>`` followed by optional
    flags. With ``--delete-calendar`` the employee's Google calendar is
    deleted; otherwise the audit runs, passing along whether
    ``--delete-orphaned-google`` was given. Exits with status 1 on a wrong
    argument count or when the chosen operation raises.
    """
    argv = sys.argv
    if not 3 <= len(argv) <= 5:
        usage_lines = (
            "Usage: python audit_calendar_sync.py <employee_kuerzel> <google|advoware> [--delete-orphaned-google] [--delete-calendar]",
            " --delete-orphaned-google: Delete Google events that exist in Google but not in the DB",
            " --delete-calendar: Delete the Google calendar for the employee",
            "Example: python audit_calendar_sync.py SB google --delete-orphaned-google",
            "Example: python audit_calendar_sync.py SB google --delete-calendar",
        )
        for usage_line in usage_lines:
            print(usage_line)
        sys.exit(1)
    employee_kuerzel = argv[1].upper()
    check_system = argv[2].lower()
    delete_orphaned_google = '--delete-orphaned-google' in argv
    if '--delete-calendar' not in argv:
        # Audit mode
        try:
            await audit_calendar_sync(employee_kuerzel, check_system, delete_orphaned_google)
        except Exception as e:
            logger.error(f"Audit failed: {e}")
            sys.exit(1)
        return
    # Delete calendar mode
    try:
        service = await get_google_service()
        deleted = await delete_google_calendar(service, employee_kuerzel)
        print(
            f"Successfully deleted Google calendar for {employee_kuerzel}"
            if deleted
            else f"No calendar deleted for {employee_kuerzel}"
        )
    except Exception as e:
        logger.error(f"Failed to delete calendar: {e}")
        sys.exit(1)


if __name__ == "__main__":
    asyncio.run(main())