#!/usr/bin/env python3
"""
xAI Collections API Test Script

Tests all critical operations for our document sync requirements:
1. File upload and ID behavior (collection-specific vs global?)
2. Same file in multiple collections (shared file_id?)
3. CRUD operations on collections
4. CRUD operations on documents
5. Response structures and metadata
6. Update/versioning behavior

Usage:
    export XAI_MANAGEMENT_KEY="xai-token-..."
    export XAI_API_KEY="xai-..."
    python test_xai_collections_api.py
"""

import os
import sys
import json
import asyncio
import aiohttp
from typing import Optional, Dict, Any, List
from datetime import datetime
import tempfile

# Configuration
# Both base URLs can be overridden through the environment; the two keys have
# no usable default and must be provided.
XAI_MANAGEMENT_URL = os.getenv("XAI_MANAGEMENT_URL", "https://management-api.x.ai")
XAI_FILES_URL = os.getenv("XAI_FILES_URL", "https://api.x.ai")
XAI_MANAGEMENT_KEY = os.getenv("XAI_MANAGEMENT_KEY", "")  # Management API Key
XAI_API_KEY = os.getenv("XAI_API_KEY", "")  # Regular API Key for file upload

# Fail fast (exit code 1) when a key is missing: every request made by the
# client below authenticates with one of these two keys.
if not XAI_MANAGEMENT_KEY:
    print("❌ ERROR: XAI_MANAGEMENT_KEY environment variable not set!")
    print("   export XAI_MANAGEMENT_KEY='xai-token-...'")
    sys.exit(1)

if not XAI_API_KEY:
    print("❌ ERROR: XAI_API_KEY environment variable not set!")
    print("   export XAI_API_KEY='xai-...'")
    sys.exit(1)


class Colors:
    """ANSI color codes for terminal output"""
    # Foreground colors (bright variants).
    HEADER = '\033[95m'
    BLUE = '\033[94m'
    CYAN = '\033[96m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    # Text attributes.
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    # Reset: must follow any of the codes above to restore default rendering.
    END = '\033[0m'


def print_header(text: str):
    """Print ``text`` as a bold cyan banner framed by two 70-character rules."""
    rule = '=' * 70
    style = f"{Colors.BOLD}{Colors.CYAN}"
    print(f"\n{style}{rule}{Colors.END}")
    print(f"{style}{text}{Colors.END}")
    print(f"{style}{rule}{Colors.END}\n")


def print_success(text: str):
    """Print ``text`` in green, prefixed with a check mark."""
    message = f"✅ {text}"
    print(f"{Colors.GREEN}{message}{Colors.END}")


def print_error(text: str):
    """Print ``text`` in red, prefixed with a cross mark."""
    message = f"❌ {text}"
    print(f"{Colors.RED}{message}{Colors.END}")


def print_info(text: str):
    """Print ``text`` in blue, prefixed with an info symbol."""
    message = f"ℹ️  {text}"
    print(f"{Colors.BLUE}{message}{Colors.END}")


def print_warning(text: str):
    """Print ``text`` in yellow, prefixed with a warning symbol."""
    message = f"⚠️  {text}"
    print(f"{Colors.YELLOW}{message}{Colors.END}")


def print_json(data: Any, title: Optional[str] = None):
    """Pretty-print ``data`` as 2-space-indented JSON.

    When ``title`` is non-empty it is printed first, in bold and followed by a
    colon. Non-ASCII characters are emitted as-is rather than escaped.
    """
    if title:
        print(f"{Colors.BOLD}{title}:{Colors.END}")
    rendered = json.dumps(data, indent=2, ensure_ascii=False)
    print(rendered)


class XAICollectionsTestClient:
    """Test client for xAI Collections API"""
    
    def __init__(self):
        """Copy the module-level configuration onto the instance and reset test state."""
        # Endpoints: management API (collections) and files API (uploads).
        self.management_url, self.files_url = XAI_MANAGEMENT_URL, XAI_FILES_URL
        # Credentials matching the two endpoints above.
        self.management_key, self.api_key = XAI_MANAGEMENT_KEY, XAI_API_KEY
        # Opened in __aenter__ and closed in __aexit__.
        self.session: Optional[aiohttp.ClientSession] = None

        # Test state
        self.created_collections: List[str] = []  # collection IDs to delete during cleanup
        self.uploaded_files: List[str] = []       # file IDs produced by successful uploads
        self.test_results: Dict[str, bool] = {}   # test name -> pass/fail
        
    async def __aenter__(self):
        """Open the shared HTTP session (30 s total timeout) and return the client."""
        # No default Content-Type on the session; each request sets its own.
        request_timeout = aiohttp.ClientTimeout(total=30)
        self.session = aiohttp.ClientSession(timeout=request_timeout)
        return self
    
    async def __aexit__(self, *args):
        if self.session:
            await self.session.close()
    
    async def _request(self, method: str, path: str, use_files_api: bool = False, **kwargs) -> tuple[int, Any]:
        """Send one HTTP request and return ``(status, response_data)``.

        Args:
            method: HTTP verb, e.g. ``"GET"``.
            path: Path appended to the selected base URL.
            use_files_api: When true, target ``self.files_url`` and authenticate
                with ``self.api_key``; otherwise target ``self.management_url``
                and authenticate with ``self.management_key``.
            **kwargs: Forwarded to ``aiohttp.ClientSession.request`` (``json``,
                ``data``, ``params``, ``headers``, ...).

        Returns:
            The HTTP status and the decoded JSON body. A body that cannot be
            decoded as JSON is returned as ``{"_raw_text": text}`` (``{}`` when
            empty). Any failure to perform the request is reported as
            ``(0, {"error": message})`` instead of being raised.
        """
        base_url = self.files_url if use_files_api else self.management_url
        url = f"{base_url}{path}"

        # Work on a copy so the caller's headers dict is never mutated.
        headers = dict(kwargs.get('headers') or {})
        token = self.api_key if use_files_api else self.management_key
        headers['Authorization'] = f"Bearer {token}"

        # Set Content-Type for JSON requests only; multipart bodies need the
        # boundary-carrying header that aiohttp generates itself.
        if 'json' in kwargs:
            headers['Content-Type'] = 'application/json'
        kwargs['headers'] = headers

        print_info(f"{method} {url}")
        # Never write the bearer token to the terminal or to captured logs.
        loggable_headers = {**headers, 'Authorization': 'Bearer ***'}
        print_info(f"Headers: {loggable_headers}")

        if self.session is None:
            reason = "HTTP session is not open (use 'async with')"
            print_error(f"Request failed: {reason}")
            return 0, {"error": reason}

        try:
            async with self.session.request(method, url, **kwargs) as response:
                status = response.status

                try:
                    data = await response.json()
                except (aiohttp.ContentTypeError, ValueError):
                    # Body is not JSON (wrong content type or malformed):
                    # fall back to the raw text. A bare ``except`` here would
                    # also swallow task cancellation.
                    text = await response.text()
                    data = {"_raw_text": text} if text else {}

                if status < 400:
                    print_success(f"Response: {status}")
                else:
                    print_error(f"Response: {status}")

                return status, data

        except Exception as e:
            # Boundary handler: callers branch on status 0 rather than on
            # exceptions, so network/timeout errors are reported, not raised.
            print_error(f"Request failed: {e}")
            return 0, {"error": str(e)}
    
    # ========================================================================
    # COLLECTION OPERATIONS
    # ========================================================================
    
    async def create_collection(self, name: str, metadata: Optional[Dict] = None) -> tuple[int, Any]:
        """Create a collection and remember its ID for later cleanup.

        POST /v1/collections

        Returns the ``(status, data)`` pair from the request unchanged.
        """
        # xAI uses "collection_name" not "name"
        body = {"collection_name": name, "metadata": metadata or {}}
        status, data = await self._request("POST", "/v1/collections", json=body)

        if status not in (200, 201):
            return status, data

        # The ID field name is not settled, so probe the likely candidates.
        new_id = data.get("id") or data.get("collection_id") or data.get("collectionId")
        if new_id:
            self.created_collections.append(new_id)
            print_success(f"Created collection: {new_id}")

        return status, data
    
    async def get_collection(self, collection_id: str) -> tuple[int, Any]:
        """Fetch a single collection.

        GET /v1/collections/{collection_id}
        """
        endpoint = f"/v1/collections/{collection_id}"
        return await self._request("GET", endpoint)
    
    async def list_collections(self) -> tuple[int, Any]:
        """List collections.

        GET /v1/collections
        """
        result = await self._request("GET", "/v1/collections")
        return result
    
    async def update_collection(self, collection_id: str, name: Optional[str] = None,
                               metadata: Optional[Dict] = None) -> tuple[int, Any]:
        """Update a collection's name and/or metadata.

        PUT /v1/collections/{collection_id}

        Args:
            collection_id: Collection to update.
            name: New name; omitted from the payload when ``None``.
            metadata: New metadata; omitted from the payload when ``None``.
                An empty dict is sent as-is, so metadata can be cleared.

        Returns:
            The ``(status, data)`` pair from the request.
        """
        payload = {}
        # Compare against None rather than truthiness: an explicit empty value
        # is a real update request and must not be silently dropped.
        if name is not None:
            payload["collection_name"] = name  # xAI uses "collection_name"
        if metadata is not None:
            payload["metadata"] = metadata

        return await self._request("PUT", f"/v1/collections/{collection_id}", json=payload)
    
    async def delete_collection(self, collection_id: str) -> tuple[int, Any]:
        """Delete a collection and stop tracking it once the server confirms.

        DELETE /v1/collections/{collection_id}
        """
        endpoint = f"/v1/collections/{collection_id}"
        status, data = await self._request("DELETE", endpoint)

        deleted = status in (200, 204)
        if deleted and collection_id in self.created_collections:
            self.created_collections.remove(collection_id)

        return status, data
    
    # ========================================================================
    # FILE OPERATIONS (multiple upload methods)
    # ========================================================================
    
    async def upload_file_multipart(self, content: bytes, filename: str,
                                    mime_type: str = "text/plain") -> tuple[int, Any]:
        """
        Method 0: Multipart form-data upload (what the server actually expects!)
        POST /v1/files with multipart/form-data

        Args:
            content: Raw file bytes.
            filename: File name sent in the multipart part.
            mime_type: Content type of the multipart part.

        Returns:
            The ``(status, data)`` pair produced by ``_request``; status ``0``
            means the request itself failed.
        """
        print_info("METHOD 0: Multipart Form-Data Upload (POST /v1/files)")

        # Create multipart form data
        form = aiohttp.FormData()
        form.add_field('file', content, filename=filename, content_type=mime_type)

        print_info(f"Uploading {len(content)} bytes as multipart/form-data")

        # Delegate to _request so authentication, logging and error handling
        # live in one place. No Content-Type header is passed: aiohttp derives
        # it (including the boundary) from the form, and _request only sets
        # one for ``json=`` payloads.
        return await self._request("POST", "/v1/files", use_files_api=True, data=form)
    
    async def upload_file_direct(self, content: bytes, filename: str,
                                mime_type: str = "text/plain") -> tuple[int, Any]:
        """
        Method 1: Direct upload to the Files API.
        POST /v1/files with a JSON body carrying the base64-encoded content.

        Returns the ``(status, data)`` pair from the request.
        """
        import base64

        print_info("METHOD 1: Direct Upload (POST /v1/files with JSON)")

        # The JSON body cannot hold raw bytes, so ship them as base64 text.
        encoded = base64.b64encode(content).decode('ascii')
        body = {"name": filename, "content_type": mime_type, "data": encoded}

        print_info(f"Uploading {len(content)} bytes as base64 ({len(encoded)} chars)")

        return await self._request("POST", "/v1/files", use_files_api=True, json=body)
    
    async def upload_file_chunked(self, content: bytes, filename: str,
                                  mime_type: str = "text/plain") -> tuple[int, Any]:
        """
        Method 2: Initialize + chunk upload.
        POST /v1/files:initialize, then POST /v1/files:uploadChunks.

        The whole content is sent as a single base64 chunk. On success the
        file ID from the initialize step is appended to ``self.uploaded_files``.
        Returns the ``(status, data)`` pair of the last request made.
        """
        import base64

        ok_codes = (200, 201)
        print_info("METHOD 2: Initialize + Chunk Streaming")

        # Step 1: Initialize upload
        print_info("Step 1: Initialize upload")
        init_status, init_data = await self._request(
            "POST",
            "/v1/files:initialize",
            use_files_api=True,
            json={"name": filename, "content_type": mime_type},
        )
        print_json(init_data, "Initialize Response")

        if init_status not in ok_codes:
            print_error("Failed to initialize upload")
            return init_status, init_data

        file_id = init_data.get("file_id")
        if not file_id:
            print_error("No file_id in initialize response")
            return init_status, init_data

        print_success(f"Initialized upload with file_id: {file_id}")

        # Step 2: Upload chunks
        print_info(f"Step 2: Upload {len(content)} bytes in chunks")
        encoded_chunk = base64.b64encode(content).decode('ascii')
        chunk_status, chunk_data = await self._request(
            "POST",
            "/v1/files:uploadChunks",
            use_files_api=True,
            json={"file_id": file_id, "chunk": encoded_chunk},
        )
        print_json(chunk_data, "Upload Chunks Response")

        if chunk_status in ok_codes:
            print_success(f"Uploaded file chunks: {file_id}")
            self.uploaded_files.append(file_id)

        return chunk_status, chunk_data
    
    async def upload_file(self, content: bytes, filename: str,
                         mime_type: str = "text/plain") -> tuple[int, Any]:
        """
        Try multiple upload methods until one succeeds.

        Order: multipart form-data, JSON/base64, then initialize + chunks.
        A method counts as successful only when it returns 200/201 AND (for
        the first two) a file ID under ``"id"`` or ``"file_id"``; that ID is
        then recorded in ``self.uploaded_files``.

        Returns:
            ``(status, data)`` of the first successful method, or the
            multipart attempt's result when every method fails.
        """
        ok_codes = (200, 201)
        print_info("Trying upload methods...")

        # Try Method 0: Multipart form-data (what the server really wants!)
        status0, data0 = await self.upload_file_multipart(content, filename, mime_type)

        if status0 in ok_codes:
            file_id = data0.get("id") or data0.get("file_id")  # Try both field names
            if file_id:
                self.uploaded_files.append(file_id)
                print_success(f"✅ Multipart upload succeeded: {file_id}")
                return status0, data0
            print_error("No 'id' or 'file_id' in response")
            print_json(data0, "Response data")
            # The HTTP call succeeded but is unusable without an ID; say so
            # instead of reporting a "failed (200)" upload.
            print_warning(f"Multipart upload returned no file ID ({status0}), trying JSON upload...")
        else:
            print_warning(f"Multipart upload failed ({status0}), trying JSON upload...")

        # Try Method 1: Direct upload with JSON
        status1, data1 = await self.upload_file_direct(content, filename, mime_type)

        if status1 in ok_codes:
            # Accept the same field names as the multipart branch; otherwise
            # a successful upload answering with "id" would be uploaded again
            # by the chunked fallback.
            file_id = data1.get("id") or data1.get("file_id")
            if file_id:
                self.uploaded_files.append(file_id)
                print_success(f"✅ Direct upload succeeded: {file_id}")
                return status1, data1

        print_warning(f"Direct upload failed ({status1}), trying chunked upload...")

        # Try Method 2: Initialize + Chunks (records the file ID itself)
        status2, data2 = await self.upload_file_chunked(content, filename, mime_type)

        if status2 in ok_codes:
            print_success("✅ Chunked upload succeeded")
            return status2, data2

        print_error("❌ All upload methods failed")
        return status0, data0  # Return multipart method's error
    
    # ========================================================================
    # COLLECTION DOCUMENT OPERATIONS
    # ========================================================================
    
    async def add_document_to_collection(self, collection_id: str,
                                        file_id: str) -> tuple[int, Any]:
        """Attach an already-uploaded file to a collection.

        POST /v1/collections/{collection_id}/documents/{file_id}
        """
        endpoint = f"/v1/collections/{collection_id}/documents/{file_id}"
        return await self._request("POST", endpoint)
    
    async def get_collection_documents(self, collection_id: str) -> tuple[int, Any]:
        """List the documents of a collection.

        GET /v1/collections/{collection_id}/documents
        """
        endpoint = f"/v1/collections/{collection_id}/documents"
        return await self._request("GET", endpoint)
    
    async def get_collection_document(self, collection_id: str,
                                     file_id: str) -> tuple[int, Any]:
        """Fetch one document of a collection.

        GET /v1/collections/{collection_id}/documents/{file_id}
        """
        endpoint = f"/v1/collections/{collection_id}/documents/{file_id}"
        return await self._request("GET", endpoint)
    
    async def update_collection_document(self, collection_id: str, file_id: str,
                                        metadata: Dict) -> tuple[int, Any]:
        """Replace the metadata of one document in a collection.

        PATCH /v1/collections/{collection_id}/documents/{file_id}
        """
        endpoint = f"/v1/collections/{collection_id}/documents/{file_id}"
        body = {"metadata": metadata}
        return await self._request("PATCH", endpoint, json=body)
    
    async def remove_document_from_collection(self, collection_id: str,
                                             file_id: str) -> tuple[int, Any]:
        """Detach a document from a collection.

        DELETE /v1/collections/{collection_id}/documents/{file_id}
        """
        endpoint = f"/v1/collections/{collection_id}/documents/{file_id}"
        return await self._request("DELETE", endpoint)
    
    async def batch_get_documents(self, collection_id: str,
                                 file_ids: List[str]) -> tuple[int, Any]:
        """Fetch several documents of a collection in one call.

        GET /v1/collections/{collection_id}/documents:batchGet

        The IDs are sent as one comma-separated ``fileIds`` query parameter.
        """
        endpoint = f"/v1/collections/{collection_id}/documents:batchGet"
        query = {"fileIds": ",".join(file_ids)}
        return await self._request("GET", endpoint, params=query)
    
    # ========================================================================
    # TEST SCENARIOS
    # ========================================================================
    
    async def test_basic_collection_crud(self):
        """Test 1: Basic Collection CRUD operations (create, read, update).

        Records the outcome under ``"collection_crud"``: it passes only when
        create, read and update all return a success status.

        Returns:
            The new collection's ID whenever creation succeeded (even if the
            read or update step failed, so later tests can still run), or
            ``None`` when no collection could be created.
        """
        print_header("TEST 1: Basic Collection CRUD")

        # Create
        print_info("Creating collection...")
        status, data = await self.create_collection(
            name="Test Collection 1",
            metadata={"test": True, "purpose": "API testing"}
        )
        print_json(data, "Response")

        if status not in (200, 201):
            print_error("Failed to create collection")
            self.test_results["collection_crud"] = False
            return None

        # Try different possible field names for collection ID
        collection_id = data.get("id") or data.get("collection_id") or data.get("collectionId")
        if not collection_id:
            print_error("No collection ID field in response")
            print_json(data, "Response Data")
            self.test_results["collection_crud"] = False
            return None

        print_success(f"Collection created: {collection_id}")

        # Read
        print_info("Reading collection...")
        status, data = await self.get_collection(collection_id)
        print_json(data, "Response")
        read_ok = status in (200, 201)
        if not read_ok:
            print_error("Failed to read collection")

        # Update
        print_info("Updating collection...")
        status, data = await self.update_collection(
            collection_id,
            name="Test Collection 1 (Updated)",
            metadata={"test": True, "updated": True}
        )
        print_json(data, "Response")
        # 204 is accepted as well: an update may legitimately return no body.
        update_ok = status in (200, 201, 204)
        if not update_ok:
            print_error("Failed to update collection")

        # The result reflects every step instead of passing unconditionally.
        self.test_results["collection_crud"] = read_ok and update_ok
        return collection_id
    
    async def test_file_upload_and_structure(self, collection_id: str):
        """Test 2: upload a file, then attach it to ``collection_id``.

        Records the outcome under ``"file_upload"`` and returns the file ID on
        success or ``None`` on any failure.
        """
        print_header("TEST 2: File Upload (Two-Step) & Response Structure")
        ok_codes = (200, 201)

        # Create test file content
        document_bytes = b"""
        This is a test document for xAI Collections API testing.
        
        Topic: German Contract Law
        
        Key Points:
        - Contracts require offer and acceptance
        - Consideration is necessary
        - Written form may be required for certain contracts
        
        This document contains sufficient content for testing.
        """

        # STEP 1: Upload file to Files API
        print_info("STEP 1: Uploading file to Files API (api.x.ai)...")
        upload_status, upload_body = await self.upload_file(
            content=document_bytes,
            filename="test_document.txt",
            mime_type="text/plain",
        )
        print_json(upload_body, "Files API Upload Response")

        if upload_status not in ok_codes:
            print_error("File upload to Files API failed")
            self.test_results["file_upload"] = False
            return None

        # Try both field names: 'id' (Files API) or 'file_id' (Collections API)
        file_id = upload_body.get("id") or upload_body.get("file_id")
        if not file_id:
            print_error("No 'id' or 'file_id' field in response")
            print_json(upload_body, "Response for debugging")
            self.test_results["file_upload"] = False
            return None

        print_success(f"File uploaded to Files API: {file_id}")

        # STEP 2: Add file to collection using Management API
        print_info("STEP 2: Adding file to collection (management-api.x.ai)...")
        attach_status, attach_body = await self.add_document_to_collection(collection_id, file_id)
        print_json(attach_body, "Add to Collection Response")

        if attach_status not in ok_codes:
            print_error("Failed to add file to collection")
            self.test_results["file_upload"] = False
            return None

        print_success(f"File added to collection: {file_id}")
        self.test_results["file_upload"] = True
        return file_id
    
    async def test_document_in_collection(self, collection_id: str, file_id: str):
        """Test 3: list the collection's documents and fetch ``file_id`` from it.

        Records the outcome under ``"add_to_collection"`` and returns it as a
        boolean.
        """
        print_header("TEST 3: Verify Document in Collection")
        ok_codes = (200, 201)

        # Verify by listing documents
        print_info("Listing collection documents...")
        list_status, listing = await self.get_collection_documents(collection_id)
        print_json(listing, "Collection Documents")

        if list_status not in ok_codes:
            print_error("Failed to list documents")
            self.test_results["add_to_collection"] = False
            return False

        # Get specific document
        print_info("Getting specific document...")
        doc_status, document = await self.get_collection_document(collection_id, file_id)
        print_json(document, "Document Details")

        if doc_status not in ok_codes:
            print_error("Failed to get document details")
            self.test_results["add_to_collection"] = False
            return False

        print_success("Document verified in collection")
        self.test_results["add_to_collection"] = True
        return True
    
    async def test_shared_file_across_collections(self, file_id: str):
        """Test 4: CRITICAL - Can same file_id be used in multiple collections?

        Creates a second collection, attaches the existing ``file_id`` to it
        and then cross-checks the document listings of both collections.

        The outcome recorded under ``"shared_file"`` is decided by the attach
        request. The listing cross-check is informational only, because the
        listing layout (``documents[].file_metadata.file_id``) is an
        assumption about the response format.
        """
        print_header("TEST 4: Shared File Across Collections (CRITICAL)")

        # Create second collection
        print_info("Creating second collection...")
        status, data = await self.create_collection(
            name="Test Collection 2",
            metadata={"test": True, "purpose": "Multi-collection test"}
        )

        if status not in (200, 201):
            print_error("Failed to create second collection")
            self.test_results["shared_file"] = False
            return

        # Same candidate field names as everywhere else in this client.
        collection2_id = data.get("id") or data.get("collection_id") or data.get("collectionId")
        if not collection2_id:
            # Without an ID the next call would target ".../collections/None/...".
            print_error("No collection ID field in response")
            print_json(data, "Response Data")
            self.test_results["shared_file"] = False
            return
        print_success(f"Collection 2 created: {collection2_id}")

        # Try to add SAME file_id to second collection
        print_info(f"Adding SAME file_id {file_id} to collection 2...")

        status, data = await self.add_document_to_collection(collection2_id, file_id)
        print_json(data, "Response from adding existing file_id to second collection")

        if status not in (200, 201):
            print_error("Failed to add same file to second collection")
            print_warning("⚠️  Files might be collection-specific (BAD for our use case)")
            self.test_results["shared_file"] = False
            return

        print_success("✅ SAME FILE_ID CAN BE USED IN MULTIPLE COLLECTIONS!")
        print_success("✅ This is PERFECT for our architecture!")
        self.test_results["shared_file"] = True

        # Verify both collections have the file
        print_info("Verifying file in both collections...")

        # The first collection is the earliest tracked one that is not
        # collection 2; it may be missing if it was deleted in the meantime.
        collection1_id = next(
            (cid for cid in self.created_collections if cid != collection2_id),
            None,
        )
        if collection1_id is None:
            print_warning("First collection is not tracked anymore; skipping cross-check")
            return

        status1, data1 = await self.get_collection_documents(collection1_id)
        status2, data2 = await self.get_collection_documents(collection2_id)

        print_json(data1, "Collection 1 Documents")
        print_json(data2, "Collection 2 Documents")

        # Extract file_ids from both collections to verify they match.
        # ``or`` guards against fields that are present but null.
        docs1 = data1.get("documents") or []
        docs2 = data2.get("documents") or []

        file_ids_1 = [(d.get("file_metadata") or {}).get("file_id") for d in docs1]
        file_ids_2 = [(d.get("file_metadata") or {}).get("file_id") for d in docs2]

        if file_id in file_ids_1 and file_id in file_ids_2:
            print_success(f"✅ CONFIRMED: file_id {file_id} is IDENTICAL in both collections!")
            print_info("   → We can store ONE xaiFileId per document!")
            print_info("   → Simply track which collections contain it!")
        else:
            print_warning(
                f"Could not confirm file_id {file_id} in both document listings "
                f"(statuses {status1}/{status2}); check the response structure above"
            )
    
    async def test_document_update(self, collection_id: str, file_id: str):
        """Test 5: patch a document's metadata and record ``"document_update"``."""
        print_header("TEST 5: Update Document Metadata")

        print_info("Updating document metadata...")
        new_metadata = {"updated_at": datetime.now().isoformat(), "version": 2}
        status, data = await self.update_collection_document(
            collection_id, file_id, metadata=new_metadata
        )
        print_json(data, "Update Response")

        succeeded = status in (200, 201)
        if succeeded:
            print_success("Document metadata updated")
        else:
            print_error("Failed to update document")
        self.test_results["document_update"] = succeeded
    
    async def test_document_removal(self):
        """Test 6: remove the first uploaded file from the first collection.

        Skipped (no result recorded) unless at least two collections and one
        uploaded file are tracked. Otherwise records ``"document_removal"``.
        """
        print_header("TEST 6: Remove Document from Collection")

        enough_collections = len(self.created_collections) >= 2
        if not (enough_collections and self.uploaded_files):
            print_warning("Skipping - need at least 2 collections and 1 file")
            return

        collection_id, file_id = self.created_collections[0], self.uploaded_files[0]

        print_info(f"Removing file {file_id} from collection {collection_id}...")
        removal_status, removal_body = await self.remove_document_from_collection(
            collection_id, file_id
        )
        print_json(removal_body, "Response")

        if removal_status not in (200, 204):
            print_error("Failed to remove document")
            self.test_results["document_removal"] = False
            return

        print_success("Document removed from collection")

        # Verify removal (the listing is printed for inspection only)
        print_info("Verifying removal...")
        _, remaining = await self.get_collection_documents(collection_id)
        print_json(remaining, "Remaining Documents")

        self.test_results["document_removal"] = True
    
    async def test_batch_get(self):
        """Test 7: Batch get documents.

        Requests all tracked file IDs from the most recently created
        collection. Skipped (no result recorded) when no collection or no
        uploaded file is tracked; otherwise records ``"batch_get"``.
        """
        print_header("TEST 7: Batch Get Documents")

        # Single guard: the former second "no file IDs" check was unreachable
        # because an empty file list already returns here.
        if not self.created_collections or not self.uploaded_files:
            print_warning("Skipping - need collections and files")
            return

        collection_id = self.created_collections[-1]  # Use last collection
        file_ids = self.uploaded_files

        print_info(f"Batch getting {len(file_ids)} documents...")
        status, data = await self.batch_get_documents(collection_id, file_ids)
        print_json(data, "Batch Response")

        self.test_results["batch_get"] = status in (200, 201)
    
    async def cleanup(self):
        """Clean up all created test resources (best effort).

        Deletes every tracked collection, then every tracked uploaded file.
        Failures are reported as warnings and never raised, so one failing
        deletion does not prevent the remaining ones.
        """
        print_header("CLEANUP: Deleting Test Resources")
        failures = 0

        # Delete collections (should cascade delete documents?)
        # Iterate over a copy: delete_collection() removes entries on success.
        for collection_id in list(self.created_collections):
            print_info(f"Deleting collection {collection_id}...")
            status, _ = await self.delete_collection(collection_id)
            if status not in (200, 204):
                failures += 1
                print_warning(f"Could not delete collection {collection_id} (status {status})")

        # Uploaded files live in the Files API independently of collections,
        # so they must be removed separately or they are leaked.
        # NOTE(review): assumes the Files API exposes DELETE /v1/files/{file_id}
        # - confirm against the xAI API reference.
        for file_id in list(self.uploaded_files):
            print_info(f"Deleting file {file_id}...")
            status, _ = await self._request(
                "DELETE", f"/v1/files/{file_id}", use_files_api=True
            )
            if status in (200, 204):
                self.uploaded_files.remove(file_id)
            else:
                failures += 1
                print_warning(f"Could not delete file {file_id} (status {status})")

        if failures:
            print_warning(f"Cleanup finished with {failures} resource(s) not deleted")
        else:
            print_success("Cleanup complete")
    
    def print_summary(self):
        """Print one PASS/FAIL line per recorded test, the totals, and the
        conclusion drawn from the ``"shared_file"`` test if it ran."""
        print_header("TEST RESULTS SUMMARY")

        outcomes = self.test_results
        passed = len([ok for ok in outcomes.values() if ok])
        total = len(outcomes)

        for test_name, ok in outcomes.items():
            if ok:
                status = "✅ PASS"
            else:
                status = "❌ FAIL"
            print(f"{status} - {test_name}")

        print(f"\n{Colors.BOLD}Total: {passed}/{total} tests passed{Colors.END}\n")

        # Critical findings
        print_header("CRITICAL FINDINGS")

        # Nothing to conclude when the shared-file test never ran.
        if "shared_file" not in outcomes:
            return

        if outcomes["shared_file"]:
            print_success("✅ Same file CAN be used in multiple collections")
            print_info("   → We can use a SINGLE xaiFileId per document!")
            print_info("   → Much simpler architecture!")
        else:
            print_error("❌ Files seem to be collection-specific")
            print_warning("   → More complex mapping required")
            print_warning("   → Each collection might need separate file upload")


async def main():
    """Run all tests against the live APIs, then clean up and print a summary.

    Cleanup and the summary run on every exit path (normal completion, early
    stop, unexpected exception or cancellation), so test resources are not
    left behind.
    """
    import traceback

    def masked(secret: str) -> str:
        # Show just enough to tell keys apart; never print a whole (or nearly
        # whole) credential, which slicing alone would do for short keys.
        if len(secret) <= 12:
            return "***"
        return f"{secret[:4]}...{secret[-4:]}"

    print_header("xAI Collections API Test Suite")
    print_info(f"Management URL: {XAI_MANAGEMENT_URL}")
    print_info(f"Files URL: {XAI_FILES_URL}")
    print_info(f"Management Key: {masked(XAI_MANAGEMENT_KEY)}")
    print_info(f"API Key: {masked(XAI_API_KEY)}")

    async with XAICollectionsTestClient() as client:
        try:
            # Test 1: Basic Collection CRUD
            collection_id = await client.test_basic_collection_crud()

            if not collection_id:
                print_error("Cannot continue without collection. Stopping.")
                return

            # Test 2: File Upload (now two-step process)
            file_id = await client.test_file_upload_and_structure(collection_id)

            if not file_id:
                # Every remaining test needs an uploaded file.
                print_error("File upload failed. Skipping document tests.")
            else:
                # Test 3: Verify document in collection
                await client.test_document_in_collection(collection_id, file_id)

                # Test 4: CRITICAL - Shared file test
                await client.test_shared_file_across_collections(file_id)

                # Test 5: Update document
                await client.test_document_update(collection_id, file_id)

                # Test 6: Remove document
                await client.test_document_removal()

                # Test 7: Batch get
                await client.test_batch_get()

        except Exception as e:
            print_error(f"Test suite failed: {e}")
            traceback.print_exc()

        finally:
            # Single cleanup path for success, early return and failure.
            try:
                await client.cleanup()
            except Exception as cleanup_error:
                # Best effort: report instead of silently discarding.
                print_warning(f"Cleanup failed: {cleanup_error}")

            client.print_summary()


# Script entry point: run the async test suite on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())