import os
import psycopg2
from dotenv import load_dotenv
from psycopg2.extras import execute_values

# Load environment variables
load_dotenv()

def connect_to_db():
    """Create a connection to the database"""
    try:
        conn = psycopg2.connect(os.getenv('DATABASE_URL'))
        return conn
    except Exception as e:
        print(f"Error connecting to database: {str(e)}")
        raise

def delete_empty_rows(conn):
    """Delete rows where key identifying fields are all null or empty strings"""
    try:
        cursor = conn.cursor()
        
        # First, let's check how many potentially empty rows exist
        check_query = """
        SELECT COUNT(*) 
        FROM contacts c
        WHERE (c.company_name IS NULL OR TRIM(c.company_name) = '')
            AND (c.company_domain IS NULL OR TRIM(c.company_domain) = '')
            AND (c.job_title IS NULL OR TRIM(c.job_title) = '')
            AND (c.contact_name IS NULL OR TRIM(c.contact_name) = '')
            AND (c.contact_first_name IS NULL OR TRIM(c.contact_first_name) = '')
            AND (c.contact_middle_name IS NULL OR TRIM(c.contact_middle_name) = '');
        """
        
        cursor.execute(check_query)
        potential_deletions = cursor.fetchone()[0]
        print(f"Found {potential_deletions} potentially empty rows")
        
        # Check for user_contact relationships only
        fk_check_query = """
        SELECT c.id, 
               (SELECT COUNT(*) FROM user_contact uc WHERE uc.contact_id = c.id) as user_contact_count
        FROM contacts c
        WHERE (c.company_name IS NULL OR TRIM(c.company_name) = '')
            AND (c.company_domain IS NULL OR TRIM(c.company_domain) = '')
            AND (c.job_title IS NULL OR TRIM(c.job_title) = '')
            AND (c.contact_name IS NULL OR TRIM(c.contact_name) = '')
            AND (c.contact_first_name IS NULL OR TRIM(c.contact_first_name) = '')
            AND (c.contact_middle_name IS NULL OR TRIM(c.contact_middle_name) = '')
        LIMIT 5;
        """
        
        cursor.execute(fk_check_query)
        sample_rows = cursor.fetchall()
        
        if sample_rows:
            print("\nSample of rows that couldn't be deleted:")
            for row in sample_rows:
                print(f"Contact ID: {row[0]}")
                print(f"- Associated user_contact records: {row[1]}")
                print("---")
        
        # Proceed with deletion, only checking user_contact constraint
        delete_query = """
        DELETE FROM contacts c
        WHERE (c.company_name IS NULL OR TRIM(c.company_name) = '')
            AND (c.company_domain IS NULL OR TRIM(c.company_domain) = '')
            AND (c.job_title IS NULL OR TRIM(c.job_title) = '')
            AND (c.contact_name IS NULL OR TRIM(c.contact_name) = '')
            AND (c.contact_first_name IS NULL OR TRIM(c.contact_first_name) = '')
            AND (c.contact_middle_name IS NULL OR TRIM(c.contact_middle_name) = '')
            AND NOT EXISTS (
                SELECT 1 FROM user_contact uc WHERE uc.contact_id = c.id
            );
        """
        
        cursor.execute(delete_query)
        deleted_rows = cursor.rowcount
        conn.commit()
        
        print(f"\nDeletion Summary:")
        print(f"- Potentially empty rows found: {potential_deletions}")
        print(f"- Successfully deleted rows: {deleted_rows}")
        if potential_deletions != deleted_rows:
            print(f"- Rows not deleted due to user_contact constraints: {potential_deletions - deleted_rows}")
        
    except Exception as e:
        conn.rollback()
        print(f"Error deleting empty rows: {str(e)}")
        raise
    finally:
        cursor.close()

def main():
    conn = connect_to_db()
    try:
        delete_empty_rows(conn)
    finally:
        conn.close()

if __name__ == "__main__":
    main()