import os import psycopg2 from dotenv import load_dotenv from psycopg2.extras import execute_values # Load environment variables load_dotenv() def connect_to_db(): """Create a connection to the database""" try: conn = psycopg2.connect(os.getenv('DATABASE_URL')) return conn except Exception as e: print(f"Error connecting to database: {str(e)}") raise def delete_empty_rows(conn): """Delete rows where key identifying fields are all null or empty strings""" try: cursor = conn.cursor() # First, let's check how many potentially empty rows exist check_query = """ SELECT COUNT(*) FROM contacts c WHERE (c.company_name IS NULL OR TRIM(c.company_name) = '') AND (c.company_domain IS NULL OR TRIM(c.company_domain) = '') AND (c.job_title IS NULL OR TRIM(c.job_title) = '') AND (c.contact_name IS NULL OR TRIM(c.contact_name) = '') AND (c.contact_first_name IS NULL OR TRIM(c.contact_first_name) = '') AND (c.contact_middle_name IS NULL OR TRIM(c.contact_middle_name) = ''); """ cursor.execute(check_query) potential_deletions = cursor.fetchone()[0] print(f"Found {potential_deletions} potentially empty rows") # Check for user_contact relationships only fk_check_query = """ SELECT c.id, (SELECT COUNT(*) FROM user_contact uc WHERE uc.contact_id = c.id) as user_contact_count FROM contacts c WHERE (c.company_name IS NULL OR TRIM(c.company_name) = '') AND (c.company_domain IS NULL OR TRIM(c.company_domain) = '') AND (c.job_title IS NULL OR TRIM(c.job_title) = '') AND (c.contact_name IS NULL OR TRIM(c.contact_name) = '') AND (c.contact_first_name IS NULL OR TRIM(c.contact_first_name) = '') AND (c.contact_middle_name IS NULL OR TRIM(c.contact_middle_name) = '') LIMIT 5; """ cursor.execute(fk_check_query) sample_rows = cursor.fetchall() if sample_rows: print("\nSample of rows that couldn't be deleted:") for row in sample_rows: print(f"Contact ID: {row[0]}") print(f"- Associated user_contact records: {row[1]}") print("---") # Proceed with deletion, only checking user_contact constraint delete_query = """ DELETE FROM contacts c WHERE (c.company_name IS NULL OR TRIM(c.company_name) = '') AND (c.company_domain IS NULL OR TRIM(c.company_domain) = '') AND (c.job_title IS NULL OR TRIM(c.job_title) = '') AND (c.contact_name IS NULL OR TRIM(c.contact_name) = '') AND (c.contact_first_name IS NULL OR TRIM(c.contact_first_name) = '') AND (c.contact_middle_name IS NULL OR TRIM(c.contact_middle_name) = '') AND NOT EXISTS ( SELECT 1 FROM user_contact uc WHERE uc.contact_id = c.id ); """ cursor.execute(delete_query) deleted_rows = cursor.rowcount conn.commit() print(f"\nDeletion Summary:") print(f"- Potentially empty rows found: {potential_deletions}") print(f"- Successfully deleted rows: {deleted_rows}") if potential_deletions != deleted_rows: print(f"- Rows not deleted due to user_contact constraints: {potential_deletions - deleted_rows}") except Exception as e: conn.rollback() print(f"Error deleting empty rows: {str(e)}") raise finally: cursor.close() def main(): conn = connect_to_db() try: delete_empty_rows(conn) finally: conn.close() if __name__ == "__main__": main()