import pandas as pd
import os
import math

def split_excel_file(input_file, chunk_size=1000, output_dir='split_output'):
    """
    Split an Excel file into multiple smaller Excel files.

    The input is loaded in full with ``pd.read_excel`` using its default
    arguments, then written out in consecutive slices of at most
    ``chunk_size`` rows. Each slice is saved in ``output_dir`` as
    ``<input base name>_chunk_<n>.xlsx`` (``n`` starts at 1) without the
    DataFrame index, and one progress line is printed per file. An input
    with no data rows produces no output files.

    Args:
        input_file (str): Path to the input Excel file
        chunk_size (int): Maximum number of rows per chunk; must be >= 1
        output_dir (str): Directory to save the output files (created if
            it does not exist)

    Returns:
        list[str]: Paths of the chunk files that were written, in order.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    # Reject bad sizes up front: 0 would otherwise fail with a bare
    # ZeroDivisionError, and a negative value would silently write nothing.
    if chunk_size < 1:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )

    # Read the input before touching the output location so that a missing
    # or unreadable file does not leave an empty directory behind.
    df = pd.read_excel(input_file)

    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(output_dir, exist_ok=True)

    total_rows = len(df)
    num_chunks = math.ceil(total_rows / chunk_size)

    # Get the base filename without extension
    base_filename = os.path.splitext(os.path.basename(input_file))[0]

    written_paths = []
    # Step through the frame chunk_size rows at a time; iloc clips the final
    # slice at the end of the data, so no explicit min() is needed.
    for chunk_no, start_idx in enumerate(
        range(0, total_rows, chunk_size), start=1
    ):
        chunk_df = df.iloc[start_idx:start_idx + chunk_size]

        output_file = f"{base_filename}_chunk_{chunk_no}.xlsx"
        output_path = os.path.join(output_dir, output_file)

        # Save chunk to Excel file (index=False keeps the row index out of
        # the sheet so the chunks have the same columns as the input)
        chunk_df.to_excel(output_path, index=False)
        written_paths.append(output_path)
        print(f"Created chunk {chunk_no}/{num_chunks}: {output_file}")

    return written_paths

if __name__ == "__main__":
    # Example usage.
    # Point this at the workbook to split. The input files are usually
    # numbered 1 through 6 (Entire_File1.xlsx ... Entire_File6.xlsx).
    source_path = "input_files/Entire_File6.xlsx"
    # Rows written to each output file; adjust as needed.
    rows_per_chunk = 1000

    split_excel_file(source_path, chunk_size=rows_per_chunk)