import math
import os

import pandas as pd


def split_excel_file(input_file, chunk_size=1000, output_dir='split_output'):
    """
    Split an Excel file into multiple chunks

    The workbook is loaded with ``pd.read_excel`` using its default
    arguments, cut into consecutive row ranges of at most ``chunk_size``
    rows, and each range is written to
    ``<output_dir>/<input base name>_chunk_<n>.xlsx`` (``n`` starts at 1)
    without the DataFrame index. A progress line is printed per chunk.

    Args:
        input_file (str): Path to the input Excel file
        chunk_size (int): Number of rows per chunk; must be at least 1
        output_dir (str): Directory to save the output files (created if
            it does not exist)

    Returns:
        list: Paths of the chunk files written, in order. Empty when the
        input contains no data rows, in which case no file is created.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    # Validate before touching the filesystem: a zero chunk size would
    # otherwise surface as a ZeroDivisionError after the directory has been
    # created, and a negative one would silently produce no output at all.
    if chunk_size < 1:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    # Read the Excel file
    df = pd.read_excel(input_file)

    # Number of chunks is only needed for the progress message.
    total_rows = len(df)
    num_chunks = math.ceil(total_rows / chunk_size)

    # Get the base filename without extension
    base_filename = os.path.splitext(os.path.basename(input_file))[0]

    output_paths = []
    # Step through the rows chunk_size at a time; chunk numbers are 1-based.
    for chunk_no, start_idx in enumerate(
        range(0, total_rows, chunk_size), start=1
    ):
        # iloc slicing clamps at the end of the frame, so the final chunk
        # simply holds the remaining rows.
        chunk_df = df.iloc[start_idx:start_idx + chunk_size]

        # Generate output filename
        output_file = f"{base_filename}_chunk_{chunk_no}.xlsx"
        output_path = os.path.join(output_dir, output_file)

        # Save chunk to Excel file
        chunk_df.to_excel(output_path, index=False)
        output_paths.append(output_path)
        print(f"Created chunk {chunk_no}/{num_chunks}: {output_file}")

    return output_paths


if __name__ == "__main__":
    # Example usage
    # Replace with your file path.
    # NOTE(review): the input files are presumably numbered 1 through 6
    # (Entire_File1.xlsx ... Entire_File6.xlsx) - confirm.
    input_file = "input_files/Entire_File6.xlsx"
    chunk_size = 1000  # Adjust chunk size as needed

    split_excel_file(input_file, chunk_size)