Skip to content

Commit ca322ef

Browse files
author
Neagu Marinel
authored
Merge branch 'larymak:main' into main
2 parents cc03ffd + c3e0965 commit ca322ef

File tree

1,800 files changed

+148123
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,800 files changed

+148123
-0
lines changed
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
<!--Please do not remove this part-->
2+
![Star Badge](https://img.shields.io/static/v1?label=%F0%9F%8C%9F&message=If%20Useful&style=flat&color=BC4E99)
3+
![Open Source Love](https://badges.frapsoft.com/os/v1/open-source.svg?v=103)
4+
5+
# PDF Page Color Counter
6+
7+
## 🛠️ Description
8+
This Python project provides a simple yet powerful tool for analyzing PDF documents and counting the number of black and color pages. Whether you're working on document analysis, quality control, or just curious about the composition of your PDF files, this code helps you gain insights into the document's visual characteristics.
9+
10+
**Key Features:**
11+
12+
* Easy Integration: With a few lines of code, you can integrate this functionality into your Python applications or workflows.
13+
14+
* PDF Expertise: Utilizing the PyMuPDF (MuPDF) library, this project efficiently processes PDF files, making it suitable for a wide range of applications.
15+
16+
* Color Page Detection: It accurately identifies color and black & white pages within the PDF document, providing valuable statistics.
17+
18+
* Use Cases: This code can be employed in various scenarios, such as document archiving, printing optimization, or content analysis.
19+
20+
## ⚙️ Languages or Frameworks Used
21+
- **Python**: The primary programming language used for the project.
22+
- **FastAPI**: A modern, fast (high-performance) web framework for building APIs with Python.
23+
- **PyMuPDF (MuPDF)**: A lightweight and efficient PDF processing library for Python.
24+
- **OpenCV**: Used for image analysis and processing.
25+
- **Pillow (PIL)**: Python Imaging Library for working with images.
26+
27+
## 🌟 How to run
28+
- ### Install all the requirements
29+
Run `pip install -r requirements.txt` to install all the requirements.
30+
- ### Set up a Virtual Environment
31+
32+
- Run this command in your terminal `python -m venv myenv`.
33+
- Change your directory by `cd myenv/Scripts` if on Windows (`cd myenv/bin` on Linux/macOS).
34+
- Activate the virtual environment by running this command `source activate`.
35+
- Move back out of the virtual env folder to your **Project Directory** with `cd ../..`.
36+
- Install the packages if not present - `uvicorn`, `fastapi`, `fitz`, `frontend`, `tools`, `opencv-python`, `pillow`, `python-multipart`, `PyMuPDF`.
37+
```
38+
pip install uvicorn fastapi fitz frontend tools opencv-python pillow python-multipart PyMuPDF
39+
```
40+
41+
- ### Now Just, Run the project
42+
43+
- Now run the following command: `uvicorn main:app --reload`.
44+
- Open the localhost link in your browser and append `/docs` to the URL to see the FastAPI docs UI.
45+
![Screenshot 2023-10-25 134746](https://github.com/Om25091210/Count-Color-Black-Pages-PDF/assets/74484315/2b5b64a2-1c00-4a5a-ab7c-99fb30e7aba6)
46+
47+
- Now, click on **POST** and then **Try it out**.
48+
- Click on **Choose file** to select the PDF whose black and color pages you want to count.
49+
- Click on **Execute**.
50+
51+
52+
## 📺 Demo
53+
![Screenshot 2023-10-25 133406](https://github.com/Om25091210/Count-Color-Black-Pages-PDF/assets/74484315/a84def7c-7db4-4ab5-bf0b-f8cfe5ded66b)
54+
55+
56+
## 🤖 Author
57+
58+
Github - [OM YADAV](https://github.com/Om25091210)
59+
LinkedIn - [OM YADAV](https://www.linkedin.com/in/omyadav)
60+
61+
62+
63+
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
from fastapi import FastAPI, UploadFile, File
2+
import fitz
3+
import cv2
4+
from PIL import Image
5+
import numpy as np
6+
import os
7+
8+
app = FastAPI()
9+
10+
def _page_has_color(pixels):
    """Return True if any pixel of an H x W x 3 RGB array is not pure grey.

    A pixel is grey exactly when its three channels are equal, so the page
    contains colour as soon as one pixel has differing channels. Comparing
    per pixel (rather than comparing channel averages) also catches pages
    whose colours happen to average out to the same value in every channel.
    """
    red, green, blue = pixels[..., 0], pixels[..., 1], pixels[..., 2]
    return bool(np.any(red != green) or np.any(green != blue))


@app.post("/")
async def get_pdf(file: UploadFile = File(...)):
    """Count the colour and black & white pages of an uploaded document.

    Each page is rendered to an RGB bitmap and classified as coloured when
    at least one pixel is not grey.

    Args:
        file: The uploaded document (multipart form field).

    Returns:
        A dict with the number of coloured pages under ``"colored : "`` and
        the number of black & white pages under ``"Black Count : "``.

    Raises:
        Whatever ``fitz.open`` raises when the upload is not a readable
        document.
    """
    contents = await file.read()

    # The client-supplied filename is untrusted, so it is never used as a
    # path: the document is opened straight from memory. This removes the
    # path-traversal / overwrite risk and leaves no file behind when
    # processing fails. Only the extension is used, as a type hint.
    extension = os.path.splitext(file.filename or "")[1]

    color_list = []
    black_list = []
    with fitz.open(stream=contents, filetype=extension or "pdf") as doc:
        for page_number, page in enumerate(doc, start=1):
            # Render without alpha so every pixel is exactly three bytes.
            pix = page.get_pixmap(alpha=False)
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            if _page_has_color(np.array(img)):
                color_list.append(page_number)
            else:
                black_list.append(page_number)

    print("\nColored Pages: ", color_list, "\n")
    print("Black & White Pages: ", black_list)
    return {"colored : ": len(color_list), "Black Count : ": len(black_list)}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
anyio==3.6.2
2+
click==8.1.3
3+
colorama==0.4.6
4+
fastapi==0.92.0
5+
h11==0.14.0
6+
idna==3.4
7+
numpy==1.24.2
8+
opencv-python==4.7.0.72
9+
Pillow==9.4.0
10+
pydantic==1.10.5
11+
PyMuPDF==1.21.1
12+
python-multipart==0.0.6
13+
sniffio==1.3.0
14+
starlette==0.25.0
15+
typing_extensions==4.5.0
16+
uvicorn==0.20.0
Loading
Loading
9.37 KB
Binary file not shown.

AUTOMATION/Web_Scraper/README.md

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Introduction
2+
3+
This Python program is a web scraper that extracts data about graphics cards from a specific website. It uses the BeautifulSoup library to parse the HTML content of the website and the requests library to fetch the web page.
4+
5+
## Requirements
6+
7+
- Python 3.x
8+
- BeautifulSoup library (`beautifulsoup4`)
9+
- Requests library (`requests`)
10+
- Openpyxl library (`openpyxl`)
11+
12+
You can install the required libraries using pip:
13+
14+
```
15+
pip install beautifulsoup4 requests openpyxl
16+
```
17+
18+
## How to Use
19+
20+
1. Clone this repository or download the files.
21+
22+
2. Open a terminal or command prompt and navigate to the project directory.
23+
24+
3. Run the Python script `app.py`:
25+
26+
```
27+
python app.py
28+
```
29+
30+
4. The program will start scraping data from the website and display the brand, name, and price of each graphics card on the console.
31+
32+
5. Once the scraping is complete, the program will save the data to an Excel file named `Graphics Card.xlsx`.
33+
34+
## Configuration
35+
36+
You can modify the URL in the `scrape_graphics_cards_data()` function inside the `app.py` file to scrape data from a different website or adjust the parameters as needed.
37+
38+
## Output
39+
40+
The program will generate an Excel file `Graphics Card.xlsx` containing the scraped data. Each row in the Excel file represents a graphics card and includes the columns `Brand`, `Name`, and `Price`.
41+
42+
## Disclaimer
43+
44+
This web scraper is provided for educational and informational purposes only. Please be respectful of the website's terms of service and scraping policies. Always obtain proper authorization before scraping any website, and use the scraper responsibly and ethically.

AUTOMATION/Web_Scraper/app.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
from bs4 import BeautifulSoup
2+
import requests
3+
import openpyxl
4+
5+
6+
def extract_brand_name_and_title(name):
    """Split a product name into its brand and the remaining title.

    The first whitespace-separated word is the brand; everything after it
    is the title. Surrounding whitespace is ignored.

    Args:
        name: Full product name, e.g. ``"ASUS GeForce RTX 3060"``.

    Returns:
        A ``(brand, title)`` tuple of strings. The title is ``""`` when the
        name is a single word, and both are ``""`` when the name is blank.
    """
    # split(None, 1) tolerates runs of whitespace and, unlike unpacking
    # split(' ', 1), does not raise when there is no second word.
    parts = name.strip().split(None, 1)
    if not parts:
        return "", ""
    brand = parts[0]
    title = parts[1] if len(parts) > 1 else ""
    return brand, title
10+
11+
12+
def scrape_graphics_cards_data(url=None, output_path='Graphics Card.xlsx', timeout=30):
    """Scrape graphics-card listings and save them to an Excel workbook.

    Fetches the listing page, prints the brand, name and price of every
    product card found, and writes the same rows to a worksheet titled
    ``price`` with the header ``Brand``, ``Name``, ``Price``.

    Args:
        url: Listing page to scrape. Defaults to the techlandbd.com
            graphics-card listing sorted by ascending price.
        output_path: Where the workbook is saved.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        None. Failures are reported on stdout instead of being raised.
    """
    if url is None:
        url = 'https://www.techlandbd.com/pc-components/graphics-card?sort=p.price&order=ASC&fq=1&limit=100'

    try:
        # Create a new Excel workbook and set up the worksheet
        excel = openpyxl.Workbook()
        sheet = excel.active
        sheet.title = "price"
        sheet.append(['Brand', 'Name', 'Price'])

        # Without a timeout a stalled server would hang the script forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Guard the container lookup so a changed page layout is reported
        # clearly instead of as an AttributeError on None.
        grid = soup.find('div', class_='main-products product-grid')
        if grid is None:
            print("An error occurred:", "product grid not found on the page")
            return
        cards = grid.find_all('div', class_='product-layout has-extra-button')

        for card in cards:
            name_box = card.find('div', class_='name')
            price_box = card.find('div', class_='price')
            name_link = name_box.a if name_box is not None else None
            price_tag = price_box.span if price_box is not None else None
            if name_link is None or price_tag is None:
                # Skip a malformed card rather than losing the whole run.
                continue

            name = name_link.text.strip()
            price = price_tag.text.strip()

            try:
                brand, title = extract_brand_name_and_title(name)
            except ValueError:
                # A one-word name cannot be split; treat it all as the brand.
                brand, title = name, ''

            print(brand, title, price)
            sheet.append([brand, title, price])

        excel.save(output_path)

    except Exception as e:
        # Top-level boundary of the script: report the failure and return.
        print("An error occurred:", e)
50+
51+
52+
if __name__ == "__main__":
53+
# Call the main scraping function
54+
scrape_graphics_cards_data()
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Genetic Algorithms in Problem Solving
2+
3+
## Overview
4+
This repository contains implementations of genetic algorithms (GAs) applied to solve various problems. Genetic algorithms are a family of optimization algorithms inspired by the process of natural selection. They are commonly used to find solutions for complex, non-linear, and multi-objective optimization problems. This collection demonstrates the application of GAs to address different problem domains.
5+
6+
7+
## Problem Domains
8+
- [Knapsack Problem](./knapsack/): Applying GAs to find the best combination of items within a weight limit.
9+
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
import random
2+
import matplotlib.pyplot as plt
3+
4+
"""
5+
This program uses a genetic algorithm to solve the 0/1 Knapsack problem.
6+
In the Knapsack problem, you are given a set of items, each with a value and a weight,
7+
and a knapsack with a weight limit. The goal is to select a combination of items
8+
to maximize the total value without exceeding the weight limit.
9+
This genetic algorithm iteratively evolves a population of candidate solutions to find the best combination.
10+
11+
Knapsack Problem Parameters:
12+
- weight_limit: The weight limit of the knapsack.
13+
- item_list: A list of items, where each item is represented as (value, weight).
14+
15+
Genetic Algorithm Parameters:
16+
- population_size: The size of the population.
17+
- max_generations: The maximum number of generations to run.
18+
- mutation_rate: The probability of mutation for each gene in the chromosome.
19+
- chromosome_length: The number of genes in each chromosome.
20+
"""
21+
22+
# Knapsack instance: the capacity and one (value, weight) pair per item.
weight_limit = 56
item_list = [
    (17, 1),
    (78, 20),
    (56, 34),
    (2, 15),
    (34, 21),
    (3, 10),
]

# Genetic-algorithm settings.
population_size = 100
max_generations = 300
mutation_rate = 0.5  # probability of flipping each individual gene
chromosome_length = len(item_list)  # one gene per item
31+
32+
33+
def initialize_population():
    """Build the starting population of random bit-string chromosomes.

    Returns:
        A list of ``population_size`` chromosomes, each a list of
        ``chromosome_length`` genes drawn independently from {0, 1}.
    """
    return [
        [random.randint(0, 1) for _gene in range(chromosome_length)]
        for _individual in range(population_size)
    ]
40+
41+
42+
def calculate_fitness(chromosome, items=None, limit=None):
    """Score a chromosome as the total value of the items it selects.

    Args:
        chromosome: Sequence of 0/1 genes; gene ``i`` set to 1 selects
            item ``i``.
        items: Sequence of ``(value, weight)`` pairs. Defaults to the
            module-level ``item_list``.
        limit: Maximum total weight allowed. Defaults to the module-level
            ``weight_limit``.

    Returns:
        The summed value of the selected items, or 0 as soon as the running
        weight exceeds the limit (the selection is infeasible).
    """
    if items is None:
        items = item_list
    if limit is None:
        limit = weight_limit

    total_value = 0
    total_weight = 0
    for gene, (value, weight) in zip(chromosome, items):
        if gene == 1:
            total_value += value
            total_weight += weight
        if total_weight > limit:
            return 0  # Violates weight constraint
    return total_value
53+
54+
55+
def selection(population):
    """Pick parents by fitness-proportionate (roulette-wheel) sampling.

    Args:
        population: List of chromosomes to sample from, with replacement.

    Returns:
        A list of ``population_size`` chromosomes, each drawn with
        probability proportional to its fitness. An empty population
        yields an empty list.
    """
    if not population:
        return []

    # Score every chromosome once instead of once per draw.
    fitnesses = [calculate_fitness(chromosome) for chromosome in population]

    if not any(fitnesses):
        # Every candidate is infeasible (fitness 0). Sample uniformly so the
        # population keeps its diversity instead of collapsing onto a single
        # chromosome; random.choices also rejects all-zero weights.
        return [random.choice(population) for _ in range(population_size)]

    return random.choices(population, weights=fitnesses, k=population_size)
68+
69+
70+
def crossover(parent1, parent2):
    """Perform one-point crossover to create two children.

    A cut point is chosen so that each child receives at least one gene
    from each parent; the children swap tails at that point.

    Args:
        parent1: First parent chromosome (list of genes).
        parent2: Second parent chromosome (list of genes).

    Returns:
        A ``(child1, child2)`` tuple of new lists. Parents shorter than two
        genes have no interior cut point and are returned as plain copies.
    """
    # Use the parents' own length rather than a module-level constant so the
    # cut point is always valid for the chromosomes actually passed in.
    length = min(len(parent1), len(parent2))
    if length < 2:
        # randint(1, 0) would raise; there is nothing to recombine.
        return parent1[:], parent2[:]

    crossover_point = random.randint(1, length - 1)
    child1 = parent1[:crossover_point] + parent2[crossover_point:]
    child2 = parent2[:crossover_point] + parent1[crossover_point:]
    return child1, child2
76+
77+
78+
def mutation(chromosome, rate=None):
    """Return a copy of a chromosome with genes randomly flipped.

    Args:
        chromosome: List of 0/1 genes; it is not modified.
        rate: Probability of flipping each gene independently. Defaults to
            the module-level ``mutation_rate``.

    Returns:
        A new list of the same length in which each gene ``g`` has been
        replaced by ``1 - g`` with probability ``rate``.
    """
    if rate is None:
        rate = mutation_rate
    # Iterate over the chromosome itself (not a global length) so any
    # chromosome size is handled; one random draw is made per gene.
    return [1 - gene if random.random() < rate else gene for gene in chromosome]
85+
86+
87+
def genetic_algorithm():
    """Evolve a population and return the best knapsack solution found.

    Each generation selects parents by fitness, recombines random pairs
    with one-point crossover, mutates the children, and replaces the
    population with them.

    Returns:
        A ``(best_chromosome, best_fitness, fitness_history)`` tuple where
        ``best_chromosome`` is the fittest chromosome seen in any
        generation (including the initial population), ``best_fitness`` is
        its fitness, and ``fitness_history`` lists the best fitness of each
        generation's offspring.
    """
    population = initialize_population()
    fitness_history = []

    # Remember the best individual ever seen: with no elitism and a high
    # mutation rate, the final generation is often worse than an earlier one.
    best_chromosome = max(population, key=calculate_fitness)
    best_fitness = calculate_fitness(best_chromosome)

    for _ in range(max_generations):
        parents = selection(population)

        offspring = []
        while len(offspring) < population_size:
            parent1 = random.choice(parents)
            parent2 = random.choice(parents)
            child1, child2 = crossover(parent1, parent2)
            offspring.extend([mutation(child1), mutation(child2)])
        # Children arrive in pairs; drop the surplus one when the
        # population size is odd so the population does not grow.
        del offspring[population_size:]

        population = offspring

        generation_best = max(population, key=calculate_fitness)
        generation_fitness = calculate_fitness(generation_best)
        fitness_history.append(generation_fitness)

        if generation_fitness > best_fitness:
            best_chromosome = generation_best
            best_fitness = generation_fitness

    return best_chromosome, best_fitness, fitness_history
111+
112+
113+
# Evolve a packing, then report the winning chromosome and its value.
best_solution, best_fitness_value, fitness_history = genetic_algorithm()
print("Best Solution:", best_solution)
print("Best Fitness Value:", best_fitness_value)

# Chart the best fitness recorded for each generation.
plt.plot(fitness_history)
plt.xlabel('Generation')
plt.ylabel('Fitness')
plt.title('Fitness History')
plt.show()

0 commit comments

Comments
 (0)