-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget matches urls.py
119 lines (56 loc) · 1.69 KB
/
get matches urls.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python
# coding: utf-8
# In[6]:
from selenium import webdriver
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
import os
import requests
import time
from collections import defaultdict
from selenium.webdriver.common.by import By
# In[2]:
# Cricbuzz fixtures page for the IPL 2023 series; every match link is scraped from here.
cricbuzz_url = 'https://www.cricbuzz.com/cricket-series/5945/indian-premier-league-2023/matches'

# Run Chrome without a visible window.
# NOTE(review): '--no-sandbox' and '--disable-dev-shm-usage' are the flags usually
# needed when Chrome runs inside a container/hosted notebook - presumably the
# environment this was written in; confirm they are still wanted elsewhere.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')

# Pass the options by keyword only.  Newer Selenium 4 releases no longer accept a
# driver path as the first positional argument (that slot is now `options`), so
# `webdriver.Chrome('chromedriver', options=...)` raises a TypeError there.  On
# older releases the executable path already defaulted to 'chromedriver', so
# dropping the argument does not change which driver binary is used.
driver = webdriver.Chrome(options=chrome_options)
# In[30]:
# Load the fixtures page in the headless browser and snapshot the resulting HTML.
try:
    driver.get(cricbuzz_url)
    cricbuzz_soup = BeautifulSoup(driver.page_source, 'html.parser')
finally:
    # The browser is not used again once the page source has been captured;
    # quitting here keeps the Chrome/chromedriver processes from being left
    # running after the script finishes or fails.
    driver.quit()

# Fetch the same URL with plain HTTP as well.  An explicit timeout is required
# because requests otherwise waits indefinitely on an unresponsive server.
a = requests.get(cricbuzz_url, timeout=30)
c = BeautifulSoup(a.content, 'html.parser')

# Each match on the page sits in a <div> carrying exactly this class string;
# the first anchor inside it holds the relative match URL.
div_tag = c.find_all('div', {'class': 'cb-col-60 cb-col cb-srs-mtchs-tm'})
match_links = [match_div.a['href'] for match_div in div_tag]
# In[ ]:
# In[24]:
# Class string of the <div> that wraps each match link: 'cb-col-60 cb-col cb-srs-mtchs-tm'
# In[25]:
# Rebuild match_links from the Selenium-rendered page: take the href of the
# first anchor inside every match <div> on the fixtures page.
div_tag = cricbuzz_soup.find_all(
    'div',
    attrs={'class': 'cb-col-60 cb-col cb-srs-mtchs-tm'},
)
match_links = [match_div.a['href'] for match_div in div_tag]
# In[26]:
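# Target (full-commentary) URL format, e.g.:
#   https://www.cricbuzz.com/cricket-full-commentary/66169/gt-vs-csk-1st-match-indian-premier-league-2023
# Relative href as found on the fixtures page, e.g.:
#   /cricket-scores/72622/csk-vs-gt-final-reserve-day-indian-premier-league-2023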
# In[27]:
def create_link(link):
    """Turn a relative Cricbuzz match href into its full-commentary URL.

    Everything after the second '/' of *link* (for the scraped hrefs, the
    match id and slug) is appended to the full-commentary base URL.  If
    *link* contains fewer than two '/' characters, the base URL alone is
    returned.

    >>> create_link('/cricket-scores/72622/csk-vs-gt-final')
    'https://www.cricbuzz.com/cricket-full-commentary/72622/csk-vs-gt-final'
    """
    base_url = 'https://www.cricbuzz.com/cricket-full-commentary/'
    # Splitting at most twice leaves the whole remainder in one piece, so the
    # slice holds either that remainder or nothing at all.
    remainder = link.split('/', 2)[2:]
    return base_url + ''.join(remainder)
# In[28]:
# Convert every scraped href into its full-commentary URL, preserving order.
working_links = [create_link(href) for href in match_links]

# Notebook leftover: echoes the list in an interactive session and has no
# effect when the file is run as a script.
working_links