#!/usr/bin/env python
-
import csv
import re
from time import sleep

from lxml import etree, html
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.expected_conditions import visibility_of_element_located
from selenium.webdriver.support.ui import WebDriverWait
-
# Tournament pages to scrape. The single entry below is for testing; the
# full list of URLs can be loaded from https://dpaste.org/nZpuq
urls_list = [
    "https://www.flashscore.com/football/zimbabwe/premier-soccer-league-2021-2022/",
]

# This code was originally written on Linux; this is where geckodriver
# is installed there.
GECKODRIVER_PATH = "/usr/local/bin/geckodriver"

OUTPUT_FILE = "./tournament_results.csv"
FIELDNAMES = [
    "tournament",
    "round",
    "date",
    "time",
    "home_team",
    "away_team",
    "home_score",
    "away_score",
]

# Seconds to pause after scrolling to "Show more matches" and between
# tournaments.
PAUSE_SECONDS = 5

REJECT_COOKIES_XPATH = '//button[contains(@id, "onetrust-reject-all-handler")]'
# The clickable element is the parent of the "Show more matches" span.
SHOW_MORE_XPATH = "//span[text()='Show more matches']/.."
RESULTS_XPATH = (
    '//div[@id = "live-table"]'
    '//div[@class = "event event--results"]'
    '//div[@class = "leagues--static event--leagues results"]'
    '//div[@class = "sportName soccer"]'
)
TOURNAMENT_HEADER_CLASS = (
    "wcl-header_uBhYi wclLeagueHeader wclLeagueHeader--collapsed "
    "wclLeagueHeader--noCheckBox wclLeagueHeader--indent"
)
ROUND_CLASS = "event__round event__round--static"
MATCH_CLASS_PREFIX = "event__match"

# A season written as "YYYY-YYYY" somewhere in the tournament URL.
SEASON_PATTERN = re.compile(r"\d{4}\-\d{4}")


def season_years_from_url(url):
    """Return the season's years found in *url* as ``["YYYY", "YYYY"]``.

    Returns ``None`` when the URL contains no ``YYYY-YYYY`` season.
    """
    found = SEASON_PATTERN.search(url)
    return found.group().split("-") if found else None


def add_season_years(dates, season_years):
    """Append a year to each ``DD.MM`` date, returning ``DD.MM.YYYY`` strings.

    *dates* must be in page order. The first date gets the last year of
    *season_years*; as soon as a date's month is greater than the month of
    the date before it, that date and every later one get the first year.
    When *season_years* is falsy the dates are returned unchanged.

    NOTE(review): this assumes the page lists matches newest first and that
    the month only jumps upwards at the year boundary; a page holding several
    tournament sections may break that assumption.
    """
    if not season_years:
        return list(dates)

    year = season_years[-1]
    previous_month = None
    dated = []
    for date in dates:
        month = int(date.split(".")[-1])
        if previous_month is not None and month > previous_month:
            print("change year")
            year = season_years[0]
        previous_month = month
        dated.append(f"{date}.{year}")
    return dated


def reject_cookies(driver, timeout=10):
    """Click the "reject all" cookie button once it becomes visible.

    If the button does not show up within *timeout* seconds the page is
    used as is instead of aborting the whole run.
    """
    try:
        button = WebDriverWait(driver, timeout).until(
            visibility_of_element_located((By.XPATH, REJECT_COOKIES_XPATH))
        )
    except TimeoutException:
        return
    button.click()


def load_all_matches(driver):
    """Click "Show more matches" until the link is no longer on the page.

    The results page loads its content dynamically, so the link has to be
    followed repeatedly before the page source holds every match.
    """
    while True:
        try:
            # Look the link up in the live page on every pass: the previous
            # click may have replaced or removed it.
            driver.find_element(By.XPATH, SHOW_MORE_XPATH).send_keys(Keys.PAGE_DOWN)
            sleep(PAUSE_SECONDS)
            driver.find_element(By.XPATH, SHOW_MORE_XPATH).click()
        except NoSuchElementException:
            print("finish")
            break


def fetch_results_page(url):
    """Open *url* in Firefox, expand all results and return the page source."""
    driver_service = webdriver.FirefoxService(executable_path=GECKODRIVER_PATH)
    driver = webdriver.Firefox(service=driver_service)
    try:
        driver.get(url)
        reject_cookies(driver)
        load_all_matches(driver)
        return driver.page_source
    finally:
        # quit() ends the whole browser session; it also runs when an error
        # occurs so that no browser is left behind.
        driver.quit()


def _first_text(row, xpath):
    """Return the text content of the first node matching *xpath* in *row*."""
    return row.xpath(xpath)[0].text_content()


def parse_results(page_source, base_url):
    """Extract one dict per match from a fully loaded results page.

    Each dict has the keys listed in ``FIELDNAMES``. The tournament and
    round of a match are taken from the most recent header rows above it.
    The year is added to the date when *base_url* contains a season.
    Returns an empty list when the results container is not in the page.
    """
    document = html.fromstring(page_source)
    containers = document.xpath(RESULTS_XPATH)
    if not containers:
        return []

    matches = []
    tournament = None
    round_name = None
    for row in containers[0].getchildren():
        # Children without a class attribute are neither header nor match.
        row_class = row.attrib.get("class", "")
        if row_class == TOURNAMENT_HEADER_CLASS:
            tournament = (
                row.text_content()
                .replace("Standings", "")
                .replace("Draw", "")
                .replace(chr(160), "")
            )
        elif row_class == ROUND_CLASS:
            round_name = row.text_content()
        elif row_class.startswith(MATCH_CLASS_PREFIX):
            # The time cell holds "<date>. <time>"; drop the date's last char.
            when = _first_text(row, ".//div[@class = 'event__time']").split()
            matches.append({
                "tournament": tournament,
                "round": round_name,
                "date": when[0][:-1],
                "time": when[1],
                "home_team": _first_text(row, './/div[contains(@class, "homeParticipant")]'),
                "away_team": _first_text(row, './/div[contains(@class, "awayParticipant")]'),
                "home_score": _first_text(row, './/span[contains(@class, "event__score--home")]'),
                "away_score": _first_text(row, './/span[contains(@class, "event__score--away")]'),
            })

    season_years = season_years_from_url(base_url)
    dated = add_season_years([match["date"] for match in matches], season_years)
    for match, date in zip(matches, dated):
        match["date"] = date
    return matches


def append_results(rows, path=OUTPUT_FILE):
    """Append *rows* to the CSV file at *path*.

    The header is written only when the file is still empty, so repeated
    calls and repeated runs do not scatter header lines through the data.
    """
    with open(path, "a", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=FIELDNAMES)
        if csvfile.tell() == 0:
            writer.writeheader()
        writer.writerows(rows)


def main():
    """Scrape every tournament in ``urls_list`` and append it to the CSV."""
    for item in urls_list:
        base_url = item + "results/"
        print(f"Working on {item}")
        standings = parse_results(fetch_results_page(base_url), base_url)
        if not standings:
            print(f"No results found for {item}")
        # Save results to file
        append_results(standings)
        sleep(PAUSE_SECONDS)


if __name__ == "__main__":
    main()