I was trying to fetch some data (daily new cases and daily new deaths) from Worldometers and I came up with this:
import requestsfrom bs4 import BeautifulSoup as bsimport numpy as np import matplotlib.pyplot as plt # Plotting Functiondef plot(data , country): fontsize = 10 csfont = {'fontname':'Times New Roman'} plt.plot(data) plt.xlabel(f'Days since the beginning of the COVID-19 Pandemic in {country}',fontsize=fontsize, fontweight='bold',**csfont) plt.ylabel('Daily new Cases',fontsize=fontsize, fontweight='bold',**csfont) plt.tight_layout() plt.show()# Daily New Casesdef DNC(country,Plot = False): url = f"https://www.worldometers.info/coronavirus/country/{country}/" r = requests.get(url) htmlcontent = r.content soup = str(bs(htmlcontent, "html.parser")) n = soup.find("name: 'Daily Cases',") n2 = soup[n:].find("data:") m = soup[n:].find(']') data = np.array(soup[n+n2+7:n+m].replace('null','0').split(','),dtype=int) if Plot == True: plot(data,country) return data# Daily New Deathsdef DND(country,Plot = False): url = f"https://www.worldometers.info/coronavirus/country/{country}/" r = requests.get(url) htmlcontent = r.content soup = str(bs(htmlcontent, "html.parser")) n = soup.find("name: 'Daily Deaths',") n2 = soup[n:].find("data:") m = soup[n:].find(']') data = np.array(soup[n+n2+7:n+m].replace('null','0').split(','),dtype=int) if Plot == True: plot(data,country) return dataif __name__ == '__main__': DNC('us',Plot=True) DND('us',Plot=True)
Which technically works, but I'm not happy with how it finds the data in the HTML (I think I have used the dumbest idea, to convert the soup to string and then find it there with counting letters and so on). Is there a better way to do this?