@@ -56,11 +56,17 @@ from tqdm import tqdm
56 56
57 57 # Getting the HTML from our desired URL as a text string
58 58 url = 'https://carpentries.org/workshops/upcoming-workshops/'
59 - req = requests.get(url).text
59 + req = requests.get(url)
60 60
61 - # Cleaning and printing the string
62 - cleaned_req = re.sub(r'\s*\n\s*', '', req).strip()
63 - print(cleaned_req[0:1000])
61 + # Checking if the request was successful
62 + if req.status_code == 200:
63 +     req = req.text
64 +
65 +     # Cleaning and printing the string
66 +     cleaned_req = re.sub(r'\s*\n\s*', '', req).strip()
67 +     print(cleaned_req[0:1000])
68 + else:
69 +     print(f"Failed to retrieve the webpage. Status code: {req.status_code}")
64 70 ```
65 71
66 72 ```output
@@ -114,7 +120,7 @@ soup = BeautifulSoup(cleaned_req, 'html.parser')
114 120 # Finding all third-level headers and doing a formatted print
115 121 h3_by_tag = soup.find_all('h3')
116 122 print("Number of h3 elements found: ", len(h3_by_tag))
117 - for n, h3 in enumerate(h3_by_tag):
123 + for n, h3 in enumerate(h3_by_tag, start=1):
118 124     print(f"Workshop #{n} - {h3.get_text()}")
119 125 ```
120 126
@@ -246,11 +252,11 @@ workshop_list = []
246 252 for item in divs:
247 253     dict_workshop = {}
248 254     dict_workshop['host'] = item.find('h3').get_text()
249 -     dict_workshop['link'] = div_firsth3.find('h3').find('a').get('href')
250 -     dict_workshop['curriculum'] = div_firsth3.get('data-curriculum')
251 -     dict_workshop['country'] = div_firsth3.get('data-country')
252 -     dict_workshop['format'] = div_firsth3.get('data-meeting')
253 -     dict_workshop['program'] = div_firsth3.get('data-program')
255 +     dict_workshop['link'] = item.find('h3').find('a').get('href') # get is used to access attribute values as a dictionary
256 +     dict_workshop['curriculum'] = item.get('data-curriculum')
257 +     dict_workshop['country'] = item.get('data-country')
258 +     dict_workshop['format'] = item.get('data-meeting')
259 +     dict_workshop['program'] = item.get('data-program')
254 260     workshop_list.append(dict_workshop)
255 261
256 262 # Transform list into a DataFrame
0 commit comments