@@ -56,11 +56,17 @@ from tqdm import tqdm
5656
5757# Requesting the page at our desired URL (its HTML text is extracted after the status check)
5858url = ' https://carpentries.org/workshops/upcoming-workshops/'
59- req = requests.get(url).text
59+ req = requests.get(url)
6060
61- # Cleaning and printing the string
62- cleaned_req = re.sub(r ' \s * \n \s * ' , ' ' , req).strip()
63- print (cleaned_req[0 :1000 ])
61+ # Checking if the request was successful
62+ if req.status_code == 200 :
63+ req = req.text
64+
65+ # Cleaning and printing the string
66+ cleaned_req = re.sub(r ' \s * \n \s * ' , ' ' , req).strip()
67+ print (cleaned_req[0 :1000 ])
68+ else :
69+ print (f " Failed to retrieve the webpage. Status code: { req.status_code} " )
6470```
6571
6672``` output
@@ -114,7 +120,7 @@ soup = BeautifulSoup(cleaned_req, 'html.parser')
114120# Finding all third-level headers and doing a formatted print
115121h3_by_tag = soup.find_all(' h3' )
116122print (" Number of h3 elements found: " , len (h3_by_tag))
117- for n, h3 in enumerate (h3_by_tag):
123+ for n, h3 in enumerate (h3_by_tag, start = 1 ):
118124 print (f " Workshop # { n} - { h3.get_text()} " )
119125```
120126
@@ -159,18 +165,18 @@ print(div_firsth3.prettify())
159165
160166Remember, the output shown here is probably different than yours, as the website is continuously updated.
161167``` output
162- <div class="p-8 mb-5 border" data-country="Puerto Rico " data-curriculum="Software Carpentry (Shell, Git, R for Reproducible Scientific Analysis )" data-meeting="In Person" data-program="Software Carpentry">
168+ <div class="p-8 mb-5 border" data-country="United States " data-curriculum="Library Carpentry (Intro to Data, Unix Shell, Git, and/or OpenRefine )" data-meeting="In Person" data-program="Library Carpentry">
163169 <div class="flex mb-4 -mx-2">
164170 <div class="flex items-center mx-2">
165- <img alt="" class="mx-1" src="/software .svg"/>
171+ <img alt="" class="mx-1" src="/library .svg"/>
166172 <span class="text-[0.625rem] uppercase">
167- Software Carpentry
173+ Library Carpentry
168174 </span>
169175 </div>
170176 <div class="flex items-center mx-2">
171- <img alt="" class="mr-1" height="20" src="/flags/pr .png" width="20"/>
177+ <img alt="" class="mr-1" height="20" src="/flags/us .png" width="20"/>
172178 <span class="text-[0.625rem] uppercase">
173- Puerto Rico
179+ United States
174180 </span>
175181 </div>
176182 <div class="flex items-center mx-2">
@@ -181,20 +187,20 @@ Remember, the output shown here is probably different than yours, as the website
181187 </div>
182188 </div>
183189 <h3 class="title text-base md:text-[1.75rem] leading-[2.125rem] font-semibold">
184- <a class="underline hover:text-blue-hover text-gray-dark" href="https://dept-ccom-uprrp .github.io/2025-06-04-uprrp-r /">
185- University of Puerto Rico
190+ <a class="underline hover:text-blue-hover text-gray-dark" href="https://unt-carpentries .github.io/2026-01-22-unt /">
191+ University of North Texas
186192 </a>
187193 </h3>
188194 <div class="mb-5 text-lg font-semibold text-gray-mid">
189- Software Carpentry (Shell, Git, R for Reproducible Scientific Analysis )
195+ Library Carpentry (Intro to Data, Unix Shell, Git, and/or OpenRefine )
190196 </div>
191197 <div class="mb-2 text-xs">
192198 <strong class="font-bold">
193199 Instructors
194200 </strong>
195201 :
196202 <span class="instructors">
197- Humberto Ortiz-Zuazaga, Airined Montes Mercado
203+ Sarah Lynn Fisher, Maristella Feustle, Whitney Johnson-Freeman
198204 </span>
199205 </div>
200206 <div class="mb-4 text-xs">
@@ -203,11 +209,11 @@ Remember, the output shown here is probably different than yours, as the website
203209 </strong>
204210 :
205211 <span class="helpers">
206- Isabel Rivera, Diana Buitrago Escobar, Yabdiel Ramos Valerio
212+ Marcia McIntosh, Trey Clark
207213 </span>
208214 </div>
209215 <div class="text-sm font-semibold text-gray-mid">
210- Jun 04 - Jun 10 2025
216+ Jan 22 - Jan 22 2026
211217 </div>
212218</div>
213219```
@@ -246,11 +252,11 @@ workshop_list = []
246252for item in divs:
247253 dict_workshop = {}
248254 dict_workshop[' host' ] = item.find(' h3' ).get_text()
249- dict_workshop[' link' ] = div_firsth3 .find(' h3' ).find(' a' ).get(' href' )
250- dict_workshop[' curriculum' ] = div_firsth3 .get(' data-curriculum' )
251- dict_workshop[' country' ] = div_firsth3 .get(' data-country' )
252- dict_workshop[' format' ] = div_firsth3 .get(' data-meeting' )
253- dict_workshop[' program' ] = div_firsth3 .get(' data-program' )
255+ dict_workshop[' link' ] = item .find(' h3' ).find(' a' ).get(' href' ) # .get() reads the value of a tag's attribute, like looking up a key in a dictionary
256+ dict_workshop[' curriculum' ] = item .get(' data-curriculum' )
257+ dict_workshop[' country' ] = item .get(' data-country' )
258+ dict_workshop[' format' ] = item .get(' data-meeting' )
259+ dict_workshop[' program' ] = item .get(' data-program' )
254260 workshop_list.append(dict_workshop)
255261
256262# Transform list into a DataFrame
0 commit comments