Skip to content

Commit eebb572

Browse files
authored
Merge pull request #3 from jairomelo/realwebsite
Realwebsite
2 parents e68e1cf + e992940 commit eebb572

1 file changed

Lines changed: 16 additions & 10 deletions

File tree

episodes/a-real-website.md

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,17 @@ from tqdm import tqdm
5656

5757
# Getting the HTML from our desired URL as a text string
5858
url = 'https://carpentries.org/workshops/upcoming-workshops/'
59-
req = requests.get(url).text
59+
req = requests.get(url)
6060

61-
# Cleaning and printing the string
62-
cleaned_req = re.sub(r'\s*\n\s*', '', req).strip()
63-
print(cleaned_req[0:1000])
61+
# Checking if the request was successful
62+
if req.status_code == 200:
63+
req = req.text
64+
65+
# Cleaning and printing the string
66+
cleaned_req = re.sub(r'\s*\n\s*', '', req).strip()
67+
print(cleaned_req[0:1000])
68+
else:
69+
print(f"Failed to retrieve the webpage. Status code: {req.status_code}")
6470
```
6571

6672
```output
@@ -114,7 +120,7 @@ soup = BeautifulSoup(cleaned_req, 'html.parser')
114120
# Finding all third-level headers and doing a formatted print
115121
h3_by_tag = soup.find_all('h3')
116122
print("Number of h3 elements found: ", len(h3_by_tag))
117-
for n, h3 in enumerate(h3_by_tag):
123+
for n, h3 in enumerate(h3_by_tag, start=1):
118124
print(f"Workshop #{n} - {h3.get_text()}")
119125
```
120126

@@ -246,11 +252,11 @@ workshop_list = []
246252
for item in divs:
247253
dict_workshop = {}
248254
dict_workshop['host'] = item.find('h3').get_text()
249-
dict_workshop['link'] = div_firsth3.find('h3').find('a').get('href')
250-
dict_workshop['curriculum'] = div_firsth3.get('data-curriculum')
251-
dict_workshop['country'] = div_firsth3.get('data-country')
252-
dict_workshop['format'] = div_firsth3.get('data-meeting')
253-
dict_workshop['program'] = div_firsth3.get('data-program')
255+
dict_workshop['link'] = item.find('h3').find('a').get('href') # get is used to access attribute values as a dictionary
256+
dict_workshop['curriculum'] = item.get('data-curriculum')
257+
dict_workshop['country'] = item.get('data-country')
258+
dict_workshop['format'] = item.get('data-meeting')
259+
dict_workshop['program'] = item.get('data-program')
254260
workshop_list.append(dict_workshop)
255261

256262
# Transform list into a DataFrame

0 commit comments

Comments
 (0)