|
9 | 9 | }, |
10 | 10 | { |
11 | 11 | "cell_type": "code", |
12 | | - "execution_count": 3, |
| 12 | + "execution_count": 1, |
13 | 13 | "metadata": {}, |
14 | 14 | "outputs": [], |
15 | 15 | "source": [ |
|
1309 | 1309 | }, |
1310 | 1310 | { |
1311 | 1311 | "cell_type": "code", |
1312 | | - "execution_count": 47, |
| 1312 | + "execution_count": 2, |
1313 | 1313 | "metadata": {}, |
1314 | 1314 | "outputs": [ |
1315 | 1315 | { |
|
5285 | 5285 | }, |
5286 | 5286 | { |
5287 | 5287 | "cell_type": "code", |
5288 | | - "execution_count": 127, |
| 5288 | + "execution_count": 4, |
5289 | 5289 | "metadata": {}, |
5290 | 5290 | "outputs": [], |
5291 | 5291 | "source": [ |
5292 | 5292 | "# We'll use BeautifulSoup to parse the HTML,\n", |
5293 | 5293 | "# as it has useful functions and tools to access the data in the HTML\n", |
5294 | | - "soup = BeautifulSoup(req.text, 'html.parser')" |
| 5294 | + "soup = BeautifulSoup(req, 'html.parser')" |
5295 | 5295 | ] |
5296 | 5296 | }, |
5297 | 5297 | { |
|
6733 | 6733 | }, |
6734 | 6734 | { |
6735 | 6735 | "cell_type": "code", |
6736 | | - "execution_count": 129, |
| 6736 | + "execution_count": 24, |
6737 | 6737 | "metadata": {}, |
6738 | | - "outputs": [], |
| 6738 | + "outputs": [ |
| 6739 | + { |
| 6740 | + "name": "stdout", |
| 6741 | + "output_type": "stream", |
| 6742 | + "text": [ |
| 6743 | + "Number of table elements found: 1\n", |
| 6744 | + "Printing only the first 1000 characters of the table element: \n", |
| 6745 | + " <table class=\"table table-striped\" style=\"width: 100%;\">\n", |
| 6746 | + "<tr>\n", |
| 6747 | + "<td>\n", |
| 6748 | + "<img alt=\"swc logo\" class=\"flags\" height=\"24\" src=\"https://carpentries.org/assets/img/logos/swc.svg\" title=\"swc workshop\" width=\"24\">\n", |
| 6749 | + "</img></td>\n", |
| 6750 | + "<td>\n", |
| 6751 | + "<img alt=\"mx\" class=\"flags\" src=\"https://carpentries.org/assets/img/flags/24/mx.png\" title=\"MX\">\n", |
| 6752 | + "<img alt=\"globe image\" class=\"flags\" src=\"https://carpentries.org/assets/img/flags/24/w3.png\" title=\"Online\">\n", |
| 6753 | + "<a href=\"https://galn3x.github.io/-2024-10-28-Metagenomics-online/\">Nodo Nacional de Bioinformática UNAM</a>\n", |
| 6754 | + "<br>\n", |
| 6755 | + "<b>Instructors:</b> César Aguilar, Diana Oaxaca, Nelly Selem-Mojica\n", |
| 6756 | + " \n", |
| 6757 | + " \n", |
| 6758 | + " <br>\n", |
| 6759 | + "<b>Helpers:</b> Andreas Chavez, José Manuel Villalobos Escobedo, Aaron Espinosa Jaime, Andrés Arredondo, Mirna Vázquez Rosas-Landa, David Alberto GarcÃa-Estrada\n", |
| 6760 | + " \n", |
| 6761 | + "\t</br></br></img></img></td>\n", |
| 6762 | + "<td>\n", |
| 6763 | + "\t\tOct 28 - Oct 31, 2024\n", |
| 6764 | + "\t</td>\n", |
| 6765 | + "</tr>\n", |
| 6766 | + "<tr>\n", |
| 6767 | + "<td>\n", |
| 6768 | + "<img alt=\"dc logo\" class=\"flags\" height=\"24\" src=\"https://carpentries.org/assets/img/logos/dc.svg\" title=\"dc \n" |
| 6769 | + ] |
| 6770 | + } |
| 6771 | + ], |
| 6772 | + "source": [ |
| 6773 | + "soup = BeautifulSoup(req, 'html.parser')\n", |
| 6774 | + "tables_by_tag = soup.find_all('table')\n", |
| 6775 | + "print(\"Number of table elements found: \", len(tables_by_tag))\n", |
| 6776 | + "print(\"Printing only the first 1000 characters of the table element: \\n\", str(tables_by_tag[0])[0:1000])" |
| 6777 | + ] |
| 6778 | + }, |
| 6779 | + { |
| 6780 | + "cell_type": "code", |
| 6781 | + "execution_count": 25, |
| 6782 | + "metadata": {}, |
| 6783 | + "outputs": [ |
| 6784 | + { |
| 6785 | + "name": "stdout", |
| 6786 | + "output_type": "stream", |
| 6787 | + "text": [ |
| 6788 | + "Number of table elements found: 1\n", |
| 6789 | + "<table class=\"table table-striped\" style=\"width: 100%;\">\n", |
| 6790 | + "<tr>\n", |
| 6791 | + "<td>\n", |
| 6792 | + "<img alt=\"swc logo\" class=\"flags\" height=\"24\" src=\"https://carpentries.org/assets/img/logos/swc.svg\" title=\"swc workshop\" width=\"24\">\n", |
| 6793 | + "</img></td>\n", |
| 6794 | + "<td>\n", |
| 6795 | + "<img alt=\"mx\" class=\"flags\" src=\"https://carpentries.org/assets/img/flags/24/mx.png\" title=\"MX\">\n", |
| 6796 | + "<img alt=\"globe image\" class=\"flags\" src=\"https://carpentries.org/assets/img/flags/24/w3.png\" title=\"Online\">\n", |
| 6797 | + "<a href=\"https://galn3x.github.io/-2024-10-28-Metagenomics-online/\">Nodo Nacional de Bioinformática UNAM</a>\n", |
| 6798 | + "<br>\n", |
| 6799 | + "<b>Instructors:</b> César Aguilar, Diana Oaxaca, Nelly Selem-Mojica\n", |
| 6800 | + " \n", |
| 6801 | + " \n", |
| 6802 | + " <br>\n", |
| 6803 | + "<b>Helpers:</b> Andreas Chavez, José Manuel Villalobos Escobedo, Aaron Espinosa Jaime, Andrés Arredondo, Mirna Vázquez Rosas-Landa, David Alberto GarcÃa-Estrada\n", |
| 6804 | + " \n", |
| 6805 | + "\t</br></br></img></img></td>\n", |
| 6806 | + "<td>\n", |
| 6807 | + "\t\tOct 28 - Oct 31, 2024\n", |
| 6808 | + "\t</td>\n", |
| 6809 | + "</tr>\n", |
| 6810 | + "<tr>\n", |
| 6811 | + "<td>\n", |
| 6812 | + "<img alt=\"dc logo\" class=\"flags\" height=\"24\" src=\"https://carpentries.org/assets/img/logos/dc.svg\" title=\"dc \n" |
| 6813 | + ] |
| 6814 | + } |
| 6815 | + ], |
6739 | 6816 | "source": [ |
6740 | | - "tables = soup.find_all('table')" |
| 6817 | + "tables_by_class = soup.find_all(class_=\"table table-striped\")\n", |
| 6818 | + "print(\"Number of table elements found: \", len(tables_by_class))\n", |
| 6819 | + "print(str(tables_by_class[0])[0:1000])" |
6741 | 6820 | ] |
6742 | 6821 | }, |
6743 | 6822 | { |
6744 | 6823 | "cell_type": "code", |
6745 | | - "execution_count": 130, |
| 6824 | + "execution_count": 9, |
6746 | 6825 | "metadata": {}, |
6747 | 6826 | "outputs": [ |
6748 | 6827 | { |
6749 | 6828 | "data": { |
6750 | 6829 | "text/plain": [ |
6751 | | - "1" |
| 6830 | + "bs4.element.Tag" |
6752 | 6831 | ] |
6753 | 6832 | }, |
6754 | | - "execution_count": 130, |
| 6833 | + "execution_count": 9, |
6755 | 6834 | "metadata": {}, |
6756 | 6835 | "output_type": "execute_result" |
6757 | 6836 | } |
6758 | 6837 | ], |
6759 | 6838 | "source": [ |
6760 | 6839 | "# How many tables (elements with the table tag) did we find?\n", |
6761 | | - "len(tables)" |
| 6840 | + "type(tables[0])" |
6762 | 6841 | ] |
6763 | 6842 | }, |
6764 | 6843 | { |
|
0 commit comments