You need to specify what you want to extract.
To see what the page contains (its source):
# Dump the parsed document so its markup can be inspected.
# Uses the print() function, matching the Python 3 style of the other snippets.
print(soup)
That will help you see the data you want to extract.
To get the text, use get_text().
You should use find() and findAll():
# Collect every element whose id attribute is "Dia:" and print its text.
data = soup.findAll(id="Dia:")
for text in (element.get_text() for element in data):
    print(text)
You can also use next_sibling in a loop (iteration),
or use children to get only a tag's direct children, or descendants to get everything nested under it.
I've had a look at that site: each event is a <p> tag with its own class in the HTML,
so it must be:
# Print the text of every event paragraph on the page.
# Class matching is case-sensitive, so use the lowercase class name "event",
# which is what the regex and the complete scripts in this answer look for.
data = soup.findAll("p", {"class": "event"})
for dat in data:
    print(dat.get_text())
That will give all the events on the page.
To print only the date you want, you can use next_sibling, because the events are directly under the date.
# Ask for a date, find the <strong> tag whose text equals it, and print the
# node that immediately follows that tag.
date = input("Enter the day date in spanish:")  # input() already returns str
date_day = soup.findAll("strong")
for strong in date_day:
    if strong.get_text() == date:
        # next_sibling is an attribute of the tag itself; the string returned
        # by get_text() has no such attribute.
        print(strong.next_sibling)
This will scrape the events for the day you want.
You can use date from the datetime module and the calendar module to get the day, and map it to the text used on the page with a lookup like:
date_aa = { "1":"janvier",....,"3":"Março",......}
You can use previous_sibling instead to avoid unknown text that is not an event.
# Print each event paragraph whose preceding sibling tag holds the wanted date.
# `date` is the date text to match; it is expected to be set before this runs.
# Class matching is case-sensitive, so use the lowercase "event" that the
# regex and the complete scripts in this answer look for.
Event_day = soup.findAll("p", {"class": "event"})
for string in Event_day:
    # find_previous_sibling() returns the nearest preceding sibling *tag*,
    # skipping whitespace text nodes; it is None for the first child.
    # NOTE(review): assumes the date heading is the tag right before the
    # event paragraph - confirm against the page markup.
    previous_tag = string.find_previous_sibling()
    if previous_tag is not None and previous_tag.get_text() == date:
        print(string.get_text())
That would be better.
You can use a regular expression too:
import re
# Print the matching date tag only when the paragraph after it is an event.
# Raw string so that "\w" reaches the regex engine as written.
a = r"""(<p class="event">)+\w"""
# findAll() returns a list-like ResultSet, which has no .parent; navigate
# from each individual <strong> tag instead.
for strong in soup.findAll("strong"):
    if strong.get_text() == date:
        # find_next("p") returns the first <p> after this tag in document
        # order, or None when there is none.
        following_paragraph = strong.find_next("p")
        # re.search needs a string, so match against the tag's markup.
        if (following_paragraph is not None
                and re.search(a, str(following_paragraph)) is not None):
            print(strong)
Another way:
To show all Dia entries (the dates, I guess):
# Show the text of every <strong> tag on the page.
date_tags = soup.findAll({"strong"})
for text in (tag.get_text() for tag in date_tags):
    print(text)
To show all events:
# Show the text of every <p class="event"> paragraph on the page.
event_tags = soup.findAll("p", {"class": "event"})
for text in (tag.get_text() for tag in event_tags):
    print(text)
To get the event for the date you want:
# For each <strong> tag, print every piece of text found under its parent.
# findAll() returns a list-like ResultSet, which has no .parent, so the
# parent is taken from each individual tag.
for strong in soup.findAll("strong"):
    for node in strong.parent.descendants:
        # .descendants yields nested tags as well as text nodes; text nodes
        # are str subclasses, so printing only those emits each piece of
        # text exactly once.
        if isinstance(node, str):
            print(node)
You can use descendants instead of children.
Reading the book "Web Scraping with Python" will help a lot.
This is how to scrape the events and dates:
from bs4 import BeautifulSoup
import requests
import re
# Download the calendar page at `url` and print every date and every event.
url = 'http://www.bhaktiyogapura.com/2017/03/calendario-vaisnava-marco-de-2017/'
# Send a desktop-Chrome User-Agent string with the request.
header = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                        'AppleWebKit/537.36 (KHTML, like Gecko) '
                        'Chrome/51.0.2704.103 Safari/537.36'}
# Without a timeout the request can block forever on an unresponsive server.
req = requests.get(url, headers=header, timeout=30)
# Stop on 4xx/5xx responses instead of parsing an error page.
req.raise_for_status()
html = req.text
soup = BeautifulSoup(html, 'html.parser')

# Each <strong> tag is printed as a date ("Dia").
for strong in soup.findAll("strong"):
    print("Dia:" + strong.get_text())

# Each <p class="event"> paragraph is printed as an event.
for string in soup.findAll("p", {"class": "event"}):
    print("Event: " + string.get_text())
To print only the (Dia:) and (Event:) for a given date with BeautifulSoup:
from bs4 import BeautifulSoup
import requests
import re
# Download the calendar page at `url`, ask for a date, and print that date
# together with the events listed after it.
url = 'http://www.bhaktiyogapura.com/2017/03/calendario-vaisnava-marco-de-2017/'
# Send a desktop-Chrome User-Agent string with the request.
header = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                        'AppleWebKit/537.36 (KHTML, like Gecko) '
                        'Chrome/51.0.2704.103 Safari/537.36'}
# Without a timeout the request can block forever on an unresponsive server.
req = requests.get(url, headers=header, timeout=30)
# Stop on 4xx/5xx responses instead of parsing an error page.
req.raise_for_status()
html = req.text
soup = BeautifulSoup(html, 'html.parser')

# The date text in the <strong> tags ends with "- ", so append it to the
# user's input before comparing. input() already returns str.
date = input("Enter the Dia as (23 Março 2017): ").strip() + "- "
for strong in soup.findAll("strong"):
    if strong.get_text() == date:
        print("Dia: ", strong.get_text())
        # Walk forward from the matched date in document order, rather than
        # searching the whole page, so only this date's events are printed.
        # NOTE(review): assumes <strong> is used only for date headings, so
        # the next <strong> marks the next date - confirm against the page.
        for node in strong.find_all_next(["strong", "p"]):
            if node.name == "strong":
                break
            if "event" in (node.get("class") or []):
                print("Event: ", node.get_text())
        # The wanted date has been handled; skip the remaining headings.
        break
`<strong>` tag, so use `.select("strong")`. Then for the event, maybe you can look for all `p` tags that begin with `-`.