I am writing a simple Python program that fetches a webpage and finds all the URL links in it. However, when I try to locate the starting and ending delimiters (") of each href link, the index of the ending delimiter always comes out wrong.
# open a url and find all the links in it
import re
import urllib2
url=urllib2.urlopen('right.html')
urlinfo = url.info()
urlcontent = url.read()
bodystart = urlcontent.index('<body')
print 'body starts at',bodystart
bodycontent = urlcontent[bodystart:].lower()
print bodycontent
linklist = []
n = bodycontent.index('<a href=')
while n:
print n
bodycontent = bodycontent[n:]
a = bodycontent.index('"')
b = bodycontent[(a+1):].index('"')
print a, b
linklist.append(bodycontent[(a+1):b])
n = bodycontent[b:].index('<a href=')
print linklist