lidongok

关于如何正确解析某个URL链接页面内所包含的URL地址的问题
lidongok | Sep 21, 2007 3:21:51 PM
# Test data: the path of the page being analysed and the links found inside it.
url = '/china/shenzhen/futian/bagua/bagua.htm'
findurls = [
    '../futian_good.htm',
    '../../shenzhenisgood.htm',
    '/chinaisgood.htm',
    'baguaisgood.htm',
    '../../../../../../../badone.htm',
]

# Resolved form of every entry in findurls, in the same order.
newurllist = []


def resolve_url(base_url, link):
    """Resolve *link*, as found in the page at *base_url*, to a full path.

    Args:
        base_url: Path of the page containing the link, e.g.
            '/a/b/page.htm'. Everything after the last '/' is treated as
            the page name and dropped to obtain the page's directory.
        link: The link text. Only the path syntax is interpreted; schemes
            and query strings are not parsed.

    Returns:
        The link unchanged if it starts with '/', otherwise the link
        applied on top of the directory of *base_url* with '.' and '..'
        segments resolved. '..' segments that would climb above the top
        of *base_url* are ignored.
    """
    if link.startswith('/'):
        return link

    # Directory of the page; for a rooted base the first entry is '' so
    # that joining the stack reproduces the leading '/'.
    stack = base_url.split('/')[:-1]
    # Never pop the '' root marker: excess '..' segments stop at the root.
    floor = 1 if base_url.startswith('/') else 0

    parts = link.split('/')
    for part in parts:
        if part == '..':
            if len(stack) > floor:
                stack.pop()
        elif part != '.':
            # Walking segment by segment keeps 'sub/../x.htm' inside the
            # current directory instead of counting every '../' as a
            # step up from it.
            stack.append(part)

    # A link ending in '.' or '..' names a directory: keep a trailing '/'.
    if parts[-1] in ('.', '..'):
        stack.append('')
    return '/'.join(stack)


curPath = url.split('/')[:-1]
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(curPath)
for myurl in findurls:
    print("original url %s" % myurl)
    modifiedurl = resolve_url(url, myurl)
    newurllist.append(modifiedurl)
    print("modified url %s" % modifiedurl)
    

Comment: (no reply)
To post your comment, please log in first.