Foros del Web - Ver Mensaje Individual

razpeitia · #4 (**permalink**) 02/11/2010, 14:46

Código Python:

Ver original
# Código en Python 2.6
import re
import urllib
import datetime
 
class AppURLopener(urllib.FancyURLopener):
    """FancyURLopener subclass that only overrides the ``version`` attribute.

    NOTE(review): ``version`` is presumably what urllib sends as the
    User-Agent header -- confirm against the Python 2 urllib documentation.
    """
    version = "App/1.7"
 
# Install an instance in urllib's private module-level ``_urlopener`` slot,
# presumably so that later ``urllib.urlopen()`` calls go through it -- confirm.
urllib._urlopener = AppURLopener()
 
class Links():
    """Download a page and extract pieces of its HTML with regular expressions.

    The instance remembers one address at a time; ``setter`` changes it and
    ``adhtml`` downloads it.  The ``busca*`` methods do not touch the network:
    each one runs a fixed pattern over the HTML passed in and returns the
    ``findall`` result.
    """

    # Patterns are compiled once for the class instead of on every call.
    _RE_ENLACE = re.compile(r'<a\s+href=[\'"](.*?)[\'"]')
    # An href counts as a video link when, after an optional leading "/", it
    # starts with at least one digit (e.g. "/1234567").
    _RE_ID_VIDEO = re.compile(r'/?[0-9]+')
    # NOTE(review): "<b r>" and the space before "</div>" look like paste
    # artifacts for "<br>" / "</div>"; left untouched because the target
    # markup cannot be checked from here.
    _RE_DESCRIPCION = re.compile('<div id="description">(.*?)<br><br>(.*?)<br><br>(.*?)<b r><br>(.*?)<br><br>(.*?)<br><br>(.*?) </div>')
    _RE_RATE = re.compile('<td>(.*?)</td>')
    _RE_TITULO = re.compile('<div class="title">(.*?)</div>')
    _RE_CATEGORIA = re.compile('<p id="eow-category"><a href="/.*">(.*?)</a></p>')

    def __init__(self, url):
        """Remember ``url`` as the address that ``adhtml`` will download."""
        self.__direccion = url

    def setter(self, url):
        """Replace the address that ``adhtml`` will download with ``url``."""
        self.__direccion = url

    def adhtml(self):
        """Download the current address and return the response body.

        Prints a message and raises ``SystemExit`` when the address cannot be
        opened.
        """
        try:
            f = urllib.urlopen(self.__direccion)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so Ctrl-C is not
            # reported as a download failure.  %-formatting keeps the message
            # a plain string under Python 2, where print("a", b) shows a tuple.
            print("Fallo en %s" % self.__direccion)
            raise SystemExit
        try:
            return f.read()
        finally:
            # Close the connection even if reading the body fails.
            f.close()

    def buscaVideo(self, html):
        """Return the hrefs of the ``<a>`` tags in ``html`` that look like videos.

        Only hrefs that start with digits (optionally preceded by "/") are
        kept; order and duplicates are preserved.
        """
        enlaces = self._RE_ENLACE.findall(str(html))
        return [enlace for enlace in enlaces if self._RE_ID_VIDEO.match(enlace)]

    def buscaDescripcion(self, html):
        """Return the description matches found in ``html``.

        The pattern has six groups, so each match is a 6-tuple of strings.
        """
        return self._RE_DESCRIPCION.findall(str(html))

    def buscaRate(self, html):
        """Return the text found between each ``<td>`` and ``</td>`` in ``html``."""
        return self._RE_RATE.findall(str(html))

    def buscaTitulo(self, html):
        """Return the contents of every ``<div class="title">`` in ``html``."""
        return self._RE_TITULO.findall(str(html))

    def buscaCategoria(self, html):
        """Return the link text inside every ``<p id="eow-category">`` in ``html``."""
        return self._RE_CATEGORIA.findall(str(html))
 
# Walk pages 1..34 of the Vimeo "home videos" listing, visit every video link
# found on each page and write one <item> per video to videosVimeo.txt.
from xml.sax.saxutils import escape  # escapes &, < and > in scraped text

URL_LISTADO = 'http://vimeo.com/ajax/user/home_videos?&jdata={"page":%d}'
URL_SITIO = "http://www.vimeo.com"

# Start from a complete URL (the scheme was missing before).
url = URL_LISTADO % 1
link = Links(url)
fecha = datetime.date.today()
dia = str(fecha)
print(fecha)

arrayVideo = []
# ``with`` closes the file even when a download aborts the run.
with open("videosVimeo.txt", "w") as loco:
    loco.write('<?xml version="1.0"?>\n')
    loco.write('<list origin="vimeo" date="' + dia + '">\n')
    for k in range(1, 35):
        url = URL_LISTADO % k
        print(url)
        print(repr(url))
        link.setter(url)
        html = link.adhtml()
        print(html)
        arrayVideo = link.buscaVideo(html)
        # De-duplicate; sorting makes the output order reproducible.
        for h in sorted(set(arrayVideo)):
            link.setter(URL_SITIO + h)
            html = link.adhtml()
            a = link.buscaDescripcion(html)
            rate = link.buscaRate(html)
            titulo = link.buscaTitulo(html)
            loco.write('\t<item>\n')
            # Each field is written only when its pattern actually matched,
            # instead of indexing blindly and dying with IndexError.
            if titulo:
                loco.write('\t\t <title>' + escape(titulo[0]) + '</title>\n')
            # No literal quotes around the path: they ended up inside the URL.
            loco.write('\t\t<refer>' + escape(URL_SITIO + h) + '</refer>\n')
            if len(rate) > 1:
                loco.write('\t\t<rate>' + escape(rate[1]) + '</rate>\n')
            if a:
                # findall() with several groups returns tuples: take the
                # third group of the first match.
                loco.write('\t\t<description>' + escape(a[0][2]) + '</description>\n')
            # TODO: <category> is not emitted; buscaCategoria is never called.
            loco.write('\t</item>\n')
    # Close the root element so the file is well-formed.
    loco.write('</list>\n')

print("Aca salen los videos")
# Lists the links of the last page fetched, including the first one
# (the old range(1, ...) skipped it).
for video in arrayVideo:
    print(video)

Al parecer, el urllib de Python 3.1 no hace por defecto muchas cosas que el de Python 2.6 sí hace.
Por cierto, tienes problemas con los índices de tus listas. Por eso, entre otras cosas, es mejor usar un parser.

Código en Python 3.1

Código Python:

Ver original
import re
import urllib.request
import datetime
 
class Links():
    """Download a page and extract pieces of its HTML with regular expressions.

    The instance remembers one address at a time; ``setter`` changes it and
    ``adhtml`` downloads it.  The ``busca*`` methods do not touch the network:
    each one runs a fixed pattern over the HTML passed in (``bytes`` or
    ``str``) and returns the ``findall`` result.
    """

    # Patterns are compiled once for the class instead of on every call.
    _RE_ENLACE = re.compile(r'<a\s+href=[\'"](.*?)[\'"]')
    # An href counts as a video link when, after an optional leading "/", it
    # starts with at least one digit (e.g. "/1234567").
    _RE_ID_VIDEO = re.compile(r'/?[0-9]+')
    # NOTE(review): "<b r>" and the space before "</div>" look like paste
    # artifacts for "<br>" / "</div>"; left untouched because the target
    # markup cannot be checked from here.
    _RE_DESCRIPCION = re.compile('<div id="description">(.*?)<br><br>(.*?)<br><br>(.*?)<b r><br>(.*?)<br><br>(.*?)<br><br>(.*?) </div>')
    _RE_RATE = re.compile('<td>(.*?)</td>')
    _RE_TITULO = re.compile('<div class="title">(.*?)</div>')
    _RE_CATEGORIA = re.compile('<p id="eow-category"><a href="/.*">(.*?)</a></p>')

    def __init__(self, url):
        """Remember ``url`` as the address that ``adhtml`` will download."""
        self.__direccion = url

    def setter(self, url):
        """Replace the address that ``adhtml`` will download with ``url``."""
        self.__direccion = url

    def adhtml(self):
        """Download the current address and return the response body as bytes.

        Errors raised by ``urllib.request`` propagate to the caller.
        """
        user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
        headers = {'User-Agent': user_agent}
        # Request the address stored on the instance (not a module-level
        # ``url``), and send no ``data`` so this is a plain GET; a str payload
        # is rejected by urllib.request, which requires bytes.
        req = urllib.request.Request(self.__direccion, headers=headers)
        f = urllib.request.urlopen(req)
        try:
            return f.read()
        finally:
            # Close the connection even if reading the body fails.
            f.close()

    @staticmethod
    def _texto(html):
        """Return ``html`` as text, decoding bytes rather than taking their repr.

        ``str(b"...")`` yields ``"b'...'"`` with escaped quotes and non-ASCII
        bytes, which corrupts whatever the patterns capture.
        NOTE(review): UTF-8 is assumed for the downloaded pages; undecodable
        bytes are replaced instead of raising.
        """
        if isinstance(html, (bytes, bytearray)):
            return html.decode("utf-8", "replace")
        return str(html)

    def buscaVideo(self, html):
        """Return the hrefs of the ``<a>`` tags in ``html`` that look like videos.

        Only hrefs that start with digits (optionally preceded by "/") are
        kept; order and duplicates are preserved.
        """
        enlaces = self._RE_ENLACE.findall(self._texto(html))
        return [enlace for enlace in enlaces if self._RE_ID_VIDEO.match(enlace)]

    def buscaDescripcion(self, html):
        """Return the description matches found in ``html``.

        The pattern has six groups, so each match is a 6-tuple of strings.
        """
        return self._RE_DESCRIPCION.findall(self._texto(html))

    def buscaRate(self, html):
        """Return the text found between each ``<td>`` and ``</td>`` in ``html``."""
        return self._RE_RATE.findall(self._texto(html))

    def buscaTitulo(self, html):
        """Return the contents of every ``<div class="title">`` in ``html``."""
        return self._RE_TITULO.findall(self._texto(html))

    def buscaCategoria(self, html):
        """Return the link text inside every ``<p id="eow-category">`` in ``html``."""
        return self._RE_CATEGORIA.findall(self._texto(html))
 
# Walk pages 1..34 of the Vimeo "home videos" listing, visit every video link
# found on each page and write one <item> per video to videosVimeo.txt.
from xml.sax.saxutils import escape  # escapes &, < and > in scraped text

URL_LISTADO = 'http://vimeo.com/ajax/user/home_videos?&jdata={"page":%d}'
URL_SITIO = "http://www.vimeo.com"

# Start from a complete URL (the scheme was missing before).  The module-level
# name ``url`` is kept on purpose: it is reassigned for every listing page.
url = URL_LISTADO % 1
link = Links(url)
fecha = datetime.date.today()
dia = str(fecha)
print(fecha)

arrayVideo = []
# ``with`` closes the file even when a download aborts the run; the encoding
# is fixed so it agrees with the XML declaration written below.
with open("videosVimeo.txt", "w", encoding="utf-8") as loco:
    loco.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    loco.write('<list origin="vimeo" date="' + dia + '">\n')
    for k in range(1, 35):
        url = URL_LISTADO % k
        print(url)
        print(repr(url))
        link.setter(url)
        html = link.adhtml()
        arrayVideo = link.buscaVideo(html)
        # De-duplicate; sorting makes the output order reproducible.
        for h in sorted(set(arrayVideo)):
            link.setter(URL_SITIO + h)
            html = link.adhtml()
            a = link.buscaDescripcion(html)
            rate = link.buscaRate(html)
            titulo = link.buscaTitulo(html)
            loco.write('\t<item>\n')
            if titulo:
                loco.write('\t\t <title>' + escape(titulo[0]) + '</title>\n')
            # No literal quotes around the path: they ended up inside the URL.
            loco.write('\t\t<refer>' + escape(URL_SITIO + h) + '</refer>\n')
            # Guard each field on the list that is actually indexed (the old
            # checks looked at ``titulo`` before reading ``rate`` and ``a``).
            if len(rate) > 1:
                loco.write('\t\t<rate>' + escape(rate[1]) + '</rate>\n')
            if a:
                # findall() with several groups returns tuples: take the
                # third group of the first match.
                loco.write('\t\t<description>' + escape(a[0][2]) + '</description>\n')
            # TODO: <category> is not emitted; buscaCategoria is never called.
            loco.write('\t</item>\n')
    # Close the root element so the file is well-formed.
    loco.write('</list>\n')

print("Aca salen los videos")
# Lists the links of the last page fetched, including the first one
# (the old range(1, ...) skipped it).
for video in arrayVideo:
    print(video)

No sé si está bien la indentación.