Buenas, soy programador en php, acabo de empezar en python hace apenas una semana porque tengo que realizar unos proyectos. Les agradeceria que me ayudasen son lo siguiente:
Estoy intentando crear un scrapper web y que una vez extraidos los datos los pase a un excel, el problema viene en que los datos me llegan en formato xml dentro de un array y no se como reemplazar los caracteres para que sean validos para introducirlos en excel. Se que el script se puede reducir en muchas menos linias, pero aun no tengo la experiencia.
He intentando aplicando month.replace("[","").replace("(","").replace(")","").replace(")", "")
Tambien he probado algo similar con re.sub y con lstrip, rstrip pero nada parece funcionar
Código Python:
Ver originalfrom lxml import html
import requests
from openpyxl import Workbook
wb = Workbook()
ws = wb.active
def Volume( url ):
tree = html.fromstring(requests.get(url).content)
month = tree.xpath('//*[@id="volumeDetailProductTable"]/tbody/tr[1]/th/span/text()'), tree.xpath('//*[@id="volumeDetailProductTable"]/tbody/tr[2]/th/span/text()'), tree.xpath('//*[@id="volumeDetailProductTable"]/tbody/tr[3]/th/span/text()'), tree.xpath('//*[@id="volumeDetailProductTable"]/tbody/tr[4]/th/span/text()'), tree.xpath('//*[@id="volumeDetailProductTable"]/tbody/tr[5]/th/span/text()')
tvol = tree.xpath('//*[@id="volumeDetailProductTable"]/tbody/tr[1]/td[4]/text()'), tree.xpath('//*[@id="volumeDetailProductTable"]/tbody/tr[2]/td[4]/text()'), tree.xpath('//*[@id="volumeDetailProductTable"]/tbody/tr[3]/td[4]/text()'), tree.xpath('//*[@id="volumeDetailProductTable"]/tbody/tr[4]/td[4]/text()'), tree.xpath('//*[@id="volumeDetailProductTable"]/tbody/tr[5]/td[4]/text()')
atclose = tree.xpath('//*[@id="volumeDetailProductTable"]/tbody/tr[1]/td[11]/text()'), tree.xpath('//*[@id="volumeDetailProductTable"]/tbody/tr[2]/td[11]/text()'), tree.xpath('//*[@id="volumeDetailProductTable"]/tbody/tr[3]/td[11]/text()'), tree.xpath('//*[@id="volumeDetailProductTable"]/tbody/tr[4]/td[11]/text()'), tree.xpath('//*[@id="volumeDetailProductTable"]/tbody/tr[5]/td[11]/text()')
change = tree.xpath('//*[@id="volumeDetailProductTable"]/tbody/tr[1]/td[12]/text()'), tree.xpath('//*[@id="volumeDetailProductTable"]/tbody/tr[2]/td[12]/text()'), tree.xpath('//*[@id="volumeDetailProductTable"]/tbody/tr[3]/td[12]/text()'), tree.xpath('//*[@id="volumeDetailProductTable"]/tbody/tr[4]/td[12]/text()'), tree.xpath('//*[@id="volumeDetailProductTable"]/tbody/tr[5]/td[12]/text()')
if url == 'http://www.cmegroup.com/trading/agricultural/grain-and-oilseed/corn_quotes_volume_voi.html':
ws['A2']=month[0]
ws['A3']=month[1]
ws['A4']=month[2]
ws['A5']=month[3]
ws['A6']=month[4]
ws['B2']=tvol[0]
ws['B3']=tvol[1]
ws['B4']=tvol[2]
ws['B5']=tvol[3]
ws['B6']=tvol[4]
ws['C2']=atclose[0]
ws['C3']=atclose[1]
ws['C4']=atclose[2]
ws['C5']=atclose[3]
ws['C6']=atclose[4]
ws['D2']=change[0]
ws['D3']=change[1]
ws['D4']=change[2]
ws['D5']=change[3]
ws['D6']=change[4]
elif url == 'http://www.cmegroup.com/trading/agricultural/grain-and-oilseed/wheat_quotes_volume_voi.html':
ws['A8']=month[0]
ws['A9']=month[1]
ws['A10']=month[2]
ws['A11']=month[3]
ws['A12']=month[4]
ws['B8']=tvol[0]
ws['B9']=tvol[1]
ws['B10']=tvol[2]
ws['B11']=tvol[3]
ws['B12']=tvol[4]
ws['C8']=atclose[0]
ws['C9']=atclose[1]
ws['C10']=atclose[2]
ws['C11']=atclose[3]
ws['C12']=atclose[4]
ws['D8']=change[0]
ws['D9']=change[1]
ws['D10']=change[2]
ws['D11']=change[3]
ws['D12']=change[4]
return month[1],tvol[2],atclose[4],change[4]
ws['A1']="Vencimiento"
ws['B1']="Total Volumen"
ws['C1']="Op. Int. At Close"
ws['D1']="Op. Int. Change"
print Volume('http://www.cmegroup.com/trading/agricultural/grain-and-oilseed/corn_quotes_volume_voi.html')
print Volume('http://www.cmegroup.com/trading/agricultural/grain-and-oilseed/wheat_quotes_volume_voi.html')
wb.save('balances.xlsx')
El output de ejecutar una vez la funcion Volume mostrando: month, tvol, atclose, change es el siguiente:
Código CONSOLA:
Ver original((['MAR 16'], ['MAY 16'], ['JUL 16'], ['SEP 16'], ['DEC 16']), (['23,036'], ['144,696'], ['48,048'], ['18,267'], ['21,333']), (['16,103'], ['656,971'], ['287,740'], ['126,250'], ['175,102']), (['-7,746'], ['4,999'], ['3,182'], ['2,095'], ['2,315']))
El error en consola es el siguiente:
Código CONSOLA:
Ver originalzipus@zipus-linux ~/Escritorio $ python scrapper.py
Traceback (most recent call last):
File "scrapper.py", line 71, in <module>
print Volume('http://www.cmegroup.com/trading/agricultural/grain-and-oilseed/corn_quotes_volume_voi.html')
File "scrapper.py", line 16, in Volume
ws['A2']=month[0]
File "/usr/local/lib/python2.7/dist-packages/openpyxl/worksheet/worksheet.py", line 342, in __setitem__
self[key].value = value
File "/usr/local/lib/python2.7/dist-packages/openpyxl/cell/cell.py", line 305, in value
self._bind_value(value)
File "/usr/local/lib/python2.7/dist-packages/openpyxl/cell/cell.py", line 209, in _bind_value
raise ValueError("Cannot convert {0} to Excel".format(value))
ValueError: Cannot convert ['MAR 16'] to Excel
Saludos!