Add files via upload
This commit is contained in:
parent
6b0e5b87f9
commit
5233c2ff6b
BIN
chromedriver.exe
Normal file
BIN
chromedriver.exe
Normal file
Binary file not shown.
BIN
scrapImo.exe
Normal file
BIN
scrapImo.exe
Normal file
Binary file not shown.
101
scrapImo.py
Normal file
101
scrapImo.py
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
from bs4 import BeautifulSoup as bs
|
||||||
|
from selenium import webdriver
|
||||||
|
import csv
|
||||||
|
import os
|
||||||
|
|
||||||
|
filename = 'offers.csv'
|
||||||
|
|
||||||
|
if os.path.exists(filename):
|
||||||
|
os.remove(filename)
|
||||||
|
|
||||||
|
headers = ['Type', 'Prix', 'Lieu', 'Quartier', 'Surface',
|
||||||
|
'Nb_pieces', 'Nb_chambres', 'Extra', 'Numero', 'Lien']
|
||||||
|
|
||||||
|
with open(filename, 'w') as f:
|
||||||
|
writer = csv.writer(f)
|
||||||
|
writer.writerow(headers)
|
||||||
|
|
||||||
|
f.close()
|
||||||
|
|
||||||
|
cp = input("Code postal ?\n")
|
||||||
|
max_page = input("Jusqu a quelle page ?\n")
|
||||||
|
choice = raw_input("Entrer a pour achat ou l pour location\n")
|
||||||
|
|
||||||
|
buy = 'https://www.seloger.com/list.htm?enterprise=0&natures=1,4&places=%5b%7bcp%3a' + \
|
||||||
|
str(cp) + '%7d%5d&projects=2&qsversion=1.0&rooms=1,2,3,4&types=1'
|
||||||
|
rent = 'https://www.seloger.com/list.htm?enterprise=0&furnished=0&places=%5b%7bcp%3a' + \
|
||||||
|
str(cp) + '%7d%5d&projects=1&qsversion=1.0&rooms=1,2,3,4&types=1'
|
||||||
|
|
||||||
|
if choice.lower() in ['achat', 'a']:
|
||||||
|
adress = buy
|
||||||
|
elif choice.lower() in ['location', 'l']:
|
||||||
|
adress = rent
|
||||||
|
|
||||||
|
for x in xrange(1, max_page):
|
||||||
|
url = adress + '&LISTING-LISTpg=' + str(x)
|
||||||
|
|
||||||
|
options = webdriver.ChromeOptions()
|
||||||
|
options.add_argument('headless')
|
||||||
|
browser = webdriver.Chrome(chrome_options=options)
|
||||||
|
browser.get(url)
|
||||||
|
html = browser.page_source
|
||||||
|
# browser.close()
|
||||||
|
|
||||||
|
soup = bs(html, "html.parser")
|
||||||
|
|
||||||
|
containers = soup.findAll("div", {"class": ["c-pa-list c-pa-sl c-pa-gold cartouche ",
|
||||||
|
"c-pa-list c-pa-bd c-pa-gold cartouche ", "c-pa-list c-pa-sl c-pa-silver cartouche ",
|
||||||
|
"c-pa-list c-pa-bd c-pa-silver cartouche ", "c-pa-list c-pa-sl cartouche "]})
|
||||||
|
|
||||||
|
for c in containers:
|
||||||
|
info_c = c.findAll("div", {"class": "c-pa-info"})[0]
|
||||||
|
|
||||||
|
h_link = info_c.a["href"].strip()
|
||||||
|
h_quartier = h_link.split('/')[-2].replace('-', ' ')
|
||||||
|
if 'paris' in h_quartier:
|
||||||
|
h_quartier = h_link.split('/')[-1].replace('-', ' ')
|
||||||
|
if '?' in h_quartier:
|
||||||
|
h_quartier = ''
|
||||||
|
|
||||||
|
h_type = info_c.a.text
|
||||||
|
h_price = info_c.findAll(
|
||||||
|
"span", {"class": "c-pa-cprice"})[0].text.strip().encode('ascii', errors='ignore')
|
||||||
|
h_loc = info_c.findAll("div", {
|
||||||
|
"class": "c-pa-city"})[0].text.encode('ascii', errors='ignore').replace('me', '')
|
||||||
|
if len(info_c.findAll("a", {"class": " tagClick desktop listContactPhone"})) != 0:
|
||||||
|
h_phone = info_c.findAll("a", {"class": " tagClick desktop listContactPhone"})[
|
||||||
|
0]["data-tooltip-focus"]
|
||||||
|
|
||||||
|
h_nbp = ""
|
||||||
|
h_nbch = ""
|
||||||
|
h_extra = ""
|
||||||
|
h_surface = ""
|
||||||
|
|
||||||
|
print(h_type)
|
||||||
|
print(h_price)
|
||||||
|
print(h_loc)
|
||||||
|
print(h_quartier)
|
||||||
|
|
||||||
|
for info in info_c.div.text.strip().split('\n'):
|
||||||
|
if "m" in info:
|
||||||
|
h_surface = info.encode(
|
||||||
|
'ascii', errors='ignore').replace(' m', '')
|
||||||
|
print(h_surface)
|
||||||
|
elif "p" in info:
|
||||||
|
h_nbp = info.replace(' p', '')
|
||||||
|
print(h_nbp)
|
||||||
|
elif "ch" in info:
|
||||||
|
h_nbch = info.replace(' ch', '')
|
||||||
|
print(h_nbch)
|
||||||
|
else:
|
||||||
|
h_extra = info.encode('ascii', errors='ignore')
|
||||||
|
print(h_extra)
|
||||||
|
print(h_phone)
|
||||||
|
print("")
|
||||||
|
|
||||||
|
with open(filename, 'a') as f:
|
||||||
|
writer = csv.writer(f)
|
||||||
|
writer.writerow([h_type, h_price, h_loc, h_quartier,
|
||||||
|
h_surface, h_nbp, h_nbch, h_extra, h_phone, h_link])
|
||||||
|
|
||||||
|
f.close()
|
||||||
Loading…
Reference in New Issue
Block a user