#!/usr/bin/python
# -*- coding: utf-8 -*-
import logging
import requests
from bs4 import BeautifulSoup
from scrapper_helpers.utils import caching, get_random_user_agent, key_sha1
from trojmiastopl import BASE_URL
# Module-level logger. Note that it is named after this file's path (__file__),
# not after the dotted module name.
log = logging.getLogger(__file__)
# Endpoint that filter payloads are POSTed to in order to obtain a search-results url.
SEARCH_URL = "https://ogloszenia.trojmiasto.pl/szukaj/"
def decode_type(filter_value):
    """ Decodes offer type name to its value

    List of available options and their translations can be found below.
    Names are matched exactly (case-sensitive). An unknown name yields 0.

    :param filter_value: One of available type names
    :type filter_value: str
    :return: Int value for POST variable, or 0 when the name is not recognised
    :rtype: int
    """
    available = {
        "Mieszkanie": 100,  # flat
        "Pokoj": 395,  # room
        "Biuro": 400,  # office
        "Dom": 200,  # house
        "Blizniak": 230,  # semi-detached house
        "Kamienica": 250,  # tenement house
        "Pietrowy": 260,  # storey house
        "Rekreacyjny": 220,  # leisure house
        "Szeregowy": 240,  # terraced house
        "Wolnostojacy": 210,  # detached house
        "Lokal usługowy": 450,  # service area
        # Every other key is spelled without Polish diacritics, so accept the
        # ASCII spelling of this one as well instead of silently returning 0.
        "Lokal uslugowy": 450,
    }
    return available.get(filter_value, 0)
def decode_category_name(category):
    """ Translates a category name into its numeric identifier

    Names that are not listed below fall back to the identifier 100.

    :param category: Category name
    :type category: str
    :return: Category number
    :rtype: int
    """
    known_categories = {
        "nieruchomosci-sprzedam": 101,
        "nieruchomosci-mam-do-wynajecia": 104,
    }
    try:
        return known_categories[category]
    except KeyError:
        # Anything unrecognised maps to the fallback identifier.
        return 100
def get_url_for_filters(payload):
    """ Asks the trojmiasto.pl search engine for the url matching given POST payload

    The payload is POSTed to ``SEARCH_URL`` and the url is read from the returned HTML:
    it is the ``value`` attribute of the node located two siblings after the first
    ``option`` found inside the element with class ``nice-select-tsi``.

    :param payload: Tuple of tuples containing POST key and argument
    :type payload: tuple
    :return: Url generated by trojmiasto.pl search engine
    :rtype: str
    :raises requests.HTTPError: When the search engine answers with an error status
    :raises AttributeError: When the expected elements are missing from the response
    :raises KeyError: When the located node has no ``value`` attribute
    """
    response = requests.post(SEARCH_URL, payload, headers={'User-Agent': get_random_user_agent()})
    # Reject error responses up front instead of trying to parse an error page.
    response.raise_for_status()
    html_parser = BeautifulSoup(response.content, "html.parser")
    first_option = html_parser.find(class_="nice-select-tsi").find("option")
    # Two hops: presumably the first sibling is the whitespace text node between
    # options and the second is the next option -- verify against the live markup.
    target_option = first_option.next_sibling.next_sibling
    return target_option.attrs["value"]
def _build_filter_payload(category_id, region, filters):
    """ Builds the POST payload (tuple of key/value pairs) sent to the search engine """
    payload = [("id_kat", category_id)]
    if region is not None:
        payload.append(("s", region))
    for key, value in filters.items():
        if isinstance(value, tuple):
            # Range filter given as (lower, upper). Tuples are immutable, so the
            # default lower bound is computed into a local instead of assigned in place.
            lower = 0 if value[0] is None else value[0]
            payload.append((key, lower))
            if value[1] is not None:
                # The same key is sent twice: once for each end of the range.
                payload.append((key, value[1]))
            continue
        if key == "offer_type":
            # The form expects the numeric type under a different field name.
            value = decode_type(value)
            key = "rodzaj_nieruchomosci"
        elif key == "data_wprow" and value not in ("1d", "3d", "1w", "2w"):
            # Unsupported values are dropped rather than sent.
            continue
        payload.append((key, value))
    return tuple(payload)


def get_url(category, region=None, **filters):
    """ Creates url for given parameters

    Without filters the url is built locally from ``BASE_URL``, the category and, when given,
    the region. With filters the search engine is queried for the matching url.

    Filter values are handled as follows:

    * a tuple is treated as a ``(lower, upper)`` range; a ``None`` lower bound becomes 0 and
      a ``None`` upper bound is omitted
    * ``offer_type`` is translated with :func:`decode_type` and sent as ``rodzaj_nieruchomosci``
    * ``data_wprow`` is only sent when it is one of ``1d``, ``3d``, ``1w``, ``2w``
    * anything else is sent unchanged

    :param category: Search category
    :param region: Search region
    :param filters: Dictionary with additional filters. See :meth:'trojmiastopl.get_category' for reference
    :type category: str
    :type region: str
    :type filters: dict
    :return: Url for given parameters
    :rtype: str
    :raises requests.HTTPError: When the url for given filters could not be obtained
    """
    if filters:
        payload = _build_filter_payload(decode_category_name(category), region, filters)
        try:
            return get_url_for_filters(payload)
        except (AttributeError, requests.HTTPError):
            # Callers only need to handle one exception type for a failed lookup.
            raise requests.HTTPError("Could not obtain search url for given filters")
    url = "/".join([BASE_URL, category]) + "/"
    if region is not None:
        url += "s,{0}.html".format(region)
    return url
@caching(key_func=key_sha1)
def get_content_for_url(url):
    """ Fetches given url with a GET request sent under a random User-Agent

    The call is wrapped by the ``caching`` decorator keyed with ``key_sha1``. According to the
    original documentation the response is cached in the /var/temp directory when the DEBUG
    environmental variable is True; that behaviour lives in ``scrapper_helpers`` and is not
    visible in this module.

    :param url: Website url
    :type url: str
    :return: Response for requested url
    :raises requests.HTTPError: When the server answers with an error status
    """
    request_headers = {'User-Agent': get_random_user_agent()}
    page = requests.get(url, headers=request_headers)
    page.raise_for_status()
    return page