import dateparser
import requests
from bs4 import BeautifulSoup

import twitter_utils


def get(url):
    """Fetch a URL and return the parsed document."""
    res = requests.get(url)
    return BeautifulSoup(res.text, 'html.parser')


def find_navy_section(root, label):
    """Find the <h2> of the Sessionize info box whose label starts with `label`."""
    for elm in root.select('.text-navy'):
        if elm.contents[-1].strip().startswith(label):
            # Walk up to the enclosing grid column, then grab its heading.
            return elm.find_parent(
                lambda e: e.has_attr('class') and 'col-' in ' '.join(e['class'])
            ).find('h2')


def parse_event(url):
    """Scrape a Sessionize CFP page into a dict, or return None if it isn't one."""
    root = get(url)
    # Skip speaker profiles, login walls, and anything that isn't a Sessionize event page.
    if root.find('span', string='Speaker Profile'):
        return None
    if 'Log in' in root.find('title').string:
        return None
    if '@ Sessionize.com' not in root.find('title').string:
        return None

    data = {
        'Conference Name': root.select('.ibox-title h4')[0].string,
        'CFP URL': url,
    }

    elm = find_navy_section(root, 'location')
    if elm:
        data['Location'] = elm.select('.block')[-1].string
    elm = find_navy_section(root, 'website')
    if elm:
        data['Conference URL'] = elm.find('a')['href']

    # Single-day events show one "event date"; multi-day events show start/end dates.
    elm = find_navy_section(root, 'event date')
    if elm:
        data['Conference Start Date'] = data['Conference End Date'] = dateparser.parse(elm.string).date()
    elm = find_navy_section(root, 'event starts')
    if elm:
        data['Conference Start Date'] = dateparser.parse(elm.string).date()
    elm = find_navy_section(root, 'event ends')
    if elm:
        data['Conference End Date'] = dateparser.parse(elm.string).date()

    # Find the UTC version of the CFP end date, embedded on a .js-closedate node.
    # (select_one returns None when missing, unlike select(...)[0], which would
    # raise IndexError before the check below ever ran.)
    elm = root.select_one('.js-closedate')
    if elm is None:
        raise ValueError(f'js-closedate not found in {url}')
    utc_cfp_end_date = dateparser.parse(elm['data-date']).replace(tzinfo=None)
    data['CFP End Date'] = utc_cfp_end_date

    # The displayed close date/time is in the event's local timezone. Comparing
    # it against the UTC timestamp above gives us the event's UTC offset.
    elm = find_navy_section(root, 'CfS closes at')
    if not elm:
        raise ValueError(f'CfS closes at not found in {url}')
    time = elm.parent.select('.text-navy')[0].string[13:]  # drop the label prefix
    parsed = dateparser.parse(f'{elm.string} {time}')
    utc_offset = parsed - utc_cfp_end_date

    # Apply that offset to the (local) open date to get the CFP start in UTC.
    elm = find_navy_section(root, 'CfS opens at')
    if elm:
        time = elm.parent.select('.text-navy')[0].string[13:]  # drop the label prefix
        date = elm.string
        parsed = dateparser.parse(f'{date} {time}')
        data['CFP Start Date'] = (parsed - utc_offset).date()
    return data


def find_events():
    """Yield an event dict for every distinct Sessionize URL seen on Twitter."""
    seen_urls = set()
    for url in twitter_utils.search_for_url('sessionize.com'):
        # Strip the query string, lowercase, and drop any trailing slash.
        clean_url = url.split('?')[0].lower().rstrip('/')
        if clean_url in seen_urls:
            continue
        if '/api/' in clean_url:
            continue
        evt = parse_event(clean_url)
        if evt is not None:
            yield evt
        seen_urls.add(clean_url)


def scrape():
    yield from find_events()


if __name__ == '__main__':
    for d in find_events():
        print(d)
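

# ---------------------------------------------------------------------------
# `twitter_utils` is an external helper module that isn't shown in this file.
# As a rough illustration only, the sketch below shows one way its
# `search_for_url` generator could be implemented, assuming the Twitter API v2
# recent-search endpoint and a bearer token in the TWITTER_BEARER_TOKEN
# environment variable. The real module may work quite differently.
def _example_search_for_url(domain):
    """Hypothetical sketch: yield expanded URLs from recent tweets matching `domain`."""
    import os

    res = requests.get(
        'https://api.twitter.com/2/tweets/search/recent',
        headers={'Authorization': f'Bearer {os.environ["TWITTER_BEARER_TOKEN"]}'},
        params={'query': f'url:"{domain}"', 'tweet.fields': 'entities'},
    )
    res.raise_for_status()
    for tweet in res.json().get('data', []):
        # Each tweet's entities.urls entries carry the expanded link targets.
        for u in tweet.get('entities', {}).get('urls', []):
            yield u.get('expanded_url') or u['url']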