% Project-local support code loaded first (contents not visible here).
:- ensure_loaded(common).
% Version-control identification string, stored as a fact.
cvs_id('$Id: webload.pl,v 1.27 2004/10/08 17:42:33 sandro Exp $').
% ? dereference(Term, [robot_name(N), max_threads(N), background_after(S)])
% Project-local files; presumably `retrieve` supplies retrieve/4 and
% `thread_util` supplies the thread helpers used below -- TODO confirm.
:- ensure_loaded(retrieve).
:- ensure_loaded(thread_util).
% debug/3 is used throughout this file for tracing on the `webload` topic.
:- ensure_loaded(library(debug)).
%% webload_bg(+URI, +Timeout, -Status)
%%
%% Start a webload in another thread and return when it's done
%% or when Timeout seconds have passed (whichever is sooner).
%%
%% Status is `fresh` when fresh/2 accepts URI and no fetch is started.
%% Otherwise it is the value reported by interruptable_sleep/2, which
%% this file documents as timeout, done, or failed.
%%
%% The head used to hard-code `fresh` as the third argument, so the
%% outcome of the background fetch was never returned to the caller.
%%
%% NOTE(review): fresh/2 can succeed with Ret unbound (when the scheme
%% is not dereferenceable); pagein/1 is then called with an unbound
%% argument -- confirm pagein/1 tolerates that.
webload_bg(URI, Timeout, Status) :-
    (   fresh(URI, Ret)
    ->  pagein(Ret),
        debug(webload, 'still fresh: ~a', [URI]),
        Status = fresh
    ;   debug(webload, 'not fresh: ~a; fetching', [URI]),
        % Drop any stale messages so that only the worker's report
        % can end the wait below.
        thread_clear_all_messages,
        thread_self(Me),
        thread_create_caught(webload(URI),
                             (   debug(webload, 'webload thread done', []),
                                 thread_send_message(Me, done)
                             ),
                             Error,
                             (   debug(webload, 'webload thread status: ~q', [Error]),
                                 thread_send_message(Me, Error)
                             )
                            ),
        % Wait into a fresh variable and unify afterwards, so that a
        % caller-supplied Status cannot influence the wait itself.
        interruptable_sleep(Timeout, Outcome),
        debug(webload, '~a tried to fetch ~a for ~q seconds: ~q', [Me, URI, Timeout, Outcome]),
        Status = Outcome
    ).
%% webload(URI)
%%
%% Make sure the content from the given URI is loaded.
%%
%% Nothing is done when rdfpage_uri/1 already holds for URI.  Otherwise
%% the first retrieval recorded with retr:initialResource = URI is
%% reused; if there is none, retrieve/4 is asked for one.  The chosen
%% retrieval is then handed to webload2/2.
webload(URI) :-
    (   rdfpage_uri(URI)
    ->  true
    ;   (   rdfpage(Retrieval, retr:initialResource, URI)
        ->  debug(webload, 'using prior retrieval ~a', [Retrieval])
        ;   retrieve(URI, [], Retrieval, []),
            debug(webload, 'using (maybe-new) retrieval ~a', [Retrieval])
        ),
        webload2(Retrieval, URI)
    ).
%% webreload(URI)
%%
%% Like webload/1, but do it even if not expired: no prior retrieval
%% is looked up, and retrieve/4 is always called with max_age(1).
%% (Presumably max_age(1) forces a new fetch -- confirm in retrieve/4.)
webreload(URI) :-
    RetrieveOptions = [max_age(1)],
    retrieve(URI, [], Retrieval, RetrieveOptions),
    debug(webload, 'using new retrieval ~a', [Retrieval]),
    webload2(Retrieval, URI).
%% webload2(Ret, URI)
%%
%% Page in retrieval Ret and record the modification in two global
%% flags: latest_rdf_db_mod_time gets the current time and
%% latest_rdf_db_mod_source gets URI.  Succeeds at most once.
%%
%% NOTE(review): flag/3 is given an atom (URI) as the new value; newer
%% SWI-Prolog versions may reject non-numeric flag values -- confirm.
webload2(Ret, URI) :-
    once((   rdfpage_sub_uri(Page, _, Ret),
             debug(webload, 'page ~a', [Page]),
             pagein(Ret),
             get_time(Timestamp),
             flag(latest_rdf_db_mod_time, _, Timestamp),
             flag(latest_rdf_db_mod_source, _, URI)
         )).
%% chaa
%%
%% Convenience goal: webload one fixed FOAF document.
chaa :-
    FoafURI = 'http://www.w3.org/People/Charles/foaf.rdf',
    webload(FoafURI).
%% worked
%%
%% Consult 'x/worked_source.pl' and webload every URI it lists through
%% worked_source/1.  Fails as soon as one of those webloads fails.
worked :-
    consult('x/worked_source.pl'),
    % Same meaning as forall/2: no listed source may fail to load.
    \+ ( worked_source(Source),
         \+ webload(Source)
       ).
%% most_recent_retrieval_success(URI, Retrieval, UnixTime)
%%
%% Retrieval is the one address_latest_successful_ret/2 gives for URI;
%% UnixTime is taken from its retr:exchangingHeaders unixtime literal.
most_recent_retrieval_success(URI, Retrieval, UnixTime) :-
    address_latest_successful_ret(URI, Retrieval),
    % pagein(Retrieval),
    TimeLiteral = literal(unixtime(UnixTime)),
    rdfpage(Retrieval, retr:exchangingHeaders, TimeLiteral).
%% most_recent_retrieval_attempt(URI, Retrieval, UnixTime)
%%
%% Retrieval is the one address_latest_ret/2 gives for URI (successful
%% or not, going by the predicate name -- confirm); UnixTime is taken
%% from its retr:exchangingHeaders unixtime literal.
most_recent_retrieval_attempt(URI, Retrieval, UnixTime) :-
    address_latest_ret(URI, Retrieval),
    % pagein(Retrieval),
    TimeLiteral = literal(unixtime(UnixTime)),
    rdfpage(Retrieval, retr:exchangingHeaders, TimeLiteral).
/*
most_recent_retrieval_attempt(URI, Ret, Time) :-
findall(time(NT, Ret),
( rdfpage(Ret, retr:initialResource, URI),
rdfpage(Ret, retr:startTime, literal(unixtime(T))),
NT is -T
),
UnsortedRets),
sort(UnsortedRets, Rets),
Rets = [RetPair | _],
RetPair = time(NTime, Ret),
Time is -NTime.
most_recent_retrieval_success(URI, Ret, Time) :-
findall(time(NT, Ret),
( rdfpage(Ret, retr:initialResource, URI),
rdfpage(Ret, rdf:type, retr:'SuccessfulRetrieval'),
rdfpage(Ret, retr:startTime, literal(unixtime(T))),
% use end time?
NT is -T
),
UnsortedRets),
sort(UnsortedRets, Rets),
Rets = [RetPair | _],
RetPair = time(NTime, Ret),
Time is -NTime.
most_recent_retrieval_success_final(URI, Ret, Time) :-
findall(time(NT, Ret),
( rdfpage(Ret, retr:finalResource, URI),
rdfpage(Ret, rdf:type, retr:'SuccessfulRetrieval'),
rdfpage(Ret, retr:startTime, literal(unixtime(T))),
% use end time?
NT is -T
),
UnsortedRets),
sort(UnsortedRets, Rets),
Rets = [RetPair | _],
RetPair = time(NTime, Ret),
Time is -NTime.
% use stale/3 instead!!!
expiration(Ret, Time) :-
% assume 30 minutes after fetch, for now.
rdfpage(Ret, retr:startTime, literal(unixtime(T))),
Time is T + 30 * 60.
*/
%% fresh(URI, Ret)
%%
%% Succeed when URI needs no new fetch.
%%
%% For a scheme accepted by yes_deref/1, Ret is the latest retrieval of
%% the URI's stem, and the value stale/3 reports for it must be
%% positive.  For any other scheme the URI counts as fresh and Ret is
%% left unbound.
fresh(URI, Ret) :-
    uri_scheme(URI, Scheme),
    (   yes_deref(Scheme)
    ->  uri_stem(URI, Stem),
        address_latest_ret(Stem, Ret),
        stale(Ret, Margin, []),
        0 < Margin
    ;   % call it fresh if we can't ever dereference it
        true
    ).
%% retrieval_status(+Ret, -Status)
%%
%% Obtain a status indicator for the current retrieval, as of right
%% now. This can change due to actions of other threads and other
%% processes.
%%
%% Status:
%% startup no "curl_output" file
%% connecting no "data" file
%% retrieving no "after_download.in" file
%% parsing / not rdf no "save.rdfdb"
%%
%% just use the pdb?
%%
%retrieval_status(Ret, Status) :-
% ( rdfpage(Ret, rdf:type, retr:'SuccessfulRetrieval')
% -> (
% Webmaster