pdbench/movie_data/movie_query.triql

58 lines
1.7 KiB
Plaintext

-- Create new uncertain Trio tables (without lineage)
-- from the initial relational tables
-- The new tables will be treated as base tables in Trio,
-- with tuple alternatives and confidences attached to them
create table T_Movies as
select nolineage groupalts(movie_id) *, uniform as conf from Movies;
create table T_Ratings as
select nolineage groupalts(movie_id, cust_id, date)
movie_id, cust_id, date, rating, confidence as conf from Ratings;
-- Select very uncertain movies: wide range of possible years
-- Assign confidence values uniformly to alternatives
create table U_Movies as
select *, uniform as conf
from T_Movies
where [max(year) - min(year)] > 30;
-- Join in recent high-confidence ratings for these movies
create table M_Ratings as
select merged title, year, date, rating
from T_Ratings R, U_Movies M
where R.movie_id = M.movie_id
and R.date like '2005%'
and conf(R) >= 0.4;
-- Get directors back using lineage
create table Dir_Ratings as
select R.title, R.year, R.rating, M.director
from M_Ratings R, U_Movies M
where R ==> M;
-- Find directors of what are probably series: more than
-- one alternative with same director, different year
create table Series as
select merged movie_id, director, uniform as conf
from T_Movies M1
where exists
[select * from T_Movies M2
where M2.director = M1.director
and M2.year <> M1.year];
-- Find controversial series: high variance in ratings
select merged S.movie_id, S.director,
R1.rating as rating1,
R2.rating as rating2
from Series S, T_Ratings R1, T_Ratings R2
where S.movie_id = R1.movie_id
and S.movie_id = R2.movie_id
and R1.rating - R2.rating >=3;