--
-- Load the input data.
-- No USING clause is given, so Pig's default load function applies.
-- The four columns are declared without types.
--
userid_track_data = LOAD 'lastfm-dataset-1K/userid_track_data.tsv' AS (user_id, country, artist_name, track_name);

--
-- The number of records can be reduced here if necessary.
-- A sample size of 1 keeps the whole input; lower it to a fraction
-- between 0 and 1 to run on a random subset.
--
userid_track_data_sample = SAMPLE userid_track_data 1;

--
-- Group the rows by country, artist_name and track_name.
--
grouped_userid_track_data = GROUP userid_track_data_sample BY (country, artist_name, track_name);

--
-- Count the user_id entries for each (country, artist, track) combination.
-- The first output column is a constant label; FLATTEN(group) expands the
-- grouping key back into country, artist_name and track_name.
-- NOTE(review): COUNT counts rows with a non-null user_id, not distinct
-- users -- confirm the input holds at most one row per user and track.
--
counted_grouped =
    FOREACH grouped_userid_track_data
    GENERATE
        'Top-10-per-Land',
        FLATTEN(group),
        COUNT(userid_track_data_sample.user_id) AS counter;

-- Regroup the counted rows so that each country gets its own bag.
grouped_by_country =
    GROUP counted_grouped
    BY country;

--
-- For every country, keep the 10 tracks with the highest counter.
-- Rows with equal counters have no defined relative order, so which of
-- several tied tracks makes the cut at position 10 is not fixed.
--
top_10_by_country =
    FOREACH grouped_by_country {
        ranked_tracks = ORDER counted_grouped BY counter DESC;
        top_10_tracks = LIMIT ranked_tracks 10;
        GENERATE
            FLATTEN(top_10_tracks);
    }
--
-- Sort the result for output: by country, and within each country by
-- counter from highest to lowest, so each top 10 appears in rank order.
--
-- Column positions in top_10_by_country:
--   $0 = constant label   $1 = country   $2 = artist_name
--   $3 = track_name       $4 = counter
--
-- The previous sort key ended in "$3 DESC", which is track_name, so the
-- counter never took part in the ordering. artist_name and track_name are
-- kept as trailing keys only to make the order of tied rows deterministic.
--
ordered_by_country = ORDER top_10_by_country BY $0, $1, $4 DESC, $2, $3;
--
-- Store the result data.
--

STORE ordered_by_country INTO 'lastfm-dataset-1K/ordered_by_country.tsv';
