﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using Helpers;
using System.Threading;
using System.Collections.Concurrent;
using System.Threading.Tasks;
using System.Collections;

namespace MapReduce {
    ////////////////////////////////////////////////////////////////////////////////
    //
    // Word-count sample that follows the MapReduce pattern: every *.txt file in
    // the input directory is tokenized and counted in parallel (map/combine), the
    // partial counts are grouped by word (partition) and then summed per word in
    // parallel (reduce).
    //
    class MapReduceSample {

        // Directory that is scanned for *.txt input files (relative to the
        // current working directory).
        static readonly string inputDirectory = @"..\..\..\textfiles\";

        // Every whitespace or punctuation character in the range U+0000..U+00FF;
        // these characters separate words when a line is split.
        static readonly char[] delimiters = Enumerable.Range(0, 256).Select(i => (char)i)
            .Where(c => Char.IsWhiteSpace(c) || Char.IsPunctuation(c))
            .ToArray();

        // Default threshold for WriteCountsToConsole: only words that occur
        // strictly more often than this are printed.
        const int DefaultPrintThreshold = 100;

        ////////////////////////////////////////////////////////////////////////////////
        //
        // main
        //
        // Counts the words of all *.txt files in inputDirectory and prints the
        // frequent ones. The counting is executed through TestRunner.Runtest
        // (defined in Helpers, not shown here; presumably it runs and times the
        // delegate -- TODO confirm).
        //
        static void Main(string[] args) {

            if (!Directory.Exists(inputDirectory)) {
                // Report the problem instead of dying with an unhandled exception.
                Console.Error.WriteLine(
                    "Input directory not found: " + Path.GetFullPath(inputDirectory));
            } else {
                var files = Directory.EnumerateFiles(inputDirectory, "*.txt");

                // Stays null if the runner never invokes the delegate;
                // WriteCountsToConsole tolerates that.
                IEnumerable<KeyValuePair<string, int>> counts = null;

                TestRunner.Runtest(() => MapReduceParallelSimple(files, out counts));

                WriteCountsToConsole(counts);
            }

            Console.WriteLine();
            Console.WriteLine("done. Press any key.");
            Console.ReadKey();
        }



        ////////////////////////////////////////////////////////////////////////////////
        //
        // Count words following the MapReduce pattern.
        //
        // inputFiles: paths of the text files to process; must not be null.
        // counts:     receives one (word, total occurrences) pair per distinct
        //             lower-cased word, in no particular order. Empty tokens
        //             (produced by adjacent delimiters) are counted under the
        //             key "".
        //
        // Throws ArgumentNullException if inputFiles is null.
        //
        static void MapReduceParallelSimple(IEnumerable<string>
            inputFiles, out IEnumerable<KeyValuePair<string, int>> counts) {

            if (inputFiles == null) {
                throw new ArgumentNullException("inputFiles");
            }

            // Materialize once so that a lazy sequence is enumerated a single time.
            string[] fileNames = inputFiles.ToArray();

            // Split and map phase: file names => partial word counts.
            ConcurrentBag<Dictionary<string, int>> partialCounts = MapFiles(fileNames);

            // Partition phase: collect the partial counts that share a key.
            Dictionary<string, List<int>> grouped = GroupCountsByWord(partialCounts);

            // Reduce phase: merge the partial counts of every word.
            counts = ReduceGroups(grouped);
        }


        ////////////////////////////////////////////////////////////////////////////////
        //
        // Map (and combine) phase: counts the words of the given files in parallel
        // and returns the partial dictionaries that the parallel loop produced.
        //
        static ConcurrentBag<Dictionary<string, int>> MapFiles(string[] fileNames) {

            var partialCounts = new ConcurrentBag<Dictionary<string, int>>();

            Parallel.ForEach(
                fileNames,
                () => new Dictionary<string, int>(),    // loop-local result dictionary
                (fileName, loopState, localCounts) => {
                    // Read the file line by line and split every line into words.
                    IEnumerable<string> words =
                        File.ReadLines(fileName).SelectMany(line => line.Split(delimiters));

                    // Count locally ("map" and "combine" in one step).
                    foreach (string word in words) {
                        // Invariant casing keeps the keys independent of the
                        // machine's current culture.
                        string key = word.ToLowerInvariant();

                        // seen is 0 when the key is not present yet.
                        int seen;
                        localCounts.TryGetValue(key, out seen);
                        localCounts[key] = seen + 1;
                    }
                    return localCounts;
                },
                // ...and remember every locally computed result.
                localCounts => partialCounts.Add(localCounts)
            );

            return partialCounts;
        }


        ////////////////////////////////////////////////////////////////////////////////
        //
        // Partition phase: for every word, gathers its partial counts from all
        // partial dictionaries into one list.
        //
        static Dictionary<string, List<int>> GroupCountsByWord(
            IEnumerable<Dictionary<string, int>> partialCounts) {

            var grouped = new Dictionary<string, List<int>>();

            foreach (Dictionary<string, int> partial in partialCounts) {
                foreach (KeyValuePair<string, int> entry in partial) {
                    List<int> values;
                    if (!grouped.TryGetValue(entry.Key, out values)) {
                        values = new List<int>();
                        grouped.Add(entry.Key, values);
                    }
                    values.Add(entry.Value);
                }
            }

            return grouped;
        }


        ////////////////////////////////////////////////////////////////////////////////
        //
        // Reduce phase: sums the partial counts of every word in parallel and
        // returns the (word, total) pairs.
        //
        static ConcurrentBag<KeyValuePair<string, int>> ReduceGroups(
            Dictionary<string, List<int>> grouped) {

            var totals = new ConcurrentBag<KeyValuePair<string, int>>();

            Parallel.ForEach(grouped, group => {
                // For every key (every word) add up its partial counts.
                int total = 0;
                foreach (int partialCount in group.Value) {
                    total += partialCount;
                }
                totals.Add(new KeyValuePair<string, int>(group.Key, total));
            });

            return totals;
        }


        ////////////////////////////////////////////////////////////////////////////////
        //
        // Helper that prints the results: only non-empty words that occur more
        // than `threshold` times (100 by default), ordered by ascending count.
        // Words with the same count are ordered ordinally so that the output is
        // stable from run to run. Does nothing when counts is null.
        //
        private static void WriteCountsToConsole(
            IEnumerable<KeyValuePair<string, int>> counts,
            int threshold = DefaultPrintThreshold) {

            if (counts == null) {
                return;
            }

            var orderedCount = counts
                .Where(w => w.Value > threshold && w.Key != "")
                .OrderBy(w => w.Value)
                .ThenBy(w => w.Key, StringComparer.Ordinal);

            foreach (var word in orderedCount) {
                Console.WriteLine(word.Key + ": " + word.Value);
            }
        }
    }
}
