# -*- Makefile -*-
# (c) 2004 Torsten Will, Oliver Lau, c't Heise Verlag, Germany

#####################################################
#
# Configure section
#

# Root directory of your "c't-ROM" files -- this is where the *.HTM files
# are searched for.  Most likely the mount point of a CD-ROM drive.
#CTROMDIR := /cdrom
CTROMDIR := /var/tmp/towi/ctrom

# The FM-index tools (win/linux binaries) are kept in the svn repository;
# they are copies of the downloads offered at:
#   http://www.mfn.unipmn.it/~manzini/fmindex/executables.html
#   http://www.mfn.unipmn.it/~manzini/fmindex/
# BWINDEX is used to build the index, BWSEARCH to query it.
FMINDEXDIR := ../../fmindex-linux
BWINDEX    := $(FMINDEXDIR)/bwi
BWSEARCH   := $(FMINDEXDIR)/bwsearch

######################################################
#
# Rules section
#
# Default goal: run every demo.  Neither `all` nor `demos` produces a file
# of that name, so both are declared phony -- a stray file called `all` or
# `demos` can then never make them look up to date.
.PHONY: all demos
all: demos
demos: demo1 demo2 demo3

# Create a concatenated file with converted characters
# (uppercase-only, converted special chars, etc.) from the files
# listed in ctrom.files.
# The shell creates the target before make_ascii.sh has written anything,
# so a failed conversion removes it again (keeping the script's exit code);
# otherwise the truncated file would look up to date on the next run.
ctrom.ascii: ctrom.files
	./make_ascii.sh $< > $@ || { rc=$$?; rm -f $@; exit $$rc; }

# List every regular file named *.HTM below $(CTROMDIR);
# needed for make_ascii.sh.
# If find fails (e.g. the directory is missing or not mounted) the partly
# written list is removed so that it is not mistaken for a finished one.
ctrom.files:
	find $(CTROMDIR) -name "*.HTM" -type f > $@ || { rc=$$?; rm -f $@; exit $$rc; }

# Remember the positions of the original files in the
# concatenated file; needed for matches2files.py.
# grep -b prefixes each matching line with its byte offset.  The lines
# containing "> " are presumably the per-file markers written by
# make_ascii.sh -- verify against that script.
# grep exits non-zero when nothing matches; the empty output file is then
# removed so that it does not look up to date on the next run.
ctrom.starts: ctrom.ascii
	grep -b -e "> " $< > $@ || { rc=$$?; rm -f $@; exit $$rc; }

# BUILD THE FM-INDEX!
# Runs the index builder verbosely (-v) on the concatenated text; the rule
# expects the result to show up as ctrom.ascii.bwi.
ctrom.ascii.bwi: ctrom.ascii
	$(BWINDEX) -v $<

#
# demonstrate occurrence searching
#
# Runs a handful of example searches against the index.
# demo1 is a command, not a file, hence phony: a file named "demo1" must
# not stop the demo from running.
.PHONY: demo1
demo1: ctrom.ascii.bwi
	@echo '========='
	@echo '========= Look for occurences of some strings in the index ========'
	@echo '========= This is _really_ fast, meaning O(P)!             ========'
	@echo '========='
	$(BWSEARCH) OLIVER_LAU $<
	@echo '========='
	$(BWSEARCH) NETZWERK $<
	@echo '========= Umlauts are "*" in the index.'
	$(BWSEARCH) 'B*CHERWURM' $<
	@echo '========='
	$(BWSEARCH) THISISNOTAWORD $<
	@echo '========='

#
# demonstrate location searching
#
# Same kind of searches as demo1, but with -r (positions, per the banner
# text below) and -s30 (surrounding characters, per the banner text below).
# demo2 is a command, not a file, hence phony.
.PHONY: demo2
demo2: ctrom.ascii.bwi
	@echo '========='
	@echo '========= Look for positions if patterns in the index  ========'
	@echo '========= This is also O(P), but with a big "factor"!  ========'
	@echo '========='
	$(BWSEARCH) -r KOMPLIKATION $<
	@echo '========='
	@echo '========= Umlauts are "*" in the index.'
	$(BWSEARCH) -r 'B*CHERWURM' $<
	@echo '========= printing sorrunding chars is even slower, of course,'
	@echo '=========   since more buckets must be decompressed.'
	$(BWSEARCH) -s30 OLIVER_LAU $<
	@echo '========='

#
# go a step further and find the files with a match
#
# The output of bwsearch (stdout and stderr) is captured in matches.tmp and
# then handed to matches2files.py together with ctrom.starts.
# The redirection is spelled "> file 2>&1" on purpose: make runs recipes
# with /bin/sh by default, and "&>" is a bash extension.  A plain POSIX
# shell reads "cmd &> file" as "run cmd in the background, then truncate
# file", so the python step would race against an empty matches.tmp.
# demo3 is a command, not a file, hence phony.
.PHONY: demo3
demo3: ctrom.ascii.bwi ctrom.starts
	@echo '========='
	@echo '========= get the file name from the positions, too ========'
	@echo '========='
	$(BWSEARCH) -r KOMPLIKATION ctrom.ascii.bwi > matches.tmp 2>&1
	python ./matches2files.py ctrom.starts matches.tmp
	@echo '========='
	$(BWSEARCH) -r OLIVER_LAU ctrom.ascii.bwi > matches.tmp 2>&1
	python ./matches2files.py ctrom.starts matches.tmp


###############################
#
# mandatory:
#

# Remove the generated list, text, index and position files plus the
# scratch file of demo3; the CT<n> test files go via the CTclean
# prerequisite.  Declared phony so that a file named "clean" cannot
# turn this into a no-op.
.PHONY: clean
clean: CTclean
	rm -f ctrom.ascii ctrom.files ctrom.ascii.bwi ctrom.starts matches.tmp

# Remove every file in the current directory whose name starts with "CT"
# followed by a digit.  Declared phony so that a file named "CTclean"
# cannot turn this into a no-op.
.PHONY: CTclean
CTclean:
	rm -f CT[0-9]*

################################
#
# notes only:
#   this is searching in ctrom.ascii with "vmatch" (www.vmatch.de)
#   VHOME is not assigned in the visible part of this Makefile; it has to
#   come from the environment or the command line and name the directory
#   that holds the vmatch binary.  Without the check below an unset VHOME
#   would silently turn the command into "/vmatch".
#   No rule shown here builds ctrom.prj or ctrom.patterns -- presumably the
#   vmatch index project and a pattern file; both must already exist.
.PHONY: ctvmatch
ctvmatch: ctrom.prj ctrom.patterns
	$(if $(strip $(VHOME)),,$(error VHOME is not set - point it at the vmatch installation directory))
	$(VHOME)/vmatch -s -showdesc 40 -e 1 -complete -online -v -q ctrom.patterns ctrom

# Make a new array of test text data: for n = 1, 2, 4, ... 8192 the file
# CT<n> receives the first <n> blocks of 1024 bytes of ctrom.ascii (fewer
# if the input is shorter).
# The recipe never creates a file called "CT", so the target is phony.
# "|| exit 1" aborts on the first failing dd; the loop's exit status would
# otherwise reflect only the last iteration.
.PHONY: CT
CT: ctrom.ascii
	for i in 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 ; do \
		dd if=$< of=CT$$i bs=1024 count=$$i || exit 1 ; \
	done

