<?xml version="1.0"?>
<!--
  Pipeline configuration. The root <config> element contains:
    * one <index> section defining collection "1";
    * one <cluster> section named "1" that refers to that collection;
    * two <analysis> entries that refer to both of the above.

  NOTE(review): the output-dir attribute on <config> is an absolute path
  under one user's home directory (/Users/jelsas/temp); it presumably has
  to be adapted for each machine. TODO confirm.
-->
<config output-dir="/Users/jelsas/temp">

<!--
  Index section for collection "1".
  NOTE(review): "wegt" in input-file and input-dir looks like a typo for
  "wget" (the log file itself is named wget.log). Both values are
  /path/to/... placeholders, so they need to be replaced with real
  locations before use in any case.
-->
<index collection-name="1"
       input-file="/path/to/wegt/output/dir/wget.log"
       input-dir="/path/to/wegt/output/dir"
       save-to-file="true">

  <!-- Parse filters, listed in document order (presumably also the order
       in which they are applied; TODO confirm against the loader). -->
  <parse-filters>
    <filter class-name="idl.tmt.documentparsing.filters.WordFilter"/>
    <filter class-name="idl.tmt.documentparsing.filters.LowerCaseFilter"/>
    <filter class-name="idl.tmt.documentparsing.filters.StopWordFilter">
      <!-- Relative file name; the base directory it is resolved against
           is not visible in this file. -->
      <param name="stopWordFile" value="stopwords.txt"/>
    </filter>
    <filter class-name="idl.tmt.documentparsing.filters.LengthFilter">
      <param name="minLength" value="3"/>
    </filter>
  </parse-filters>

  <!-- Three representation builders (title, meta, link text). All carry
       weight 1.0 and share-termlist="true"; only the link-text builder
       additionally sets binarize="true". -->
  <representation>
    <builder class-name="idl.tmt.representation.TitleTextRepresentationBuilder"
             weight="1.0"
             share-termlist="true"/>
    <builder class-name="idl.tmt.representation.MetaTextRepresentationBuilder"
             weight="1.0"
             share-termlist="true"/>
    <builder class-name="idl.tmt.representation.LinkTextRepresentationBuilder"
             weight="1.0"
             share-termlist="true"
             binarize="true"/>
  </representation>

  <!-- A single transformation, configured with minOccurrences = 6. -->
  <transformations>
    <transform class-name="idl.tmt.representation.transformations.TermOccurrenceFilter">
      <param name="minOccurrences" value="6"/>
    </transform>
  </transformations>
</index>

<!--
  Cluster section "1". use-collection="1" matches the collection-name
  declared on the <index> element in this file.
-->
<cluster name="1"
         use-collection="1"
         save-to-file="true">

  <!-- Clusterer class and its parameters. The seed here and the seed of
       the training-set selector below are both set to 100. -->
  <clusterer class-name="idl.tmt.clusterers.EnhancedEM">
    <param name="initializerName" value="idl.tmt.clusterers.RandomInstancesEMInitializer"/>
    <param name="debug" value="false"/>
    <param name="seed" value="100"/>
    <param name="minStdDev" value="0.02"/>
    <param name="maxClusterersToBuild" value="10"/>
    <param name="numClusters" value="3"/>
  </clusterer>

  <!-- One training set, produced by RandomSelector with
       instanceCount = 3000. -->
  <training-sets>
    <set class-name="idl.tmt.training.RandomSelector">
      <param name="instanceCount" value="3000"/>
      <param name="seed" value="100"/>
    </set>
  </training-sets>
</cluster>

<!-- Analysis outputs. Both entries refer to collection "1" and clusterer
     "1", the identifiers used by the <index> and <cluster> sections of
     this file. -->
<analysis type="HTMLAnalysis"
          name="out"
          use-collection="1"
          use-clusterer="1"/>
<analysis type="Table"
          name="table"
          use-collection="1"
          use-clusterer="1"/>

</config>