$ cd ~/kazmuzikblog
$ mkdir /usr/local/nutch-0.9/kazmuzik-url-dir
$ java -classpath classes LiveJournalMonthlyManager \
| cut -f 2 \
| sed -e 's/^/http\:\/\/kazuomik.livejournal.com\//' -e 's/$/.html/' \
> /usr/local/nutch-0.9/kazmuzik-url-dir/20071116.txt
$ cd /usr/local/nutch-0.9
$ bin/nutch inject kazmuzik-crawldb kazmuzik-url-dir
...
$ mkdir kazmuzik-segments
$ bin/nutch generate kazmuzik-crawldb kazmuzik-segments
...
Generator: segment: kazmuzik-segments/20071117161617
...
$ bin/nutch fetch kazmuzik-segments/20071117161617
...
fetching http://kazuomik.livejournal.com/141077.html
fetching http://kazuomik.livejournal.com/88261.html
fetching http://kazuomik.livejournal.com/151961.html
...
fetching http://kazuomik.livejournal.com/36988.html
fetching http://kazuomik.livejournal.com/79224.html
fetching http://kazuomik.livejournal.com/81466.html
Fetcher: done
$ bin/nutch readseg -list -dir kazmuzik-segments
NAME GENERATED FETCHER START FETCHER END FETCHED PARSED
20071117161617 609 2007-11-17T16:16:46 2007-11-17T17:11:29 609 594
$ touch kazmuzik-segments/20071117161617/fetcher.done
$ bin/nutch updatedb kazmuzik-crawldb -dir kazmuzik-segments -noAdditions
...
$ bin/nutch readdb kazmuzik-crawldb -stats
CrawlDb statistics start: kazmuzik-crawldb
Statistics for CrawlDb: kazmuzik-crawldb
TOTAL urls: 609
retry 0: 609
min score: 1.001
avg score: 1.015
max score: 1.114
status 1 (db_unfetched): 15
status 2 (db_fetched): 594
CrawlDb statistics: done
$ bin/nutch generate kazmuzik-crawldb kazmuzik-segments
...
Generator: segment: kazmuzik-segments/20071117184651
...
$ bin/nutch fetch kazmuzik-segments/20071117184651
...
fetching http://kazuomik.livejournal.com/152532.html
fetching http://kazuomik.livejournal.com/148363.html
fetching http://kazuomik.livejournal.com/34221.html
Fetcher: done
$ bin/nutch readseg -list -dir kazmuzik-segments
NAME GENERATED FETCHER START FETCHER END FETCHED PARSED
20071117161617 609 2007-11-17T16:16:46 2007-11-17T17:11:29 609 594
20071117184651 15 2007-11-17T18:47:24 2007-11-17T18:48:40 15 15
$ bin/nutch updatedb kazmuzik-crawldb -dir kazmuzik-segments -noAdditions
...
$ bin/nutch readdb kazmuzik-crawldb -stats
CrawlDb statistics start: kazmuzik-crawldb
Statistics for CrawlDb: kazmuzik-crawldb
TOTAL urls: 609
retry 0: 609
min score: 1.002
avg score: 1.031
max score: 1.228
status 2 (db_fetched): 609
CrawlDb statistics: done
$ bin/nutch mergesegs kazmuzik-segments2 -dir kazmuzik-segments
...
$ bin/nutch readseg -list -dir kazmuzik-segments2
NAME GENERATED FETCHER START FETCHER END FETCHED PARSED
20071117185404 609 2007-11-17T16:16:46 2007-11-17T18:48:40 609 609
$ cd ~/kazmuzikblog
$ java -classpath classes:/usr/java/jdk/db/lib/derby.jar:/usr/local/nutch-0.9/nutch-0.9.jar:\
/usr/local/nutch-0.9/lib/hadoop-0.12.2-core.jar:/usr/local/nutch-0.9/lib/commons-logging-1.0.4.jar:\
/usr/local/nutch-0.9/lib/log4j-1.2.13.jar \
LiveJournalEntryDatabaseInitializer /usr/local/nutch-0.9/kazmuzik-segments2/20071117185404
...
$ |