#!/usr/bin/perl -T
#
# MECAPI - a small CGI front end to the MeCab morphological analyzer.
#
# Request handling, in order:
#   * no parameters  -> print the usage page (the heredoc below)
#   * mode=code      -> print this script's own source as text/plain
#   * format=json    -> analyze "sentence" and print JSON (optionally wrapped
#                       in a validated JSONP callback)
#   * otherwise      -> analyze "sentence" and print XML rendered through
#                       HTML::Template from the __DATA__ section
#
# The sentence arrives as UTF-8, is converted to EUC-JP before it is handed
# to MeCab, and every response is converted back to UTF-8 before printing.
use strict;
use warnings;
use Encode;
use CGI;
use HTML::Template;
BEGIN {
    unshift @INC,
        "/home/jaja/lib/perl5/site_perl/5.8.8/mach/",
        "/home/jaja/lib/perl/";
}
use MeCab;
use JSON;

my $q = CGI->new;

if (not $q->param) {
    print $q->header(-charset => 'UTF-8');
    # The page is served as text/html, so the URL must be HTML-escaped
    # before it is interpolated into the document.
    my $url = $q->escapeHTML($q->self_url());
    print <<"END_HTML";
MECAPI - MeCab Web Service (MeCab API)

MECAPI - MeCab Web Service (MeCab API)

Use "MeCab", the Japanese morphological analyzer, via Web service !!!
[SENTENCE] ==> MeCab Web Service ==> [Results of Morphological Analysis in XML]


response: (surface,feature,pos,inflection,baseform,pronounciation)
filter: format:

Request URL

$url

Request parameters

ParameterValueDescription
sentencestring (required)The sentence to be analyzed (Japanse, UTF-8)
response surface, feature, pos, inflection, baseform, pronounciation Controls the data returned by the operation.
surface : Surface string of words.
feature : Various information. Contains pos, inflection, baseform and pronounciation
pos : Part-Of-Speech of the word.
inflection : Type and form of inflection.
baseform : Baseform of the word.
pronounciation : Pronounciation of the word.
Default: "surface,feature"
filter noun, uniq Filters the words in the result of MeCab by the operation.
noun : ignores the words whose part-of-speech is not noun.
uniq : removes duplicate words and count them.
Default: ""
format xml, json Specifies the output format. Default: "xml"
callback string The name of the callback function to wrap around the JSON data. If format=json has not been requested, this parameter is ignored.

Sample Request Url:

Response fields

FieldDescription
MecabResultContains all of the results.
wordContains each individual word.
surfaceSurface string of a word.
featureContains pos, inflection, baseform and pronounciation.
posPart-Of-Speech of the word.
inflectionType and form of inflection.
baseformBaseform of the word.
pronounciationPronounciation of the word.
countThe frequency of words in the result. (for filter=uniq)

Sample response

Source Code

You can get SOURCE CODE FROM HERE.


original version by Tatsuo Yamashita, since 2006.9.18.
modified by YOU.
END_HTML
    exit;
}

my $mode = $q->param('mode') || "";
if ($mode eq "code") {
    # Three-argument open: the mode can never be taken from the file name.
    open my $fh, '<', $0 or die "Cannot open $0 for reading: $!";
    my $code = do { local $/; <$fh> };
    close $fh;
    print "Content-Type: text/plain; charset=UTF-8\n\n$code\n";
    exit;
}

# Use a defined-check (not ||) so that the literal sentence "0" is analyzed
# instead of being replaced by the empty string.
my $sentence = $q->param('sentence');
$sentence = "" unless defined $sentence;

my $res      = $q->param('response') || "surface,feature";  # surface, feature
my $filter   = $q->param('filter')   || "";                 # noun, uniq
my $format   = $q->param('format')   || "xml";              # xml, json
my $callback = $q->param('callback') || "";

if ($format eq "json" and $callback ne "") {
    # The callback is echoed verbatim into an executable JavaScript response,
    # so accept only a dotted/indexed identifier path (e.g. "cb",
    # "ns.handler", "cbs[3]") and reject anything else before doing any work.
    my $ident = qr/[A-Za-z_\$][A-Za-z0-9_\$]*/;
    if ($callback =~ /\A($ident(?:\.$ident|\[\d+\])*)\z/) {
        $callback = $1;
    }
    else {
        print $q->header(
            -status  => '400 Bad Request',
            -type    => 'text/plain',
            -charset => 'UTF-8',
        );
        print "Invalid callback parameter\n";
        exit;
    }
}

Encode::from_to($sentence, 'utf-8', 'euc-jp');
my $words_ref = do_macab_euc($sentence, $filter, $res);

if ($format eq "json") {
    my $json = objToJson($words_ref);
    if ($callback ne "") {
        $json = qq{$callback($json);};
    }
    Encode::from_to($json, 'euc-jp', 'utf-8');
    print "Content-Type: application/x-javascript; charset=UTF-8\n\n$json\n";
    exit;
}

# The XML template is read from the __DATA__ section at the end of the file.
my $template = join("", <DATA>);
my $t = HTML::Template->new(scalarref => \$template, die_on_bad_params => 0);
$t->param(results => $words_ref);
my $out = $t->output();
Encode::from_to($out, 'euc-jp', 'utf-8');
print "Content-Type: text/xml; charset=UTF-8\n\n$out";

# do_macab_euc($str [, $filter_spec [, $fields]])
#
# Runs MeCab over $str and returns a reference to an array of hash
# references, one per emitted word.
#
#   $str          text handed to MeCab::Tagger::parseToNode; the caller
#                 converts it to EUC-JP first
#   $filter_spec  optional; string that may contain the words "noun" and/or
#                 "uniq". Defaults to the file-level $filter.
#   $fields       optional; string naming the keys to emit: surface, feature,
#                 pos, inflection, baseform, pronounciation. Defaults to the
#                 file-level $res.
#
# With "noun", nodes whose feature string does not start with the bytes
# \xcc\xbe\xbb\xec (Meishi, i.e. noun) are skipped. With "uniq", only the
# first occurrence of each surface is kept and every returned word gets a
# "count" key holding how many times that surface was seen.
sub do_macab_euc {
    my ($str, $filter_spec, $fields) = @_;
    $filter_spec = $filter unless defined $filter_spec;
    $fields      = $res    unless defined $fields;

    # These tests do not depend on the node, so evaluate them once.
    my $only_nouns = $filter_spec =~ /\b noun \b/x;
    my $uniq       = $filter_spec =~ /\b uniq \b/x;
    my %want = (
        surface        => scalar($fields =~ /\b surface \b/x),
        feature        => scalar($fields =~ /\b feature \b/x),
        pos            => scalar($fields =~ /\b pos \b/x),
        inflection     => scalar($fields =~ /\b inflection \b/x),
        baseform       => scalar($fields =~ /\b baseform \b/x),
        pronounciation => scalar($fields =~ /\b pronounciation \b/x),
    );

    my $tagger = MeCab::Tagger->new("");
    my $node   = $tagger->parseToNode($str);

    my @words;
    my %count;
    # Advance before inspecting, so the node returned by parseToNode itself
    # is never emitted.
    while ($node = $node->{next}) {
        my $surface = $node->{surface};
        next if $surface eq "";

        my $feature = $node->{feature};
        next if ($only_nouns and $feature !~ /^\xcc\xbe\xbb\xec/);  # Meishi (noun)

        $count{$surface}++;
        next if ($uniq and $count{$surface} >= 2);

        my @ft = split(/,/, $feature);
        my %word;
        $word{surface} = $surface if $want{surface};
        $word{feature} = $feature if $want{feature};
        # Feature strings may carry fewer fields than indexed here; the
        # defined() guard keeps the slices from matching against undef.
        $word{pos} = join(",", grep { defined($_) && $_ !~ /^\*/ } @ft[0, 1, 2, 3])
            if $want{pos};
        $word{inflection} = join(",", grep { defined($_) && $_ !~ /^\*/ } @ft[4, 5])
            if $want{inflection};
        $word{baseform}       = $ft[6] if $want{baseform};
        $word{pronounciation} = $ft[7] if $want{pronounciation};
        push @words, \%word;
    }

    if ($uniq) {
        foreach my $w (@words) {
            # Look up by the surface remembered at push time is not possible
            # when "surface" was not requested, so mirror the original lookup.
            $w->{count} = $count{$w->{surface}};
        }
    }
    return \@words;
}

__DATA__