iPhoneで使える、日本語 WordNetのObjective-Cクラスを書いてみた。
「自然言語処理は Python がいちばん」といった記事を見ていて、iPhoneでも使いたくなったので、Objective-Cで書いてみました。
日本語 WordNetデータベースにアクセスするためのObjective-Cクラス
http://nlpwww.nict.go.jp/wn-ja/
2009.4.4修正
2011.6.27
伊藤真央さんからご指摘、修正。
databaseをクローズしていない部分を修正
実用になる速度となりました。
辞書を抜いて置いてあります。
「wnjpn.db」を入れて使用してください(下記サンプルの Controll.m は「wnjpn-0.9.db」という名前のリソースを読み込むので、ファイル名かコードのどちらかを合わせてください)。
https://github.com/oomori/WordNetJP_ObjC
間違いあればご指摘ください。
WordNetJPN.h
//
//  WordNetJPN.h
//
//  Created by 大森 智史 on 09/03/31.
//  Copyright 2009 Satoshi Oomori. All rights reserved.
//

#import <UIKit/UIKit.h>
//#import <Cocoa/Cocoa.h>
#import "sqlite3.h"

// Ownership note: this code uses manual retain/release and the implementation
// stores autoreleased strings in these properties, so every object-typed
// property is declared `copy`. With `assign` the stored pointer would dangle as
// soon as the enclosing autorelease pool drains. Owning properties must be
// balanced by a release in each class's -dealloc.

//********************* Synset *************************
// Relationships:
//   Synset 1:N Sense
//   Synset 1:N Synlink
//   Synset 1:N SynsetDef
@interface Synset : NSObject {
    NSString *pos;     // part of speech (n = noun, v = verb, a = adjective, r = adverb)
    NSString *synset;  // synset ID, e.g. "06589574-n"
    NSString *src;     // source, e.g. "eng30"
    NSString *name;    // name, e.g. "publication"
}
@property (nonatomic, copy) NSString *synset;
@property (nonatomic, copy) NSString *pos;
@property (nonatomic, copy) NSString *name;
@property (nonatomic, copy) NSString *src;
@end

//********************* Sense *************************
// Sense (concept) class.
//   Sense N:1 Word
//   Sense N:1 Synset
@interface Sense : NSObject {
    int rank;
    int lexid;
    NSString *synset;
    int freq;
    NSString *src;
    NSString *lang;    // language (jpn = Japanese, eng = English)
    int wordid;        // word ID (integer starting at 1)
}
// nonatomic: no locking in the accessors, so faster but not safe to share across threads.
// assign:    plain store, used for the scalar values.
// copy:      the object keeps its own reference to the string.
@property (nonatomic, assign) int rank;
@property (nonatomic, assign) int lexid;
@property (nonatomic, copy) NSString *synset;
@property (nonatomic, assign) int freq;
@property (nonatomic, copy) NSString *src;
@property (nonatomic, copy) NSString *lang;
@property (nonatomic, assign) int wordid;
@end

//********************* Word *************************
// Word class.
@interface Word : NSObject {
    int wordid;        // word ID (integer starting at 1)
    NSString *lang;    // language (jpn = Japanese, eng = English)
    NSString *lemma;   // the word itself, e.g. "理性的", "アルデヒド"
    NSString *pron;
    NSString *pos;     // part of speech (n = noun, v = verb, a = adjective, r = adverb)
}
@property (nonatomic, assign) int wordid;
@property (nonatomic, copy) NSString *lang;
@property (nonatomic, copy) NSString *lemma;
@property (nonatomic, copy) NSString *pron;
@property (nonatomic, copy) NSString *pos;
@end

//********************* Synlink *************************
// One link between two synsets.
@interface Synlink : NSObject {
    NSString *synset1;
    NSString *synset2;
    NSString *link;    // link type, e.g. "hypo"
    NSString *src;
}
@property (nonatomic, copy) NSString *synset1;
@property (nonatomic, copy) NSString *synset2;
@property (nonatomic, copy) NSString *link;
@property (nonatomic, copy) NSString *src;
@end

//********************* WordNetJPN *************************
// Read access to a Japanese WordNet SQLite database file. Every query method
// opens the file at `path`, runs one query and closes the file again.
@interface WordNetJPN : NSObject {
    NSString *path;
    sqlite3 *database;
}

// Creates an accessor for the database file at aPath. The file is not opened
// until a query method is called.
- (id)initWithPath:(NSString *)aPath;

// Looks up the synset table by synset ID. Each row is returned as a Word whose
// wordid, lang and lemma are filled from columns 0, 1 and 2 of that table.
-(NSArray *)getSynset:(NSString *)synset;

// Returns the Word rows whose wordid equals the argument.
-(NSArray *)getWord:(int)wordid;

// For each Word in `words`, returns the synset IDs (NSString) of its senses.
-(NSArray *)getSense:(NSArray *)words;

// Returns the Synlink rows that start at sense.synset and have the given link type.
-(NSArray *)getSynLinks:(Sense *)sense link:(NSString *)link;

// For each Word in `words`, returns its Japanese ('jpn') Sense objects.
-(NSArray *)getSenses:(NSArray *)words;

// Returns the Word rows whose lemma equals the argument.
-(NSArray *)getWords:(NSString *)lemma;

// Returns the Words that belong to the given synset in the given language.
-(NSArray *)wordsOfSynset:(NSString *)synset language:(NSString *)lang;

// Returns the Synsets (groups of words sharing a meaning) that contain the lemma.
-(NSArray *)synsetWithLemma:(NSString *)lemma;

// Path of the SQLite database file.
@property (nonatomic, copy) NSString *path;
@end
WordNetJPN.m
//
//  WordNetJPN.m
//
//  Created by 大森 智史 on 09/03/31.
//  Copyright 2009-2011 Satoshi Oomori. All rights reserved.
//
//  Bug fix contributed by Mao Ito (伊藤真央), 2011/6/28: the database is
//  closed after every query.
//

#import "WordNetJPN.h"

#pragma mark - SQLite helpers

// Opens the database file at `path` into *handle.
// Returns YES on success. On failure the SQLite error text is copied into
// *errorMessage (when non-NULL) BEFORE the handle is released, the handle is
// closed exactly once, *handle is set to NULL and NO is returned.
static BOOL WNJOpenDatabase(NSString *path, sqlite3 **handle, NSString **errorMessage) {
    if (sqlite3_open([path UTF8String], handle) == SQLITE_OK) {
        return YES;
    }
    if (errorMessage != NULL) {
        const char *text = sqlite3_errmsg(*handle);
        NSString *message = (text != NULL) ? [NSString stringWithUTF8String:text] : nil;
        *errorMessage = (message != nil) ? message : @"unknown error";
    }
    sqlite3_close(*handle);
    *handle = NULL;
    return NO;
}

// Closes *handle and clears it so that a stale pointer is never reused.
static void WNJCloseDatabase(sqlite3 **handle) {
    sqlite3_close(*handle);
    *handle = NULL;
}

// Compiles `sql` against `handle`. Returns the statement, or NULL on failure.
// On failure the SQLite error is logged as "<logPrefix> '<error>'." while the
// handle is still open; pass nil as logPrefix to stay silent.
static sqlite3_stmt *WNJPrepare(sqlite3 *handle, const char *sql, NSString *logPrefix) {
    sqlite3_stmt *statement = NULL;
    if (sqlite3_prepare_v2(handle, sql, -1, &statement, NULL) == SQLITE_OK) {
        return statement;
    }
    if (logPrefix != nil) {
        NSLog(@"%@ '%s'.", logPrefix, sqlite3_errmsg(handle));
    }
    sqlite3_finalize(statement);
    return NULL;
}

// Binds `value` to the 1-based parameter `index`; nil is bound as SQL NULL.
// Binding (instead of formatting the value into the SQL text) keeps quotes in
// the value from breaking or altering the statement.
static void WNJBindString(sqlite3_stmt *statement, int index, NSString *value) {
    if (value == nil) {
        sqlite3_bind_null(statement, index);
    } else {
        // SQLITE_TRANSIENT: SQLite takes its own copy of the UTF-8 bytes.
        sqlite3_bind_text(statement, index, [value UTF8String], -1, SQLITE_TRANSIENT);
    }
}

// Reads a text column as an autoreleased NSString; returns nil for SQL NULL
// (stringWithUTF8String: raises when handed a NULL pointer).
static NSString *WNJColumnString(sqlite3_stmt *statement, int column) {
    const unsigned char *text = sqlite3_column_text(statement, column);
    if (text == NULL) {
        return nil;
    }
    return [NSString stringWithUTF8String:(const char *)text];
}

#pragma mark - Model classes

// The -dealloc methods below clear each string through its setter instead of
// releasing the ivar directly. That is correct for every ownership attribute
// the property may be declared with: the setter of an owning (retain/copy)
// property releases the old value, the setter of an `assign` property just
// stores nil.

//Synset
@implementation Synset
@synthesize synset;
@synthesize pos;
@synthesize name;
@synthesize src;

- (void)dealloc {
    self.synset = nil;
    self.pos = nil;
    self.name = nil;
    self.src = nil;
    [super dealloc];
}
@end

//Sense
@implementation Sense
@synthesize rank;
@synthesize lexid;
@synthesize synset;
@synthesize freq;
@synthesize src;
@synthesize lang;
@synthesize wordid;

- (void)dealloc {
    self.synset = nil;
    self.src = nil;
    self.lang = nil;
    [super dealloc];
}
@end

//Word
@implementation Word
@synthesize wordid;
@synthesize lang;
@synthesize lemma;
@synthesize pron;
@synthesize pos;
// "@synthesize gloss;" was removed: the Word interface declares no `gloss`
// property, so synthesizing it does not compile.

- (void)dealloc {
    self.lang = nil;
    self.lemma = nil;
    self.pron = nil;
    self.pos = nil;
    [super dealloc];
}
@end

//Synlink
@implementation Synlink
@synthesize synset1;
@synthesize synset2;
@synthesize link;
@synthesize src;

- (void)dealloc {
    self.synset1 = nil;
    self.synset2 = nil;
    self.link = nil;
    self.src = nil;
    [super dealloc];
}
@end

#pragma mark - WordNetJPN

@implementation WordNetJPN
@synthesize path;

// Stores the path of the dictionary database. The file itself is opened and
// closed by each query method.
- (id)initWithPath:(NSString *)aPath {
    self = [super init];
    if (self) {
        self.path = aPath;
    }
    return self;
}

- (void)dealloc {
    self.path = nil;
    [super dealloc];
}

// Returns the Word rows whose lemma equals `lemma` (wordid, lang, lemma, pos).
// Returns an empty array when nothing matches or the query cannot be prepared;
// asserts when the database cannot be opened.
- (NSArray *)getWords:(NSString *)lemma {
    NSMutableArray *results = [NSMutableArray arrayWithCapacity:1];
    NSString *openError = nil;
    if (!WNJOpenDatabase(path, &database, &openError)) {
        NSAssert1(0, @"error message '%s'.", [openError UTF8String]);
        return [NSArray arrayWithArray:results];
    }

    sqlite3_stmt *statement = WNJPrepare(database,
        "select word.wordid,word.lang,word.lemma,word.pos from word where word.lemma=?",
        @"error message");
    if (statement != NULL) {
        WNJBindString(statement, 1, lemma);
        while (sqlite3_step(statement) == SQLITE_ROW) {
            Word *word = [[Word alloc] init];
            word.wordid = sqlite3_column_int(statement, 0);
            word.lang = WNJColumnString(statement, 1);
            word.lemma = WNJColumnString(statement, 2);
            word.pos = WNJColumnString(statement, 3);
            [results addObject:word];
            [word release];
        }
        sqlite3_finalize(statement);
    }

    WNJCloseDatabase(&database);
    return [NSArray arrayWithArray:results];
}

// Returns the Word rows whose wordid equals `wordid`.
// The query is "select *", so the column positions used below (1 = lang,
// 2 = lemma, 4 = pos) depend on the table layout — NOTE(review): assumes the
// word table is (wordid, lang, lemma, pron, pos); confirm against the schema.
- (NSArray *)getWord:(int)wordid {
    NSMutableArray *results = [NSMutableArray arrayWithCapacity:1];
    NSString *openError = nil;
    if (!WNJOpenDatabase(path, &database, &openError)) {
        NSAssert1(0, @"Failed to open database with message '%s'.", [openError UTF8String]);
        return [NSArray arrayWithArray:results];
    }

    sqlite3_stmt *statement = WNJPrepare(database, "select * from word where wordid=?", @"message");
    if (statement != NULL) {
        sqlite3_bind_int(statement, 1, wordid);
        while (sqlite3_step(statement) == SQLITE_ROW) {
            Word *word = [[Word alloc] init];
            word.wordid = wordid;
            word.lang = WNJColumnString(statement, 1);
            word.lemma = WNJColumnString(statement, 2);
            word.pos = WNJColumnString(statement, 4);
            [results addObject:word];
            [word release];
        }
        sqlite3_finalize(statement);
    }

    WNJCloseDatabase(&database);
    return [NSArray arrayWithArray:results];
}

// For every Word in `words`, collects its Japanese ('jpn') senses as Sense
// objects (synset, lang and wordid are filled in).
- (NSArray *)getSenses:(NSArray *)words {
    NSMutableArray *results = [NSMutableArray arrayWithCapacity:1];
    NSString *openError = nil;
    if (!WNJOpenDatabase(path, &database, &openError)) {
        NSAssert1(0, @"Failed to open database with message '%s'.", [openError UTF8String]);
        return [NSArray arrayWithArray:results];
    }

    // The statement text is the same for every word: compile it once and
    // re-bind the word ID on each pass.
    sqlite3_stmt *statement = WNJPrepare(database,
        "select sense.synset,sense.lang,sense.wordid from sense where wordid=? and sense.lang='jpn'",
        @"message");
    if (statement != NULL) {
        for (Word *word in words) {
            sqlite3_reset(statement);
            sqlite3_bind_int(statement, 1, word.wordid);
            while (sqlite3_step(statement) == SQLITE_ROW) {
                Sense *sense = [[Sense alloc] init];
                sense.synset = WNJColumnString(statement, 0);
                sense.lang = WNJColumnString(statement, 1);
                sense.wordid = sqlite3_column_int(statement, 2);
                [results addObject:sense];
                [sense release];
            }
        }
        sqlite3_finalize(statement);
    }

    WNJCloseDatabase(&database);
    return [NSArray arrayWithArray:results];
}

// For every Word in `words`, collects column 0 of its sense rows as NSString
// (the synset ID — NOTE(review): assumes synset is the first column of the
// sense table). Column 1 of each row is written to the log.
- (NSArray *)getSense:(NSArray *)words {
    NSMutableArray *results = [NSMutableArray arrayWithCapacity:1];
    NSString *openError = nil;
    if (!WNJOpenDatabase(path, &database, &openError)) {
        NSAssert1(0, @"Failed to open database with message '%s'.", [openError UTF8String]);
        return [NSArray arrayWithArray:results];
    }

    sqlite3_stmt *statement = WNJPrepare(database, "select * from sense where wordid=?", @"message");
    if (statement != NULL) {
        for (Word *word in words) {
            sqlite3_reset(statement);
            sqlite3_bind_int(statement, 1, word.wordid);
            while (sqlite3_step(statement) == SQLITE_ROW) {
                NSString *synsetID = WNJColumnString(statement, 0);
                NSString *str = WNJColumnString(statement, 1);
                // -addObject: raises on nil, so rows with a NULL synset are skipped.
                if (synsetID != nil) {
                    [results addObject:synsetID];
                }
                NSLog(@"getSense--%@",str);
            }
        }
        sqlite3_finalize(statement);
    }

    WNJCloseDatabase(&database);
    return [NSArray arrayWithArray:results];
}

// Looks up the synset table by synset ID.
// The rows are returned as Word objects (not Synset objects): column 0 is read
// as an integer into wordid, column 1 goes into lang and column 2 into lemma.
// Callers use `lemma` of the result as the synset's name — NOTE(review):
// assumes the synset table is (synset, pos, name, src); confirm.
- (NSArray *)getSynset:(NSString *)synset {
    NSMutableArray *results = [NSMutableArray arrayWithCapacity:1];
    NSString *openError = nil;
    if (!WNJOpenDatabase(path, &database, &openError)) {
        NSAssert1(0, @"Failed to open database with message '%s'.", [openError UTF8String]);
        return [NSArray arrayWithArray:results];
    }

    sqlite3_stmt *statement = WNJPrepare(database, "select * from synset where synset=?", @"message");
    if (statement != NULL) {
        WNJBindString(statement, 1, synset);
        while (sqlite3_step(statement) == SQLITE_ROW) {
            Word *word = [[Word alloc] init];
            word.wordid = sqlite3_column_int(statement, 0);
            word.lang = WNJColumnString(statement, 1);
            word.lemma = WNJColumnString(statement, 2);
            [results addObject:word];
            [word release];
        }
        sqlite3_finalize(statement);
    }

    WNJCloseDatabase(&database);
    return [NSArray arrayWithArray:results];
}

// Returns the Synlink rows whose synset1 equals sense.synset and whose link
// type equals `link`. Unlike the other queries, a failure to open the database
// is not asserted here; an empty array is returned instead.
- (NSArray *)getSynLinks:(Sense *)sense link:(NSString *)link {
    NSMutableArray *synLinks = [NSMutableArray arrayWithCapacity:1];
    if (!WNJOpenDatabase(path, &database, NULL)) {
        return [NSArray arrayWithArray:synLinks];
    }

    sqlite3_stmt *statement = WNJPrepare(database,
        "SELECT * FROM synlink where synset1 =? and link=?",
        @"message");
    if (statement != NULL) {
        WNJBindString(statement, 1, sense.synset);
        WNJBindString(statement, 2, link);
        while (sqlite3_step(statement) == SQLITE_ROW) {
            Synlink *synlink = [[Synlink alloc] init];
            synlink.synset1 = WNJColumnString(statement, 0);
            synlink.synset2 = WNJColumnString(statement, 1);
            synlink.link = WNJColumnString(statement, 2);
            synlink.src = WNJColumnString(statement, 3);
            [synLinks addObject:synlink];
            [synlink release];
        }
        sqlite3_finalize(statement);
    }

    WNJCloseDatabase(&database);
    return [NSArray arrayWithArray:synLinks];
}

// Returns the Words that belong to `synset` and whose sense language is `lang`.
- (NSArray *)wordsOfSynset:(NSString *)synset language:(NSString *)lang {
    NSMutableArray *results = [NSMutableArray arrayWithCapacity:1];
    NSString *openError = nil;
    if (!WNJOpenDatabase(path, &database, &openError)) {
        NSAssert1(0, @"Failed to open database with message '%s'.", [openError UTF8String]);
        return [NSArray arrayWithArray:results];
    }

    sqlite3_stmt *statement = WNJPrepare(database,
        "select word.wordid,word.lang,word.lemma,word.pos from word join sense on word.wordid =sense.wordid where synset = ? and sense.lang = ?",
        @"message");
    if (statement != NULL) {
        WNJBindString(statement, 1, synset);
        WNJBindString(statement, 2, lang);
        while (sqlite3_step(statement) == SQLITE_ROW) {
            Word *word = [[Word alloc] init];
            word.wordid = sqlite3_column_int(statement, 0);
            word.lang = WNJColumnString(statement, 1);
            word.lemma = WNJColumnString(statement, 2);
            word.pos = WNJColumnString(statement, 3);
            [results addObject:word];
            [word release];
        }
        sqlite3_finalize(statement);
    }

    WNJCloseDatabase(&database);
    return [NSArray arrayWithArray:results];
}

// Returns the Synsets (groups of words sharing a meaning) that contain `lemma`.
// Each Synset carries synset, pos and src; `name` is not filled in.
// Returns nil (without logging) when the statement cannot be prepared.
// The original note here said nil is returned when `lemma` is a proper noun —
// NOTE(review): presumably those were lemmas containing an apostrophe, which
// broke the string-built SQL; with a bound parameter they are looked up
// normally. Confirm against real data.
- (NSArray *)synsetWithLemma:(NSString *)lemma {
    NSMutableArray *results = [NSMutableArray arrayWithCapacity:1];
    NSString *openError = nil;
    if (!WNJOpenDatabase(path, &database, &openError)) {
        NSAssert1(0, @"error message '%s'.", [openError UTF8String]);
        return [NSArray arrayWithArray:results];
    }

    sqlite3_stmt *statement = WNJPrepare(database,
        "select synset,src,pos from word join sense on word.wordid =sense.wordid where word.lemma=?",
        nil);
    if (statement == NULL) {
        WNJCloseDatabase(&database);
        return nil;
    }

    WNJBindString(statement, 1, lemma);
    while (sqlite3_step(statement) == SQLITE_ROW) {
        Synset *synset = [[Synset alloc] init];
        synset.synset = WNJColumnString(statement, 0);
        synset.pos = WNJColumnString(statement, 2);
        synset.src = WNJColumnString(statement, 1);
        [results addObject:synset];
        [synset release];
    }
    sqlite3_finalize(statement);

    WNJCloseDatabase(&database);
    return [NSArray arrayWithArray:results];
}

@end
下記のサンプルで、このようなログが書き出されます。
---------関連グループ-----------------------------------
646 犬,品詞:n
647 ワンワン,品詞:n
650 ---------関連グループ-----------------------------------
652 浮人,品詞:n
653 浮かれ人,品詞:n
653 鼻摘,品詞:n
654 げじげじ,品詞:n
657 浮れ人,品詞:n
662 犬,品詞:n
665 芋虫,品詞:n
666 碌でなし,品詞:n
667 鼻つまみ,品詞:n
669 鼻摘まみ,品詞:n
671 与太,品詞:n
672 惑い者,品詞:n
672 惑者,品詞:n
673 鼻摘み,品詞:n
675 まどい者,品詞:n
682 ---------関連グループ-----------------------------------
683 廻者,品詞:n
683 間諜,品詞:n
684 工作員,品詞:n
684 犬,品詞:n
685 間者,品詞:n
685 探,品詞:n
686 諜報員,品詞:n
686 諜者,品詞:n
687 密偵,品詞:n
687 スパイ,品詞:n
688 秘密捜査員,品詞:n
688 いぬ,品詞:n
689 まわし者,品詞:n
689 隠密,品詞:n
690 探り,品詞:n
690 廻し者,品詞:n
691 回し者,品詞:n
691 回者,品詞:n
693 ---------関連グループ-----------------------------------
694 飼い犬,品詞:n
694 犬,品詞:n
695 飼犬,品詞:n
696 洋犬,品詞:n
696 イヌ,品詞:n
697 番犬,品詞:n
699 ---------関連グループ-----------------------------------
700 仔犬,品詞:n
700 犬,品詞:n
701 子犬,品詞:n
701 ウェルプ,品詞:n
702 小犬,品詞:n
703 ---------関連グループ-----------------------------------
704 犬,品詞:v
151 ----------------------------------
153 「犬」 についての関連語
239 「pooch」の下位語を表示
240 ----------------------------------
244 「犬」 についての関連語
330 「so-and-so」の下位語を表示
334 ----------------------------------
337 「犬」 についての関連語
426 「spy」の下位語を表示
603 spy -> counterspy
860 spy -> double_agent
Controll.m
//
//  Controll.m
//
//  Created by 大森 智史 on 09/04/01.
//  Copyright 2008 Satoshi Oomori. All rights reserved.

#import "Controll.h"
#import "WordNetJPN.h"

// Returns the first element of `array`, or nil when the array is nil or empty.
// -objectAtIndex:0 raises NSRangeException on an empty array, and every lookup
// below can legitimately come back empty.
static id ControllFirstObject(NSArray *array) {
    return ([array count] > 0) ? [array objectAtIndex:0] : nil;
}

@implementation Controll

// Demo action: looks up "犬" in the bundled WordNet database and logs
// (1) every synset group the word belongs to, with the Japanese words in it, and
// (2) for every sense of the word, the hyponym links of its synset.
-(IBAction)buttonAction:(id)sender{
    // Open the dictionary stored in the bundle that contains this class.
    NSString *path = [[NSBundle bundleForClass:[self class]] pathForResource:@"wnjpn-0.9" ofType:@"db"];
    WordNetJPN *wordnet = [[WordNetJPN alloc] initWithPath:path];

    // Part 1: the word -> its synsets -> the Japanese words of each synset.
    NSArray *synsets = [wordnet synsetWithLemma:@"犬"];
    for (Synset *synset in synsets) {
        NSArray *groupWords = [wordnet wordsOfSynset:synset.synset language:@"jpn"];
        NSLog(@"---------関連グループ-----------------------------------");
        for (Word *element in groupWords) {
            NSLog(@"%@,品詞:%@", element.lemma , element.pos);
        }
    }

    // Part 2: the word -> its senses -> the links of each sense's synset.
    NSArray *words = [wordnet getWords:@"犬"];
    if (words) {
        NSArray *senseArray = [wordnet getSenses:words];
        for (Sense *sense in senseArray) {
            NSLog(@"----------------------------------");

            NSArray *wordArray = [wordnet getWord:sense.wordid];
            for (Word *word in wordArray) {
                NSLog(@"「%@」 についての関連語",word.lemma);
            }

            // getSynset: returns Word objects whose lemma holds the synset's name.
            // A missing row leaves headWord nil, which logs as "(null)".
            Word *headWord = ControllFirstObject([wordnet getSynset:sense.synset]);
            NSLog(@"「%@」の下位語を表示",headWord.lemma);

            // Link types noted by the author:
            //   "hype" hypernym, "hypo" hyponym, "inst" instance
            NSArray *synLinks = [wordnet getSynLinks:sense link:@"hypo"];
            for (Synlink *synlink in synLinks) {
                Word *fromWord = ControllFirstObject([wordnet getSynset:synlink.synset1]);
                Word *toWord = ControllFirstObject([wordnet getSynset:synlink.synset2]);
                NSLog(@"%@ -> %@",fromWord.lemma,toWord.lemma);
            }
        }
    } else {
        NSLog(@"NG");
    }

    // Release the accessor (manual reference counting).
    [wordnet release];
}

@end