Đoạn mã sau bao gồm các lớp Tokenizer, Token và QueryBuilder. Nó có thể không phải là giải pháp thanh lịch nhất từ trước đến nay, nhưng nó thực sự làm được những gì bạn đang yêu cầu:
<?
// QueryBuilder Grammar:
// =====================
// SearchRule := SimpleSearchRule { KeyWord }
// SimpleSearchRule := Expression | SimpleSearchRule { 'OR' Expression }
// Expression := SimpleExpression | Expression { 'AND' SimpleExpression }
// SimpleExpression := '(' SimpleSearchRule ')' | FieldExpression
$input = 'from:me AND (to:john OR to:jenny) dinner party';
$fieldMapping = array(
'id' => 'id',
'from' => 'from',
'to' => 'to',
'title' => 'title',
'description' => 'description',
'time_created' => 'time_created'
);
$fullTextFields = array('title','description');
$qb = new QueryBuilder($fieldMapping, $fullTextFields);
try {
echo $qb->parseSearchRule($input);
} catch(Exception $error) {
echo 'Error occurred while parsing search query: <br/>'.$error->getMessage();
}
class Token {
const KEYWORD = 'KEYWORD',
OPEN_PAR='OPEN_PAR',
CLOSE_PAR='CLOSE_PAR',
FIELD='FIELD',
AND_OP='AND_OP',
OR_OP='OR_OP';
public $type;
public $chars;
public $position;
function __construct($type,$chars,$position) {
$this->type = $type;
$this->chars = $chars;
$this->position = $position;
}
function __toString() {
return 'Token[ type='.$this->type.', chars='.$this->chars.', position='.$this->position.' ]';
}
}
class Tokenizer {
private $tokens = array();
private $input;
private $currentPosition;
function __construct($input) {
$this->input = trim($input);
$this->currentPosition = 0;
}
/**
* @return Token
*/
function getToken() {
if(count($this->tokens)==0) {
$token = $this->nextToken();
if($token==null) {
return null;
}
array_push($this->tokens, $token);
}
return $this->tokens[0];
}
function consumeToken() {
$token = $this->getToken();
if($token==null) {
return null;
}
array_shift($this->tokens);
return $token;
}
protected function nextToken() {
$reservedCharacters = '\:\s\(\)';
$fieldExpr = '/^([^'.$reservedCharacters.']+)\:([^'.$reservedCharacters.']+)/';
$keyWord = '/^([^'.$reservedCharacters.']+)/';
$andOperator = '/^AND\s/';
$orOperator = '/^OR\s/';
// Remove whitespaces ..
$whiteSpaces = '/^\s+/';
$remaining = substr($this->input,$this->currentPosition);
if(preg_match($whiteSpaces, $remaining, $matches)) {
$this->currentPosition += strlen($matches[0]);
$remaining = substr($this->input,$this->currentPosition);
}
if($remaining=='') {
return null;
}
switch(substr($remaining,0,1)) {
case '(':
return new Token(Token::OPEN_PAR,'(',$this->currentPosition++);
case ')':
return new Token(Token::CLOSE_PAR,')',$this->currentPosition++);
}
if(preg_match($fieldExpr, $remaining, $matches)) {
$token = new Token(Token::FIELD, $matches[0], $this->currentPosition);
$this->currentPosition += strlen($matches[0]);
} else if(preg_match($andOperator, $remaining, $matches)) {
$token = new Token(Token::AND_OP, 'AND', $this->currentPosition);
$this->currentPosition += 3;
} else if(preg_match($orOperator, $remaining, $matches)) {
$token = new Token(Token::OR_OP, 'OR', $this->currentPosition);
$this->currentPosition += 2;
} else if(preg_match($keyWord, $remaining, $matches)) {
$token = new Token(Token::KEYWORD, $matches[0], $this->currentPosition);
$this->currentPosition += strlen($matches[0]);
} else throw new Exception('Unable to tokenize: '.$remaining);
return $token;
}
}
class QueryBuilder {
private $fieldMapping;
private $fulltextFields;
function __construct($fieldMapping, $fulltextFields) {
$this->fieldMapping = $fieldMapping;
$this->fulltextFields = $fulltextFields;
}
function parseSearchRule($input) {
$t = new Tokenizer($input);
$token = $t->getToken();
if($token==null) {
return '';
}
$token = $t->getToken();
if($token->type!=Token::KEYWORD) {
$searchRule = $this->parseSimpleSearchRule($t);
} else {
$searchRule = '';
}
$keywords = '';
while($token = $t->consumeToken()) {
if($token->type!=Token::KEYWORD) {
throw new Exception('Only keywords allowed at end of search rule.');
}
if($keywords!='') {
$keywords .= ' ';
}
$keywords .= $token->chars;
}
if($keywords!='') {
$matchClause = 'MATCH (`'.(implode('`,`',$this->fulltextFields)).'`) AGAINST (';
$keywords = $matchClause.'\''.mysql_real_escape_string($keywords).'\' IN BOOLEAN MODE)';
if($searchRule=='') {
$searchRule = $keywords;
} else {
$searchRule = '('.$searchRule.') AND ('.$keywords.')';
}
}
return $searchRule;
}
protected function parseSimpleSearchRule(Tokenizer $t) {
$expressions = array();
do {
$repeat = false;
$expressions[] = $this->parseExpression($t);
$token = $t->getToken();
if($token->type==Token::OR_OP) {
$t->consumeToken();
$repeat = true;
}
} while($repeat);
return implode(' OR ', $expressions);
}
protected function parseExpression(Tokenizer $t) {
$expressions = array();
do {
$repeat = false;
$expressions[] = $this->parseSimpleExpression($t);
$token = $t->getToken();
if($token->type==Token::AND_OP) {
$t->consumeToken();
$repeat = true;
}
} while($repeat);
return implode(' AND ', $expressions);
}
protected function parseSimpleExpression(Tokenizer $t) {
$token = $t->consumeToken();
if($token->type==Token::OPEN_PAR) {
$spr = $this->parseSimpleSearchRule($t);
$token = $t->consumeToken();
if($token==null || $token->type!=Token::CLOSE_PAR) {
throw new Exception('Expected closing parenthesis, found: '.$token->chars);
}
return '('.$spr.')';
} else if($token->type==Token::FIELD) {
$fieldVal = explode(':', $token->chars,2);
if(isset($this->fieldMapping[$fieldVal[0]])) {
return '`'.$this->fieldMapping[$fieldVal[0]].'` = \''.mysql_real_escape_string($fieldVal[1]).'\'';
}
throw new Exception('Unknown field selected: '.$token->chars);
} else {
throw new Exception('Expected opening parenthesis or field-expression, found: '.$token->chars);
}
}
}
?>
Một giải pháp phù hợp hơn trước tiên sẽ xây dựng một cây phân tích cú pháp và sau đó chuyển nó thành một truy vấn, sau một số phân tích sâu hơn.