Blame Identity/Webenv/phpBB/3.0.4/includes/search/fulltext_native.php

ef5584
ef5584
/**
ef5584
*
ef5584
* @package search
ef5584
* @version $Id: fulltext_native.php 9173 2008-12-04 17:01:39Z naderman $
ef5584
* @copyright (c) 2005 phpBB Group
ef5584
* @license http://opensource.org/licenses/gpl-license.php GNU Public License
ef5584
*
ef5584
*/
ef5584
ef5584
/**
ef5584
* @ignore
ef5584
*/
ef5584
if (!defined('IN_PHPBB'))
ef5584
{
ef5584
	exit;
ef5584
}
ef5584
ef5584
/**
ef5584
* @ignore
ef5584
*/
ef5584
include_once($phpbb_root_path . 'includes/search/search.' . $phpEx);
ef5584
ef5584
/**
ef5584
* fulltext_native
ef5584
* phpBB's own db driven fulltext search, version 2
ef5584
* @package search
ef5584
*/
ef5584
class fulltext_native extends search_backend
ef5584
{
ef5584
	var $stats = array();
ef5584
	var $word_length = array();
ef5584
	var $search_query;
ef5584
	var $common_words = array();
ef5584
ef5584
	var $must_contain_ids = array();
ef5584
	var $must_not_contain_ids = array();
ef5584
	var $must_exclude_one_ids = array();
ef5584
ef5584
	/**
ef5584
	* Initialises the fulltext_native search backend with min/max word length and makes sure the UTF-8 normalizer is loaded.
ef5584
	*
ef5584
	* @param	boolean|string	&$error	is passed by reference and should either be set to false on success or an error message on failure.
ef5584
	*
ef5584
	* @access	public
ef5584
	*/
ef5584
	function fulltext_native(&$error)
	{
		global $phpbb_root_path, $phpEx, $config;

		// Pull in the UTF-8 normalizer on demand, only if nobody has loaded it yet
		if (!class_exists('utf_normalizer'))
		{
			include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
		}

		// Minimum/maximum indexed word length as configured for this backend
		$this->word_length = array(
			'min'	=> $config['fulltext_native_min_chars'],
			'max'	=> $config['fulltext_native_max_chars'],
		);

		// Nothing in here can fail, so always report success to the caller
		$error = false;
	}
ef5584
ef5584
	/**
ef5584
	* This function fills $this->search_query with the cleaned user search query.
ef5584
	*
ef5584
	* If $terms is 'any' then the words will be extracted from the search query
ef5584
	* and combined with | inside brackets. They will afterwards be treated like
ef5584
	* a standard search query.
ef5584
	*
ef5584
	* Then it analyses the query and fills the internal arrays $must_not_contain_ids,
ef5584
	* $must_contain_ids and $must_exclude_one_ids which are later used by keyword_search().
ef5584
	*
ef5584
	* @param	string	$keywords	contains the search query string as entered by the user
ef5584
	* @param	string	$terms		is either 'all' (use search query as entered, default words to 'must be contained in post')
ef5584
	* 	or 'any' (find all posts containing at least one of the given words)
ef5584
	* @return	boolean				false if no valid keywords were found and otherwise true
ef5584
	*
ef5584
	* @access	public
ef5584
	*/
ef5584
	function split_keywords($keywords, $terms)
	{
		global $db, $user;

		// Cleans the raw query and fills $this->search_query, $this->common_words and the
		// three $this->must_*_ids arrays. Returns true only if at least one "must contain"
		// entry was found and the resulting search query is not empty.

		$keywords = trim($this->cleanup($keywords, '+-|()*'));

		// allow word|word|word without brackets
		if ((strpos($keywords, ' ') === false) && (strpos($keywords, '|') !== false) && (strpos($keywords, '(') === false))
		{
			$keywords = '(' . $keywords . ')';
		}

		// Normalise the operators one character at a time.
		// $open_bracket is the offset of the currently open '(' or false outside of a group,
		// $space remembers a +/- that is still waiting for its word.
		$open_bracket = $space = false;
		for ($i = 0, $n = strlen($keywords); $i < $n; $i++)
		{
			if ($open_bracket !== false)
			{
				switch ($keywords[$i])
				{
					case ')':
						// an empty group "()" is turned into separators and removed below
						if ($open_bracket + 1 == $i)
						{
							$keywords[$i - 1] = '|';
							$keywords[$i] = '|';
						}
						$open_bracket = false;
					break;

					// nested brackets, prefixes and spaces all act as separators inside a group
					case '(':
					case '+':
					case '-':
					case ' ':
						$keywords[$i] = '|';
					break;
				}
			}
			else
			{
				switch ($keywords[$i])
				{
					case ')':
						$keywords[$i] = ' ';
					break;
					case '(':
						$open_bracket = $i;
						$space = false;
					break;
					case '|':
						$keywords[$i] = ' ';
					break;
					case '-':
					case '+':
						$space = $keywords[$i];
					break;
					case ' ':
						// a space directly after +/- is replaced by the sign, repeated signs are collapsed below
						if ($space !== false)
						{
							$keywords[$i] = $space;
						}
					break;
					default:
						$space = false;
				}
			}
		}

		// Close a group the user left open. The offset has to be compared against false
		// explicitly, a group opened at offset 0 would be missed by a truthiness check.
		if ($open_bracket !== false)
		{
			$keywords .= ')';
		}

		$match = array(
			'#  +#',
			'#\|\|+#',
			'#(\+|\-)(?:\+|\-)+#',
			'#\(\|#',
			'#\|\)#',
		);
		$replace = array(
			' ',
			'|',
			'$1',
			'(',
			')',
		);

		$keywords = preg_replace($match, $replace, $keywords);

		// $keywords input format: each word separated by a space, words in a bracket are not separated

		// the user wants to search for any word, convert the search query
		if ($terms == 'any')
		{
			$words = array();

			preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $words);
			if (sizeof($words[1]))
			{
				$keywords = '(' . implode('|', $words[1]) . ')';
			}
		}

		// set the search_query which is shown to the user
		$this->search_query = $keywords;

		// words without wildcards can be looked up directly in the word list
		$exact_words = array();
		preg_match_all('#([^\\s+\\-|*()]+)(?:$|[\\s+\\-|()])#u', $keywords, $exact_words);
		$exact_words = $exact_words[1];

		$common_ids = $words = array();

		if (sizeof($exact_words))
		{
			$sql = 'SELECT word_id, word_text, word_common
				FROM ' . SEARCH_WORDLIST_TABLE . '
				WHERE ' . $db->sql_in_set('word_text', $exact_words);
			$result = $db->sql_query($sql);

			// store an array of words and ids, remove common words
			while ($row = $db->sql_fetchrow($result))
			{
				if ($row['word_common'])
				{
					$this->common_words[] = $row['word_text'];
					$common_ids[$row['word_text']] = (int) $row['word_id'];
					continue;
				}

				$words[$row['word_text']] = (int) $row['word_id'];
			}
			$db->sql_freeresult($result);
		}
		unset($exact_words);

		// now analyse the search query, first split it using the spaces
		$query = explode(' ', $keywords);

		$this->must_contain_ids = array();
		$this->must_not_contain_ids = array();
		$this->must_exclude_one_ids = array();

		$mode = '';
		$ignore_no_id = true;

		foreach ($query as $word)
		{
			if (empty($word))
			{
				continue;
			}

			// words which should not be included
			if ($word[0] == '-')
			{
				$word = substr($word, 1);

				// a lone "-" leaves nothing behind, skip it before looking at $word[0]
				if (empty($word))
				{
					continue;
				}

				// a group of which at least one may not be in the resulting posts
				if ($word[0] == '(')
				{
					$word = array_unique(explode('|', substr($word, 1, -1)));
					$mode = 'must_exclude_one';
				}
				// one word which should not be in the resulting posts
				else
				{
					$mode = 'must_not_contain';
				}
				$ignore_no_id = true;
			}
			// words which have to be included
			else
			{
				// no prefix is the same as a +prefix
				if ($word[0] == '+')
				{
					$word = substr($word, 1);

					// a lone "+" leaves nothing behind, skip it before looking at $word[0]
					if (empty($word))
					{
						continue;
					}
				}

				// a group of words of which at least one word should be in every resulting post
				if ($word[0] == '(')
				{
					$word = array_unique(explode('|', substr($word, 1, -1)));
				}
				$ignore_no_id = false;
				$mode = 'must_contain';
			}

			// if this is an array of words then retrieve an id for each
			if (is_array($word))
			{
				$non_common_words = array();
				$id_words = array();
				foreach ($word as $word_part)
				{
					if (strpos($word_part, '*') !== false)
					{
						// wildcards are stored as quoted LIKE patterns (strings), real words as integer ids
						$id_words[] = '\'' . $db->sql_escape(str_replace('*', '%', $word_part)) . '\'';
						$non_common_words[] = $word_part;
					}
					else if (isset($words[$word_part]))
					{
						$id_words[] = $words[$word_part];
						$non_common_words[] = $word_part;
					}
					else
					{
						$len = utf8_strlen($word_part);
						if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
						{
							$this->common_words[] = $word_part;
						}
					}
				}
				if (sizeof($id_words))
				{
					sort($id_words);
					if (sizeof($id_words) > 1)
					{
						$this->{$mode . '_ids'}[] = $id_words;
					}
					else
					{
						// a group with a single usable entry behaves like a plain word
						$mode = ($mode == 'must_exclude_one') ? 'must_not_contain' : $mode;
						$this->{$mode . '_ids'}[] = $id_words[0];
					}
				}
				// throw an error if we shall not ignore non-existent words
				else if (!$ignore_no_id && sizeof($non_common_words))
				{
					trigger_error(sprintf($user->lang['WORDS_IN_NO_POST'], implode(', ', $non_common_words)));
				}
				unset($non_common_words);
			}
			// else we only need one id
			else if (($wildcard = strpos($word, '*') !== false) || isset($words[$word]))
			{
				if ($wildcard)
				{
					// the length limits apply to the word without its wildcard characters
					$len = utf8_strlen(str_replace('*', '', $word));
					if ($len >= $this->word_length['min'] && $len <= $this->word_length['max'])
					{
						$this->{$mode . '_ids'}[] = '\'' . $db->sql_escape(str_replace('*', '%', $word)) . '\'';
					}
					else
					{
						$this->common_words[] = $word;
					}
				}
				else
				{
					$this->{$mode . '_ids'}[] = $words[$word];
				}
			}
			// throw an error if we shall not ignore non-existent words
			else if (!$ignore_no_id)
			{
				if (!isset($common_ids[$word]))
				{
					$len = utf8_strlen($word);
					if ($len >= $this->word_length['min'] && $len <= $this->word_length['max'])
					{
						trigger_error(sprintf($user->lang['WORD_IN_NO_POST'], $word));
					}
					else
					{
						$this->common_words[] = $word;
					}
				}
			}
			else
			{
				$len = utf8_strlen($word);
				if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
				{
					$this->common_words[] = $word;
				}
			}
		}

		// we can't search for negatives only
		if (!sizeof($this->must_contain_ids))
		{
			return false;
		}

		sort($this->must_contain_ids);
		sort($this->must_not_contain_ids);
		sort($this->must_exclude_one_ids);

		if (!empty($this->search_query))
		{
			return true;
		}
		return false;
	}
ef5584
ef5584
	/**
ef5584
	* Performs a search on keywords depending on display specific params. You have to run split_keywords() first.
ef5584
	*
ef5584
	* @param	string		$type				contains either posts or topics depending on what should be searched for
ef5584
	* @param	string		&$fields			contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched)
ef5584
	* @param	string		&$terms				is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words)
ef5584
	* @param	array		&$sort_by_sql		contains SQL code for the ORDER BY part of a query
ef5584
	* @param	string		&$sort_key			is the key of $sort_by_sql for the selected sorting
ef5584
	* @param	string		&$sort_dir			is either a or d representing ASC and DESC
ef5584
	* @param	string		&$sort_days			specifies the maximum amount of days a post may be old
ef5584
	* @param	array		&$ex_fid_ary		specifies an array of forum ids which should not be searched
ef5584
	* @param	array		&$m_approve_fid_ary	specifies an array of forum ids in which the searcher is allowed to view unapproved posts
ef5584
	* @param	int			&$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
ef5584
	* @param	array		&$author_ary		an array of author ids; if the author should be ignored during the search, the array is empty
ef5584
	* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
ef5584
	* @param	int			$start				indicates the first index of the page
ef5584
	* @param	int			$per_page			number of ids each page is supposed to contain
ef5584
	* @return	boolean|int						total number of results
ef5584
	*
ef5584
	* @access	public
ef5584
	*/
ef5584
	function keyword_search($type, &$fields, &$terms, &$sort_by_sql, &$sort_key, &$sort_dir, &$sort_days, &$ex_fid_ary, &$m_approve_fid_ary, &$topic_id, &$author_ary, &$id_ary, $start, $per_page)
	{
		global $config, $db;

		// No keywords? No posts.
		if (empty($this->search_query))
		{
			return false;
		}

		// generate a search_key from all the options to identify the results
		$search_key = md5(implode('#', array(
			serialize($this->must_contain_ids),
			serialize($this->must_not_contain_ids),
			serialize($this->must_exclude_one_ids),
			$type,
			$fields,
			$terms,
			$sort_days,
			$sort_key,
			$topic_id,
			implode(',', $ex_fid_ary),
			implode(',', $m_approve_fid_ary),
			implode(',', $author_ary)
		)));

		// try reading the results from cache
		$total_results = 0;
		if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
		{
			return $total_results;
		}

		$id_ary = array();

		$sql_where = array();
		// running counters for the word match (m0, m1, ...) and word list (w0, w1, ...) aliases
		$m_num = 0;
		$w_num = 0;

		$sql_array = array(
			'SELECT'	=> ($type == 'posts') ? 'p.post_id' : 'p.topic_id',
			'FROM'		=> array(
				SEARCH_WORDMATCH_TABLE	=> array(),
				SEARCH_WORDLIST_TABLE	=> array(),
			),
			'LEFT_JOIN' => array(array(
				'FROM'	=> array(POSTS_TABLE => 'p'),
				'ON'	=> 'm0.post_id = p.post_id',
			)),
		);

		// join used by both the count query and the result query whenever the topics table is referenced
		$topics_join = array(
			'FROM'	=> array(TOPICS_TABLE => 't'),
			'ON'	=> 'p.topic_id = t.topic_id'
		);

		$title_match = '';
		$left_join_topics = false;
		$group_by = true;
		// Build some display specific sql strings
		switch ($fields)
		{
			case 'titleonly':
				$title_match = 'title_match = 1';
				$group_by = false;
			// no break
			case 'firstpost':
				$left_join_topics = true;
				$sql_where[] = 'p.post_id = t.topic_first_post_id';
			break;

			case 'msgonly':
				$title_match = 'title_match = 0';
				$group_by = false;
			break;
		}

		if ($type == 'topics')
		{
			$left_join_topics = true;
			$group_by = true;
		}

		/**
		* @todo Add a query optimizer (handle stuff like "+(4|3) +4")
		*/

		// Every "must contain" entry gets its own word match alias which is tied to m0 by post_id.
		// Entries are either an integer word id, a quoted LIKE pattern (string) or an array of both.
		foreach ($this->must_contain_ids as $subquery)
		{
			if (is_array($subquery))
			{
				$group_by = true;

				$word_ids = array();
				foreach ($subquery as $id)
				{
					if (is_string($id))
					{
						$sql_array['LEFT_JOIN'][] = array(
							'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
							'ON'	=> "w$w_num.word_text LIKE $id"
						);
						$word_ids[] = "w$w_num.word_id";

						$w_num++;
					}
					else
					{
						$word_ids[] = $id;
					}
				}

				$sql_where[] = $db->sql_in_set("m$m_num.word_id", $word_ids);

				unset($word_ids);
			}
			else if (is_string($subquery))
			{
				$sql_array['FROM'][SEARCH_WORDLIST_TABLE][] = 'w' . $w_num;

				$sql_where[] = "w$w_num.word_text LIKE $subquery";
				$sql_where[] = "m$m_num.word_id = w$w_num.word_id";

				$group_by = true;
				$w_num++;
			}
			else
			{
				$sql_where[] = "m$m_num.word_id = $subquery";
			}

			$sql_array['FROM'][SEARCH_WORDMATCH_TABLE][] = 'm' . $m_num;

			if ($title_match)
			{
				$sql_where[] = "m$m_num.$title_match";
			}

			if ($m_num != 0)
			{
				$sql_where[] = "m$m_num.post_id = m0.post_id";
			}
			$m_num++;
		}

		// wildcard exclusions are resolved through a word list join, the entry is replaced by the joined column
		foreach ($this->must_not_contain_ids as $key => $subquery)
		{
			if (is_string($subquery))
			{
				$sql_array['LEFT_JOIN'][] = array(
					'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
					'ON'	=> "w$w_num.word_text LIKE $subquery"
				);

				$this->must_not_contain_ids[$key] = "w$w_num.word_id";

				$group_by = true;
				$w_num++;
			}
		}

		// a post qualifies if the left join on the excluded words finds no row
		if (sizeof($this->must_not_contain_ids))
		{
			$sql_array['LEFT_JOIN'][] = array(
				'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
				'ON'	=> $db->sql_in_set("m$m_num.word_id", $this->must_not_contain_ids) . (($title_match) ? " AND m$m_num.$title_match" : '') . " AND m$m_num.post_id = m0.post_id"
			);

			$sql_where[] = "m$m_num.word_id IS NULL";
			$m_num++;
		}

		// for each group at least one of its words has to be missing from the post
		foreach ($this->must_exclude_one_ids as $ids)
		{
			$is_null_joins = array();
			foreach ($ids as $id)
			{
				if (is_string($id))
				{
					$sql_array['LEFT_JOIN'][] = array(
						'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
						'ON'	=> "w$w_num.word_text LIKE $id"
					);
					$id = "w$w_num.word_id";

					$group_by = true;
					$w_num++;
				}

				$sql_array['LEFT_JOIN'][] = array(
					'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
					'ON'	=> "m$m_num.word_id = $id AND m$m_num.post_id = m0.post_id" . (($title_match) ? " AND m$m_num.$title_match" : '')
				);
				$is_null_joins[] = "m$m_num.word_id IS NULL";

				$m_num++;
			}
			$sql_where[] = '(' . implode(' OR ', $is_null_joins) . ')';
		}

		if (!sizeof($m_approve_fid_ary))
		{
			$sql_where[] = 'p.post_approved = 1';
		}
		else if ($m_approve_fid_ary !== array(-1))
		{
			$sql_where[] = '(p.post_approved = 1 OR ' . $db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
		}

		if ($topic_id)
		{
			// cast like author_search() does, the value is concatenated into the query
			$sql_where[] = 'p.topic_id = ' . (int) $topic_id;
		}

		if (sizeof($author_ary))
		{
			$sql_where[] = $db->sql_in_set('p.poster_id', $author_ary);
		}

		if (sizeof($ex_fid_ary))
		{
			$sql_where[] = $db->sql_in_set('p.forum_id', $ex_fid_ary, true);
		}

		if ($sort_days)
		{
			$sql_where[] = 'p.post_time >= ' . (time() - ($sort_days * 86400));
		}

		$sql_array['WHERE'] = implode(' AND ', $sql_where);

		$is_mysql = false;
		// if the total result count is not cached yet, retrieve it from the db
		if (!$total_results)
		{
			$sql = '';
			$sql_array_count = $sql_array;

			// The WHERE clause may already reference t.topic_first_post_id, so the count
			// query needs the topics join as well or it refers to an unknown alias.
			if ($left_join_topics)
			{
				$sql_array_count['LEFT_JOIN'][] = $topics_join;
			}

			switch ($db->sql_layer)
			{
				case 'mysql4':
				case 'mysqli':

					// 3.x does not support SQL_CALC_FOUND_ROWS
					// the total is fetched with FOUND_ROWS() after the result query has run
					$sql_array['SELECT'] = 'SQL_CALC_FOUND_ROWS ' . $sql_array['SELECT'];
					$is_mysql = true;

				break;

				case 'sqlite':
					$sql_array_count['SELECT'] = ($type == 'posts') ? 'DISTINCT p.post_id' : 'DISTINCT p.topic_id';
					$sql = 'SELECT COUNT(' . (($type == 'posts') ? 'post_id' : 'topic_id') . ') as total_results
							FROM (' . $db->sql_build_query('SELECT', $sql_array_count) . ')';

				// no break

				default:
					// for sqlite $sql has been built above and is used as is
					$sql_array_count['SELECT'] = ($type == 'posts') ? 'COUNT(DISTINCT p.post_id) AS total_results' : 'COUNT(DISTINCT p.topic_id) AS total_results';
					$sql = (!$sql) ? $db->sql_build_query('SELECT', $sql_array_count) : $sql;

					$result = $db->sql_query($sql);
					$total_results = (int) $db->sql_fetchfield('total_results');
					$db->sql_freeresult($result);

					if (!$total_results)
					{
						return false;
					}
				break;
			}

			unset($sql_array_count, $sql);
		}

		// Build sql strings for sorting
		$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');

		// the first character of the sort column is its table alias
		switch ($sql_sort[0])
		{
			case 'u':
				$sql_array['FROM'][USERS_TABLE] = 'u';
				$sql_where[] = 'u.user_id = p.poster_id ';
			break;

			case 't':
				$left_join_topics = true;
			break;

			case 'f':
				$sql_array['FROM'][FORUMS_TABLE] = 'f';
				$sql_where[] = 'f.forum_id = p.forum_id';
			break;
		}

		if ($left_join_topics)
		{
			// Append the join. Using the boolean flag as array key would cast it to 1 and
			// overwrite a wildcard/exclusion join that is still referenced in the WHERE clause.
			$sql_array['LEFT_JOIN'][] = $topics_join;
		}

		$sql_array['WHERE'] = implode(' AND ', $sql_where);
		$sql_array['GROUP_BY'] = ($group_by) ? (($type == 'posts') ? 'p.post_id' : 'p.topic_id') . ', ' . $sort_by_sql[$sort_key] : '';
		$sql_array['ORDER_BY'] = $sql_sort;

		unset($sql_where, $sql_sort, $group_by);

		$sql = $db->sql_build_query('SELECT', $sql_array);
		$result = $db->sql_query_limit($sql, $config['search_block_size'], $start);

		while ($row = $db->sql_fetchrow($result))
		{
			$id_ary[] = $row[(($type == 'posts') ? 'post_id' : 'topic_id')];
		}
		$db->sql_freeresult($result);

		if (!sizeof($id_ary))
		{
			return false;
		}

		// if we use mysql and the total result count is not cached yet, retrieve it from the db
		if (!$total_results && $is_mysql)
		{
			$sql = 'SELECT FOUND_ROWS() as total_results';
			$result = $db->sql_query($sql);
			$total_results = (int) $db->sql_fetchfield('total_results');
			$db->sql_freeresult($result);

			if (!$total_results)
			{
				return false;
			}
		}

		// store the ids, from start on then delete anything that isn't on the current page because we only need ids for one page
		$this->save_ids($search_key, $this->search_query, $author_ary, $total_results, $id_ary, $start, $sort_dir);
		$id_ary = array_slice($id_ary, 0, (int) $per_page);

		return $total_results;
	}
ef5584
ef5584
	/**
ef5584
	* Performs a search on an author's posts without caring about message contents. Depends on display specific params
ef5584
	*
ef5584
	* @param	string		$type				contains either posts or topics depending on what should be searched for
ef5584
	* @param	boolean		$firstpost_only		if true, only topic starting posts will be considered
ef5584
	* @param	array		&$sort_by_sql		contains SQL code for the ORDER BY part of a query
ef5584
	* @param	string		&$sort_key			is the key of $sort_by_sql for the selected sorting
ef5584
	* @param	string		&$sort_dir			is either a or d representing ASC and DESC
ef5584
	* @param	string		&$sort_days			specifies the maximum amount of days a post may be old
ef5584
	* @param	array		&$ex_fid_ary		specifies an array of forum ids which should not be searched
ef5584
	* @param	array		&$m_approve_fid_ary	specifies an array of forum ids in which the searcher is allowed to view unapproved posts
ef5584
	* @param	int			&$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
ef5584
	* @param	array		&$author_ary		an array of author ids
ef5584
	* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
ef5584
	* @param	int			$start				indicates the first index of the page
ef5584
	* @param	int			$per_page			number of ids each page is supposed to contain
ef5584
	* @return	boolean|int						total number of results
ef5584
	*
ef5584
	* @access	public
ef5584
	*/
ef5584
	function author_search($type, $firstpost_only, &$sort_by_sql, &$sort_key, &$sort_dir, &$sort_days, &$ex_fid_ary, &$m_approve_fid_ary, &$topic_id, &$author_ary, &$id_ary, $start, $per_page)
	{
		global $config, $db;

		// Without authors there is nothing to look for
		if (!sizeof($author_ary))
		{
			return 0;
		}

		// All options that influence the result set make up the cache key
		$key_parts = array(
			'',
			$type,
			($firstpost_only) ? 'firstpost' : '',
			'',
			'',
			$sort_days,
			$sort_key,
			$topic_id,
			implode(',', $ex_fid_ary),
			implode(',', $m_approve_fid_ary),
			implode(',', $author_ary)
		);
		$search_key = md5(implode('#', $key_parts));

		// A cache hit fills $id_ary and $total_results, nothing else to do then
		$total_results = 0;
		$cache_state = $this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir);
		if ($cache_state == SEARCH_RESULT_IN_CACHE)
		{
			return $total_results;
		}

		$id_ary = array();

		// WHERE fragments, optional ones stay empty when they do not apply
		$where_author = $db->sql_in_set('p.poster_id', $author_ary);

		$where_forums = '';
		if (sizeof($ex_fid_ary))
		{
			$where_forums = ' AND ' . $db->sql_in_set('p.forum_id', $ex_fid_ary, true);
		}

		$where_time = '';
		if ($sort_days)
		{
			$where_time = ' AND p.post_time >= ' . (time() - ($sort_days * 86400));
		}

		$where_topic = '';
		if ($topic_id)
		{
			$where_topic = ' AND p.topic_id = ' . (int) $topic_id;
		}

		$where_firstpost = '';
		if ($firstpost_only)
		{
			$where_firstpost = ' AND p.post_id = t.topic_first_post_id';
		}

		// Sorting: the first character of the sort column is its table alias and
		// decides which additional table has to be part of the query
		$order_by = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
		$sort_table = '';
		$sort_join = '';
		$sort_alias = $order_by[0];

		if ($sort_alias == 'u')
		{
			$sort_table = USERS_TABLE . ' u, ';
			$sort_join = ' AND u.user_id = p.poster_id ';
		}
		else if ($sort_alias == 't')
		{
			// the topics table is already part of the query for topic searches and first post searches
			if ($type == 'posts' && !$firstpost_only)
			{
				$sort_table = TOPICS_TABLE . ' t, ';
				$sort_join = ' AND t.topic_id = p.topic_id ';
			}
		}
		else if ($sort_alias == 'f')
		{
			$sort_table = FORUMS_TABLE . ' f, ';
			$sort_join = ' AND f.forum_id = p.forum_id ';
		}

		// Approval filter: empty list => approved posts only, array(-1) => no restriction
		$where_approved = '';
		if (!sizeof($m_approve_fid_ary))
		{
			$where_approved = ' AND p.post_approved = 1';
		}
		else if ($m_approve_fid_ary != array(-1))
		{
			$where_approved = ' AND (p.post_approved = 1 OR ' . $db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
		}

		$select_column = ($type == 'posts') ? 'p.post_id' : 't.topic_id';
		$use_found_rows = false;

		// Count the results unless the cache already knew the total
		if (!$total_results)
		{
			switch ($db->sql_layer)
			{
				case 'mysql4':
				case 'mysqli':
					// the total is fetched with FOUND_ROWS() after the result query has run
					$select_column = 'SQL_CALC_FOUND_ROWS ' . $select_column;
					$use_found_rows = true;
				break;

				default:
					$is_sqlite = ($db->sql_layer == 'sqlite');

					if ($type == 'posts')
					{
						$sql = 'SELECT COUNT(p.post_id) as total_results
							FROM ' . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t ' : ' ') . "
							WHERE $where_author
								$where_topic
								$where_firstpost
								$where_approved
								$where_forums
								$where_time";
					}
					else
					{
						// sqlite counts the distinct topic ids through a sub-select instead of COUNT(DISTINCT ...)
						if ($is_sqlite)
						{
							$sql = 'SELECT COUNT(topic_id) as total_results
								FROM (SELECT DISTINCT t.topic_id';
						}
						else
						{
							$sql = 'SELECT COUNT(DISTINCT t.topic_id) as total_results';
						}

						$sql .= ' FROM ' . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
							WHERE $where_author
								$where_topic
								$where_firstpost
								$where_approved
								$where_forums
								AND t.topic_id = p.topic_id
								$where_time" . (($is_sqlite) ? ')' : '');
					}

					$result = $db->sql_query($sql);
					$total_results = (int) $db->sql_fetchfield('total_results');
					$db->sql_freeresult($result);

					if (!$total_results)
					{
						return false;
					}
				break;
			}
		}

		// The query which actually selects the ids
		if ($type == 'posts')
		{
			$id_column = 'post_id';
			$sql = "SELECT $select_column
				FROM " . $sort_table . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t' : '') . "
				WHERE $where_author
					$where_topic
					$where_firstpost
					$where_approved
					$where_forums
					$sort_join
					$where_time
				ORDER BY $order_by";
		}
		else
		{
			$id_column = 'topic_id';
			$sql = "SELECT $select_column
				FROM " . $sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
				WHERE $where_author
					$where_topic
					$where_firstpost
					$where_approved
					$where_forums
					AND t.topic_id = p.topic_id
					$sort_join
					$where_time
				GROUP BY t.topic_id, " . $sort_by_sql[$sort_key] . '
				ORDER BY ' . $order_by;
		}

		// Read a single block of ids, it is cached further down
		$result = $db->sql_query_limit($sql, $config['search_block_size'], $start);

		while ($row = $db->sql_fetchrow($result))
		{
			$id_ary[] = $row[$id_column];
		}
		$db->sql_freeresult($result);

		// mysql delivers the total for the query that has just been executed
		if ($use_found_rows && !$total_results)
		{
			$sql = 'SELECT FOUND_ROWS() as total_results';
			$result = $db->sql_query($sql);
			$total_results = (int) $db->sql_fetchfield('total_results');
			$db->sql_freeresult($result);

			if (!$total_results)
			{
				return false;
			}
		}

		if (!sizeof($id_ary))
		{
			return false;
		}

		// cache the whole block, then hand back only the ids of the requested page
		$this->save_ids($search_key, '', $author_ary, $total_results, $id_ary, $start, $sort_dir);
		$id_ary = array_slice($id_ary, 0, $per_page);

		return $total_results;
	}
ef5584
ef5584
	/**
	* Split a text into the words that should be indexed
	*
	* HTML tags, [code] blocks and BBCode tags are stripped, the remainder is
	* passed through cleanup() and split on spaces. Words of more than 255 bytes
	* and words shorter than the configured minimum length are dropped; words
	* starting with a Hangul or CJK character are exempt from the
	* character-based minimum.
	*
	* NOTE: duplicates are NOT removed from the return array
	*
	* @param	string	$text	Text to split, encoded in UTF-8
	* @return	array			Array of UTF-8 words
	*
	* @access	private
	*/
	function split_message($text)
	{
		$words = array();

		/**
		* Taken from the original code
		*/
		$match = array(
			// Do not index code
			'#\[code(?:=.*?)?(\:?[0-9a-z]{5,})\].*?\[\/code(\:?[0-9a-z]{5,})\]#is',
			// BBcode
			'#\[\/?[a-z0-9\*\+\-]+(?:=.*?)?(?::[a-z])?(\:?[0-9a-z]{5,})\]#'
		);

		$min = $this->word_length['min'];
		$isset_min = $min - 1;

		/**
		* Clean up the string, remove HTML tags, remove BBCodes
		*/
		$clean_text = $this->cleanup(preg_replace($match, ' ', strip_tags($text)), -1);

		// strtok() returns false once the text is exhausted, which ends the loop.
		// Using a for loop keeps the "advance to the next token" step in one place.
		for ($word = strtok($clean_text, ' '); strlen($word); $word = strtok(' '))
		{
			$byte_len = strlen($word);

			/**
			* Words longer than 255 bytes are ignored. This will have to be
			* changed whenever we change the length of search_wordlist.word_text
			*
			* Words of $isset_min bytes or less are ignored, too
			*
			* NOTE(review): this byte-based check runs before the Hangul/CJK
			* exemption below, so with a minimum above 3 a single 3-byte
			* character would be dropped here - confirm this is intended
			*/
			if ($byte_len > 255 || $byte_len <= $isset_min)
			{
				continue;
			}

			/**
			* Test whether the word is too short to be indexed.
			*
			* Note that this limit does NOT apply to CJK and Hangul
			*/
			if (utf8_strlen($word) < $min)
			{
				/**
				* Note: this could be optimized. If the codepoint is lower than Hangul's range
				* we know that it will also be lower than CJK ranges
				*/
				if ((strncmp($word, UTF8_HANGUL_FIRST, 3) < 0 || strncmp($word, UTF8_HANGUL_LAST, 3) > 0)
				 && (strncmp($word, UTF8_CJK_FIRST, 3) < 0 || strncmp($word, UTF8_CJK_LAST, 3) > 0)
				 && (strncmp($word, UTF8_CJK_B_FIRST, 4) < 0 || strncmp($word, UTF8_CJK_B_LAST, 4) > 0))
				{
					continue;
				}
			}

			$words[] = $word;
		}

		return $words;
	}
ef5584
ef5584
	/**
	* Updates wordlist and wordmatch tables when a message is posted or changed
	*
	* In 'edit' mode the words currently linked to the post are compared with
	* the new text so that only the differences are written; in every other
	* mode all words of the message and subject are added. Cached search
	* results involving any added or removed word, or the poster, are destroyed
	* afterwards. Nothing happens when the indexer is disabled in the config.
	*
	* @param	string	$mode		Contains the post mode: edit, post, reply, quote
	* @param	int		$post_id	The id of the post which is modified/created
	* @param	string	&$message	New or updated post content
	* @param	string	&$subject	New or updated post subject
	* @param	int		$poster_id	Post author's user id
	* @param	int		$forum_id	The id of the forum in which the post is located (not used here)
	*
	* @access	public
	*/
	function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id)
	{
		global $config, $db;

		if (!$config['fulltext_native_load_upd'])
		{
			/**
			* The search indexer is disabled, return
			*/
			return;
		}

		// The post id is embedded in several queries below: force it to an
		// integer once instead of relying on every single query to cast it
		$post_id = (int) $post_id;

		// Split new post/subject to obtain array of 'words'
		$split_text = $this->split_message($message);
		$split_title = $this->split_message($subject);

		// word_text => word_id of the words currently linked to the post (edit mode only)
		$cur_words = array('post' => array(), 'title' => array());

		// By default every word is new and nothing has to be removed
		$words = array(
			'add'	=> array('post' => $split_text, 'title' => $split_title),
			'del'	=> array('post' => array(), 'title' => array())
		);

		if ($mode == 'edit')
		{
			$sql = 'SELECT w.word_id, w.word_text, m.title_match
				FROM ' . SEARCH_WORDLIST_TABLE . ' w, ' . SEARCH_WORDMATCH_TABLE . ' m
				WHERE m.post_id = ' . $post_id . '
					AND w.word_id = m.word_id';
			$result = $db->sql_query($sql);

			while ($row = $db->sql_fetchrow($result))
			{
				$which = ($row['title_match']) ? 'title' : 'post';
				$cur_words[$which][$row['word_text']] = $row['word_id'];
			}
			$db->sql_freeresult($result);

			// Only the differences between the old and the new text are written
			$words['add']['post'] = array_diff($split_text, array_keys($cur_words['post']));
			$words['add']['title'] = array_diff($split_title, array_keys($cur_words['title']));
			$words['del']['post'] = array_diff(array_keys($cur_words['post']), $split_text);
			$words['del']['title'] = array_diff(array_keys($cur_words['title']), $split_title);
		}
		unset($split_text, $split_title);

		// Get unique words from the above arrays
		$unique_add_words = array_unique(array_merge($words['add']['post'], $words['add']['title']));

		// We now have unique arrays of all words to be added and removed and
		// individual arrays of added and removed words for text and title. What
		// we need to do now is add the new words (if they don't already exist)
		// and then add (or remove) matches between the words and this post
		$new_words = array();
		if (sizeof($unique_add_words))
		{
			$sql = 'SELECT word_id, word_text
				FROM ' . SEARCH_WORDLIST_TABLE . '
				WHERE ' . $db->sql_in_set('word_text', $unique_add_words);
			$result = $db->sql_query($sql);

			$word_ids = array();
			while ($row = $db->sql_fetchrow($result))
			{
				$word_ids[$row['word_text']] = $row['word_id'];
			}
			$db->sql_freeresult($result);

			$new_words = array_diff($unique_add_words, array_keys($word_ids));
		}

		$db->sql_transaction('begin');

		if (sizeof($new_words))
		{
			$sql_ary = array();

			foreach ($new_words as $word)
			{
				$sql_ary[] = array('word_text' => (string) $word, 'word_count' => 0);
			}

			// Errors of this insert are not fatal, they are returned instead of triggered
			$db->sql_return_on_error(true);
			$db->sql_multi_insert(SEARCH_WORDLIST_TABLE, $sql_ary);
			$db->sql_return_on_error(false);

			unset($sql_ary);
		}
		unset($new_words);

		// now update the search match table, remove links to removed words and add links to new words
		foreach ($words['del'] as $word_in => $word_ary)
		{
			$title_match = ($word_in == 'title') ? 1 : 0;

			if (sizeof($word_ary))
			{
				// Translate the removed words back into their word ids
				$sql_in = array();
				foreach ($word_ary as $word)
				{
					$sql_in[] = $cur_words[$word_in][$word];
				}

				$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
					WHERE ' . $db->sql_in_set('word_id', $sql_in) . '
						AND post_id = ' . $post_id . "
						AND title_match = $title_match";
				$db->sql_query($sql);

				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
					SET word_count = word_count - 1
					WHERE ' . $db->sql_in_set('word_id', $sql_in) . '
						AND word_count > 0';
				$db->sql_query($sql);

				unset($sql_in);
			}
		}

		$db->sql_return_on_error(true);
		foreach ($words['add'] as $word_in => $word_ary)
		{
			$title_match = ($word_in == 'title') ? 1 : 0;

			if (sizeof($word_ary))
			{
				$sql = 'INSERT INTO ' . SEARCH_WORDMATCH_TABLE . ' (post_id, word_id, title_match)
					SELECT ' . $post_id . ', word_id, ' . (int) $title_match . '
					FROM ' . SEARCH_WORDLIST_TABLE . '
					WHERE ' . $db->sql_in_set('word_text', $word_ary);
				$db->sql_query($sql);

				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
					SET word_count = word_count + 1
					WHERE ' . $db->sql_in_set('word_text', $word_ary);
				$db->sql_query($sql);
			}
		}
		$db->sql_return_on_error(false);

		$db->sql_transaction('commit');

		// destroy cached search results containing any of the words removed or added
		$this->destroy_cache(array_unique(array_merge($words['add']['post'], $words['add']['title'], $words['del']['post'], $words['del']['title'])), array($poster_id));

		unset($unique_add_words);
		unset($words);
		unset($cur_words);
	}
ef5584
ef5584
	/**
	* Removes entries from the wordmatch table for the specified post_ids
	*
	* The word counts of all words linked to the posts are decremented, the
	* links themselves are deleted, and cached search results involving those
	* words or the given authors are destroyed.
	*
	* @param	array	$post_ids	Ids of the posts to remove from the index
	* @param	array	$author_ids	Passed on to destroy_cache()
	* @param	array	$forum_ids	Not used by this backend
	*/
	function index_remove($post_ids, $author_ids, $forum_ids)
	{
		global $db;

		// Initialised before the condition: the cache purge at the end runs
		// even if no post ids were given and must not read an undefined variable
		$word_texts = array();

		if (sizeof($post_ids))
		{
			$sql = 'SELECT w.word_id, w.word_text, m.title_match
				FROM ' . SEARCH_WORDMATCH_TABLE . ' m, ' . SEARCH_WORDLIST_TABLE . ' w
				WHERE ' . $db->sql_in_set('m.post_id', $post_ids) . '
					AND w.word_id = m.word_id';
			$result = $db->sql_query($sql);

			$message_word_ids = $title_word_ids = array();
			while ($row = $db->sql_fetchrow($result))
			{
				if ($row['title_match'])
				{
					$title_word_ids[] = $row['word_id'];
				}
				else
				{
					$message_word_ids[] = $row['word_id'];
				}
				$word_texts[] = $row['word_text'];
			}
			$db->sql_freeresult($result);

			// Title matches and message matches are counted separately, so a
			// word used in both gets decremented twice.
			// NOTE(review): an id that occurs several times within one list
			// (word shared by several removed posts) is still only decremented
			// once by the IN() update - confirm this is intended
			foreach (array($title_word_ids, $message_word_ids) as $word_id_ary)
			{
				if (sizeof($word_id_ary))
				{
					$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
					SET word_count = word_count - 1
					WHERE ' . $db->sql_in_set('word_id', $word_id_ary) . '
						AND word_count > 0';
					$db->sql_query($sql);
				}
			}

			unset($title_word_ids);
			unset($message_word_ids);

			$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
				WHERE ' . $db->sql_in_set('post_id', $post_ids);
			$db->sql_query($sql);
		}

		$this->destroy_cache(array_unique($word_texts), $author_ids);
	}
ef5584
ef5584
	/**
	* Tidy up indexes: flag 'common words' and delete their entries from the
	* match table
	*
	* A word counts as common once its word_count exceeds the configured
	* percentage of the total number of posts, or when it is already flagged.
	* The step is skipped on boards with fewer than 100 posts or when no
	* threshold is configured. The time of the run is stored in search_last_gc.
	*/
	function tidy()
	{
		global $db, $config;

		// Nothing to tidy while the indexer is switched off, just record the run
		if (!$config['fulltext_native_load_upd'])
		{
			set_config('search_last_gc', time(), true);
			return;
		}

		$stale_cache_words = array();

		// Remove common words
		if ($config['num_posts'] >= 100 && $config['fulltext_native_common_thres'])
		{
			$threshold_ratio = ((double) $config['fulltext_native_common_thres']) / 100.0;
			$max_word_count = floor($config['num_posts'] * $threshold_ratio);

			// Collect the ids and texts of all common words
			$sql = 'SELECT word_id, word_text
				FROM ' . SEARCH_WORDLIST_TABLE . '
				WHERE word_count > ' . $max_word_count . '
					OR word_common = 1';
			$result = $db->sql_query($sql);

			$common_ids = array();
			while ($word_row = $db->sql_fetchrow($result))
			{
				$common_ids[] = $word_row['word_id'];
				$stale_cache_words[] = $word_row['word_text'];
			}
			$db->sql_freeresult($result);

			if (count($common_ids))
			{
				// Flag the words
				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
					SET word_common = 1
					WHERE ' . $db->sql_in_set('word_id', $common_ids);
				$db->sql_query($sql);

				// Record the run before the potentially slow delete below, so a
				// user who reloads the page while it is running does not trigger it again
				set_config('search_last_gc', time(), true);

				// Delete the matches
				$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
					WHERE ' . $db->sql_in_set('word_id', $common_ids);
				$db->sql_query($sql);
			}
			unset($common_ids);
		}

		// Cached results that contain a word which is common now are outdated
		if (count($stale_cache_words))
		{
			$this->destroy_cache(array_unique($stale_cache_words));
		}

		set_config('search_last_gc', time(), true);
	}
ef5584
ef5584
	/**
	* Deletes all words from the index
	*
	* Empties the word list, word match and search results tables. The sqlite
	* and firebird layers get a plain DELETE, every other layer a TRUNCATE.
	*
	* @param	mixed	$acp_module	Not used by this backend
	* @param	mixed	$u_action	Not used by this backend
	*/
	function delete_index($acp_module, $u_action)
	{
		global $db;

		$uses_delete = in_array($db->sql_layer, array('sqlite', 'firebird'));
		$statement = ($uses_delete) ? 'DELETE FROM ' : 'TRUNCATE TABLE ';

		foreach (array(SEARCH_WORDLIST_TABLE, SEARCH_WORDMATCH_TABLE, SEARCH_RESULTS_TABLE) as $table_name)
		{
			$db->sql_query($statement . $table_name);
		}
	}
ef5584
ef5584
	/**
	* Returns true if the index holds data, i.e. both the word count and the
	* match count reported by get_stats() are non-zero
	*
	* @return	bool
	*/
	function index_created()
	{
		// Statistics are only fetched once per instance
		if (empty($this->stats))
		{
			$this->get_stats();
		}

		$has_words = (bool) $this->stats['total_words'];
		$has_matches = (bool) $this->stats['total_matches'];

		return $has_words && $has_matches;
	}
ef5584
ef5584
	/**
	* Returns an associative array containing information about the indexes
	*
	* @return	array	Language string => value, for the total number of
	*					words and the total number of matches
	*/
	function index_stats()
	{
		global $user;

		// Statistics are only fetched once per instance
		if (empty($this->stats))
		{
			$this->get_stats();
		}

		$report = array();
		$report[$user->lang['TOTAL_WORDS']] = $this->stats['total_words'];
		$report[$user->lang['TOTAL_MATCHES']] = $this->stats['total_matches'];

		return $report;
	}
ef5584
ef5584
	/**
	* Counts the rows of the word list and word match tables and stores the
	* results in $this->stats as 'total_words' and 'total_matches'
	*/
	function get_stats()
	{
		global $db;

		// statistic name (also used as column alias) => table that is counted
		$counted_tables = array(
			'total_words'	=> SEARCH_WORDLIST_TABLE,
			'total_matches'	=> SEARCH_WORDMATCH_TABLE
		);

		foreach ($counted_tables as $stat_name => $table_name)
		{
			$sql = 'SELECT COUNT(*) as ' . $stat_name . '
			FROM ' . $table_name;
			$result = $db->sql_query($sql);
			$this->stats[$stat_name] = (int) $db->sql_fetchfield($stat_name);
			$db->sql_freeresult($result);
		}
	}
ef5584
ef5584
	/**
	* Clean up a text to remove non-alphanumeric characters
	*
	* This method receives a UTF-8 string, normalizes and validates it, replaces all
	* non-alphanumeric characters with spaces then returns the result. ASCII
	* letters are lowercased, Hangul/CJK characters are surrounded by spaces so
	* that each one is indexed individually, and all other multibyte characters
	* are mapped through the search indexer conversion tables.
	*
	* Any number of "allowed chars" can be passed as a UTF-8 string in NFC.
	*
	* @param	string	$text			Text to split, in UTF-8 (not normalized or sanitized)
	* @param	string	$allowed_chars	String of special chars to allow; any value
	*									without a first character means "none"
	* @param	string	$encoding		Text encoding
	* @return	string					Cleaned up text, only alphanumeric chars are left
	*
	* @todo normalizer::cleanup being able to be used?
	*/
	function cleanup($text, $allowed_chars = null, $encoding = 'utf-8')
	{
		global $phpbb_root_path, $phpEx;
		static $conv = array(), $conv_loaded = array();
		$allow = array();

		// Convert the text to UTF-8
		$encoding = strtolower($encoding);
		if ($encoding != 'utf-8')
		{
			$text = utf8_recode($text, $encoding);
		}

		// High nibble of a lead byte => length of the UTF-8 sequence it starts
		$utf_len_mask = array(
			"\xC0"	=>	2,
			"\xD0"	=>	2,
			"\xE0"	=>	3,
			"\xF0"	=>	4
		);

		/**
		* Replace HTML entities and NCRs
		*/
		$text = htmlspecialchars_decode(utf8_decode_ncr($text), ENT_QUOTES);

		/**
		* Load the UTF-8 normalizer
		*
		* If we use it more widely, an instance of that class should be held in a
		* a global variable instead
		*/
		utf_normalizer::nfc($text);

		/**
		* The first thing we do is:
		*
		* - convert ASCII-7 letters to lowercase
		* - remove the ASCII-7 non-alpha characters
		* - remove the bytes that should not appear in a valid UTF-8 string: 0xC0,
		*   0xC1 and 0xF5-0xFF
		*
		* @todo in theory, the third one is already taken care of during normalization and those chars should have been replaced by Unicode replacement chars
		*/
		$sb_match	= "ISTCPAMELRDOJBNHFGVWUQKYXZ\r\n\t!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\xC0\xC1\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF";

		// The 26 letters map to their lowercase form, every other byte of
		// $sb_match maps to a space. The padding is derived from $sb_match so
		// that both strings are guaranteed to have the same length for strtr()
		$sb_lower	= 'istcpamelrdojbnhfgvwuqkyxz';
		$sb_replace	= $sb_lower . str_repeat(' ', strlen($sb_match) - strlen($sb_lower));

		/**
		* This is the list of legal ASCII chars, it is automatically extended
		* with ASCII chars from $allowed_chars
		*/
		$legal_ascii = ' eaisntroludcpmghbfvq10xy2j9kw354867z';

		/**
		* Prepare an array containing the extra chars to allow
		*/
		if (isset($allowed_chars[0]))
		{
			$pos = 0;
			$len = strlen($allowed_chars);
			do
			{
				$c = $allowed_chars[$pos];

				if ($c < "\x80")
				{
					/**
					* ASCII char
					*/
					$sb_pos = strpos($sb_match, $c);
					if (is_int($sb_pos))
					{
						/**
						* Remove the char from $sb_match and its corresponding
						* replacement in $sb_replace
						*/
						$sb_match = substr($sb_match, 0, $sb_pos) . substr($sb_match, $sb_pos + 1);
						$sb_replace = substr($sb_replace, 0, $sb_pos) . substr($sb_replace, $sb_pos + 1);
						$legal_ascii .= $c;
					}

					++$pos;
				}
				else
				{
					/**
					* UTF-8 char
					*/
					$lead = $c & "\xF0";

					if (isset($utf_len_mask[$lead]))
					{
						$utf_len = $utf_len_mask[$lead];
						$allow[substr($allowed_chars, $pos, $utf_len)] = 1;
						$pos += $utf_len;
					}
					else
					{
						// Not a valid lead byte: skip it, otherwise the
						// position would never advance
						++$pos;
					}
				}
			}
			while ($pos < $len);
		}

		$text = strtr($text, $sb_match, $sb_replace);
		$ret = '';

		$pos = 0;
		$len = strlen($text);

		do
		{
			/**
			* Do all consecutive ASCII chars at once
			*/
			if ($spn = strspn($text, $legal_ascii, $pos))
			{
				$ret .= substr($text, $pos, $spn);
				$pos += $spn;
			}

			if ($pos >= $len)
			{
				return $ret;
			}

			/**
			* Capture the UTF char
			*
			* A byte that is neither legal ASCII nor a valid lead byte (0x7F or
			* a stray continuation byte) has no entry in the length mask. It is
			* replaced by a space and skipped; without this the position would
			* not advance and the loop would never terminate.
			*/
			$lead = $text[$pos] & "\xF0";
			if (!isset($utf_len_mask[$lead]))
			{
				$ret .= ' ';
				++$pos;
				continue;
			}

			$utf_len = $utf_len_mask[$lead];
			$utf_char = substr($text, $pos, $utf_len);
			$pos += $utf_len;

			if (($utf_char >= UTF8_HANGUL_FIRST && $utf_char <= UTF8_HANGUL_LAST)
			 || ($utf_char >= UTF8_CJK_FIRST && $utf_char <= UTF8_CJK_LAST)
			 || ($utf_char >= UTF8_CJK_B_FIRST && $utf_char <= UTF8_CJK_B_LAST))
			{
				/**
				* All characters within these ranges are valid
				*
				* We separate them with a space in order to index each character
				* individually
				*/
				$ret .= ' ' . $utf_char . ' ';
				continue;
			}

			if (isset($allow[$utf_char]))
			{
				/**
				* The char is explicitly allowed
				*/
				$ret .= $utf_char;
				continue;
			}

			if (isset($conv[$utf_char]))
			{
				/**
				* The char is mapped to something, maybe to itself actually
				*/
				$ret .= $conv[$utf_char];
				continue;
			}

			/**
			* The char isn't mapped, but did we load its conversion table?
			*
			* The search indexer table is split into blocks. The block number of
			* each char is equal to its codepoint right-shifted for 11 bits. It
			* means that out of the 11, 16 or 21 meaningful bits of a 2-, 3- or
			* 4- byte sequence we only keep the leftmost 0, 5 or 10 bits. Thus,
			* all UTF chars encoded in 2 bytes are in the same first block.
			*/
			if (isset($utf_char[2]))
			{
				if (isset($utf_char[3]))
				{
					/**
					* 1111 0nnn 10nn nnnn 10nx xxxx 10xx xxxx
					* 0000 0111 0011 1111 0010 0000
					*/
					$idx = ((ord($utf_char[0]) & 0x07) << 7) | ((ord($utf_char[1]) & 0x3F) << 1) | ((ord($utf_char[2]) & 0x20) >> 5);
				}
				else
				{
					/**
					* 1110 nnnn 10nx xxxx 10xx xxxx
					* 0000 0111 0010 0000
					*/
					$idx = ((ord($utf_char[0]) & 0x07) << 1) | ((ord($utf_char[1]) & 0x20) >> 5);
				}
			}
			else
			{
				/**
				* 110x xxxx 10xx xxxx
				* 0000 0000 0000 0000
				*/
				$idx = 0;
			}

			/**
			* Check if the required conv table has been loaded already
			*/
			if (!isset($conv_loaded[$idx]))
			{
				$conv_loaded[$idx] = 1;
				$file = $phpbb_root_path . 'includes/utf/data/search_indexer_' . $idx . '.' . $phpEx;

				if (file_exists($file))
				{
					$conv += include($file);
				}
			}

			if (isset($conv[$utf_char]))
			{
				$ret .= $conv[$utf_char];
			}
			else
			{
				/**
				* We add an entry to the conversion table so that we
				* don't have to convert to codepoint and perform the checks
				* that are above this block
				*/
				$conv[$utf_char] = ' ';
				$ret .= ' ';
			}
		}
		while (1);

		return $ret;
	}
ef5584
ef5584
	/**
ef5584
	* Returns a list of options for the ACP to display
ef5584
	*/
ef5584
	function acp()
ef5584
	{
ef5584
		global $user, $config;
ef5584
ef5584
ef5584
		/**
		* Build the markup for this backend's ACP options: the index update
		* switch, the minimum and maximum word length and the common word
		* threshold. Labels come from $user->lang, current values from $config.
		*
		* Original note: the option list was copied over for now and will have
		* to be adjusted or removed.
		*/
ef5584
ef5584
		$tpl = '
ef5584
		
ef5584
			
<label for="fulltext_native_load_upd">' . $user->lang['YES_SEARCH_UPDATE'] . ':</label>
' . $user->lang['YES_SEARCH_UPDATE_EXPLAIN'] . '
ef5584
			
<label><input type="radio" id="fulltext_native_load_upd" name="config[fulltext_native_load_upd]" value="1"' . (($config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" /> ' . $user->lang['YES'] . '</label><label><input type="radio" name="config[fulltext_native_load_upd]" value="0"' . ((!$config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" /> ' . $user->lang['NO'] . '</label>
ef5584
		
ef5584
		
ef5584
			
<label for="fulltext_native_min_chars">' . $user->lang['MIN_SEARCH_CHARS'] . ':</label>
' . $user->lang['MIN_SEARCH_CHARS_EXPLAIN'] . '
ef5584
			
<input id="fulltext_native_min_chars" type="text" size="3" maxlength="3" name="config[fulltext_native_min_chars]" value="' . (int) $config['fulltext_native_min_chars'] . '" />
ef5584
		
ef5584
		
ef5584
			
<label for="fulltext_native_max_chars">' . $user->lang['MAX_SEARCH_CHARS'] . ':</label>
' . $user->lang['MAX_SEARCH_CHARS_EXPLAIN'] . '
ef5584
			
<input id="fulltext_native_max_chars" type="text" size="3" maxlength="3" name="config[fulltext_native_max_chars]" value="' . (int) $config['fulltext_native_max_chars'] . '" />
ef5584
		
ef5584
		
ef5584
			
<label for="fulltext_native_common_thres">' . $user->lang['COMMON_WORD_THRESHOLD'] . ':</label>
' . $user->lang['COMMON_WORD_THRESHOLD_EXPLAIN'] . '
ef5584
			
<input id="fulltext_native_common_thres" type="text" size="3" maxlength="3" name="config[fulltext_native_common_thres]" value="' . (double) $config['fulltext_native_common_thres'] . '" /> %
ef5584
		
ef5584
		';
ef5584
ef5584
		// These are fields required in the config table.
		// 'tpl' is the markup built above; 'config' maps each option name to a
		// type string - presumably the validation rule applied by the ACP when
		// the form is submitted (TODO confirm against the caller)
ef5584
		return array(
ef5584
			'tpl'		=> $tpl,
ef5584
			'config'	=> array('fulltext_native_load_upd' => 'bool', 'fulltext_native_min_chars' => 'integer:0:255', 'fulltext_native_max_chars' => 'integer:0:255', 'fulltext_native_common_thres' => 'double:0:100')
ef5584
		);
ef5584
	}
ef5584
}
ef5584
ef5584
?>