· 7 years ago · Dec 18, 2018, 12:10 PM
1<?php
2
// Load the run-time settings. The rest of this script reads the keys
// mysql.host, mysql.user, mysql.pass, mysql.dbname, query.count, query.data
// and limit from this array.
$aConf = parse_ini_file('conf.ini');
if ($aConf === false) {
    // Without a configuration there is nothing sensible to connect to, so stop
    // here instead of continuing with empty credentials and queries.
    file_put_contents('php://stderr', "Unable to read configuration file conf.ini\n");
    exit(1);
}

// Remove the execution time limit and raise the memory ceiling for this run.
set_time_limit(0);
ini_set('memory_limit', '700M');
7
/**
 * Return the current Unix timestamp with microsecond precision.
 *
 * Thin wrapper kept for backward compatibility; microtime(true) already
 * returns the value as a float, so no string splitting is needed.
 *
 * @return float Seconds since the Unix epoch, including the fractional part
 */
function microtime_float()
{
    return microtime(true);
}
13
/**
 * Format a byte count as a short human-readable string, e.g. "1.5 kb".
 *
 * The value is scaled by powers of 1024 and rounded to two decimals.
 * Zero, negative and non-numeric input is reported as "0 b"; values beyond
 * the largest known unit are expressed in that largest unit.
 *
 * @param int|float $size Number of bytes
 * @return string Scaled value followed by a space and the unit name
 */
function convert($size)
{
    $unit = array('b', 'kb', 'mb', 'gb', 'tb', 'pb');

    // log() is undefined for values <= 0, which previously led to a division
    // by zero hidden behind the @ operator.
    if (!is_numeric($size) || $size <= 0) {
        return '0 ' . $unit[0];
    }

    // Clamp the exponent so it is always a valid integer index into $unit.
    $i = (int) floor(log($size, 1024));
    $i = max(0, min($i, count($unit) - 1));

    return round($size / pow(1024, $i), 2) . ' ' . $unit[$i];
}
// Remember when the run started so the elapsed time can be computed at the end.
$time_start = microtime(true);
20
21
/**
 * Counts word sequences ("phrases") found in text and exports the frequent
 * ones as a SQL script.
 *
 * Text is lower-cased, stripped of tags and split into runs of word
 * characters. Every word that passes _allowParse() starts phrases of
 * 1 .. $_iMaxWords consecutive words; each occurrence increments that
 * phrase's counter.
 */
class InvicoKeywords
{
    /**
     * @var int Maximum number of words in a suggested phrase
     */
    private $_iMaxWords = 5;

    /**
     * @var int Length threshold for the first word of a phrase: the word must
     *          be strictly longer than this many bytes to start a phrase
     */
    private $_iMinWordLength = 2;

    /**
     * @var array Map of phrase => frequency
     */
    private $_aPhrases = array();

    /**
     * @var string Suggest table name used in the generated SQL
     */
    private $_tableName = 'keywords';

    /**
     * @var int Frequency threshold: only phrases counted more often than this
     *          value are written to the generated SQL
     */
    private $_iMinFreq = 2;

    /**
     * Set the table name used by getSql().
     *
     * @param string $name
     */
    public function setTableName($name)
    {
        $this->_tableName = $name;
    }

    /**
     * Increment the counter of a single phrase, creating it on first use.
     *
     * @param string $sPhrase
     */
    private function _addPhrase($sPhrase)
    {
        if (isset($this->_aPhrases[$sPhrase])) {
            $this->_aPhrases[$sPhrase]++;
        } else {
            $this->_aPhrases[$sPhrase] = 1;
        }
    }

    /**
     * Decide whether a word may start a phrase.
     *
     * Numeric words are rejected, as are words that are not longer than
     * $_iMinWordLength bytes.
     *
     * @param string $word
     * @return bool
     */
    private function _allowParse($word)
    {
        if (is_numeric($word)) {
            return false;
        }

        // An offset equal to the threshold only exists when the word is longer
        // than the threshold. Square brackets replace the {} offset syntax,
        // which PHP 8 no longer parses.
        return isset($word[$this->_iMinWordLength]);
    }

    /**
     * Count every phrase that starts at an allowed word of the given list.
     *
     * Only the first word of a phrase is checked with _allowParse(); the
     * following words are taken as they are.
     *
     * @param array $aWordArray Sequential list of words
     */
    private function _parseArray($aWordArray)
    {
        $iCount = count($aWordArray);

        for ($i = 0; $i < $iCount; $i++) {
            if (!$this->_allowParse($aWordArray[$i])) {
                continue;
            }

            // Grow the phrase one word at a time instead of rebuilding it from
            // scratch for every length; stop at the end of the word list.
            $sPhrase = $aWordArray[$i];
            $this->_addPhrase($sPhrase);

            $iEnd = min($i + $this->_iMaxWords, $iCount);
            for ($k = $i + 1; $k < $iEnd; $k++) {
                $sPhrase .= ' ' . $aWordArray[$k];
                $this->_addPhrase($sPhrase);
            }
        }
    }

    /**
     * Parse the given text and add its phrases to the counters.
     *
     * NOTE(review): strtolower() and the \w pattern are byte-oriented, so
     * non-ASCII letters may not be treated as word characters - confirm
     * whether the input is expected to be ASCII only.
     *
     * @param string|null $sText Raw text, may contain markup; null is treated
     *                           as an empty string
     */
    public function addText($sText)
    {
        $sClean = strtolower(trim(strip_tags((string) $sText)));

        // Covers both "no words found" (0) and a regex failure (false).
        if (!preg_match_all("/[\w]+/", $sClean, $matches)) {
            return;
        }

        $this->_parseArray($matches[0]);
    }

    /**
     * Return all counted phrases.
     *
     * @return array Map of phrase => frequency
     */
    public function getKeywords()
    {
        return $this->_aPhrases;
    }

    /**
     * Build a SQL script that recreates the suggest table and fills it.
     *
     * Only phrases counted more often than $_iMinFreq are included. Rows are
     * grouped into multi-row INSERT statements of at most 1001 rows, and every
     * statement, including the last one, is terminated with a semicolon.
     *
     * @return string
     */
    public function getSql()
    {
        // Quote the identifier the same way in every statement.
        $sTable = '`' . str_replace('`', '``', $this->_tableName) . '`';

        $sSql = "
DROP TABLE IF EXISTS {$sTable};

CREATE TABLE {$sTable} (
 `keyword` varchar(255) NOT NULL,
 `freq` int(11) NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
";

        $iRows = 0; // rows written to the INSERT statement currently open
        foreach ($this->getKeywords() as $keyword => $freq) {
            if ($freq <= $this->_iMinFreq) {
                continue;
            }

            if ($iRows != 0) {
                $sSql .= ",\n";
            } else {
                $sSql .= "\nINSERT INTO {$sTable} VALUES ";
            }

            // Phrases consist of word characters and spaces only; escaping is
            // a defensive measure in case that ever changes.
            $sSql .= "('" . addslashes((string) $keyword) . "', " . (int) $freq . ")";
            $iRows++;

            if ($iRows > 1000) {
                $sSql .= ";\n";
                $iRows = 0;
            }
        }

        // Close the last, partially filled INSERT statement.
        if ($iRows != 0) {
            $sSql .= ";\n";
        }

        return $sSql;
    }

    /**
     * Write the generated SQL script to php://stdout.
     *
     * @throws RuntimeException When the output stream cannot be opened
     */
    public function sendToStdout()
    {
        $out = fopen('php://stdout', 'w');
        if ($out === false) {
            throw new RuntimeException('Unable to open php://stdout for writing');
        }

        fwrite($out, $this->getSql());

        fclose($out);
    }
}
184
// Report a fatal problem on stderr and stop, keeping stdout clean for the SQL.
$fail = function ($sMessage) {
    file_put_contents('php://stderr', $sMessage . "\n");
    exit(1);
};

// Use explicit return-value checks on every PHP version (PHP 8.1+ would
// otherwise throw exceptions by default).
mysqli_report(MYSQLI_REPORT_OFF);

// The mysql_* functions were removed in PHP 7; mysqli is the drop-in successor.
$oDb = mysqli_connect(
    $aConf['mysql.host'],
    $aConf['mysql.user'],
    $aConf['mysql.pass'],
    $aConf['mysql.dbname']
);
if (!$oDb) {
    $fail('MySQL connection failed: ' . mysqli_connect_error());
}

// Chunk size; it is also used as a divisor, so it has to be positive.
$iLimit = (int) $aConf['limit'];
if ($iLimit < 1) {
    $fail('Configuration value "limit" must be a positive integer');
}

// The count query is expected to return one row with a "number" column.
$oCountResult = mysqli_query($oDb, $aConf['query.count']);
if ($oCountResult === false) {
    $fail('Count query failed: ' . mysqli_error($oDb));
}
$oCount = mysqli_fetch_object($oCountResult);
mysqli_free_result($oCountResult);

$iTotal = $oCount ? (int) $oCount->number : 0;
$chunks = (int) ceil($iTotal / $iLimit);

// One parser for the whole run: creating it inside the loop discarded the
// counts of every chunk except the last one.
$oKeywordParser = new InvicoKeywords();

for ($iChunk = 0; $iChunk < $chunks; $iChunk++) {
    $from = $iChunk * $iLimit;
    $oResult = mysqli_query($oDb, $aConf['query.data'] . " limit $from," . $iLimit);
    if ($oResult === false) {
        $fail('Data query failed: ' . mysqli_error($oDb));
    }

    // Each row is expected to expose the text to analyse as "mytext".
    while ($obj = mysqli_fetch_object($oResult)) {
        $oKeywordParser->addText($obj->mytext);
    }

    // Release every chunk's result set, not only the last one.
    mysqli_free_result($oResult);
}

mysqli_close($oDb);

$oKeywordParser->sendToStdout();

$time_end = microtime_float();
$time = $time_end - $time_start;
206
207//echo "Did nothing in $time seconds\n";
208//echo "Peak: " . convert(memory_get_peak_usage(true)) . " Usage: " . convert(memory_get_usage(true)) . "\n";