· 9 years ago · Aug 31, 2016, 12:18 PM
<?php
// Refuse to run when BASEPATH is undefined, i.e. when this file is requested directly.
if (!defined('BASEPATH')) {
    exit('No direct script access allowed');
}
2
/**
 * Extracts the search phrase from a search-engine referer and stores it.
 *
 * scrape() is the entry point; the other public methods are the individual
 * steps (referer host, query-parameter lookup, phrase clean-up, persistence)
 * and can be called on their own.
 */
class Keyword {

    /**
     * Record the search phrase that brought the current visitor here.
     *
     * Does nothing unless the request has a referer whose host maps to a known
     * search-engine query parameter, so ordinary inbound links are never stored
     * as keywords.
     *
     * NOTE(review): pk_stt2_is_contain_bad_words() is not consulted here, so
     * unfiltered phrases are saved. Confirm whether filtering should be enabled.
     *
     * @return void
     */
    public function scrape() {
        $referer = $this->pk_stt2_function_get_referer();
        if ($referer === false) {
            return;
        }

        $delimiter = $this->pk_stt2_function_get_delimiter($referer);
        if ($delimiter === false) {
            return;
        }

        $term = $this->pk_stt2_function_get_terms($delimiter);
        $this->pk_stt2_db_save_searchterms($term);
    }

    /**
     * Return the referer's host name, lower-cased and without a leading "www.".
     *
     * @return string|false Host name, or false when the referer is missing,
     *                      empty, unparsable or has no host component.
     */
    public function pk_stt2_function_get_referer() {
        if (!isset($_SERVER['HTTP_REFERER']) || $_SERVER['HTTP_REFERER'] === '') {
            return false;
        }

        // parse_url() yields null (no host) or false (malformed URL) here.
        $host = parse_url($_SERVER['HTTP_REFERER'], PHP_URL_HOST);
        if (!is_string($host) || $host === '') {
            return false;
        }

        // Host names are case-insensitive; normalise so table lookups match.
        $host = strtolower($host);
        if (strncmp($host, 'www.', 4) === 0) {
            $host = (string) substr($host, 4);
        }

        return $host === '' ? false : $host;
    }

    /**
     * Map a referer host to the query parameter that carries the search phrase.
     *
     * An exact host match is tried first, then a substring match against the
     * engine families in $families (first match wins, in the listed order).
     *
     * @param string|false $ref Host name as returned by pk_stt2_function_get_referer().
     * @return string|false Query parameter name, or false when the host is not
     *                      a recognised search engine.
     */
    public function pk_stt2_function_get_delimiter($ref) {
        if (!is_string($ref) || $ref === '') {
            return false;
        }
        $ref = strtolower($ref);

        $search_engines = array(
            'google.com'              => 'q',
            'go.google.com'           => 'q',
            'images.google.com'       => 'q',
            'video.google.com'        => 'q',
            'news.google.com'         => 'q',
            'blogsearch.google.com'   => 'q',
            'maps.google.com'         => 'q',
            'local.google.com'        => 'q',
            'search.yahoo.com'        => 'p',
            'id.search.yahoo.com'     => 'p',
            'search.msn.com'          => 'q',
            'bing.com'                => 'q',
            'msxml.excite.com'        => 'qkw',
            'search.lycos.com'        => 'query',
            'alltheweb.com'           => 'q',
            'search.aol.com'          => 'query',
            'search.iwon.com'         => 'searchfor',
            'ask.com'                 => 'q',
            'ask.co.uk'               => 'ask',
            'search.cometsystems.com' => 'qry',
            'hotbot.com'              => 'query',
            'overture.com'            => 'Keywords',
            'metacrawler.com'         => 'qkw',
            'search.netscape.com'     => 'query',
            'looksmart.com'           => 'key',
            'dpxml.webcrawler.com'    => 'qkw',
            'search.earthlink.net'    => 'q',
            'search.viewpoint.com'    => 'k',
            'yandex.kz'               => 'text',
            'yandex.ru'               => 'text',
            'baidu.com'               => 'wd',
            'mamma.com'               => 'query',
        );
        if (isset($search_engines[$ref])) {
            return $search_engines[$ref];
        }

        // Country-specific or otherwise unlisted hosts of known engine families.
        $families = array(
            'google'        => 'q',
            'search.atomz.' => 'sp-q',
            'search.msn.'   => 'q',
            'search.yahoo.' => 'p',
            'yandex'        => 'text',
            'baidu'         => 'wd',
        );
        foreach ($families as $needle => $param) {
            if (strpos($ref, $needle) !== false) {
                return $param;
            }
        }

        // Only matches when the caller passes a full URL: a bare host name
        // never contains the "/s/s.dll" path.
        if (preg_match('/home\.bellsouth\.net\/s\/s\.dll/i', $ref)) {
            return 'bellsouth';
        }

        return false;
    }

    /**
     * Extract and normalise the search phrase from the current referer.
     *
     * The value of query parameter $d is URL-decoded once, stripped of single
     * and double quotes, split on whitespace, commas, plus signs and dots,
     * re-joined with single spaces and HTML-escaped.
     *
     * @param string|null $d Name of the query parameter holding the phrase.
     * @return string Normalised phrase, or '' when there is no referer, no
     *                parameter name, or the parameter is absent.
     */
    public function pk_stt2_function_get_terms($d = NULL) {
        if (!is_string($d) || $d === '') {
            return '';
        }
        if (!isset($_SERVER['HTTP_REFERER']) || $_SERVER['HTTP_REFERER'] === '') {
            return '';
        }

        $raw = $this->pk_stt2_function_get_query_param($_SERVER['HTTP_REFERER'], $d);
        if ($raw === NULL) {
            return '';
        }

        $phrase = str_replace(array("'", '"'), '', $raw);
        $words  = preg_split('/[\s,\+\.]+/', $phrase, -1, PREG_SPLIT_NO_EMPTY);
        if ($words === false) {
            return '';
        }

        return htmlspecialchars(trim(implode(' ', $words)));
    }

    /**
     * Return the decoded value of the first query parameter named exactly $name.
     *
     * The query string is split by hand rather than with parse_str() so that
     * parameter names are compared verbatim and a name that merely ends with
     * $name (e.g. "oq" when looking for "q") is not mistaken for it.
     *
     * @param string $url  Full URL to inspect.
     * @param string $name Parameter name (case-sensitive).
     * @return string|null Decoded value, or null when the URL has no such parameter.
     */
    private function pk_stt2_function_get_query_param($url, $name) {
        $query_string = parse_url($url, PHP_URL_QUERY);
        if (!is_string($query_string) || $query_string === '') {
            return NULL;
        }

        foreach (explode('&', $query_string) as $pair) {
            $key_value = explode('=', $pair, 2);
            if (count($key_value) === 2 && urldecode($key_value[0]) === $name) {
                return urldecode($key_value[1]);
            }
        }

        return NULL;
    }

    /**
     * Tell whether a phrase contains any black-listed word.
     *
     * Matching is a case-insensitive substring test, so a listed word is also
     * found inside longer words.
     *
     * @param string $term Phrase to check.
     * @return bool True when at least one black-listed word occurs in $term.
     */
    public function pk_stt2_is_contain_bad_words( $term ){
        $option = 'http:,cache:,site:,utm_source,sex,porn,gamble,xxx,nude,squirt,gay,abortion,attack,bomb,casino,cocaine,die,death,erection,gambling,heroin,marijuana,masturbation,pedophile,penis,poker,pussy,terrorist';
        $term   = (string) $term;

        foreach (explode(',', $option) as $badword) {
            if (stripos($term, $badword) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Persist a search phrase through main_model::insert_keyword().
     *
     * Phrases of three bytes or fewer are ignored.
     *
     * @param string $keyword Phrase to store.
     * @return void
     */
    public function pk_stt2_db_save_searchterms( $keyword) {
        if (strlen($keyword) <= 3) {
            return;
        }

        $CI =& get_instance();
        $CI->load->model('main_model');
        $CI->main_model->insert_keyword($keyword);
    }

}
140
141?>