· 6 years ago · Jul 05, 2019, 03:06 PM
1select id, col_a, col_b, col_c from tablename;
2
3id | col_a | col_b | col_c
4------------------------------------------|---------
51 | apple orange plum | red green blue | bill dave sue
62 | orange plum banana | yellow red | frank james
73 | kiwi fruit apple | green pink | bill sarah-jane frank
8
9expected_result: ["apple", "orange", "banana", "fruit",
10 "green", "yellow", "frank", "james", "sarah-jane"]
11
12DROP TABLE IF EXISTS `tablename`;
13CREATE TABLE `tablename` (
14 `id` int(11) NOT NULL AUTO_INCREMENT,
15 `col_a` text,
16 `col_b` text,
17 `col_c` text,
18 PRIMARY KEY (`id`)
19) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=utf8;
20LOCK TABLES `tablename` WRITE;
21INSERT INTO `tablename` VALUES (1,'apple orange plum','red green blue','bill dave sue'),(2,'orange plum banana','yellow red','frank james'),(3,'kiwi fruit apple','green pink','bill sarah-jane frank');
22UNLOCK TABLES;
23
24DELIMITER //
25CREATE PROCEDURE split_string_to_table (str longtext)
26BEGIN
27 DECLARE val TEXT DEFAULT NULL;
28 DROP TEMPORARY TABLE IF EXISTS temp_values;
29 CREATE TEMPORARY TABLE temp_values (
30 `value` varchar(200)
31 );
32
33 iterator:
34 LOOP
35 IF LENGTH(TRIM(str)) = 0 OR str IS NULL THEN
36 LEAVE iterator;
37 END IF;
38 SET val = SUBSTRING_INDEX(str, ' ', 1);
39 INSERT INTO temp_values (`value`) VALUES (TRIM(val));
40 SET str = INSERT(str, 1, LENGTH(val) + 1, '');
41 END LOOP;
42 SELECT DISTINCT(`value`) FROM temp_values WHERE CHAR_LENGTH(`value`) >= 5;
43END //
44DELIMITER ;
45
46SELECT CONCAT_WS(' ',
47 GROUP_CONCAT(col_a SEPARATOR ' '),
48 GROUP_CONCAT(col_b SEPARATOR ' '),
49 GROUP_CONCAT(col_c SEPARATOR ' ')
50 ) INTO @text
51FROM mytable;
52
53CALL split_string_to_table(@text);
54
55--------------
56| value |
57--------------
58| apple |
59--------------
60| orange |
61--------------
62| banana |
63--------------
64| fruit |
65--------------
66| green |
67--------------
68| yellow |
69--------------
70| frank |
71--------------
72| james |
73--------------
74| sarah-jane |
75--------------
76
77mysql -e "SELECT ... INTO OUTFILE 'x' ..." ...
78 tr ' ' "n" <x | awk 'length($1) >= 5' | sort | uniq
79
80SELECT DISTINCT results.col_a as "values"
81
82FROM(
83
84 SELECT DISTINCT
85 SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.col_a, ' ', numbers.n), ' ', -1) col_a
86 FROM (SELECT 1 n UNION ALL SELECT 2
87 UNION ALL SELECT 3 UNION ALL SELECT 4) numbers INNER JOIN tablename
88 ON CHAR_LENGTH(tablename.col_a)-CHAR_LENGTH(REPLACE(tablename.col_a, ' ', ''))>=numbers.n-1
89
90 UNION DISTINCT
91 SELECT DISTINCT
92 SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.col_b, ' ', numbers.n), ' ', -1) col_b
93 FROM (SELECT 1 n UNION ALL SELECT 2
94 UNION ALL SELECT 3 UNION ALL SELECT 4) numbers INNER JOIN tablename
95 ON CHAR_LENGTH(tablename.col_b)-CHAR_LENGTH(REPLACE(tablename.col_b, ' ', ''))>=numbers.n-1
96
97 UNION DISTINCT
98 SELECT DISTINCT
99 SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.col_c, ' ', numbers.n), ' ', -1) col_c
100 FROM (SELECT 1 n UNION ALL SELECT 2
101 UNION ALL SELECT 3 UNION ALL SELECT 4) numbers INNER JOIN tablename
102 ON CHAR_LENGTH(tablename.col_c)-CHAR_LENGTH(REPLACE(tablename.col_c, ' ', ''))>=numbers.n-1
103
104) AS results
105
106WHERE length(results.col_a) > 4
107
108+-----------+
109|values |
110+-----------+
111|apple |
112+-----------+
113|banana |
114+-----------+
115|frank |
116+-----------+
117|fruit |
118+-----------+
119|green |
120+-----------+
121|james |
122+-----------+
123|orange |
124+-----------+
125|sarah-jane |
126+-----------+
127|yellow |
128+-----------+
129
130(
131SELECT
132 SUBSTRING_INDEX(SUBSTRING_INDEX(t.col_a, ' ', ngen.num), ' ', -1) AS word
133FROM
134 tablename AS t
135 INNER JOIN
136 (SELECT 1 num UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6) AS ngen
137 ON LENGTH(REPLACE(t.col_a, ' ' , '')) <= LENGTH(t.col_a)-ngen.num+1
138WHERE CHAR_LENGTH(SUBSTRING_INDEX(SUBSTRING_INDEX(t.col_a, ' ', ngen.num), ' ', -1)) >= 5
139)
140
141UNION
142
143(
144SELECT
145 SUBSTRING_INDEX(SUBSTRING_INDEX(t.col_b, ' ', ngen.num), ' ', -1) AS word
146FROM
147 tablename AS t
148 INNER JOIN
149 (SELECT 1 num UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6) AS ngen
150 ON LENGTH(REPLACE(t.col_b, ' ' , '')) <= LENGTH(t.col_b)-ngen.num+1
151WHERE CHAR_LENGTH(SUBSTRING_INDEX(SUBSTRING_INDEX(t.col_b, ' ', ngen.num), ' ', -1)) >= 5
152)
153
154UNION
155
156(
157SELECT
158 SUBSTRING_INDEX(SUBSTRING_INDEX(t.col_c, ' ', ngen.num), ' ', -1) AS word
159FROM
160 tablename AS t
161 INNER JOIN
162 (SELECT 1 num UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6) AS ngen
163 ON LENGTH(REPLACE(t.col_c, ' ' , '')) <= LENGTH(t.col_c)-ngen.num+1
164WHERE CHAR_LENGTH(SUBSTRING_INDEX(SUBSTRING_INDEX(t.col_c, ' ', ngen.num), ' ', -1)) >= 5
165);
166
167| word |
168| ---------- |
169| apple |
170| orange |
171| fruit |
172| banana |
173| yellow |
174| green |
175| frank |
176| james |
177| sarah-jane |
178
179select col_a, col_b, col_c
180 into outfile '/tmp/words.csv'
181 fields terminated by ' ' escaped by ''
182 lines terminated by ' '
183 from tablename;
184
185create table tmp_words(word varchar(50));
186
187load data infile '/tmp/words.csv'
188 into table tmp_words
189 lines terminated by ' ';
190
191select distinct word from tmp_words where char_length(word) >= 5;
192
193drop table tmp_words;
194
195word
196----------
197apple
198orange
199green
200banana
201yellow
202frank
203james
204fruit
205sarah-jane
206
207set global innodb_ft_aux_table = 'test/tablename';
208
209select WORD
210from information_schema.INNODB_FT_INDEX_TABLE
211where char_length(WORD) >= 5
212union
213select WORD
214from information_schema.INNODB_FT_INDEX_CACHE
215where char_length(WORD) >= 5
216
217WORD
218------
219apple
220banana
221frank
222fruit
223green
224james
225orange
226sarah <-- !
227yellow
228
229SELECT MYWORD FROM (
230SELECT
231 DISTINCT SUBSTRING_INDEX(SUBSTRING_INDEX(CONCAT_WS(' ', COL_A, COL_B, COL_C), ' ', N.DIGIT+1), ' ', -1) MYWORD
232FROM
233 MYTABLE
234 INNER JOIN
235 (SELECT 0 DIGIT UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6) N
236 ON (LENGTH(REPLACE(CONCAT_WS(' ', COL_A, COL_B, COL_C), ' ' , '')) <= LENGTH(CONCAT_WS(' ', COL_A, COL_B, COL_C))-N.DIGIT) ) MYRESULT
237 WHERE LENGTH(MYRESULT.MYWORD)>=5
238
239SELECT DISTINCT word
240 FROM summary_table
241
242SELECT DISTINCT word
243FROM
244(SELECT DISTINCT SUBSTRING_INDEX(SUBSTRING_INDEX(words, '¬', digits.idx + 1), '¬', -1) word
245 FROM
246 (SELECT reg_replace(TRIM(CONCAT(col_a, ' ', col_b, ' ', col_c)),
247 '[[:space:]]+',
248 '¬',
249 TRUE,
250 1,
251 0) AS words
252 FROM table_name) delimited
253 INNER JOIN
254 (SELECT @row := @row + 1 as idx FROM
255 (SELECT 0 UNION ALL SELECT 1 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5
256 UNION ALL SELECT 6 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8
257 UNION ALL SELECT 9) t1,
258 (SELECT 0 UNION ALL SELECT 1 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5
259 UNION ALL SELECT 6 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8
260 UNION ALL SELECT 9) t2,
261 (SELECT 0 UNION ALL SELECT 1 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5
262 UNION ALL SELECT 6 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8
263 UNION ALL SELECT 9) t3,
264 (SELECT 0 UNION ALL SELECT 1 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5
265 UNION ALL SELECT 6 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8
266 UNION ALL SELECT 9) t4,
267 (SELECT @row := -1) t5) digits
268 ON LENGTH(REPLACE(words, '¬' , '')) <= LENGTH(words) - digits.idx) subq
269WHERE CHAR_LENGTH(word) >= 5
270ORDER BY word
271
272word
2731 apple
2742 banana
2753 bill
2764 blue
2775 dave
2786 frank
2797 fruit
2808 green
2819 james
28210 kiwi
28311 orange
28412 pink
28513 plum
28614 red
28715 sarah-jane
28816 sue
28917 yellow