-
Notifications
You must be signed in to change notification settings - Fork 1
/
extractor.php
66 lines (56 loc) · 1.36 KB
/
extractor.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
<?php
/*
* Name: ImportantWordExtractor
* Author: Max Base
* Date: 22 Sep, 2021
* Repository: https://github.com/BaseMax/ImportantWordExtractor
*/
/**
 * Counts how often each word occurs in a text file.
 *
 * Usage: construct with a file path, call extract() to get the frequency
 * table, and optionally pass that table to saveFile() to write a report.
 */
class WordExtractor
{
    /** @var string Path of the text file to analyse. */
    private $filepath = '';

    /**
     * @param string $filepath Path of the text file to analyse.
     */
    public function __construct($filepath)
    {
        $this->filepath = $filepath;
    }

    /**
     * Reads the file, strips punctuation, lower-cases every word and counts
     * the occurrences of each one.
     *
     * Side effect: the normalised word list (one word per line, in reading
     * order) is written to "words.txt" in the current working directory.
     *
     * @return array|false Map of word => occurrence count, highest count
     *                     first; false when the path is not a readable
     *                     regular file or its content cannot be processed.
     */
    public function extract()
    {
        // file_exists() would also accept directories, which cannot be read
        // as text; require a readable regular file instead.
        if (!is_file($this->filepath) || !is_readable($this->filepath)) {
            return false;
        }

        $data = file_get_contents($this->filepath);
        if ($data === false) {
            return false;
        }

        // Drop brackets, sentence punctuation and quotes.
        $data = preg_replace('/[()\[\]{}.?!,;"\']/', '', $data);
        if ($data === null) {
            return false;
        }

        // Split on runs of whitespace. PREG_SPLIT_NO_EMPTY keeps leading or
        // trailing whitespace (and empty files) from producing an empty
        // "word" that would otherwise be counted and saved.
        $words = preg_split('/[ \r\n\t]+/', $data, -1, PREG_SPLIT_NO_EMPTY);
        if ($words === false) {
            return false;
        }

        $words = array_map(function ($word) {
            return mb_strtolower($word);
        }, $words);

        // Best-effort dump of the raw word list; a failed write does not
        // prevent the counts from being returned.
        $this->saveWords($words);

        $counts = array_count_values($words);

        // Ascending sort followed by a reversal yields descending counts.
        // preserve_keys must be true: PHP stores numeric words such as
        // "2021" as integer keys, and array_reverse() would otherwise
        // renumber them to 0, 1, ... and lose the word itself.
        asort($counts);

        return array_reverse($counts, true);
    }

    /**
     * Writes a frequency table to "output.txt" in the current working
     * directory, one "<count>\t: <word>" line per entry.
     *
     * @param array $words Map of word => occurrence count, as returned by extract().
     *
     * @return bool True when the file was written, false on failure.
     */
    public function saveFile($words)
    {
        $data = "";
        foreach ($words as $word => $repeat) {
            $data .= "$repeat\t: $word\n";
        }

        return file_put_contents("output.txt", $data) !== false;
    }

    /**
     * Writes a list of words to "words.txt" in the current working
     * directory, one word per line.
     *
     * @param array $words List of words.
     *
     * @return bool True when the file was written, false on failure.
     */
    public function saveWords($words)
    {
        $data = "";
        foreach ($words as $word) {
            $data .= "$word\n";
        }

        return file_put_contents("words.txt", $data) !== false;
    }
}