generated from obsidianmd/obsidian-sample-plugin
-
Notifications
You must be signed in to change notification settings - Fork 62
/
scraper.ts
54 lines (44 loc) · 1.41 KB
/
scraper.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import { requestUrl } from 'obsidian'
/**
 * Reports whether `text` carries no content.
 *
 * Only `undefined`, `null` and the empty string count as blank; a string made
 * up solely of whitespace is NOT blank.
 *
 * @param text - Value to inspect. Nullable because callers pass the results of
 *   optional DOM lookups.
 * @returns `true` when `text` is `undefined`, `null` or `''`.
 */
function blank(text: string | null | undefined): boolean {
  return text === undefined || text === null || text === ''
}
/**
 * Reports whether `text` carries content, i.e. it is a non-empty string.
 *
 * `undefined`, `null` and `''` are rejected; a whitespace-only string is
 * accepted.
 *
 * @param text - Value to inspect. Nullable because callers pass the results of
 *   optional DOM lookups.
 * @returns `true` when `text` is a string with at least one character.
 */
function notBlank(text: string | null | undefined): boolean {
  // `!= null` deliberately matches both null and undefined.
  return text != null && text !== ''
}
/**
 * Fetches `url` and derives a display title for it.
 *
 * Resolution order:
 * 1. If the response declares a content type that is not HTML, the last path
 *    segment of the URL is returned.
 * 2. Otherwise the body is parsed as HTML and the `<title>` text is returned.
 * 3. If the title is empty, the `no-title` attribute of the `<title>` element
 *    is returned when it has a value.
 * 4. If none of the above produced a value, the URL itself is returned.
 *
 * @param url - Absolute URL to request.
 * @returns The resolved title, or `'Site Unreachable'` when the request or
 *   parsing throws (the error is logged to the console).
 */
async function scrape(url: string): Promise<string> {
  try {
    const response = await requestUrl(url)

    // HTTP header names are case-insensitive, so do not assume a lower-case key.
    const headers: Record<string, string | undefined> = response.headers ?? {}
    const headerName = Object.keys(headers).find((name) => name.toLowerCase() === 'content-type')
    const contentType = headerName === undefined ? undefined : headers[headerName]

    // Only skip HTML parsing when the server explicitly declares a non-HTML type.
    // A missing header must not be reported as an unreachable site.
    if (contentType !== undefined && !contentType.toLowerCase().includes('text/html')) {
      return getUrlFinalSegment(url)
    }

    const doc = new DOMParser().parseFromString(response.text, 'text/html')
    const title = doc.querySelector('title')
    const titleText = title?.innerText ?? ''

    if (blank(titleText)) {
      // If site is javascript based and has a no-title attribute when unloaded, use it.
      const noTitle = title?.getAttribute('no-title') ?? ''
      if (notBlank(noTitle)) {
        return noTitle
      }
      // Otherwise the site has no title or requires javascript: fall back to the URL.
      return url
    }
    return titleText
  } catch (ex) {
    console.error(ex)
    return 'Site Unreachable'
  }
}
/**
 * Returns the last non-empty path segment of `url`, for use as a display name
 * when the resource is not an HTML page.
 *
 * Trailing slashes (including repeated ones) are ignored, and the query string
 * and fragment are not part of the result. The segment is returned as it
 * appears in the parsed pathname; it is not percent-decoded.
 *
 * @param url - URL to inspect.
 * @returns The final path segment, or `'File'` when `url` cannot be parsed or
 *   its path has no segments (e.g. `https://example.com/`).
 */
function getUrlFinalSegment(url: string): string {
  const fallback = 'File'
  try {
    // Dropping empty pieces handles leading, trailing and doubled slashes alike.
    const segments = new URL(url).pathname.split('/').filter((segment) => segment !== '')
    return segments[segments.length - 1] ?? fallback
  } catch (_) {
    return fallback
  }
}
/**
 * Resolves a display title for the page at `url`.
 *
 * When `url` does not begin with an `http://` or `https://` scheme (matched
 * case-insensitively), `https://` is prepended before the page is requested.
 *
 * @param url - Page address, with or without an http(s) scheme.
 * @returns The title produced by `scrape` for the normalized URL.
 */
export default async function getPageTitle(url: string): Promise<string> {
  // Require the full "scheme://" prefix so hosts that merely begin with "http"
  // (e.g. "httpbin.org") still get a scheme added.
  const hasHttpScheme = /^https?:\/\//i.test(url)
  const target = hasHttpScheme ? url : 'https://' + url
  return scrape(target)
}