Skip to content

Commit

Permalink
Merge branch 'feature/184-improve-reddit-scraping' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
Marthym committed Nov 7, 2023
2 parents b3a9738 + aee1752 commit c85a06b
Show file tree
Hide file tree
Showing 10 changed files with 103 additions and 55 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ Baywatch is based on Springboot 3 and uses the Webflux model. Spring serves both
and backend, but it is advisable to place them behind a proxy for caching purposes.

In order to compile and run the Baywatch code, it is necessary to have installed versions
later than or equal to **Java 17**, **Node 18** and **Maven 3**.
later than or equal to **Java 21**, **Node 18** and **Maven 3**.

Find more information in [CONTRIBUTING.md](./CONTRIBUTING.md).
Take a look at our [Code of Conduct](./CODE_OF_CONDUCT.md)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
public class OpenGraphFilter implements NewsFilter {
private static final Set<String> SUPPORTED_SCHEMES = Set.of("http", "https");
private static final Pattern YOUTUBE_URI_PATTERN = Pattern.compile("(youtube|youtu\\.be|googlevideo|ytimg)");
private static final Pattern REDDIT_URI_PATTERN = Pattern.compile("www\\.reddit\\.com");
private final HeadScraper headScrapper;

@Override
Expand All @@ -35,7 +36,12 @@ public Mono<RawNews> filter(RawNews news) {
return Mono.just(news);
}
ScrapRequestBuilder scrapRequestBldr = ScrapRequest.builder(news.link());
if (YOUTUBE_URI_PATTERN.matcher(news.link().getHost()).find()) {

if (REDDIT_URI_PATTERN.matcher(news.link().getHost()).find()) {
// Skip Reddit: its OpenGraph tags are added by JavaScript after the page has loaded
return Mono.just(news);

} else if (YOUTUBE_URI_PATTERN.matcher(news.link().getHost()).find()) {
HttpCookie ytCookie = new HttpCookie("CONSENT", "YES+0");
ytCookie.setPath("/");
ytCookie.setDomain("youtube.com");
Expand All @@ -45,6 +51,7 @@ public Mono<RawNews> filter(RawNews news) {
scrapRequestBldr.addCookie(ytCookie);
}


return headScrapper.scrap(scrapRequestBldr.build())
.map(headMetas -> handleMetaData(news, headMetas))
.switchIfEmpty(Mono.just(news));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ public class RedditNewsFilter implements NewsFilter {
private static final Pattern LINK_EXTRACT_PATTERN =
Pattern.compile("href=\"(?<" + LINK + ">[^\"]*)\">\\[link]", Pattern.MULTILINE);

private final URI redditImage;

/**
 * Builds the filter with the image that {@code filter} attaches to the news it rewrites.
 *
 * @param redditImage the image location as a string; it is parsed eagerly with
 *                    {@link URI#create(String)}, so a malformed value fails at construction time
 * @throws IllegalArgumentException if {@code redditImage} is not a valid URI
 */
public RedditNewsFilter(String redditImage) {
    this.redditImage = URI.create(redditImage);
}

@Override
public Mono<RawNews> filter(RawNews news) {
if (!news.link().getHost().contains("reddit.com")) {
Expand All @@ -34,6 +40,7 @@ public Mono<RawNews> filter(RawNews news) {
return Mono.just(
news.withId(Hasher.identify(realLink))
.withLink(realLink)
.withImage(redditImage)
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import fr.ght1pc9kc.baywatch.scraper.domain.filters.RedditNewsFilter;
import fr.ght1pc9kc.baywatch.scraper.domain.filters.SanitizerFilter;
import fr.ght1pc9kc.scraphead.core.HeadScraper;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.annotation.Order;
Expand All @@ -22,8 +23,8 @@ public Scheduler getScraperScheduler() {

@Bean
@Order(1)
public NewsFilter redditNews() {
return new RedditNewsFilter();
public NewsFilter redditNews(@Value("${baywatch.scraper.reddit.image}") String redditImage) {
return new RedditNewsFilter(redditImage);
}

@Bean
Expand Down
2 changes: 2 additions & 0 deletions sandside/src/main/resources/application.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ baywatch:
conservation: ${BAYWATCH_SCRAPER_CONSERVATION:3m}
timeout: ${BAYWATCH_SCRAPER_TIMEOUT:2s}
dns.timeout: ${BAYWATCH_DNS_TIMEOUT:10s}
reddit:
image: ${BAYWATCH_SCRAPER_REDDIT_IMAGE:https://www.redditstatic.com/desktop2x/img/favicon/android-icon-192x192.png}
indexer:
enable: ${BAYWATCH_INDEXER_ENABLE:true}
directory: ${BAYWATCH_HOME:.}/.baywatch/feedidx
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class RedditNewsFilterTest {
&amp;#32; &lt;span&gt;&lt;a href=&quot;https://www.reddit.com/r/java/comments/k3hmfl/deploying_jakarta_ee_9_applications_to_wildfly/&quot;&gt;[commentaires]&lt;/a&gt;&lt;/span&gt;
""").build();

private final RedditNewsFilter tested = new RedditNewsFilter();
private final RedditNewsFilter tested = new RedditNewsFilter("https://www.jide.com/reddit.png");

@Test
void should_filter_reddit() {
Expand All @@ -33,6 +33,7 @@ void should_filter_reddit() {
() -> assertThat(actual.id()).isEqualTo("c74cb819fe7a596814406c9ec164dfa5d502fba8659ec13e634a33d1ae7cbd56"),
() -> assertThat(actual.title()).isEqualTo(RAW.title()),
() -> assertThat(actual.description()).isEqualTo(RAW.description()),
() -> assertThat(actual.image()).hasToString("https://www.jide.com/reddit.png"),
() -> assertThat(actual.link()).isEqualTo(URI.create("https://www.reddit.com/r/java/comments/k3hmfl/deploying_jakarta_ee_9_applications_to_wildfly/"))
)).verifyComplete();
}
Expand Down
43 changes: 17 additions & 26 deletions seaside/src/App.vue
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,11 @@ import NotificationArea from '@/common/components/notificationArea/NotificationA
import { EventType } from '@/techwatch/model/EventType.enum';
import { Notification } from '@/services/notification/Notification.type';
import { registerNotificationListener, unregisterNotificationListener } from '@/layout/services/ServerEventService';
import { refresh } from '@/security/services/AuthenticationService';
import notificationService from '@/services/notification/NotificationService';
import { useStore } from 'vuex';
import { Store, useStore } from 'vuex';
import { UPDATE_MUTATION as STATS_UPDATE_MUTATION } from '@/techwatch/store/statistics/StatisticsConstants';
import {
HAS_ROLE_USER_GETTER,
LOGOUT_MUTATION,
UPDATE_MUTATION as USER_UPDATE_MUTATION,
} from '@/store/user/UserConstants';
import { HAS_ROLE_USER_GETTER } from '@/store/user/UserConstants';
import { UserState } from '@/store/user/user';
@Component({
components: {
Expand All @@ -47,27 +43,22 @@ import {
},
})
export default class App extends Vue {
private readonly store;
private readonly store: Store<UserState>;
mounted(): void {
refresh().subscribe({
next: session => {
this.store.commit(USER_UPDATE_MUTATION, session.user);
this.registerSessionNotifications();
},
error: () => {
this.store.commit(LOGOUT_MUTATION);
unregisterNotificationListener(EventType.NEWS_UPDATE, this.onServerMessage);
const unwatch = this.store.watch(
(state, getters) => getters[HAS_ROLE_USER_GETTER],
newValue => {
unwatch();
if (newValue) {
this.registerSessionNotifications();
}
});
},
});
if (this.store.state.user.isAuthenticated) {
this.registerSessionNotifications();
} else {
unregisterNotificationListener(EventType.NEWS_UPDATE, this.onServerMessage);
const unwatch = this.store.watch(
(state, getters) => getters[HAS_ROLE_USER_GETTER],
newValue => {
unwatch();
if (newValue) {
this.registerSessionNotifications();
}
});
}
}
private registerSessionNotifications(): void {
Expand Down
18 changes: 15 additions & 3 deletions seaside/src/configuration/components/feedslist/FeedsList.vue
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
</th>
<td class="grid grid-cols-1 md:grid-cols-12 auto-cols-auto">
<FeedListItem :ref="vFeed.data._id" :view="vFeed"
@item-update="itemUpdate" @item-delete="itemDelete"/>
@item-view="itemView" @item-update="itemUpdate" @item-delete="itemDelete"/>
</td>
</tr>
</template>
Expand Down Expand Up @@ -87,22 +87,29 @@ import { actionServiceRegisterFunction, actionServiceUnregisterFunction } from '
import { defineAsyncComponent } from 'vue';
import { AlertResponse, AlertType } from '@/common/components/alertdialog/AlertDialog.types';
import FeedActions from '@/configuration/components/feedslist/FeedActions.vue';
import { useStore } from 'vuex';
import { Store, useStore } from 'vuex';
import { UserState } from '@/store/user/user';
import { NEWS_FILTER_FEED_MUTATION } from '@/common/model/store/NewsStore.type';
import { Router, useRouter } from 'vue-router';
const FileUploadWindow = defineAsyncComponent(() => import('@/common/components/FileUploadWindow.vue'));
@Component({
name: 'FeedsList',
components: { FeedActions, FeedEditor, FeedListItem, FileUploadWindow },
setup() {
const store: Store<UserState> = useStore();
return {
userState: useStore().state.user,
store: store,
userState: store.state.user,
router: useRouter(),
};
},
})
export default class FeedsList extends Vue {
private store: Store<UserState>;
private userState: UserState;
private router: Router;
private feedEditor!: FeedEditor;
// noinspection JSMismatchedCollectionQueryUpdate
private feeds: FeedView[] = new Array(0);
Expand Down Expand Up @@ -167,6 +174,11 @@ export default class FeedsList extends Vue {
});
}
/**
 * Handles the `item-view` event of a feed row: restricts the news list to the
 * given feed, then navigates to the news page.
 *
 * @param item the feed whose `_id` and `name` become the filter id and label
 */
private itemView(item: Feed): void {
  const { _id: id, name: label } = item;
  this.store.commit(NEWS_FILTER_FEED_MUTATION, { id, label });
  this.router.push('/news');
}
private itemUpdate(item: Feed): void {
this.feedEditor.openFeed({ ...item }).pipe(
take(1),
Expand Down
37 changes: 20 additions & 17 deletions seaside/src/configuration/components/feedslist/FeedsListItem.vue
Original file line number Diff line number Diff line change
@@ -1,28 +1,31 @@
<template>
<FeedCard :view="{...view.data, icon: view.icon}"/>
<div class="md:col-span-2 btn-group justify-self-end">
<button class="btn btn-sm btn-square btn-ghost" @click="$emit('item-update', view.data)">
<PencilIcon class="h-6 w-6"/>
</button>
<button class="btn btn-sm btn-square btn-ghost" @click="$emit('item-delete', view.data._id)">
<TrashIcon class="h-6 w-6"/>
</button>
</div>
<FeedCard :view="{...view.data, icon: view.icon}"/>
<div class="md:col-span-2 btn-group justify-self-end">
<button class="btn btn-sm btn-square btn-ghost" @click="$emit('item-view', view.data)">
<EyeIcon class="h-6 w-6"/>
</button>
<button class="btn btn-sm btn-square btn-ghost" @click="$emit('item-update', view.data)">
<PencilIcon class="h-6 w-6"/>
</button>
<button class="btn btn-sm btn-square btn-ghost" @click="$emit('item-delete', view.data._id)">
<TrashIcon class="h-6 w-6"/>
</button>
</div>
</template>

<script lang="ts">
import {Component, Prop, Vue} from 'vue-facing-decorator';
import {FeedView} from "@/configuration/components/feedslist/model/FeedView";
import FeedCard from "@/common/components/FeedCard.vue";
import {PencilIcon, TrashIcon} from "@heroicons/vue/24/outline";
import { Component, Prop, Vue } from 'vue-facing-decorator';
import { FeedView } from '@/configuration/components/feedslist/model/FeedView';
import FeedCard from '@/common/components/FeedCard.vue';
import { EyeIcon, PencilIcon, TrashIcon } from '@heroicons/vue/24/outline';
@Component({
name: 'FeedsListItem',
components: {FeedCard, PencilIcon, TrashIcon},
emits: ['item-update', 'item-delete']
name: 'FeedsListItem',
components: { FeedCard, EyeIcon, PencilIcon, TrashIcon },
emits: ['item-update', 'item-delete', 'item-view'],
})
export default class FeedsListItem extends Vue {
@Prop() private view!: FeedView;
@Prop() private view!: FeedView;
}
</script>
32 changes: 28 additions & 4 deletions seaside/src/router.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ import { routes as adminRoutes } from '@/administration/router';
import { routes as configRoutes } from '@/configuration/router';
import { routes as teamsRoutes } from '@/teams/router';
import { routes as techwatchRoutes } from '@/techwatch/router';
import { useStore } from 'vuex';
import { Store, useStore } from 'vuex';
import { UserState } from '@/store/user/user';
import { refresh } from '@/security/services/AuthenticationService';
import { LOGOUT_MUTATION, UPDATE_MUTATION as USER_UPDATE_MUTATION } from '@/store/user/UserConstants';

const LoginPage = () => import('@/pages/LoginPage.vue');

Expand All @@ -20,11 +22,33 @@ const router = createRouter({
],
} as RouterOptions);

/**
 * Makes sure the authentication state is known before a navigation is resolved.
 *
 * - While `isAuthenticated` is still `undefined` in the store, `refresh()` is called:
 *   on success the returned user is committed and the promise resolves to `true`;
 *   on error the logout mutation is committed and the promise resolves to `false`.
 * - Once the state is known, no request is made and the promise resolves to the
 *   stored state, so a logged-out user (`false`) is reported as not authenticated.
 *
 * @param store the Vuex store holding the user state; it is both read and updated
 * @returns a promise resolving to `true` only when the user is authenticated
 */
function refreshSession(store: Store<UserState>): Promise<boolean> {
  // Read from the store we were given instead of resolving a second one with useStore().
  const isAuthenticated = store.state.user.isAuthenticated;
  if (isAuthenticated !== undefined) {
    // State already known: report it as is, never a blanket `true`.
    return Promise.resolve(isAuthenticated === true);
  }

  return new Promise(resolve => {
    refresh().subscribe({
      next: session => {
        store.commit(USER_UPDATE_MUTATION, session.user);
        resolve(true);
      },
      error: () => {
        store.commit(LOGOUT_MUTATION);
        resolve(false);
      },
      // Safety net: if the stream completes without emitting, settle from the store
      // so the navigation does not hang. A no-op when next/error already resolved.
      complete: () => resolve(store.state.user.isAuthenticated === true),
    });
  });
}

router.beforeEach(async to => {
const isAuthenticated = await refreshSession(useStore<UserState>());
if (to.matched.some(record => record.meta.requiresAuth)) {
const isAuthenticated = useStore<UserState>().state.user.isAuthenticated;
if (isAuthenticated !== true) {
return { name: 'LoginPage', query: {redirect: to.path} };
console.debug('to', to);
console.debug('isAuthenticated', isAuthenticated);
if (!isAuthenticated) {
return { name: 'LoginPage', query: { redirect: to.path } };
}
}
});
Expand Down

0 comments on commit c85a06b

Please sign in to comment.