fix: skip if no artist or song name
This commit is contained in:
29
crawler/package-lock.json
generated
29
crawler/package-lock.json
generated
@@ -11,10 +11,12 @@
|
||||
"dependencies": {
|
||||
"crawlee": "^3.0.0",
|
||||
"fs": "^0.0.1-security",
|
||||
"playwright": "^1.28.0"
|
||||
"playwright": "^1.28.0",
|
||||
"slug": "^8.2.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@apify/tsconfig": "^0.1.0",
|
||||
"@types/slug": "^5.0.5",
|
||||
"ts-node": "^10.8.0",
|
||||
"typescript": "^4.7.4"
|
||||
}
|
||||
@@ -778,6 +780,12 @@
|
||||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/slug": {
|
||||
"version": "5.0.5",
|
||||
"resolved": "https://registry.npmjs.org/@types/slug/-/slug-5.0.5.tgz",
|
||||
"integrity": "sha512-vcHM79Xu5ALOC90kf5S1B4XGbRl8VW6f1+6jpBmK/FLHi4AyWKAVENgMOyHFyjHV5vDbNRPtjsNJuPRqrLBOxw==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@types/tough-cookie": {
|
||||
"version": "4.0.2",
|
||||
"resolved": "https://registry.npmjs.org/@types/tough-cookie/-/tough-cookie-4.0.2.tgz",
|
||||
@@ -2760,6 +2768,14 @@
|
||||
"resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz",
|
||||
"integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ=="
|
||||
},
|
||||
"node_modules/slug": {
|
||||
"version": "8.2.3",
|
||||
"resolved": "https://registry.npmjs.org/slug/-/slug-8.2.3.tgz",
|
||||
"integrity": "sha512-fXjhAZszNecz855GUNIwW0+sFPi9WV4bMiEKDOCA4wcq1ts1UnUVNy/F78B0Aat7/W3rA+se//33ILKNMrbeYQ==",
|
||||
"bin": {
|
||||
"slug": "cli.js"
|
||||
}
|
||||
},
|
||||
"node_modules/source-map": {
|
||||
"version": "0.6.1",
|
||||
"resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz",
|
||||
@@ -3848,6 +3864,12 @@
|
||||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"@types/slug": {
|
||||
"version": "5.0.5",
|
||||
"resolved": "https://registry.npmjs.org/@types/slug/-/slug-5.0.5.tgz",
|
||||
"integrity": "sha512-vcHM79Xu5ALOC90kf5S1B4XGbRl8VW6f1+6jpBmK/FLHi4AyWKAVENgMOyHFyjHV5vDbNRPtjsNJuPRqrLBOxw==",
|
||||
"dev": true
|
||||
},
|
||||
"@types/tough-cookie": {
|
||||
"version": "4.0.2",
|
||||
"resolved": "https://registry.npmjs.org/@types/tough-cookie/-/tough-cookie-4.0.2.tgz",
|
||||
@@ -5233,6 +5255,11 @@
|
||||
"resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz",
|
||||
"integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ=="
|
||||
},
|
||||
"slug": {
|
||||
"version": "8.2.3",
|
||||
"resolved": "https://registry.npmjs.org/slug/-/slug-8.2.3.tgz",
|
||||
"integrity": "sha512-fXjhAZszNecz855GUNIwW0+sFPi9WV4bMiEKDOCA4wcq1ts1UnUVNy/F78B0Aat7/W3rA+se//33ILKNMrbeYQ=="
|
||||
},
|
||||
"source-map": {
|
||||
"version": "0.6.1",
|
||||
"resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz",
|
||||
|
||||
@@ -6,10 +6,12 @@
|
||||
"dependencies": {
|
||||
"crawlee": "^3.0.0",
|
||||
"fs": "^0.0.1-security",
|
||||
"playwright": "^1.28.0"
|
||||
"playwright": "^1.28.0",
|
||||
"slug": "^8.2.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@apify/tsconfig": "^0.1.0",
|
||||
"@types/slug": "^5.0.5",
|
||||
"ts-node": "^10.8.0",
|
||||
"typescript": "^4.7.4"
|
||||
},
|
||||
|
||||
@@ -2,6 +2,7 @@ import { Dataset, createPlaywrightRouter } from "crawlee";
|
||||
import * as fs from "fs";
|
||||
import { sleep } from "crawlee";
|
||||
export const router = createPlaywrightRouter();
|
||||
import slug from "slug";
|
||||
|
||||
router.addDefaultHandler(async ({ enqueueLinks }) => {
|
||||
const songs = await enqueueLinks({
|
||||
@@ -18,13 +19,17 @@ router.addDefaultHandler(async ({ enqueueLinks }) => {
|
||||
router.addHandler("SONG", async ({ request, page }) => {
|
||||
await Dataset.pushData({ url: request.loadedUrl });
|
||||
await page.waitForSelector('aside div div section button[name="download"]');
|
||||
const title = await page.locator("h1").textContent();
|
||||
const artist = await page
|
||||
let title = await page.locator("h1").textContent();
|
||||
if (title == null) return
|
||||
title = slug(title);
|
||||
let artist = await page
|
||||
.locator(
|
||||
"body > div.js-page.react-container > div > section > aside > div:nth-child(5) > div > section > h3:nth-child(2) > a"
|
||||
)
|
||||
.first()
|
||||
.textContent();
|
||||
if (artist == null) return
|
||||
artist = slug(artist);
|
||||
const genres = await page
|
||||
.locator(
|
||||
"body > div.js-page.react-container > div > section > aside > div:nth-child(6) > div > table > tbody > tr:nth-child(5) > td > div > a"
|
||||
|
||||
Reference in New Issue
Block a user