diff --git a/crawler/src/main.ts b/crawler/src/main.ts index 67120ca..ac1baa3 100644 --- a/crawler/src/main.ts +++ b/crawler/src/main.ts @@ -17,5 +17,5 @@ const crawler = new PlaywrightCrawler({ // Add first URL to the queue and start the crawl. await crawler.run([ - "https://musescore.com/sheetmusic?complexity=1&instrument=2&license=to_modify_commercially%2Cto_use_commercially&recording_type=public-domain", + "https://musescore.com/sheetmusic?complexity=1&instrument=2&instrumentation=114&license=to_modify_commercially%2Cto_use_commercially&recording_type=public-domain&sort=rating", ]); diff --git a/crawler/src/routes.ts b/crawler/src/routes.ts index bb6946f..d7dc569 100644 --- a/crawler/src/routes.ts +++ b/crawler/src/routes.ts @@ -19,9 +19,9 @@ router.addDefaultHandler(async ({ enqueueLinks }) => { router.addHandler("SONG", async ({ request, page }) => { await Dataset.pushData({ url: request.loadedUrl }); await page.waitForSelector('aside div div section button[name="download"]'); - let title = await page.locator("h1").textContent(); - if (title == null) return - title = slug(title); + let og_title = await page.locator("h1").textContent(); + if (og_title == null) return + let title = slug(og_title); let artist = await page .locator( "body > div.js-page.react-container > div > section > aside > div:nth-child(5) > div > section > h3:nth-child(2) > a" @@ -71,7 +71,7 @@ router.addHandler("SONG", async ({ request, page }) => { `../musics/a/${title}/${title}.ini`, ` [Metadata] -Name=${title} +Name=${og_title} Artist=${artist} Genre=${genres} Album=