crawler piano only
This commit is contained in:
committed by
Clément Le Bihan
parent
3335516f0e
commit
9b12c76978
@@ -1,5 +1,5 @@
|
|||||||
import { PlaywrightCrawler, Dataset } from 'crawlee';
|
import { PlaywrightCrawler, Dataset } from "crawlee";
|
||||||
import { router } from './routes.js';
|
import { router } from "./routes.js";
|
||||||
// PlaywrightCrawler crawls the web using a headless
|
// PlaywrightCrawler crawls the web using a headless
|
||||||
// browser controlled by the Playwright library.
|
// browser controlled by the Playwright library.
|
||||||
const crawler = new PlaywrightCrawler({
|
const crawler = new PlaywrightCrawler({
|
||||||
@@ -16,4 +16,6 @@ const crawler = new PlaywrightCrawler({
|
|||||||
});
|
});
|
||||||
|
|
||||||
// Add first URL to the queue and start the crawl.
|
// Add first URL to the queue and start the crawl.
|
||||||
await crawler.run(['https://musescore.com/sheetmusic?license=to_modify_commercially%2Cto_use_commercially&recording_type=public-domain']);
|
await crawler.run([
|
||||||
|
"https://musescore.com/sheetmusic?complexity=1&instrument=2&license=to_modify_commercially%2Cto_use_commercially&recording_type=public-domain",
|
||||||
|
]);
|
||||||
|
|||||||
@@ -1,57 +1,70 @@
|
|||||||
import { Dataset, createPlaywrightRouter } from 'crawlee';
|
import { Dataset, createPlaywrightRouter } from "crawlee";
|
||||||
import * as fs from 'fs';
|
import * as fs from "fs";
|
||||||
import { sleep } from 'crawlee';
|
import { sleep } from "crawlee";
|
||||||
export const router = createPlaywrightRouter();
|
export const router = createPlaywrightRouter();
|
||||||
|
|
||||||
router.addDefaultHandler(async ({ enqueueLinks }) => {
|
router.addDefaultHandler(async ({ enqueueLinks }) => {
|
||||||
const songs = await enqueueLinks({
|
const songs = await enqueueLinks({
|
||||||
selector: 'article a.xrntp',
|
selector: "article a.xrntp",
|
||||||
label: 'SONG',
|
label: "SONG",
|
||||||
});
|
});
|
||||||
// Find a link to the next page and enqueue it if it exists.
|
// Find a link to the next page and enqueue it if it exists.
|
||||||
const lists = await enqueueLinks({
|
const lists = await enqueueLinks({
|
||||||
selector: '.VECGt',
|
selector: ".VECGt",
|
||||||
label: 'LIST',
|
label: "LIST",
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
router.addHandler('SONG', async ({ request, page }) => {
|
router.addHandler("SONG", async ({ request, page }) => {
|
||||||
await Dataset.pushData({ url: request.loadedUrl });
|
await Dataset.pushData({ url: request.loadedUrl });
|
||||||
await page.waitForSelector('aside div div section button[name="download"]');
|
await page.waitForSelector('aside div div section button[name="download"]');
|
||||||
const title = await page.locator('h1').textContent()
|
const title = await page.locator("h1").textContent();
|
||||||
// const artist = 'a';
|
const artist = await page
|
||||||
const artist = await page.locator('body > div.js-page.react-container > div > section > aside > div:nth-child(5) > div > section > h3:nth-child(2) > a').first().textContent()
|
.locator(
|
||||||
//const genre = 'b';
|
"body > div.js-page.react-container > div > section > aside > div:nth-child(5) > div > section > h3:nth-child(2) > a"
|
||||||
const genres = await page.locator('body > div.js-page.react-container > div > section > aside > div:nth-child(6) > div > table > tbody > tr:nth-child(5) > td > div > a').allTextContents()
|
)
|
||||||
console.log("new song", title, artist, genres)
|
.first()
|
||||||
await page.locator('aside div div section button[name="download"]').click()
|
.textContent();
|
||||||
await page.waitForSelector('section.b_r17 button');
|
const genres = await page
|
||||||
console.log("downloading Mxl")
|
.locator(
|
||||||
|
"body > div.js-page.react-container > div > section > aside > div:nth-child(6) > div > table > tbody > tr:nth-child(5) > td > div > a"
|
||||||
|
)
|
||||||
|
.allTextContents();
|
||||||
|
console.log("new song", title, artist, genres);
|
||||||
|
await page.locator('aside div div section button[name="download"]').click();
|
||||||
|
await page.waitForSelector("section.b_r17 button");
|
||||||
|
console.log("downloading Mxl");
|
||||||
const [downloadMxl] = await Promise.all([
|
const [downloadMxl] = await Promise.all([
|
||||||
// Start waiting for the download
|
// Start waiting for the download
|
||||||
page.waitForEvent('download'),
|
page.waitForEvent("download"),
|
||||||
// Perform the action that initiates download
|
// Perform the action that initiates download
|
||||||
page.locator('section.b_r17 section section div:nth-child(3) button').click(),
|
page
|
||||||
|
.locator("section.b_r17 section section div:nth-child(3) button")
|
||||||
|
.click(),
|
||||||
]);
|
]);
|
||||||
// Save downloaded file somewhere
|
// Save downloaded file somewhere
|
||||||
await downloadMxl.saveAs(`../musics/a/${title}/${title}.mxl`);
|
await downloadMxl.saveAs(`../musics/a/${title}/${title}.mxl`);
|
||||||
|
|
||||||
await page.locator('body > article > section > button').click();
|
await page.locator("body > article > section > button").click();
|
||||||
|
|
||||||
await page.waitForTimeout(1000);
|
await page.waitForTimeout(1000);
|
||||||
await page.locator('aside div div section button[name="download"]').click()
|
await page.locator('aside div div section button[name="download"]').click();
|
||||||
await page.waitForSelector('section.b_r17 button');
|
await page.waitForSelector("section.b_r17 button");
|
||||||
console.log("downloading Midi")
|
console.log("downloading Midi");
|
||||||
const [downloadMidi] = await Promise.all([
|
const [downloadMidi] = await Promise.all([
|
||||||
// Start waiting for the download
|
// Start waiting for the download
|
||||||
page.waitForEvent('download'),
|
page.waitForEvent("download"),
|
||||||
// Perform the action that initiates download
|
// Perform the action that initiates download
|
||||||
page.locator('section.b_r17 section section div:nth-child(4) button').click(),
|
page
|
||||||
|
.locator("section.b_r17 section section div:nth-child(4) button")
|
||||||
|
.click(),
|
||||||
]);
|
]);
|
||||||
// Save downloaded file somewhere
|
// Save downloaded file somewhere
|
||||||
await downloadMidi.saveAs(`../musics/a/${title}/${title}.midi`);
|
await downloadMidi.saveAs(`../musics/a/${title}/${title}.midi`);
|
||||||
|
|
||||||
fs.writeFile(`../musics/a/${title}/${title}.ini`, `
|
fs.writeFile(
|
||||||
|
`../musics/a/${title}/${title}.ini`,
|
||||||
|
`
|
||||||
[Metadata]
|
[Metadata]
|
||||||
Name=${title}
|
Name=${title}
|
||||||
Artist=${artist}
|
Artist=${artist}
|
||||||
@@ -72,9 +85,11 @@ ChordTiming=0
|
|||||||
Length=0
|
Length=0
|
||||||
PedalPoint=0
|
PedalPoint=0
|
||||||
Precision=0
|
Precision=0
|
||||||
`, () => { })
|
`,
|
||||||
console.log("done downloading")
|
() => { }
|
||||||
|
);
|
||||||
|
console.log("done downloading");
|
||||||
|
|
||||||
console.log("sleeping for 10k seconds")
|
//console.log("sleeping for 10k seconds")
|
||||||
//await sleep(10_000_000);
|
//await sleep(10_000_000);
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user