crawler piano only

This commit is contained in:
GitBluub
2023-05-16 00:44:58 +09:00
committed by Clément Le Bihan
parent 3335516f0e
commit 9b12c76978
2 changed files with 49 additions and 32 deletions

View File

@@ -1,5 +1,5 @@
import { PlaywrightCrawler, Dataset } from 'crawlee';
import { router } from './routes.js';
import { PlaywrightCrawler, Dataset } from "crawlee";
import { router } from "./routes.js";
// PlaywrightCrawler crawls the web using a headless
// browser controlled by the Playwright library.
const crawler = new PlaywrightCrawler({
@@ -16,4 +16,6 @@ const crawler = new PlaywrightCrawler({
});
// Add first URL to the queue and start the crawl.
await crawler.run(['https://musescore.com/sheetmusic?license=to_modify_commercially%2Cto_use_commercially&recording_type=public-domain']);
await crawler.run([
"https://musescore.com/sheetmusic?complexity=1&instrument=2&license=to_modify_commercially%2Cto_use_commercially&recording_type=public-domain",
]);

View File

@@ -1,57 +1,70 @@
import { Dataset, createPlaywrightRouter } from 'crawlee';
import * as fs from 'fs';
import { sleep } from 'crawlee';
import { Dataset, createPlaywrightRouter } from "crawlee";
import * as fs from "fs";
import { sleep } from "crawlee";
export const router = createPlaywrightRouter();
router.addDefaultHandler(async ({ enqueueLinks }) => {
const songs = await enqueueLinks({
selector: 'article a.xrntp',
label: 'SONG',
selector: "article a.xrntp",
label: "SONG",
});
// Find a link to the next page and enqueue it if it exists.
const lists = await enqueueLinks({
selector: '.VECGt',
label: 'LIST',
selector: ".VECGt",
label: "LIST",
});
});
router.addHandler('SONG', async ({ request, page }) => {
router.addHandler("SONG", async ({ request, page }) => {
await Dataset.pushData({ url: request.loadedUrl });
await page.waitForSelector('aside div div section button[name="download"]');
const title = await page.locator('h1').textContent()
// const artist = 'a';
const artist = await page.locator('body > div.js-page.react-container > div > section > aside > div:nth-child(5) > div > section > h3:nth-child(2) > a').first().textContent()
//const genre = 'b';
const genres = await page.locator('body > div.js-page.react-container > div > section > aside > div:nth-child(6) > div > table > tbody > tr:nth-child(5) > td > div > a').allTextContents()
console.log("new song", title, artist, genres)
await page.locator('aside div div section button[name="download"]').click()
await page.waitForSelector('section.b_r17 button');
console.log("downloading Mxl")
const title = await page.locator("h1").textContent();
const artist = await page
.locator(
"body > div.js-page.react-container > div > section > aside > div:nth-child(5) > div > section > h3:nth-child(2) > a"
)
.first()
.textContent();
const genres = await page
.locator(
"body > div.js-page.react-container > div > section > aside > div:nth-child(6) > div > table > tbody > tr:nth-child(5) > td > div > a"
)
.allTextContents();
console.log("new song", title, artist, genres);
await page.locator('aside div div section button[name="download"]').click();
await page.waitForSelector("section.b_r17 button");
console.log("downloading Mxl");
const [downloadMxl] = await Promise.all([
// Start waiting for the download
page.waitForEvent('download'),
page.waitForEvent("download"),
// Perform the action that initiates download
page.locator('section.b_r17 section section div:nth-child(3) button').click(),
page
.locator("section.b_r17 section section div:nth-child(3) button")
.click(),
]);
// Save downloaded file somewhere
await downloadMxl.saveAs(`../musics/a/${title}/${title}.mxl`);
await page.locator('body > article > section > button').click();
await page.locator("body > article > section > button").click();
await page.waitForTimeout(1000);
await page.locator('aside div div section button[name="download"]').click()
await page.waitForSelector('section.b_r17 button');
console.log("downloading Midi")
await page.locator('aside div div section button[name="download"]').click();
await page.waitForSelector("section.b_r17 button");
console.log("downloading Midi");
const [downloadMidi] = await Promise.all([
// Start waiting for the download
page.waitForEvent('download'),
page.waitForEvent("download"),
// Perform the action that initiates download
page.locator('section.b_r17 section section div:nth-child(4) button').click(),
page
.locator("section.b_r17 section section div:nth-child(4) button")
.click(),
]);
// Save downloaded file somewhere
await downloadMidi.saveAs(`../musics/a/${title}/${title}.midi`);
fs.writeFile(`../musics/a/${title}/${title}.ini`, `
fs.writeFile(
`../musics/a/${title}/${title}.ini`,
`
[Metadata]
Name=${title}
Artist=${artist}
@@ -72,9 +85,11 @@ ChordTiming=0
Length=0
PedalPoint=0
Precision=0
`, () => { })
console.log("done downloading")
`,
() => { }
);
console.log("done downloading");
console.log("sleeping for 10k seconds")
//console.log("sleeping for 10k seconds")
//await sleep(10_000_000);
});