Social media scraping

Here you can find code integration examples for basic social media scraping tasks, such as scraping posts and profiles.

Instagram

Scraping a post

import requests

# Endpoint that receives the scraping task as a POST request.
TASKS_ENDPOINT = "https://scrape.smartproxy.com/v1/tasks"

# HTTP Basic credentials: the base64 encoding of "SPusername:SPpassword"
# (presumably placeholder values to be swapped for real credentials).
BASIC_AUTH = "Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk"

# Task description sent as the JSON request body; "url" is the Instagram
# post to fetch.
task = {
    "target": "universal",
    "parse": False,
    "headless": "html",
    "url": "https://www.instagram.com/p/Ch2hW9-JHTT/"
}

api_response = requests.post(
    TASKS_ENDPOINT,
    json=task,
    headers={
        "Accept": "application/json",
        "Content-Type": "application/json",
        "Authorization": BASIC_AUTH
    },
)

# Print the response body as text.
print(api_response.text)
<?php
require_once('vendor/autoload.php');

// Endpoint that receives the scraping task as a POST request.
$endpoint = 'https://scrape.smartproxy.com/v1/tasks';

// JSON request body; "url" is the Instagram post to fetch.
$taskJson = '{"target":"universal","parse":false,"headless":"html","url":"https://www.instagram.com/p/Ch2hW9-JHTT/"}';

// The Authorization value is HTTP Basic auth: the base64 encoding of
// "SPusername:SPpassword" (presumably placeholder credentials).
$requestOptions = [
  'body' => $taskJson,
  'headers' => [
    'Accept' => 'application/json',
    'Authorization' => 'Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk',
    'Content-Type' => 'application/json',
  ],
];

$httpClient = new \GuzzleHttp\Client();
$apiResponse = $httpClient->request('POST', $endpoint, $requestOptions);

// Output the response body.
echo $apiResponse->getBody();
const sdk = require('api')('@smartproxy/v1.0#25e7913l1ow524w');

// Hand the account username and password to the SDK.
sdk.auth('SPusername', 'SPpassword');

// Request parameters; `url` is the Instagram post to fetch.
const taskParams = {
  target: 'universal',
  parse: false,
  headless: 'html',
  url: 'https://www.instagram.com/p/Ch2hW9-JHTT/'
};

// Log the result on success, or the error on failure.
sdk
  .realTimeExample(taskParams)
  .then((result) => {
    console.log(result);
  })
  .catch((error) => {
    console.error(error);
  });
{
  "results": [
    {
      "content": "<html> Instagram page content</html>",
      "status_code": 200,
      "url": "https://www.instagram.com/p/Ch2hW9-JHTT/",
      "task_id": "6971442143109891073",
      "created_at": "2022-09-02 12:22:10",
      "updated_at": "2022-09-02 12:22:30"
    }
  ]
}

Scraping a profile

import requests

# Endpoint that receives the scraping task as a POST request.
TASKS_ENDPOINT = "https://scrape.smartproxy.com/v1/tasks"

# HTTP Basic credentials: the base64 encoding of "SPusername:SPpassword"
# (presumably placeholder values to be swapped for real credentials).
BASIC_AUTH = "Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk"

# Task description sent as the JSON request body; "url" is the Instagram
# profile to fetch.
task = {
    "target": "universal",
    "parse": False,
    "url": "https://www.instagram.com/eminem/",
    "headless": "html"
}

api_response = requests.post(
    TASKS_ENDPOINT,
    json=task,
    headers={
        "Accept": "application/json",
        "Content-Type": "application/json",
        "Authorization": BASIC_AUTH
    },
)

# Print the response body as text.
print(api_response.text)
<?php
require_once('vendor/autoload.php');

// Endpoint that receives the scraping task as a POST request.
$endpoint = 'https://scrape.smartproxy.com/v1/tasks';

// JSON request body; "url" is the Instagram profile to fetch.
$taskJson = '{"target":"universal","parse":false,"url":"https://www.instagram.com/eminem/","headless":"html"}';

// The Authorization value is HTTP Basic auth: the base64 encoding of
// "SPusername:SPpassword" (presumably placeholder credentials).
$requestOptions = [
  'body' => $taskJson,
  'headers' => [
    'Accept' => 'application/json',
    'Authorization' => 'Basic U1B1c2VybmFtZTpTUHBhc3N3b3Jk',
    'Content-Type' => 'application/json',
  ],
];

$httpClient = new \GuzzleHttp\Client();
$apiResponse = $httpClient->request('POST', $endpoint, $requestOptions);

// Output the response body.
echo $apiResponse->getBody();
const sdk = require('api')('@smartproxy/v1.0#25e7913l1ow524w');

// Hand the account username and password to the SDK.
sdk.auth('SPusername', 'SPpassword');

// Request parameters; `url` is the Instagram profile to fetch.
const taskParams = {
  target: 'universal',
  parse: false,
  url: 'https://www.instagram.com/eminem/',
  headless: 'html'
};

// Log the result on success, or the error on failure.
sdk
  .realTimeExample(taskParams)
  .then((result) => {
    console.log(result);
  })
  .catch((error) => {
    console.error(error);
  });
{
  "results": [
    {
      "content": "<html> Instagram page content</html>",
      "status_code": 200,
      "url": "https://www.instagram.com/eminem/",
      "task_id": "6971440655478339585",
      "created_at": "2022-09-02 12:16:15",
      "updated_at": "2022-09-02 12:16:37"
    }
  ]
}