Reply to topic  [ 3 posts ] 
[Solved] A script attempt for Amazon.com - failure 
Author Message
Supporting actor

Joined: Thu Jun 09, 2011 1:44 pm
Posts: 57
Post [Solved] A script attempt for Amazon.com - failure
I decided to butcher a script that was already in use by Movienizer (film-tv_movie.pas) and try to get it to work with Amazon.com. I learned programming from my VCR, so you'll have an idea how bad this is going to get. I will post what I came up with below. I ran into a problem regarding the search page and the images folder. I don't know where Amazon keeps its goodies, and I'm not sure how to find that out. It also looks as if the script was already using Amazon to fetch some of the information before my fiddling. I thought it might work, but just not be able to fetch images, so I gave it a go. It didn't work, so is there anyone who can help me out a bit? Right now, if I try to get a movie with this script, it gets to the page where it's downloading, but nothing comes through and it seems stuck there (though I can still use "cancel"). Initially I received a pop-up, but I can't say precisely what it said. I didn't memorize it since I thought it would appear again.

I've added a few questions to the script below just for this post. They are between */ with bold red color. If it helps, I took a semester of VB about 7 years ago, so that's the extent of my knowledge.


const
  // Script metadata.
  script_name = 'Amazon.com'; // the closing quote was missing, which left the literal unterminated
  language = 'English';
  version = 15;
  author = 'Vlad';
  // Number of entries in the prefixes array (filled in the main block).
  PCount = 12;
  // NOTE(review): MaxPicturesCount is used by GetInfo but is commented out here;
  // presumably the host application supplies it - confirm before enabling.
  //MaxPicturesCount = 3;
var
  // Scratch strings and parsed movie fields shared between the functions below.
  title, MovieName, s, s2, s3, s4, year, director, name, character, BasicURL, country, description, salary: string;
  episodes, movie_code, movie_site_url, title_code, imdb_code: string;
  // Codes of the search results collected by MovieSearch.
  titles: array of string;
  titles_count: integer;
  i, j: integer;
  director_code, code: string;
  directors, writers, actors, characters, genres, imdb_codes: array of string;
  duration: integer;
  rating, imdb_rating: double;
  // URL of the front cover found by GetInfo.
  front_cover_url: string;
  // ask_amazon is set by GetInfo and read by GetImages.
  posters_exists, ask_amazon: boolean;
  original_title, mpaa, mpaa_rating: string;
  // Leading articles that MovePrefixLeft moves back to the front of a title.
  prefixes: array [1..PCount] of string;

// Parses the downloaded search page held in "all" and registers every match
// through SetSearchResult.
// Returns the number of results found (0 when nothing matched).
// NOTE(review): every marker searched for below is film.tv.it markup; none of
// it will match a page returned by Amazon, so the markers must be replaced
// with ones taken from the HTML source of the Amazon search page.
function MovieSearch: integer;
var
  i, j: integer;
  title: string;
begin
  s := '';
  titles_count := 0;
  // The page counts as a search-result list only when it contains
  // '<title>Ricerca '. When that marker is absent, the site has forwarded
  // us straight to a single movie's page (only one result).
  if (pos('<title>Ricerca ', all) <= 0) then begin
    // The pattern is copied from the site's own HTML: look at the page source
    // and find the link that carries the movie's numeric id.
    title_code := ParseString(all, '<li id="sel"><a href="/scheda.php/film/(\d+)/', 1);
    if title_code <> '' then begin
      SetSearchResult('movie', title_code);
      titles_count := 1;
    end;
  end
  // otherwise check the search results
  else begin
    // Cut out the part of the page that holds the result list.
    i := pos('<div id="ricerca">', all);
    if i > 0 then begin
      j := MyPos('<!-- fine ricerca -->', all, i+1);
      if j > 0 then
        s := copy(all, i+17, j-i-17);
    end;
    // An empty list or the site's "nothing found" banner means no results.
    if (s <> '') and (pos('<h3>Non ho trovato nulla!</h3>', all) <= 0) then begin
      i := pos('<dt><a href="/scheda.php/film/', s);
      while i > 0 do begin
        titles_count := titles_count+1;
        SetLength(titles, titles_count);
        // One entry runs up to </dd>, or up to </dt> when there is no </dd>.
        j := MyPos('</dd>', s, i+1);
        if j <= 0 then
          j := MyPos('</dt>', s, i+1);
        s2 := copy(s, i, j-i);
        title_code := ParseString(s2, '<a href="/scheda.php/film/(\d+)/', 1);

        title := RemoveTags(s2);
        title := DecodeHTML(title);
        title := ClearExtraSpaces(title);

        // Strip leading spaces left over after the tags were removed.
        while (length(title) > 0) and (title[1] = ' ') do
          delete(title, 1, 1);
        titles[titles_count-1] := title_code;
        SetSearchResult(title, title_code);
        // Drop the processed entry and look for the next one.
        delete(s, 1, j+20);
        i := pos('<dt><a href="/scheda.php/film/', s);
      end;
    end;
  end;
  result := titles_count;
end;

// Looks up a movie title by barcode (UPC-12) through the Amazon web service.
// Returns the first title found in the response, with one [bracketed] part
// removed, or an empty string when no item carries a title.
function GetMovieNameByUPC(barcode: string): string;
var
  request, response, item, found: string;
  item_start, item_end, open_br, close_br: integer;
begin
  found := '';
  request := format('http://webservices.amazon.com/onca/xml?Service=AWSECommerceService&SubscriptionId=0J0N83PT9M72BYH98YR2&Operation=ItemSearch&Keywords=%s&SearchIndex=Video&ResponseGroup=Request,Images,Tracks,ItemAttributes', [barcode]);
  request := ReplaceStr(request, ' ', '+', -1);
  response := DownloadURL(request);

  // Walk the <Item> elements until one yields a title.
  item_start := pos('<Item>', response);
  while (item_start > 0) and (found = '') do begin
    item_end := MyPos('</Item>', response, item_start+1);
    if item_end > 0 then begin
      item := copy(response, item_start, item_end-item_start);
      // Prefer the title without a trailing "(...)" part; fall back to the full title.
      found := ParseString(item, '<Title>(.*?) \(.*\)</Title>', 1);
      if found = '' then
        found := ParseString(item, '<Title>(.*?)</Title>', 1);
    end;
    item_start := MyPos('<Item>', response, item_start+1);
  end;

  // Remove the first [bracketed] fragment, if the title has one.
  open_br := pos('[', found);
  if open_br > 0 then begin
    close_br := MyPos(']', found, open_br+1);
    if close_br > 0 then
      delete(found, open_br, close_br-open_br+1);
  end;
  result := found;
end;

// Builds the Amazon DVD search URL for the given movie name.
// Spaces in the resulting URL are replaced with '+'.
function GetSearchURLbyName(MovieName: string): string;
var
  url: string;
begin
  // '%%' produces a literal '%'. Assuming the host's format follows Delphi's
  // Format rules, a bare '%3D' would be read as an integer specifier of
  // width 3 and fail on the string argument instead of emitting "%3D".
  url := format('http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%%3Ddvd&field-keywords=%s&x=0&y=0', [MovieName]);
  url := ReplaceStr(url, ' ', '+', -1);
  result := url;
end;

// Downloads the search page at "url" into "all" and parses it.
// Returns -1 when the download yields '-1', otherwise the number of
// results reported by MovieSearch.
function DownloadSearchURL(url: string): integer;
begin
  all := DownloadURL(url);
  if all <> '-1' then
    result := MovieSearch
  else
    result := -1;
end;

// Builds the movie page URL for a movie code.
// images_only is accepted for interface compatibility but is not used here.
function GetMovieURL(code: string; images_only: boolean): string;
begin
  // The literal was opened with a typographic quote, which is not a valid
  // string delimiter; it now uses a plain apostrophe.
  result := format('http://www.amazon.com/%s/', [code]);
end;

// Searches the Amazon web service for original_title and registers the large
// image of the first item whose title starts with original_title
// (case-insensitive) as the front cover.
// Returns the image URL, or an empty string when nothing matched.
function GetAmazonImages: string;
var
  res, response, request, item, item_title, cover: string;
  cur, stop: integer;
begin
  request := format('http://webservices.amazon.com/onca/xml?Service=AWSECommerceService&SubscriptionId=0J0N83PT9M72BYH98YR2&Operation=ItemSearch&Keywords=%s&SearchIndex=Video&ResponseGroup=Request,Images,Tracks,ItemAttributes', [original_title]);
  request := ReplaceStr(request, ' ', '+', -1);
  response := DownloadURL(request);

  // Walk the <Item> elements until one with a matching title supplies an image.
  cover := '';
  cur := pos('<Item>', response);
  while (cur > 0) and (cover = '') do begin
    stop := MyPos('</Item>', response, cur+1);
    if stop > 0 then begin
      item := copy(response, cur, stop-cur);
      // Prefer the title without a trailing "(...)" part; fall back to the full title.
      item_title := ParseString(item, '<Title>(.*?) \(.*\)</Title>', 1);
      if item_title = '' then
        item_title := ParseString(item, '<Title>(.*?)</Title>', 1);
      item_title := DecodeHTML(item_title);
      if item_title <> '' then
        if pos(UpperCase(original_title), UpperCase(item_title)) = 1 then
          cover := ParseString(item, '<LargeImage><URL>(.*?)</URL>', 1);
    end;
    cur := MyPos('<Item>', response, cur+1);
  end;

  if cover <> '' then begin
    SetImageName('', cover, '', 1);
    res := cover;
  end;
  result := res;
end;

// Downloads the image at "url" and saves it to FileName.
// Returns true when the data was downloaded and written, false when the
// download came back empty or as '-1', or when writing the file failed.
function SaveFrontCover(url, FileName: string): boolean;
var
  fs: TFileStream;
  s, referer: string;
begin
  result := false;
  // The referer is passed along with the download request.
  // NOTE(review): this is still the film.tv.it gallery address; for Amazon
  // images it should presumably be an Amazon page URL - confirm.
  referer := 'http://www.film.tv.it/gallery.php/film/';
  s := DownloadURLWithReferer(url, referer);
  if (length(s) > 0) and (s <> '-1') then begin
    try
      fs := TFileStream.Create(FileName, fmCreate);
      try
        fs.Write(s, Length(s));
      finally
        // Release the file handle even when Write raises.
        fs.Free;
      end;
      result := true;
    except
      result := false;
    end;
  end;
end;

// Moves a trailing article back to the front of a title, e.g.
// "Matrix, The" -> "The Matrix". A ", <prefix>" fragment is moved only when
// it ends the name or is directly followed by ':'. Only the first matching
// entry of Prefixes is applied.
function MovePrefixLeft(name: string): string;
var
  idx, at, plen, after: integer;
  pfx, lead: string;
begin
  for idx := 1 to PCount do
  begin
    pfx := Prefixes[idx];
    plen := length(pfx);
    at := pos(', '+pfx, name);
    if at > 0 then
    begin
      // Position of the first character after ", <prefix>".
      after := at+2+plen;
      if (at = length(name)-plen-1) or
         (length(name) >= after) and (name[after] = ':') then
      begin
        // A prefix ending in an apostrophe (L') joins the title without a space.
        lead := pfx;
        if pfx[plen] <> '''' then
          lead := lead+' ';
        name := lead + copy(name, 1, at-1) + copy(name, after, length(name)-at-plen-2+1);
        break;
      end;
    end;
  end;
  Result := name;
end;

// Strips one pair of enclosing double quotes from name.
// A name that does not both start and end with '"' is returned unchanged.
function ClearExtraQuotes(name: string): string;
var
  len: integer;
begin
  Result := name;
  len := length(name);
  if len = 0 then
    exit;
  if (name[1] = '"') and (name[len] = '"') then
    Result := copy(name, 2, len-2);
end;


// Downloads the movie page at "url" into "all", parses it and hands the
// extracted fields to the host (SetCountry, SetGenre, SetPersonName,
// SetImageName) or stores them in the script's global variables.
// Returns '-1' when the download yields '-1', otherwise the downloaded page.
// images_only is accepted but not used in this function.
// NOTE(review): apart from the image host in the snapshots section, every
// marker searched for below is film.tv.it markup, so nothing will be found
// on an Amazon page until the markers are replaced with ones taken from the
// Amazon page's HTML source.
// NOTE(review): movie_code is parsed with (\d+), i.e. digits only - confirm
// that this matches the codes Amazon actually uses in its URLs.
function GetInfo(url: string; images_only: boolean): string;
var
  s, s2, s3, shot, shot_prefix, shot_ext: string;
  i, j, k, frames_count: integer;
  actors: boolean;
begin
  result := '';
  all := DownloadURL(url);
  if all = '-1' then
    result := '-1'
  else
    result := all;

  movie_code := ParseString(url, 'http://www.amazon.com/(\d+)/', 1);
  movie_site_url := GetMovieSiteURL(movie_code);
  //movie title
  i := pos('<h1>', all);
  j := MyPos('</h1>', all, i+1);
  title := copy(all, i+4, j-i-4);
  title := RemoveTags(title);
  title := DecodeHTML(title);
  title := ClearExtraSpaces(title);
  // Drop everything up to and including a leftover '>'.
  i := pos('>', title);
  if i > 0 then
    delete(title, 1, i);
  title := ClearExtraQuotes(title);
  title := MovePrefixLeft(title);

  // Without a title there is nothing worth parsing.
  if title = '' then
    exit;

  duration := 0;
  //original title
  i := pos('[<i>', all);
  if i > 0 then begin
    j := MyPos('</i>', all, i+1);
    s2 := copy(all, i+4, j-i-4);
    original_title := DecodeHTML(s2);
    original_title := ClearExtraSpaces(original_title);
  end;
  //Countries (a '/'-separated list inside one link)
  i := pos('<a href="/cerca.php?nazione=', all);
  if i > 0 then begin
    i := MyPos('">', all, i+1);
    j := MyPos('</a>', all, i+1);
    s := copy(all, i+2, j-i-2);
    k := pos('/', s);
    while k > 0 do begin
      name := copy(s, 1, k-1);
      if name <> '' then SetCountry(name);
      // These countries switch on the Amazon image lookup in GetImages.
      if (name = 'USA') or (name = 'Gran Bretagna') or (name = 'Australia') then
        ask_amazon := true;
      delete(s, 1, k);
      k := pos('/', s);
    end;
    // Last (or only) country in the list.
    if s <> '' then SetCountry(s);
    if (s = 'USA') or (s = 'Gran Bretagna') or (s = 'Australia') then
      ask_amazon := true;
  end;

  //Year: the four characters following the marker
  i := Pos('<a href="/cerca.php?anno=', all);
  if i > 0 then
    year := copy(all, i+25, 4);

  //Genres
  i := Pos('<a href="/cerca.php?genere=', all);
  if i > 0 then begin
    i := MyPos('">', all, i+1);
    j := MyPos('</a>', all, i+1);
    name := copy(all, i+2, j-i-2);
    if name <> '' then SetGenre(name);
  end;

  //Duration: the text between 'durata ' and the next apostrophe
  i := Pos('durata ', all);
  if i > 0 then begin
    j := MyPos('''', all, i+1);
    s := copy(all, i+7, j-i-7);
    try
      duration := StrToInt(s);
    except
      // Best effort: a non-numeric value leaves duration at 0.
    end;
  end;

  //directors and actors
  //directors
  actors := true;
  i := pos('Regia di', all);
  if i > 0 then begin
    // The director list ends at 'Con ' (start of the cast) or, when there
    // is no cast list in this block, at the closing </div>.
    j := MyPos('Con ', all, i+1);
    k := MyPos('</div>', all, i+1);
    if (j > k) or (j = 0) then begin
      j := k;
      actors := false;
    end;
    s := copy(all, i, j-i);
    i := pos('<a href=', s);
    while i > 0 do begin
      k := MyPos('</a>', s, i+1);
      s2 := copy(s, i, k-i);
      code := ParseString(s2, '<a href="/scheda.php/persona/(\d+)/', 1);
      name := RemoveTags(s2);
      name := DecodeHTML(name);
      name := ClearExtraSpaces(name);
      if name <> '' then
        SetPersonName(name, '', 1, code);
      delete(s, 1, k+4);
      i := pos('<a href=', s);
    end;
    //actors
    if actors then begin
      i := j+4; //j is taken from the previous
      j := MyPos('</div>', all, i+1);
      s := copy(all, i, j-i);
      i := pos('<a href=', s);
      while i > 0 do begin
        j := MyPos('</a>', s, i+1);
        s2 := copy(s, i, j-i);
        code := ParseString(s2, '<a href="/scheda.php/persona/(\d+)/', 1);
        name := RemoveTags(s2);
        name := DecodeHTML(name);
        name := ClearExtraSpaces(name);
        if name <> '' then
          SetPersonName(name, '', 3, code);
        delete(s, 1, j+4);
        i := pos('<a href=', s);
      end;
    end;
  end;

  //image
  i := pos('<img src="http://www.film.tv.it/imgbank/', all);
  if i > 0 then begin
    j := MyPos('"', all, i+11);
    s2 := copy(all, i, j-i+1);
    s3 := ParseString(s2, '<img src="(.*?)"', 1);
    // The site's placeholder image is not a real cover.
    if s3 = '/imgbank/DUMMY/locandina.gif' then
      s3 := ''
    else
      front_cover_url := s3;
  end;
  if front_cover_url <> '' then
    SetImageName('', front_cover_url, '', 1);

  //description: summary, plot and comment paragraphs, joined with blank lines
  i := pos('<p class="sommario">', all);
  if i > 0 then begin
    j := MyPos('</p>', all, i+1);
    s2 := copy(all, i, j-i);
    description := RemoveTags(s2);
    description := description+'<br><br>';
  end;
  i := pos('<p class="trama">', all);
  if i > 0 then begin
    j := MyPos('</p>', all, i+1);
    s2 := copy(all, i, j-i);
    s2 := RemoveTags(s2);
    description := description+s2;
  end;
  i := pos('<p class="commento">', all);
  if i > 0 then begin
    j := MyPos('</p>', all, i+1);
    s2 := copy(all, i, j-i);
    s2 := RemoveTags(s2);
    description := description+'<br><br>';
    description := description+s2;
  end;
  description := DecodeHTML(description);

  //snapshots
  // NOTE(review): MaxPicturesCount is not declared in this file; presumably
  // the host application supplies it - confirm.
  frames_count := 0;
  i := pos('<div id="photogallery">', all);
  if i > 0 then begin
    j := MyPos('</div>', all, i+1);
    s2 := copy(all, i, j-i);
    // The image host below was the poster's guess for Amazon; the string
    // literals were repaired (typographic and stray quotes removed).
    i := pos('http://ecx.images-amazon.com/images/I/', s2);
    while (i > 0) and (frames_count < MaxPicturesCount) do begin
      j := MyPos('</a>', s2, i+1);
      s3 := copy(s2, i, j-i);
      if (pos('JPG', s3) > 0) then
        shot_ext := 'JPG'
      else
        shot_ext := 'jpg';
      shot := ParseString(s3, 'http://ecx.images-amazon.com/images/I/.*?/(\d+).jpg', 1);
      if shot = '' then
        shot := ParseString(s3, 'http://ecx.images-amazon.com/images/I/.*?/(\d+).JPG', 1);
      shot_prefix := ParseString(s3, 'http://ecx.images-amazon.com/images/I/(.*?)/\d+.*?', 1);
      if shot <> '' then begin
        // NOTE(review): the full-size URL is still rebuilt on the film.tv.it
        // image bank; this does not agree with the Amazon host matched above.
        shot := 'http://www.film.tv.it/imgbank/GALLERY/'+shot_prefix+'/'+shot+'.'+shot_ext;
        SetImageName('', shot, '', 3);
        // The front cover does not count towards the snapshot limit.
        if (shot <> front_cover_url) then
          frames_count := frames_count+1;
      end;
      delete(s2, 1, j+10);
      i := pos('http://ecx.images-amazon.com/images/I/', s2);
    end;
  end;

end;

// Fetches images from Amazon, but only when ask_amazon has been set.
procedure GetImages;
begin
  if not ask_amazon then
    exit;
  GetAmazonImages;
end;

// Builds the movie's page URL from its code; an empty code gives an empty URL.
function GetMovieSiteURL(code: string): string;
begin
  result := '';
  if code <> '' then
    result := 'http://www.amazon.com/'+code+'/';
end;

// Builds a person's page URL from their code; an empty code gives an empty URL.
function GetPersonSiteURL(code: string): string;
begin
  if code <> '' then
    result := 'http://www.amazon.com/'+code+'/'
  else
    result := '';
end;


begin
  // Fill the table of leading articles used by MovePrefixLeft.
  // MovePrefixLeft tries the entries in index order, so the indices must
  // stay as they are; only the grouping below is for readability.

  // English
  prefixes[1] := 'The';
  prefixes[2] := 'A';
  prefixes[3] := 'An';

  // Entries written without a trailing apostrophe
  prefixes[4] := 'La';
  prefixes[5] := 'Les';
  prefixes[6] := 'Des';
  prefixes[8] := 'Un';
  prefixes[9] := 'El';
  prefixes[10] := 'Il';
  prefixes[11] := 'Le';
  prefixes[12] := 'Uno';

  // Ends in an apostrophe, so it is joined to the title without a space
  prefixes[7] := 'L''';
end.


Last edited by Thunder on Thu Jun 23, 2011 6:55 pm, edited 1 time in total.



Thu Jun 16, 2011 12:55 am
Profile
Developer
User avatar

Joined: Wed Jan 23, 2008 11:37 am
Posts: 2495
Post Re: A script for Amazon.com
Unfortunately, this script is mainly the film-tv script, it does not parse any information that you actually receive from Amazon. You should take a look at the HTML source of the page that you receive and decide how to extract information. For learning, you can take a look at how the script does this for film-tv.

_________________
ImageImage


Thu Jun 23, 2011 9:12 am
Profile
Supporting actor

Joined: Thu Jun 09, 2011 1:44 pm
Posts: 57
Post Re: A script for Amazon.com
Yeah, I think I gave up on this idea, as I'm way out of my league. It sounded easy on your web site, but I've been comparing the two sites and Amazon seems much more complicated in their search language. They do assign a number, so I may take a look at the IMDB script, but I have to wait for my eyes to stop bleeding.

Thanks for the reply!


Thu Jun 23, 2011 12:32 pm
Profile
Display posts from previous:  Sort by  
Reply to topic   [ 3 posts ] 

Who is online

Users browsing this forum: No registered users and 4 guests


You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot post attachments in this forum

Search for:
Jump to:  
Powered by phpBB © 2000, 2002, 2005, 2007 phpBB Group.
Designed by STSoftware.