|
Thunder
Supporting actor
Joined: Thu Jun 09, 2011 1:44 pm Posts: 57
|
 [Solved] A script attempt for Amazon.com - failure
I decided to butcher a script that was already in use by Movienizer (film-tv_movie.pas) and try to get it to work with Amazon.com. I learned programming from my VCR, so you'll have an idea how bad this is going to get. I will post what I came up with below. I ran into a problem regarding the search page and the images folder. I don't know where Amazon keeps its goodies, and I'm not sure how to find that out. It also looks as if the script was already using Amazon to fetch some of the information before my fiddling. I thought it might work, just without being able to fetch images, so I gave it a go. It didn't work, so is there anyone who can help me out a bit? Right now, if I try to get a movie with this script, it gets to the page where it's downloading, but nothing comes through and it seems stuck there (though I can still use "cancel"). Initially I received a pop-up, but I can't say precisely what it said. I didn't memorize it since I thought it would appear again.
I've added a few questions to the script below just for this post. They are enclosed in */ ... */ markers and shown in bold red; they are not part of the script itself. If it helps, I took a semester of VB about 7 years ago, and that's the extent of my programming knowledge.
// Script metadata and the global state shared by every routine in this script.
const
  script_name = 'Amazon.com';   // the closing quote was missing in the original
  language = 'English';
  version = 15;
  author = 'Vlad';
  PCount = 12;                  // number of entries in the prefixes table
  // MaxPicturesCount = 3;
  // NOTE(review): GetInfo reads MaxPicturesCount; it is left commented out here
  // on the assumption that the host application supplies it - confirm.

var
  // NOTE(review): several routines read/write a global named `all` (the last
  // downloaded page) that is not declared in this list - confirm that the host
  // provides it, otherwise add `all: string;` here.
  title, MovieName, s, s2, s3, s4, year, director, name, character, BasicURL,
    country, description, salary: string;
  episodes, movie_code, movie_site_url, title_code, imdb_code: string;
  titles: array of string;
  titles_count: integer;
  i, j: integer;
  director_code, code: string;
  directors, writers, actors, characters, genres, imdb_codes: array of string;
  duration: integer;
  rating, imdb_rating: double;
  front_cover_url: string;
  posters_exists, ask_amazon: boolean;
  original_title, mpaa, mpaa_rating: string;
  prefixes: array [1..PCount] of string;
// Parses the search page held in the global `all` and reports every hit to the
// host through SetSearchResult.
//
// Returns the number of results registered (0 when nothing was found).
//
// NOTE(review): every HTML marker below ('<title>Ricerca ', '/scheda.php/film/',
// '<div id="ricerca">', ...) was copied from the film.tv.it script this file was
// derived from. Such markers are found by viewing the HTML source of the target
// site's pages; they presumably have to be replaced with Amazon's own markup
// before this routine can match anything on amazon.com.
function MovieSearch: integer;
var
  i, j: integer;
  title: string;   // local; intentionally hides the global `title`
begin
  s := '';
  titles_count := 0;

  if pos('<title>Ricerca ', all) <= 0 then
  begin
    // The page title does not start with "Ricerca " ("Search"), so this is not
    // a result list: the site forwarded straight to the single matching movie.
    // The movie code is read from the link of the selected tab on that page.
    title_code := ParseString(all, '<li id="sel"><a href="/scheda.php/film/(\d+)/', 1);
    if title_code <> '' then
    begin
      SetSearchResult('movie', title_code);
      titles_count := 1;
    end;
  end
  else
  begin
    // Otherwise cut out the result list and walk through its entries.
    i := pos('<div id="ricerca">', all);
    if i > 0 then
    begin
      j := MyPos('<!-- fine ricerca -->', all, i+1);
      if j > 0 then
        s := copy(all, i+17, j-i-17);
    end;

    // An empty list or the "Non ho trovato nulla!" banner means: movie not found.
    if (s <> '') and (pos('<h3>Non ho trovato nulla!</h3>', all) <= 0) then
    begin
      i := pos('<dt><a href="/scheda.php/film/', s);
      while i > 0 do
      begin
        // An entry ends at </dd>, or at </dt> when it has no description part.
        j := MyPos('</dd>', s, i+1);
        if j <= 0 then
          j := MyPos('</dt>', s, i+1);
        // Truncated entry: stop instead of registering an empty result.
        if j <= 0 then
          break;

        titles_count := titles_count + 1;
        SetLength(titles, titles_count);

        s2 := copy(s, i, j-i);
        title_code := ParseString(s2, '<a href="/scheda.php/film/(\d+)/', 1);
        title := RemoveTags(s2);
        title := DecodeHTML(title);
        title := ClearExtraSpaces(title);
        // strip leading blanks left over after tag removal
        while (length(title) > 0) and (title[1] = ' ') do
          delete(title, 1, 1);

        titles[titles_count-1] := title_code;
        SetSearchResult(title, title_code);

        // drop the processed entry and look for the next one
        delete(s, 1, j+20);
        i := pos('<dt><a href="/scheda.php/film/', s);
      end;
    end;
  end;

  result := titles_count;
end;
// Looks a barcode (UPC-12) up through the Amazon web service and returns the
// title of the first item that carries one, or '' when nothing was found.
// A trailing " (...)" part of the title is dropped by the first pattern, and
// the first "[...]" group is removed from the result.
function GetMovieNameByUPC(barcode: string): string;
var
  request, xml, item, candidate, found: string;
  itemStart, itemEnd, openPos, closePos: integer;
begin
  found := '';

  request := format('http://webservices.amazon.com/onca/xml?Service=AWSECommerceService&SubscriptionId=0J0N83PT9M72BYH98YR2&Operation=ItemSearch&Keywords=%s&SearchIndex=Video&ResponseGroup=Request,Images,Tracks,ItemAttributes', [barcode]);
  request := ReplaceStr(request, ' ', '+', -1);
  xml := DownloadURL(request);

  // walk the <Item> elements until one yields a title
  itemStart := pos('<Item>', xml);
  while (itemStart > 0) and (found = '') do
  begin
    itemEnd := MyPos('</Item>', xml, itemStart+1);
    if itemEnd > 0 then
    begin
      item := copy(xml, itemStart, itemEnd-itemStart);
      candidate := ParseString(item, '<Title>(.*?) \(.*\)</Title>', 1);
      if candidate = '' then
        candidate := ParseString(item, '<Title>(.*?)</Title>', 1);
      if candidate <> '' then
        found := candidate;
    end;
    itemStart := MyPos('<Item>', xml, itemStart+1);
  end;

  // cut the first "[...]" group out of the title
  openPos := pos('[', found);
  if openPos > 0 then
  begin
    closePos := MyPos(']', found, openPos+1);
    if closePos > 0 then
      delete(found, openPos, closePos-openPos+1);
  end;

  result := found;
end;
// Builds the Amazon DVD search URL for a movie name.
//   MovieName - title to search for; blanks are turned into '+'.
// Returns the complete search URL.
function GetSearchURLbyName(MovieName: string): string;
var
  url: string;
begin
  // The template must be delimited by plain apostrophes (the original opened it
  // with a typographic quote). The literal "%3D" (URL-encoded '=') is written
  // as "%%3D" because format would otherwise read "%3D" as a width-3 decimal
  // specifier and reject the string argument.
  url := format('http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%%3Ddvd&field-keywords=%s&x=0&y=0', [MovieName]);
  url := ReplaceStr(url, ' ', '+', -1);
  result := url;
end;
// Fetches the search page at `url` into the global `all` and parses it.
// Returns -1 when the download reports failure ('-1'), otherwise the number of
// search results found by MovieSearch.
function DownloadSearchURL(url: string): integer;
begin
  all := DownloadURL(url);
  if all <> '-1' then
    result := MovieSearch
  else
    result := -1;
end;
// Returns the Amazon page URL for a movie code.
//   code        - movie code as handed to SetSearchResult
//   images_only - not used by this implementation
function GetMovieURL(code: string; images_only: boolean): string;
begin
  // The template is delimited by plain apostrophes; the original opened it with
  // a typographic quote, which is not a valid string delimiter.
  result := format('http://www.amazon.com/%s/', [code]);
end;
// Asks the Amazon web service for a cover image of the movie named by the
// global `original_title`.
// The first item whose title starts with original_title (case-insensitive)
// supplies the image; it is registered through SetImageName as image kind 1.
// Returns the image URL, or '' when no matching item with an image was found.
function GetAmazonImages: string;
var
  all, url, s2, s3, amazon_url: string;   // `all` is local and hides the global
  i, j: integer;
begin
  // explicit default; the original returned a never-assigned local here
  result := '';

  url := format('http://webservices.amazon.com/onca/xml?Service=AWSECommerceService&SubscriptionId=0J0N83PT9M72BYH98YR2&Operation=ItemSearch&Keywords=%s&SearchIndex=Video&ResponseGroup=Request,Images,Tracks,ItemAttributes', [original_title]);
  url := ReplaceStr(url, ' ', '+', -1);
  all := DownloadURL(url);

  // search image by title
  amazon_url := '';
  i := pos('<Item>', all);
  while (i > 0) and (amazon_url = '') do
  begin
    j := MyPos('</Item>', all, i+1);
    if j > 0 then
    begin
      s2 := copy(all, i, j-i);
      // prefer the title without a trailing " (...)" part
      s3 := ParseString(s2, '<Title>(.*?) \(.*\)</Title>', 1);
      if s3 = '' then
        s3 := ParseString(s2, '<Title>(.*?)</Title>', 1);
      s3 := DecodeHTML(s3);
      if s3 <> '' then
      begin
        // prefix match rather than equality
        if pos(UpperCase(original_title), UpperCase(s3)) = 1 then
          amazon_url := ParseString(s2, '<LargeImage><URL>(.*?)</URL>', 1);
      end;
    end;
    i := MyPos('<Item>', all, i+1);
  end;

  if amazon_url <> '' then
  begin
    SetImageName('', amazon_url, '', 1);
    result := amazon_url;
  end;
end;
// Downloads the image at `url` and stores it in the file `FileName`.
// Returns true when the download succeeded and the file was written,
// false on a failed/empty download or on any error while writing.
function SaveFrontCover(url, FileName: string): boolean;
var
  fs: TFileStream;
  s, referer: string;
begin
  // Sent as the referer of the download request (see DownloadURLWithReferer).
  // NOTE(review): this still names the film.tv.it gallery; for Amazon it should
  // presumably be the Amazon page the image is linked from - confirm.
  referer := 'http://www.film.tv.it/gallery.php/film/';
  s := DownloadURLWithReferer(url, referer);

  result := false;
  if (length(s) > 0) and (s <> '-1') then
  begin
    try
      fs := TFileStream.Create(FileName, fmCreate);
      try
        // NOTE(review): the string itself is passed as the buffer, as in the
        // original script - assumed to be the host's stream binding; confirm.
        fs.Write(s, Length(s));
      finally
        // release the file handle even when Write raises
        fs.Free;
      end;
      result := true;
    except
      result := false;
    end;
  end;
end;
// Moves a trailing article back to the front of a title, e.g.
// "Matrix, The" -> "The Matrix". The articles come from the global Prefixes
// table (PCount entries). A ", <article>" is only moved when it ends the name
// or is directly followed by ':'. Only the first matching article is handled.
// An article ending in an apostrophe is joined without a blank.
function MovePrefixLeft(name: string): string;
var
  idx, commaPos, plen, afterPos: integer;
  prefix, head, tail: string;
begin
  for idx := 1 to PCount do
  begin
    prefix := Prefixes[idx];
    plen := length(prefix);
    commaPos := pos(', ' + prefix, name);
    if commaPos > 0 then
    begin
      // index of the character right behind ", <article>"
      afterPos := commaPos + 2 + plen;
      if (commaPos = length(name) - plen - 1) or
         (length(name) >= afterPos) and (name[afterPos] = ':') then
      begin
        head := copy(name, 1, commaPos - 1);
        tail := copy(name, commaPos + plen + 2, length(name) - commaPos - plen - 2 + 1);
        if prefix[plen] = '''' then
          name := prefix + head + tail
        else
          name := prefix + ' ' + head + tail;
        break;
      end;
    end;
  end;
  Result := name;
end;
// Strips one pair of enclosing double quotes from `name`.
// The text is returned unchanged unless it both starts and ends with '"'.
function ClearExtraQuotes(name: string): string;
var
  n: integer;
begin
  Result := name;
  n := length(name);
  if n = 0 then
    exit;
  if (name[1] <> '"') or (name[n] <> '"') then
    exit;
  Result := copy(name, 2, n - 2);
end;
// Downloads the movie page at `url` and extracts title, original title,
// countries, year, genre, duration, directors, actors, cover image,
// description and snapshots, reporting them through the host's Set* calls and
// the script's global variables.
//
//   url         - movie page address (see GetMovieURL)
//   images_only - not used by this implementation
//
// Returns '-1' when the download failed, otherwise the downloaded page text.
// Parsing stops early when no title can be extracted.
//
// NOTE(review): apart from the amazon.com patterns, every HTML marker below
// still belongs to the film.tv.it script this file was derived from; they
// presumably need replacing with Amazon's markup - confirm against real pages.
function GetInfo(url: string; images_only: boolean): string;
var
  s, s2, s3, shot, shot_prefix, shot_ext: string;
  i, j, k, frames_count: integer;
  has_actors: boolean;
begin
  result := '';
  all := DownloadURL(url);
  if all = '-1' then
    result := '-1'
  else
    result := all;

  movie_code := ParseString(url, 'http://www.amazon.com/(\d+)/', 1);
  movie_site_url := GetMovieSiteURL(movie_code);

  // movie title: text of the first <h1>; stays empty when the element is
  // missing (the original copied an arbitrary slice of the page in that case)
  title := '';
  i := pos('<h1>', all);
  if i > 0 then
  begin
    j := MyPos('</h1>', all, i+1);
    if j > 0 then
      title := copy(all, i+4, j-i-4);
  end;
  title := RemoveTags(title);
  title := DecodeHTML(title);
  title := ClearExtraSpaces(title);
  i := pos('>', title);
  if i > 0 then
    delete(title, 1, i);
  title := ClearExtraQuotes(title);
  title := MovePrefixLeft(title);

  if title = '' then
    exit;

  duration := 0;

  // original title
  i := pos('[<i>', all);
  if i > 0 then
  begin
    j := MyPos('</i>', all, i+1);
    s2 := copy(all, i+4, j-i-4);
    original_title := DecodeHTML(s2);
    original_title := ClearExtraSpaces(original_title);
  end;

  // Countries: a '/'-separated list inside the first country link.
  // English-speaking countries switch on the Amazon image lookup.
  i := pos('<a href="/cerca.php?nazione=', all);
  if i > 0 then
  begin
    i := MyPos('">', all, i+1);
    j := MyPos('</a>', all, i+1);
    s := copy(all, i+2, j-i-2);
    k := pos('/', s);
    while k > 0 do
    begin
      name := copy(s, 1, k-1);
      if name <> '' then
        SetCountry(name);
      if (name = 'USA') or (name = 'Gran Bretagna') or (name = 'Australia') then
        ask_amazon := true;
      delete(s, 1, k);
      k := pos('/', s);
    end;
    if s <> '' then
      SetCountry(s);
    if (s = 'USA') or (s = 'Gran Bretagna') or (s = 'Australia') then
      ask_amazon := true;
  end;

  // Year: the four characters following the year link prefix
  i := Pos('<a href="/cerca.php?anno=', all);
  if i > 0 then
    year := copy(all, i+25, 4);

  // Genres
  i := Pos('<a href="/cerca.php?genere=', all);
  if i > 0 then
  begin
    i := MyPos('">', all, i+1);
    j := MyPos('</a>', all, i+1);
    name := copy(all, i+2, j-i-2);
    if name <> '' then
      SetGenre(name);
  end;

  // Duration: the number between 'durata ' and the next apostrophe
  i := Pos('durata ', all);
  if i > 0 then
  begin
    j := MyPos('''', all, i+1);
    s := copy(all, i+7, j-i-7);
    try
      duration := StrToInt(s);
    except
      // best effort: a non-numeric value leaves duration at 0
    end;
  end;

  // directors and actors
  // directors
  has_actors := true;
  i := pos('Regia di', all);
  if i > 0 then
  begin
    j := MyPos('Con ', all, i+1);
    k := MyPos('</div>', all, i+1);
    // no 'Con ' inside this block: there is no actor list
    if (j > k) or (j = 0) then
    begin
      j := k;
      has_actors := false;
    end;
    s := copy(all, i, j-i);
    i := pos('<a href=', s);
    while i > 0 do
    begin
      k := MyPos('</a>', s, i+1);
      s2 := copy(s, i, k-i);
      code := ParseString(s2, '<a href="/scheda.php/persona/(\d+)/', 1);
      name := RemoveTags(s2);
      name := DecodeHTML(name);
      name := ClearExtraSpaces(name);
      if name <> '' then
        SetPersonName(name, '', 1, code);
      delete(s, 1, k+4);
      i := pos('<a href=', s);
    end;

    // actors
    if has_actors then
    begin
      i := j+4;   // j still marks 'Con ' found above
      j := MyPos('</div>', all, i+1);
      s := copy(all, i, j-i);
      i := pos('<a href=', s);
      while i > 0 do
      begin
        j := MyPos('</a>', s, i+1);
        s2 := copy(s, i, j-i);
        code := ParseString(s2, '<a href="/scheda.php/persona/(\d+)/', 1);
        name := RemoveTags(s2);
        name := DecodeHTML(name);
        name := ClearExtraSpaces(name);
        if name <> '' then
          SetPersonName(name, '', 3, code);
        delete(s, 1, j+4);
        i := pos('<a href=', s);
      end;
    end;
  end;

  // image
  i := pos('<img src="http://www.film.tv.it/imgbank/', all);
  if i > 0 then
  begin
    j := MyPos('"', all, i+11);
    s2 := copy(all, i, j-i+1);
    s3 := ParseString(s2, '<img src="(.*?)"', 1);
    if s3 = '/imgbank/DUMMY/locandina.gif' then
      s3 := ''
    else
      front_cover_url := s3;
  end;
  if front_cover_url <> '' then
    SetImageName('', front_cover_url, '', 1);

  // description: summary, plot and comment paragraphs joined by blank lines
  i := pos('<p class="sommario">', all);
  if i > 0 then
  begin
    j := MyPos('</p>', all, i+1);
    s2 := copy(all, i, j-i);
    description := RemoveTags(s2);
    description := description + '<br><br>';
  end;
  i := pos('<p class="trama">', all);
  if i > 0 then
  begin
    j := MyPos('</p>', all, i+1);
    s2 := copy(all, i, j-i);
    s2 := RemoveTags(s2);
    description := description + s2;
  end;
  i := pos('<p class="commento">', all);
  if i > 0 then
  begin
    j := MyPos('</p>', all, i+1);
    s2 := copy(all, i, j-i);
    s2 := RemoveTags(s2);
    description := description + '<br><br>';
    description := description + s2;
  end;
  description := DecodeHTML(description);

  // snapshots
  // The patterns below are now well-formed string literals: the original mixed
  // typographic quotes and stray apostrophes into them.
  // NOTE(review): the image host was a guess by the script's author, and the
  // final snapshot URL is still assembled with the film.tv.it gallery prefix -
  // confirm both against real page markup.
  frames_count := 0;
  i := pos('<div id="photogallery">', all);
  if i > 0 then
  begin
    j := MyPos('</div>', all, i+1);
    s2 := copy(all, i, j-i);
    i := pos('http://ecx.images-amazon.com/images/I/', s2);
    while (i > 0) and (frames_count < MaxPicturesCount) do
    begin
      j := MyPos('</a>', s2, i+1);
      s3 := copy(s2, i, j-i);
      if pos('JPG', s3) > 0 then
        shot_ext := 'JPG'
      else
        shot_ext := 'jpg';
      shot := ParseString(s3, 'http://ecx.images-amazon.com/images/I/.*?/(\d+).jpg', 1);
      if shot = '' then
        shot := ParseString(s3, 'http://ecx.images-amazon.com/images/I/.*?/(\d+).JPG', 1);
      shot_prefix := ParseString(s3, 'http://ecx.images-amazon.com/images/I/(.*?)/\d+.*?', 1);
      if shot <> '' then
      begin
        shot := 'http://www.film.tv.it/imgbank/GALLERY/' + shot_prefix + '/' + shot + '.' + shot_ext;
        SetImageName('', shot, '', 3);
        // the cover itself does not count against the snapshot limit
        if shot <> front_cover_url then
          frames_count := frames_count + 1;
      end;
      delete(s2, 1, j+10);
      i := pos('http://ecx.images-amazon.com/images/I/', s2);
    end;
  end;
end;
// Runs the Amazon image lookup, but only when the global ask_amazon flag is
// set. The URL returned by GetAmazonImages is not used here.
procedure GetImages;
begin
  if not ask_amazon then
    exit;
  GetAmazonImages;
end;
// Returns the site URL of a movie: 'http://www.amazon.com/<code>/',
// or '' when no code is given.
function GetMovieSiteURL(code: string): string;
begin
  result := '';
  if code <> '' then
    result := 'http://www.amazon.com/' + code + '/';
end;
// Returns the site URL of a person: 'http://www.amazon.com/<code>/',
// or '' when no code is given.
function GetPersonSiteURL(code: string): string;
begin
  if code <> '' then
    result := 'http://www.amazon.com/' + code + '/'
  else
    result := '';
end;
// Script body: fills the table of leading articles used by MovePrefixLeft.
begin
  // English
  prefixes[1] := 'The';
  prefixes[2] := 'A';
  prefixes[3] := 'An';
  // articles from other languages
  prefixes[4] := 'La';
  prefixes[5] := 'Les';
  prefixes[6] := 'Des';
  prefixes[7] := 'L''';   // ends in an apostrophe: joined without a blank
  prefixes[8] := 'Un';
  prefixes[9] := 'El';
  prefixes[10] := 'Il';
  prefixes[11] := 'Le';
  prefixes[12] := 'Uno';
end.
Last edited by Thunder on Thu Jun 23, 2011 6:55 pm, edited 1 time in total.
|