#region Methods Scan
// ... (other members omitted)
/// <summary>
/// Processes every record of tbl_Details whose [dtScan] is still NULL by
/// passing its IDDetail to <see cref="Scrape_Detail"/>, one record per iteration.
/// </summary>
/// <remarks>
/// The loop ends when the query finds no unscanned record, or returns early
/// (without the closing log line) as soon as optStop is checked.
/// Scrape_Detail can return without updating or deleting a record (page loaded,
/// but neither a description text nor an "expired" warning was found). Such a
/// record keeps [dtScan] = NULL and would be selected again on every iteration,
/// so it is excluded from the query for the remainder of this run.
/// </remarks>
private void Scrape_all_Details()
{
    //--------< Scrape_all_Details() >--------
    fx_Log("----< @Read Details >----");

    // IDs that were still unscanned after an attempt; left for a later run.
    System.Collections.Generic.List<int> skippedIds = new System.Collections.Generic.List<int>();
    bool hasLastId = false;
    int lastId = 0;

    //----< @Loop: all empty records >----
    while (true)
    {
        if (optStop.IsChecked == true) return;

        //< find record >
        string sSQL = "SELECT TOP 1 [IDDetail] FROM tbl_Details WHERE [dtScan] IS NULL";
        if (skippedIds.Count > 0)
        {
            // Only integer IDs read from the database are concatenated here.
            sSQL += " AND [IDDetail] NOT IN (" + string.Join(",", skippedIds) + ")";
        }
        DataRow row = clsDB.Get_DataRow(sSQL);
        //</ find record >

        if (row == null)
        {
            break; // nothing left to scan
        }

        int idDetail = Convert.ToInt32(row["IDDetail"]);
        if (hasLastId && idDetail == lastId)
        {
            // Same record again right after processing it: it was neither
            // marked as scanned nor deleted. Skip it instead of looping forever.
            fx_Log("Detail=" + idDetail + " still unscanned, skipped");
            skippedIds.Add(idDetail);
            continue;
        }

        hasLastId = true;
        lastId = idDetail;
        Scrape_Detail(idDetail);
    }
    //----</ @Loop: all empty records >----

    fx_Log("----</ @Read Details >----");
    //--------</ Scrape_all_Details() >--------
}
/// <summary>
/// Loads the detail page of one tbl_Details record, extracts description text,
/// price, creation date, keywords and type, writes them back to tbl_Details
/// and stores the image URLs of the page in tbl_Images.
/// </summary>
/// <param name="ID">Value of [IDDetail] identifying the record in tbl_Details.</param>
/// <remarks>
/// Outcomes, in order of evaluation:
/// - optStop is checked: nothing happens.
/// - The page could not be loaded (document is null): only [dtScan] is set.
/// - No description text and a warning box is present: the record is deleted.
/// - No description text and no warning box: the method returns without touching
///   the record, so [dtScan] stays NULL and the record counts as unscanned.
/// - Otherwise: the record is updated and its images are inserted.
/// </remarks>
private void Scrape_Detail(int ID)
{
    //--------< Scrape_Detail() >--------
    if (optStop.IsChecked == true) return;

    fx_Log("Detail=" + ID);

    string sURL = clsDB.Get_Value_as_String("URL", "tbl_Details", "[IDDetail]=" + ID);
    HtmlDocument doc = Web_Get_HtmlDocument(sURL);

    //< check >
    if (doc == null)
    {
        // Setting [dtScan] removes the record from the "[dtScan] IS NULL" work list.
        string sql_Error = "UPDATE tbl_Details SET [dtScan] = SYSDATETIME() WHERE IDDetail = " + ID;
        clsDB.Execute_SQL(sql_Error);
        return;
    }
    //</ check >

    //----< In Detail-Node >----
    //< Text >
    string sText = "";
    HtmlNode nodeText = doc.DocumentNode.SelectSingleNode("//p[@id=\"viewad-description-text\"]");
    if (nodeText != null)
    {
        sText = nodeText.InnerText.Trim();
    }
    //</ Text >

    //--< Check deleted >--
    if (sText == "")
    {
        // class outcomemessage-warning, ID=srchrslt-adexpired
        HtmlNode nodeWarning = doc.DocumentNode.SelectSingleNode("//div[@class=\"outcomemessage-warning\"]");
        if (nodeWarning != null)
        {
            // expired / deleted ad: remove the record
            string sql_Delete = "DELETE FROM tbl_Details WHERE IDDetail = " + ID;
            clsDB.Execute_SQL(sql_Delete);
        }
        // Without a warning box the record is left untouched (abort without text).
        return;
    }
    //--</ Check deleted >--

    fx_Log("text=" + (sText.Length > 50 ? sText.Substring(0, 50) : sText));

    int intPreis = Scrape_Detail_Read_Preis(doc);

    //--< Special >--
    string sErstellungsdatum = html_GetText_Kleinanzeigen_Info_Text(doc, "Erstellungsdatum:"); //19.02.2018
    string sSchlagwoerter = html_GetText_Kleinanzeigen_Info_Links(doc, "Schlagwörter:");
    string sArt = html_GetText_Kleinanzeigen_Info_Links(doc, "Art:");
    //--</ Special >--

    //< correct >
    // NOTE(review): the values below are concatenated into quoted SQL literals;
    // this presumably relies on clsCheck.correct_String making them safe - confirm.
    sText = clsCheck.correct_String(sText);
    sErstellungsdatum = clsCheck.correct_String(sErstellungsdatum);
    sSchlagwoerter = clsCheck.correct_String(sSchlagwoerter);
    sArt = clsCheck.correct_String(sArt);
    //</ correct >

    // in: 19.02.2018  out: 2018-02-19, or null when the date is missing/invalid
    string sDate_ISO = Scrape_Detail_Date_To_Iso(sErstellungsdatum);

    //< update >
    string sql_Update = "UPDATE tbl_Details ";
    sql_Update += Environment.NewLine + " SET [Text]='" + sText + "', [dtScan] = SYSDATETIME()";
    sql_Update += Environment.NewLine + " ,[Schlagwoerter]='" + sSchlagwoerter + "',[Art]='" + sArt + "'";
    if (sDate_ISO != null)
    {
        // The column is only written when a valid date was found on the page.
        sql_Update += Environment.NewLine + " ,[Erstellungsdatum]=CAST('" + sDate_ISO + "' AS DATETIME)";
    }
    sql_Update += Environment.NewLine + " ,[Preis]=" + intPreis;
    sql_Update += " WHERE IDDetail = " + ID;
    clsDB.Execute_SQL(sql_Update);
    //</ update >
    //----</ In Detail-Node >----

    Scrape_Detail_Save_Images(doc, ID);

    fx_Log("/Detail=" + ID);
    //--------</ Scrape_Detail() >--------
}

/// <summary>
/// Reads the price from the h2 element with id "viewad-price" and returns it as
/// an integer. Returns 0 when the element is missing, when nothing remains after
/// removing the known labels, or when the remaining text is not a whole number.
/// </summary>
private int Scrape_Detail_Read_Preis(HtmlDocument doc)
{
    HtmlNode nodePreis = doc.DocumentNode.SelectSingleNode("//h2[@id=\"viewad-price\"]");
    if (nodePreis == null) return 0;

    // Strip labels, currency sign and the "." separators before parsing.
    string sPreis = nodePreis.InnerText;
    sPreis = sPreis.Replace("Preis:", "");
    sPreis = sPreis.Replace("€", "");
    sPreis = sPreis.Replace("VB", "");
    sPreis = sPreis.Replace(".", "");
    sPreis = sPreis.Replace("Zu verschenken", "");
    sPreis = sPreis.Trim();
    if (sPreis == "") return 0;

    int intPreis;
    if (!int.TryParse(sPreis, out intPreis))
    {
        // Unexpected price text must not abort the scan of this record.
        fx_Log("Preis not numeric: " + sPreis);
        return 0;
    }
    return intPreis;
}

/// <summary>
/// Converts a date in the form dd.MM.yyyy (first 10 characters after trimming)
/// to yyyy-MM-dd. Returns null when the input is null, too short or not a valid date.
/// </summary>
private static string Scrape_Detail_Date_To_Iso(string sDate)
{
    if (sDate == null) return null;

    string sTrimmed = sDate.Trim();
    if (sTrimmed.Length < 10) return null;

    DateTime dtParsed;
    bool isValid = DateTime.TryParseExact(
        sTrimmed.Substring(0, 10),
        "dd.MM.yyyy",
        System.Globalization.CultureInfo.InvariantCulture,
        System.Globalization.DateTimeStyles.None,
        out dtParsed);

    return isValid
        ? dtParsed.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture)
        : null;
}

/// <summary>
/// Inserts one tbl_Images row per img element found inside the div with id
/// "viewad-images". Images without a src and images whose URL contains
/// "/common/" are skipped.
/// </summary>
private void Scrape_Detail_Save_Images(HtmlDocument doc, int ID)
{
    //----< Images >----
    HtmlNode node_Image_Area = doc.DocumentNode.SelectSingleNode("//div[@id=\"viewad-images\"]");
    if (node_Image_Area == null) return;

    // ".//img" is relative to the image area; "//img" would search the whole document.
    HtmlNodeCollection image_Nodes = node_Image_Area.SelectNodes(".//img");
    if (image_Nodes == null) return; // SelectNodes yields null when nothing matches

    foreach (HtmlNode imgNode in image_Nodes)
    {
        string sImageURL = imgNode.GetAttributeValue("src", "");
        if (sImageURL == "" || sImageURL.Contains("/common/")) continue;

        //< add >
        // The URL comes from the scraped page: double single quotes so it
        // cannot terminate the SQL string literal.
        string sql_Add = "INSERT INTO tbl_Images ([IDDetail],[URL]) VALUES(" + ID + " , '" + sImageURL.Replace("'", "''") + "')";
        clsDB.Execute_SQL(sql_Add);
        //</ add >
    }
    //----</ Images >----
}
//--------------------< region: Methods Scan >---------------------
#endregion Methods Scan
|