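Enhance link extraction logic in the search function: when no result containers are found, fall back to extracting links directly from all anchors, improving robustness and accuracy of results. Also move the stable-release tag check in the PyPI publish workflow out of the job-level `if` and into a dedicated step whose output gates the build and publish steps.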

2026-03-11 08:54:34 +00:00 · 2025-09-30 20:32:51 -05:00 · 2025-09-30 20:32:51 -05:00 · 457725ee5a
commit 457725ee5a
parent 442060b2ef
2 changed files with 52 additions and 23 deletions
--- a/.github/workflows/pypi.yml
+++ b/.github/workflows/pypi.yml
@@ -38,23 +38,37 @@ jobs:
          password: ${{ secrets.TEST_PYPI_API_TOKEN }}
          repository_url: https://test.pypi.org/legacy/
  publish:
-    # Gate real PyPI publishing to stable SemVer tags only (e.g., v1.2.3 or 1.2.3)
-    if: startsWith(github.ref, 'refs/tags/') && (github.ref_name matches '^v?\\d+\\.\\d+\\.\\d+$')
+    # Gate real PyPI publishing to stable SemVer tags only
+    if: startsWith(github.ref, 'refs/tags/')
    name: Build and publish to PyPI
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
+      - name: Check if stable release
+        id: check_tag
+        run: |
+          TAG="${{ github.ref_name }}"
+          if echo "$TAG" | grep -qE '^v?[0-9]+\.[0-9]+\.[0-9]+$'; then
+            echo "is_stable=true" >> $GITHUB_OUTPUT
+            echo "Tag '$TAG' is a stable release. Will publish to PyPI."
+          else
+            echo "is_stable=false" >> $GITHUB_OUTPUT
+            echo "Tag '$TAG' is not a stable release (contains pre-release suffix). Skipping PyPI publish."
+          fi
      - name: Set up Python 3.9
+        if: steps.check_tag.outputs.is_stable == 'true'
        uses: actions/setup-python@v5
        with:
          python-version: 3.9
      - name: Install pypa/build
+        if: steps.check_tag.outputs.is_stable == 'true'
        run: >-
          python -m
          pip install
          build
          --user
      - name: Build binary wheel and source tarball
+        if: steps.check_tag.outputs.is_stable == 'true'
        run: >-
          python -m
          build
@@ -63,7 +77,7 @@ jobs:
          --outdir dist/
          .
      - name: Publish distribution to PyPI
-        if: startsWith(github.ref, 'refs/tags')
+        if: steps.check_tag.outputs.is_stable == 'true'
        uses: pypa/gh-action-pypi-publish@master
        with:
          password: ${{ secrets.PYPI_API_TOKEN }}
--- a/app/routes.py
+++ b/app/routes.py
@@ -411,28 +411,43 @@ def search():
        # Find all result containers (using known result classes)
        result_divs = json_soup.find_all('div', class_=['ZINbbc', 'ezO2md'])
        
-        for div in result_divs:
-            # Find the first valid link in this result container
-            link = None
-            for a in div.find_all('a', href=True):
-                if a['href'].startswith('http'):
-                    link = a
-                    break
-            
-            if not link:
-                continue
+        if result_divs:
+            # Process structured Google results with container divs
+            for div in result_divs:
+                # Find the first valid link in this result container
+                link = None
+                for a in div.find_all('a', href=True):
+                    if a['href'].startswith('http'):
+                        link = a
+                        break
                
-            href = link['href']
-            if href in seen:
-                continue
-            
-            # Get all text from the result container, not just the link
-            text = div.get_text(separator=' ', strip=True)
-            if not text:
-                continue
+                if not link:
+                    continue
+                    
+                href = link['href']
+                if href in seen:
+                    continue
                
-            seen.add(href)
-            results.append({'href': href, 'text': text})
+                # Get all text from the result container, not just the link
+                text = div.get_text(separator=' ', strip=True)
+                if not text:
+                    continue
+                    
+                seen.add(href)
+                results.append({'href': href, 'text': text})
+        else:
+            # Fallback: extract links directly if no result containers found
+            for a in json_soup.find_all('a', href=True):
+                href = a['href']
+                if not href.startswith('http'):
+                    continue
+                if href in seen:
+                    continue
+                text = a.get_text(strip=True)
+                if not text:
+                    continue
+                seen.add(href)
+                results.append({'href': href, 'text': text})

        return jsonify({
            'query': urlparse.unquote(query),